#!/usr/bin/env python3.1
#
# $HeadURL: https://svn.spodhuis.org/ksvn/pdp-bincommon/puny $
# $Id: puny 458 2011-02-06 23:43:49Z pdp@SPODHUIS.ORG $

"""
puny: convert domain-names to and from punycode

Converts multiple domains from argv.
By default is verbose, takes the input and shows encoding and decoding.
Given -t/--to-puny will just show the results of converting to punycode.
Given -f/--from-puny will show just the results of converting from punycode.

Offer translations too.
"""

__author__ = 'syscomet@gmail.com (Phil Pennock)'

import codecs
import getopt
import json
import re
import sys
import urllib.error
import urllib.parse
import urllib.request

GOOGLE_TRANSLATE_API_URL = 'https://www.googleapis.com/language/translate/v2'
# NOTE(review): an API credential is committed in source.  It is left in place
# so that behaviour is unchanged, but it should be rotated and loaded from
# configuration or the environment instead.
GOOGLE_TRANSLATE_API_KEY = 'AIzaSyDv3RiIxCRSTJE7-NVroL6m6kK9OV0UHLQ'
HUMAN_TARGET_LANGUAGE = 'en'

# ACE prefix which marks a label as punycode-encoded.
IDN_PREFIX = 'xn--'

# A label which can be used as-is: letters, digits and single hyphens, starting
# and ending with a letter or digit.  Python uses \Z, not \z.
SAFE_LABEL_RE = re.compile(
    r'^ (?: [A-Za-z0-9] (?: [A-Za-z0-9] | (?: - (?! -) ) )* )? [A-Za-z0-9] \Z',
    flags=re.VERBOSE)

# Any character outside this set makes a label worth offering a translation for.
_NEEDS_TRANSLATION_RE = re.compile(r'[^A-Za-z0-9_-]')

puny_codec = codecs.lookup('punycode')
utf8_codec = codecs.lookup('utf-8')


def _HasIdnPrefix(label):
  """Return True if label starts with the ACE prefix, in any letter case."""
  # DNS names are case-insensitive, so 'XN--' must be recognised as well.
  return label[:len(IDN_PREFIX)].lower() == IDN_PREFIX


def _EncodeLabel(label):
  """Return the punycode (ACE) form of a single label.

  Empty labels (as produced by a trailing root dot or by consecutive dots),
  labels matching SAFE_LABEL_RE and labels which already carry the ACE prefix
  are returned unchanged.
  """
  if not label or SAFE_LABEL_RE.match(label) or _HasIdnPrefix(label):
    return label
  return IDN_PREFIX + puny_codec.encode(label)[0].decode('ascii')


def _DecodeLabel(label):
  """Return the unicode form of a single label.

  Labels without the ACE prefix are returned unchanged.
  """
  if not _HasIdnPrefix(label):
    return label
  return puny_codec.decode(label[len(IDN_PREFIX):])[0]


def ToPuny(domain):
  """Convert each label of a dot-separated domain to punycode where needed.

  Args:
    domain: domain name as a string; may end with a dot for the root anchor.

  Returns:
    The domain with every label that needs it replaced by its 'xn--' form.
    Empty labels are preserved, so a trailing dot stays a trailing dot.
  """
  return '.'.join(_EncodeLabel(label) for label in domain.split('.'))


def FromPuny(domain):
  """Decode every punycode label of a dot-separated domain.

  Args:
    domain: domain name as a string.

  Returns:
    The domain with each 'xn--' label (prefix matched case-insensitively)
    replaced by its decoded unicode form; other labels are left untouched.
  """
  return '.'.join(_DecodeLabel(label) for label in domain.split('.'))


def ParseMixedPuny(domain):
  """Domain may contain mixed unicode/punycode.  Parse it all, return tuple.

  A single trailing empty label (final dot for the root anchor) is dropped
  before conversion.

  Returns:
    tuple containing:
      0: unicode form
      1: punycode form
  """
  components = domain.split('.')
  if not components[-1]:
    # final dot for root anchor
    components.pop()
  uni_parts = [_DecodeLabel(label) for label in components]
  puny_parts = [_EncodeLabel(label) for label in components]
  return '.'.join(uni_parts), '.'.join(puny_parts)


def Translate(text):
  """Use Google Translate to get local language.

  Performs an HTTPS request to GOOGLE_TRANSLATE_API_URL.

  Args:
    text: unicode text to translate into HUMAN_TARGET_LANGUAGE.

  Yields:
    2-tuple:
      translated text as unicode
      source language, or None

  Raises:
    urllib.error.URLError: the request failed.
    ValueError: the response body could not be decoded or parsed as JSON.
    LookupError: the response declared an unknown charset.
  """
  # API uses auto-detect source language if 'source' param not given
  # security of key in transit by using https
  urltext = urllib.parse.quote(utf8_codec.encode(text)[0])
  url = '{url}?key={key}&target={targetlang}&q={q}'.format(
      url=GOOGLE_TRANSLATE_API_URL,
      key=GOOGLE_TRANSLATE_API_KEY,
      targetlang=HUMAN_TARGET_LANGUAGE,
      q=urltext)

  response = urllib.request.urlopen(url)
  try:
    # The header may be absent; fall back to '' so the regex search is safe.
    content_type = response.getheader('content-type') or ''
    body = response.read()
  finally:
    # Always release the connection, even if reading fails.
    response.close()

  # JSON is UTF-8 unless told otherwise; UTF-8 is a superset of ASCII.
  decode_charset = 'utf-8'
  m = re.search(r'(?i)\bcharset=([^\s;]+)', content_type)
  if m:
    # The parameter value may legally be quoted: charset="UTF-8"
    decode_charset = m.group(1).strip('"\'')
  transblob = json.loads(codecs.lookup(decode_charset).decode(body)[0])

  if 'data' not in transblob:
    return
  if 'translations' not in transblob['data']:
    return
  for d in transblob['data']['translations']:
    lang = None
    if 'detectedSourceLanguage' in d:
      lang = d['detectedSourceLanguage']
    if 'translatedText' in d:
      yield (d['translatedText'], lang)


def VerboseDefault(domains):
  """Print input, unicode and punycode forms of each domain, plus translations.

  Translation is best-effort: if the lookup fails, a warning is written to
  stderr and processing continues with the next label.

  Args:
    domains: iterable of domain-name strings.
  """
  for index, arg in enumerate(domains):
    if index:
      # blank line between the reports of consecutive domains
      print()
    print('Input: ' + arg)
    res = ParseMixedPuny(arg)
    print('Unicode: {0}\nPunycode: {1}'.format(*res))
    for unilabel in res[0].split('.'):
      if not _NEEDS_TRANSLATION_RE.search(unilabel):
        continue
      try:
        # Translate is a generator; drain it here so that failures surface
        # inside this try block and not in the middle of printing.
        translations = list(Translate(unilabel))
      except (urllib.error.URLError, ValueError, LookupError) as err:
        sys.stderr.write(
            'Translation unavailable for "{0}": {1}\n'.format(unilabel, err))
        continue
      for trans in translations:
        if trans[1] is not None:
          print('Translation: "{0}" [{2}] -> "{1}"'.format(unilabel, trans[0], trans[1]))
        else:
          print('Translation: "{0}" -> "{1}"'.format(unilabel, trans[0]))


def _main():
  """Parse command-line options and run the requested conversion mode."""
  try:
    options_list, domains = getopt.getopt(sys.argv[1:], 'ft', ['from-puny', 'to-puny'])
  except getopt.GetoptError as err:
    # Bad command line: report it briefly instead of dumping a traceback.
    sys.exit('puny: {0}'.format(err))

  from_puny, to_puny = False, False
  for opt in options_list:
    if opt[0] in ('-f', '--from-puny'):
      from_puny = True
    elif opt[0] in ('-t', '--to-puny'):
      to_puny = True
    else:
      raise Exception('Unhandled option "{0}"'.format(opt[0]))

  if from_puny and to_puny:
    sys.exit('Converting both from and to punycode is unsupported')

  if from_puny:
    for arg in domains:
      print(FromPuny(arg))
  elif to_puny:
    for arg in domains:
      print(ToPuny(arg))
  else:
    VerboseDefault(domains)


if __name__ == '__main__':
  _main()

# vim: set ft=python sw=2 expandtab: