class NamePrep:
    """
    Implements preparation of internationalized domain names.

    This class implements preparing internationalized domain names using the
    rules defined in RFC 3491, section 4 (Conversion operations).

    We do not perform step 4 since we deal with unicode representations of
    domain names and do not convert from or to ASCII representations using
    punycode encoding. When such a conversion is needed, the C{idna} standard
    library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
    C{idna} itself assumes UseSTD3ASCIIRules to be false.

    The following steps are performed by C{prepare()}:

      - Split the domain name in labels at the dots (RFC 3490, 3.1)
      - Apply nameprep proper on each label (RFC 3491)
      - Enforce the restrictions on ASCII characters in host names by
        assuming STD3ASCIIRules to be true. (STD 3)
      - Rejoin the labels using the label separator U+002E (full stop).

    Every validation failure is reported by raising C{UnicodeError}.
    """

    # Prohibited characters: every ASCII code point that is not a letter, a
    # digit or the hyphen-minus (U+002D).
    prohibiteds = [
        unichr(n)
        for n in chain(
            range(0x00, 0x2c + 1),
            range(0x2e, 0x2f + 1),
            range(0x3a, 0x40 + 1),
            range(0x5b, 0x60 + 1),
            range(0x7b, 0x7f + 1),
        )
    ]

    def prepare(self, string):
        """
        Prepare a complete domain name.

        The name is split into labels with C{idna.dots}, each label is passed
        through L{nameprep}, and the results are rejoined with U+002E. A
        single trailing separator is preserved (normalized to U+002E). Note
        that an empty input string is treated as a lone trailing separator
        and therefore yields C{u'.'}.

        @param string: The domain name to prepare.
        @return: The prepared domain name.
        @raise UnicodeError: If any label is empty or otherwise invalid.
        """
        labels = idna.dots.split(string)

        # An empty last element means the name ended with a separator; set it
        # aside so it is not validated as a label.
        if labels and len(labels[-1]) == 0:
            trailing_dot = u'.'
            del labels[-1]
        else:
            trailing_dot = u''

        result = [self.nameprep(label) for label in labels]
        return u".".join(result) + trailing_dot

    def check_prohibiteds(self, string):
        """
        Reject strings that contain a character listed in C{prohibiteds}.

        @param string: The text to check.
        @raise UnicodeError: On the first prohibited character encountered.
        """
        for c in string:
            if c in self.prohibiteds:
                raise UnicodeError("Invalid character %s" % repr(c))

    def nameprep(self, label):
        """
        Prepare and validate a single label.

        @param label: One label of a domain name (no separators).
        @return: The label after C{idna.nameprep} has been applied.
        @raise UnicodeError: If the label is empty after preparation,
            contains a prohibited character, or starts or ends with a
            hyphen-minus.
        """
        label = idna.nameprep(label)
        self.check_prohibiteds(label)
        # The label may be empty either because the input had consecutive
        # separators or because nameprep mapped all of its characters to
        # nothing. Report that like every other validation failure instead of
        # letting the indexing below raise IndexError.
        if not label:
            raise UnicodeError("Invalid empty label")
        if label[0] == u'-':
            raise UnicodeError("Invalid leading hyphen-minus")
        if label[-1] == u'-':
            raise UnicodeError("Invalid trailing hyphen-minus")
        return label
def test_unichr(self):
    """
    C{unichr} exists and returns a unicode string containing the character
    for the given code point.
    """
    expected = u"\N{SNOWMAN}"
    actual = unichr(0x2603)
    self.assertEqual(actual, expected)
# Earlier experiment, kept commented out: pick random GB2312 characters.
#
# def GBK2312():
#     password = ''
#     while True:
#         head = random.randint(0xb0, 0xf7)
#         # In the block whose head zone number is 55 the last 5 characters
#         # are garbage, so the range is narrowed for convenience.
#         body = random.randint(0xa1, 0xf9)
#         val = f'{head:x}{body:x}'
#         str = bytes.fromhex(val).decode('gb2312')
#         if str:
#             password = str
#         else:
#             #
#             #
#     return str
#
#
# for i in range(10):
#     g = GBK2312()
#     print(g)

import codecs

from twisted.python.compat import unichr

# First and last code points to dump; both ends are meant to be written.
start, end = (0x4E00, 0x9FA5)

# Write every character in the range to "chinese.txt", encoded as UTF-8.
with codecs.open("chinese.txt", "wb", encoding="utf-8") as f:
    # range() excludes its stop value, so go one past ``end`` to make sure
    # U+9FA5 itself is written too.
    for codepoint in range(start, end + 1):
        f.write(unichr(codepoint))  # write out the Chinese character