try:
    _unichr = unichr  # Python 2: chr() there only covers 0-255
except NameError:
    _unichr = chr  # Python 3: unichr() is gone; chr() handles every code point


def convert_fullwidth_to_halfwidth(in_str):
    """Convert full-width characters in ``in_str`` to their half-width forms.

    The ideographic space (code point 12288, U+3000) becomes an ASCII space.
    Code points 65281..65374 (U+FF01..U+FF5E) are shifted down by 65248 so
    they land on ASCII ``!``..``~``.  Every other character is copied
    through unchanged.

    Args:
        in_str: Text to convert; iterated one character at a time.

    Returns:
        The converted string (empty if ``in_str`` is empty).
    """
    converted = []
    for uni_char in in_str:
        inside_code = ord(uni_char)
        if inside_code == 12288:
            # The full-width space has no fixed-offset twin; map it directly.
            inside_code = 32
        elif 65281 <= inside_code <= 65374:
            # All other full-width forms sit at a constant offset from ASCII.
            inside_code -= 65248
        converted.append(_unichr(inside_code))
    # Join once instead of growing a string with += inside the loop.
    return "".join(converted)
def byte2str(data):
    """Convert raw bytes to a string, two bytes per character.

    Each consecutive pair of bytes is unpacked as one unsigned 16-bit
    integer in native byte order (``struct`` format ``'H'``) and turned into
    the character with that code point.  Carriage returns are replaced by
    line feeds and space characters are dropped from the result.

    Args:
        data: Bytes-like object to decode.  An unpaired trailing byte is
            ignored.

    Returns:
        The decoded text (empty if ``data`` holds fewer than two bytes).
    """
    chars = []
    # Stop at len - 1 so only complete two-byte pairs are read.
    for offset in range(0, len(data) - 1, 2):
        # Read the pair in place; adding the two byte values together would
        # lose which byte is which and decode the wrong character.
        code_unit = struct.unpack_from('H', data, offset)[0]
        t = chr(code_unit)
        if t == u'\r':
            chars.append(u'\n')
        elif t != u' ':
            chars.append(t)
    return u''.join(chars)
print(d)
print(type(d))

# Convert to an immutable set.
f1 = frozenset(d)
print(f1)  # e.g. frozenset({'b', 'c', 'a'})
print(type(f1))  # <class 'frozenset'>

# Convert an integer to a character.
c1 = chr(98)
print(c1)
print(type(c1))

# Convert an integer to a Unicode character.
# Python 3 has no unichr(); chr() already returns a Unicode character.
uc = chr(97)
print(uc)
print(type(uc))

# Convert a character to its integer value.
o1 = ord('a')
print(o1)

# Convert an integer to a hexadecimal string.
h1 = hex(11)
print(h1)

# Convert an integer to an octal string.
o2 = oct(11)
print(o2)
try:
    _unichr = unichr  # Python 2: chr() there only covers 0-255
except NameError:
    _unichr = chr  # Python 3: unichr() is gone; chr() handles every code point


# NOTE(review): the `def` line for this body was missing from the file; the
# header below is restored from the one-argument call in the __main__ guard
# and the `in_str` name the body reads -- confirm against the original file.
def convert_fullwidth_to_halfwidth(in_str):
    """Convert full-width characters in ``in_str`` to their half-width forms.

    The ideographic space (code point 12288) becomes an ASCII space, and
    code points 65281..65374 are shifted down by 65248 onto ASCII
    ``!``..``~``.  Every other character is copied through unchanged.

    Args:
        in_str: Text to convert; iterated one character at a time.

    Returns:
        The converted string.
    """
    converted = []
    for uni_char in in_str:
        inside_code = ord(uni_char)
        if inside_code == 12288:
            # Full-width space: map directly to the ASCII space.
            inside_code = 32
        elif 65281 <= inside_code <= 65374:
            # Other full-width characters: convert by the fixed offset.
            inside_code -= 65248
        converted.append(_unichr(inside_code))
    return "".join(converted)


class DataCleaner(tools.ProcessPath):
    """Writes the lines that match ``chinese_pattern`` to a target file.

    NOTE(review): ``do_in_loop`` is presumably invoked once per input line by
    ``tools.ProcessPath`` -- confirm against that base class.
    """

    # Class-level attribute shared by all instances (similar to a static
    # member); the pattern text comes from parameters.MATCH_CHINESE_PATTERN_STR.
    chinese_pattern = re.compile(parameters.MATCH_CHINESE_PATTERN_STR)

    def do_in_loop(self, line, src_file, target_file):
        """Write ``line`` to ``target_file`` if ``chinese_pattern`` matches it.

        Args:
            line: Text to test against the pattern.
            src_file: Not used by this implementation.
            target_file: Object whose ``write`` method receives matching lines.
        """
        # Looked up through self, so a subclass may override the pattern.
        if self.chinese_pattern.search(line):
            target_file.write(line)


if __name__ == '__main__':
    seq = '辽机集团将其持有 的沈阳合金投资股份有限公司1,,,0,,00万股股权质押给公司'
    seq = remove_comma_from_number(seq)
    print(seq)
    print(convert_fullwidth_to_halfwidth(seq))
    # List every code point in the convertible range next to its character.
    for i in range(65281, 65375):
        print(i, _unichr(i))
    print(ord(' '))