def inverse_need_type(text, need_type):
    """Return *text* encoded as *need_type*.

    Unicode text is encoded directly.  Byte strings are passed through
    ``chardet.detect``; when the detected encoding differs from *need_type*
    they are decoded from the detected encoding and re-encoded.  Characters
    that cannot be converted are dropped (``'ignore'`` error handler).

    Args:
        text: Unicode text or an encoded byte string.
        need_type: Name of the target encoding, e.g. ``"utf-8"`` or ``"gbk"``.

    Returns:
        The converted byte string.  Empty input, byte strings already
        detected as *need_type*, and byte strings whose encoding cannot be
        detected are returned unchanged.
    """
    # NOTE(review): this file defines inverse_need_type twice; the later
    # definition is the one in effect once the module has been loaded.
    if not text:
        return text

    # type(u"") is ``unicode`` on Python 2 (and ``str`` on Python 3).
    if isinstance(text, type(u"")):
        # Already-decoded text has no byte encoding to detect, and current
        # chardet releases reject non-bytes input, so encode it directly
        # instead of running detection first.
        return text.encode(need_type, 'ignore')

    codingType = chardet.detect(text)['encoding']
    # chardet reports None when it cannot determine an encoding; decoding
    # with None would raise, so leave such input untouched (best effort).
    if codingType is None or codingType == need_type:
        return text
    return text.decode(codingType, 'ignore').encode(need_type, 'ignore')
def inverse_need_type(text, need_type):
    """Return *text* encoded as *need_type*.

    Unicode text is encoded directly.  Byte strings are passed through
    ``chardet.detect``; when the detected encoding differs from *need_type*
    they are decoded from the detected encoding and re-encoded.  Characters
    that cannot be converted are dropped (``'ignore'`` error handler).

    Args:
        text: Unicode text or an encoded byte string.
        need_type: Name of the target encoding, e.g. ``"utf-8"`` or ``"gbk"``.

    Returns:
        The converted byte string.  Empty input, byte strings already
        detected as *need_type*, and byte strings whose encoding cannot be
        detected are returned unchanged.
    """
    # NOTE(review): this file defines inverse_need_type twice; the later
    # definition is the one in effect once the module has been loaded.
    if not text:
        return text

    # type(u"") is ``unicode`` on Python 2 (and ``str`` on Python 3).
    if isinstance(text, type(u"")):
        # Already-decoded text has no byte encoding to detect, and current
        # chardet releases reject non-bytes input, so encode it directly
        # instead of running detection first.
        return text.encode(need_type, 'ignore')

    codingType = chardet.detect(text)['encoding']
    # chardet reports None when it cannot determine an encoding; decoding
    # with None would raise, so leave such input untouched (best effort).
    if codingType is None or codingType == need_type:
        return text
    return text.decode(codingType, 'ignore').encode(need_type, 'ignore')
def format_text(text):
    """Extract field 10 of every tab-separated row, re-encode it as GBK and save it.

    *text* is split on ``"\\n"``; every element except the last one (whatever
    follows the final newline) is treated as a row.  From each row the
    tab-separated field at index 10 is decoded with the encoding chardet
    detects and re-encoded as GBK, dropping unconvertible characters.  The
    converted fields, each followed by ``"\\n"``, are written to
    ``./data/test.txt`` and returned.

    Args:
        text: Byte string holding newline-separated, tab-delimited rows.

    Returns:
        The GBK-encoded fields, one per line; ``""`` when *text* contains no
        newline-terminated row.

    Raises:
        IndexError: If a row has fewer than 11 tab-separated fields.
        ValueError: If chardet cannot determine the encoding of the sample.
    """
    # NOTE(review): this file defines format_text twice; the later definition
    # is the one in effect once the module has been loaded.
    lines = text.split("\n")
    rows = lines[:-1]

    pieces = []
    if rows:
        # Detection samples the second split element, falling back to the
        # first row when that element is empty (single-row input).
        sample = lines[1] or lines[0]
        encodeType = chardet.detect(sample)
        # Diagnostic output; the parenthesised form prints the same text
        # under the Python 2 print statement.
        print(encodeType)
        encoding = encodeType['encoding']
        if encoding is None:
            raise ValueError("could not detect the encoding of the input text")
        for row in rows:
            field = row.split("\t")[10]
            pieces.append(
                field.decode(encoding, "ignore").encode("gbk", "ignore") + "\n")
    result = "".join(pieces)

    # The context manager closes the handle even if the write fails.  The
    # previous code called close() on a not-yet-assigned local before opening
    # the file, which raised UnboundLocalError on every call.
    with open('./data/test.txt', 'w') as output:
        output.write(result)
    return result
def format_text(text):
    """Extract field 10 of every tab-separated row, re-encode it as GBK and save it.

    *text* is split on ``"\\n"``; every element except the last one (whatever
    follows the final newline) is treated as a row.  From each row the
    tab-separated field at index 10 is decoded with the encoding chardet
    detects and re-encoded as GBK, dropping unconvertible characters.  The
    converted fields, each followed by ``"\\n"``, are written to
    ``./data/test.txt`` and returned.

    Args:
        text: Byte string holding newline-separated, tab-delimited rows.

    Returns:
        The GBK-encoded fields, one per line; ``""`` when *text* contains no
        newline-terminated row.

    Raises:
        IndexError: If a row has fewer than 11 tab-separated fields.
        ValueError: If chardet cannot determine the encoding of the sample.
    """
    # NOTE(review): this file defines format_text twice; the later definition
    # is the one in effect once the module has been loaded.
    lines = text.split("\n")
    rows = lines[:-1]

    pieces = []
    if rows:
        # Detection samples the second split element, falling back to the
        # first row when that element is empty (single-row input).
        sample = lines[1] or lines[0]
        encodeType = chardet.detect(sample)
        # Diagnostic output; the parenthesised form prints the same text
        # under the Python 2 print statement.
        print(encodeType)
        encoding = encodeType['encoding']
        if encoding is None:
            raise ValueError("could not detect the encoding of the input text")
        for row in rows:
            field = row.split("\t")[10]
            pieces.append(
                field.decode(encoding, "ignore").encode("gbk", "ignore") + "\n")
    result = "".join(pieces)

    # The context manager closes the handle even if the write fails.  The
    # previous code called close() on a not-yet-assigned local before opening
    # the file, which raised UnboundLocalError on every call.
    with open('./data/test.txt', 'w') as output:
        output.write(result)
    return result
def getTextEncoding(text):
    """Return the ``'encoding'`` entry of ``chardet.detect(text)``.

    Args:
        text: The data handed to ``chardet.detect``.

    Returns:
        Whatever chardet reports under the ``'encoding'`` key of its result.
    """
    return chardet.detect(text)['encoding']