def seg_txt_search(txt):
    """Split ``txt`` into the list of terms used for text search.

    Every token produced by ``seg_txt(txt)`` is processed as follows:

    * a token for which ``isalnum()`` is true is lower-cased and kept;
    * any other token is decoded from UTF-8 (undecodable bytes are
      dropped).  A decoded token of exactly one character is kept only
      when it lies in the range U+4E00..U+9FA5 and is not contained in
      ``SMALLCHAR``; a decoded token of any other length is always kept.

    Returns a list in which every unicode term has been encoded back to
    UTF-8; non-unicode terms are returned untouched.
    """
    terms = []
    for token in seg_txt(txt):
        if token.isalnum():
            terms.append(token.lower())
            continue

        text = token.decode("utf-8", "ignore")
        if len(text) != 1:
            terms.append(text)
        elif u"一" <= text <= u"龥" and text not in SMALLCHAR:
            terms.append(text)

    # Normalise the output: only exact ``unicode`` instances are re-encoded.
    encoded = []
    for term in terms:
        if type(term) is unicode:
            term = term.encode("utf-8", "ignore")
        encoded.append(term)
    return encoded
def seg_txt_search(txt):
    """Return the search terms extracted from ``txt``.

    Tokens come from ``seg_txt(txt)``.  Tokens whose ``isalnum()`` is
    true are stored lower-cased.  All other tokens are decoded as UTF-8
    with undecodable bytes ignored; a single-character result is
    discarded unless it falls within U+4E00..U+9FA5 and is absent from
    ``SMALLCHAR``, while results of any other length are stored as-is.

    The returned list holds the stored terms in order, with unicode
    terms encoded to UTF-8.
    """
    kept = []
    for piece in seg_txt(txt):
        if not piece.isalnum():
            decoded = piece.decode("utf-8", "ignore")
            if len(decoded) == 1 and not (
                u"一" <= decoded <= u"龥" and decoded not in SMALLCHAR
            ):
                # Lone character outside the accepted range, or one
                # that is listed in SMALLCHAR: skip it.
                continue
            kept.append(decoded)
        else:
            kept.append(piece.lower())

    return [
        item.encode("utf-8", "ignore") if type(item) is unicode else item
        for item in kept
    ]
def seg_title_search(txt):
    """Split ``txt`` into the list of terms used for title search.

    Every token produced by ``seg_txt(txt)`` is processed as follows:

    * a token for which ``isalnum()`` is true is lower-cased and kept;
    * any other token is decoded from UTF-8 (undecodable bytes are
      dropped), then:

      - a one-character token is kept only when it lies in the range
        U+4E00..U+9FA5;
      - a token of length 0 or 2 is kept whole, and a longer token is
        replaced by the items returned by ``word_len2``;
      - unless its UTF-8 encoding is alphanumeric, each character of a
        token that is not one character long is also added, provided it
        is not already in the result.

    Returns a list in which every unicode term has been encoded back to
    UTF-8; non-unicode terms are returned untouched.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed original; confirm that the per-character
    # expansion is meant to apply to every multi-character token.
    terms = []
    for token in seg_txt(txt):
        if token.isalnum():
            terms.append(token.lower())
            continue

        text = token.decode("utf-8", "ignore")
        if len(text) == 1:
            if u"一" <= text <= u"龥":
                terms.append(text)
            continue

        if len(text) > 2:
            terms.extend(word_len2(text))
        else:
            terms.append(text)

        if text.encode("utf-8").isalnum():
            continue
        # Encoding is deferred to the end so this membership test sees
        # the still-unicode terms collected so far.
        for char in text:
            if char not in terms:
                terms.append(char)

    encoded = []
    for term in terms:
        if type(term) is unicode:
            term = term.encode("utf-8", "ignore")
        encoded.append(term)
    return encoded
def seg_title_search(txt):
    """Return the title-search terms extracted from ``txt``.

    Tokens come from ``seg_txt(txt)``.  Tokens whose ``isalnum()`` is
    true are stored lower-cased.  All other tokens are decoded as UTF-8
    with undecodable bytes ignored and then handled by length:

    * exactly one character: stored only if within U+4E00..U+9FA5;
    * otherwise: stored whole when at most two characters long, or
      expanded through ``word_len2`` when longer.  In addition, when the
      token's UTF-8 encoding is not alphanumeric, each of its characters
      not yet present in the result is appended.

    The returned list holds the stored terms in order, with unicode
    terms encoded to UTF-8.
    """
    # NOTE(review): block nesting was inferred from a whitespace-collapsed
    # original -- verify the placement of the per-character expansion.
    collected = []
    for piece in seg_txt(txt):
        if piece.isalnum():
            collected.append(piece.lower())
            continue

        decoded = piece.decode("utf-8", "ignore")
        size = len(decoded)
        if size == 1:
            in_range = u"一" <= decoded <= u"龥"
            if in_range:
                collected.append(decoded)
        else:
            if size <= 2:
                collected.append(decoded)
            else:
                collected.extend(word_len2(decoded))

            reencoded = decoded.encode("utf-8")
            if not reencoded.isalnum():
                # Appending one character at a time keeps repeated
                # characters within the same token from being added twice.
                for char in decoded:
                    if char in collected:
                        continue
                    collected.append(char)

    return [
        item.encode("utf-8", "ignore") if type(item) is unicode else item
        for item in collected
    ]