def process_unicode(uni):
    """Lazily tokenize a unicode string, bi-gramming selected segments.

    The input is NFKC-normalized, every match of ``rx_U`` in the normalized
    text is taken as a word, and each word is cut into segments using
    ``pattern``.  A segment whose first character matches ``rx_all`` is
    expanded through ``bigram(segment, 0)``; any other segment is yielded
    unchanged.

    This is a generator: tokens are produced one at a time rather than
    collected into a list.
    """
    text = unicodedata.normalize('NFKC', uni)
    for word in rx_U.findall(text):
        for found in pattern.finditer(word):
            segment = found.group()
            # Only the first character decides how the segment is handled.
            if rx_all.match(segment[0]):
                yield from bigram(segment, 0)
            else:
                yield segment
def process_unicode(uni):
    """Yield tokens from a unicode string, bi-gramming selected segments.

    Processing steps:

    1. ``uni`` is normalized with Unicode NFKC.
    2. Every match of ``rx_U`` in the normalized text is treated as a word.
    3. Each word is cut into segments with ``pattern``.
    4. A segment whose first character matches ``rx_all`` is expanded via
       ``bigram(segment, 0)``; any other segment is yielded as-is.

    Args:
        uni: The unicode text to tokenize.

    Yields:
        Tokens in document order.  Note that this is a generator, not a
        list; wrap the call in ``list(...)`` if a list is required.
    """
    normalized = unicodedata.normalize('NFKC', uni)
    for word in rx_U.findall(normalized):
        # Iterate the matches directly instead of building a throwaway list.
        for found in pattern.finditer(word):
            segment = found.group()
            if not segment:
                # A zero-width match has no first character to classify;
                # indexing it would raise IndexError, so skip it.
                continue
            if not rx_all.match(segment[0]):
                yield segment
            else:
                # Delegate to the bigram generator rather than re-yielding
                # each item by hand.
                yield from bigram(segment, 0)