def part(rule_str):
    """Split a rule string into its rule tokens and the leftover address text.

    The input is first passed through ``Address.normalize``.  Every match of
    ``Rule.RULE_TOKEN_RE`` is then removed from the normalized string and
    collected into a set, with two special cases handled by the callback:

    * ``u'連'`` is removed from the string but NOT recorded as a token.
    * ``u'附號全'`` is recorded as a token and replaced by ``u'號'`` in the
      string instead of being removed.

    Returns:
        A ``(rule_tokens, addr_str)`` tuple: the set of collected token
        strings and the string that remains after substitution.
    """
    normalized = Address.normalize(rule_str)
    found_tokens = set()

    def _consume(match):
        # Called by ``sub`` for each rule-token match; the return value is
        # the text that replaces the match in the output string.
        matched = match.group()

        if matched == u'連':
            # Dropped from the string and deliberately not collected.
            return u''

        if matched:
            found_tokens.add(matched)

        # Only this token leaves something behind in the address text.
        return u'號' if matched == u'附號全' else u''

    remainder = Rule.RULE_TOKEN_RE.sub(_consume, normalized)
    return (found_tokens, remainder)
def test_address_init_normalization_chinese_number():
    """Check ``Address.normalize`` on inputs that contain Chinese numerals.

    Each pair is ``(input, expected output)``.  The first two inputs are
    expected to come back unchanged; the rest are expected to have their
    Chinese numerals rewritten as Arabic digits.
    """
    cases = (
        # Expected to be returned as-is.
        ('八德路', '八德路'),
        ('三元街', '三元街'),
        # Numeral followed by 號.
        ('三號', '3號'),
        ('十八號', '18號'),
        ('三十八號', '38號'),
        # Numeral followed by 段 / 路 / 街.
        ('三段', '3段'),
        ('十八路', '18路'),
        ('三十八街', '38街'),
        # Numeral embedded after a name.
        ('信義路一段', '信義路1段'),
        ('敬業一路', '敬業1路'),
        ('愛富三街', '愛富3街'),
    )

    for raw, expected in cases:
        assert Address.normalize(raw) == expected
def tokenize(addr_str):
    """Return all ``Address.TOKEN_RE`` matches found in the normalized input.

    The string is passed through ``Address.normalize`` before matching, and
    the result is whatever ``findall`` yields for that pattern.
    """
    normalized = Address.normalize(addr_str)
    tokens = Address.TOKEN_RE.findall(normalized)
    return tokens
def test_address_init_normalization_chinese_number():
    """Check ``Address.normalize`` on inputs that contain Chinese numerals.

    Each pair is ``(input, expected output)``.  The first two inputs are
    expected to come back unchanged; the rest are expected to have their
    Chinese numerals rewritten as Arabic digits.
    """
    cases = (
        # Expected to be returned as-is.
        (u'八德路', u'八德路'),
        (u'三元街', u'三元街'),
        # Numeral followed by 號.
        (u'三號', u'3號'),
        (u'十八號', u'18號'),
        (u'三十八號', u'38號'),
        # Numeral followed by 段 / 路 / 街.
        (u'三段', u'3段'),
        (u'十八路', u'18路'),
        (u'三十八街', u'38街'),
        # Numeral embedded after a name.
        (u'信義路一段', u'信義路1段'),
        (u'敬業一路', u'敬業1路'),
        (u'愛富三街', u'愛富3街'),
    )

    for raw, expected in cases:
        assert Address.normalize(raw) == expected
def test_address_init_normalization_chinese_number():
    """Check ``Address.normalize`` on inputs that contain Chinese numerals.

    Each pair is ``(input, expected output)``.  The first two inputs are
    expected to come back unchanged; the rest are expected to have their
    Chinese numerals rewritten as Arabic digits.
    """
    cases = (
        # Expected to be returned as-is.
        (u"八德路", u"八德路"),
        (u"三元街", u"三元街"),
        # Numeral followed by 號.
        (u"三號", u"3號"),
        (u"十八號", u"18號"),
        (u"三十八號", u"38號"),
        # Numeral followed by 段 / 路 / 街.
        (u"三段", u"3段"),
        (u"十八路", u"18路"),
        (u"三十八街", u"38街"),
        # Numeral embedded after a name.
        (u"信義路一段", u"信義路1段"),
        (u"敬業一路", u"敬業1路"),
        (u"愛富三街", u"愛富3街"),
    )

    for raw, expected in cases:
        assert Address.normalize(raw) == expected