def put(self, head_addr_str, tail_rule_str, zipcode):
    """Index one directory entry for precise and gradual lookup.

    ``head_addr_str`` is parsed into an ``Address``.  The complete rule
    string (``head_addr_str + tail_rule_str``) is handed to
    ``put_precise`` under the flattened address, and ``zipcode`` is handed
    to ``put_gradual`` for every contiguous run of the address tokens.
    """
    addr = Address(head_addr_str)

    # Precise entry, keyed by the whole flattened address: (a, b, c)
    rule_str = head_addr_str + tail_rule_str
    self.put_precise(addr.flat(), rule_str, zipcode)

    # Gradual entries, one per contiguous token slice:
    # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
    token_count = len(addr)
    for start in range(token_count):
        for stop in range(start + 1, token_count + 1):
            self.put_gradual(addr.flat(start, stop), zipcode)

    # With three or more tokens, additionally index the tokens at
    # positions 0 and 2 picked together: (a, b, c, d) -> (a, c)
    if token_count >= 3:
        self.put_gradual(addr.pick_to_flat(0, 2), zipcode)
def find(self, addr_str):
    """Return the zipcode found for ``addr_str``, or ``u''`` when none is.

    Progressively shorter token prefixes of the address are tried.  For
    each prefix the stored (rule string, zipcode) pairs are checked with
    ``Rule.match`` first; if no rule matches, the gradual zipcode stored
    for that prefix is returned when there is one.
    """
    addr = Address(addr_str)
    token_count = len(addr.tokens)

    # Avoid unnecessary iteration: shrink the first prefix length until
    # addr.parse() reports (0, 0) for the token just before it.
    first_len = token_count
    while first_len >= 0 and addr.parse(first_len - 1) != (0, 0):
        first_len -= 1

    for prefix_len in range(first_len, 0, -1):
        prefix_str = addr.flat(prefix_len)
        pairs = self.get_rule_str_zipcode_pairs(prefix_str)

        # Handling of insignificant tokens and a redundant unit.
        # It only runs once, and must be the first iteration.
        # NOTE(review): ``in u'村里'`` is a substring test, so an empty
        # unit also satisfies it -- confirm that this is intended.
        needs_cleanup = (
            prefix_len == first_len
            and token_count >= 4
            and addr.tokens[2][Address.UNIT] in u'村里'
            and not pairs
        )
        if needs_cleanup:
            if addr.tokens[3][Address.UNIT] == u'鄰':
                # Drop the insignificant token (whose unit is 鄰).
                del addr.tokens[3]
                token_count -= 1

            if token_count >= 4 and addr.tokens[3][Address.UNIT] == u'號':
                # Keep only the name of the token, emptying its
                # redundant unit.
                name = addr.tokens[2][Address.NAME]
                addr.tokens[2] = (u'', u'', name, u'')
            else:
                # Drop the insignificant token (whose unit is 村 or 里).
                del addr.tokens[2]

            pairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

        # Precise lookup: the first rule that matches wins.
        for rule_str, zipcode in pairs or ():
            if Rule(rule_str).match(addr):
                return zipcode

        # Fall back to the gradual zipcode stored for this prefix.
        gradual_zipcode = self.get_gradual_zipcode(prefix_str)
        if gradual_zipcode:
            return gradual_zipcode

    return u''
def test_address_init_tricky_input():
    """Check tokens for a name without a unit and a name containing a digit."""
    zhongli = [
        (u'', u'', u'桃園', u'縣'),
        (u'', u'', u'中壢', u'市'),
        (u'', u'', u'普義', u''),
    ]
    zhongshan = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'中山', u'區'),
        (u'', u'', u'敬業1', u'路'),
    ]
    number_10 = (u'10', u'', u'', u'號')

    cases = [
        (u'桃園縣中壢市普義', zhongli),
        (u'桃園縣中壢市普義10號', zhongli + [number_10]),
        (u'臺北市中山區敬業1路', zhongshan),
        (u'臺北市中山區敬業1路10號', zhongshan + [number_10]),
    ]
    for addr_str, expected_tokens in cases:
        assert Address(addr_str).tokens == expected_tokens
def test_address_init_tricky_input():
    """Check tokens for a name without a unit and a name containing a digit."""
    number_token = ('10', '', '', '號')
    unitless_name = [
        ('', '', '桃園', '縣'),
        ('', '', '中壢', '市'),
        ('', '', '普義', ''),
    ]
    digit_in_name = [
        ('', '', '臺北', '市'),
        ('', '', '中山', '區'),
        ('', '', '敬業1', '路'),
    ]

    expectations = (
        ('桃園縣中壢市普義', unitless_name),
        ('桃園縣中壢市普義10號', unitless_name + [number_token]),
        ('臺北市中山區敬業1路', digit_in_name),
        ('臺北市中山區敬業1路10號', digit_in_name + [number_token]),
    )
    for text, tokens in expectations:
        assert Address(text).tokens == tokens
def test_address_repr():
    """Check that repr(Address) is a constructor call that round-trips via eval."""
    # Python 2 shows the unicode text escaped and with a ``u`` prefix.
    py2_repr = "Address(u'\\u81fa\\u5317\\u5e02\\u5927\\u5b89\\u5340\\u5e02\\u5e9c\\u8def1\\u865f')"
    py3_repr = "Address('臺北市大安區市府路1號')"
    repr_str = py2_repr if six.PY2 else py3_repr

    addr = Address('臺北市大安區市府路1號')
    assert repr(addr) == repr_str
    # eval() is applied to a constant defined above, never to external input.
    assert repr(eval(repr_str)) == repr_str
def test_address_flat():
    """Check flat() with positive and negative token counts and with no argument."""
    addr = Address('臺北市大安區市府路1之1號')

    # The address has four tokens, so count - 4 is the matching negative
    # argument for each positive one.
    prefixes = ['臺北市', '臺北市大安區', '臺北市大安區市府路']
    for count, expected in enumerate(prefixes, 1):
        assert addr.flat(count) == expected
        assert addr.flat(count - 4) == expected

    assert addr.flat() == '臺北市大安區市府路1之1號'
def put(self, head_addr_str, tail_rule_str, zipcode):
    """Store one (address head, rule tail, zipcode) entry in both indexes.

    The head is parsed into an ``Address``.  ``put_precise`` receives the
    flattened address together with the concatenated rule string, and
    ``put_gradual`` receives ``zipcode`` for each contiguous slice of the
    address tokens.
    """
    addr = Address(head_addr_str)

    # (a, b, c)
    self.put_precise(addr.flat(), head_addr_str + tail_rule_str, zipcode)

    # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
    size = len(addr)
    spans = [
        (first, last + 1)
        for first in range(size)
        for last in range(first, size)
    ]
    for first, stop in spans:
        self.put_gradual(addr.flat(first, stop), zipcode)

    # (a, b, c, d) -> (a, c); only done when there are at least 3 tokens.
    if size >= 3:
        self.put_gradual(addr.pick_to_flat(0, 2), zipcode)
def test_address_init_normalization():
    """Check that differently written forms of one address give the same tokens."""
    expected_tokens = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'大安', u'區'),
        (u'', u'', u'市府', u'路'),
        (u'1', u'之1', u'', u'號'),
    ]
    variants = [
        u'臺北市大安區市府路1之1號',
        u'台北市大安區市府路1之1號',
        u'臺北市大安區市府路1之1號',
        u'臺北市 大安區 市府路 1 之 1 號',
        u'臺北市,大安區,市府路 1 之 1 號',
        u'臺北市, 大安區, 市府路 1 之 1 號',
        u'臺北市, 大安區, 市府路 1 - 1 號',
    ]
    for addr_str in variants:
        assert Address(addr_str).tokens == expected_tokens
def test_address_flat():
    """Check flat() with positive and negative token counts and with no argument."""
    addr = Address("臺北市大安區市府路1之1號")

    # (positive count, equivalent negative count, expected prefix)
    cases = (
        (1, -3, u"臺北市"),
        (2, -2, u"臺北市大安區"),
        (3, -1, u"臺北市大安區市府路"),
    )
    for head_count, tail_count, expected in cases:
        assert addr.flat(head_count) == expected
        assert addr.flat(tail_count) == expected

    assert addr.flat() == u"臺北市大安區市府路1之1號"
def test_address_flat():
    """Check flat() with positive and negative token counts and with no argument."""
    addr = Address('臺北市大安區市府路1之1號')

    expected_by_count = {
        1: u'臺北市',
        2: u'臺北市大安區',
        3: u'臺北市大安區市府路',
    }
    for count in sorted(expected_by_count):
        expected = expected_by_count[count]
        # count - 4 is the negative argument checked against the same prefix.
        assert addr.flat(count) == expected
        assert addr.flat(count - 4) == expected

    assert addr.flat() == u'臺北市大安區市府路1之1號'
def test_rule_match_gradual_address():
    """Check standard rules against partial (gradual) addresses."""
    partial_addrs = ['臺北市', '臺北市中正區', '臺北市中正區仁愛路1段']
    full_addr = '臺北市中正區仁愛路1段1號'

    # A rule for another street matches none of the addresses.
    rule = Rule('臺北市中正區丹陽街全')
    for addr_str in partial_addrs + [full_addr]:
        assert not rule.match(Address(addr_str))

    # A rule for this street matches only the complete address.
    rule = Rule('臺北市,中正區,仁愛路1段, 1號')
    for addr_str in partial_addrs:
        assert not rule.match(Address(addr_str))
    assert rule.match(Address(full_addr))
def test_rule_match():
    """Check one standard address (number 5) against the standard rule forms."""
    street = '臺北市大安區市府路'
    addr = Address(street + '5號')

    # (rule suffix appended to the street, whether the rule should match)
    cases = [
        # 全單雙
        ('全', True),
        ('單全', True),
        ('雙全', False),
        # 以上 & 以下
        ('6號以上', False),
        ('6號以下', True),
        ('5號以上', True),
        ('5號', True),
        ('5號以下', True),
        ('4號以上', True),
        ('4號以下', False),
        # 至
        ('1號至4號', False),
        ('1號至5號', True),
        ('5號至9號', True),
        ('6號至9號', False),
        # 附號
        ('6號及以上附號', False),
        ('6號含附號以下', True),
        ('5號及以上附號', True),
        ('5號含附號', True),
        ('5附號全', False),
        ('5號含附號以下', True),
        ('4號及以上附號', True),
        ('4號含附號以下', False),
        # 單雙 x 以上, 至, 以下
        ('單5號以上', True),
        ('雙5號以上', False),
        ('單1號至5號', True),
        ('雙1號至5號', False),
        ('單5號至9號', True),
        ('雙5號至9號', False),
        ('單5號以下', True),
        ('雙5號以下', False),
    ]
    for suffix, should_match in cases:
        assert bool(Rule(street + suffix).match(addr)) == should_match
def find(self, addr_str):
    """Look up the zipcode for ``addr_str``; return ``u''`` if nothing is found.

    Token prefixes of the parsed address are tried from the longest useful
    one down to a single token.  Rules stored for a prefix are tested with
    ``Rule.match`` before the gradual zipcode of that prefix is consulted.
    """
    addr = Address(addr_str)
    total = len(addr.tokens)

    # Avoid unnecessary iteration: step the starting length down until
    # addr.parse() gives (0, 0) for the token that precedes it.
    longest = total
    while longest >= 0 and addr.parse(longest - 1) != (0, 0):
        longest -= 1

    for size in range(longest, 0, -1):
        flat_prefix = addr.flat(size)
        rule_zip_pairs = self.get_rule_str_zipcode_pairs(flat_prefix)

        # Insignificant tokens / redundant unit handling.  The
        # ``size == longest`` test restricts it to the first iteration,
        # so it runs at most once.
        # NOTE(review): an empty unit also passes ``in u'村里'`` because
        # the test is a substring check -- confirm that this is intended.
        first_pass_without_rules = (
            size == longest
            and total >= 4
            and addr.tokens[2][Address.UNIT] in u'村里'
            and not rule_zip_pairs
        )
        if first_pass_without_rules:
            if addr.tokens[3][Address.UNIT] == u'鄰':
                # The 鄰 token is insignificant: remove it.
                del addr.tokens[3]
                total -= 1

            if total >= 4 and addr.tokens[3][Address.UNIT] == u'號':
                # Blank out the redundant unit, keeping only the name.
                kept_name = addr.tokens[2][Address.NAME]
                addr.tokens[2] = (u'', u'', kept_name, u'')
            else:
                # The 村 / 里 token is insignificant: remove it.
                del addr.tokens[2]

            rule_zip_pairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

        # Return the zipcode of the first stored rule that matches.
        for rule_str, zipcode in rule_zip_pairs or ():
            if Rule(rule_str).match(addr):
                return zipcode

        # Otherwise use the gradual zipcode of this prefix, if any.
        fallback = self.get_gradual_zipcode(flat_prefix)
        if fallback:
            return fallback

    return u''
def part(rule_str):
    """Split a rule string into its rule tokens and the remaining address text.

    The string is normalized with ``Address.normalize``; every
    ``Rule.RULE_TOKEN_RE`` match is then removed from it and collected.

    Returns a ``(rule_tokens, addr_str)`` tuple, where ``rule_tokens`` is
    the set of collected tokens and ``addr_str`` is the text left over.
    """
    normalized = Address.normalize(rule_str)
    rule_tokens = set()

    def extract(match):
        matched = match.group()
        # 連 is dropped from the text without being recorded.
        if matched == u'連':
            return u''
        if matched:
            rule_tokens.add(matched)
        # 附號全 is recorded, and a 號 is left in its place in the text.
        return u'號' if matched == u'附號全' else u''

    addr_str = Rule.RULE_TOKEN_RE.sub(extract, normalized)
    return (rule_tokens, addr_str)
def test_address_init_subno():
    """Check the tokens of an address whose number has a 之 sub-number."""
    expected_tokens = [
        ('', '', '臺北', '市'),
        ('', '', '大安', '區'),
        ('', '', '市府', '路'),
        ('1', '之1', '', '號'),
    ]
    # The same literal is checked twice, as in the original assertions.
    for addr_str in ('臺北市大安區市府路1之1號', '臺北市大安區市府路1之1號'):
        assert Address(addr_str).tokens == expected_tokens
def test_address_init_normalization_chinese_number():
    """Check which Chinese numerals Address.normalize turns into digits."""
    # (input, expected normalized form)
    cases = [
        # Numerals that are part of a name stay as they are.
        (u"八德路", u"八德路"),
        (u"三元街", u"三元街"),
        # Numerals directly before a unit become digits.
        (u"三號", u"3號"),
        (u"十八號", u"18號"),
        (u"三十八號", u"38號"),
        (u"三段", u"3段"),
        (u"十八路", u"18路"),
        (u"三十八街", u"38街"),
        (u"信義路一段", u"信義路1段"),
        (u"敬業一路", u"敬業1路"),
        (u"愛富三街", u"愛富3街"),
    ]
    for raw, normalized in cases:
        assert Address.normalize(raw) == normalized
def test_rule_match_subno():
    """Check rules involving sub-numbers (附號) against numbered addresses."""

    def check(rule_str, street, cases):
        # Each case is (address tail appended to street, expected match).
        rule = Rule(rule_str)
        for tail, should_match in cases:
            assert bool(rule.match(Address(street + tail))) == should_match

    check('臺北市,中正區,杭州南路1段, 14號含附號', '臺北市中正區杭州南路1段', [
        ('13號', False),
        ('13-1號', False),
        ('14號', True),
        ('14-1號', True),
        ('15號', False),
        ('15-1號', False),
    ])

    check('臺北市,大同區,哈密街, 47附號全', '臺北市大同區哈密街', [
        ('46號', False),
        ('46-1號', False),
        ('47號', False),
        ('47-1號', True),
        ('48號', False),
        ('48-1號', False),
    ])

    check('臺北市,大同區,哈密街,雙 68巷至 70號含附號全', '臺北市大同區哈密街', [
        ('66號', False),
        ('66-1巷', False),
        ('67號', False),
        ('67-1巷', False),
        ('68巷', True),
        ('68-1號', True),
        ('69號', False),
        ('69-1巷', False),
        ('70號', True),
        ('70-1號', True),
        ('71號', False),
        ('71-1號', False),
    ])

    check('桃園縣,中壢市,普義,連 49號含附號以下', '桃園縣中壢市普義', [
        ('48號', True),
        ('48-1號', True),
        ('49號', True),
        ('49-1號', True),
        ('50號', False),
        ('50-1號', False),
    ])

    check('臺中市,西屯區,西屯路3段西平南巷, 2之 3號及以上附號',
          '臺中市西屯區西屯路3段西平南巷', [
        ('1號', False),
        ('1-1號', False),
        ('2號', False),
        ('2-2號', False),
        ('2-3號', True),
        ('3號', True),
        ('3-1號', True),
        ('4號', True),
        ('4-1號', True),
    ])
def __init__(self, addr_str):
    """Tokenize ``addr_str`` and keep the result on ``self.tokens``."""
    tokens = Address.tokenize(addr_str)
    self.tokens = tokens
def tokenize(addr_str):
    """Return all ``Address.TOKEN_RE`` matches in the normalized ``addr_str``."""
    normalized = Address.normalize(addr_str)
    return Address.TOKEN_RE.findall(normalized)
def test_address_repr():
    """Check that repr(Address) is a constructor call that round-trips via eval.

    The expected text depends on the interpreter.  Python 2 renders the
    unicode address escaped and with a ``u`` prefix; Python 3 renders it
    verbatim.  The previous single expectation was only right on Python 2:
    on Python 3 the ``\\uXXXX`` sequences in a plain literal are decoded,
    so the comparison could never succeed there.
    """
    import sys

    if sys.version_info[0] == 2:
        # Backslashes are doubled so the value is the escaped text itself,
        # whether or not unicode_literals is in effect.
        repr_str = "Address(u'\\u81fa\\u5317\\u5e02\\u5927\\u5b89\\u5340\\u5e02\\u5e9c\\u8def1\\u865f')"
    else:
        repr_str = "Address('臺北市大安區市府路1號')"

    assert repr(Address('臺北市大安區市府路1號')) == repr_str
    # eval() is applied to a constant defined above, never to external input.
    assert repr(eval(repr_str)) == repr_str
def test_address_init_normalization_chinese_number():
    """Check which Chinese numerals Address.normalize turns into digits."""
    unchanged = [u'八德路', u'三元街']
    converted = [
        (u'三號', u'3號'),
        (u'十八號', u'18號'),
        (u'三十八號', u'38號'),
        (u'三段', u'3段'),
        (u'十八路', u'18路'),
        (u'三十八街', u'38街'),
        (u'信義路一段', u'信義路1段'),
        (u'敬業一路', u'敬業1路'),
        (u'愛富三街', u'愛富3街'),
    ]

    # Numerals inside a name are expected to come back untouched.
    for text in unchanged:
        assert Address.normalize(text) == text

    for text, expected in converted:
        assert Address.normalize(text) == expected
def __init__(self, rule_str):
    """Split ``rule_str`` with ``Rule.part`` and initialize the address part.

    The rule tokens are stored on ``self.rule_tokens``; the remaining
    address text is passed on to ``Address.__init__``.
    """
    rule_tokens, addr_str = Rule.part(rule_str)
    self.rule_tokens = rule_tokens
    Address.__init__(self, addr_str)
def test_rule_match_rule_all():
    """Check rules ending in 全 against matching and non-matching addresses."""
    # Be careful of the 全! It will bite you!
    # Each entry: (rule string, [(address string, expected match), ...])
    scenarios = [
        ('臺北市,中正區,八德路1段,全', [
            ('臺北市中正區八德路1段1號', True),
            ('臺北市中正區八德路1段9號', True),
            ('臺北市中正區八德路2段1號', False),
            ('臺北市中正區八德路2段9號', False),
        ]),
        ('臺北市,中正區,三元街,單全', [
            ('臺北市中正區三元街1號', True),
            ('臺北市中正區三元街2號', False),
            ('臺北市中正區大埔街1號', False),
        ]),
        ('臺北市,大同區,哈密街, 45巷全', [
            ('臺北市大同區哈密街45巷1號', True),
            ('臺北市大同區哈密街45巷9號', True),
            ('臺北市大同區哈密街46巷1號', False),
            ('臺北市大同區哈密街46巷9號', False),
        ]),
    ]
    for rule_str, cases in scenarios:
        rule = Rule(rule_str)
        for addr_str, should_match in cases:
            assert bool(rule.match(Address(addr_str))) == should_match
#r = Rule('台北市信義區市府路10號以下') #print r.tokens #a = Address('市府路1號') #print a.tokens #print r.match(a) #a = Address('台北市信義區市府路1號') #print a.tokens #print r.match(a) r = Rule('新北市,中和區,景平路,雙 64號以下') print r.tokens a = Address('新北市景平路64巷13弄13號') print a.tokens print r.match(a) ########NEW FILE######## __FILENAME__ = zipcodetw_server #!/usr/bin/env python # -*- coding: utf-8 -*- import zipcodetw from flask import Flask, render_template, request, jsonify app = Flask(__name__) @app.route('/')
def test_address_init_normalization_chinese_number():
    """Check which Chinese numerals Address.normalize turns into digits."""
    expected_by_input = (
        # Numerals that belong to a name are kept.
        ('八德路', '八德路'),
        ('三元街', '三元街'),
        # Numerals in front of a unit are converted.
        ('三號', '3號'),
        ('十八號', '18號'),
        ('三十八號', '38號'),
        ('三段', '3段'),
        ('十八路', '18路'),
        ('三十八街', '38街'),
        ('信義路一段', '信義路1段'),
        ('敬業一路', '敬業1路'),
        ('愛富三街', '愛富3街'),
    )
    for source, expected in expected_by_input:
        assert Address.normalize(source) == expected
def test_address_init_subno():
    """Check the tokens of an address whose number has a 之 sub-number."""
    expected_tokens = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'大安', u'區'),
        (u'', u'', u'市府', u'路'),
        (u'1', u'之1', u'', u'號'),
    ]
    # The input is given once with and once without the ``u`` prefix.
    inputs = (u'臺北市大安區市府路1之1號', '臺北市大安區市府路1之1號')
    for addr_str in inputs:
        assert Address(addr_str).tokens == expected_tokens
def test_address_init():
    """Check the tokens produced for a plain, fully specified address.

    The input is given once with and once without the ``u`` prefix.  Both
    sides are compared as lists: a list never compares equal to a tuple,
    so the previous tuple-valued expectation could not match a ``tokens``
    attribute that is a list.  Converting with ``list()`` accepts either
    container type while still checking every token.
    """
    expected_tokens = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'大安', u'區'),
        (u'', u'', u'市府', u'路'),
        (u'1', u'', u'', u'號'),
    ]
    assert list(Address(u'臺北市大安區市府路1號').tokens) == expected_tokens
    assert list(Address('臺北市大安區市府路1號').tokens) == expected_tokens