def test_full_text_extract_greedy_match(monkeypatch):
    """Check that province matching is greedy (longest key wins).

    Both '江苏' and '江苏省' are registered as province keys. The expected
    positions (city at index 3, area at index 5) only hold when the longer
    key '江苏省' is consumed at the start of the text.

    This test was previously named ``test_full_text_extract4``, a name that
    is defined a second time further down in this module; the later
    definition rebound the name, so this test was never collected. It now
    has a unique name so that it actually runs.
    """
    # NOTE: province_map is patched with a plain dict here, while the city
    # and area maps go through _dict2addr_map.
    monkeypatch.setattr(cpca, 'province_map', {'江苏': '江苏省', '江苏省': '江苏省'})
    monkeypatch.setattr(cpca, 'city_map', _dict2addr_map({'淮安': '淮安市'}))
    monkeypatch.setattr(cpca, 'area_map', _dict2addr_map({'清浦区': '清浦区'}))

    pca, addr = cpca._full_text_extract('江苏省淮安清浦区人民路111号', 3)

    # The matched province/city/area prefix is removed from the returned text.
    assert addr == '人民路111号'
    assert pca.province == '江苏省'
    assert pca.province_pos == 0
    assert pca.city == '淮安市'
    assert pca.city_pos == 3
    assert pca.area == '清浦区'
    assert pca.area_pos == 5
def test_full_text_extract3(monkeypatch):
    """Address in the middle of the sentence.

    Verifies the truncation rule: only an address extracted at the very
    start of the sentence is cut from the returned text, so here the text
    must come back unchanged.
    """
    monkeypatch.setattr(cpca, 'province_map', {'江苏': '江苏省'})
    monkeypatch.setattr(cpca, 'city_map', _dict2addr_map({'淮安': '淮安市'}))
    monkeypatch.setattr(cpca, 'area_map', _dict2addr_map({'清浦区': '清浦区'}))

    sentence = '我家的地址是江苏淮安清浦区人民路111号'
    result, remainder = cpca._full_text_extract(sentence, 8)

    # Nothing is stripped because the address does not open the sentence.
    assert remainder == sentence

    # Each level is still recognised, together with its offset in the text.
    assert (result.province, result.province_pos) == ('江苏省', 6)
    assert (result.city, result.city_pos) == ('淮安市', 8)
    assert (result.area, result.area_pos) == ('清浦区', 10)
def test_full_text_extract4(monkeypatch):
    """Exercise extraction with a small lookahead value.

    With a lookahead of 2 the province and city are found, but the area is
    expected to stay unset (empty string, position -1) and to remain part
    of the returned text.
    """
    provinces = {'江苏': '江苏省'}
    cities = _dict2addr_map({'淮安': '淮安市'})
    areas = _dict2addr_map({'清浦区': '清浦区'})
    monkeypatch.setattr(cpca, 'province_map', provinces)
    monkeypatch.setattr(cpca, 'city_map', cities)
    monkeypatch.setattr(cpca, 'area_map', areas)

    lookahead = 2
    extracted, rest = cpca._full_text_extract('江苏淮安清浦区人民路111号', lookahead)

    # Only the province and city prefix has been consumed.
    assert rest == '清浦区人民路111号'

    assert extracted.province == '江苏省'
    assert extracted.province_pos == 0

    assert extracted.city == '淮安市'
    assert extracted.city_pos == 2

    # The area is left unmatched.
    assert extracted.area == ''
    assert extracted.area_pos == -1
def test_full_text_extract2(monkeypatch):
    """Address located at the end of the sentence.

    The province, city and area must all be recognised with their offsets,
    and the returned text is expected to equal the input unchanged.
    """
    monkeypatch.setattr(cpca, 'province_map', {'江苏': '江苏省'})
    monkeypatch.setattr(cpca, 'city_map', _dict2addr_map({'淮安': '淮安市'}))
    monkeypatch.setattr(cpca, 'area_map', _dict2addr_map({'清浦区': '清浦区'}))

    text = '我的家在江苏淮安清浦区'
    pca, addr = cpca._full_text_extract(text, 8)

    assert addr == text

    # Attribute name -> expected value, checked in the original order.
    expectations = (
        ('province', '江苏省'),
        ('province_pos', 4),
        ('city', '淮安市'),
        ('city_pos', 6),
        ('area', '清浦区'),
        ('area_pos', 8),
    )
    for attribute, wanted in expectations:
        assert getattr(pca, attribute) == wanted