def test_normalize_simple():
    # A string with mixed case, stray punctuation and padding should come
    # back as lower-case words separated by single spaces.
    raw = ' foo: BAR ,sna '
    expected = 'foo bar sna'

    eq(address.normalize_string(raw), expected)
def _match_with_place(event, place_ids, places_coll, database, _log=None):
    """Find the best matching place for an event's venue.

    Candidate places are gathered, in order, from ``_nearby_places``,
    ``_place_ids_places`` and ``_normalized_places``. Each distinct place
    (keyed by its ``_id``) is compared field by field against the event's
    venue and given a score; every satisfied combination of fields adds
    100 to the score.

    :param event: event document; ``event["_id"]`` is read here and the
        whole document is passed on to ``_event_venue`` and
        ``_normalized_places``.
    :param place_ids: place ids forwarded to ``_place_ids_places``.
    :param places_coll: places collection handed to the candidate look-ups.
    :param database: database handle forwarded to ``_nearby_places``.
    :param _log: logger to use; defaults to the module-level ``log``.
    :returns: the match entry chosen by ``_highest_match`` -- a mapping
        with an ``"ubernear"`` section (score, place id, source, location,
        search type, list of matched field names and, when available,
        distance) and a ``"place"`` section holding the place's address --
        or ``None`` when ``_highest_match`` returns ``None``.
    """
    if _log is None:
        _log = log

    venue = _event_venue(event)
    matches = DefaultOrderedDict(OrderedDict)
    # A set gives O(1) duplicate detection; place ids are already used as
    # dictionary keys below, so they are hashable.
    seen = set()

    places = itertools.chain(
        # Always check nearby places first so that coord information
        # is not skipped if the same place is found twice
        _nearby_places(
            venue=venue,
            event_id=event["_id"],
            places_coll=places_coll,
            database=database,
            _log=_log,
        ),
        _place_ids_places(
            place_ids=place_ids,
            places_coll=places_coll,
            _log=_log,
        ),
        _normalized_places(
            event=event,
            places_coll=places_coll,
            _log=_log,
        ),
    )

    for place in places:
        place_info = place["info"]
        place_id = place_info["_id"]
        search_type = place["type"]

        if place_id in seen:
            continue
        seen.add(place_id)

        place_address = place_info["address"]
        matched = OrderedDict([(item, False) for item in match_fields])
        distance = None
        venue_name = None
        score = 0

        # Only some candidates carry a distance; its presence is what
        # marks the coordinates as matched.
        if "distance" in place:
            distance = place["distance"]
            matched["coord"] = True

        if venue["address"]:
            matched["address"] = streets_equal(
                venue["address"],
                place_address["address"],
            )

        if venue["locality"]:
            matched["locality"] = cities_equal(
                venue["locality"],
                place_address["locality"],
            )

        venue_names = venue["names"]
        if venue_names:
            # The place name does not change per venue name, so normalize
            # it once instead of once per iteration.
            place_name = normalize_string(place_address["name"])
            for name in venue_names:
                normalized = normalize_string(name)
                # NOTE(review): a name that normalizes to an empty string
                # is a substring of everything and therefore always
                # matches -- confirm whether that is intended.
                if place_name in normalized or normalized in place_name:
                    venue_name = name
                    matched["name"] = True
                    break

        if venue["region"] and "region" in place_address:
            matched["region"] = states_equal(
                venue["region"],
                place_address["region"],
            )

        if venue["country"] and "country" in place_address:
            matched["country"] = countries_equal(
                venue["country"],
                place_address["country"],
            )

        if venue["postcode"] and "postcode" in place_address:
            matched["postcode"] = zipcodes_equal(
                venue["postcode"],
                place_address["postcode"],
            )

        if place_info["page_ids"] and venue["page_id"]:
            if venue["page_id"] in place_info["page_ids"]:
                matched["page"] = True

        # A score of 100 or more denotes a successful match
        # TODO places from place_ids should probably be
        # treated differently since there is a higher chance
        # they will be a match
        if matched["coord"] and matched["name"]:
            score += 100
        if matched["coord"] and matched["address"]:
            score += 100
        if matched["address"] and matched["name"]:
            score += 100
        if matched["address"] and matched["locality"]:
            score += 100
        if matched["postcode"] and matched["address"]:
            score += 100
        if matched["page"]:
            score += 100

        current = matches[place_id]
        current["ubernear"] = OrderedDict(
            [
                ("score", score),
                ("place_id", place_id),
                ("source", place_info["source"]),
                ("location", place_info["coords"]),
                ("search_type", search_type),
            ]
        )
        current["ubernear"]["matched"] = matched
        if distance is not None:
            current["ubernear"]["distance"] = distance

        # Store address information in the match so no places_coll
        # look-ups are necessary when serving the event
        # NOTE(review): this is the place's own address mapping, not a
        # copy, so the name override below also changes place_info.
        current["place"] = place_address

        # Some places will match on the address but the name
        # of the places are actually different, e.g., two
        # businesses with the same address but different unit
        # numbers. So, it's safer to use the facebook venue name
        if venue_name:
            current["place"]["name"] = venue_name
        elif venue["names"]:
            current["place"]["name"] = venue["names"][0]

    highest = _highest_match(matches)
    match = None
    if highest is not None:
        match = matches[highest]
        # Store a list of only those parts that have matched.
        # ``items()`` is used rather than ``iteritems()`` so this works on
        # both Python 2 and Python 3.
        highest_matched = [
            field
            for field, did_match in match["ubernear"]["matched"].items()
            if did_match
        ]
        match["ubernear"]["matched"] = highest_matched

    return match