def find_location_in_string(text):
    """Try to resolve a short free-text string to a single city location.

    The text is split on whitespace; each word is lower-cased and stripped of
    leading/trailing "." and ",". Words are then scanned from the last to the
    first:

    * a word present in ``countries_cache`` sets the country filter,
    * a word present in ``regions_cache`` sets the region filter,
    * every other word is kept as a candidate city token.

    Candidate tokens are tried in that same (last-to-first) order with
    ``data.get_cities``; the first token that yields any rows decides the
    result, and among those rows the most populous city is chosen.

    Returns a dict ``{'found_tokens': [{'type': 'CITY', 'lat', 'lon',
    'matched_string'}]}`` where ``matched_string`` is the whole input text,
    or ``None`` when nothing matched (including empty input).
    """
    tokens = [word.lower().strip(".,") for word in text.split()][::-1]
    if not tokens:
        # Nothing to look up, so do not touch the database at all.
        return None

    # NOTE(review): the handle returned here was never used by this function.
    # The call is kept only in case it lazily initialises shared state that
    # data.get_cities depends on -- confirm and drop if it does not.
    data.get_database_connection()

    possible_country = None
    possible_region = None
    possible_cities = []
    for token in tokens:
        if token in countries_cache:
            possible_country = countries_cache[token][0]['country_code'].lower()
        elif token in regions_cache:
            possible_region = regions_cache[token][0]['region_code'].lower()
        else:
            possible_cities.append(token)

    for token in possible_cities:
        cities = data.get_cities(token, token, possible_country, possible_region)
        if len(cities) == 0:
            continue
        # Prefer the largest place. The previous code sorted ascending by
        # population and took element 0, i.e. the smallest candidate.
        city = max(cities.values(), key=lambda row: row['population'])
        return {
            'found_tokens': [{
                'type': 'CITY',
                'lat': city['lat'],
                'lon': city['lon'],
                'matched_string': text,
            }],
        }

    return None
def city_data():
    """Fetch per-city data and store it through ``db.set_cities``.

    For every record returned by ``db.get_cities()`` this calls
    ``b.getCityData(city, state_id, best_places_url)`` and writes each
    returned entry back with ``db.set_cities``. A city whose fetch raises is
    reported on stdout and skipped; the remaining cities are still processed.

    Returns ``None``.
    """
    # Keys copied verbatim from each fetched entry into db.set_cities(...).
    fields = (
        'state_id',
        'city',
        'population',
        'unemployment',
        'avg_commute',
        'median_age',
        'household_size',
        'median_home_price',
        'msa',
        'zipcodes',
    )

    for c in db.get_cities():
        try:
            c_data = b.getCityData(c.city, c.state_id, c.best_places_url)
        except Exception:
            # Best-effort import: report and move on. Catching Exception
            # rather than using a bare except lets Ctrl-C / SystemExit stop
            # the run.
            print("ERROR could not get data for {}".format(c.city))
            continue

        for d in c_data:
            db.set_cities(**dict((name, d[name]) for name in fields))

        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("imported {}".format(c.city))

    return
def is_city(text, text_starting_index, previous_result):
    """Try to match a city name ending at ``text_starting_index`` in ``text``.

    Words are pulled backwards from ``text_starting_index`` with
    ``pull_word_from_end`` (at most ``geodict_config.word_max`` of them) and
    prepended to the phrase built so far. Candidate cities are fetched once,
    via ``data.get_cities``, using the first word pulled; each longer phrase
    is then looked up (lower-cased) in that candidate map. Phrases whose
    first character is lower-case are not looked up.

    If ``previous_result`` is given, any COUNTRY / REGION token it already
    holds narrows the city search, and the new CITY token is inserted at the
    front of its ``found_tokens`` list (the dict is modified in place and
    returned). Otherwise a new result dict is created.

    Returns the result dict, or ``None`` when no city matched.

    NOTE(review): this file contains a second top-level ``is_city`` with the
    same logic further down; if both stay in one module the later definition
    is the one that remains bound.
    """
    # Country / region tokens found earlier in the sequence narrow the search.
    country_code = None
    region_code = None
    if previous_result is not None:
        for earlier_token in previous_result["found_tokens"]:
            token_kind = earlier_token["type"]
            if token_kind == "COUNTRY":
                country_code = earlier_token["code"]
            elif token_kind == "REGION":
                region_code = earlier_token["code"]

    phrase = ""
    position = text_starting_index
    words_pulled = 0
    match = None
    while words_pulled < geodict_config.word_max:
        word, position, end_skipped = pull_word_from_end(text, position)
        words_pulled += 1

        if phrase == "":
            # First word: remember where the match ends and fetch candidates.
            phrase = word
            phrase_end = text_starting_index - end_skipped
            candidates = data.get_cities(word, phrase, country_code, region_code)
            if len(candidates) == 0:
                break
        else:
            phrase = word + " " + phrase

        if phrase == "":
            return None

        # A phrase starting with a lower-case letter is skipped without
        # running the lookup or the end-of-text check below.
        if not phrase[:1].islower():
            lookup_key = phrase.lower()
            if lookup_key in candidates:
                match = candidates[lookup_key]
            if match is not None or position < 0:
                break

    if match is None:
        return None

    if previous_result is None:
        result = {"found_tokens": []}
    else:
        result = previous_result

    city_token = {
        "type": "CITY",
        "lat": match["lat"],
        "lon": match["lon"],
        "matched_string": phrase,
        "start_index": (position + 1),
        "end_index": phrase_end,
    }
    result["found_tokens"].insert(0, city_token)
    return result
def is_city(text, text_starting_index, previous_result):
    """Look for a city name that ends at ``text_starting_index`` in ``text``.

    Starting at ``text_starting_index`` the function repeatedly calls
    ``pull_word_from_end`` to take one more word from the left, up to
    ``geodict_config.word_max`` words. The first word pulled is used to fetch
    a map of candidate cities from ``data.get_cities``; the growing phrase is
    then checked, lower-cased, against that map. A phrase that begins with a
    lower-case character is skipped.

    COUNTRY and REGION tokens already present in ``previous_result`` are
    passed to the city lookup as filters. On success a CITY token (lat, lon,
    matched string, start/end index) is inserted at position 0 of
    ``found_tokens`` -- in ``previous_result`` itself when one was supplied,
    otherwise in a freshly created dict -- and that dict is returned.

    Returns ``None`` if no city was found.

    NOTE(review): an earlier top-level ``is_city`` with the same logic also
    exists in this file; if both stay in one module this later definition
    replaces it.
    """
    # If we're part of a sequence, reuse its country/region to narrow things.
    country_filter = None
    region_filter = None
    if previous_result is not None:
        for prior in previous_result['found_tokens']:
            prior_type = prior['type']
            if prior_type == 'COUNTRY':
                country_filter = prior['code']
            elif prior_type == 'REGION':
                region_filter = prior['code']

    name_so_far = ''
    scan_index = text_starting_index
    pull_count = 0
    city_row = None
    while pull_count < geodict_config.word_max:
        next_word, scan_index, end_skipped = pull_word_from_end(text, scan_index)
        pull_count += 1

        starting_fresh = (name_so_far == '')
        if starting_fresh:
            name_so_far = next_word
            match_end = (text_starting_index - end_skipped)
            city_map = data.get_cities(next_word, name_so_far, country_filter, region_filter)
            if len(city_map) < 1:
                break
        else:
            name_so_far = next_word + ' ' + name_so_far

        if name_so_far == '':
            return None

        if name_so_far[0:1].islower():
            continue

        key = name_so_far.lower()
        if key in city_map:
            city_row = city_map[key]

        if city_row is not None:
            break
        if scan_index < 0:
            break

    if city_row is None:
        return None

    outcome = {'found_tokens': []} if previous_result is None else previous_result

    lat = city_row['lat']
    lon = city_row['lon']
    outcome['found_tokens'].insert(0, {
        'type': 'CITY',
        'lat': lat,
        'lon': lon,
        'matched_string': name_so_far,
        'start_index': (scan_index + 1),
        'end_index': match_end,
    })
    return outcome