Exemplo n.º 1
0
def find_location_in_string(text):
    """Try to resolve a short free-text string to a single city location.

    The words of ``text`` are lower-cased, stripped of surrounding ``.`` and
    ``,`` characters and examined from last to first.  Words found in
    ``countries_cache`` or ``regions_cache`` narrow the search to a country
    or region code; every other word is tried, in that same order, as a
    city lookup key through ``data.get_cities``.

    Returns a dict with a single-entry ``'found_tokens'`` list describing
    the city (type, lat, lon and the full input as ``'matched_string'``),
    or ``None`` when no word yields any city.
    """
    # Normalise the words and walk them from the end of the string backwards.
    words = [word.lower().strip(".,") for word in reversed(text.split())]

    # The connection handle is not used below; the call is kept because it
    # may initialise state inside `data` -- TODO confirm it is still needed.
    data.get_database_connection()

    country_code = None
    region_code = None
    city_words = []
    for word in words:
        if word in countries_cache:
            country_code = countries_cache[word][0]['country_code'].lower()
        elif word in regions_cache:
            region_code = regions_cache[word][0]['region_code'].lower()
        else:
            city_words.append(word)

    chosen = None
    for word in city_words:
        matches = data.get_cities(word, word, country_code, region_code)
        if len(matches) == 0:
            continue
        # NOTE(review): an ascending sort followed by [0] selects the row with
        # the SMALLEST population -- confirm that this is intended and that
        # the most populous match was not meant instead.
        ranked = sorted(matches.values(), key=lambda row: row['population'])
        chosen = ranked[0]
        break

    if chosen is None:
        return None

    return {
        'found_tokens': [{
            'type': 'CITY',
            'lat': chosen['lat'],
            'lon': chosen['lon'],
            'matched_string': text,
        }],
    }
def city_data():
    """Fetch detailed data for every stored city and write it back to the db.

    For each city returned by ``db.get_cities()`` this calls
    ``b.getCityData`` with the city's name, state id and best-places URL,
    then passes every returned record to ``db.set_cities``.  A city whose
    fetch raises is reported on stdout and skipped so that one failure does
    not abort the whole import.

    Returns None.
    """
    for c in db.get_cities():
        try:
            c_data = b.getCityData(c.city, c.state_id, c.best_places_url)
        except Exception:
            # Best-effort import: report and move on.  `Exception` (rather
            # than a bare `except`) lets KeyboardInterrupt/SystemExit through.
            print("ERROR could not get data for {}".format(c.city))
            continue
        for d in c_data:
            db.set_cities(
                state_id=d['state_id'],
                city=d['city'],
                population=d['population'],
                unemployment=d['unemployment'],
                avg_commute=d['avg_commute'],
                median_age=d['median_age'],
                household_size=d['household_size'],
                median_home_price=d['median_home_price'],
                msa=d['msa'],
                zipcodes=d['zipcodes'],
            )
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("imported {}".format(c.city))
Exemplo n.º 3
0
def is_city(text, text_starting_index, previous_result):
    """Try to match a city name that ends at ``text_starting_index``.

    Words are pulled backwards out of ``text`` with ``pull_word_from_end``
    (at most ``geodict_config.word_max`` of them) and joined into a growing
    candidate name.  The candidates are looked up in the name map that
    ``data.get_cities`` returns for the first pulled word, optionally
    narrowed by a COUNTRY / REGION code already present in
    ``previous_result``.

    On a match a CITY token is inserted at the front of the
    ``"found_tokens"`` list -- of ``previous_result`` itself (mutated in
    place) when one is given, otherwise of a fresh result dict -- and that
    dict is returned.  Returns ``None`` when nothing matches.
    """
    # Country / region tokens found earlier in the sequence narrow the search.
    country_code = region_code = None
    if previous_result is not None:
        for token in previous_result["found_tokens"]:
            token_type = token["type"]
            if token_type == "COUNTRY":
                country_code = token["code"]
            elif token_type == "REGION":
                region_code = token["code"]

    candidate = ""
    position = text_starting_index
    words_pulled = 0
    matched_row = None
    while words_pulled < geodict_config.word_max:
        # presumably (word, new index, trailing characters skipped) -- the
        # helper is defined outside this block; verify against it.
        word, position, end_skipped = pull_word_from_end(text, position)
        words_pulled += 1

        if candidate != "":
            # Later pulls extend the candidate name to the left.
            candidate = word + " " + candidate
        else:
            # First pull: fix the end offset and fetch the lookup table.
            candidate = word
            word_end_index = text_starting_index - end_skipped
            name_map = data.get_cities(word, candidate, country_code, region_code)
            if len(name_map) < 1:
                break

        if candidate == "":
            return None

        # A candidate starting with a lower-case letter is never looked up;
        # keep pulling words instead.
        if candidate[0:1].islower():
            continue

        lookup_key = candidate.lower()
        if lookup_key in name_map:
            matched_row = name_map[lookup_key]
            if matched_row is not None:
                break

        if position < 0:
            break

    if matched_row is None:
        return None

    result = {"found_tokens": []} if previous_result is None else previous_result

    city_token = {
        "type": "CITY",
        "lat": matched_row["lat"],
        "lon": matched_row["lon"],
        "matched_string": candidate,
        "start_index": (position + 1),
        "end_index": word_end_index,
    }
    result["found_tokens"].insert(0, city_token)

    return result
Exemplo n.º 4
0
def is_city(text, text_starting_index, previous_result):
    """Look for a city name whose last word ends at ``text_starting_index``.

    Up to ``geodict_config.word_max`` words are pulled from ``text`` going
    backwards (via ``pull_word_from_end``); each newly pulled word is
    prepended to the phrase built so far and the lower-cased phrase is
    checked against the mapping returned by ``data.get_cities`` for the
    first pulled word.  COUNTRY / REGION tokens in ``previous_result``
    supply the codes passed to that lookup.

    Returns the result dict with a new CITY token inserted at index 0 of
    its ``'found_tokens'`` list (``previous_result`` is reused and mutated
    when it is not None), or ``None`` if no city is found.
    """
    # If we're part of a sequence, reuse any country or region already found.
    country_code = None
    region_code = None
    earlier_tokens = [] if previous_result is None else previous_result['found_tokens']
    for earlier in earlier_tokens:
        token_kind = earlier['type']
        if token_kind == 'COUNTRY':
            country_code = earlier['code']
        elif token_kind == 'REGION':
            region_code = earlier['code']

    phrase = ''
    index = text_starting_index
    attempts = 0
    city_row = None
    while attempts < geodict_config.word_max:
        pulled, index, end_skipped = pull_word_from_end(text, index)
        attempts += 1

        if phrase == '':
            # Only the first pulled word triggers the database lookup.
            phrase = pulled
            word_end_index = text_starting_index - end_skipped
            name_map = data.get_cities(pulled, phrase, country_code, region_code)
            if len(name_map) < 1:
                break
        else:
            phrase = pulled + ' ' + phrase

        if phrase == '':
            return None

        if not phrase[0:1].islower():
            key = phrase.lower()
            if key in name_map:
                city_row = name_map[key]
            if city_row is not None:
                break
            if index < 0:
                break
        # A phrase starting in lower case is skipped without the index check,
        # exactly as the `continue` in the original loop did.

    if city_row is None:
        return None

    if previous_result is not None:
        current_result = previous_result
    else:
        current_result = {'found_tokens': []}

    lat = city_row['lat']
    lon = city_row['lon']
    current_result['found_tokens'].insert(0, {
        'type': 'CITY',
        'lat': lat,
        'lon': lon,
        'matched_string': phrase,
        'start_index': (index + 1),
        'end_index': word_end_index,
    })

    return current_result