def get_tract(lat, lon):
    """Return the census tract number for a coordinate, or 0 when unavailable.

    Args:
        lat: Latitude, passed to the geocoder as ``y``.
        lon: Longitude. It is negated before being passed as ``x``, so callers
            presumably supply degrees west as a positive number
            (NOTE(review): confirm against callers).

    Returns:
        The ``TRACT`` field of the first entry under ``'Census Tracts'`` as an
        ``int``, or ``0`` if the lookup fails or the response holds no usable
        tract.
    """
    try:
        ret = cg.coordinates(x=-lon, y=lat)
    except Exception:
        # Best-effort lookup: record the failure and fall back to 0 right away
        # instead of continuing with an unbound ``ret``.
        # ``verbose`` is read from module scope; it is not a parameter here.
        write_log('failed', verbose)
        return 0

    try:
        return int(ret['Census Tracts'][0]['TRACT'])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing key, empty tract list, non-dict response or non-numeric code.
        return 0
Exemplo n.º 2
0
def get_blkgrps(latitude, longitude):
    """Group the GIS ids of the point's county by distance from the point.

    The point is geocoded to a county and state name, every
    ``models.race.gis_id`` for that county/state is loaded, and each id is
    reduced to a tract key (characters 1-2, 4-6 and 8-13 of the id) that is
    matched against ``models.tract_data.geoid`` to obtain a latitude/longitude
    for the tract.

    Args:
        latitude: Latitude of the reference point (anything ``float()`` accepts).
        longitude: Longitude of the reference point.

    Returns:
        dict with the keys ``'underFive'``, ``'fiveToTen'``, ``'tenToTwenty'``
        and ``'overTwenty'``; each value is a list of ``gis_id`` values whose
        tract lies in that distance band (miles) from the reference point.
        Ids whose tract has no ``tract_data`` row are left out.
    """
    result = cg.coordinates(x=longitude, y=latitude)
    county = result.get("Counties")[0].get("NAME")
    state = result.get("States")[0].get("NAME")

    rows = db.session.query(models.race.gis_id)\
        .filter(models.race.state == state, models.race.county == county).all()

    tract_coord = {}
    without_coords = set()  # tract keys already known to have no tract_data row
    for row in rows:
        conv_id = row.gis_id[1:3] + row.gis_id[4:7] + row.gis_id[8:14]
        known = tract_coord.get(conv_id)
        if known is not None:
            known['gis_id'].append(row.gis_id)
            continue
        if conv_id in without_coords:
            continue
        # Only hit the database the first time a tract key is seen.
        tract_row = db.session.query(models.tract_data)\
            .filter(models.tract_data.geoid == conv_id).first()
        if tract_row is None:
            # No coordinates on record: the id cannot be placed in a band.
            without_coords.add(conv_id)
            continue
        tract_coord[conv_id] = {
            'gis_id': [row.gis_id],
            'long': tract_row.longitude,
            'lat': tract_row.latitude,
        }

    distances = {
        'underFive': [],
        'fiveToTen': [],
        'tenToTwenty': [],
        'overTwenty': []
    }
    origin = (float(latitude), float(longitude))
    for info in tract_coord.values():
        target = (float(info['lat']), float(info['long']))
        # NOTE(review): ``vincenty`` is no longer shipped by geopy 2.x;
        # ``geopy.distance.geodesic`` is the replacement if the dependency is
        # upgraded.
        miles = vincenty(origin, target).miles
        if miles < 5:
            band = 'underFive'
        elif miles < 10:
            band = 'fiveToTen'
        elif miles < 20:
            band = 'tenToTwenty'
        else:
            band = 'overTwenty'
        distances[band].extend(info['gis_id'])

    return distances
Exemplo n.º 3
0
def find_tract_blk(lat, lng):
    '''
    Look up the tract and block group codes for one latitude/longitude pair.
    Handy when there is only a single coordinate to check.

    lat: latitude
    lng: longitude

    return: the TRACT value followed by the BLKGRP value of the first
            '2010 Census Blocks' entry returned by the geocoder
    '''
    first_block = cg.coordinates(x=lng, y=lat)['2010 Census Blocks'][0]
    block_group = first_block['BLKGRP']
    return first_block['TRACT'] + block_group
Exemplo n.º 4
0
def get_census_tract_from_geolocation(df, latitude_col_name,
                                      longitude_col_name, tract_col_name):
    """Build a table with one row per census tract found among the points in ``df``.

    Every row of ``df`` is geocoded and the GEOID of its first census tract is
    recorded together with that row's latitude and longitude. When several
    rows fall in the same tract, the last one processed wins.

    Args:
        df: DataFrame holding the coordinates.
        latitude_col_name: Name of the latitude column in ``df``.
        longitude_col_name: Name of the longitude column in ``df``.
        tract_col_name: Name to give the tract GEOID column in the result.

    Returns:
        DataFrame with the columns ``tract_col_name``, ``latitude_col_name``
        and ``longitude_col_name``.
    """
    coords_by_tract = {}
    for _, record in df.iterrows():
        geo = cg.coordinates(y=record[latitude_col_name],
                             x=record[longitude_col_name])
        geoid = geo['Census Tracts'][0]['GEOID']
        coords_by_tract[geoid] = [
            record[latitude_col_name],
            record[longitude_col_name],
        ]

    new_names = {
        'index': tract_col_name,
        0: latitude_col_name,
        1: longitude_col_name,
    }
    # Tracts start out as columns; transpose so that each tract is a row.
    return (pd.DataFrame(coords_by_tract).T
            .reset_index()
            .rename(columns=new_names))
Exemplo n.º 5
0
    def add_population_in_buffer(self,
                                 mesh_pickle_path,
                                 step=0.0001,
                                 buffer_length=300):
        """Geocode the grid points around this object and cache the responses.

        Walks a 0.0001-degree grid around ``(self.latitude, self.longitude)``,
        and for every grid point closer than ``buffer_length`` metres stores
        the geocoder response in a dict keyed by ``(i_lat, i_lon)`` (the
        coordinates in integer ten-thousandths of a degree). The dict is
        loaded from and written back to ``mesh_pickle_path`` so that lookups
        are reused between calls. ``self.population_in_buffer`` is reset to 0;
        nothing in this method adds to it yet.

        Args:
            mesh_pickle_path: Path of the pickle file used as the lookup cache.
                It is created if it cannot be read.
            step: Currently unused; the grid spacing is fixed at 0.0001 degree
                because the cache keys are integer ten-thousandths.
            buffer_length: Buffer radius in metres.
        """
        import math

        self.population_in_buffer = 0
        scale = 10000  # grid points per degree
        center_lat = float(self.latitude)
        center_lon = float(self.longitude)

        # Metres per degree. A degree of latitude is roughly constant, while a
        # degree of longitude shrinks with cos(latitude). The floor on the
        # cosine only guards against division by ~0 near the poles.
        len_per_lat = 111000
        len_per_lon = len_per_lat * max(
            math.cos(math.radians(center_lat)), 0.01)

        lat_min = center_lat - buffer_length / len_per_lat
        lat_max = center_lat + buffer_length / len_per_lat
        lon_min = center_lon - buffer_length / len_per_lon
        lon_max = center_lon + buffer_length / len_per_lon

        # Load the cache; any unreadable or corrupt file means starting empty.
        # NOTE(review): pickle.load executes arbitrary code from the file, so
        # mesh_pickle_path must only ever point at trusted data.
        try:
            with open(mesh_pickle_path, "rb") as f:
                mesh = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
                ImportError, IndexError):
            mesh = {}

        center = (center_lat, center_lon)
        # floor/ceil (rather than int(), which truncates toward zero) keeps
        # the window symmetric for negative coordinates; the distance test
        # below discards anything outside the buffer.
        lat_cells = range(math.floor(lat_min * scale),
                          math.ceil(lat_max * scale) + 1)
        lon_cells = range(math.floor(lon_min * scale),
                          math.ceil(lon_max * scale) + 1)
        try:
            for i_lat in lat_cells:
                for i_lon in lon_cells:
                    lat = i_lat / scale
                    lon = i_lon / scale
                    distance = geopy.distance.distance(
                        (lat, lon), center).km * 1000
                    if distance < buffer_length:
                        if (i_lat, i_lon) not in mesh:
                            mesh[(i_lat, i_lon)] = cg.coordinates(lon, lat)
                            print("Add census geocode lat:", lat, "lon:", lon,
                                  "Mesh data.")
                    else:
                        print("lat:", lat, "lon:", lon,
                              " is not in buffer. distance: ", distance)
        finally:
            # Persist whatever was collected, even if a lookup failed part-way,
            # so completed network requests are not thrown away.
            with open(mesh_pickle_path, "wb") as f:
                pickle.dump(mesh, f)
Exemplo n.º 6
0
def get_census_tract_by_geo_info(longitude, latitude, verbose=True):
    """
    find the census tract to which a given point defined by their
    longitude and latitude belongs.

    The geocoder is called up to 10 times; a ``ValueError`` or ``KeyError``
    from a call is followed by a random pause of under one second before the
    next attempt.

    :param longitude: float
    :param latitude: float
    :param: verbose: boolean -> whether to print the matched ids
    :return: matched_dict: dictionary with four keys:
                            - census_block_id
                            - census_tract_id
                            - county_id
                            - state_id
    :raises RuntimeError: if no non-empty response was obtained within the
                          allowed number of attempts
    """
    max_trials = 10
    geocoded_result = None
    for _ in range(max_trials):
        try:
            geocoded_result = cg.coordinates(x=longitude, y=latitude)
        except (ValueError, KeyError):
            # The service intermittently returns responses the client cannot
            # parse; back off briefly and try again.
            time.sleep(random.random())
            continue
        if geocoded_result is not None:
            break

    if not geocoded_result:
        # Explicit error instead of an assert (stripped under -O) or an
        # accidental TypeError from using None below.
        raise RuntimeError(
            "No geocoding result for longitude=%r, latitude=%r after %d "
            "attempts" % (longitude, latitude, max_trials))

    matched_dict = {
        'census_block_id': geocoded_result['2010 Census Blocks'][0]['GEOID'],
        'census_tract_id': geocoded_result['Census Tracts'][0]['GEOID'],
        'county_id': geocoded_result['Counties'][0]['GEOID'],
        'state_id': geocoded_result['States'][0]['GEOID']
    }
    if verbose:
        pprint(matched_dict)
    return matched_dict
Exemplo n.º 7
0
    def get_mean_location(self):
        """Return the geographic midpoint of ``self.receiveData`` and its geocode.

        Each entry's ``"lat"``/``"lon"`` (degrees) is converted to a unit
        vector, the vectors are averaged, and the mean vector is converted
        back to latitude/longitude. Entries without usable coordinates are
        reported on stdout and left out of the average.

        Returns:
            ``(midpoint, mid_address)`` where ``midpoint`` is ``(lat, lon)`` in
            degrees and ``mid_address`` is the geocoder response for it.

        Raises:
            ValueError: if no entry has usable coordinates.
        """
        deg_to_rad = math.pi / 180
        coords = []

        for index, house in enumerate(self.receiveData):
            try:
                temp_lat = house["lat"] * deg_to_rad
                temp_lon = house["lon"] * deg_to_rad
            except (KeyError, TypeError):
                # Build the label defensively so that reporting a bad entry
                # can never raise itself.
                label = house.get("id", index) if isinstance(
                    house, dict) else index
                print(f"Skipping {label}: missing or invalid lat/lon")
                continue

            # Convert to cartesian coords on the unit sphere
            coords.append((math.cos(temp_lat) * math.cos(temp_lon),
                           math.cos(temp_lat) * math.sin(temp_lon),
                           math.sin(temp_lat)))

        if not coords:
            raise ValueError("no entries with valid lat/lon to average")

        # Average over the entries that were actually used.
        count = len(coords)
        avg_x = sum(c[0] for c in coords) / count
        avg_y = sum(c[1] for c in coords) / count
        avg_z = sum(c[2] for c in coords) / count

        # Back to degrees
        hyp = math.sqrt(avg_x * avg_x + avg_y * avg_y)
        lon = math.atan2(avg_y, avg_x) * (180 / math.pi)
        lat = math.atan2(avg_z, hyp) * (180 / math.pi)

        midpoint = (lat, lon)
        mid_address = cg.coordinates(lon, lat)
        sleep(1)  # pause after the geocoder request
        return midpoint, mid_address
import numpy as np
import json
import censusgeocode as cg

# Load the restaurant table (tab-separated, with the saved index in the
# 'Unnamed: 0' column).
df = pd.read_csv('LA_County_restaurants.csv', sep='\t', encoding='utf-8', index_col='Unnamed: 0')


# One entry per restaurant: "6037" followed by the tract code, or NaN when the
# lookup fails.
results = []
for index, row in df.iterrows():
    lat = round(row['Latitude'], 5)
    print(lat)
    lon = round(row['Longitude'], 5)
    print(lon)

    try:
        # The geocoder takes x = longitude and y = latitude.
        response = cg.coordinates(x=lon, y=lat)
        tract = response['2010 Census Blocks'][0]['TRACT']
    except Exception:
        # Best-effort: keep the row and mark the tract as unknown.
        results.append(np.nan)
    else:
        print(tract)
        results.append(f"6037{tract}")
    


# In[7]:


# Sample geocoder response kept as a fixture. It has the four top-level keys
# 'Counties', 'Census Tracts', '2010 Census Blocks' and 'States', each holding
# a list with a single dict of attributes.
result = {'Counties': [{'OID': 275901063468976, 'STATE': '06', 'FUNCSTAT': 'A', 'AREAWATER': 1794659470, 'NAME': 'Los Angeles County', 'LSADC': '06', 'CENTLON': '-118.2617650', 'BASENAME': 'Los Angeles', 'INTPTLAT': '+34.1963983', 'COUNTYCC': 'H1', 'MTFCC': 'G4020', 'COUNTY': '037', 'GEOID': '06037', 'CENTLAT': '+34.1957768', 'INTPTLON': '-118.2618616', 'AREALAND': 10510687541, 'COUNTYNS': '00277283', 'OBJECTID': 398, 'CENT': (-118.261765, 34.1957768), 'INTPT': (-118.2618616, 34.1963983)}], 'Census Tracts': [{'OID': 207901115289836, 'STATE': '06', 'FUNCSTAT': 'S', 'NAME': 'Census Tract 1993', 'AREAWATER': 40785, 'LSADC': 'CT', 'CENTLON': '-118.1992140', 'BASENAME': '1993', 'INTPTLAT': '+34.0941911', 'MTFCC': 'G5020', 'COUNTY': '037', 'GEOID': '06037199300', 'CENTLAT': '+34.0926249', 'INTPTLON': '-118.2003961', 'AREALAND': 2550540, 'OBJECTID': 6991, 'TRACT': '199300', 'CENT': (-118.199214, 34.0926249), 'INTPT': (-118.2003961, 34.0941911)}], '2010 Census Blocks': [{'BLKGRP': '1', 'OID': 210404056348637, 'FUNCSTAT': 'S', 'STATE': '06', 'AREAWATER': 0, 'NAME': 'Block 1000', 'SUFFIX': '', 'LSADC': 'BK', 'CENTLON': '-118.1942484', 'LWBLKTYP': 'L', 'BASENAME': '1000', 'BLOCK': '1000', 'INTPTLAT': '+34.1001521', 'MTFCC': 'G5040', 'COUNTY': '037', 'GEOID': '060371993001000', 'CENTLAT': '+34.1001521', 'INTPTLON': '-118.1942484', 'AREALAND': 428518, 'OBJECTID': 3068287, 'TRACT': '199300', 'CENT': (-118.1942484, 34.1001521), 'INTPT': (-118.1942484, 34.1001521)}], 'States': [{'OID': 2749018475066, 'STATE': '06', 'FUNCSTAT': 'A', 'NAME': 'California', 'AREAWATER': 20484627967, 'LSADC': '00', 'CENTLON': '-119.5277460', 'STUSAB': 'CA', 'BASENAME': 'California', 'INTPTLAT': '+37.1551773', 'DIVISION': '9', 'MTFCC': 'G4000', 'STATENS': '01779778', 'GEOID': '06', 'CENTLAT': '+37.1547352', 'INTPTLON': '-119.5434183', 'REGION': '4', 'AREALAND': 403483191859, 'OBJECTID': 14, 'CENT': (-119.527746, 37.1547352), 'INTPT': (-119.5434183, 37.1551773)}]}
# Target format example: 6037101110 ("6037" followed by the 6-digit tract code).
# For the sample above the line below prints 6037199300.
print(f"6037{result['2010 Census Blocks'][0]['TRACT']}")
# Keep only the first row seen for each station name.
lat_long = lat_long.drop_duplicates(subset='station_name', keep='first')
print(len(lat_long))
turn_cleaned.station.nunique()


# Load the 2018 median household income table and normalise its column names.
census = pd.read_csv(working_directory+'censustract-medianhouseholdincome2018.csv')
census.columns = census.columns.str.lower().str.strip()
census = census.rename(columns={'census tract': 'census_tract'})
print(len(census))
census[census['census_tract'] == 36061018900]

#=================================================================================================
# Geocode every station and remember one (latitude, longitude) per census tract;
# when several stations share a tract, the last one processed wins.
census_dict = {}
for idx, row in lat_long.iterrows():
    station_lat = row['station_latitude']
    station_lon = row['station_longitude']
    census_info = cg.coordinates(y=station_lat, x=station_lon)
    tract = census_info['Census Tracts'][0]['GEOID']
    census_dict[tract] = [station_lat, station_lon]

# Tracts start out as columns; transpose so that each tract becomes a row.
census_coord = (
    pd.DataFrame(census_dict).T
    .reset_index()
    .rename(columns={
        'index': 'census_tract',
        0: 'station_latitude',
        1: 'station_longitude',
    })
)
census_coord.shape
census_coord.to_csv(working_directory+'census_tract_lat_long.csv', index=False)
#=================================================================================================

# The tract id is converted to int only after the CSV has been written.
census_coord['census_tract'] = census_coord['census_tract'].astype(int)
census_coord.head()
lngs = []
csv_name = []
tract = []
# Empty frame that fixes the column order of the combined table.
master_df = pd.DataFrame({
    "Name": names,
    "Address": addresses,
    "Latitude": lats,
    "Longitude": lngs,
    "Table Name": csv_name,
    "Tract": tract
})

# Collect the per-file frames and concatenate once at the end;
# DataFrame.append no longer exists in pandas 2.x.
frames = [master_df]
for i in range(10):
    df = pd.read_csv(csv_list[i])
    df = df.drop(columns="Unnamed: 0")
    df["Table Name"] = table_list[i]

    result_list = []
    for lon, lat in zip(df["Longitude"], df["Latitude"]):
        try:
            result = cg.coordinates(x=lon, y=lat)
            # NAME looks like "Census Tract 1993"; keep the third word.
            result_list.append(result['Census Tracts'][0]['NAME'].split(' ')[2])
        except Exception:
            # Best-effort: leave the tract blank when the lookup fails.
            result_list.append("")
        time.sleep(2)  # pause between geocoder requests
    df["Tract"] = result_list
    frames.append(df)

master_df = pd.concat(frames, ignore_index=True)
master_df.drop_duplicates(inplace=True, ignore_index=True)
master_df.to_csv("../Resources/master.csv")
Exemplo n.º 11
0
import sqlite3
import pandas as pd
import censusgeocode as cg
import time

# Read the whole bus_data table, then release the database handle.
conn = sqlite3.connect("DC-Criminalistics/data/wmata-data/bus_data.db")
try:
    df = pd.read_sql_query("select * from bus_data", conn)
finally:
    conn.close()

census_block = []
census_blockgroup = []
census_tract = []

for row in df.itertuples(index=True, name='Pandas'):
    try:
        info = cg.coordinates(x=row.Longitude, y=row.Latitude)
        block_info = info['2010 Census Blocks'][0]
        codes = (block_info['BLOCK'], block_info['BLKGRP'],
                 block_info['TRACT'])
    except (ValueError, KeyError, IndexError):
        # Failed lookup or a response without block data: record no match.
        codes = (None, None, None)

    # Append all three together so the lists always stay aligned with df.
    census_block.append(codes[0])
    census_blockgroup.append(codes[1])
    census_tract.append(codes[2])

df['census_block'] = census_block
df['census_blockgroup'] = census_blockgroup
df['census_tract'] = census_tract

df.to_csv('bus_station_reverse_geocode_out.csv')
Exemplo n.º 12
0
def address_to_census(address, aggregation="blocks", max_requests=100):
    """
    Converts street addresses to the GEOID of the selected aggregation choice

    The address is first sent to the Census one-line address geocoder. If that
    returns no match, the address is geocoded with ArcGIS and the resulting
    coordinates are looked up instead.

    Args:
        address (str): Address should be in the following format "<Street>, <City>, <State>, <Zip>".
            Addresses are not case sensitive and the spacing between commas and entries do not matter. The State can be given in longform or abbreviated.
            Examples: "1 Shields Avenue, Davis, CA, 95616", "1 Shields Avenue,Davis,California,95616", "1 shields avenue, davis,ca,   95616"
            A missing value (anything ``pd.isna`` reports as NA) is returned unchanged.
        aggregation (str): Census aggregation method: block groups, blocks, tracts
            (case-insensitive; singular forms and a "census " prefix are also accepted)
        max_requests (int): Maximum number of coordinate lookups attempted in
            the fallback path before giving up.

    Returns:
        GEOID of selected aggregation

    Raises:
        ValueError: if ``aggregation`` is not a valid option, or the fallback
            geocoder cannot locate the address.
        RuntimeError: if the coordinate lookup fails ``max_requests`` times.
    """
    if pd.isna(address):
        return address

    BLOCK_GROUP_NAMES = {
        "census block groups", "census block group", "block groups",
        "block group"
    }
    BLOCK_NAMES = {"census blocks", "census block", "blocks", "block"}
    TRACT_NAMES = {"census tracts", "census tract", "tracts", "tract"}
    OPTIONS = BLOCK_GROUP_NAMES | BLOCK_NAMES | TRACT_NAMES

    # Normalise once so validation and dispatch agree on case.
    level = aggregation.lower() if isinstance(aggregation, str) else None
    if level not in OPTIONS:
        raise ValueError(
            "The selected aggregation is not a valid option. Please select from the 3 possible choices: block groups, blocks, tracts"
        )

    result = cg.onelineaddress(address, returntype="geographies")

    if result:
        geographies = result[0]["geographies"]
        census_blocks = geographies["2020 Census Blocks"][0]
    else:
        geolocator = ArcGIS()
        g = geolocator.geocode(address)
        if g is None:
            raise ValueError(f"Could not geocode address: {address!r}")
        x = g.longitude
        y = g.latitude
        result = None
        # This loop is meant to deal with errors thrown on portions of the responses from https://geocoding.geo.census.gov/geocoder/
        # https://github.com/fitnr/censusgeocode/issues/18
        req_counter = 0
        while result is None and req_counter < max_requests:
            try:
                result = cg.coordinates(x=x, y=y, returntype="geographies")
            except Exception:
                # Retry; the failure is reported below if it never succeeds.
                pass
            req_counter += 1
        if result is None:
            raise RuntimeError(
                f"Census coordinate lookup failed after {req_counter} "
                f"requests for address: {address!r}")
        census_blocks = result["2020 Census Blocks"][0]

    STATE = census_blocks["STATE"]
    COUNTY = census_blocks["COUNTY"]
    TRACT = census_blocks["TRACT"]
    BLOCK_GROUP = census_blocks["BLKGRP"]
    BLOCK = census_blocks["BLOCK"]

    if level in BLOCK_GROUP_NAMES:
        return STATE + COUNTY + TRACT + BLOCK_GROUP
    if level in BLOCK_NAMES:
        return STATE + COUNTY + TRACT + BLOCK
    # Only the tract names remain after validation.
    return STATE + COUNTY + TRACT
Exemplo n.º 13
0
import pandas as pd
from pprint import pprint
import numpy as np
import json
import censusgeocode as cg

# Load the restaurant table (tab-separated, with the saved index in the
# 'Unnamed: 0' column) and keep the rows whose city is "Los Angeles".
df = pd.read_csv('LA_County_restaurants.csv',
                 sep='\t',
                 encoding='utf-8',
                 index_col='Unnamed: 0')
df = df[df['city'] == "Los Angeles"]

results = []
for index, row in df.iterrows():
    print(row['Latitude'])
    print(row['Longitude'])
    # The geocoder takes x = longitude and y = latitude.
    response = cg.coordinates(x=row['Longitude'], y=row['Latitude'])
Exemplo n.º 14
0
import sqlite3
import pandas as pd
import censusgeocode as cg

# Read the first 10 rows of cabi_station_data, then release the database handle.
conn = sqlite3.connect(
    "DC-Criminalistics/data/cabi-station-data/cabi_station_data.db")
try:
    df = pd.read_sql_query("select * from cabi_station_data limit 10", conn)
finally:
    conn.close()

census_block = []
census_blockgroup = []
census_tract = []

for row in df.itertuples(index=True, name='Pandas'):
    try:
        info = cg.coordinates(x=row.lon, y=row.lat)
        block_info = info['2010 Census Blocks'][0]
        codes = (block_info['BLOCK'], block_info['BLKGRP'],
                 block_info['TRACT'])
    except (ValueError, KeyError, IndexError):
        # Failed lookup or a response without block data: record no match.
        codes = (None, None, None)

    # Append all three together so the lists always stay aligned with df.
    census_block.append(codes[0])
    census_blockgroup.append(codes[1])
    census_tract.append(codes[2])

df['census_block'] = census_block
df['census_blockgroup'] = census_blockgroup
df['census_tract'] = census_tract

df.to_csv('cabi_reverse_geocode_out.csv')
Exemplo n.º 15
0
def get_location_name(coords):
    """Return "<county name>, <state abbreviation>" for a coordinate pair.

    ``coords[0]`` is sent to the geocoder as ``x`` and ``coords[1]`` as ``y``.
    The state's full name from the response is mapped through
    ``us_state_abbrev``.
    """
    geo = cg.coordinates(x=coords[0], y=coords[1])
    county_name = geo["Counties"][0]["NAME"]
    state_code = us_state_abbrev[geo["States"][0]["NAME"]]
    return county_name + ", " + state_code