def get_tract(lat, lon):
    """Return the census tract number for a point, or 0 when it is unavailable.

    Args:
        lat: Latitude passed to the geocoder as ``y``.
        lon: Longitude; its sign is flipped before the lookup (``x=-lon``),
            so callers presumably supply west-positive values -- TODO confirm.

    Returns:
        The ``TRACT`` field of the first ``'Census Tracts'`` entry converted
        to ``int``, or ``0`` if the lookup fails or the response has no
        usable tract.
    """
    try:
        ret = cg.coordinates(x=-lon, y=lat)
    except Exception:
        # Best-effort lookup: log and report "no tract".  Returning here
        # avoids touching the unbound ``ret`` below, which the original only
        # survived because a second bare ``except`` swallowed the error.
        write_log('failed', verbose)
        return 0

    try:
        return int(ret['Census Tracts'][0]['TRACT'])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing key, empty tract list, non-dict response or a
        # non-numeric tract code.
        return 0
def get_blkgrps(latitude, longitude):
    """Bucket the GIS ids of the point's county by distance from the point.

    The point is reverse-geocoded to a county and state name, every
    ``models.race`` row for that county is loaded, and each row is grouped
    under an 11-character id sliced out of its ``gis_id`` (presumably
    state + county + tract FIPS digits -- TODO confirm against the data).
    The coordinates stored in ``models.tract_data`` for that id are then
    compared with the input point.

    Args:
        latitude: Latitude of the reference point (anything ``float()`` accepts).
        longitude: Longitude of the reference point.

    Returns:
        dict with keys ``'underFive'``, ``'fiveToTen'``, ``'tenToTwenty'`` and
        ``'overTwenty'``; each value is a list of ``gis_id`` values whose
        tract lies within that distance band, in miles.

    Note:
        Ids with no ``tract_data`` row are skipped (the original raised
        ``AttributeError`` on them).  ``vincenty`` is kept because it is the
        name this module already has in scope; it was removed in geopy 2.0.
    """
    result = cg.coordinates(x=longitude, y=latitude)
    county = result.get("Counties")[0].get("NAME")
    state = result.get("States")[0].get("NAME")

    rows = db.session.query(models.race.gis_id)\
        .filter(models.race.state == state, models.race.county == county).all()

    tract_coord = {}
    for row in rows:
        conv_id = row.gis_id[1:3] + row.gis_id[4:7] + row.gis_id[8:14]
        known = tract_coord.get(conv_id)
        if known is not None:
            known['gis_id'].append(row.gis_id)
            continue

        # Only hit the database the first time an id is seen; the original
        # re-ran this query for every row and discarded the result.
        tract_row = db.session.query(models.tract_data)\
            .filter(models.tract_data.geoid == conv_id).first()
        if tract_row is None:
            continue
        tract_coord[conv_id] = {
            'gis_id': [row.gis_id],
            'long': tract_row.longitude,
            'lat': tract_row.latitude,
        }

    distances = {
        'underFive': [],
        'fiveToTen': [],
        'tenToTwenty': [],
        'overTwenty': [],
    }
    origin = (float(latitude), float(longitude))
    for entry in tract_coord.values():
        tract_point = (float(entry['lat']), float(entry['long']))
        miles = vincenty(origin, tract_point).miles
        if miles < 5:
            bucket = 'underFive'
        elif miles < 10:
            bucket = 'fiveToTen'
        elif miles < 20:
            bucket = 'tenToTwenty'
        else:
            bucket = 'overTwenty'
        distances[bucket].extend(entry['gis_id'])
    return distances
def find_tract_blk(lat, lng):
    """Look up the census tract and block group for one coordinate.

    Handy when there is only a single point to check.

    lat: latitude
    lng: longitude
    return: the ``TRACT`` code followed by the ``BLKGRP`` code of the first
        ``'2010 Census Blocks'`` entry returned by the geocoder
    """
    first_block = cg.coordinates(x=lng, y=lat)['2010 Census Blocks'][0]
    block_group = first_block['BLKGRP']
    tract_code = first_block['TRACT']
    return tract_code + block_group
def get_census_tract_from_geolocation(df, latitude_col_name, longitude_col_name, tract_col_name):
    """Map every row's coordinates to its census tract GEOID.

    Args:
        df: DataFrame holding one point per row.
        latitude_col_name: Name of the latitude column in ``df``; also used
            for the latitude column of the result.
        longitude_col_name: Name of the longitude column in ``df``; also used
            for the longitude column of the result.
        tract_col_name: Name given to the tract GEOID column of the result.

    Returns:
        DataFrame with columns ``[tract_col_name, latitude_col_name,
        longitude_col_name]`` and one row per distinct tract.  When several
        input rows fall in the same tract, the coordinates of the last such
        row are kept.  An empty ``df`` yields an empty frame that still has
        all three columns (the original returned only the tract column).
    """
    coords_by_tract = {}
    for _, row in df.iterrows():
        lat = row[latitude_col_name]
        lon = row[longitude_col_name]
        census_info = cg.coordinates(y=lat, x=lon)
        tract = census_info['Census Tracts'][0]['GEOID']
        # Later rows overwrite earlier ones for the same tract; dict order
        # keeps the position of the first occurrence.
        coords_by_tract[tract] = (lat, lon)

    records = [(tract, lat, lon) for tract, (lat, lon) in coords_by_tract.items()]
    return pd.DataFrame(
        records,
        columns=[tract_col_name, latitude_col_name, longitude_col_name],
    )
def add_population_in_buffer(self, mesh_pickle_path, step=0.0001, buffer_length=300):
    """Fill the census-geocode mesh cache for every grid point inside the buffer.

    A 0.0001-degree grid around ``(self.latitude, self.longitude)`` is
    scanned; each grid point closer than ``buffer_length`` metres to the
    centre is reverse-geocoded (unless already cached) and stored in the
    mesh dict under the integer key ``(lat * 10000, lon * 10000)``.

    Args:
        mesh_pickle_path: Path of the pickle file holding the mesh cache.  It
            is read if it can be loaded and always rewritten on exit.
        step: Currently unused; the grid spacing is fixed at 0.0001 degree.
        buffer_length: Buffer radius in metres.

    Side effects:
        Sets ``self.population_in_buffer`` to 0 (no population is accumulated
        here yet), prints progress, and writes ``mesh_pickle_path``.
    """
    self.population_in_buffer = 0

    # Approximate metres per degree.  A degree of latitude is ~110 km
    # everywhere, while 84 km per degree of longitude only holds around
    # 40 degrees latitude.  The original had these two values swapped, which
    # made the longitude window narrower than the buffer itself.
    metres_per_deg_lat = 110000
    metres_per_deg_lon = 84000

    centre_lat = float(self.latitude)
    centre_lon = float(self.longitude)
    lat_min = centre_lat - buffer_length / metres_per_deg_lat
    lat_max = centre_lat + buffer_length / metres_per_deg_lat
    lon_min = centre_lon - buffer_length / metres_per_deg_lon
    lon_max = centre_lon + buffer_length / metres_per_deg_lon

    # Best-effort cache load: a missing or unreadable file just means we
    # start from an empty mesh.
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at cache files this program wrote itself.
    try:
        with open(mesh_pickle_path, "rb") as f:
            mesh = pickle.load(f)
    except Exception:
        mesh = {}

    try:
        for i_lat in range(int(lat_min * 10000), int(lat_max * 10000)):
            for i_lon in range(int(lon_min * 10000), int(lon_max * 10000)):
                lat = i_lat / 10000
                lon = i_lon / 10000
                distance = geopy.distance.distance(
                    (lat, lon), (self.latitude, self.longitude)).km * 1000
                if distance >= buffer_length:
                    print("lat:", lat, "lon:", lon, " is not in buffer. distance: ", distance)
                    continue
                if (i_lat, i_lon) not in mesh:
                    mesh[(i_lat, i_lon)] = cg.coordinates(lon, lat)
                    print("Add census geocode lat:", lat, "lon:", lon, "Mesh data.")
    finally:
        # Persist whatever was fetched, even if a geocoder call failed
        # part-way through the scan.
        with open(mesh_pickle_path, "wb") as f:
            pickle.dump(mesh, f)
def get_census_tract_by_geo_info(longitude, latitude, verbose=True):
    """
    Find the census geographies to which a longitude/latitude point belongs.

    The Census geocoder is called up to 10 times; a ``ValueError`` or
    ``KeyError`` from the client triggers a short random pause and a retry.

    :param longitude: float
    :param latitude: float
    :param verbose: boolean -> whether to pretty-print the result
    :return: matched_dict: dictionary with four keys:
        - census_block_id (GEOID of the first '2010 Census Blocks' entry)
        - census_tract_id (GEOID of the first 'Census Tracts' entry)
        - county_id (GEOID of the first 'Counties' entry)
        - state_id (GEOID of the first 'States' entry)
    :raises RuntimeError: if no attempt produced a result (the original fell
        through to ``len(None)`` and raised an unrelated ``TypeError``; the
        ``sys.exit()`` after ``break`` was unreachable)
    :raises AssertionError: if the geocoder returned an empty result
    """
    max_attempts = 10
    geocoded_result = None
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            geocoded_result = cg.coordinates(x=longitude, y=latitude)
        except (ValueError, KeyError) as err:
            last_error = err
            if attempt < max_attempts:
                # Random back-off of under a second before the next try.
                time.sleep(random.random())
            continue
        if geocoded_result is not None:
            break

    if geocoded_result is None:
        raise RuntimeError(
            "Census geocoder returned no result for "
            f"({longitude}, {latitude}) after {max_attempts} attempts"
        ) from last_error
    if len(geocoded_result) == 0:
        # Raised explicitly (rather than via ``assert``) so the check
        # survives ``python -O``; the exception type is unchanged.
        raise AssertionError("Census geocoder returned an empty result")

    matched_dict = {
        'census_block_id': geocoded_result['2010 Census Blocks'][0]['GEOID'],
        'census_tract_id': geocoded_result['Census Tracts'][0]['GEOID'],
        'county_id': geocoded_result['Counties'][0]['GEOID'],
        'state_id': geocoded_result['States'][0]['GEOID'],
    }
    if verbose:
        pprint(matched_dict)
    return matched_dict
def get_mean_location(self):
    """Return the geographic midpoint of ``self.receiveData`` and its geocode.

    Each entry's ``"lat"``/``"lon"`` (degrees) is converted to a unit vector,
    the vectors are averaged, and the mean vector is converted back to
    degrees.  Entries without usable coordinates are skipped with a message.

    Returns:
        ``(midpoint, mid_address)`` where ``midpoint`` is ``(lat, lon)`` in
        degrees and ``mid_address`` is what ``cg.coordinates(lon, lat)``
        returns for it.

    Raises:
        ValueError: if no entry has usable coordinates.  The original raised
            ``ZeroDivisionError`` on empty input and silently geocoded
            (0, 0) when every entry was skipped.
    """
    coords = []
    for house in self.receiveData:
        try:
            lat_rad = math.radians(house["lat"])
            lon_rad = math.radians(house["lon"])
            if not (math.isfinite(lat_rad) and math.isfinite(lon_rad)):
                raise ValueError("non-finite coordinate")
        except (KeyError, TypeError, ValueError):
            # Fetch the id defensively so reporting a bad entry cannot
            # itself raise.  NOTE: no zip-code fallback happens in this
            # method despite the message wording.
            house_id = house.get("id", "<unknown>") if isinstance(house, dict) else "<unknown>"
            print("Skipping " + str(house_id) + ", trying to use zip code as approximate lat/lon...")
            continue
        # Unit vector on the sphere for this point.
        coords.append((
            math.cos(lat_rad) * math.cos(lon_rad),
            math.cos(lat_rad) * math.sin(lon_rad),
            math.sin(lat_rad),
        ))

    if not coords:
        raise ValueError("no entries with valid lat/lon to average")

    # Average over the points actually used (the original divided by the
    # total count; the angles are scale-invariant so results are unchanged).
    count = len(coords)
    avg_x = sum(c[0] for c in coords) / count
    avg_y = sum(c[1] for c in coords) / count
    avg_z = sum(c[2] for c in coords) / count

    hyp = math.sqrt(avg_x * avg_x + avg_y * avg_y)
    lon = math.degrees(math.atan2(avg_y, avg_x))
    lat = math.degrees(math.atan2(avg_z, hyp))

    midpoint = (lat, lon)
    mid_address = cg.coordinates(lon, lat)
    sleep(1)  # pause after the geocoder request
    return midpoint, mid_address
import numpy as np
import json
import censusgeocode as cg
import pandas as pd  # used below; no pandas import is visible in this script

# Reverse-geocode every restaurant to a census tract and build an id of the
# form "6037<TRACT>" (6037 = California state 6 + Los Angeles County 037,
# without the leading zero of the state code).
df = pd.read_csv('LA_County_restaurants.csv', sep='\t', encoding='utf-8', index_col='Unnamed: 0')

results = []
for index, row in df.iterrows():
    lat = round(row['Latitude'], 5)
    print(lat)
    lon = round(row['Longitude'], 5)
    print(lon)
    try:
        # censusgeocode expects x=longitude, y=latitude.  The original passed
        # them swapped, so every lookup failed and was recorded as NaN.
        response = cg.coordinates(x=lon, y=lat)
        tract = response['2010 Census Blocks'][0]['TRACT']
        print(tract)
        results.append(f"6037{tract}")
    except Exception:
        # Best effort: keep one entry per row so results stays aligned with df.
        results.append(np.nan)

# In[7]:
# Sample geocoder response, used to check the id formatting offline.
result = {
    'Counties': [{'OID': 275901063468976, 'STATE': '06', 'FUNCSTAT': 'A', 'AREAWATER': 1794659470,
                  'NAME': 'Los Angeles County', 'LSADC': '06', 'CENTLON': '-118.2617650',
                  'BASENAME': 'Los Angeles', 'INTPTLAT': '+34.1963983', 'COUNTYCC': 'H1',
                  'MTFCC': 'G4020', 'COUNTY': '037', 'GEOID': '06037', 'CENTLAT': '+34.1957768',
                  'INTPTLON': '-118.2618616', 'AREALAND': 10510687541, 'COUNTYNS': '00277283',
                  'OBJECTID': 398, 'CENT': (-118.261765, 34.1957768),
                  'INTPT': (-118.2618616, 34.1963983)}],
    'Census Tracts': [{'OID': 207901115289836, 'STATE': '06', 'FUNCSTAT': 'S',
                       'NAME': 'Census Tract 1993', 'AREAWATER': 40785, 'LSADC': 'CT',
                       'CENTLON': '-118.1992140', 'BASENAME': '1993', 'INTPTLAT': '+34.0941911',
                       'MTFCC': 'G5020', 'COUNTY': '037', 'GEOID': '06037199300',
                       'CENTLAT': '+34.0926249', 'INTPTLON': '-118.2003961', 'AREALAND': 2550540,
                       'OBJECTID': 6991, 'TRACT': '199300', 'CENT': (-118.199214, 34.0926249),
                       'INTPT': (-118.2003961, 34.0941911)}],
    '2010 Census Blocks': [{'BLKGRP': '1', 'OID': 210404056348637, 'FUNCSTAT': 'S', 'STATE': '06',
                            'AREAWATER': 0, 'NAME': 'Block 1000', 'SUFFIX': '', 'LSADC': 'BK',
                            'CENTLON': '-118.1942484', 'LWBLKTYP': 'L', 'BASENAME': '1000',
                            'BLOCK': '1000', 'INTPTLAT': '+34.1001521', 'MTFCC': 'G5040',
                            'COUNTY': '037', 'GEOID': '060371993001000', 'CENTLAT': '+34.1001521',
                            'INTPTLON': '-118.1942484', 'AREALAND': 428518, 'OBJECTID': 3068287,
                            'TRACT': '199300', 'CENT': (-118.1942484, 34.1001521),
                            'INTPT': (-118.1942484, 34.1001521)}],
    'States': [{'OID': 2749018475066, 'STATE': '06', 'FUNCSTAT': 'A', 'NAME': 'California',
                'AREAWATER': 20484627967, 'LSADC': '00', 'CENTLON': '-119.5277460', 'STUSAB': 'CA',
                'BASENAME': 'California', 'INTPTLAT': '+37.1551773', 'DIVISION': '9',
                'MTFCC': 'G4000', 'STATENS': '01779778', 'GEOID': '06', 'CENTLAT': '+37.1547352',
                'INTPTLON': '-119.5434183', 'REGION': '4', 'AREALAND': 403483191859, 'OBJECTID': 14,
                'CENT': (-119.527746, 37.1547352), 'INTPT': (-119.5434183, 37.1551773)}],
}
# 6037101110
print(f"6037{result['2010 Census Blocks'][0]['TRACT']}")
# Keep one row per station (the first occurrence of each station_name).
lat_long = lat_long.drop_duplicates(subset='station_name', keep='first')
print(len(lat_long))
turn_cleaned.station.nunique()

# Load the tract-level income table and normalise its column names
# (lower-cased, surrounding whitespace stripped).
census = pd.read_csv(working_directory + 'censustract-medianhouseholdincome2018.csv')
census.columns = census.columns.str.lower().str.strip()
census = census.rename(columns={'census tract': 'census_tract'})
print(len(census))
census[census.census_tract == 36061018900]

#=================================================================================================
# Get census tract ID for each station lat and long
# Keyed by tract GEOID, so a later station in the same tract replaces an
# earlier one.
census_dict = {}
for idx, row in lat_long.iterrows():
    census_info = cg.coordinates(y=row.station_latitude, x=row.station_longitude)
    tract = census_info['Census Tracts'][0]['GEOID']
    census_dict[tract] = [row.station_latitude, row.station_longitude]

# One row per tract: transpose so tracts become the index, then turn the
# index into a regular column and give all three columns readable names.
census_coord = (
    pd.DataFrame(census_dict)
    .T
    .reset_index()
    .rename(columns={
        'index': 'census_tract',
        0: 'station_latitude',
        1: 'station_longitude',
    })
)
census_coord.shape
census_coord.to_csv(working_directory + 'census_tract_lat_long.csv', index=False)

#=================================================================================================
# The CSV is written before this cast; only the in-memory frame gets
# integer tract ids.
census_coord['census_tract'] = census_coord['census_tract'].astype(int)
census_coord.head()
lngs = []
csv_name = []
tract = []

# Empty frame that fixes the column order of the combined table.
master_df = pd.DataFrame({
    "Name": names,
    "Address": addresses,
    "Latitude": lats,
    "Longitude": lngs,
    "Table Name": csv_name,
    "Tract": tract
})

# Collect the per-file frames and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the growing table on every iteration.
frames = [master_df]
for i in range(10):
    df = pd.read_csv(csv_list[i])
    df.drop(inplace=True, columns="Unnamed: 0")
    df["Table Name"] = table_list[i]

    result_list = []
    for lon, lat in zip(df["Longitude"], df["Latitude"]):
        try:
            result = cg.coordinates(x=lon, y=lat)
            # The third space-separated token of the tract NAME is kept
            # (for a name like "Census Tract 1993" that is the number).
            spl_re = result['Census Tracts'][0]['NAME']
            spl_re = spl_re.split(' ')
            result_list.append(spl_re[2])
        except Exception:
            # Best effort: an empty tract keeps result_list aligned with df.
            result_list.append("")
        time.sleep(2)  # pause between geocoder requests
    df["Tract"] = result_list
    frames.append(df)

master_df = pd.concat(frames, ignore_index=True)
master_df.drop_duplicates(inplace=True, ignore_index=True)
master_df.to_csv("../Resources/master.csv")
import sqlite3
import pandas as pd
import censusgeocode as cg
import time

# Reverse-geocode every bus stop in the bus_data table to its 2010 census
# block, block group and tract, then write the augmented table to CSV.
conn = sqlite3.connect("DC-Criminalistics/data/wmata-data/bus_data.db")
cur = conn.cursor()
df = pd.read_sql_query("select * from bus_data", conn)

census_block = []
census_blockgroup = []
census_tract = []
for row in df.itertuples(index=True, name='Pandas'):
    try:
        info = cg.coordinates(x=getattr(row, "Longitude"), y=getattr(row, "Latitude"))
        block_info = info['2010 Census Blocks'][0]
        # Read all three fields before appending anything, so a failure can
        # never leave the three lists with different lengths.
        geo = (block_info['BLOCK'], block_info['BLKGRP'], block_info['TRACT'])
    except (ValueError, KeyError, IndexError):
        # No usable block for this point (client error, missing key or empty
        # block list): record blanks instead of aborting the whole run.
        geo = (None, None, None)
    census_block.append(geo[0])
    census_blockgroup.append(geo[1])
    census_tract.append(geo[2])

df['census_block'] = census_block
df['census_blockgroup'] = census_blockgroup
df['census_tract'] = census_tract
df.to_csv('bus_station_reverse_geocode_out.csv')
def address_to_census(address, aggregation="blocks", max_requests=100):
    """
    Converts a street address to the GEOID of the selected aggregation choice.

    The address is first sent to the Census one-line geocoder.  If that
    returns nothing, the address is geocoded with ArcGIS and the resulting
    coordinates are looked up with the Census coordinate geocoder instead.

    Args:
        address (str): Address should be in the following format
            "<Street>, <City>, <State>, <Zip>". Addresses are not case
            sensitive and the spacing between commas and entries do not
            matter. The State can be given in longform or abbreviated.
            Examples: "1 Shields Avenue, Davis, CA, 95616",
            "1 Shields Avenue,Davis,California,95616",
            "1 shields avenue, davis,ca, 95616".
            A missing value (``pd.isna``) is returned unchanged.
        aggregation (str): Census aggregation method: block groups, blocks,
            tracts (optionally prefixed with "census", singular or plural).
            Matching ignores case and surrounding whitespace.
        max_requests (int): Maximum number of coordinate-lookup attempts in
            the ArcGIS fallback path.

    Returns:
        GEOID of selected aggregation, built from the first
        "2020 Census Blocks" entry: STATE + COUNTY + TRACT, followed by
        BLKGRP for block groups or BLOCK for blocks.

    Raises:
        AssertionError: if ``aggregation`` is not a valid option.
        ValueError: if the fallback geocoder cannot locate the address.
        RuntimeError: if every fallback coordinate lookup fails.
    """
    if pd.isna(address):
        return address

    aggregation_levels = {
        "census block groups": "block group",
        "census block group": "block group",
        "block groups": "block group",
        "block group": "block group",
        "census blocks": "block",
        "census block": "block",
        "blocks": "block",
        "block": "block",
        "census tracts": "tract",
        "census tract": "tract",
        "tracts": "tract",
        "tract": "tract",
    }
    # Normalise before validating: the original validated the raw string but
    # lower-cased it afterwards, so e.g. "Tracts" was rejected.
    level = None
    if isinstance(aggregation, str):
        level = aggregation_levels.get(aggregation.strip().lower())
    if level is None:
        # Explicit raise keeps the original exception type while surviving
        # ``python -O`` (which strips ``assert`` statements).
        raise AssertionError(
            "The selected aggregation is not a valid option. Please select from the 3 possible choices: block groups, blocks, tracts"
        )

    result = cg.onelineaddress(address, returntype="geographies")
    if result:
        geographies = result[0]["geographies"]
        census_blocks = geographies["2020 Census Blocks"][0]
    else:
        geolocator = ArcGIS()
        g = geolocator.geocode(address)
        if g is None:
            raise ValueError(f"Could not geocode address: {address!r}")
        x = g.longitude
        y = g.latitude

        # This retry loop is meant to deal with errors thrown on portions of
        # the responses from https://geocoding.geo.census.gov/geocoder/
        # https://github.com/fitnr/censusgeocode/issues/18
        result = None
        last_error = None
        for _ in range(max_requests):
            try:
                result = cg.coordinates(x=x, y=y, returntype="geographies")
            except Exception as err:
                last_error = err
            if result is not None:
                break
        if result is None:
            raise RuntimeError(
                f"Census coordinate lookup failed after {max_requests} attempts "
                f"for address {address!r}"
            ) from last_error
        census_blocks = result["2020 Census Blocks"][0]

    STATE = census_blocks["STATE"]
    COUNTY = census_blocks["COUNTY"]
    TRACT = census_blocks["TRACT"]
    BLOCK_GROUP = census_blocks["BLKGRP"]
    BLOCK = census_blocks["BLOCK"]

    if level == "block group":
        return STATE + COUNTY + TRACT + BLOCK_GROUP
    if level == "block":
        return STATE + COUNTY + TRACT + BLOCK
    return STATE + COUNTY + TRACT
import pandas as pd
from pprint import pprint
import numpy as np
import json
import censusgeocode as cg

# Reverse-geocode the restaurants whose city is "Los Angeles".
df = pd.read_csv('LA_County_restaurants.csv', sep='\t', encoding='utf-8', index_col='Unnamed: 0')
df = df[df['city'] == "Los Angeles"]

results = []
for index, row in df.iterrows():
    print(row['Latitude'])
    print(row['Longitude'])
    # censusgeocode expects x=longitude, y=latitude; the original passed
    # them the other way round.
    response = cg.coordinates(x=row['Longitude'], y=row['Latitude'])
import sqlite3
import pandas as pd
import censusgeocode as cg

# Reverse-geocode the first 10 rows of cabi_station_data to their 2010
# census block, block group and tract, then write the augmented table to CSV.
conn = sqlite3.connect(
    "DC-Criminalistics/data/cabi-station-data/cabi_station_data.db")
cur = conn.cursor()
df = pd.read_sql_query("select * from cabi_station_data limit 10", conn)

census_block = []
census_blockgroup = []
census_tract = []
for row in df.itertuples(index=True, name='Pandas'):
    try:
        info = cg.coordinates(x=getattr(row, "lon"), y=getattr(row, "lat"))
        block_info = info['2010 Census Blocks'][0]
        # Read all three fields before appending anything, so a failure can
        # never leave the three lists with different lengths.
        geo = (block_info['BLOCK'], block_info['BLKGRP'], block_info['TRACT'])
    except (ValueError, KeyError, IndexError):
        # No usable block for this point (client error, missing key or empty
        # block list): record blanks instead of aborting the whole run.
        geo = (None, None, None)
    census_block.append(geo[0])
    census_blockgroup.append(geo[1])
    census_tract.append(geo[2])

df['census_block'] = census_block
df['census_blockgroup'] = census_blockgroup
df['census_tract'] = census_tract
df.to_csv('cabi_reverse_geocode_out.csv')
def get_location_name(coords):
    """Return "<county name>, <state abbreviation>" for a coordinate pair.

    ``coords[0]`` is passed to the geocoder as ``x`` and ``coords[1]`` as
    ``y``.  The state's full name from the response is translated through
    ``us_state_abbrev``.
    """
    x_coord, y_coord = coords[0], coords[1]
    geographies = cg.coordinates(x=x_coord, y=y_coord)
    county_name = geographies["Counties"][0]["NAME"]
    state_code = us_state_abbrev[geographies["States"][0]["NAME"]]
    return county_name + ", " + state_code