def geoclient_intersection(streetNumber, streetName, boroughName, client=None):
    '''Look up an address and return its street, cross streets and borough.

    Args:
        streetNumber: house number forwarded to the address lookup.
        streetName: street name forwarded to the address lookup.
        boroughName: borough name forwarded to the address lookup.
        client: optional object exposing ``address(number, street, borough)``.
            When omitted, a ``Geoclient`` is built with the credentials this
            function has always used, so existing callers are unaffected.
            Passing a client lets callers reuse one connection object and
            supply their own credentials.

    Returns:
        dict with four keys copied from the lookup response:
        ``sideOfStreet`` (``firstStreetNameNormalized``),
        ``fromStreet`` (``highCrossStreetName1``),
        ``toStreet`` (``lowCrossStreetName1``) and
        ``borough`` (``firstBoroughName``).

    Raises:
        KeyError: if the response lacks any of the four fields above.
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing ``client`` built from configuration and rotating this key.
        client = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')

    dataGeo = client.address(streetNumber, streetName, boroughName)

    # Fields are read in the same order as before so that a missing field
    # raises the same KeyError it always did.
    return {
        'sideOfStreet': dataGeo['firstStreetNameNormalized'],
        'fromStreet': dataGeo['highCrossStreetName1'],
        'toStreet': dataGeo['lowCrossStreetName1'],
        'borough': dataGeo['firstBoroughName'],
    }
def geoclient_intersection(streetNumber, streetName, boroughName, client=None):
    '''Look up an address and return its street, cross streets and borough.

    Args:
        streetNumber: house number forwarded to the address lookup.
        streetName: street name forwarded to the address lookup.
        boroughName: borough name forwarded to the address lookup.
        client: optional object exposing ``address(number, street, borough)``.
            When omitted, a ``Geoclient`` is built with the credentials this
            function has always used, so existing callers are unaffected.
            Passing a client lets callers reuse one connection object and
            supply their own credentials.

    Returns:
        dict with four keys copied from the lookup response:
        ``sideOfStreet`` (``firstStreetNameNormalized``),
        ``fromStreet`` (``highCrossStreetName1``),
        ``toStreet`` (``lowCrossStreetName1``) and
        ``borough`` (``firstBoroughName``).

    Raises:
        KeyError: if the response lacks any of the four fields above.
    '''
    if client is None:
        # NOTE(review): these credentials are committed in source; prefer
        # passing ``client`` built from configuration and rotating this key.
        client = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')

    dataGeo = client.address(streetNumber, streetName, boroughName)

    # Fields are read in the same order as before so that a missing field
    # raises the same KeyError it always did.
    return {
        'sideOfStreet': dataGeo['firstStreetNameNormalized'],
        'fromStreet': dataGeo['highCrossStreetName1'],
        'toStreet': dataGeo['lowCrossStreetName1'],
        'borough': dataGeo['firstBoroughName'],
    }
count+=1 if count == 1 or count == 2: continue ### For testing code purposes to make sure we can get data w/o pulling it all at once ### #if count == 5: # break; # Get the address row and split the number from the street for API purposes address = row[7] address = address.split(' ', 1) if len(address) == 1: address = ['', address[0]] # Call the API and store in jsonData jsonData = g.address(address[0], address[1], row[5]) # Write to the TSV storing file the original data, only with address column replaced with separated address, # plus the longitude, latitude try: lat = jsonData['latitude'] lon = jsonData['longitude'] except: lat, lon = '', '' print "error in %s on line %d" % (file, count) print row print jsonData csvout.writerow(row[0:6] + [s, address[0], address[1]] + row[8:11] + [lat, lon] + row[12:]) jsonout.write(json.dumps(jsonData) + '\n')
# Credentials are read from the second entry of `id_key` (defined outside
# this span): app id first, app key second.
my_app_ID, my_app_key = id_key[1][0], id_key[1][1]
g = Geoclient(my_app_ID, my_app_key)

# Load the whole cleaned sales file into memory as a list of tuples.
with open('../out/nyc_sales_clean.csv', 'rb') as f:
    reader = csv.reader(f)
    nyc_sales = [tuple(record) for record in reader]

nrow_nyc = len(nyc_sales)

with open("../out/coords_nyc_api.csv", "wb") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(
        ['id_sale', 'lat', 'long', 'returned_street_name', 'returned_zip_code'])

    # Index 0 is skipped (the first row of the input is not geocoded).
    for i in range(1, nrow_nyc):
        sale = nyc_sales[i]
        # Progress indicator: "<current> / <total>".
        print('%s / %s' % (str(i), str(nrow_nyc)))

        # address(houseNumber, street, borough)
        addr = g.address(sale[24], sale[23], sale[25])

        # Missing response fields become None via .get().
        out = [
            sale[22],
            addr.get('latitudeInternalLabel'),
            addr.get('longitudeInternalLabel'),
            addr.get('streetName1In'),
            addr.get('zipCode'),
        ]
        print(out)
        print('\n\n')
        writer.writerow(out)
class GeoHelper:
    '''Geocode BIN, BBL or street-address values through a Geoclient.

    The wrapped client lives on ``self._g``; every lookup in this class goes
    through its ``bbl``, ``bin`` or ``address`` method.
    '''

    # Upper-cased column names that can identify a record, most preferred
    # first.
    _ID_COLUMNS_BY_PREFERENCE = (
        'BIN',
        'BUILDINGIDENTIFICATIONNUMBER',
        'GEOCODEDBIN',
        'BBL',
        'GEOCODEDBBL',
        'ADDRESS',
    )

    def __init__(self, app_id, key):
        '''Create the underlying Geoclient from an app id and key.'''
        self._g = Geoclient(app_id, key)

    def _find_id_column(self, col_list):
        '''Return the most preferred identifier column present in col_list.

        Matching is case-insensitive; the column is returned with its
        original spelling.

        Raises:
            ValueError: if none of the known identifier columns is present
                (previously a bare StopIteration escaped from ``next``).
        '''
        # Upper-case every column once instead of once per candidate.
        col_map = {}
        for col in col_list:
            name = col if hasattr(col, 'upper') else str(col)
            col_map[name.upper()] = col

        for wanted in self._ID_COLUMNS_BY_PREFERENCE:
            if wanted in col_map:
                return col_map[wanted]
        raise ValueError(
            'no identifier column found; expected one of: '
            + ', '.join(self._ID_COLUMNS_BY_PREFERENCE))

    def _inferredGeocoder(self, input_str):
        '''
        Attempts to infer the format of the input provided for geocoding,
        either BIN, BBL, or Street Address (very rudimentary).

        * 10 digits -> BBL lookup (1 borough digit, 5 block digits, 4 lot
          digits).
        * 7 digits  -> BIN lookup.
        * any other all-digit value -> None.
        * otherwise the text is split on whitespace into house number (first
          token), borough (last token) and street name (everything between).

        Returns the complete geoclient response, or None when the input
        cannot be interpreted or the address lookup raises.
        '''
        text = str(input_str)
        # Drop only a trailing ".0" -- presumably left over from numeric IDs
        # stored as floats. Removing ".0" anywhere would corrupt other text.
        if text.endswith('.0'):
            text = text[:-2]

        if text.isdigit():
            if len(text) == 10:
                # int() already ignores leading zeros; stripping them first
                # made an all-zero block or lot fail on an empty string.
                return self._g.bbl(
                    int(text[0]), int(text[1:6]), int(text[6:]))
            if len(text) == 7:
                return self._g.bin(text)
            # Unrecognized number of digits, no ID possible.
            return None

        tokens = text.split()
        if not tokens:
            # Empty or whitespace-only input has no house number to read.
            print('Format not recognized')
            return None

        house_num = tokens[0]
        street_name = " ".join(tokens[1:-1])
        boro_name = tokens[-1]
        try:
            return self._g.address(house_num, street_name, boro_name)
        except Exception:
            print('Format not recognized')
            return None

    def _checkGeoclientValidity(self, geoclient_output):
        '''Classify a geoclient response.

        Returns:
            True when ``returnCode1a`` starts with '0';
            an error string when the lookup produced no response or reported
            a failure;
            None when the response has no ``returnCode1a`` field at all.
        '''
        if geoclient_output is None:
            # _inferredGeocoder returns None for unusable input; report it
            # instead of failing on the membership test below.
            return 'error returned with no message'
        if 'returnCode1a' not in geoclient_output:
            return None

        code = geoclient_output['returnCode1a']
        if code and str(code[0]) == '0':
            return True
        message = geoclient_output.get('message')
        if message:
            return 'Error Code: ' + str(message)
        return 'error returned with no message'

    def _addressGeocoder(self, df):
        '''
        private function to make a generic call to NYC geoclientBatch.

        Returns a (BBL, BIN) tuple; when the lookup fails both elements are
        the same "Error: <exception type>" string.

        NOTE(review): ``house_num``, ``street`` and ``boro`` are not defined
        in this class -- presumably module-level column names; confirm. If
        they are missing, the resulting NameError is reported through the
        error string like any other failure.
        '''
        try:
            x = self._g.address(df[house_num], df[street], df[boro])
            return x['bbl'], x['buildingIdentificationNumber']
        except Exception as exc:
            error = "Error: %s" % type(exc)
            return error, error

    def _resolve_id_column(self, df, identifier_col):
        '''Return the caller's identifier column, or detect one from df.'''
        if identifier_col:
            print('using provided ID column: ' + identifier_col)
        else:
            identifier_col = self._find_id_column(df.columns)
            print('found ID column: ' + identifier_col)
        return identifier_col

    def _lookup_fields(self, value, first_field, second_field):
        '''Geocode value and return two response fields as a tuple.

        When the response is not valid, both elements are whatever
        _checkGeoclientValidity returned (an error string or None).
        '''
        out = self._inferredGeocoder(value)
        log = self._checkGeoclientValidity(out)
        if log is True:
            return out[first_field], out[second_field]
        return log, log

    def get_BINandBBL(self, df, identifier_col=None):
        '''
        Uses DOITT's GeoClient (the web API to DCP's GeoSupport)
        via the python wrapper https://github.com/talos/nyc-geoclient
        to geocode each row of dataframe df from a single identifier column
        (BIN, BBL or street address). When identifier_col is not given, the
        column is picked by _find_id_column.

        Returns the dataframe df with two additional columns: geocodedBBL
        and geocodedBIN.
        '''
        identifier_col = self._resolve_id_column(df, identifier_col)

        def wrapper_func(x):
            return self._lookup_fields(
                x[identifier_col], 'bbl', 'buildingIdentificationNumber')

        df[['geocodedBBL', 'geocodedBIN']] = df.apply(
            wrapper_func, axis=1).apply(_pd.Series)
        return df

    def GetLatLong(self, df, identifier_col=None):
        '''
        Geocode each row of df from a single identifier column and return
        df with Latitude and Longitude columns, filled from the response
        fields latitudeInternalLabel and longitudeInternalLabel. When
        identifier_col is not given, the column is picked by
        _find_id_column.
        '''
        identifier_col = self._resolve_id_column(df, identifier_col)

        # Create the target columns up front, as the original code did.
        df['Latitude'] = _np.nan
        df['Longitude'] = _np.nan

        def wrapper_func(x):
            return self._lookup_fields(
                x[identifier_col],
                'latitudeInternalLabel', 'longitudeInternalLabel')

        df[['Latitude', 'Longitude']] = df.apply(
            wrapper_func, axis=1).apply(_pd.Series)
        return df
# `i` is the already-open input file; `g`, `infile` and `outfile` are also
# defined outside this span.
reader = csv.reader(i)
# Single-argument print(...) emits the same text under Python 2 and 3.
print("opening file: %s" % outfile)
with open(outfile, 'w') as o:
    writer = csv.writer(o, lineterminator='\n')
    # NOTE(review): `all` shadows the builtin; the name is kept because code
    # outside this span may refer to it.
    all = []

    # The first input row is treated as the header and gains a 'bbl' column.
    # An empty input file yields None here, which used to crash on .append();
    # fall back to an empty header so the output still gets its 'bbl' column.
    row = next(reader, None)
    if row is None:
        row = []
    row.append('bbl')
    all.append(row)

    try:
        for row in reader:
            street_num = row[0]
            street_name = row[1]
            # Every lookup is made against Manhattan.
            row_gc = g.address(street_num, street_name, 'Manhattan')
            # '0' marks rows for which the response carried no BBL.
            row_bbl = row_gc.get('bbl', '0')
            print("The BBL for %s %s is %s" % (street_num, street_name, row_bbl))
            row.append(row_bbl)
            all.append(row)
        # NOTE(review): rows are written once, after the loop; the collapsed
        # source does not show the original nesting -- confirm.
        writer.writerows(all)
    except csv.Error as e:
        sys.exit('file %s, line %d: %s' % (infile, reader.line_num, e))