def geoclient_intersection(streetNumber, streetName, boroughName):
    '''
    Geocode an address and return its street, cross streets and borough.

    Calls Geoclient's address lookup with the given house number, street
    name and borough name, then copies four fields of the response into a
    new dict:

        'sideOfStreet' <- response['firstStreetNameNormalized']
        'fromStreet'   <- response['highCrossStreetName1']
        'toStreet'     <- response['lowCrossStreetName1']
        'borough'      <- response['firstBoroughName']

    Raises KeyError if the response lacks any of those fields.
    '''
    # NOTE(review): the Geoclient credentials are hard-coded here; consider
    # loading them from configuration instead of source code.
    client = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')
    geo = client.address(streetNumber, streetName, boroughName)

    # Fields are read in the same order as before, so a missing key fails
    # on the same lookup.
    return {
        'sideOfStreet': geo['firstStreetNameNormalized'],
        'fromStreet': geo['highCrossStreetName1'],
        'toStreet': geo['lowCrossStreetName1'],
        'borough': geo['firstBoroughName'],
    }
示例#2
0
def geoclient_intersection(streetNumber, streetName, boroughName):
    '''
    Look up an address with Geoclient and summarise its surrounding streets.

    Returns a dict with the keys 'sideOfStreet', 'fromStreet', 'toStreet'
    and 'borough', taken respectively from the response fields
    'firstStreetNameNormalized', 'highCrossStreetName1',
    'lowCrossStreetName1' and 'firstBoroughName'. A missing response field
    raises KeyError.
    '''
    # NOTE(review): API id/key are embedded in the source; they would be
    # better supplied from configuration.
    geocoder = Geoclient('799db7eb', '02b0bed977c344cb27b77e549eb69ed8')
    lookup = geocoder.address(streetNumber, streetName, boroughName)

    # (output key, response field) pairs, in the original lookup order.
    wanted = (
        ('sideOfStreet', 'firstStreetNameNormalized'),
        ('fromStreet', 'highCrossStreetName1'),
        ('toStreet', 'lowCrossStreetName1'),
        ('borough', 'firstBoroughName'),
    )
    summary = {}
    for out_key, field in wanted:
        summary[out_key] = lookup[field]
    return summary
    			count+=1
    			if count == 1 or count == 2:
    				continue
    			
    			### For testing code purposes to make sure we can get data w/o pulling it all at once ###
    			#if count == 5:
    			#		break;
    			
    			# Get the address row and split the number from the street for API purposes
        		address = row[7]
        		address = address.split(' ', 1)
        		if len(address) == 1:
                                address = ['', address[0]]

        		# Call the API and store in jsonData
        		jsonData = g.address(address[0], address[1], row[5])
        		
        		# Write to the TSV storing file the original data, only with address column replaced with separated address,
        		# plus the longitude, latitude
                        try:
                                lat = jsonData['latitude']
                                lon = jsonData['longitude']
                        except:
                                lat, lon = '', ''
                                print "error in %s on line %d" % (file, count)
                                print row
                                print jsonData

                        csvout.writerow(row[0:6] + [s, address[0], address[1]] + row[8:11] + [lat, lon] + row[12:])
                        jsonout.write(json.dumps(jsonData) + '\n')
# Geoclient credentials are taken from the second row of id_key
# (id_key itself is not defined in this section of the file).
my_app_ID, my_app_key = id_key[1][0], id_key[1][1]

g = Geoclient(my_app_ID, my_app_key)

# Load every row of the cleaned sales file as a tuple; row 0 is skipped
# below (presumably a header row -- confirm against the input file).
with open('../out/nyc_sales_clean.csv', 'rb') as f:
    reader = csv.reader(f)
    nyc_sales = map(tuple, reader)

nrow_nyc = len(nyc_sales)

# Geocode each sale and write one output row per sale.
with open("../out/coords_nyc_api.csv", "wb") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(
        ['id_sale', 'lat', 'long', 'returned_street_name', 'returned_zip_code'])
    for i, sale in enumerate(nyc_sales[1:], 1):
        # Progress indicator: current row index / total row count.
        print('%d / %d' % (i, nrow_nyc))
        # address(houseNumber, street, borough)
        addr = g.address(sale[24], sale[23], sale[25])
        # Column 22 is written under 'id_sale'; the remaining values are
        # optional response fields (None when absent).
        out = [sale[22]] + [
            addr.get(field)
            for field in ('latitudeInternalLabel', 'longitudeInternalLabel',
                          'streetName1In', 'zipCode')
        ]
        print(out)
        print('\n\n')
        writer.writerow(out)
示例#5
0
文件: MODA.py 项目: MODA-NYC/hackNY
class GeoHelper:
    '''
    Helper around a Geoclient connection for geocoding pandas dataframes.

    Each row is geocoded from a single identifier column whose value may be
    a BIN, a BBL, or a free-text street address.
    '''

    # Identifier column names searched for (case-insensitively), most
    # preferred first.
    _ID_COLUMNS_BY_PREF = (
        'BIN', 'BUILDINGIDENTIFICATIONNUMBER', 'GEOCODEDBIN', 'BBL',
        'GEOCODEDBBL', 'ADDRESS'
    )

    def __init__(self, app_id, key):
        '''Create the underlying Geoclient connection from an app id and key.'''
        self._g = Geoclient(app_id, key)

    def _find_id_column(self, col_list):
        '''
        Return the column from col_list that best identifies a building.

        Column names are compared case-insensitively against
        _ID_COLUMNS_BY_PREF; the first preference that is present wins and
        the column is returned with its original spelling.

        Raises ValueError when none of the known identifier columns exists
        (previously a bare StopIteration escaped from next()).
        '''
        # Upper-case every column once instead of once per candidate; str()
        # lets non-string column labels be compared too.
        col_map = {str(col).upper(): col for col in col_list}
        for candidate in self._ID_COLUMNS_BY_PREF:
            if candidate in col_map:
                return col_map[candidate]
        raise ValueError(
            'no identifier column found; expected one of: '
            + ', '.join(self._ID_COLUMNS_BY_PREF))

    def _inferredGeocoder(self, input_str):
        '''
        Attempts to infer the format of the input provided for geocoding,
        either BIN, BBL, or Street Address (very rudimentary). Will return
        the complete geoclient object, or None when the input cannot be
        interpreted or the address lookup raises.
        '''
        text = str(input_str)
        # A numeric id that was coerced to float renders as e.g.
        # '1234567.0'. Only that trailing suffix is dropped; removing every
        # '.0' would corrupt values that contain it elsewhere.
        if text.endswith('.0'):
            text = text[:-2]

        if text.isdigit():
            if len(text) == 10:  # BBL: 1-digit boro, 5-digit block, 4-digit lot
                # int() already ignores leading zeros; stripping them first
                # made an all-zero block or lot fail on int('').
                return self._g.bbl(int(text[0]), int(text[1:6]),
                                   int(text[6:]))
            if len(text) == 7:  # BIN
                return self._g.bin(text)
            # Unrecognized number of digits, no ID possible
            return None

        # Otherwise try to split into house number / street name / borough.
        parts = text.split()
        if len(parts) < 3:
            # Too few tokens for "number street borough"; an empty string
            # previously raised IndexError here.
            print('Format not recognized')
            return None
        try:
            return self._g.address(parts[0], " ".join(parts[1:-1]), parts[-1])
        except Exception:
            print('Format not recognized')
            return None

    def _checkGeoclientValidity(self, geoclient_output):
        '''
        Classify a geoclient response.

        Returns True when 'returnCode1a' starts with '0'. Otherwise returns
        an error string: 'Error Code: <message>' when the response carries
        a non-empty 'message', else 'error returned with no message'.
        A None or empty response is treated as an error with no message.
        '''
        no_message = 'error returned with no message'
        # _inferredGeocoder returns None for unrecognized input.
        if not geoclient_output or 'returnCode1a' not in geoclient_output:
            return no_message

        code = geoclient_output['returnCode1a']
        if code and str(code[0]) == '0':
            return True

        message = geoclient_output.get('message')
        if message:
            return 'Error Code: ' + str(message)
        # Always return a value; the original fell through to None here.
        return no_message

    def _addressGeocoder(self, df, house_num='number', street='street',
                         boro='boro'):
        '''
        private function to make a generic call to NYC geoclientBatch.

        df is indexed with the three given keys to obtain the house number,
        street and borough passed to the address lookup.
        NOTE(review): the default key names follow the column names
        mentioned in get_BINandBBL's docstring -- confirm against callers.

        Returns (BBL, BIN); on any failure both elements are the string
        "Error: <exception type>".
        '''
        try:
            x = self._g.address(df[house_num], df[street], df[boro])
            BBL = x['bbl']
            BIN = x['buildingIdentificationNumber']
        except Exception as e:
            BBL = ("Error: %s" % type(e))
            BIN = BBL
        return BBL, BIN

    def _geocodeIntoColumns(self, df, identifier_col, fields, out_cols):
        '''
        Geocode every row of df and store the requested response fields.

        identifier_col -- column holding the BIN/BBL/address; when falsy it
                          is chosen with _find_id_column.
        fields         -- geoclient response keys to extract, in order.
        out_cols       -- dataframe columns receiving those values, in the
                          same order as fields.

        Rows that fail to geocode get the error string from
        _checkGeoclientValidity in every output column. Returns df.
        '''
        if identifier_col:
            print('using provided ID column: %s' % identifier_col)
        else:
            identifier_col = self._find_id_column(df.columns)
            print('found ID column: %s' % identifier_col)

        results = []
        for _, row in df.iterrows():
            out = self._inferredGeocoder(row[identifier_col])
            log = self._checkGeoclientValidity(out)
            if log is True:
                results.append(tuple(out[field] for field in fields))
            else:
                results.append((log,) * len(fields))

        # Assign one column at a time: works whether or not the columns
        # already exist, and also for an empty dataframe.
        for pos, col in enumerate(out_cols):
            df[col] = [result[pos] for result in results]
        return df

    def get_BINandBBL(self, df, identifier_col=None):
        '''
        Uses DOITT's GeoClient (the web API to DCP's GeoSupport)
        via the python wrapper https://github.com/talos/nyc-geoclient
        to geocode a dataframe df from its identifier column.

        Returns the dataframe df with two additional columns: geocodedBBL
        and geocodedBIN. Rows that could not be geocoded hold an error
        string in both columns.
        '''
        return self._geocodeIntoColumns(
            df, identifier_col,
            ('bbl', 'buildingIdentificationNumber'),
            ('geocodedBBL', 'geocodedBIN'))

    def GetLatLong(self, df, identifier_col=None):
        '''
        Geocode df from its identifier column and return it with 'Latitude'
        and 'Longitude' columns filled from the response's
        'latitudeInternalLabel' and 'longitudeInternalLabel' fields. Rows
        that could not be geocoded hold an error string in both columns.
        '''
        return self._geocodeIntoColumns(
            df, identifier_col,
            ('latitudeInternalLabel', 'longitudeInternalLabel'),
            ('Latitude', 'Longitude'))
示例#6
0
    reader = csv.reader(i)

    print "opening file: %s" % outfile
    with open(outfile, 'w') as o:

        writer = csv.writer(o, lineterminator='\n')
        all = []
        row = next(reader, None)
        row.append('bbl')
        all.append(row)

        try:
            for row in reader:

                street_num = row[0]
                street_name = row[1]
                #print street_num, street_name

                row_gc = g.address(street_num, street_name, 'Manhattan')
                row_bbl = row_gc.get('bbl', '0')

                print "The BBL for %s %s is %s" % (street_num, street_name,
                                                   row_bbl)
                row.append(row_bbl)
                all.append(row)

            writer.writerows(all)

        except csv.Error as e:
            sys.exit('file %s, line %d: %s' % (infile, reader.line_num, e))