Пример #1
0
    def get_cities(self):
        '''
        Attach city and state names to every cluster centroid.

        For each row of ``self.midpoints_df`` a single zipcode record is
        requested from the zipcode search engine for the row's latitude and
        longitude (``radius=30``, ``returns=1``). Its ``City`` and ``State``
        fields are written to new "City" and "State" columns, and
        "City_State" joins them as "City, State". The frame is then indexed
        by "geo_cluster" and stored as a dict of per-cluster records in
        ``self.cities_dict``.

        Each centroid is looked up once and both fields come from that one
        result. A centroid with no zipcode inside the radius gets ``None``
        for city and state instead of raising ``IndexError``.
        '''
        midpoints_df = self.midpoints_df
        search = ZipcodeSearchEngine()

        def nearest_city_state(lat, lon):
            # returns=1 limits the answer to a single record.
            matches = search.by_coordinate(lat, lon, radius=30, returns=1)
            if not matches:
                return None, None
            closest = matches[0]
            return closest.City, closest.State

        located = [
            nearest_city_state(lat, lon)
            for lat, lon in zip(midpoints_df["latitude"],
                                midpoints_df["longitude"])
        ]
        midpoints_df["City"] = [city for city, _ in located]
        midpoints_df["State"] = [state for _, state in located]
        midpoints_df["City_State"] = (
            midpoints_df["City"] + ", " + midpoints_df["State"])

        self.cities_dict = midpoints_df.set_index("geo_cluster").to_dict(
            "index")
Пример #2
0
 def get_k_nearest_zipcodes_locations(self, ip_zipcode, radius=50, k_neigh=20):
     """
     Find where the zipcodes near ``ip_zipcode`` sit in ``self._zip_code_list``.

     The coordinates of ``ip_zipcode`` are looked up, up to ``k_neigh``
     zipcodes within ``radius`` of that point are requested, and only the
     ones that also occur in ``self._zip_code_list`` are kept.

     Returns a list of indices into ``self._zip_code_list`` (the first
     occurrence of each matching zipcode), or ``None`` when the coordinate
     search fails, finds nothing, or none of the neighbours is in the list.
     """
     search = ZipcodeSearchEngine()
     lat_long_inf = search.by_zipcode(str(ip_zipcode))
     lat, longi = lat_long_inf["Latitude"], lat_long_inf["Longitude"]

     try:
         result = search.by_coordinate(lat, longi, radius=radius, returns=k_neigh)
     except Exception:
         # Best effort: a failed coordinate search is reported as "no neighbours".
         return None

     if not result:
         return None

     nearby_codes = {int(res["Zipcode"]) for res in result}

     # Map every known zipcode to its first position once, instead of
     # scanning the list again for each neighbour.
     first_index = {}
     for position, code in enumerate(self._zip_code_list):
         first_index.setdefault(code, position)

     zip_index_list = [first_index[code] for code in nearby_codes
                       if code in first_index]

     # A set intersection is never None, so an empty result is the real
     # "nothing available" signal.
     return zip_index_list or None
def get_weatherInfos(weatherData, stationData, stationName):
    """Return precipitation (mm) and mean temperature (deg C) for a station.

    The station coordinates come from ``get_station_coordinates``. Rows of
    ``weatherData`` are selected by matching its 'Zip' column against up to
    five zipcodes found around those coordinates. The search radius starts
    at 0 and grows in steps of 0.05 until some rows match or the radius
    reaches 10.

    'PrecipitationIn' values that cannot be parsed as floats are replaced by
    0.005 before conversion.

    Args:
        weatherData: frame with 'Zip', 'PrecipitationIn' and
            'Mean TemperatureF' columns.
        stationData: passed through to ``get_station_coordinates``.
        stationName: passed through to ``get_station_coordinates``.

    Returns:
        ``(precipitation_mm, temperature_celcius)``, both derived from the
        selected weather rows (inches * 25.4 and (F - 32) / 1.8).
    """
    from uszipcode import ZipcodeSearchEngine

    # NOTE: no reverse geocoding of the station's own postcode is done here;
    # only the coordinates are needed to find nearby zipcodes.
    (lat, lon) = get_station_coordinates(stationName, stationData)
    search = ZipcodeSearchEngine()

    stationWeather = pd.DataFrame()
    radius = 0
    while radius < 10 and stationWeather.shape[0] == 0:
        zipNearby = [
            int(z.Zipcode)
            for z in search.by_coordinate(lat, lon, radius=radius, returns=5)
        ]
        stationWeather = weatherData[weatherData['Zip'].isin(zipNearby)]
        radius += 0.05  # unit of the step follows by_coordinate's radius
    print("post codes of neighborhood: ", zipNearby)

    def fixPrecip(x):
        # Non-numeric markers fall back to a small positive amount.
        try:
            return float(x)
        except (TypeError, ValueError):
            return 0.005

    precipitation_inch = stationWeather[u'PrecipitationIn'].apply(fixPrecip)
    temperature_fahrenheit = stationWeather[u'Mean TemperatureF']
    temperature_celcius = (temperature_fahrenheit - 32.) / 1.8
    precipitation_mm = 25.4 * precipitation_inch  # inches -> millimetres
    return (precipitation_mm, temperature_celcius)
Пример #4
0
def init_randomzips():
    """Prompt for a zip code and radius, then print the zip codes found.

    The user is asked for an integer zip code and an integer radius. The zip
    code is resolved to coordinates and up to 100 zipcodes within the radius
    are collected. Any failure (non-numeric input, failed lookup) prints an
    apology and starts the prompts again. Once a search succeeds the list of
    zipcodes is printed.

    Raises:
        EOFError: when standard input is closed, instead of re-prompting
            forever.
    """
    while True:
        try:
            print(
                'Please enter the zip code you would like to find accounts around'
            )
            searchzip = int(input('---> '))
            print(
                'Please enter the radius you would like to find accounts with')
            searchradius = int(input('---> '))
            search = ZipcodeSearchEngine()
            origin = search.by_zipcode(str(searchzip))
            res = search.by_coordinate(origin.Latitude,
                                       origin.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = [match.Zipcode for match in res]
        except EOFError:
            # No more input will ever arrive; retrying would loop endlessly.
            raise
        except Exception:
            # Ctrl-C (KeyboardInterrupt) is deliberately not caught here.
            print("Sorry, I didn't understand that.")
            continue
        else:
            break

    print(searchresults)
 def filter_desired_columns_from_ip_records(self):
     """
     Reduce ``self._input_data_records`` to tuples of the desired columns.

     For every input row the categories are ASCII-encoded and a zipcode is
     taken from the last space-separated token of 'full_address'. When that
     token is not an integer, or is larger than 99999 (e.g. 891118), the
     zipcode is looked up from the row's latitude/longitude instead. Rows
     with no lookup result are dropped, as are rows whose zipcode is
     below 100.

     The resulting list of tuples
     ``(categories, '', state, city, full_address, zip_code, longitude,
     latitude, stars, type, review_count)`` is stored in
     ``self._input_filtered_cols_records``.
     """
     filtered_data = []
     # Created on first use only: most rows carry a usable zipcode, and one
     # engine can serve every fallback lookup.
     search = None

     print("LOG: [Filtering Engine] Filtering desired columns.")
     for row in self._input_data_records:
         cat_list = [k.encode('ascii') for k in row['categories']]

         # Parse zipcode from the tail of the full_address value.
         zip_token = row['full_address'].split(' ')[-1]
         try:
             zip_code = int(zip_token)
         except ValueError:
             zip_code = None

         if zip_code is None or zip_code > 99999:
             # Fall back to the closest zipcode for the given lat-long.
             # Help link: https://pypi.python.org/pypi/uszipcode
             if search is None:
                 search = ZipcodeSearchEngine()
             result = search.by_coordinate(row['latitude'],
                                           row['longitude'],
                                           radius=20,
                                           returns=3)
             if len(result) == 0:
                 continue
             zip_code = int(result[0]['Zipcode'])

         # Filter out rows that belong to some invalid locations.
         if zip_code < 100:
             continue

         filtered_data.append((
             cat_list, '',
             row['state'], row['city'],
             row['full_address'], zip_code,
             row['longitude'], row['latitude'],
             row['stars'], row['type'],
             row['review_count'],
         ))

     print("LOG: [Filtering Engine] Number of filtered final records: %d" % len(filtered_data))
     self._input_filtered_cols_records = filtered_data
Пример #6
0
def getZips():
    """Return, as JSON, the zipcode records within a radius of a store.

    Request arguments:
        Store: the store's LOCATION_NAME.
        Scope: the search radius, parsed as an integer.

    The store's latitude/longitude are read from
    ``where_are_your_stores.njstores`` (the last matching row wins) and
    handed to the zipcode search engine. Each zipcode record found is
    serialised as a dict of its fields. An empty JSON list is returned when
    no store matches the name.
    """
    stName = request.args.get('Store')
    stRad = int(request.args.get('Scope'))

    # SECURITY(review): stName comes straight from the request and is
    # concatenated into the SQL text, so this is open to SQL injection.
    # Switch to a bound parameter using whatever API `conn` exposes.
    sqlStr = ("SELECT njstores.LATITUDE_MEAS, njstores.LONGITUDE_MEAS "
              "FROM where_are_your_stores.njstores "
              "WHERE LOCATION_NAME = '" + stName + "'")
    results = conn.execute(sqlStr)

    stLat = stLong = None
    for row in results:
        stLat = row.LATITUDE_MEAS
        stLong = row.LONGITUDE_MEAS

    if stLat is None or stLong is None:
        # Unknown store: nothing to search around.
        return jsonify([])

    search = ZipcodeSearchEngine()
    # returns=0 is passed through unchanged; presumably "no limit" in
    # uszipcode -- confirm against the installed version.
    res = search.by_coordinate(stLat, stLong, radius=stRad, returns=0)

    # Field names double as JSON keys; "SWBoungLongitude" is spelled the way
    # the record attribute is accessed.
    fields = (
        "City", "Density", "HouseOfUnits", "LandArea", "Latitude",
        "Longitude", "NEBoundLatitude", "NEBoundLongitude", "Population",
        "SWBoundLatitude", "SWBoungLongitude", "State", "TotalWages",
        "WaterArea", "Wealthy", "Zipcode", "ZipcodeType",
    )
    allZips = [{field: getattr(aRec, field) for field in fields}
               for aRec in res]

    return jsonify(allZips)
Пример #7
0
def assign_zipcode(steps):
    """Annotate each step in place with the zipcode record found for it.

    For every step a single zipcode record is requested for the step's
    latitude/longitude. When one is found its 'Zipcode', 'City' and 'State'
    values are stored on the step as ``zipcode``, ``city`` and ``state``.
    Steps without a match are left untouched.
    """
    engine = ZipcodeSearchEngine()
    copied_fields = (('zipcode', 'Zipcode'), ('city', 'City'),
                     ('state', 'State'))
    for step in steps:
        matches = engine.by_coordinate(step.latitude,
                                       step.longitude,
                                       returns=1)
        if len(matches) == 0:
            continue
        nearest = next(iter(matches))
        for attribute, key in copied_fields:
            setattr(step, attribute, nearest[key])
def get_nearest_zips(zip_code, radius=20):
    """Return a list of nearest zip codes.

    ``zip_code`` is resolved to coordinates; up to 200 zipcodes within
    ``radius`` of that point are returned. The list is empty when the zip
    code has no latitude or longitude.
    """
    engine = ZipcodeSearchEngine()
    origin = engine.by_zipcode(zip_code)
    if origin['Latitude'] is None or origin['Longitude'] is None:
        return []

    nearby = engine.by_coordinate(origin['Latitude'],
                                  origin['Longitude'],
                                  radius=radius,
                                  returns=200)
    return [match['Zipcode'] for match in nearby]
Пример #9
0
def get_ratings_for_business_zipcode(business_type, zipcode, radius=50, k_neigh=20):
    """Return the predicted rating of ``business_type`` for ``zipcode``.

    The ratings table is (re)loaded from ``TRAINING_PRED_FILENAME`` into the
    global ``FINAL_RATINGS_DF``. If the table has a row for ``zipcode`` its
    rating is returned. Otherwise up to ``k_neigh`` zipcodes within
    ``radius`` of ``zipcode`` are looked up and the mean rating of those
    present in the table is returned.

    Args:
        business_type: column of the ratings table to read.
        zipcode: zipcode to rate, compared against the 'zipcode' column.
        radius: search radius for neighbouring zipcodes.
        k_neigh: maximum number of neighbouring zipcodes to consider.

    Returns:
        The rating, or ``None`` when the neighbour search fails or no
        neighbouring zipcode has a rating.
    """
    global FINAL_RATINGS_DF
    FINAL_RATINGS_DF = read_csv_data_to_df(TRAINING_PRED_FILENAME)
    print(len(FINAL_RATINGS_DF))

    rating_row = FINAL_RATINGS_DF[FINAL_RATINGS_DF['zipcode'] == zipcode]
    if not rating_row.empty:
        rating = rating_row[business_type].tolist()[0]
        print("Predicted Rating for business: %s, zipcode: %d is %f" %
              (business_type, zipcode, rating))
        return rating

    search = ZipcodeSearchEngine()
    lat_long_inf = search.by_zipcode(str(zipcode))
    lat, longi = lat_long_inf["Latitude"], lat_long_inf["Longitude"]

    try:
        result = search.by_coordinate(lat,
                                      longi,
                                      radius=radius,
                                      returns=k_neigh)
    except Exception:
        # Best effort: a failed neighbour search means "no prediction".
        return None

    if not result:
        return None

    nearest_zip_list = [int(res["Zipcode"]) for res in result]

    # Keep only the neighbours that actually have a rating row.
    ratings = FINAL_RATINGS_DF[FINAL_RATINGS_DF['zipcode'].isin(
        nearest_zip_list)]
    if ratings.empty:
        return None

    avg_rating = float(ratings[business_type].mean())
    print("Predicted Rating for business: %s, zipcode: %d is %f" %
          (business_type, zipcode, avg_rating))
    return avg_rating
Пример #10
0
def getCountsForRoute(route):
    """Count "Taco Bell" shops along a route and store a count on each step.

    For every step in ``route.steps_list`` the nearest zipcodes are looked
    up and Yelp is queried for "Taco Bell" at the first of them. Businesses
    not seen on an earlier step become ``Shop`` objects for this step. A
    shop is marked visited when another unvisited shop of the same step lies
    within a distance of 10 (as measured by ``calculate_distance``), and the
    number of shops left unvisited is stored in ``step.foodCount``. Steps
    with no zipcode nearby get a count of 0.

    After all steps, the route distance, duration and the total number of
    distinct businesses found are printed.

    NOTE(review): assumes ``Shop.visited`` is a boolean that starts False --
    confirm against the Shop class.
    """
    uniques = set()
    yelp_api = YelpAPI(API_KEY)
    search = ZipcodeSearchEngine()
    searchTerm = "Taco Bell"

    for step in route.steps_list:
        res = search.by_coordinate(step.lat, step.long, radius=10, returns=5)
        codes = [zipcode.Zipcode for zipcode in res]
        if not codes:
            # No location to query; do not reuse a previous step's results.
            step.foodCount = 0
            continue

        search_results = yelp_api.search_query(term=searchTerm,
                                               location=codes[0],
                                               price="1,2")

        # Collect the matching businesses first seen on this step.
        shops = []
        for buis in search_results['businesses']:
            if searchTerm in buis['name'] and buis['id'] not in uniques:
                uniques.add(buis['id'])
                shops.append(Shop(buis['coordinates']['latitude'],
                                  buis['coordinates']['longitude'],
                                  buis['id']))

        # Collapse shops that sit close to another still-unvisited shop.
        for shop_object in shops:
            for shop_object1 in shops:
                if (shop_object.shop_id != shop_object1.shop_id
                        and not shop_object.visited
                        and not shop_object1.visited):
                    dist = calculate_distance(shop_object.lat,
                                              shop_object.lng,
                                              shop_object1.lat,
                                              shop_object1.lng)
                    if dist < 10:
                        shop_object.visited = True

        step.foodCount = sum(1 for shop in shops if not shop.visited)

    print('Route Distance')
    print((route.distance * 3.28084) / 5280)
    print('Route Duration')
    print(route.duration / 3600)

    print('Total Restaraunts')
    print(len(uniques))
Пример #11
0
def import_crime_data(crimes_raw):
    """Clean raw crime records and attach a zipcode to each one.

    Steps, in order:
      * keep and rename the date, time, crime-code and location columns;
      * keep rows dated after 2015-12-31 and before 2018-06-01;
      * keep rows whose crime code is in a fixed list;
      * turn the time into an hour and bucket it into 0 (0-7), 1 (8-15)
        or 2 (16-23);
      * split the "(lat, lng)" location text into float 'latitude' and
        'longitude' columns;
      * drop rows with missing values or zero coordinates;
      * look up one zipcode per row from its coordinates.

    Args:
        crimes_raw: frame with 'Date Occurred', 'Time Occurred',
            'Crime Code' and 'Location ' columns.

    Returns:
        A new frame with 'date', 'time', 'crime_code', 'latitude',
        'longitude' and 'zipcode' columns. 'zipcode' is ``None`` for rows
        whose coordinates match no zipcode.
    """
    column_names = {
        'Date Occurred': 'date',
        'Time Occurred': 'time',
        'Crime Code': 'crime_code',
        'Location ': 'location',
    }
    # Selecting and renaming in one expression yields a fresh frame, so
    # nothing below writes into a view of crimes_raw.
    crimes_cut = crimes_raw[list(column_names)].rename(columns=column_names)

    dates_filter = (crimes_cut['date'] > '2015-12-31') & (crimes_cut['date'] < '2018-06-01')
    crimes_cut = crimes_cut[dates_filter]

    # Only certain crime codes are kept; copy so column assignments are safe.
    wanted_codes = ['210', '220', '230', '231', '623', '624',
                    '110', '120', '121', '310', '320']
    crimes_cut = crimes_cut.loc[
        crimes_cut['crime_code'].isin(wanted_codes)].copy()

    # HHMM -> hour of day.
    crimes_cut['time'] = (crimes_cut['time'] / 100).astype(int)

    # creating categories for the hours (by 8 hour groups)
    crimes_cut.loc[(crimes_cut['time'] >= 0) & (crimes_cut['time'] < 8), 'time'] = 0
    crimes_cut.loc[(crimes_cut['time'] >= 8) & (crimes_cut['time'] < 16), 'time'] = 1
    crimes_cut.loc[(crimes_cut['time'] >= 16) & (crimes_cut['time'] < 24), 'time'] = 2

    # creating latitude and longitude columns
    crimes_cut[['latitude', 'longitude']] = crimes_cut['location'].str.split(r',\s+', expand=True)
    # regex=False: the parentheses are literal characters, not patterns.
    crimes_cut['latitude'] = crimes_cut['latitude'].str.replace("(", '', regex=False).astype(float)
    crimes_cut['longitude'] = crimes_cut['longitude'].str.replace(")", '', regex=False).astype(float)
    crimes_cut = crimes_cut.drop(['location'], axis=1)

    # deleting the records that have null values or 0 in the relevant columns
    crimes_cut = crimes_cut.dropna(subset=['date', 'time', 'crime_code', 'latitude', 'longitude'])
    crimes_cut = crimes_cut[(crimes_cut['latitude'] != 0) & (crimes_cut['longitude'] != 0)].copy()

    # get the zipcodes based on coordinates
    search = ZipcodeSearchEngine()

    def nearest_zipcode(lat, lng):
        matches = search.by_coordinate(lat, lng, returns=1)
        return matches[0].Zipcode if matches else None

    crimes_cut['zipcode'] = [
        nearest_zipcode(lat, lng)
        for lat, lng in zip(crimes_cut['latitude'], crimes_cut['longitude'])
    ]

    return crimes_cut
Пример #12
0
def get_zipcode(cdf):
    '''
    Look up a zipcode for every (Y, X) coordinate pair in ``cdf``.

    "Y" is read as latitude and "X" as longitude, both cast to float. Each
    pair is searched with ``radius=2`` and the first match's zipcode is
    collected.

    A pair whose lookup fails or finds nothing is reported on stdout and
    skipped, so the returned list can be shorter than ``cdf``.
    '''
    crime_zipcodes = []
    crime_latitudes = cdf["Y"].astype(float)
    crime_longitudes = cdf["X"].astype(float)
    search = ZipcodeSearchEngine()
    for lat, lon in zip(crime_latitudes, crime_longitudes):
        # Reset per pair so the diagnostic never shows a previous result.
        zipcode = None
        try:
            zipcode = search.by_coordinate(lat, lon, radius=2)
            crime_zipcodes.append(zipcode[0].Zipcode)
        except Exception:
            print(lat, lon, zipcode)
            print(len(crime_zipcodes))
    return crime_zipcodes
Пример #13
0
from uszipcode import ZipcodeSearchEngine

# Print the zipcode of the first record found for a fixed coordinate.
search = ZipcodeSearchEngine()
a = search.by_coordinate(
    40.8579417,
    -73.9591453,
)[0]['Zipcode']
print(a)
Пример #14
0
from uszipcode import ZipcodeSearchEngine
from math import radians, cos, sin, asin, sqrt
import math
import os

# Search around a fixed point with a radius and result cap chosen to be
# large enough that, per the original author, every US zip code comes back.
search = ZipcodeSearchEngine()

res = search.by_coordinate(
    39.122229, -77.133578, radius=30000, returns=5000000)


def haversine(lon1, lat1, lon2, lat2, radius=3956):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Args:
        lon1, lat1: longitude and latitude of the first point, in degrees.
        lon2, lat2: longitude and latitude of the second point, in degrees.
        radius: radius of the sphere, in the unit wanted for the result.
            The default, 3956, is the earth's radius in miles; pass 6371
            for kilometers.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` a hair outside [0, 1] (e.g. antipodal points),
    # which would make sqrt/asin raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 20 19:49:21 2017

@author: ERIC
"""
from uszipcode import ZipcodeSearchEngine
search = ZipcodeSearchEngine()
# Ask for the zipcode to use as the centre of the search.
# (Named `zipcode` so the builtin `zip` stays usable.)
zipcode = input('Enter the zipcode: ')
area = search.by_zipcode(zipcode)
# Establish latitude and longitude of that zipcode.
Lat = area['Latitude']
Long = area['Longitude']
# Show them so the user can check the lookup.
print('This is the Longitude:' + ' ' + str(Long))
print('This is the Latitude:' + ' ' + str(Lat))

# Find up to 5 zipcodes within a radius of 10 around that point.
res = search.by_coordinate(Lat, Long, radius=10, returns=5)

# Print whatever was found; fewer than 5 results is not an error.
for match in res:
    print('This is within 10 miles:')
    print(match)
Пример #16
0
    df['County'] = df['latitude']
    df['CountyCode'] = df['latitude']
    df.CountyCode = df.CountyCode.astype("int")
    count = len(df['latitude'])
    count2 = len(df['longitude'])



    for i in range(count):

    #location = loc.GetLocation(latitude, longitude)

        latitude = lat[i]
        longitude = long[i]

        result = search.by_coordinate(latitude, longitude, radius=50, returns=5)
        zipcodeList = [int(result[index]['Zipcode']) for index in range(len(result))]
        a = 20

        county = None
        for index in range(len(zipcodeList)):
            try:
                #x = zcdict[89]
                county = zcdict[zipcodeList[index]]
            except:
                pass
            if county != None:
                break

        if county == None:
            try:
Пример #17
0
def getrandomsongs():
    """Interactively pick random SoundCloud songs from accounts near a zip code.

    Flow:
      1. Ask (via ``getzip`` / ``getradius``) for a zip code and radius until
         a zipcode search succeeds; up to 100 zipcodes in range are kept.
      2. Scan ``accounts.json`` line by line for ``"<zipcode>": "<name>"``
         entries and collect the names.
      3. If names were found, gather each account's links with
         ``parseSoundcloud`` (keeping only links under that account's
         SoundCloud URL), print a random one and show a menu to generate
         another, open the song in the browser, return to the main menu
         (``options``) or exit (``thanks``).
      4. If no names were found, offer to search again, go to the main menu
         or exit.

    The city and state shown in messages are those of the zip code entered.
    """
    while True:
        try:
            print(
                '\nPlease enter the zip code you would like to find accounts around'
            )
            searchzip = int(getzip())
            print(
                '\nPlease enter the radius you would like to find accounts with'
            )
            searchradius = int(getradius())
            search = ZipcodeSearchEngine()
            origin = search.by_zipcode(str(searchzip))
            res = search.by_coordinate(origin.Latitude,
                                       origin.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = [match.Zipcode for match in res]
            # Describe the place the user asked about, not whichever search
            # result happened to come last (or none at all).
            searchcity = origin.City
            searchstate = origin.State
        except Exception:
            # Ctrl-C (KeyboardInterrupt) is deliberately not caught here.
            print(
                "\nSorry, I didn't understand that. Please enter a valid 5-digit zip code.\n"
            )
            continue
        else:
            break

    # One compiled pattern per zipcode, built once rather than per line.
    zip_patterns = [
        re.compile('"' + str(zipcode) + r'": "(\S+)"')
        for zipcode in searchresults
    ]
    names = []
    with open('accounts.json', 'r') as fp:
        for acczip in fp:
            for pattern in zip_patterns:
                names.extend(pattern.findall(acczip))

    if len(names) > 0:
        numberusers = len(names)
        x = numberusers
        totallist = []
        print(
            'Searching through the SoundCloud accounts of the ' + str(x) +
            ' users we found in our database. \nPlease note that this may take a few minutes...'
        )
        for item in names:
            print('...Number of users left to search through: ' + str(x))
            x -= 1
            # Keep only songs from the users in our database, since the
            # parser can sometimes grab other songs that aren't the user's.
            # A plain prefix test avoids treating the name as a regex.
            profile_prefix = 'https://soundcloud.com/' + str(item)
            for link in parseSoundcloud(item):
                if link.startswith(profile_prefix):
                    totallist.append(link)

        if not totallist:
            # random.choice would raise IndexError on an empty list.
            print('\nSorry, but we could not find any songs for the ' +
                  str(numberusers) + ' soundcloud users within ' +
                  str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
                  str(searchstate) + ' (' + str(searchzip) + ')')
            return

        print('\nDone! Here is a random song from the ' +
              str(numberusers) +
              ' soundcloud users in our database that are within ' +
              str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
              str(searchstate) + ' (' + str(searchzip) + '):\n')

        def randsong():
            # generates a random song
            return random.choice(totallist)

        def selection():
            print(
                '\nPlease enter a number based on the following options:')
            print('1 - Generate a new random song')
            print('2 - Play this song')
            print('3 - Return to the main menu')
            print('4 - Exit')
            option = input('---> ')
            if option == "1":
                print(
                    'You may continually generate a new song by clicking 1. \n(Or you can choose options 2 (play), 3 (main menu) or 4 (exit) at any point.)\n'
                )
                while option == "1":
                    idk = randsong()
                    print(idk)
                    xyz = str(idk)
                    option = input('---> ')
                if option == "2":
                    # opens in a new tab in the same window
                    webbrowser.open(xyz, new=2, autoraise=True)
                    selection()
                elif option == "3":
                    options()
                elif option == "4":
                    thanks()
                else:
                    print('Sorry this was not a valid input')
            elif option == "2":
                # hjkl is the song printed before the menu was first shown.
                webbrowser.open(hjkl, new=2, autoraise=True)
                selection()
            elif option == "3":
                options()
            elif option == "4":
                thanks()
            else:
                print('Sorry this was not a valid input')
                selection()

        uiop = randsong()
        print(uiop)
        hjkl = str(uiop)
        selection()

    else:
        print(
            '\nSorry, but we do not have any users in our database that are within '
            + str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
            str(searchstate) + ' (' + str(searchzip) + ')')
        print('\nPlease enter a number based on the following options:')
        print('1 - Search again')
        print('2 - Return to the main menu')
        print('3 - Exit')
        option = input('---> ')
        if option == "1":
            getrandomsongs()
        elif option == "2":
            options()
        elif option == "3":
            thanks()
        else:
            print('Sorry this was not a valid input')
Пример #18
0
class ScrappingClassAddress:
    """Collect per-address sunroof data for coordinates listed in CSV files.

    For every file named in ``<dataFolder>/listFerida.json`` the class reads
    ``<file>.csv`` (id, latitude, longitude) and appends one row per data
    point to ``<file>Updated.csv`` with the columns
    ``id, hoursSun, sqFtRoof, zipCode``. A data point is only scraped when
    its zipcode is in the list configured for that file in
    ``listPlacesScrape.json``; otherwise a row of zeros is written.
    """

    def __init__(self):
        print("nothing yet")
        self.data = []
        self.JSRender = JSRender()
        self.dataFolder = "ferida"
        with open(self.dataFolder + "/listFerida.json") as handle:
            self.listFerida = json.load(handle)['listFeridaFiles']
        with open("listPlacesScrape.json") as handle:
            self.ListPlacesCanScrape = json.load(handle)
        self.search = ZipcodeSearchEngine()
        self.GoogleTimeLog = 0
        # Index into self.keys of the key to use for the next request.
        self.currentKey = 0
        # SECURITY(review): API keys are hard-coded in source. They should
        # be loaded from configuration/environment and rotated.
        self.keys = [
            "AIzaSyC0Wn7cV4llESIh9PlHnKl3lfKLdXCqxxA",
            "AIzaSyD4Es9ZHVJbk3y5JljLfS5oh91ipJA9Lg0",
            "AIzaSyBwS-G_NsWu6nFtWq5qwJEPeBtmYb6MFrc",
            "AIzaSyA5GM4wPjiZ-_b0l9cCW1E2mPdVryV_AR8",
            "AIzaSyDTj70AbEaFe4KDTPS2EXJJOOkfY1KYhnk",
            "AIzaSyDfTnjjuF59Y535-1dC4y2gTBAXz9w6a-s",
            "AIzaSyC3jwdd06Ws34zT4PoDUOBqEaZUFx_ynHI",
            "AIzaSyDU7mmhNMG-jtKPV3-oFcyu34zCAgAj-nA",
            "AIzaSyAjuTdrjlp9LvASK4xxnTzIG9gC9SwUa7Q",  #bja keys
            "AIzaSyCCcnd36fBeIoBYvhY5H4QnGTA6tKAb9JA",
            "AIzaSyDPlpSPLXtqYaDg8nGu2au-ePk2aNlkqw8",
            "AIzaSyBtfZbwuGZhh-cfDI7K42JQwPmdQb4vRmc",
            "AIzaSyBd1tJ-fW8WXKD6EC3bqkd665v8cOCKBdI",
            "AIzaSyCUVu3K4eKoVsCJMpKbngjiGsotqlYGMno",
            "AIzaSyAAkhQ20FZ7bhjTG_hk3DlwccocvArC_Io",
            "AIzaSyBUg88C4CK6oJT9eab2B5xRfTYCrvjY1Wg",
            "AIzaSyDGouQXFh7XOPb-cdbzJMW4YFkjEgtEEjk",
            "AIzaSyC8mHtFj_xU3P-AobqB5GldI9Yp0Gwn9TI",
            "AIzaSyDumNiqby4EaiFXFIqMEj-ffVdtrUzr7KU",
            "AIzaSyCmj0uHTdkXTEOm5y4-XAtvHJZBoZhJKBM",
            "AIzaSyC0uSsAF9QdxXTglIK4sNhWzunayweJ3Cc",
            "AIzaSyDkniiMdEUcsPJjmbhEkjzxZ3iwzuAZ0KU",
            "AIzaSyAnAYlLgV03tsbxEhJ_IUSu1V_nE4HC5WU",
            "AIzaSyAjFOr47GCSNBieiDNecpNPb7Y_fhc23yI",
            "AIzaSyDC38Pj0LRGn2iUoSy_i3lEFYKUImZ_L38",
            "AIzaSyBTUvhaLER0TLoxylDsNWNbGk1kmvNO6FU",
            "AIzaSyBIfVZMYFbSmj-9RsMsuJRgXzAh4ZoOPB0",
            "AIzaSyCWSWgMtNR348PW2BL9p8cR_EiVFZmqo0I",
            "AIzaSyC2gncaibuyQ6dEyl8IN27funviehMhfpQ",
            "AIzaSyAs6mwVUuqnFq6EhBDDHmu0l3kDfFbgKwo",
            "AIzaSyBF7dr-7M2KiIYOq_MlR4tygglU2d5NgIA"
        ]

    def searchURL(self, address):
        """Return the sunroof page URL for ``address``.

        The address is converted with ``str`` and placed after ``#a=``;
        ``&p=1`` is appended.
        """
        url = "https://www.google.com/get/sunroof#a="
        return url + str(address) + "&p=1"

    def loopThroughAllAddress(self):
        """Process every configured input file, resuming partial output.

        If ``<file>Updated.csv`` already exists, processing continues after
        the rows it already holds; otherwise the file is created with a
        header row. The output file is closed once its input is done.
        """
        for fileName in self.listFerida:
            baseFile = "./" + self.dataFolder + "/" + fileName
            print(baseFile)
            # Reading only the needed columns speeds things up a lot and
            # uses less memory.
            df = pd.read_csv(baseFile + ".csv",
                             usecols=['id', 'latitude', 'longitude'])

            updatedPath = baseFile + "Updated.csv"
            # newline='' is what the csv module expects of files it writes.
            if Path(updatedPath).is_file():
                # Output exists: resume after the rows already written.
                startingPoint = pd.read_csv(updatedPath)["id"].count()
                self.csvFile = open(updatedPath, 'a', newline='')
                self.output = csv.writer(self.csvFile)
            else:
                # No output yet: start fresh with a header row.
                self.csvFile = open(updatedPath, 'w+', newline='')
                self.output = csv.writer(self.csvFile)
                self.output.writerow(["id", "hoursSun", "sqFtRoof", "zipCode"])
                startingPoint = 0

            try:
                zipCodes = [
                    int(place["zipCode"])
                    for place in self.ListPlacesCanScrape[fileName]
                ]

                count = df["id"].count()
                print("totalNumber ", count)

                for i in range(startingPoint, count):
                    print(i, " / ", count)
                    self.loopLogic(df.iloc[i], zipCodes)
            finally:
                self.csvFile.close()

    def loopLogic(self, datapoint, zipCodes):
        """Write one output row for ``datapoint``.

        The offline zipcode check runs first. Unless it positively places
        the point outside ``zipCodes`` (result 0), the point is reverse
        geocoded and, if its postal code is in ``zipCodes``, scraped. Every
        other outcome writes ``[id, 0, 0, 0]`` so all rows match the
        four-column header.
        """
        lat = datapoint['latitude']
        lng = datapoint['longitude']
        ID = datapoint['id']

        address = 0
        if self.offlineZipCodeCheck(lat, lng, zipCodes) != 0:
            address = self.googleGetZipCode(lat, lng, zipCodes)

        if address != 0:
            self.getZipCodeData(address, ID)
        else:
            self.output.writerow([ID, 0, 0, 0])

        self.csvFile.flush()

    def getZipCodeData(self, address, ID):
        """Scrape the sunroof page for ``address`` and write its row."""
        url = self.searchURL(address)
        data = self.JSRender.getAddressPageInfo(url)
        data["id"] = ID
        self.output.writerow([
            data["id"], data["hoursSun"], data["sqFtRoof"], address.postal_code
        ])

    def googleGetZipCode(self, latitude, longitude, zipCodes):
        """Reverse geocode a coordinate; return the address if it is wanted.

        Sleeps one second before the request. After a successful request
        the next API key is selected (wrapping around). A ``GeocoderError``
        is printed and retried after 10 seconds; after more than three
        errors the process exits via ``sys.exit()``.

        Returns:
            The geocoded address when its postal code is in ``zipCodes``,
            otherwise 0.
        """
        count = 0
        time.sleep(1)
        self.GoogleTimeLog = time.time()
        while (True):
            try:
                address = Geocoder(self.keys[self.currentKey]).reverse_geocode(
                    latitude, longitude)
                self.currentKey = self.currentKey + 1
                if (self.currentKey >= len(self.keys)):
                    self.currentKey = 0
                break
            except GeocoderError as GeoError:
                print(GeoError)
                count = count + 1
                time.sleep(10)
                if (count > 3):
                    sys.exit()

        if (address.postal_code is not None
                and int(address.postal_code) in zipCodes):

            return address
        else:
            return 0

    def offlineZipCodeCheck(self, latitude, longitude, zipCodes):
        """Classify a coordinate using the local zipcode database.

        Returns:
            2 when no zipcode is found near the coordinate, 1 when the first
            zipcode found is in ``zipCodes``, 0 when it is not.
        """
        zipCode = self.search.by_coordinate(latitude,
                                            longitude,
                                            radius=2,
                                            returns=2)
        if (len(zipCode) == 0):
            return 2
        else:
            zP = zipCode[0]["Zipcode"]
            if (int(zP) in zipCodes):
                return 1
            else:
                return 0
Пример #19
0
    (10314 >= (geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 10301),
    (11120 >= ((geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 11004) | (11697 >= (geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 11351))
    ]
choices = ['Manhattan', 'Bronx', 'Brooklyn', 'Staten Island', 'Queens']
df['boroughPickup'] = np.select(conditions, choices, default=np.nan)

df.to_csv('datasets/withBoroughStart.csv', index=False)
"""

# Label each pickup with a borough, derived from the zipcode found for the
# pickup coordinates. Rows with no zipcode in range keep the empty string.
df['boroughPickup'] = ''
search = ZipcodeSearchEngine()
cnt = 0

for idx in df.index:
    zipCode = (search.by_coordinate(df.loc[idx].pickup_latitude,
                                    df.loc[idx].pickup_longitude,
                                    radius=1,
                                    returns=1))
    if zipCode:
        zipCodeNumber = int(zipCode[0].Zipcode)
        # DataFrame.at is the scalar setter; set_value no longer exists in
        # current pandas.
        if 10001 <= zipCodeNumber <= 10286:
            df.at[idx, 'boroughPickup'] = 'Manhattan'
        elif 10451 <= zipCodeNumber <= 10475:
            df.at[idx, 'boroughPickup'] = 'Bronx'
        elif 11201 <= zipCodeNumber <= 11256:
            df.at[idx, 'boroughPickup'] = 'Brooklyn'
        elif 10301 <= zipCodeNumber <= 10314:
            df.at[idx, 'boroughPickup'] = 'StatenIsland'
Пример #20
0
from __future__ import print_function
from uszipcode import ZipcodeSearchEngine
search = ZipcodeSearchEngine()
import geocoder

# Coordinates reported by geocoder for the caller's own IP ('me').
g = geocoder.ip('me')
lat = float(g.lat)
lng = float(g.lng)

# ZIP codes of the (at most 30) matches returned for radius=100.
res = search.by_coordinate(lat, lng, radius=100, returns=30)
res1 = [match['Zipcode'] for match in res]
print(res1)

# Direct lookup of a single ZIP code.
zipcode = search.by_zipcode("60022")
print(zipcode)
print(zipcode.Latitude)
Пример #21
0
def index():
    """Serve the home page (GET) or return a model prediction (POST).

    On POST the form fields ``month``, ``day``, ``hour``, ``weekday``,
    ``lat`` and ``lon`` are read. A single feature row is built with the
    four time columns followed by one indicator column per known ZIP code;
    the indicator of the ZIP code nearest to (lat, lon) is set to 1 and the
    row is passed to the model stored in ``app.config['model']``.

    Returns:
        POST: ``str`` of the model's prediction, or an error message with
        status 400 when no ZIP code is found for the coordinate or the ZIP
        code is not one of the model's feature columns.
        GET: the rendered ``home.html`` template.
    """
    # form = predictForm()
    if request.method == 'POST':
        print(request.form)
        RFR = app.config['model']
        unique_zips = [
            7030, 7605, 10001, 10002, 10003, 10004, 10005, 10006, 10007, 10009,
            10010, 10011, 10012, 10013, 10014, 10016, 10017, 10019, 10020,
            10021, 10022, 10023, 10024, 10025, 10026, 10027, 10028, 10029,
            10030, 10031, 10032, 10033, 10034, 10035, 10036, 10037, 10038,
            10039, 10040, 10044, 10065, 10069, 10075, 10111, 10115, 10119,
            10128, 10154, 10165, 10167, 10170, 10173, 10174, 10199, 10278,
            10314, 10451, 10452, 10453, 10454, 10455, 10456, 10457, 10458,
            10459, 10460, 10461, 10462, 10463, 10465, 10466, 10467, 10468,
            10469, 10470, 10472, 10473, 10475, 10705, 10708, 11101, 11102,
            11103, 11104, 11105, 11106, 11109, 11201, 11203, 11204, 11205,
            11206, 11209, 11210, 11211, 11213, 11214, 11215, 11216, 11217,
            11218, 11219, 11220, 11221, 11222, 11223, 11224, 11225, 11226,
            11230, 11231, 11232, 11233, 11234, 11235, 11237, 11238, 11351,
            11354, 11355, 11357, 11358, 11360, 11361, 11366, 11367, 11368,
            11369, 11370, 11371, 11372, 11373, 11374, 11375, 11377, 11378,
            11379, 11385, 11411, 11412, 11413, 11415, 11416, 11417, 11418,
            11419, 11420, 11421, 11422, 11423, 11427, 11432, 11433, 11434,
            11435, 11436, 7024, 7201, 7310, 7631, 10018, 10103, 10112, 10168,
            10280, 10471, 10474, 10701, 10704, 11004, 11207, 11208, 11212,
            11228, 11229, 11236, 11239, 11364, 11365, 11429, 11590, 11692,
            7307, 7458, 10282, 10301, 10305, 10550, 10703, 10801, 11005, 11356,
            11359, 11414, 11428, 11430, 11558, 11559, 10306, 11001, 11021,
            11362, 11426, 11580, 7002, 7010, 10303, 10502, 10803, 11003, 11042,
            11581, 11596, 11694, 7302, 7650, 7666, 10310, 10311, 10464, 10552,
            10805, 11024, 11552, 11691, 11742, 7105, 7114, 10302, 10601, 7093,
            10804, 11363, 11697, 11040, 10553, 7306, 11756, 11010, 11030, 7670,
            7632, 10304, 11020, 10522, 11553, 10710, 7208, 11023, 10309, 10606,
            11554, 6831, 11516, 10308, 10594, 10977, 11570, 7660, 7036, 7608,
            11563, 11575, 11803, 7604, 11514, 7601, 7643, 11548, 11788, 11520,
            11753, 11797, 10543, 7020, 11050, 11545, 11771, 7657, 6901, 11798,
            11576, 10573, 2903, 7305, 10312, 7205, 7087, 7606, 10709, 10538,
            7103, 7018, 11752, 7206, 11598, 10523, 11577, 11724, 11743, 7094,
            11729, 11550, 7086, 7603, 7070, 11096, 11565, 11804, 11542, 8859,
            10707, 10706, 7621, 10532, 11704, 10520, 11530, 10577, 11801,
            11961, 7423, 11706, 7006, 7927, 7626, 11507, 11509, 10570, 10530,
            7304, 7647, 10591, 7032, 7050, 7047, 7075, 10931, 10528, 11501,
            11693, 11579, 11557, 7072, 7607, 11762, 7642, 7041, 11703, 7676,
            8901, 11768, 11717, 7078, 7081, 11518, 7055, 7630, 11747, 7102,
            10983, 10980, 11791, 11793, 7001, 10580, 8861, 7663, 11709, 10562,
            11746, 11561, 10506, 11735, 11901, 8512, 11757, 7042, 6473, 11758,
            7950, 6825, 6878, 6902, 6906, 6907, 6854, 10533, 10960, 10941,
            7432, 10952, 7202, 7701, 6890, 7662, 7052, 12542, 7104, 6606,
            10989, 11566, 7656, 7640, 7661, 8105, 6604, 7107, 11776, 10583,
            11710, 8880, 10605, 7017, 7014, 7444, 7644, 7022, 7046, 11787,
            7452, 7026, 7029, 11741, 7652, 7054, 11790, 11714, 11796, 7073,
            11763, 7009, 7112, 11568, 7401, 10510, 10603, 11556, 11725, 10962,
            11572, 11967, 11510, 10307, 10930, 6820, 7960, 7011, 6360, 11718,
            7003, 6460, 7064, 7501, 8542, 18202, 7012, 7410, 8648, 7090, 11779,
            7981, 10607, 8863, 7502, 7203, 10994, 11751, 8734, 7512, 7071,
            6335, 7407, 18102, 6830, 6477, 8850, 7008, 8406, 6510, 11769,
            11726, 7524, 12550, 19142, 7508, 6897, 7504, 7503, 11721, 11783,
            10917, 7039, 7108, 7079, 8830, 6704, 7505, 8401, 8840, 18103, 7111,
            7043, 19141, 11740, 7106, 11722, 18661, 10964, 7722, 7901, 6880,
            7677, 8750, 10920, 7450, 7470, 7013, 10976, 11702, 6853, 10950,
            10504, 8823, 7040, 10970, 6811, 10956, 10507, 18930, 7495, 6851,
            19601, 7747, 7109, 7065, 10566, 10549, 7721, 10595, 7095, 7074,
            12534, 7062, 8817, 7446, 11749, 11767, 11754, 19136, 7027, 18017,
            11701, 6757, 7077, 12792, 7430, 7712, 7311, 8832, 7885, 7935,
            10604, 10913, 7028, 11715, 6903, 7627, 11720, 8837, 8807, 7649,
            10514, 95110, 7083, 19125, 8514, 11560, 11950, 10511, 11732, 7522,
            7080, 12151, 12540, 24328, 12533, 8003, 12528, 7031, 7733, 8742,
            2907, 8620, 10509, 6032, 7514, 8812, 6605, 10536, 32137, 32164,
            7738, 7513, 7044, 11941, 7088, 7204, 11795, 11738, 7016, 19147,
            1521, 11948, 10954, 7628, 12508, 7641, 10527, 7110, 8610, 8536,
            19373, 7920, 8724, 8869, 10990, 10992, 8609, 7974, 11944, 11937,
            2126, 10973, 11731, 19020, 11716, 7060, 21229, 11940, 2908, 11755,
            7004, 7825, 6870, 7921, 7646, 7932, 7740, 19406, 6403, 8201, 12442,
            7928, 6905, 12601, 11782, 7832, 11942, 8078, 11978, 22311, 17402,
            7758, 10578, 11968, 11954, 7728, 7405, 18302, 7645, 19713, 11733,
            8904, 7648, 11772, 7624, 12524, 12531, 6607, 6451, 8852, 8820,
            10965, 12110, 11980, 8550, 12207, 12205, 10535, 7021, 19090, 6043,
            20166, 6840, 12563, 10993, 6481, 12414, 48184, 7035, 2895, 10516,
            11778, 8831, 8854, 7424, 8054, 11933, 27616, 12768, 10923, 7069,
            7940, 6786, 6779, 6787, 1524, 1082, 6067, 11976, 19149, 7457,
            19109, 45203, 7066, 11949, 19107, 8629,10968, 7724, 10921, 22401,
            7924, 11784, 19111, 6339, 12520, 18635, 10598, 18104, 7939, 8012,
            11946, 10928, 7506, 10996, 7730, 6516, 7851, 8701, 8048, 19446,
            6883, 7748, 7442, 19522, 7045, 7092, 7068, 7702, 7417, 18460, 6498,
            6357, 6379, 2886, 12577, 8882, 6461, 7023, 7057, 6371, 6850, 19104,
            6450, 6475, 11713, 7726, 7711, 6801, 10505, 11786, 7719, 12015,
            7753, 22015, 7033, 11765, 11951, 19148, 1062, 8033, 10567, 20005,
            20006, 20037, 7481, 20004, 7005, 6103, 7731, 10924, 8835, 10526,
            10927, 7936, 6614, 7403, 10541, 7871, 11789, 10597, 19153, 85034,
            8857, 12561, 89103, 89119, 12477, 18015, 18372, 1843, 11727, 11780,
            8733, 8520, 18512, 18321, 18509, 10974, 10975, 14212, 19116, 8902,
            7834, 48126, 8628, 17046, 2467, 48212, 8036, 8518, 7847, 6042,
            6519, 10501, 19507, 19120, 2119, 2118, 12204, 12077, 8879, 7756,
            18706, 7746, 12210, 7463, 7822, 8103, 10546, 13032
        ]
        print("predicting")
        col_names = ["Month", "Day", "Hour", "Weekday"]
        col = col_names + unique_zips

        testdummy1 = pd.DataFrame(columns=col)

        # Size the all-zero row from the column list itself so the two can
        # never drift apart (a hard-coded width raises on any mismatch).
        testdummy1.loc[0] = [0] * len(col)

        # Single .loc assignments instead of chained indexing, which may
        # write to a temporary copy rather than to the frame.
        testdummy1.loc[0, "Month"] = request.form["month"]
        testdummy1.loc[0, "Day"] = request.form["day"]
        testdummy1.loc[0, "Hour"] = request.form["hour"]
        testdummy1.loc[0, "Weekday"] = request.form["weekday"]
        search = ZipcodeSearchEngine()
        print("finding zip")
        res = search.by_coordinate(float(request.form["lat"]),
                                   float(request.form["lon"]),
                                   radius=30,
                                   returns=1)
        if not res:
            # Previously an IndexError (HTTP 500).
            return "No zip code found near the given coordinates", 400
        ziptry = res[0]["Zipcode"]
        print(ziptry)
        zip_column = int(ziptry)
        if zip_column not in testdummy1.columns:
            # Previously a KeyError (HTTP 500): the model has no indicator
            # column for this ZIP code.
            return "Zip code %s is not supported by the model" % ziptry, 400
        testdummy1.loc[0, zip_column] = 1
        testing = RFR.predict(testdummy1)

        print("output is")
        print(testing)
        return str(testing)
        # return render_template('predict.html', count=testing)

    elif request.method == 'GET':
        print("Rendering home page")
        return render_template('home.html')
Пример #22
0
def listofusers():
    """Interactively list database accounts near a ZIP code.

    Prompts (via ``getzip`` / ``getradius``) until a ZIP code and radius are
    obtained for which the ZIP lookup succeeds, collects up to 100 ZIP codes
    within that radius, and scans ``accounts.json`` line by line for entries
    of the form ``"<zip>": "<name>"``. The matching names are printed and the
    user is offered follow-up menus. ``addusertodatabase``, ``options`` and
    ``thanks`` are called for the corresponding menu choices.
    """
    #########   PROMPT USER FOR ZIP CODE AND RADIUS FOR SEARCH   ###########
    while True:
        try:
            print(
                '\nPlease enter the zip code you would like to find accounts around'
            )
            searchzip = int(getzip())
            print(
                '\nPlease enter the radius you would like to find accounts with'
            )
            searchradius = int(getradius())

            search = ZipcodeSearchEngine()
            # int() drops leading zeros (e.g. 07030), so pad back to 5 digits
            # before looking the ZIP code up.
            origin = search.by_zipcode(str(searchzip).zfill(5))
            # finds up to 100 zips in the given radius around the origin
            res = search.by_coordinate(origin.Latitude,
                                       origin.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = [nearby.Zipcode for nearby in res]
            # Report the city/state of the ZIP code that was searched for,
            # not of whichever result happened to come last.
            searchcity = origin.City
            searchstate = origin.State
        except Exception:
            # Only ordinary errors trigger a retry; a bare ``except`` would
            # also swallow Ctrl-C and make this loop impossible to leave.
            print(
                "\nSorry, I didn't understand that. Please enter a valid 5-digit zip code.\n"
            )
            continue
        else:
            break

    #########    FIND A LIST OF USERS WITHIN DATABASE   ###########
    # One compiled pattern per ZIP code, built once instead of per line.
    patterns = [
        re.compile('"' + re.escape(str(zipcode)) + r'": "(\S+)"')
        for zipcode in searchresults
    ]
    names = []
    with open('accounts.json', 'r') as fp:
        for acczip in fp:
            for pattern in patterns:
                names.extend(pattern.findall(acczip))

    def selections1():
        # Menu shown when the search found no accounts; loops until the
        # input is one of the four options.
        while True:
            option = input('---> ')
            if option == "1":
                listofusers()
            elif option == "2":
                addusertodatabase()
            elif option == "3":
                options()
            elif option == "4":
                thanks()
            else:
                print('\nSorry this was not a valid input. Try again.')
                continue
            return

    if len(names) > 0:
        print('\nSoundcloud users in our database that are within ' +
              str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
              str(searchstate) + ' (' + str(searchzip) + '):')
        for account in names:
            print(account)
    else:
        ######### NO RESULTS IN SEARCH  ###########
        print(
            '\nSorry, but we do not have any users in our database that are within '
            + str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
            str(searchstate) + ' (' + str(searchzip) + ')')
        print('\nPlease enter a number based on the following options:')
        print('1 - Search again')
        print('2 - Add a user')
        print('3 - Return to the main menu')
        print('4 - Exit')
        selections1()

    ######### WHAT TO DO NEXT, GIVEN RESULTS  ###########
    print('\nPlease enter a number based on the following options:')
    print('1 - Open each user\'s profile')
    print('2 - Redo your search')
    print('3 - Return to the main menu')
    print('4 - Exit')

    def selections2():
        # Follow-up menu; loops until the input is one of the four options.
        while True:
            option = input('---> ')
            if option == "1":
                if len(names) > 0:
                    for account in names:
                        webbrowser.open('https://soundcloud.com/' +
                                        str(account),
                                        new=2,
                                        autoraise=True)
                    print(
                        '\nThank you, the user accounts have been opened in your default browser and you are now back to the main menu'
                    )
                    options()
                else:
                    print(
                        '\nSorry, but we do not have any users in our database that are within '
                        + str(searchradius) + ' miles of ' + str(searchcity) +
                        ', ' + str(searchstate) + ' (' + str(searchzip) + ')')
            elif option == "2":
                listofusers()
            elif option == "3":
                options()
            elif option == "4":
                thanks()
            else:
                print('\nSorry this was not a valid input. Try again')
                continue
            return

    selections2()
##

# Fill missing coordinates with the most frequent observed value. The result
# is assigned back to the frame; an in-place fillna on an attribute-accessed
# column may operate on a temporary copy.
latitude = property_data.latitude.value_counts().idxmax()
property_data["latitude"] = property_data["latitude"].fillna(latitude)
longitude = property_data.longitude.value_counts().idxmax()
property_data["longitude"] = property_data["longitude"].fillna(longitude)

print("Imputing zipcodes")
# Boolean mask and matching positional indices of rows without a ZIP code.
missing_zip = property_data.regionidzip.isnull().values
inds = np.arange(property_data.shape[0])[missing_zip]
print(len(inds), " zipcodes to impute")
search = ZipcodeSearchEngine()
# NOTE(review): coordinates are scaled by 1e-6 before the lookup; presumably
# they are stored as degrees * 1e6 -- confirm against the data source.
zipcodes = np.array([
    int(
        search.by_coordinate(property_data.latitude.iloc[i] * 1e-6,
                             property_data.longitude.iloc[i] * 1e-6,
                             radius=50,
                             returns=1)[0]["Zipcode"]) for i in inds
])
if len(inds):
    # Write through the boolean mask: the positional indices are only valid
    # as labels when the frame has a default RangeIndex.
    property_data.loc[missing_zip, "regionidzip"] = zipcodes

print("Imputing county")
mask = property_data.regionidcounty.isnull().values
known = np.logical_not(mask)
zips = np.unique(property_data.loc[mask, "regionidzip"].values)
for z in zips:
    mask_z = (property_data.regionidzip == z).values
    # Most common known county among the properties sharing this ZIP code.
    counties, counts = np.unique(
        property_data.loc[np.logical_and(known, mask_z),
                          "regionidcounty"].values,
        return_counts=True)
    if counties.size == 0:
        # No property in this ZIP code has a known county; leave it missing
        # instead of failing on argmax of an empty array.
        continue
    county = counties[counts.argmax()]
    property_data.loc[np.logical_and(mask, mask_z), "regionidcounty"] = county
Пример #24
0
    "10005", "10004", "10019", "10023", "10006", "10035", "10010", "10016",
    "10032", "10002", "10038", "10013", "10278", "10018", "10036", "10128",
    "10280", "10037", "10028", "10115", "10029", "10031", "10039", "10026",
    "10044", "10021", "10007", "10011", "10034", "10030", "10111", "10022",
    "10119", "10199", "10001", "10033", "10282", "10065", "10075", "10173",
    "10165", "10168", "10174", "10112", "10020", "10103", "10017", "10069",
    "10167", "10154", "10170"
]

search = ZipcodeSearchEngine()
response2 = {"features": []}
# Set for constant-time membership tests inside the loops below.
wanted_postcodes = set(postcode_list)
# Keep every feature that has at least one coordinate whose nearest ZIP code
# is in postcode_list.
for features in response['features']:
    coordinates = features['geometry']['coordinates']
    for latlon in coordinates:
        # Each coordinate is indexed as [longitude, latitude].
        matches = search.by_coordinate(latlon[1], latlon[0])
        if not matches:
            # No ZIP code found for this point; try the next coordinate
            # instead of failing on matches[0].
            continue
        sample_postcode = matches[0]['Zipcode']
        if sample_postcode in wanted_postcodes:
            response2['features'].append(features)
            break

j_response2 = json.dumps(response2)
j_response = json.loads(j_response2)

# NOTE(review): str_ is built here but not written to outfile in the visible
# code -- confirm whether a write call is missing.
with io.open('data3.json', 'w', encoding='utf8') as outfile:
    str_ = json.dumps(j_response,
                      indent=4,
                      sort_keys=True,
                      separators=(',', ':'),
                      ensure_ascii=False)
Пример #25
0
class Output():
    def __init__(self,
                 source,
                 desiredStates,
                 simTime,
                 outputFileName,
                 zipFilter=None,
                 mapHoverOptions=None):
        """Load the data, validate the parameters and precompute plot data.

        Args:
            source: path of a CSV file or a pandas DataFrame (see
                ``_loadData``).
            desiredStates: list of state column names to track.
            simTime: simulation length; must be an ``int``.
            outputFileName: name of the output file; must end in ".html".
            zipFilter: ZIP code to restrict the data to, or None to use the
                whole data source.
            mapHoverOptions: names of the data columns shown in the map
                hover tool; defaults to ``['story']``.

        Raises:
            BaseException: if ``outputFileName`` or ``simTime`` is invalid.
            TypeError: if ``zipFilter`` is not a ZIP code in the data.
        """
        # A fresh list per instance: a list literal used as the default
        # value would be shared by every instance.
        if mapHoverOptions is None:
            mapHoverOptions = ['story']

        # Passed Parameters #

        self._allData = self._loadData(source)
        self._desiredStates = desiredStates
        self._statuses = ['no_status'] + self._desiredStates + [
            'latitude', 'longitude', 'zip', 'damage_state_start'
        ]
        self._simTime = simTime
        self._outputFileName = outputFileName
        self._desiredZipcode = zipFilter
        self._mapHoverOptions = mapHoverOptions

        # Check that all parameters are valid #

        self._checkParams()
        self._checkStateValidity()

        # Further Processing #

        # Sizing for initial Damage States
        self._damageStates = {
            'None': 6,
            'Slight': 8,
            'Moderate': 10,
            'Extensive': 12,
            'Complete': 14
        }

        # States that will be outputted
        self._desiredStates_ns = (['no_status'] + self._desiredStates)

        # Global counts
        self._numCategories = len(self._desiredStates_ns)
        self._numHomes = self._allData.shape[0]

        # Generate the colors based on the number of categories
        self._colorsOnly = bokeh.palettes.d3['Category20'][self._numCategories]
        self._assignedColors = self._assignColors(
        )  #assign colors to categories

        # Generate zipcodes
        self._zipSearch = ZipcodeSearchEngine()
        self._allData['zip'] = self._allData.apply(self._getZipcode, axis=1)
        self._uniqueZipcodes = sorted(self._allData['zip'].unique().tolist())

        # Filter data
        self._filteredData = self._filterByZip(zipFilter).reset_index(
            drop=True)
        self._filteredNumHomes = len(self._filteredData)
        self._onlyStateData = self._filteredData[desiredStates].reset_index(
            drop=True)

        if self._desiredZipcode is None:
            print("Entire data source will be processed.")
            print(
                "If you would like to filter the data by a specific ZIP code:\nUse getUniqueZipcodes() to get a list of unique ZIP codes or use filterByZip({ZIPCODE}) to filter the data."
            )
        else:
            print("Data is currently filtered by the following ZIP code: " +
                  str(zipFilter))
            print(
                "If you would like to filter the data by a differnt ZIP code:\nUse getUniqueZipcodes() to get a list of unique ZIP codes or use filterByZip({ZIPCODE}) to filter the data."
            )

        self._run()

    def _checkParams(self):
        if (type(self._outputFileName) != str) or (self._outputFileName[-5:] !=
                                                   ".html"):
            raise BaseException(
                "Invalid output file name. Output file name must end in '.html'"
            )
        if type(self._simTime) != int:
            raise BaseException(
                "Invalid simulation time. Simulation time must be an integer.")

    def _checkStateValidity(self):
        missing = [
            state for state in self._desiredStates
            if state not in self._statuses
        ]
        if (len(missing) != 0):
            raise BaseException(
                "Elements of the state order must also be in the desired state list.\nCurrent inconsistencies: ",
                missing)

    # Initialisation: Data sources.
    # returns None if there is an invalid source.
    def _loadData(self, source):
        if type(source) is str:
            try:
                return pd.read_csv(source)
            except BaseException as e:
                print("Unsupported file type or file was not found.")
                return None
        elif type(source) is pandas.core.frame.DataFrame:
            try:
                return source
            except BaseException as e:
                print("Error loading DataFrame. ", e)
                return None

    # Initialisation: Assign colors to desired data categories.
    # returns a dictionary in form {state:color}
    def _assignColors(self):
        colors = {}
        for i in range(0, len(self._desiredStates_ns)):
            colors[self._statuses[i]] = self._colorsOnly[i]
        return colors

    # Initialisation: Find the zipcode based on lat/long.
    # This method is used in DataFrame.apply
    def _getZipcode(self, data):
        lat = data['latitude']
        lng = data['longitude']
        zipcode = self._zipSearch.by_coordinate(lat, lng,
                                                returns=1)[0]['Zipcode']
        zipcode = int(zipcode)
        return zipcode

    # Initialisation: Filter the data source by the desired ZIP code
    def _filterByZip(self, desiredZipcode):
        if desiredZipcode is None:
            return self._allData
        elif int(desiredZipcode) in self._uniqueZipcodes:
            fltr = self._allData['zip'] == desiredZipcode
            return self._allData[fltr]
        else:
            raise TypeError(
                'Invalid Zipcode. Zipcodes currently available: ' +
                str(self._uniqueZipcodes) +
                "\nIf you would like to process the entire data source, pass None"
            )

    # Initialisation: Generate a DataFrame that shows the status of every entity
    # at every point of time.
    # returns a single DataFrame
    def _generateHomeStatus(self):
        home_status_list = []
        for i in range(1, self._simTime):
            single_home_status = np.empty(
                shape=[self._onlyStateData.shape[0], 1], dtype=object)
            curr_max = i
            curr = 0
            for row in self._onlyStateData.itertuples(index=False):
                row_asDict = row._asdict()
                try:
                    mostRecentTime = max(
                        value for name, value in row_asDict.items()
                        if value is not None and value < curr_max)
                    key = next(key for key, value in row_asDict.items()
                               if value == mostRecentTime)
                except ValueError:
                    key = 'no_status'
                single_home_status[curr] = key
                curr += 1
            home_status_list.append(
                pd.Series(data=single_home_status.ravel(), name=i))
        result = pd.concat(home_status_list, axis=1)
        return result

    # Initialisation: Generate a DataFrame that counts the number of each status
    # at every point in time. This is used for the line graph.
    # returns a single DataFrame
    def _generateStatusCounts(self):
        status_count_list = []
        for time in range(1, self._simTime):
            status_count_list.append(
                pd.Series(data=self._allHomeStates[time].value_counts(),
                          name=str(time)))
        status_count_df = pd.concat(status_count_list, axis=1).fillna(value=0)
        missing = [
            status for status in self._desiredStates
            if status not in status_count_df.index
        ]
        return status_count_df.reindex(
            status_count_df.index.union(missing)).fillna(value=0).reindex(
                self._desiredStates_ns)

    # Initialisation: Generate a DataFrame that mirrors allHomeStates but with
    # categorical colors to display on a map.
    # returns a single DataFrame
    def _generateHomeStatusColors(self):
        return self._allHomeStates.replace(self._assignedColors)

    # Initialisation: Generate sources for the plots.
    # returns nothing; the generated DataFrames are stored on the instance
    def _run(self):
        """Recompute the derived tables and store them on the instance.

        Sets ``_allHomeStates``, ``_stateCounts`` and
        ``_allHomeStateColors``. Returns None.
        """
        # Order matters: the two later steps read self._allHomeStates.
        self._allHomeStates = self._generateHomeStatus()
        self._stateCounts = self._generateStatusCounts()
        self._allHomeStateColors = self._generateHomeStatusColors()

    # Client-facing: Get a list of the ZIP codes in the data.
    # prints: list
    def getUniqueZipcodes(self):
        print("ZIP codes in this dataset:")
        print(self._uniqueZipcodes)

    # Client-facing: Filter or re-filter the data by a different ZIPcode
    # refilters and prints confirmation
    def filterByZip(self, desiredZipcode):
        self._filteredData = self._filterByZip(desiredZipcode).reset_index(
            drop=True)
        self._filteredNumHomes = len(self._filteredData)
        self._desiredZipcode = desiredZipcode
        self._onlyStateData = self._filteredData[
            self._desiredStates].reset_index(drop=True)
        self._run()
        if desiredZipcode is None:
            print("Data is not filtered. All data will be shown.")
        else:
            print("Data now filtered by Zipcode:", desiredZipcode)

    # Client-facing: export the current vis data (statuses of each entity by day) to CSV
    def exportVisData(self, fileName="statusByDay.csv"):
        data = self._generateHomeStatus()
        data.to_csv(fileName)
        print("Exported the Status By Day file to " + fileName + ".")

    # Client-facing: Generate the vis!
    def visualize(self):
        """Render the interactive visualisation to the output HTML file.

        Builds three linked Bokeh graphics from the precomputed tables: a
        bar plot of homes per status, a Google map of the homes, and a line
        graph of status counts over time with a current-time indicator. A
        single slider callback switches all three to the selected day. The
        layout is shown via ``show``.
        """

        # Set up the output file
        output_file(self._outputFileName)

        ## BARPLOT ##

        per_day = self._stateCounts.transpose().values.tolist()
        # Key the per-day columns from 1: the slider runs over days
        # 1 .. simTime - 1 and the map columns use the same keys.
        data = {str(day): counts for day, counts in enumerate(per_day, start=1)}
        data[
            'x'] = self._desiredStates_ns  #add the statuses to the data source
        data['y'] = [0.0 for _ in range(len(self._desiredStates_ns))
                     ]  #dummy column for CustomJS to overwrite
        data['colorsOnly'] = self._colorsOnly

        source = ColumnDataSource(data)

        #plot setup
        barplot = figure(plot_width=800,
                         plot_height=600,
                         tools='pan',
                         x_axis_label='Status',
                         x_range=source.data['x'],
                         y_range=ranges.Range1d(
                             start=0, end=int(self._filteredNumHomes * 1.1)),
                         title="Number of Homes by Status at Current Day")

        barplot.vbar(source=source,
                     x='x',
                     top='y',
                     width=0.6,
                     fill_color='colorsOnly',
                     line_color=None)
        bar_hover = HoverTool(tooltips=[('num', '@y')])
        barplot.yaxis.axis_label = "Number of Homes"
        barplot.add_tools(bar_hover)

        ## MAPS ##

        # One bold "name: @name" line per hover option.
        mapHoverInfo = self._mapHoverOptions
        options_html = "".join(
            "<span style=\"font-weight: bold;\">%s: </span><span>%s<br></span>" %
            (str(option), "@" + str(option)) for option in mapHoverInfo)

        mapHoverInfo_html = "<div style=\"width: 450px\">" + options_html + "</div>"

        map_hover = HoverTool(tooltips=mapHoverInfo_html)

        #get average lat, long
        mean_lat = self._filteredData['latitude'].mean()
        mean_long = self._filteredData['longitude'].mean()

        #get the zip area name
        if self._desiredZipcode is None:
            areaData = self._zipSearch.by_coordinate(mean_lat,
                                                     mean_long,
                                                     returns=1)[0]
            areaName = "Greater " + areaData['City'] + " Area"
        else:
            areaData = self._zipSearch.by_zipcode(self._desiredZipcode)
            areaName = areaData['City'] + ", " + str(areaData['Zipcode'])

        map_options = GMapOptions(lat=mean_lat,
                                  lng=mean_long,
                                  map_type="roadmap")
        mapplot = GMapPlot(x_range=ranges.Range1d(),
                           y_range=ranges.Range1d(),
                           map_options=map_options)
        mapplot.title.text = areaName
        mapplot.add_tools(PanTool(), WheelZoomTool(), map_hover)

        #set Google Maps API key
        # SECURITY: an API key is committed in source. It can now be
        # overridden through the GOOGLE_MAPS_API_KEY environment variable;
        # the embedded fallback is kept only for backward compatibility and
        # should be rotated and removed.
        import os
        mapplot.api_key = os.environ.get(
            "GOOGLE_MAPS_API_KEY", "AIzaSyAr5Z6tbpyDQLPyD4PQmrxvqn6VEN_3vnU")

        #data wrangling for JS interaction
        home_data_for_map_list = [
            self._allHomeStateColors.copy(), self._filteredData['latitude'],
            self._filteredData['longitude']
        ]
        for option in self._mapHoverOptions:
            home_data_for_map_list += [self._filteredData[str(option)]]

        home_status_colors_formap = pd.concat(home_data_for_map_list, axis=1)
        initialDamageStateData = self._filteredData[
            'damage_state_start'].replace(self._damageStates)
        home_status_colors_formap = pd.concat(
            [home_status_colors_formap, initialDamageStateData], axis=1)
        home_status_colors_formap['y'] = np.nan  #dummy column
        home_status_colors_formap.columns = home_status_colors_formap.columns.astype(
            str)

        mapsource = ColumnDataSource(home_status_colors_formap)
        circle = Circle(x="longitude",
                        y="latitude",
                        size='damage_state_start',
                        fill_color="y",
                        fill_alpha=0.8,
                        line_color=None)
        mapplot.add_glyph(mapsource, circle)

        ## LINE GRAPH ##

        # LINE GRAPH - CURRENT TIME INDICATOR #
        # Generate a vertical bar to indicate current time within the line graph
        # Line is generated to 10% above the number of homes and 10% below zero
        currtime_list = {
            'x': [0, 0],
            'y': [
                int(self._filteredNumHomes * 1.1),
                int(self._filteredNumHomes * -0.1)
            ]
        }  #dummy column for js callback
        for i in range(0, self._simTime):
            currtime_list[str(i)] = [i, i]

        currtime_source = ColumnDataSource(currtime_list)

        # LINE GRAPH - DATA #

        line_plot = figure(title='Overall House Status vs Time',
                           y_range=ranges.Range1d(
                               start=int(self._filteredNumHomes * 0.1),
                               end=int(self._filteredNumHomes * 1.5)))
        all_line_data = self._stateCounts.values.tolist()

        # One x value per counted day (1 .. simTime - 1), matching the
        # columns of the status counts.
        day_range = list(range(1, self._simTime))

        for counts, name, color in zip(all_line_data, self._statuses,
                                       self._colorsOnly):
            line_data = pd.DataFrame(counts).values.tolist()
            line_plot.line(day_range,
                           line_data,
                           color=color,
                           alpha=0.8,
                           legend=name,
                           line_width=2)

        line_plot.line(x='x', y='y', source=currtime_source, line_color='red')

        line_plot.legend.location = "top_center"
        line_plot.legend.click_policy = "hide"
        line_plot.legend.orientation = "horizontal"
        line_plot.yaxis.axis_label = "Number of Homes"
        line_plot.xaxis.axis_label = "Day"

        # Requires Bokeh 0.12.7
        # Javascript callback to enable and link interactivity between the two plots.
        callback = CustomJS(args=dict(s1=source,
                                      s2=mapsource,
                                      s3=currtime_source),
                            code="""
            console.log(' changed selected time', cb_obj.value);
            var data = s1.data;
            var data2 = s2.data;
            var data3 = s3.data;
            data['y'] = data[cb_obj.value];
            data2['y'] = data2[cb_obj.value];
            data3['x'] = data3[cb_obj.value];
            s1.change.emit();
            s2.change.emit();
            s3.change.emit();
        """)

        ## SLIDER ##
        # This slider manages one callback which updates all three graphics.
        # The initial value sits inside the slider's own range.
        time_slider = Slider(start=1,
                             end=self._simTime - 1,
                             value=1,
                             step=1,
                             callback=callback,
                             title='DAY')

        show(
            gridplot([[mapplot], [line_plot, barplot], [time_slider]],
                     sizing_mode='stretch_both'))
    def predicting_trip_duration(self, pickup, drop, date_):
        """
        Predict the duration of a trip from the user's pick-up address,
        drop-off address and pick-up date/time.

        The date/time text is converted into the month, weekday/weekend and
        time-of-day features, both addresses are geocoded and mapped to zip
        codes, and the resulting feature row is fed to the pickled random
        forest regressor.

        :param pickup: Pick up address from the user
        :param drop: Drop off address from the user
        :param date_: Date and time from the user, as "DD/MM/YYYY HH:MM"
        :return: The predicted time or message for the user
        """
        date_time_info = str(date_)
        if not date_time_info:
            return "Enter trip details"

        # Malformed date/time text gets the same prompt as an empty value
        # instead of escaping as an IndexError/ValueError.
        try:
            parts = date_time_info.split(" ")
            day, month, year = (int(p) for p in parts[0].split("/")[:3])
            date_entered = date(year, month, day)
            clock = [float(x) for x in parts[1].split(":")]
            # Building the time object also range-checks hour and minute.
            pickup_time = time(int(clock[0]), int(clock[1]))
        except (IndexError, ValueError, OverflowError):
            return "Enter trip details"

        # isoweekday(): Saturday == 6, Sunday == 7.
        type_of_day = 1 if date_entered.isoweekday() in (6, 7) else 0

        # Hour-of-day bucket, kept identical to the encoding this method has
        # always produced: 05h..23h -> 0..18, 01h..04h -> 19..22, 00h -> 23.
        hour = pickup_time.hour
        if hour >= 5:
            time_of_day = hour - 5
        elif hour >= 1:
            time_of_day = hour + 18
        else:
            time_of_day = 23

        # Identical address text needs no geocoding at all.
        if pickup == drop:
            return "The trip duration will be 0 minutes"

        # The geocoder is only created once the cheap checks have passed.
        geo_locator = Nominatim()

        location_address_pick = geo_locator.geocode(pickup)
        if location_address_pick is None:  # pick up address could not be resolved
            return "Enter proper pick up address"
        pick_lat = location_address_pick.latitude
        pick_long = location_address_pick.longitude
        coordinates_pick = (pick_lat, pick_long)

        location_address_drop_off = geo_locator.geocode(drop)
        if location_address_drop_off is None:  # drop off address could not be resolved
            return "Enter proper drop off address"
        drop_lat = location_address_drop_off.latitude
        drop_long = location_address_drop_off.longitude
        coordinates_drop = (drop_lat, drop_long)

        # Different address text may still resolve to the same point.
        if coordinates_pick == coordinates_drop:
            return "The trip duration will be 0 minutes"

        # Zip codes are looked up from the geocoded coordinates.
        # NOTE(review): an empty lookup result would raise IndexError on [0].
        search = ZipcodeSearchEngine()
        pickup_zip = search.by_coordinate(pick_lat, pick_long, returns=1)[0]['Zipcode']
        drop_off_zip = search.by_coordinate(drop_lat, drop_long, returns=1)[0]['Zipcode']

        distance = self.distance_to_cover(coordinates_pick, coordinates_drop)

        # Feature names, in the column order used for both the validation
        # slice and the single user-supplied row.
        features = ['pickup_month', 'type_of_day', 'time_of_day', 'pickup_zip',
                    'dropoff_zip', 'pick_lat', 'pick_long', 'drop_lat',
                    'drop_long', 'distance_to_cover']
        user_row = [[int(month), int(type_of_day), int(time_of_day),
                     int(pickup_zip), int(drop_off_zip),
                     float(pick_lat), float(pick_long),
                     float(drop_lat), float(drop_long), float(distance)]]
        x_test = pd.DataFrame(user_row, columns=features).values

        # Random ~25% hold-out of the stored dataset, used for validation.
        full_dS = pd.read_csv('train_cleaned_new_full_with_per_hr.csv')
        msk = np.random.rand(len(full_dS)) < 0.75
        validate = full_dS[~msk]
        x_validate = validate[features].values
        y_validate = validate["trip_duration"].values

        print("Random forest  regression modelling: ")

        # SECURITY: pickle.load executes arbitrary code from the file; the
        # model file must come from a trusted source.
        with open('regression_model_lat_long_with_per_hr.pickle', 'rb') as handle:
            regr_random_forest = pickle.load(handle)

        validation_result = regr_random_forest.predict(x_validate)
        # NOTE(review): the accuracy result is not used here; the call is
        # kept in case calculate_accuracy has side effects - confirm.
        self.calculate_accuracy(y_validate, validation_result)

        final_status = regr_random_forest.predict(x_test)

        # Presumably the model predicts seconds, hence / 60 - TODO confirm.
        message = "The trip duration will be " + str(int(final_status[0] / 60)) + " minutes"
        print(message)
        return message
Пример #27
0
    reader = csv.reader(csvfile, delimiter=',',
                        quotechar='|')  # change contents to floats
    for row in reader:  # each row is a list
        results.append(row)

line_num = 1
for arr in results:
    lat_lon_str = arr[0]
    objId = arr[1]
    town = arr[3]
    addr = arr[4]

    if lat_lon_str:
        lon = float(lat_lon_str.split(" ")[1].split("(")[1])
        lat = float(lat_lon_str.split(" ")[2].split(")")[0])
        zipcode = search.by_coordinate(lat, lon)[0].Zipcode

        ## Race API Call
        ## Black
        burl = 'https://api.census.gov/data/2015/acs5?get=B01001B_001E&for=zip+code+tabulation+area:' + zipcode + '&key=2fd73cb25990a63e4d615c3bcbd02bbded8afd33'
        bdata = json.load(urllib2.urlopen(burl))
        bdata = list(chain.from_iterable(bdata))
        bdata = float(''.join(bdata[2]))

        ## White
        wurl = 'https://api.census.gov/data/2015/acs5?get=B01001H_001E&for=zip+code+tabulation+area:' + zipcode + '&key=2fd73cb25990a63e4d615c3bcbd02bbded8afd33'
        wdata = json.load(urllib2.urlopen(wurl))
        wdata = list(chain.from_iterable(wdata))
        wdata = float(''.join(wdata[2]))

        ## Hispanic