def get_cities(self):
    '''
    Resolve the city and state for each cluster centroid via the uszipcode
    search engine, using the centroid's latitude/longitude.

    Adds "City", "State" and "City_State" columns to self.midpoints_df and
    caches a {geo_cluster: row-dict} mapping on self.cities_dict.

    FIX: the original ran search.by_coordinate() twice per row (once for the
    City column, once for State); a single lookup per row provides both.
    '''
    midpoints_df = self.midpoints_df
    search = ZipcodeSearchEngine()

    # One API call per centroid; each hit carries both .City and .State.
    hits = midpoints_df[["latitude", "longitude"]].apply(
        lambda x: search.by_coordinate(x[0], x[1], radius=30, returns=1)[0],
        axis=1)
    midpoints_df["City"] = [hit.City for hit in hits]
    midpoints_df["State"] = [hit.State for hit in hits]
    midpoints_df["City_State"] = (
        midpoints_df["City"] + ", " + midpoints_df["State"])

    self.cities_dict = midpoints_df.set_index("geo_cluster").to_dict("index")
def get_k_nearest_zipcodes_locations(self, ip_zipcode, radius=50, k_neigh=20):
    """
    Find the zipcodes near to the provided zipcode.

    Returns a list of indices into self._zip_code_list for the nearby
    zipcodes that are present in the data, or None when the lookup fails
    or nothing nearby is in the data.
    """
    search = ZipcodeSearchEngine()
    lat_long_inf = search.by_zipcode(str(ip_zipcode))
    lat, longi = lat_long_inf["Latitude"], lat_long_inf["Longitude"]
    try:
        result = search.by_coordinate(lat, longi, radius=radius,
                                      returns=k_neigh)
    except Exception:  # lookup can fail on bad/missing coordinates
        return None
    if not result:
        return None

    nearest_zip_list = [int(res["Zipcode"]) for res in result]

    # Check which all zipcodes are present in the given data.
    avl_zipcode = set(nearest_zip_list) & set(self._zip_code_list)
    # FIX: the intersection is a set and is never None, so the original
    # `is not None` test always passed and an empty overlap returned []
    # instead of the intended None.
    if not avl_zipcode:
        return None
    return [self._zip_code_list.index(code) for code in avl_zipcode]
def get_weatherInfos(weatherData, stationData, stationName):
    ## find weather available at the station zipcode, if not available in data, find weather at the closest zipcode(s) nearby
    """Return (precipitation_mm, temperature_celsius) for a station.

    Reverse-geocodes the station's coordinates to a zipcode, then widens the
    search radius (in small steps up to 10) until at least one weather row
    matches a nearby zipcode.

    Parameters:
        weatherData  -- DataFrame with at least 'Zip', u'PrecipitationIn'
                        and u'Mean TemperatureF' columns.
        stationData  -- passed through to get_station_coordinates.
        stationName  -- station identifier passed to get_station_coordinates.
    """
    from geopy.geocoders import Nominatim
    from uszipcode import ZipcodeSearchEngine
    geolocator = Nominatim()
    (lat, lon) = get_station_coordinates(stationName, stationData)
    location = geolocator.reverse((lat, lon))
    zipcode = location.raw['address']['postcode']
    search = ZipcodeSearchEngine()
    # NOTE(review): zipcode_infos is never used below — dead lookup?
    zipcode_infos = search.by_zipcode(zipcode)
    stationWeather = pd.DataFrame()
    radius = 0
    # Widen the radius until some weather rows match a nearby zipcode.
    while radius < 10 and stationWeather.shape[0] == 0:
        zipNearby = [
            int(z.Zipcode)
            for z in search.by_coordinate(lat, lon, radius=radius, returns=5)
        ]
        stationWeather = weatherData[weatherData['Zip'].isin(zipNearby)]
        #print("radius: ", radius)
        radius += 0.05  ## ?? 50m?, 0.05 miles? -- unit unconfirmed; TODO verify
    print("post codes of neighborhood: ", zipNearby)

    def fixPrecip(x):
        # Non-numeric precipitation entries (e.g. trace amounts) become a
        # small constant instead of NaN.
        try:
            return float(x)
        except:
            return 0.005  # maybe 0.01 or something?

    precipitation_inch = stationWeather[u'PrecipitationIn'].apply(fixPrecip)
    temperature_fahrenheit = stationWeather[u'Mean TemperatureF']
    temperature_celcius = (temperature_fahrenheit - 32.) / 1.8
    precipitation_mm = 25.4 * precipitation_inch  ## in millimeters
    #sfPrecipitation.max()
    #[sfPrecipitation != 0.0]
    #sfTemp.head
    return (precipitation_mm, temperature_celcius)
def init_randomzips():
    """Prompt for a zip code and radius, then print up to 100 nearby zipcodes.

    Loops until the lookup succeeds. Side effects only (console I/O); the
    city/state of the last zipcode returned are captured in locals.
    """
    while True:
        try:
            print('Please enter the zip code you would like to find accounts around')
            searchzip = int(input('---> '))
            print('Please enter the radius you would like to find accounts with')
            searchradius = int(input('---> '))
            search = ZipcodeSearchEngine()
            zipcode = search.by_zipcode(str(searchzip))
            # FIX: dropped the unused mylat/mylong re.findall scrapes of the
            # zipcode repr -- zipcode.Latitude/.Longitude are used directly.
            res = search.by_coordinate(zipcode.Latitude,
                                       zipcode.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = []
            for zipcode in res:
                searchresults.append(zipcode.Zipcode)
            # City/state of the last (farthest) result, matching the original.
            searchcity = zipcode.City
            searchstate = zipcode.State
        except Exception:  # was a bare except; keep retry-on-any-error intent
            print("Sorry, I didn't understand that.")
            continue
        else:
            break
    print(searchresults)
def filter_desired_columns_from_ip_records(self):
    """ Filter only desired columns from the records.

    Reads self._input_data_records (dicts with 'categories', 'full_address',
    'latitude', 'longitude', etc.), derives a usable zipcode for each row,
    and stores tuples of the desired fields on
    self._input_filtered_cols_records. Rows with unrecoverable or clearly
    invalid zipcodes are dropped.
    """
    filtered_data = []
    print("LOG: [Filtering Engine] Filtering desired columns.")
    # Get categories and convert to list.
    for row in self._input_data_records:
        cat_list = []
        for k in row['categories']:
            # NOTE(review): encode('ascii') will raise on non-ASCII category
            # names — presumably the data is ASCII-only; confirm.
            cat_list.append(k.encode('ascii'))
        # Parse zipcode from the full_address value (last space-separated token).
        zip_code = row['full_address'].split(' ')[len(row['full_address'].split(' ')) - 1]
        # Check if zipcode is available and a valid one.
        try:
            zip_code = int(zip_code)
            # Sometimes we get invalid zipcode such as 891118, we need to get
            # the zipcode from latitude and longitude
            if (zip_code > 99999):
                raise Exception("ERROR: [Filtering Engine] Invalid zip_code")
        except:
            # Fallback for non-numeric or out-of-range zipcodes.
            # Get the closest zipcode for the given lat-long
            # Help link: https://pypi.python.org/pypi/uszipcode
            # Search engine for zipcode to lat-long and vice-versa conversions. This returns
            # top 3 matching zipcodes.
            search = ZipcodeSearchEngine()
            result = search.by_coordinate(row['latitude'], row['longitude'], radius=20, returns=3)
            if len(result) == 0:
                continue
            zip_code = int(result[0]['Zipcode'])
        # Filter out rows that belong to some invalid locations.
        if (zip_code < 100):
            continue
        # Create record row with desired columns.
        a = (cat_list, '', row['state'], row['city'], row['full_address'], zip_code, row['longitude'], row['latitude'], row['stars'], row['type'], row['review_count'])
        # Append to final data.
        filtered_data.append(a)
    print ("LOG: [Filtering Engine] Number of filtered final records: %d" % len(filtered_data))
    self._input_filtered_cols_records = filtered_data
def getZips():
    """Flask endpoint: list zipcode records within `Scope` miles of a store.

    Query-string parameters:
        Store -- store LOCATION_NAME to look up in the njstores table.
        Scope -- search radius (miles) around the store's coordinates.

    Returns a JSON array with one dict of uszipcode fields per zipcode found.
    """
    stName = request.args.get('Store')
    stRad = request.args.get('Scope')

    # FIX: parameterized query. The original concatenated stName straight
    # into the SQL text — an SQL-injection vector for a request parameter.
    # NOTE(review): %s placeholder assumes a DB-API driver such as PyMySQL;
    # adjust to the driver's paramstyle if different.
    sqlStr = ("SELECT "
              "njstores.LATITUDE_MEAS, "
              "njstores.LONGITUDE_MEAS "
              "FROM where_are_your_stores.njstores "
              "WHERE LOCATION_NAME = %s")
    results = conn.execute(sqlStr, (stName,))

    resList = []
    stLat = None
    stLong = None
    for row in results:
        # FIX: build a fresh dict per row; the original reused one dict
        # object, so every resList entry aliased the last row's values.
        resDict = {"Lat": row.LATITUDE_MEAS, "Long": row.LONGITUDE_MEAS}
        stLat = row.LATITUDE_MEAS
        stLong = row.LONGITUDE_MEAS
        resList.append(resDict)

    stRad = int(stRad)
    search = ZipcodeSearchEngine()
    # returns=0 — presumably "no cap on result count" in uszipcode; confirm.
    res = search.by_coordinate(stLat, stLong, radius=stRad, returns=0)

    allZips = []
    for aRec in res:
        zDict = {
            "City": aRec.City,
            "Density": aRec.Density,
            "HouseOfUnits": aRec.HouseOfUnits,
            "LandArea": aRec.LandArea,
            "Latitude": aRec.Latitude,
            "Longitude": aRec.Longitude,
            "NEBoundLatitude": aRec.NEBoundLatitude,
            "NEBoundLongitude": aRec.NEBoundLongitude,
            "Population": aRec.Population,
            "SWBoundLatitude": aRec.SWBoundLatitude,
            # "Boung" typo mirrors the attribute name in the uszipcode model.
            "SWBoungLongitude": aRec.SWBoungLongitude,
            "State": aRec.State,
            "TotalWages": aRec.TotalWages,
            "WaterArea": aRec.WaterArea,
            "Wealthy": aRec.Wealthy,
            "Zipcode": aRec.Zipcode,
            "ZipcodeType": aRec.ZipcodeType,
        }
        allZips.append(zDict)
    return jsonify(allZips)
def assign_zipcode(steps):
    """Annotate each step in-place with the zipcode/city/state nearest to
    its coordinates; steps with no nearby match are left untouched."""
    engine = ZipcodeSearchEngine()
    for step in steps:
        matches = engine.by_coordinate(step.latitude, step.longitude,
                                       returns=1)
        if not matches:
            continue
        best = matches[0]
        step.zipcode = best['Zipcode']
        step.city = best['City']
        step.state = best['State']
def get_nearest_zips(zip_code, radius=20): "Return a list of nearest zip codes" nearest_zip_codes = [] search = ZipcodeSearchEngine() my_zip = search.by_zipcode(zip_code) if my_zip['Latitude'] is not None and my_zip['Longitude'] is not None: results = search.by_coordinate(my_zip['Latitude'], my_zip['Longitude'], radius=radius, returns=200) for result in results: nearest_zip_codes.append(result['Zipcode']) return nearest_zip_codes
def get_ratings_for_business_zipcode(business_type, zipcode, radius=20, k_neigh=20):
    """Predict the rating of a business type at a zipcode.

    Uses the exact zipcode's row from the predictions CSV when available,
    otherwise averages the predictions over nearby zipcodes that exist in
    the data. Returns the rating (float) or None when nothing usable is
    found.

    FIXES vs original: `radius`, `k_neigh` and `self._zip_code_list` were
    undefined names in this module-level function (radius/k_neigh are now
    parameters, the nearby check uses the CSV's own zipcode list); the
    averaging loop summed row 0 repeatedly; the average branch never
    returned its result; `avl_zipcode is not None` was always true for a set.
    """
    # Get all zipcodes avl in the result.
    global FINAL_RATINGS_DF
    FINAL_RATINGS_DF = read_csv_data_to_df(TRAINING_PRED_FILENAME)
    print(len(FINAL_RATINGS_DF))
    zipcode_list = np.array(FINAL_RATINGS_DF.zipcode).tolist()

    if zipcode in zipcode_list:
        rating_row = FINAL_RATINGS_DF[FINAL_RATINGS_DF['zipcode'] == zipcode]
        rating = rating_row[business_type].tolist()[0]
        print("Predicted Rating for business: %s, zipcode: %d is %f" %
              (business_type, zipcode, rating))
        return rating

    # No exact match: average over nearby zipcodes present in the data.
    search = ZipcodeSearchEngine()
    lat_long_inf = search.by_zipcode(str(zipcode))
    lat, longi = lat_long_inf["Latitude"], lat_long_inf["Longitude"]
    try:
        result = search.by_coordinate(lat, longi, radius=radius,
                                      returns=k_neigh)
    except Exception:
        return None
    if not result:
        return None

    nearest_zip_list = [int(res["Zipcode"]) for res in result]
    # Check which all zipcodes are present in the given data.
    avl_zipcode = set(nearest_zip_list) & set(zipcode_list)
    if not avl_zipcode:
        return None

    ratings = FINAL_RATINGS_DF[
        FINAL_RATINGS_DF['zipcode'].isin(list(avl_zipcode))]
    # Average the prediction column over the nearby rows.
    avg_rating = float(ratings[business_type].sum()) / len(ratings)
    print("Predicted Rating for business: %s, zipcode: %d is %f" %
          (business_type, zipcode, avg_rating))
    return avg_rating
def getCountsForRoute(route):
    """Annotate each step of `route` with a count of nearby Taco Bell clusters.

    For each step: find zipcodes within 10 miles, query Yelp for "Taco Bell"
    around the first zipcode, deduplicate businesses by id, collapse shops
    within 10 miles of each other, and store the cluster count on
    step.foodCount. Prints route-level stats at the end.
    """
    uniques = set()
    final = set()
    shops = list()
    yelp_api = YelpAPI(API_KEY)
    search = ZipcodeSearchEngine()
    for step in route.steps_list:
        res = search.by_coordinate(step.lat, step.long, radius=10, returns=5)
        codes = list()
        for zipcode in res:
            codes.append(zipcode.Zipcode)
        searchTerm = "Taco Bell"
        # FIX: initialize every iteration; the original only set restCount
        # inside the `if`, so a step with no zipcodes reused the previous
        # step's count (or raised NameError on the first step).
        restCount = 0
        # quick empty check
        if len(codes) > 0:
            search_results = yelp_api.search_query(term=searchTerm,
                                                   location=codes[0],
                                                   price="1,2")
            # looks through all the search query results
            for buis in search_results['businesses']:
                if searchTerm in buis['name']:
                    if buis['id'] not in uniques:
                        uniques.add(buis['id'])
                        s = Shop(buis['coordinates']['latitude'],
                                 buis['coordinates']['longitude'],
                                 buis['id'])
                        shops.append(s)
            # Mark shops within 10 miles of another un-visited shop so each
            # cluster is only counted once.
            for shop_object in shops:
                for shop_object1 in shops:
                    if shop_object.shop_id != shop_object1.shop_id and shop_object.visited == False and shop_object1.visited == False:
                        dist = calculate_distance(shop_object.lat, shop_object.lng,
                                                  shop_object1.lat, shop_object1.lng)
                        if dist < 10:
                            shop_object.visited = True
            for x in shops:
                # FIX: the original tested `x == False`, comparing the Shop
                # object itself to False (never true) — the intended check is
                # the visited flag.
                if x.visited == False:
                    restCount = restCount + 1
            shops = list()
        step.foodCount = restCount
        # print(step.foodCount)
    print('Route Distance')
    print((route.distance * 3.28084) / 5280)
    print('Route Duration')
    print(route.duration / 3600)
    print('Total Restaraunts')
    print(len(uniques))
def import_crime_data(crimes_raw):
    """Clean LA crime data and attach a zipcode to each record.

    Keeps date/time/crime-code/location columns, restricts to 2016-01-01
    through 2018-05-31 and to selected violent-crime/burglary codes, bins
    the hour into three 8-hour buckets (0/1/2), splits "(lat, lng)" strings
    into float columns, and looks up a zipcode per coordinate pair.

    Returns the cleaned DataFrame with a new 'zipcode' column.
    """
    # selecting only the columns we will be interested in
    # (note the trailing space in 'Location ' matches the raw column name)
    crimes_cut = crimes_raw[['Date Occurred', 'Time Occurred', 'Crime Code', 'Location ']]
    # renaming the columns
    crimes_cut.rename(columns={'Date Occurred': 'date', 'Time Occurred': 'time', 'Crime Code': 'crime_code', 'Location ': 'location'}, inplace=True)
    # getting the data from the last two years
    dates_filter = (crimes_cut['date'] > '2015-12-31') & (crimes_cut['date'] < '2018-06-01')
    crimes_cut = crimes_cut[dates_filter]
    #getting the data for certain crime codes (in this case violent crime and burglaries)
    crimes_cut = crimes_cut.loc[crimes_cut['crime_code'].isin(['210', '220', '230', '231', '623', '624', '110', '120', '121', '310', '320'])];
    #changing values of time feature (HHMM integer -> hour)
    crimes_cut['time'] = (crimes_cut['time']/100).astype(int)
    # silence pandas SettingWithCopy warnings for the in-place edits below
    crimes_cut.is_copy = False
    # creating categories for the hours (by 8 hour groups)
    crimes_cut.loc[(crimes_cut['time'] >= 0) & (crimes_cut['time'] < 8), 'time'] = 0
    crimes_cut.loc[(crimes_cut['time'] >= 8) & (crimes_cut['time'] < 16), 'time'] = 1
    crimes_cut.loc[(crimes_cut['time'] >= 16) & (crimes_cut['time'] < 24), 'time'] = 2
    # creating latitude and longitude columns from "(lat, lng)" strings
    crimes_cut[['latitude', 'longitude']] = crimes_cut['location'].str.split(',\s+', expand=True)
    crimes_cut['latitude'] = crimes_cut['latitude'].str.replace("(", '').astype(float)
    crimes_cut['longitude'] = crimes_cut['longitude'].str.replace(")", '').astype(float)
    crimes_cut = crimes_cut.drop(['location'], axis=1)
    # get the zipcodes based on coordinates
    search = ZipcodeSearchEngine()
    # deleting the records that have null values or 0 in the relevant columns
    crimes_cut = crimes_cut.dropna(subset=['date', 'time', 'crime_code', 'latitude', 'longitude'])
    crimes_cut = crimes_cut[(crimes_cut['latitude'] != 0) & (crimes_cut['longitude'] != 0)]
    # One zipcode lookup per row — NOTE(review): slow for large frames.
    codes = [(search.by_coordinate(lat, lng, returns = 1))[0].Zipcode for lat, lng in zip(crimes_cut['latitude'], crimes_cut['longitude'])]
    crimes_cut['zipcode'] = codes
    return crimes_cut
def get_zipcode(cdf):
    '''
    This function fetches the latitude, longitude values
    Uses the ZipCodeSearchEngine module to retrieve the zipcode

    Returns a list of zipcodes; rows whose lookup fails are skipped (so
    the list can be shorter than the input frame).
    '''
    crime_zipcodes = []
    crime_latitudes = cdf["Y"].astype(float)
    crime_longitudes = cdf["X"].astype(float)
    search = ZipcodeSearchEngine()
    for lat, lon in zip(crime_latitudes, crime_longitudes):
        try:
            matches = search.by_coordinate(lat, lon, radius=2)
            crime_zipcodes.append(matches[0].Zipcode)
        except Exception:
            # FIX: the original bare-except printed `zipcode`, which is
            # unbound if by_coordinate itself raised on the first failure;
            # it also used a py2-only print statement.
            print("no zipcode found for %s, %s" % (lat, lon))
    print(len(crime_zipcodes))
    return crime_zipcodes
from uszipcode import ZipcodeSearchEngine

# Look up the zipcode containing a fixed upper-Manhattan-area coordinate
# and print it.
engine = ZipcodeSearchEngine()
nearest = engine.by_coordinate(40.8579417, -73.9591453)[0]
a = nearest['Zipcode']
print(a)
from uszipcode import ZipcodeSearchEngine
from math import radians, cos, sin, asin, sqrt
import math
import os

search = ZipcodeSearchEngine()
# Huge radius/returns so the query effectively covers the whole country.
res = search.by_coordinate(39.122229, -77.133578, radius=30000, returns=5000000)  # This returns every us zip code


def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))
    # FIXED COMMENT: 3956 is the Earth radius in MILES (use 6371 for
    # kilometers) — the original comment claimed kilometers. Result is in
    # the same unit as r, i.e. miles here.
    r = 3956
    return c * r
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 20 19:49:21 2017

@author: ERIC
"""
from uszipcode import ZipcodeSearchEngine

search = ZipcodeSearchEngine()

# enter area code you want to use as a base.
# FIX: renamed from `zip`, which shadowed the builtin.
zip_input = input('Enter the zipcode: ')
area = search.by_zipcode(zip_input)

# establish lat and long
Lat = area['Latitude']
Long = area['Longitude']

# check lat and long
print('This is the Longitude:' + ' ' + str(Long))
print('This is the Latitude:' + ' ' + str(Lat))

# find zips within the closests 10 miles of a Lat and Long.
res = search.by_coordinate(Lat, Long, radius=10, returns=5)

# Print results.
# FIX: for-loop over the results replaces the manual while/counter, which
# raised IndexError whenever fewer than 5 zipcodes were returned.
for nearby in res[:5]:
    print('This is within 10 miles:')
    print(nearby)
df['County'] = df['latitude'] df['CountyCode'] = df['latitude'] df.CountyCode = df.CountyCode.astype("int") count = len(df['latitude']) count2 = len(df['longitude']) for i in range(count): #location = loc.GetLocation(latitude, longitude) latitude = lat[i] longitude = long[i] result = search.by_coordinate(latitude, longitude, radius=50, returns=5) zipcodeList = [int(result[index]['Zipcode']) for index in range(len(result))] a = 20 county = None for index in range(len(zipcodeList)): try: #x = zcdict[89] county = zcdict[zipcodeList[index]] except: pass if county != None: break if county == None: try:
def getrandomsongs():
    """Interactive flow: find SoundCloud users near a zipcode and offer
    random songs from their accounts.

    Prompts for a zipcode and radius, gathers nearby zipcodes, matches them
    against accounts.json, scrapes each matched user's SoundCloud page, and
    then drives a small menu (play / reroll / main menu / exit). Relies on
    module-level helpers: getzip, getradius, parseSoundcloud, options,
    thanks, and the re/random/webbrowser imports.
    """
    # ----- prompt until we get a usable zipcode + radius -----
    while True:
        try:
            print(
                '\nPlease enter the zip code you would like to find accounts around'
            )
            searchzip = int(getzip())
            print(
                '\nPlease enter the radius you would like to find accounts with'
            )
            searchradius = int(getradius())
            search = ZipcodeSearchEngine()
            zipcode = search.by_zipcode(str(searchzip))
            # NOTE(review): mylat/mylong are never used afterwards.
            mylat = re.findall('"Latitude": (\S+),', str(zipcode))
            mylong = re.findall('"Longitude": (\S+),', str(zipcode))
            res = search.by_coordinate(zipcode.Latitude,
                                       zipcode.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = []
            for zipcode in res:
                searchresults.append(zipcode.Zipcode)
            # City/state of the last zipcode produced by the loop above.
            searchcity = zipcode.City
            searchstate = zipcode.State
        except:
            print(
                "\nSorry, I didn't understand that. Please enter a valid 5-digit zip code.\n"
            )
            continue
        else:
            break
    # ----- match nearby zipcodes against the accounts database -----
    with open('accounts.json', 'r') as fp:
        names = []
        zips = searchresults
        for acczip in fp:
            for zipcode in zips:
                # Regex is rebuilt per zipcode: "<zip>": "<name>"
                x = '"'
                zipsearch = str(zipcode)
                y = '": "(\S+)"'
                myzipsearch = str(x + zipsearch + y)
                links = re.findall(myzipsearch, acczip)
                for link in links:
                    names.append(link)
    if len(names) > 0:
        artists = names
        templist = []
        totallist = []
        finallist = []
        numberusers = int(len(artists))
        x = int(len(artists))
        print(
            'Searching through the SoundCloud accounts of the ' + str(x) +
            ' users we found in our database. \nPlease note that this may take a few minutes...'
        )
        # ----- scrape each user's page, keep only their own song links -----
        for item in artists:
            print('...Number of users left to search through: ' + str(x))
            x -= 1
            templist = parseSoundcloud(item)
            myregx = 'https://soundcloud.com/' + str(item)
            myregex = str('^' + str(myregx))
            #makes sure that only songs from the users in our database are kept in the list, since the parser can sometimes grab other songs that aren't the user's
            for link in templist:
                if re.match(myregex, link):
                    totallist.append(link)
        print('\nDone! Here is a random song from the ' + str(numberusers) +
              ' soundcloud users in our database that are within ' +
              str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
              str(searchstate) + ' (' + str(searchzip) + '):\n')

        def randsong():
            # Pick one song uniformly at random from the collected links.
            rsong = random.choice(totallist)  #generates a random song
            return (rsong)

        def selection():
            # Menu loop; recurses on play/invalid input, delegates to
            # options()/thanks() for navigation.
            print(
                '\nPlease enter a number based on the following options:')
            print('1 - Generate a new random song')
            print('2 - Play this song')
            print('3 - Return to the main menu')
            print('4 - Exit')
            rsong = randsong()
            option = input('---> ')
            if option == "1":
                print(
                    'You may continually generate a new song by clicking 1. \n(Or you can choose options 2 (play), 3 (main menu) or 4 (exit) at any point.)\n'
                )
                # Keep rerolling while the user presses 1.
                while option == "1":
                    idk = randsong()
                    print(idk)
                    xyz = str(idk)
                    option = input('---> ')
                if option == "2":
                    webbrowser.open(xyz, new=2, autoraise=True)  #opens in a new tab in the same window
                    selection()
                elif option == "3":
                    options()
                elif option == "4":
                    thanks()
                else:
                    print('Sorry this was not a valid input')
            elif option == "2":
                # Play the song shown before the menu (hjkl, set below).
                webbrowser.open(hjkl, new=2, autoraise=True)
                selection()
            elif option == "3":
                options()
            elif option == "4":
                thanks()
            else:
                print('Sorry this was not a valid input')
                selection()

        # Show an initial song, remember it for option "2", enter the menu.
        uiop = randsong()
        print(uiop)
        hjkl = str(uiop)
        selection()
    else:
        # ----- no users found near the requested location -----
        print(
            '\nSorry, but we do not have any users in our database that are within '
            + str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
            str(searchstate) + ' (' + str(searchzip) + ')')
        print('\nPlease enter a number based on the following options:')
        print('1 - Search again')
        print('2 - Return to the main menu')
        print('3 - Exit')
        option = input('---> ')
        if option == "1":
            getrandomsongs()
        elif option == "2":
            options()
        elif option == "3":
            thanks()
        else:
            print('Sorry this was not a valid input')
class ScrappingClassAddress:
    """Scrapes Google Project Sunroof data for coordinates listed in CSVs.

    For each input file, reverse-geocodes each row's lat/long to an address
    (rotating through Google API keys), filters to the zipcodes listed in
    listPlacesScrape.json, and appends [id, hoursSun, sqFtRoof, zipCode]
    rows to a "<file>Updated.csv" output, resuming where a previous run
    stopped.
    """

    def __init__(self):
        print("nothing yet")
        self.data = []
        self.JSRender = JSRender()
        self.dataFolder = "ferida"
        self.listFerida = json.loads(
            open(self.dataFolder + "/listFerida.json").read())['listFeridaFiles']
        self.ListPlacesCanScrape = json.loads(
            open("listPlacesScrape.json").read())
        self.search = ZipcodeSearchEngine()
        self.GoogleTimeLog = 0
        self.currentKey = 0
        # NOTE(review)/SECURITY: hard-coded Google API keys checked into
        # source — these should be revoked and loaded from env/config.
        self.keys = [
            "AIzaSyC0Wn7cV4llESIh9PlHnKl3lfKLdXCqxxA",
            "AIzaSyD4Es9ZHVJbk3y5JljLfS5oh91ipJA9Lg0",
            "AIzaSyBwS-G_NsWu6nFtWq5qwJEPeBtmYb6MFrc",
            "AIzaSyA5GM4wPjiZ-_b0l9cCW1E2mPdVryV_AR8",
            "AIzaSyDTj70AbEaFe4KDTPS2EXJJOOkfY1KYhnk",
            "AIzaSyDfTnjjuF59Y535-1dC4y2gTBAXz9w6a-s",
            "AIzaSyC3jwdd06Ws34zT4PoDUOBqEaZUFx_ynHI",
            "AIzaSyDU7mmhNMG-jtKPV3-oFcyu34zCAgAj-nA",
            "AIzaSyAjuTdrjlp9LvASK4xxnTzIG9gC9SwUa7Q",
            #bja keys
            "AIzaSyCCcnd36fBeIoBYvhY5H4QnGTA6tKAb9JA",
            "AIzaSyDPlpSPLXtqYaDg8nGu2au-ePk2aNlkqw8",
            "AIzaSyBtfZbwuGZhh-cfDI7K42JQwPmdQb4vRmc",
            "AIzaSyBd1tJ-fW8WXKD6EC3bqkd665v8cOCKBdI",
            "AIzaSyCUVu3K4eKoVsCJMpKbngjiGsotqlYGMno",
            "AIzaSyAAkhQ20FZ7bhjTG_hk3DlwccocvArC_Io",
            "AIzaSyBUg88C4CK6oJT9eab2B5xRfTYCrvjY1Wg",
            "AIzaSyDGouQXFh7XOPb-cdbzJMW4YFkjEgtEEjk",
            "AIzaSyC8mHtFj_xU3P-AobqB5GldI9Yp0Gwn9TI",
            "AIzaSyDumNiqby4EaiFXFIqMEj-ffVdtrUzr7KU",
            "AIzaSyCmj0uHTdkXTEOm5y4-XAtvHJZBoZhJKBM",
            "AIzaSyC0uSsAF9QdxXTglIK4sNhWzunayweJ3Cc",
            "AIzaSyDkniiMdEUcsPJjmbhEkjzxZ3iwzuAZ0KU",
            "AIzaSyAnAYlLgV03tsbxEhJ_IUSu1V_nE4HC5WU",
            "AIzaSyAjFOr47GCSNBieiDNecpNPb7Y_fhc23yI",
            "AIzaSyDC38Pj0LRGn2iUoSy_i3lEFYKUImZ_L38",
            "AIzaSyBTUvhaLER0TLoxylDsNWNbGk1kmvNO6FU",
            "AIzaSyBIfVZMYFbSmj-9RsMsuJRgXzAh4ZoOPB0",
            "AIzaSyCWSWgMtNR348PW2BL9p8cR_EiVFZmqo0I",
            "AIzaSyC2gncaibuyQ6dEyl8IN27funviehMhfpQ",
            "AIzaSyAs6mwVUuqnFq6EhBDDHmu0l3kDfFbgKwo",
            "AIzaSyBF7dr-7M2KiIYOq_MlR4tygglU2d5NgIA"
        ]

    def searchURL(self, address):
        """Build the Project Sunroof URL for a formatted address."""
        #address = "15 Glendale Ave, Somerville, MA 02144, USA" #"112 Stanley St, Redwood City, CA 94062, USA"
        #url = "https://www.google.com/get/sunroof#a=15%20Glendale%20Ave,%20Somerville,%20MA%2002144,%20USA&b=150&f=lease&np=28&p=1&sh=1"
        url = "https://www.google.com/get/sunroof#a="
        #print(url + str(address) + "&p=1")
        return url + str(address) + "&p=1"

    '''%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    loop through a a lot of urls
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'''

    def loopThroughAllAddress(self):
        """Process every input CSV, resuming from any existing output file."""
        for fileName in self.listFerida:
            print("./" + self.dataFolder + "/" + fileName)
            #speeds up things a lot and uses less memory
            baseFile = "./" + self.dataFolder + "/" + fileName
            df = pd.read_csv(baseFile + ".csv",
                             usecols=['id', 'latitude', 'longitude'])
            #if file exists - add (resume after the rows already written)
            if (Path(baseFile + "Updated.csv").is_file()):
                tmp = pd.read_csv(baseFile + "Updated.csv")
                startingPoint = tmp["id"].count()
                self.csvFile = open(baseFile + "Updated.csv", 'a')
                self.output = csv.writer(self.csvFile)
            #if file doesn't - start fresh
            else:
                self.csvFile = open(baseFile + "Updated.csv", 'w+')
                self.output = csv.writer(self.csvFile)
                self.output.writerow(["id", "hoursSun", "sqFtRoof", "zipCode"])
                startingPoint = 0
            # Zipcodes we are allowed to scrape for this file.
            zipCodes = []
            for i in range(0, len(self.ListPlacesCanScrape[fileName])):
                zipCodes.append(
                    int(self.ListPlacesCanScrape[fileName][i]["zipCode"]))
            print("totalNumber ", df["id"].count())
            count = df["id"].count()
            for i in range(startingPoint, count):  #range(0,200):
                print(i, " / ", count)
                self.loopLogic(df.iloc[i], zipCodes)
                #there something wrong with the iloc
                #it doesn't look right when you take the lat and longtidue out

    def loopLogic(self, datapoint, zipCodes):
        """Resolve one row to an address and write its output record."""
        lat = datapoint['latitude']
        lng = datapoint['longitude']
        ID = datapoint['id']
        offlineCheck = self.offlineZipCodeCheck(lat, lng, zipCodes)
        if (offlineCheck != 0):
            address = self.googleGetZipCode(lat, lng, zipCodes)
            if (address != 0):
                self.getZipCodeData(address, ID)
            else:
                # FIX: the header declares four columns; the original wrote
                # only three here ([ID, 0, 0]), misaligning the CSV.
                self.output.writerow([ID, 0, 0, 0])
        else:
            self.output.writerow([ID, 0, 0, 0])
        self.csvFile.flush()

    def getZipCodeData(self, address, ID):
        """Scrape the Sunroof page for `address` and write the data row."""
        url = self.searchURL(address)
        data = self.JSRender.getAddressPageInfo(url)
        data["id"] = ID
        self.output.writerow([
            data["id"], data["hoursSun"], data["sqFtRoof"],
            address.postal_code
        ])

    def googleGetZipCode(self, latitude, longitude, zipCodes):
        """Reverse-geocode via Google, rotating API keys; return the address
        object when its postal code is in zipCodes, else 0."""
        count = 0
        #if(time.time() - self.GoogleTimeLog < 1):
        time.sleep(1)  # crude rate limit between Google calls
        self.GoogleTimeLog = time.time()
        while (True):
            try:
                address = Geocoder(self.keys[self.currentKey]).reverse_geocode(
                    latitude, longitude)
                # Rotate to the next key for the following request.
                self.currentKey = self.currentKey + 1
                if (self.currentKey >= len(self.keys)):
                    self.currentKey = 0
                break
            except GeocoderError as GeoError:
                print(GeoError)
                count = count + 1
                time.sleep(10)
                # Give up entirely after repeated failures.
                if (count > 3):
                    sys.exit()
        if (address.postal_code is not None
                and int(address.postal_code) in zipCodes):
            return address
        else:
            return 0

    def offlineZipCodeCheck(self, latitude, longitude, zipCodes):
        """Cheap local pre-check: 1 = zipcode in list, 0 = not in list,
        2 = no offline match (caller still tries Google)."""
        zipCode = self.search.by_coordinate(latitude, longitude, radius=2,
                                            returns=2)
        if (len(zipCode) == 0):
            return 2
        else:
            zP = zipCode[0]["Zipcode"]
            if (int(zP) in zipCodes):
                return 1
            else:
                return 0
(10314 >= (geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 10301), (11120 >= ((geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 11004) | (11697 >= (geocoder.osm(df.pickup, method='reverse')).json['postal'] >= 11351)) ] choices = ['Manhattan', 'Bronx', 'Brooklyn', 'Staten Island', 'Queens'] df['boroughPickup'] = np.select(conditions, choices, default=np.nan) df.to_csv('datasets/withBoroughStart.csv', index=False) """ df['boroughPickup'] = '' search = ZipcodeSearchEngine() cnt = 0 for idx in df.index: zipCode = (search.by_coordinate(df.loc[idx].pickup_latitude, df.loc[idx].pickup_longitude, radius=1, returns=1)) if zipCode: zipCodeNumber = int(zipCode[0].Zipcode) if (zipCodeNumber >= 10001 and zipCodeNumber <= 10286): df.set_value(idx, 'boroughPickup', 'Manhattan') #print('MANHATTAN') elif (zipCodeNumber >= 10451 and zipCodeNumber <= 10475): df.set_value(idx, 'boroughPickup', 'Bronx') #print('BRONX') elif (zipCodeNumber >= 11201 and zipCodeNumber <= 11256): df.set_value(idx, 'boroughPickup', 'Brooklyn') #print('BROOKLYN') elif (zipCodeNumber >= 10301 and zipCodeNumber <= 10314): df.set_value(idx, 'boroughPickup', 'StatenIsland') #print('STATEN')
from __future__ import print_function
from uszipcode import ZipcodeSearchEngine
import geocoder

search = ZipcodeSearchEngine()

# Locate this machine from its public IP, then list zipcodes within
# 100 miles of it.
g = geocoder.ip('me')
lat = float(g.lat)
lng = float(g.lng)
res = search.by_coordinate(lat, lng, radius=100, returns=30)

# Idiomatic rewrite: the original walked `res` with a manually-incremented
# counter (num2) while also iterating it.
res1 = [entry['Zipcode'] for entry in res]
print(res1)

zipcode = search.by_zipcode("60022")
print(zipcode)
print(zipcode.Latitude)
def index():
    """Flask view: predict a count for a (time, location) query.

    POST: builds a one-row one-hot feature frame (Month/Day/Hour/Weekday plus
    one column per known zipcode), marks the zipcode nearest to the submitted
    lat/lon, and returns the model's prediction as a string.
    GET: renders the home page.
    """
    # form = predictForm()
    if request.method == 'POST':
        print(request.form)
        # Trained regressor stored on the app config at startup.
        RFR = app.config['model']
        # Zipcodes seen during training; order must match the model's
        # feature layout.
        unique_zips = [
            7030, 7605, 10001, 10002, 10003, 10004, 10005, 10006, 10007, 10009, 10010, 10011, 10012, 10013, 10014, 10016, 10017, 10019, 10020, 10021, 10022, 10023, 10024, 10025, 10026, 10027, 10028, 10029, 10030, 10031, 10032, 10033, 10034, 10035, 10036, 10037, 10038, 10039, 10040, 10044, 10065, 10069, 10075, 10111, 10115, 10119, 10128, 10154, 10165, 10167, 10170, 10173, 10174, 10199, 10278, 10314, 10451, 10452, 10453, 10454, 10455, 10456, 10457, 10458, 10459, 10460, 10461, 10462, 10463, 10465, 10466, 10467, 10468, 10469, 10470, 10472, 10473, 10475, 10705, 10708, 11101, 11102, 11103, 11104, 11105, 11106, 11109, 11201, 11203, 11204, 11205, 11206, 11209, 11210, 11211, 11213, 11214, 11215, 11216, 11217, 11218, 11219, 11220, 11221, 11222, 11223, 11224, 11225, 11226, 11230, 11231, 11232, 11233, 11234, 11235, 11237, 11238, 11351, 11354, 11355, 11357, 11358, 11360, 11361, 11366, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11374, 11375, 11377, 11378, 11379, 11385, 11411, 11412, 11413, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11427, 11432, 11433, 11434, 11435, 11436, 7024, 7201, 7310, 7631, 10018, 10103, 10112, 10168, 10280, 10471, 10474, 10701, 10704, 11004, 11207, 11208, 11212, 11228, 11229, 11236, 11239, 11364, 11365, 11429, 11590, 11692, 7307, 7458, 10282, 10301, 10305, 10550, 10703, 10801, 11005, 11356, 11359, 11414, 11428, 11430, 11558, 11559, 10306, 11001, 11021, 11362, 11426, 11580, 7002, 7010, 10303, 10502, 10803, 11003, 11042, 11581, 11596, 11694, 7302, 7650, 7666, 10310, 10311, 10464, 10552, 10805, 11024, 11552, 11691, 11742, 7105, 7114, 10302, 10601, 7093, 10804, 11363, 11697, 11040, 10553, 7306, 11756, 11010, 11030, 7670, 7632, 10304, 11020, 10522, 11553, 10710, 7208, 11023, 10309, 10606, 11554, 6831, 11516, 10308, 10594, 10977, 11570, 7660, 7036, 7608, 11563, 11575, 11803, 7604, 11514, 7601, 7643, 11548, 11788, 11520,
            11753, 11797, 10543, 7020, 11050, 11545, 11771, 7657, 6901, 11798, 11576, 10573, 2903, 7305, 10312, 7205, 7087, 7606, 10709, 10538, 7103, 7018, 11752, 7206, 11598, 10523, 11577, 11724, 11743, 7094, 11729, 11550, 7086, 7603, 7070, 11096, 11565, 11804, 11542, 8859, 10707, 10706, 7621, 10532, 11704, 10520, 11530, 10577, 11801, 11961, 7423, 11706, 7006, 7927, 7626, 11507, 11509, 10570, 10530, 7304, 7647, 10591, 7032, 7050, 7047, 7075, 10931, 10528, 11501, 11693, 11579, 11557, 7072, 7607, 11762, 7642, 7041, 11703, 7676, 8901, 11768, 11717, 7078, 7081, 11518, 7055, 7630, 11747, 7102, 10983, 10980, 11791, 11793, 7001, 10580, 8861, 7663, 11709, 10562, 11746, 11561, 10506, 11735, 11901, 8512, 11757, 7042, 6473, 11758, 7950, 6825, 6878, 6902, 6906, 6907, 6854, 10533, 10960, 10941, 7432, 10952, 7202, 7701, 6890, 7662, 7052, 12542, 7104, 6606, 10989, 11566, 7656, 7640, 7661, 8105, 6604, 7107, 11776, 10583, 11710, 8880, 10605, 7017, 7014, 7444, 7644, 7022, 7046, 11787, 7452, 7026, 7029, 11741, 7652, 7054, 11790, 11714, 11796, 7073, 11763, 7009, 7112, 11568, 7401, 10510, 10603, 11556, 11725, 10962, 11572, 11967, 11510, 10307, 10930, 6820, 7960, 7011, 6360, 11718, 7003, 6460, 7064, 7501, 8542, 18202, 7012, 7410, 8648, 7090, 11779, 7981, 10607, 8863, 7502, 7203, 10994, 11751, 8734, 7512, 7071, 6335, 7407, 18102, 6830, 6477, 8850, 7008, 8406, 6510, 11769, 11726, 7524, 12550, 19142, 7508, 6897, 7504, 7503, 11721, 11783, 10917, 7039, 7108, 7079, 8830, 6704, 7505, 8401, 8840, 18103, 7111, 7043, 19141, 11740, 7106, 11722, 18661, 10964, 7722, 7901, 6880, 7677, 8750, 10920, 7450, 7470, 7013, 10976, 11702, 6853, 10950, 10504, 8823, 7040, 10970, 6811, 10956, 10507, 18930, 7495, 6851, 19601, 7747, 7109, 7065, 10566, 10549, 7721, 10595, 7095, 7074, 12534, 7062, 8817, 7446, 11749, 11767, 11754, 19136, 7027, 18017, 11701, 6757, 7077, 12792, 7430, 7712, 7311, 8832, 7885, 7935, 10604, 10913, 7028, 11715, 6903, 7627, 11720, 8837, 8807, 7649, 10514, 95110, 7083, 19125, 8514, 11560, 11950, 10511,
            11732, 7522, 7080, 12151, 12540, 24328, 12533, 8003, 12528, 7031, 7733, 8742, 2907, 8620, 10509, 6032, 7514, 8812, 6605, 10536, 32137, 32164, 7738, 7513, 7044, 11941, 7088, 7204, 11795, 11738, 7016, 19147, 1521, 11948, 10954, 7628, 12508, 7641, 10527, 7110, 8610, 8536, 19373, 7920, 8724, 8869, 10990, 10992, 8609, 7974, 11944, 11937, 2126, 10973, 11731, 19020, 11716, 7060, 21229, 11940, 2908, 11755, 7004, 7825, 6870, 7921, 7646, 7932, 7740, 19406, 6403, 8201, 12442, 7928, 6905, 12601, 11782, 7832, 11942, 8078, 11978, 22311, 17402, 7758, 10578, 11968, 11954, 7728, 7405, 18302, 7645, 19713, 11733, 8904, 7648, 11772, 7624, 12524, 12531, 6607, 6451, 8852, 8820, 10965, 12110, 11980, 8550, 12207, 12205, 10535, 7021, 19090, 6043, 20166, 6840, 12563, 10993, 6481, 12414, 48184, 7035, 2895, 10516, 11778, 8831, 8854, 7424, 8054, 11933, 27616, 12768, 10923, 7069, 7940, 6786, 6779, 6787, 1524, 1082, 6067, 11976, 19149, 7457, 19109, 45203, 7066, 11949, 19107, 8629, 10968, 7724, 10921, 22401, 7924, 11784, 19111, 6339, 12520, 18635, 10598, 18104, 7939, 8012, 11946, 10928, 7506, 10996, 7730, 6516, 7851, 8701, 8048, 19446, 6883, 7748, 7442, 19522, 7045, 7092, 7068, 7702, 7417, 18460, 6498, 6357, 6379, 2886, 12577, 8882, 6461, 7023, 7057, 6371, 6850, 19104, 6450, 6475, 11713, 7726, 7711, 6801, 10505, 11786, 7719, 12015, 7753, 22015, 7033, 11765, 11951, 19148, 1062, 8033, 10567, 20005, 20006, 20037, 7481, 20004, 7005, 6103, 7731, 10924, 8835, 10526, 10927, 7936, 6614, 7403, 10541, 7871, 11789, 10597, 19153, 85034, 8857, 12561, 89103, 89119, 12477, 18015, 18372, 1843, 11727, 11780, 8733, 8520, 18512, 18321, 18509, 10974, 10975, 14212, 19116, 8902, 7834, 48126, 8628, 17046, 2467, 48212, 8036, 8518, 7847, 6042, 6519, 10501, 19507, 19120, 2119, 2118, 12204, 12077, 8879, 7756, 18706, 7746, 12210, 7463, 7822, 8103, 10546, 13032
        ]
        print("predicting")
        col_names = ["Month", "Day", "Hour", "Weekday"]
        col = col_names + unique_zips
        # One-row frame, all zeros; 422 == 4 time features + 418 zipcodes.
        testdummy1 = pd.DataFrame(columns=col)
        testdummy1.loc[0] = [0 for n in range(422)]
        testdummy1["Month"][0] = request.form["month"]
        testdummy1["Day"][0] = request.form["day"]
        testdummy1["Hour"][0] = request.form["hour"]
        testdummy1["Weekday"][0] = request.form["weekday"]
        search = ZipcodeSearchEngine()
        print("finding zip")
        # Nearest known zipcode to the submitted coordinates (30-mile cap).
        res = search.by_coordinate(float(request.form["lat"]),
                                   float(request.form["lon"]),
                                   radius=30,
                                   returns=1)
        ziptry = res[0]["Zipcode"]
        print(ziptry)
        # One-hot the matched zipcode column.
        # NOTE(review): raises KeyError if the zipcode isn't in unique_zips.
        testdummy1[int(ziptry)][0] = 1
        testing = RFR.predict(testdummy1)
        print("output is")
        print(testing)
        return str(testing)
        # return render_template('predict.html', count=testing)
    elif request.method == 'GET':
        print("Rendering home page")
        return render_template('home.html')
def listofusers():
    """Interactively search the local accounts database for SoundCloud users
    near a user-supplied ZIP code, then present follow-up menu actions.

    Relies on module-level helpers defined elsewhere in this file:
    getzip(), getradius(), addusertodatabase(), options(), thanks().
    Reads 'accounts.json' from the current working directory.
    """
    ######### PROMPT USER FOR ZIP CODE AND RADIUS FOR SEARCH ###########
    while True:
        try:
            print(
                '\nPlease enter the zip code you would like to find accounts around'
            )
            searchzip = int(getzip())
            print(
                '\nPlease enter the radius you would like to find accounts with'
            )
            searchradius = int(getradius())
            search = ZipcodeSearchEngine()
            # searches zip code module for a given zipcode
            zipcode = search.by_zipcode(str(searchzip))
            # finds the latitude and lognitude of a given zip code to be able
            # to search for other zips in the radius.
            # NOTE(review): mylat/mylong are never used below — the search
            # uses zipcode.Latitude / zipcode.Longitude directly.
            mylat = re.findall('"Latitude": (\S+),', str(zipcode))
            mylong = re.findall('"Longitude": (\S+),', str(zipcode))
            # finds up to 100 zips in a given radius
            res = search.by_coordinate(zipcode.Latitude,
                                       zipcode.Longitude,
                                       radius=searchradius,
                                       returns=100)
            searchresults = []
            for zipcode in res:
                # stores each of the zip codes in a list
                searchresults.append(zipcode.Zipcode)
                # NOTE(review): the loop rebinds `zipcode`, so searchcity /
                # searchstate end up describing the LAST zip in the radius,
                # not the zip the user searched for — confirm this is intended.
                searchcity = zipcode.City
                searchstate = zipcode.State
        except:
            # try and except to only take real zip codes that exist
            print(
                "\nSorry, I didn't understand that. Please enter a valid 5-digit zip code.\n"
            )
            continue
        else:
            break
    ######### FIND A LIST OF USERS WIHTHIN DATABASE ###########
    with open('accounts.json', 'r') as fp:
        names = []
        zips = searchresults
        for acczip in fp:
            for zipcode in zips:
                # allows for the regex search to continually change based on
                # the current zip code that you using from going through the
                # list of zip codes in radius
                x = '"'
                zipsearch = str(zipcode)
                y = '": "(\S+)"'
                myzipsearch = str(x + zipsearch + y)
                links = re.findall(myzipsearch, acczip)
                for link in links:
                    # stores each of the names found in the zip code from the
                    # accounts database
                    names.append(link)
    if len(names) > 0:
        print('\nSoundcloud users in our database that are within ' +
              str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
              str(searchstate) + ' (' + str(searchzip) + '):')
        for account in names:
            print(account)
    else:
        ######### NO RESULTS IN SEARCH ###########
        print(
            '\nSorry, but we do not have any users in our database that are within '
            + str(searchradius) + ' miles of ' + str(searchcity) + ', ' +
            str(searchstate) + ' (' + str(searchzip) + ')')
        print('\nPlease enter a number based on the following options:')
        print('1 - Search again')
        print('2 - Add a user')
        print('3 - Return to the main menu')
        print('4 - Exit')

        def selections1():
            # Menu shown when the search produced no results; re-prompts
            # recursively on invalid input.
            option = input('---> ')
            if option == "1":
                listofusers()
            elif option == "2":
                addusertodatabase()
            elif option == "3":
                options()
            elif option == "4":
                thanks()
            else:
                print('\nSorry this was not a valid input. Try again.')
                selections1()

        selections1()
    ######### WHAT TO DO NEXT, GIVEN RESULTS ###########
    # NOTE(review): original indentation was lost in this file; this menu
    # appears to run after either branch above (selections2 re-checks
    # len(names) itself) — confirm against the original layout.
    print('\nPlease enter a number based on the following options:')
    print('1 - Open each user\'s profile')
    print('2 - Redo your search')
    print('3 - Return to the main menu')
    print('4 - Exit')

    def selections2():
        # Menu shown after the results listing; option 1 opens each found
        # profile in the default browser.
        option = input('---> ')
        if option == "1":
            if len(names) > 0:
                for account in names:
                    webbrowser.open('https://soundcloud.com/' + str(account),
                                    new=2,
                                    autoraise=True)
                print(
                    '\nThank you, the user accounts have been opened in your default browser and you are now back to the main menu'
                )
                options()
            else:
                print(
                    '\nSorry, but we do not have any users in our database that are within '
                    + str(searchradius) + ' miles of ' + str(searchcity) +
                    ', ' + str(searchstate) + ' (' + str(searchzip) + ')')
        elif option == "2":
            listofusers()
        elif option == "3":
            options()
        elif option == "4":
            thanks()
        else:
            print('\nSorry this was not a valid input. Try again')
            selections2()

    selections2()
## latitude = property_data.latitude.value_counts().idxmax() property_data.latitude.fillna(latitude, inplace=True) longitude = property_data.longitude.value_counts().idxmax() property_data.longitude.fillna(longitude, inplace=True) print("Imputing zipcodes") inds = np.arange(property_data.shape[0]) inds = inds[property_data.regionidzip.isnull().values] print(len(inds), " zipcodes to impute") search = ZipcodeSearchEngine() zipcodes = np.array([ int( search.by_coordinate(property_data.latitude.iloc[i] * 1e-6, property_data.longitude.iloc[i] * 1e-6, radius=50, returns=1)[0]["Zipcode"]) for i in inds ]) property_data.regionidzip.loc[inds] = zipcodes print("Imputing county") mask = property_data.regionidcounty.isnull().values zips = np.unique(property_data.regionidzip.loc[mask].values) for z in zips: mask_z = (property_data.regionidzip == z).values counties, counts = np.unique( property_data.regionidcounty.loc[np.logical_and( np.logical_not(mask), mask_z)].values, return_counts=True) county = counties[counts.argmax()] property_data.regionidcounty.loc[np.logical_and(mask, mask_z)] = county
"10005", "10004", "10019", "10023", "10006", "10035", "10010", "10016", "10032", "10002", "10038", "10013", "10278", "10018", "10036", "10128", "10280", "10037", "10028", "10115", "10029", "10031", "10039", "10026", "10044", "10021", "10007", "10011", "10034", "10030", "10111", "10022", "10119", "10199", "10001", "10033", "10282", "10065", "10075", "10173", "10165", "10168", "10174", "10112", "10020", "10103", "10017", "10069", "10167", "10154", "10170" ] search = ZipcodeSearchEngine() response2 = {"features": []} for features in response['features']: # print(features['geometry'], features['properties']) coordinates = features['geometry']['coordinates'] for latlon in coordinates: sample_postcode = search.by_coordinate(latlon[1], latlon[0])[0]['Zipcode'] # print(sample_postcode, type(sample_postcode)) if sample_postcode in postcode_list: response2['features'].append(features) # print("get ya!") break j_response2 = json.dumps(response2) j_response = json.loads(j_response2) with io.open('data3.json', 'w', encoding='utf8') as outfile: str_ = json.dumps(j_response, indent=4, sort_keys=True, separators=(',', ':'), ensure_ascii=False)
class Output():
    """Interactive Bokeh dashboard of per-home recovery status over time.

    Loads per-home simulation data (CSV path or DataFrame), derives each
    home's ZIP code from its latitude/longitude, optionally filters the data
    to a single ZIP, and renders a linked map / line graph / bar plot
    dashboard driven by a day slider.
    """

    def __init__(self,
                 source,
                 desiredStates,
                 simTime,
                 outputFileName,
                 zipFilter=None,
                 mapHoverOptions=None):
        """
        :param source: CSV file path or pandas DataFrame of per-home data;
            must contain 'latitude', 'longitude', 'damage_state_start' and
            the columns named in desiredStates.
        :param desiredStates: ordered list of status column names to display.
        :param simTime: number of simulated days (int).
        :param outputFileName: output HTML file name; must end in '.html'.
        :param zipFilter: optional ZIP code to restrict the data to.
        :param mapHoverOptions: columns shown in the map hover tooltip
            (defaults to ['story']).
        """
        # Passed Parameters #
        self._allData = self._loadData(source)
        self._desiredStates = desiredStates
        self._statuses = ['no_status'] + self._desiredStates + [
            'latitude', 'longitude', 'zip', 'damage_state_start'
        ]
        self._simTime = simTime
        self._outputFileName = outputFileName
        self._desiredZipcode = zipFilter
        # BUGFIX: avoid the shared mutable-default-argument pitfall — the
        # fallback list is now created fresh per instance.
        self._mapHoverOptions = ['story'] if mapHoverOptions is None else mapHoverOptions

        # Check that all parameters are valid #
        self._checkParams()
        self._checkStateValidity()

        # Further Processing #
        # Sizing for intitial Damage States (marker size by damage level)
        self._damageStates = {
            'None': 6,
            'Slight': 8,
            'Moderate': 10,
            'Extensive': 12,
            'Complete': 14
        }
        # States that will be outputted
        self._desiredStates_ns = (['no_status'] + self._desiredStates)
        # Global counts
        self._numCategories = len(self._desiredStates_ns)
        self._numHomes = self._allData.shape[0]
        # Generate the colors based on the number of categories.
        # NOTE(review): bokeh's 'Category20' palette dict only has keys for
        # 3..20 categories — other counts raise KeyError; confirm inputs.
        self._colorsOnly = bokeh.palettes.d3['Category20'][self._numCategories]
        self._assignedColors = self._assignColors()  # assign colors to categories
        # Generate zipcodes for every home from its lat/long
        self._zipSearch = ZipcodeSearchEngine()
        self._allData['zip'] = self._allData.apply(self._getZipcode, axis=1)
        self._uniqueZipcodes = sorted(self._allData['zip'].unique().tolist())
        # Filter data
        self._filteredData = self._filterByZip(zipFilter).reset_index(
            drop=True)
        self._filteredNumHomes = len(self._filteredData)
        self._onlyStateData = self._filteredData[desiredStates].reset_index(
            drop=True)
        if self._desiredZipcode is None:
            print("Entire data source will be processed.")
            print(
                "If you would like to filter the data by a specific ZIP code:\nUse getUniqueZipcodes() to get a list of unique ZIP codes or use filterByZip({ZIPCODE}) to filter the data."
            )
        else:
            print("Data is currently filtered by the following ZIP code: " +
                  str(zipFilter))
            print(
                "If you would like to filter the data by a differnt ZIP code:\nUse getUniqueZipcodes() to get a list of unique ZIP codes or use filterByZip({ZIPCODE}) to filter the data."
            )
        self._run()

    def _checkParams(self):
        """Validate constructor parameters; raises on bad file name/sim time."""
        if (type(self._outputFileName) != str) or (self._outputFileName[-5:]
                                                   != ".html"):
            raise BaseException(
                "Invalid output file name. Output file name must end in '.html'"
            )
        if type(self._simTime) != int:
            raise BaseException(
                "Invalid simulation time. Simulation time must be an integer.")

    def _checkStateValidity(self):
        """Raise if any desired state is missing from the status list.

        NOTE(review): _statuses is built from _desiredStates in __init__, so
        this check can never fire when called from the constructor.
        """
        missing = [
            state for state in self._desiredStates
            if state not in self._statuses
        ]
        if (len(missing) != 0):
            raise BaseException(
                "Elements of the state order must also be in the desired state list.\nCurrent inconsistencies: ",
                missing)

    # Initialisation: Data sources.
    # returns None if there is an invalid source.
    def _loadData(self, source):
        """Load the data source (CSV path or DataFrame); None on failure."""
        if type(source) is str:
            try:
                return pd.read_csv(source)
            except BaseException as e:
                print("Unsupported file type or file was not found.")
                return None
        # BUGFIX: the original compared against `pandas.core.frame.DataFrame`
        # but this module imports pandas as `pd`, so the branch always raised
        # NameError. isinstance() also accepts DataFrame subclasses.
        elif isinstance(source, pd.DataFrame):
            return source
        return None

    # Initialisation: Assign colors to desired data categories.
    # returns a dictionary in form {state:color}
    def _assignColors(self):
        """Map each displayed status name to a palette color."""
        colors = {}
        for i in range(0, len(self._desiredStates_ns)):
            # _statuses starts with the same entries as _desiredStates_ns,
            # so indexing either gives the same names here.
            colors[self._statuses[i]] = self._colorsOnly[i]
        return colors

    # Initialisation: Find the zipcode based on lat/long.
    # This method is used in DataFrame.apply
    def _getZipcode(self, data):
        """Return the nearest ZIP code (int) for one row's lat/long."""
        lat = data['latitude']
        lng = data['longitude']
        zipcode = self._zipSearch.by_coordinate(lat, lng,
                                                returns=1)[0]['Zipcode']
        zipcode = int(zipcode)
        return zipcode

    # Initialisation: Filter the data source by the desired ZIP code
    def _filterByZip(self, desiredZipcode):
        """Return rows of _allData for the given ZIP; all rows if None.

        :raises TypeError: if the ZIP is not present in the data.
        """
        if desiredZipcode is None:
            return self._allData
        elif int(desiredZipcode) in self._uniqueZipcodes:
            # BUGFIX: compare as int — the 'zip' column holds ints, so a
            # string argument previously passed the membership check above
            # yet matched zero rows here.
            fltr = self._allData['zip'] == int(desiredZipcode)
            return self._allData[fltr]
        else:
            raise TypeError(
                'Invalid Zipcode. Zipcodes currently available: ' +
                str(self._uniqueZipcodes) +
                "\nIf you would like to process the entire data source, pass None"
            )

    # Initialisation: Generate a DataFrame that shows the status of every entity
    # at every point of time.
    # returns a single DataFrame
    def _generateHomeStatus(self):
        """For each day 1..simTime-1, compute every home's current status.

        A home's status on day i is the state column with the largest
        timestamp strictly less than i; 'no_status' if none has occurred yet.
        """
        home_status_list = []
        for i in range(1, self._simTime):
            single_home_status = np.empty(
                shape=[self._onlyStateData.shape[0], 1], dtype=object)
            curr_max = i
            curr = 0
            for row in self._onlyStateData.itertuples(index=False):
                row_asDict = row._asdict()
                try:
                    # Latest state transition that has already happened.
                    mostRecentTime = max(
                        value for name, value in row_asDict.items()
                        if value is not None and value < curr_max)
                    key = next(key for key, value in row_asDict.items()
                               if value == mostRecentTime)
                except ValueError:
                    # max() on an empty generator -> nothing has happened yet.
                    key = 'no_status'
                single_home_status[curr] = key
                curr += 1
            home_status_list.append(
                pd.Series(data=single_home_status.ravel(), name=i))
        result = pd.concat(home_status_list, axis=1)
        return result

    # Initialisation: Generate a DataFrame that counts the number of each status
    # at every point in time. This is used for the line graph.
    # returns a single DataFrame
    def _generateStatusCounts(self):
        """Count homes per status for each day; rows ordered as displayed."""
        status_count_list = []
        for time in range(1, self._simTime):
            status_count_list.append(
                pd.Series(data=self._allHomeStates[time].value_counts(),
                          name=str(time)))
        status_count_df = pd.concat(status_count_list, axis=1).fillna(value=0)
        # Statuses that never occur still need a (zero) row for the legend.
        missing = [
            status for status in self._desiredStates
            if status not in status_count_df.index
        ]
        return status_count_df.reindex(
            status_count_df.index.union(missing)).fillna(value=0).reindex(
                self._desiredStates_ns)

    # Initialisation: Generate a DataFrame that mirrors allHomeStates but with
    # categorical colors to display on a map.
    # returns a single DataFrame
    def _generateHomeStatusColors(self):
        """Replace status names with their assigned colors."""
        return self._allHomeStates.replace(self._assignedColors)

    # Initialisation: Generate sources for the plots.
    def _run(self):
        """(Re)build the derived tables used by visualize()."""
        self._allHomeStates = self._generateHomeStatus()
        self._stateCounts = self._generateStatusCounts()
        self._allHomeStateColors = self._generateHomeStatusColors()

    # Client-facing: Get a list of the ZIP codes in the data.
    def getUniqueZipcodes(self):
        """Print the unique ZIP codes present in the data."""
        print("ZIP codes in this dataset:")
        print(self._uniqueZipcodes)

    # Client-facing: Filter or re-filter the data by a different ZIPcode
    def filterByZip(self, desiredZipcode):
        """Re-filter the data by a ZIP code (None = no filter) and rebuild."""
        self._filteredData = self._filterByZip(desiredZipcode).reset_index(
            drop=True)
        self._filteredNumHomes = len(self._filteredData)
        self._desiredZipcode = desiredZipcode
        self._onlyStateData = self._filteredData[
            self._desiredStates].reset_index(drop=True)
        self._run()
        if desiredZipcode is None:
            print("Data is not filtered. All data will be shown.")
        else:
            print("Data now filtered by Zipcode:", desiredZipcode)

    # Client-facing: export the current vis data (statuses of each entity by day) to CSV
    def exportVisData(self, fileName="statusByDay.csv"):
        """Write the per-day status table to a CSV file."""
        data = self._generateHomeStatus()
        data.to_csv(fileName)
        print("Exported the Status By Day file to " + fileName + ".")

    # Client-facing: Generate the vis!
    def visualize(self):
        """Render the linked map, line graph, bar plot and day slider to HTML."""
        # Set up the output file
        output_file(self._outputFileName)

        ## BARPLOT ##
        # One column per day ('0', '1', ...) plus 'x'/'y' that the JS
        # callback overwrites with the selected day's counts.
        per_day = self._stateCounts.transpose().values.tolist()
        data = dict({str(i): v for i, v in enumerate(per_day)})
        data['x'] = self._desiredStates_ns  # add the statuses to the data source
        data['y'] = [0.0 for i in range(len(self._desiredStates_ns))
                     ]  # dummy column for CustomJS to overwrite
        data['colorsOnly'] = self._colorsOnly
        source = ColumnDataSource(data)
        # plot setup
        barplot = figure(plot_width=800,
                         plot_height=600,
                         tools='pan',
                         x_axis_label='Status',
                         x_range=source.data['x'],
                         y_range=ranges.Range1d(
                             start=0, end=int(self._filteredNumHomes * 1.1)),
                         title="Number of Homes by Status at Current Day")
        barplot.vbar(source=source,
                     x='x',
                     top='y',
                     width=0.6,
                     fill_color='colorsOnly',
                     line_color=None)
        bar_hover = HoverTool(tooltips=[('num', '@y')])
        barplot.yaxis.axis_label = "Number of Homes"
        barplot.add_tools(bar_hover)

        ## MAPS ##
        # Build the hover tooltip HTML from the requested columns.
        mapHoverInfo = self._mapHoverOptions
        options_html = ""
        for option in mapHoverInfo:
            options_html += "<span style=\"font-weight: bold;\">%s: </span><span>%s<br></span>" % (
                str(option), "@" + str(option))
        mapHoverInfo_html = "<div style=\"width: 450px\">" + options_html + "</div>"
        map_hover = HoverTool(tooltips=mapHoverInfo_html)
        # get average lat, long to center the map
        mean_lat = self._filteredData['latitude'].mean()
        mean_long = self._filteredData['longitude'].mean()
        # get the zip area name for the map title
        if self._desiredZipcode is None:
            areaData = self._zipSearch.by_coordinate(mean_lat,
                                                     mean_long,
                                                     returns=1)[0]
            areaName = "Greater " + areaData['City'] + " Area"
        else:
            areaData = self._zipSearch.by_zipcode(self._desiredZipcode)
            areaName = areaData['City'] + ", " + str(areaData['Zipcode'])
        map_options = GMapOptions(lat=mean_lat,
                                  lng=mean_long,
                                  map_type="roadmap")
        mapplot = GMapPlot(x_range=ranges.Range1d(),
                           y_range=ranges.Range1d(),
                           map_options=map_options)
        mapplot.title.text = areaName
        mapplot.add_tools(PanTool(), WheelZoomTool(), map_hover)
        # set Google Maps API key
        mapplot.api_key = "AIzaSyAr5Z6tbpyDQLPyD4PQmrxvqn6VEN_3vnU"
        # data wrangling for JS interaction: per-day color columns, position,
        # hover columns and marker size (from initial damage state).
        home_data_for_map_list = [
            self._allHomeStateColors.copy(), self._filteredData['latitude'],
            self._filteredData['longitude']
        ]
        for option in self._mapHoverOptions:
            home_data_for_map_list += [self._filteredData[str(option)]]
        home_status_colors_formap = pd.concat(home_data_for_map_list, axis=1)
        initialDamageStateData = self._filteredData[
            'damage_state_start'].replace(self._damageStates)
        home_status_colors_formap = pd.concat(
            [home_status_colors_formap, initialDamageStateData], axis=1)
        home_status_colors_formap['y'] = np.nan  # dummy column
        home_status_colors_formap.columns = home_status_colors_formap.columns.astype(
            str)
        mapsource = ColumnDataSource(home_status_colors_formap)
        circle = Circle(x="longitude",
                        y="latitude",
                        size='damage_state_start',
                        fill_color="y",
                        fill_alpha=0.8,
                        line_color=None)
        mapplot.add_glyph(mapsource, circle)

        ## LINE GRAPH ##
        # LINE GRAPH - CURRENT TIME INDICATOR #
        # Generate a vertical bar to indicate current time within the line graph
        # Line is generated to 10% above the number of homes and 10% below zero
        currtime_list = {
            'x': [0, 0],
            'y': [
                int(self._filteredNumHomes * 1.1),
                int(self._filteredNumHomes * -0.1)
            ]
        }  # dummy column for js callback
        for i in range(0, self._simTime):
            currtime_list[str(i)] = [i, i]
        currtime_source = ColumnDataSource(currtime_list)
        # LINE GRAPH - DATA #
        line_plot = figure(title='Overall House Status vs Time',
                           y_range=ranges.Range1d(
                               start=int(self._filteredNumHomes * 0.1),
                               end=int(self._filteredNumHomes * 1.5)))
        all_line_data = self._stateCounts.values.tolist()
        day_range = np.linspace(1, self._simTime - 2,
                                num=self._simTime - 1).tolist()
        for data, name, color in zip(all_line_data, self._statuses,
                                     self._colorsOnly):
            line_data = pd.DataFrame(data).values.tolist()
            line_plot.line(day_range,
                           line_data,
                           color=color,
                           alpha=0.8,
                           legend=name,
                           line_width=2)
        line_plot.line(x='x', y='y', source=currtime_source, line_color='red')
        line_plot.legend.location = "top_center"
        line_plot.legend.click_policy = "hide"
        line_plot.legend.orientation = "horizontal"
        line_plot.yaxis.axis_label = "Number of Homes"
        line_plot.xaxis.axis_label = "Day"

        # Requires Bokeh 0.12.7
        # Javascript callback to enable and link interactivity between the two plots.
        callback = CustomJS(args=dict(s1=source,
                                      s2=mapsource,
                                      s3=currtime_source),
                            code="""
        console.log(' changed selected time', cb_obj.value);
        var data = s1.data;
        var data2 = s2.data;
        var data3 = s3.data;
        data['y'] = data[cb_obj.value];
        data2['y'] = data2[cb_obj.value];
        data3['x'] = data3[cb_obj.value];
        s1.change.emit();
        s2.change.emit();
        s3.change.emit();
    """)

        ## SLIDER ##
        # This slider manages one callback which updates all three graphics.
        time_slider = Slider(start=1,
                             end=self._simTime - 1,
                             value=0,
                             step=1,
                             callback=callback,
                             title='DAY')
        show(
            gridplot([[mapplot], [line_plot, barplot], [time_slider]],
                     sizing_mode='stretch_both'))
def predicting_trip_duration(self, pickup, drop, date_):
    """
    This is the main method which is doing all the modelling and calculations
    needed for the prediction of the trip duration.

    :param pickup: Pick up address from the user
    :param drop: Drop off address from the user
    :param date_: Date and time from the user ("dd/mm/yyyy hh:mm" format —
        TODO confirm against the UI that supplies it)
    :return: The predicted time or message for the user
    """
    geo_locator = Nominatim()
    date_time_info = str(date_)
    if len(date_time_info) == 0:
        return "Enter trip details"
    # Parse the date part; assumes day/month/year ordering.
    dt = date_time_info.split(" ")[0].split("/")
    year, month, day = int(dt[2]), int(dt[1]), int(dt[0])
    date_entered = date(year, month, day)
    day_of_week = date_entered.isoweekday()  # Mon=1 .. Sun=7
    if day_of_week == 6 or day_of_week == 7:
        type_of_day = 1  # weekend
    else:
        type_of_day = 0  # weekday
    # Parse the "hh:mm" time part.
    t = [float(x) for x in date_time_info.split(" ")[1].split(":")]
    pickup_time = time(int(t[0]), int(t[1]), 0, 0, None)
    time_of_day = None
    # divided day into 24 hours
    # Bucket the pickup hour into a categorical feature: hours 5..23 map to
    # 0..18, hours 1..4 map to 19..22, and anything else (hour 0) falls
    # through to 23. NOTE(review): the non-monotonic mapping presumably
    # matches the training-data encoding — confirm before changing.
    if pickup_time >= time(5, 0, 0, 0, None) and pickup_time <= time(5, 59, 0, 0, None):
        time_of_day = 0
    elif pickup_time >= time(6, 0, 0, 0, None) and pickup_time <= time(6, 59, 0, 0, None):
        time_of_day = 1
    elif pickup_time >= time(7, 0, 0, 0, None) and pickup_time <= time(7, 59, 0, 0, None):
        time_of_day = 2
    elif pickup_time >= time(8, 0, 0, 0, None) and pickup_time <= time(8, 59, 0, 0, None):
        time_of_day = 3
    elif pickup_time >= time(9, 0, 0, 0, None) and pickup_time <= time(9, 59, 0, 0, None):
        time_of_day = 4
    elif pickup_time >= time(10, 0, 0, 0, None) and pickup_time <= time(10, 59, 0, 0, None):
        time_of_day = 5
    elif pickup_time >= time(11, 0, 0, 0, None) and pickup_time <= time(11, 59, 0, 0, None):
        time_of_day = 6
    elif pickup_time >= time(12, 0, 0, 0, None) and pickup_time <= time(12, 59, 0, 0, None):
        time_of_day = 7
    elif pickup_time >= time(13, 0, 0, 0, None) and pickup_time <= time(13, 59, 0, 0, None):
        time_of_day = 8
    elif pickup_time >= time(14, 0, 0, 0, None) and pickup_time <= time(14, 59, 0, 0, None):
        time_of_day = 9
    elif pickup_time >= time(15, 0, 0, 0, None) and pickup_time <= time(15, 59, 0, 0, None):
        time_of_day = 10
    elif pickup_time >= time(16, 0, 0, 0, None) and pickup_time <= time(16, 59, 0, 0, None):
        time_of_day = 11
    elif pickup_time >= time(17, 0, 0, 0, None) and pickup_time <= time(17, 59, 0, 0, None):
        time_of_day = 12
    elif pickup_time >= time(18, 0, 0, 0, None) and pickup_time <= time(18, 59, 0, 0, None):
        time_of_day = 13
    elif pickup_time >= time(19, 0, 0, 0, None) and pickup_time <= time(19, 59, 0, 0, None):
        time_of_day = 14
    elif pickup_time >= time(20, 0, 0, 0, None) and pickup_time <= time(20, 59, 0, 0, None):
        time_of_day = 15
    elif pickup_time >= time(21, 0, 0, 0, None) and pickup_time <= time(21, 59, 0, 0, None):
        time_of_day = 16
    elif pickup_time >= time(22, 0, 0, 0, None) and pickup_time <= time(22, 59, 0, 0, None):
        time_of_day = 17
    elif pickup_time >= time(23, 0, 0, 0, None) and pickup_time <= time(23, 59, 0, 0, None):
        time_of_day = 18
    elif pickup_time >= time(1, 0, 0, 0, None) and pickup_time <= time(1, 59, 0, 0, None):
        time_of_day = 19
    elif pickup_time >= time(2, 0, 0, 0, None) and pickup_time <= time(2, 59, 0, 0, None):
        time_of_day = 20
    elif pickup_time >= time(3, 0, 0, 0, None) and pickup_time <= time(3, 59, 0, 0, None):
        time_of_day = 21
    elif pickup_time >= time(4, 0, 0, 0, None) and pickup_time <= time(4, 59, 0, 0, None):
        time_of_day = 22
    else:
        time_of_day = 23
    pickup_point_address = pickup
    drop_off_address = drop
    # if both pick and drop addresses are entered same
    if pickup_point_address == drop_off_address:
        return "The trip duration will be 0 minutes"
    # Geocode both addresses to (lat, long) pairs.
    location_address_pick = geo_locator.geocode(pickup_point_address)
    pick_lat = None
    pick_long = None
    drop_lat = None
    drop_long = None
    if location_address_pick is not None:
        pick_lat = location_address_pick.latitude
        pick_long = location_address_pick.longitude
        coordinates_pick = (pick_lat, pick_long)
    else:
        # If pick address entered is wrong
        return "Enter proper pick up address"
    location_address_drop_off = geo_locator.geocode(drop_off_address)
    if location_address_drop_off is not None:
        drop_lat = location_address_drop_off.latitude
        drop_long = location_address_drop_off.longitude
        coordinates_drop = (drop_lat, drop_long)
    else:
        # If drop address entered is wrong
        return "Enter proper drop off address"
    if coordinates_pick == coordinates_drop:
        return "The trip duration will be 0 minutes"
    search = ZipcodeSearchEngine()
    # finding the zip code using lat and long/ using google api
    pickup_zip = search.by_coordinate(pick_lat, pick_long, returns=1)[0]['Zipcode']
    drop_off_zip = search.by_coordinate(drop_lat, drop_long, returns=1)[0]['Zipcode']
    distance = self.distance_to_cover(coordinates_pick, coordinates_drop)
    # NOTE(review): `table` is never used below — testTable is what feeds
    # the model.
    table = [month, type_of_day, time_of_day, pickup_zip, drop_off_zip,
             pick_lat, pick_long, drop_lat, drop_long, distance]
    # making dataframe ready for prediction/ Feeding in user entered data
    testTable = [[int(month), int(type_of_day), int(time_of_day),
                  int(pickup_zip), int(drop_off_zip), float(pick_lat),
                  float(pick_long), float(drop_lat), float(drop_long),
                  float(distance), '']]
    # cols will store names of attributes in the dataset
    cols = ['pickup_month', 'type_of_day', 'time_of_day', 'pickup_zip',
            'dropoff_zip', 'pick_lat', 'pick_long', 'drop_lat', 'drop_long',
            'distance_to_cover', 'trip_duration']
    df = pd.DataFrame(testTable, columns=cols)
    # FullDS will store the dataset provided on the given path
    full_dS = pd.read_csv('train_cleaned_new_full_with_per_hr.csv')
    train = full_dS
    # tesing file will be the dataframe of details given by the user
    test_ds = df
    # Feature will have the list of features for modelling
    # Fragmenting the data into two parts: training set and validation set
    # NOTE(review): np.random.rand is unseeded, so the validation split (and
    # hence the printed accuracy) differs between calls.
    msk = np.random.rand(len(full_dS)) < 0.75
    Train = full_dS[msk]
    validate = full_dS[~msk]
    # Generating the model based on the feature list and target variable
    features = ['pickup_month', 'type_of_day', 'time_of_day', 'pickup_zip',
                'dropoff_zip', 'pick_lat', 'pick_long', 'drop_lat',
                'drop_long', 'distance_to_cover']
    x_train = Train[list(features)].values
    y_train = Train["trip_duration"].values
    x_validate = validate[list(features)].values
    y_validate = validate["trip_duration"].values
    x_test = test_ds[list(features)].values
    # this will generate a Decision tree regressor model on the provided data
    # (disabled experiment kept by the original author)
    '''print("Decision tree regression modelling: ")
    regr_decision_tree = DecisionTreeRegressor(max_depth=10)
    regr_decision_tree.fit(x_train, y_train)
    with open('decision_tree_regression.pickle', 'wb') as handle:
        pickle.dump(regr_decision_tree, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('decision_tree_regression.pickle', 'rb') as handle:
        regr_decision_tree = pickle.load(handle)
    validation_result_decision_tree = regr_decision_tree.predict(x_validate)
    error_list = self.calculate_accuracy(y_validate, validation_result_decision_tree)
    print("Prediction Accuracy" + " " + "No. of records")
    for i in range(5):
        print(str(i * 5) + "-" + str((i + 1) * 5) + " :" + str(error_list[i]))
    print()
    final_preiction_decision = regr_decision_tree.predict(x_test)
    print("Predicted trip duration by decision tree: "+str(final_preiction_decision[0]/60))
    print()'''
    # this will generate a random forest regressor model on the provided data
    print("Random forest regression modelling: ")
    # (training disabled — a pre-trained model is loaded from disk instead)
    '''regr_random_forest = RandomForestRegressor(n_estimators=500, max_depth=10)
    print("Modelling starts")
    regr_random_forest.fit(x_train, y_train)
    with open('regression_model_lat_long_with_per_hr.pickle', 'wb') as handle:
        pickle.dump(regr_random_forest, handle, protocol=pickle.HIGHEST_PROTOCOL)'''
    # Load the pre-trained random forest and score the validation split and
    # the single user-supplied row.
    with open('regression_model_lat_long_with_per_hr.pickle', 'rb') as handle:
        regr_random_forest = pickle.load(handle)
    validation_result = regr_random_forest.predict(x_validate)
    error_list = self.calculate_accuracy(y_validate, validation_result)
    final_status = regr_random_forest.predict(x_test)
    #print("Predicted trip duration by random forest: " + str(final_status[0] / 60))
    '''print("Prediction Accuracy " + " " + "No. of records")
    for i in range(5):
        print(str(i * 5) + "-" + str((i + 1) * 5) + " :" + str(error_list[i]))
    print()
    feature_importance = regr_random_forest.feature_importances_
    print("Feature " + " Importance")
    for i in range(len(feature_importance)):
        print(features[i] + " : " + str(feature_importance[i]))'''
    '''self.labelmain = Label(self.top, text=final_status[0], bg="black", fg="green", width=25, height=10)
    self.labelmain.pack()
    # Experimenting on changing number of trees for Random Trees
    mean_squared_error_chng_trees_l = []
    no_trees_l = []
    for i in range(1, 11):
        regr_random_forest = RandomForestRegressor(n_estimators=i * 5, max_depth=10)
        print("Modelling starts")
        regr_random_forest.fit(x_train, y_train)
        validation_result_tress = regr_random_forest.predict(x_validate)
        mean_squared_error_chng_trees_l.append(mean_squared_error(y_validate, validation_result_tress))
    print(mean_squared_error_chng_trees_l)
    # Experimenting on changing depth of trees keeping number of trees as 50 which has least MSE
    mean_squared_error_chng_depth_l = []
    for i in range(1, 11):
        regr_random_forest = RandomForestRegressor(n_estimators=50, max_depth=i*2)
        print("Modelling starts")
        regr_random_forest.fit(x_train, y_train)
        validation_result_tress = regr_random_forest.predict(x_validate)
        mean_squared_error_chng_depth_l.append(mean_squared_error(y_validate, validation_result_tress))
    print(mean_squared_error_chng_depth_l)'''
    # The model predicts seconds; report whole minutes.
    print(
        "The trip duration will be " + str(int(final_status[0] / 60)) +
        " minutes"
    )
    return "The trip duration will be " + str(int(final_status[0] / 60)) + " minutes"
reader = csv.reader(csvfile, delimiter=',', quotechar='|') # change contents to floats for row in reader: # each row is a list results.append(row) line_num = 1 for arr in results: lat_lon_str = arr[0] objId = arr[1] town = arr[3] addr = arr[4] if lat_lon_str: lon = float(lat_lon_str.split(" ")[1].split("(")[1]) lat = float(lat_lon_str.split(" ")[2].split(")")[0]) zipcode = search.by_coordinate(lat, lon)[0].Zipcode ## Race API Call ## Black burl = 'https://api.census.gov/data/2015/acs5?get=B01001B_001E&for=zip+code+tabulation+area:' + zipcode + '&key=2fd73cb25990a63e4d615c3bcbd02bbded8afd33' bdata = json.load(urllib2.urlopen(burl)) bdata = list(chain.from_iterable(bdata)) bdata = float(''.join(bdata[2])) ## White wurl = 'https://api.census.gov/data/2015/acs5?get=B01001H_001E&for=zip+code+tabulation+area:' + zipcode + '&key=2fd73cb25990a63e4d615c3bcbd02bbded8afd33' wdata = json.load(urllib2.urlopen(wurl)) wdata = list(chain.from_iterable(wdata)) wdata = float(''.join(wdata[2])) ## Hispanic