from uszipcode import SearchEngine


def get_dist_by_city_state(city1=None, state1=None, city2=None, state2=None):
    if state1.lower() == state2.lower() and city1.lower() == city2.lower():
        return 0
    search = SearchEngine()
    int_zip1_list = sorted(
        [x.zipcode for x in search.by_city_and_state(city=city1, state=state1)])
    int_zip2_list = sorted(
        [x.zipcode for x in search.by_city_and_state(city=city2, state=state2)])
    # use the first (lowest) ZIP from each list as the representative point
    # print('using zip {} and {}'.format(int_zip1_list[0], int_zip2_list[0]))
    dist = get_dist_by_zip(int_zip1_list[0], int_zip2_list[0])
    return dist
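# Hedged usage sketch for get_dist_by_city_state: it assumes the
# get_dist_by_zip helper referenced above is defined elsewhere in the module;
# the city/state values are placeholders.
print(get_dist_by_city_state(city1='Boston', state1='MA',
                             city2='Providence', state2='RI'))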
def getZipcode(city, state):
    search = SearchEngine()
    zipSearch = search.by_city_and_state(city, state)
    return zipSearch[0].zipcode
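# Minimal usage sketch for getZipcode; 'Seattle'/'WA' are placeholder inputs,
# and the result is whichever ZIP the search engine lists first for that city.
print(getZipcode('Seattle', 'WA'))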
def main(): state2 = input("What" + '\x1b[1;31;40m' + ' state ' + '\x1b[0m' + "do you want the temperature of?\n") city2 = input("What" + '\x1b[1;31;40m' + ' city ' + '\x1b[0m' + "do you want the temperature of?\n") search = SearchEngine(simple_zipcode=True) res = search.by_city_and_state(city2, state2, zipcode_type='Standard', sort_by='zipcode', ascending=True, returns=5) len(res) try: zipcode = res[0] except IndexError: print("Please type in a valid USA State/City\n") main() zipcode city = zipcode.major_city state = zipcode.state urlend = zipcode.zipcode URL = 'https://weather.com/weather/today/l/' + urlend page = requests.get(URL) soup = BeautifulSoup(page.content, 'html.parser') temp = soup.find(class_='CurrentConditions--tempValue--3KcTQ').get_text() print('\nThe temperature right now in ' + city + ', ' + state + ' is ' + '\x1b[1;32;40m' + temp + '\x1b[0m' + "\n") main()
def get_lat_lng_of_city_state(city, state):
    search = SearchEngine(simple_zipcode=True)
    info = []
    # keep the coordinates of the first result that has a known latitude
    for zc in search.by_city_and_state(city=city, state=state):
        if zc.lat is not None:
            info = [zc.lat, zc.lng]
            break
    return info
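# Short call sketch for get_lat_lng_of_city_state; 'Austin'/'TX' are
# placeholder inputs. An empty list means no ZIP had a known latitude.
coords = get_lat_lng_of_city_state('Austin', 'TX')
if coords:
    lat, lng = coords  # coordinates of the first ZIP with a known latitude
    print(lat, lng)
else:
    print('no coordinates found')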
def getZip(location, zipList):
    # `location` is either a bare ZIP string or a "City, ST" pair
    if location.isdigit():
        zipList.append(location)
    else:
        try:
            search = SearchEngine(simple_zipcode=True)
            parts = location.split(', ')
            res = search.by_city_and_state(parts[0], parts[1])
            zipList.append(res[0].zipcode)
        except Exception:
            zipList.append("None")
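# Sketch of the three input shapes getZip handles; the sample strings are
# hypothetical.
zips = []
getZip('02139', zips)          # a digit string is appended as-is
getZip('Cambridge, MA', zips)  # a "City, ST" pair resolves to its first ZIP
getZip('not-a-place', zips)    # anything unparseable falls through to "None"
print(zips)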
def fill_missing_arguments(args: dict) -> dict:
    search_engine = SearchEngine(db_file_dir="backend/tmp")
    if args['zipcode'] != 0:
        zipcode = search_engine.by_zipcode(args['zipcode'])
        args['coordinates'] = [zipcode.lng, zipcode.lat]
    elif args['city'] != '*' and args['state'] != '*':
        zipcode = search_engine.by_city_and_state(args['city'], args['state'])[0]
        args['zipcode'] = zipcode.zipcode
        args['coordinates'] = [zipcode.lng, zipcode.lat]
    return args
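# Usage sketch inferred from the branches above: zipcode == 0 and '*' appear
# to act as "missing value" sentinels. The dict values are hypothetical, and
# the call assumes the backend/tmp database directory is available.
args = fill_missing_arguments({'zipcode': 0, 'city': 'Denver', 'state': 'CO'})
print(args['zipcode'], args['coordinates'])  # filled ZIP plus [lng, lat]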
def search(): """ This function takes city and state as an input, and returns the string value. Search() also will automatically execute the same logic found in lookup(). """ code = input("Enter [city,state]: ").split(',') a = SearchEngine(simple_zipcode=True) zipcode = a.by_city_and_state(code[0], code[1])[0] print(colored("Zipcode Information:", "green")) pprint(zipcode.to_json()) target = " ".join(code) return target
import pandas as pd
from uszipcode import SearchEngine


def get_zips_for_city(city, state):
    # keep only the ZIPs whose median household income is at or above the
    # city-wide median
    search = SearchEngine(simple_zipcode=True)
    res = search.by_city_and_state(city, state, returns=0)  # returns=0 -> no limit
    print('# of zips found: {}'.format(len(res)))
    df_zip = pd.DataFrame([{
        'zip': e.zipcode,
        'home_value': e.median_home_value,
        'income': e.median_household_income
    } for e in res])
    income_threshold = df_zip['income'].quantile(0.5)
    zips = df_zip.loc[df_zip['income'] >= income_threshold, 'zip'].values
    print('# of zips found with income at or above the median: {}'.format(
        len(zips)))
    return zips
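# Quick driver sketch for get_zips_for_city; 'Chicago'/'IL' are placeholder
# inputs. The return value is an array of ZIP code strings.
above_median_zips = get_zips_for_city('Chicago', 'IL')
print(above_median_zips[:5])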
# fragment of a row-processing loop: idx and row come from iterating over dfN
country = dfN.loc[idx, 'country']
if country not in focus:
    dfN.loc[idx, 'division'] = country

# convert sets of states into subnational regions
division = dfN.loc[idx, 'division']
if division not in ['', 'unknown']:
    if division in geoLevels.keys():
        dfN.loc[idx, 'country'] = geoLevels[dfN.loc[idx, 'division']]

# convert sets of cities into sub-state regions
location = dfN.loc[idx, 'location']
if location not in ['', 'unknown'] and division == 'Connecticut':
    try:
        res = search.by_city_and_state(location, "CT")
        area_zip = res[0].zipcode
        if area_zip in geoLevels.keys():
            dfN.loc[idx, 'location'] = geoLevels[area_zip]
        else:
            print(row['location'] + ' has a zip code (' + area_zip +
                  ') not found in the geo-scheme.')
            notfound.append(location)
    except Exception:
        notfound.append(location)
        dfN.loc[idx, 'location'] = ''

# flatten location names into division names for divisions that are not a
# focus of study
if division not in focus:
    dfN.loc[idx, 'location'] = division

print('Processing metadata for... ' + row['strain'])
def main():
    # relies on project-local helpers (auth, get_user_ids, get_user_followers,
    # save_user_follower_networkx_graph, open_nx_graph, gather_cliques) and on
    # libraries imported at module top
    search_dir = 'twitter_geo_searches/'
    if not os.path.exists(os.path.dirname(search_dir)):
        os.makedirs(os.path.dirname(search_dir), 0o755)

    twpy_api = auth.get_access_creds()
    pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))

    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description='Get twitter user ids and their follower ids from Tweepy '
                    'and save in different formats')
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search',
        help='Gather Twitter user ids and followers by city, state and radius')
    search_parser.add_argument('-c', '--city', required=True, action='store',
                               dest='city',
                               help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument('-s', '--state', required=True, action='store',
                               dest='state',
                               help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument('-r', '--radius', required=True, action='store',
                               dest='radius',
                               help='Radius to search Twitter API for user ids '
                                    '(miles or kilometers -- ex: 50mi or 50km). REQUIRED')
    search_parser.add_argument('-f', '--filename', required=True, action='store',
                               dest='filename',
                               help='Name of output file for networkx graph data. REQUIRED')

    netx_parser = subparsers.add_parser(
        'netx', help='Perform operations on already generated networkx graph')
    netx_parser.add_argument('-q', '--clique', action='store_true',
                             help='Find cliques with networkx')
    netx_parser.add_argument('-x', '--clq_filename', action='store',
                             help='Provide a filename for the serialized output '
                                  'of find_cliques')
    netx_parser.add_argument('-g', '--graph_filename', required=True,
                             action='store', dest='graph_filename',
                             help='Networkx input data filename. REQUIRED')
    netx_parser.add_argument('-o', '--out_filename', required=True,
                             action='store', dest='out_filename',
                             help='Networkx output data filename. REQUIRED')
    netx_parser.add_argument('-k', '--comm', action='store_true',
                             help='Find communities with networkx')
    netx_parser.add_argument('-p', '--print_graph', action='store_true',
                             help='Print networkx graph')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if not args.mode:
        print('ERROR: No arguments provided. Use -h or --help for help')
        return

    if args.mode == 'search':
        city = args.city
        state = args.state
        search_radius = args.radius
        search_filename = args.filename + '.json'

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(city, state, returns=50)

        user_ids = []
        user_followers = []

        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes), track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id=str(zipcode.zipcode) + '\t')
            latitude = zipcode.lat
            longitude = zipcode.lng
            user_ids.extend(
                get_user_ids(twpy_api, latitude, longitude, search_radius))

        # gets the followers of all the retrieved user ids, n depths deep
        n = 2
        for i in range(0, n):
            user_ids, user_followers = get_user_followers(twpy_api, set(user_ids))

        filename = os.path.join(search_dir, search_filename)
        save_user_follower_networkx_graph(user_followers, filename)

    if args.mode == 'netx':
        graph_filename = os.path.join(search_dir, args.graph_filename + '.json')
        output_filename = os.path.join(search_dir, args.out_filename + '.json')
        graph = open_nx_graph(graph_filename)
        cliques = []

        if args.clique:
            for clique in pool.map(gather_cliques, nx.find_cliques(graph)):
                cliques.append([int(member) for member in clique])
            with open(output_filename, 'w') as output:
                for clique in cliques:
                    output.write('%s,\n' % (clique))
        elif args.comm:
            if args.clq_filename:
                clique_filename = os.path.join(search_dir,
                                               args.clq_filename + '.json')
                # load the clique topology file
                with open(clique_filename, 'r') as find_cliques_file:
                    cliques = [clique for cliques in find_cliques_file
                               for clique in ast.literal_eval(cliques)]
            with open(output_filename, 'w') as output:
                for node in pool.map(gather_cliques,
                                     community.girvan_newman(graph)):
                    print(node)
                    # output.write(str([int(item) for item in node]) + ', \n')
        elif args.print_graph:
            nx.draw(graph)
            plt.show()

    print("Job complete")
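# Hypothetical shell invocations inferred from the parsers above (the script
# name is a placeholder):
#   python gather_users.py search -c Columbus -s Ohio -r 50mi -f follower_graph
#   python gather_users.py netx -g follower_graph -o cliques_out -q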
def main():
    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description='Get twitter user ids and their follower ids using Tweepy '
                    'and save in different formats')
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search', help='Gather Twitter user ids by city, state and radius')
    search_parser.add_argument('-c', '--city', required=True, action='store',
                               dest='city',
                               help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument('-s', '--state', required=True, action='store',
                               dest='state',
                               help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument('-r', '--radius', required=True, action='store',
                               dest='radius',
                               help='Radius to search Twitter API for user ids '
                                    '(miles or kilometers -- ex: 50mi or 50km). REQUIRED')
    search_parser.add_argument('-d', '--depth', required=True, action='store',
                               dest='depth',
                               help='How far to traverse into user follower '
                                    'relationships when gathering users. REQUIRED')
    search_parser.add_argument('-f', '--filename', required=True, action='store',
                               dest='filename',
                               help='Name of output file to store gathered users in. REQUIRED')
    search_parser.add_argument('-z', '--creds', required=True, action='store',
                               dest='creds',
                               help='Path to Twitter developer access credentials. REQUIRED')

    continue_parser = subparsers.add_parser(
        'getfws',
        help='Takes an already gathered JSON list of users and retrieves '
             'their followers')
    continue_parser.add_argument('-f', '--filename', action='store',
                                 help='Filename of the previously saved Twitter '
                                      'user ids in .json format')
    continue_parser.add_argument('-d', '--depth', required=True, action='store',
                                 dest='depth',
                                 help='How far to traverse into user follower '
                                      'relationships when searching for followers. REQUIRED')
    continue_parser.add_argument('-z', '--creds', required=True, action='store',
                                 dest='creds',
                                 help='Path to Twitter developer access credentials. REQUIRED')

    convert_parser = subparsers.add_parser(
        'convert',
        help='Convert a user followers dict to a users list and save it. This '
             'is the file format used when continuing the get followers '
             'function and in get_community_tweets.py')
    convert_parser.add_argument('-i', '--input_file', action='store',
                                help='Filename of the previously saved followers dictionary')
    convert_parser.add_argument('-o', '--out_file', action='store',
                                help='Filename to store the output. Just the '
                                     'filename, no path needed; the output file '
                                     'is saved in the folder of the input file')

    netx_parser = subparsers.add_parser(
        'netx', help='Create cliques or communities from user follower data')
    group = netx_parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-q', '--gen_cliques', action='store_true',
                       dest='gen_cliques',
                       help='Generate cliques from user followers dictionary')
    group.add_argument('-c', '--gen_comms', action='store_true',
                       dest='gen_comms',
                       help='Generate communities from user followers dictionary')
    netx_parser.add_argument('-n', '--min_size', action='store', dest='min_size',
                             nargs='?', type=int, const=1, default=4,
                             help='Constraint for min size of clique or '
                                  'community (default is 4)')
    netx_parser.add_argument('-i', '--in_filename', required=True, action='store',
                             dest='in_filename',
                             help='User followers dictionary file. REQUIRED')
    netx_parser.add_argument('-o', '--out_filename', required=True, action='store',
                             dest='out_filename',
                             help='Output topology filename. REQUIRED')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if args.mode == 'convert':
        working_dir = get_directory_of_file(args.input_file)
        convert_followers_to_users(args.input_file, args.out_file, working_dir)

    if args.mode == 'getfws':
        twpy_api = auth.get_access_creds(args.creds)
        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return
        working_dir = get_directory_of_file(args.filename)
        user_ids = read_json(args.filename)
        if not user_ids:
            print('Error: No users found in provided file')
            return
        # gets the followers of all the retrieved user ids 'depth' times
        collect_user_followers(args.depth, twpy_api, working_dir,
                               args.filename, user_ids)

    if args.mode == 'search':
        twpy_api = auth.get_access_creds(args.creds)
        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return
        working_dir = get_directory_of_file(args.filename)

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(args.city, args.state,
                                                returns=50)

        user_ids = []
        user_followers = []

        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes), track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id='zip code:' + str(zipcode.zipcode) + '\t')
            user_ids.extend(
                get_user_ids(twpy_api, zipcode.lat, zipcode.lng, args.radius))

        write_json(args.filename, list(set(user_ids)))

    if args.mode == 'netx':
        user_followers = read_json(args.in_filename)
        pythonify_dict(user_followers)
        print("Number of followers: " + str(len(user_followers)))
        output_filename = args.out_filename + '.json'
        graph = build_netx_graph(user_followers)

        if args.gen_cliques:
            generate_cliques(graph, output_filename, args.min_size)
        if args.gen_comms:
            generate_communities(graph, output_filename, args.min_size)
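# Hypothetical shell invocations inferred from the parsers above (the script
# name is a placeholder):
#   python get_users.py search -c Columbus -s Ohio -r 50mi -d 2 -f users -z creds.json
#   python get_users.py netx -i user_followers.json -o topology -q -n 5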
def citystate(field):
    import random

    from joblib import load
    from uszipcode import SearchEngine

    stmt = db.session.query(City_zip).statement
    df_census = pd.read_sql_query(stmt, db.session.bind)

    search = SearchEngine(simple_zipcode=True)
    print(field)
    city = field.split('-')[0]
    state = field.split('-')[1]
    res = search.by_city_and_state(city, state, returns=30)
    total = len(res)
    lat = res[0].lat
    lng = res[0].lng
    print(total)

    # pick which of the returned ZIPs to use: all of them when there are few,
    # otherwise a random sample of size total - 1
    if total <= 10:
        count = [int(x) for x in range(total)]
    else:
        count = random.sample(range(0, total), total - 1)
    print(count)

    zipArry = []
    ziplat = []
    ziplng = []
    for x in count:
        item = res[x]
        if item.lat is None:
            continue
        zipArry.append(item.zipcode)
        ziplat.append(float(item.lat))
        ziplng.append(float(item.lng))

    df_test = pd.DataFrame({'Zipcode': zipArry,
                            'Latitude': ziplat,
                            'Longitude': ziplng})
    print(df_test)

    merge_table = pd.merge(df_census, df_test, on="Zipcode", how='inner')
    merge_table = merge_table[(merge_table != 0).all(1)]
    merge_table = merge_table[(merge_table != '').all(1)]
    merge_table = merge_table.dropna()
    print(merge_table)

    # features: ["MedianAge", "HouseholdIncome", "PerCapitaIncome", "PovertyRate"]
    loaded_model = load('classifier.joblib')  # logistic regression model
    # alternative: a Keras model via load_model("neural.h5")

    merge_table['prediction'] = ""
    for index, row in merge_table.iterrows():
        input_data = [float(row['MedianAge']), float(row['HouseholdIncome']),
                      float(row['PerCapitaIncome']), float(row['PovertyRate'])]
        # logistic regression prediction
        result = loaded_model.predict([input_data])[0]
        merge_table.at[index, 'prediction'] = result
        # for the neural network model instead:
        # result = loaded_model.predict_classes(np.array([input_data]))
        # merge_table.at[index, 'prediction'] = \
        #     {2: 'high', 1: 'medium'}.get(result[0], 'low')
    print(merge_table)

    data = {
        "total_results": len(count),
        "latitude": merge_table.Latitude.tolist(),
        "longitude": merge_table.Longitude.tolist(),
        "zipcode": merge_table.Zipcode.tolist(),
        "MedianAge": merge_table.MedianAge.tolist(),
        "HouseholdIncome": merge_table.HouseholdIncome.tolist(),
        "PerCapitaIncome": merge_table.PerCapitaIncome.tolist(),
        "PovertyRate": merge_table.PovertyRate.tolist(),
        "Predictions": merge_table.prediction.tolist()
    }
    return jsonify(data)
    baltimore_county_map,  # lc1
    richmond_city_map,     # lc2
    henrico_county_map,    # lc2
    phoenix_city_map,      # lc3
    maricopa_county_map,   # lc3
    houston_city_map,      # lc4
    harris_county_map      # lc4
]

### Get all Zips
all_zip = []

# search for all the zips and add them to a list
for cs in city_state:
    temp1 = search.by_city_and_state(cs[0], cs[1])
    for t in temp1:
        tempL = [cs[0]]
        tempL.append(t.zipcode)
        if len(t.common_city_list) != 0:
            tempL.append(t.common_city_list[-1])
        else:
            tempL.append(cs[0] + '?')
        tempL.append(t.lat)
        tempL.append(t.lng)
        all_zip.append(tempL)

# format of all_zip: [[city, zip, common city, lat, lng]]

# replace None coordinates with the previous entry's values
prevE = []
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from uszipcode import SearchEngine, Zipcode


def deal_location(deal):
    offer_location = deal['Location'].tolist()
    search = SearchEngine(simple_zipcode=True)

    city_list = []
    state_list = []
    zipcode_list = []
    for location in offer_location:
        try:
            city = location.split(',')[0].lstrip().lower().replace('msa', '')
            state = location.split(',')[1].lstrip().lower().replace('msa', '')
            zipcode = search.by_city_and_state(city, state,
                                               sort_by=Zipcode.population,
                                               ascending=False,
                                               returns=1)[0].zipcode
        except (ValueError, IndexError, AttributeError):
            city = 'nan'
            state = 'nan'
            zipcode = 'nan'
        city_list.append(city)
        state_list.append(state)
        zipcode_list.append(zipcode)

    deal['zipcode'] = pd.Series(zipcode_list)
    deal = deal[deal['zipcode'] != 'nan']

    deal_totalcat = pd.DataFrame(
        deal.groupby(['zipcode'])['TotalCAT'].sum()).reset_index().drop_duplicates()
    deal_totalcmt = pd.DataFrame(
        deal.groupby(['zipcode'])['Totalactivitys'].sum()).reset_index().drop_duplicates()
    off_total = deal_totalcat.merge(deal_totalcmt, how='left', on=['zipcode'])
    off_zipcode = off_total[off_total['TotalCAT'] > 0].reset_index(drop=True)

    off_lat = []
    off_lon = []
    for ele in off_zipcode['zipcode']:
        ele_search = search.by_zipcode(ele)
        off_lat.append(ele_search.lat)
        off_lon.append(ele_search.lng)
    off_zipcode['deal_lat'] = pd.Series(off_lat)
    off_zipcode['deal_lon'] = pd.Series(off_lon)
    off_zipcode = off_zipcode.dropna().reset_index(drop=True)

    lat = off_zipcode['deal_lat'].values
    lon = off_zipcode['deal_lon'].values
    total_activity = off_zipcode['Totalactivitys'].values
    total_CAT = off_zipcode['TotalCAT'].values

    # 1. Draw the map background
    fig = plt.figure(figsize=(30, 30))
    m = Basemap(projection='lcc', resolution='h',
                lat_0=37.09, lon_0=-95.71,
                width=6E6, height=4E6)
    m.shadedrelief()
    m.drawcoastlines(color='gray')
    m.drawcountries(color='gray')
    m.drawstates(color='gray')

    # 2. Scatter the deal data, with color reflecting total activity
    # and size reflecting total CAT
    m.scatter(lon, lat, latlon=True, s=total_CAT, c=total_activity,
              cmap='Blues', alpha=1)

    # 3. Create colorbar and legend
    cbar = plt.colorbar(label=r'Number of Total Activity in deal')
    cbar.ax.tick_params(labelsize=20)
    # plt.clim(30, 70)

    # make legend with dummy points
    for a in [400, 1000, 3000]:
        plt.scatter([], [], c='blue', alpha=1, s=a,
                    label=str(a) + ' Thousand $ in Total activity Amount in deals')
    plt.legend(title='Size of the dot indicates...', scatterpoints=1,
               frameon=False, labelspacing=1, loc='lower left')

    plt.title('deal Location in the USA', fontsize=20)
    plt.tight_layout()
    plt.savefig('deal location in the USA.png')
    plt.show()
    return off_zipcode
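# Hedged driver sketch for deal_location: the column names mirror what the
# function reads ('Location', 'TotalCAT', 'Totalactivitys'); the rows are
# fabricated placeholders that only illustrate the expected input shape.
sample_deals = pd.DataFrame({
    'Location': ['Boston, MA', 'Austin, TX MSA'],  # hypothetical locations
    'TotalCAT': [1200, 800],                       # hypothetical totals
    'Totalactivitys': [34, 21],
})
off_zipcode = deal_location(sample_deals)
print(off_zipcode.head())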