return agency.replace("`", "'") \ .replace("Police Department", "") \ .replace("Sheriff's Office", "") \ .replace("Sheriffs Office", "") \ .replace("PD", "") \ .replace("Pd", "") \ .replace("Police Dept.", "") \ .replace("Police Dept", "") \ .replace("Division of Police", "") \ .replace("Police", "") \ .replace("/ Security", "") \ .replace("& ", "") \ .strip() geolocator_google = GoogleV3(api_key='AIzaSyApqcChB-VAfVx3KqzfW2NvYAfHFyrxRuc') geolocator_osm = Nominatim() courts = list(Database.get_circuit_courts()) court_names = [court['name'] for court in courts] rows = [] with open(sys.argv[1]) as csvfile: reader = csv.DictReader(csvfile) for row in reader: rows.append(row) fieldnames = rows[0].keys() agencies = [key for key, group in groupby(rows, lambda x: x['agency'])] agencies_to_court = {} sanitized_agencies_to_court = {} for agency in agencies:
class BusinessData(Scorer):
    """
    Uses the Yelp api and geocoding to get and format business data the way we
    want.
    """

    yelp = YelpAPI(settings.CONSUMER_KEY, settings.CONSUMER_SECRET,
                   settings.TOKEN, settings.TOKEN_SECRET)
    geolocator = GoogleV3(api_key=settings.GOOGLE_MAPS_API_KEY)

    def get_chains_near(self, latitude, longitude, radius, term='burrito',
                        category_filter='mexican'):
        """
        Gets formatted businesses near the given location within the radius.

        NOTE(review): the Yelp query term is hard-coded to 'Chipotle'; the
        `term` parameter is accepted but unused — confirm this is intentional.
        """
        # Search by lat/lng
        search_results = self.yelp.search_query(
            ll='%s,%s' % (latitude, longitude),
            term='Chipotle',
            category_filter=category_filter,
            radius_filter=radius,
        )
        businesses = search_results['businesses']
        businesses = self._format_data((latitude, longitude), businesses)
        return businesses

    def replacement_burrito(self, latitude, longitude):
        """Average rating of nearby boring chains, or 3.5 when none are found."""
        businesses = self.get_chains_near(latitude, longitude, CHAIN_RADIUS)
        scores = [business['rating'] for business in businesses.values()
                  if business['name'] in BORING_CHAINS]
        if len(scores) > 0:
            return sum(scores) / len(scores)
        else:
            return 3.5

    def score(self, location):
        """Score a location by the density of above-replacement burrito spots.

        Returns a (rounded score, RADIUS, businesses dict) tuple.
        """
        # Geolocate the given location
        address, (latitude, longitude) = self.geolocator.geocode(location)
        replacement_burrito = self.replacement_burrito(latitude, longitude)
        print("replacement burrito = ", replacement_burrito)
        businesses = self.get_all_near(latitude, longitude, RADIUS)
        # vorb = value over replacement burrito
        for business in businesses.values():
            business['vorb'] = business['rating'] - replacement_burrito
        score = 0.0
        for (multiplier, vorb_min, vorb_max) in (
            (1.0, 0, 0.24),
            (14.0, 0.25, 0.49),
            (20.0, 0.50, 0.74),
            (25.0, 0.75, 1.24),
            (40.0, 1.25, 5),
        ):
            den = self.density(vorb_min, vorb_max, businesses.values())
            print("density of ", vorb_min, "to", vorb_max, "=", den)
            score += multiplier * den
        return (round(score, 0), RADIUS, businesses)

    def density(self, vorb_min, vorb_max, businesses):
        """Sigmoid-squashed businesses-per-metre density within a vorb band."""
        distances = [business['distance'] for business in businesses
                     if business['vorb'] <= vorb_max
                     and business['vorb'] >= vorb_min]
        if len(distances) == 0:
            return 0
        else:
            unsquashed = (len(distances) / sum(distances)) * 1000
            squashed = sigmoid(unsquashed)
            return squashed

    def get_all_near(self, latitude, longitude, radius, term='burrito',
                     category_filter='mexican'):
        """
        Gets formatted businesses near the given location within the radius.
        """
        # Search by lat/lng
        search_results = self.yelp.search_query(
            ll='%s,%s' % (latitude, longitude),
            term=term,
            category_filter=category_filter,
            radius_filter=radius,
        )
        businesses = search_results['businesses']
        businesses = self._format_data((latitude, longitude), businesses)
        return businesses

    def _format_data(self, location, businesses):
        """
        Things we care about: name, location, distance, rating, review_count
        """
        # Get all the missing data we need
        pool = Pool()
        geolocated_businesses = pool.map(geolocate_business, businesses)
        # FIX: pass the search origin through so distances can be backfilled.
        backfilled_businesses = self._backfill_distances(
            location, geolocated_businesses)
        # Only keep the stuff we care about
        formatted_businesses = {}
        for business in backfilled_businesses:
            formatted_businesses[business['id']] = {
                'name': business['name'],
                'address': ' '.join(business['location']['address']),
                'lat': business['location']['coordinate']['latitude'],
                'lon': business['location']['coordinate']['longitude'],
                'distance': business['distance'],
                'rating': business['rating'],
                'review_count': business['review_count'],
            }
        return formatted_businesses

    def _backfill_distances(self, location, businesses):
        """Fill in a 'distance' (metres from the search origin) where missing.

        FIX: the original was missing `self` (so the bound call shifted
        arguments) and referenced undefined `latitude`/`longitude`; use each
        business's own coordinates instead.
        """
        for business in businesses:
            if 'distance' not in business:
                coordinate = business['location']['coordinate']
                business['distance'] = vincenty(
                    location,
                    (coordinate['latitude'], coordinate['longitude'])).meters
        return businesses
# NOTE(review): `ua` and `google_api_key` are defined elsewhere in this file.
import geopy.geocoders.base
geopy.geocoders.base.DEFAULT_USER_AGENT = ua
import urllib.request
urllib.request.OpenerDirector.client_version = ua
from life360 import life360
import math
import time
import os
import sys
import datetime
import re
from geopy.geocoders import GoogleV3
geolocator = GoogleV3(api_key=google_api_key, user_agent=ua)
import json


def on_disconnect(mqtt, userdata, rc):
    """Paho-style disconnect callback: retry reconnecting until it succeeds.

    :param mqtt: the MQTT client instance
    :param userdata: unused callback userdata
    :param rc: disconnect result code; 0 means a clean disconnect
    """
    print("Disconnected from MQTT server with code: {}".format(rc))
    while rc != 0:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and made the retry loop impossible to interrupt.
        try:
            time.sleep(1)
            rc = mqtt.reconnect()
        except Exception:
            pass
    print("Reconnected to MQTT server.")
from geopy.geocoders import GoogleV3
from pyproj import Transformer

# NOTE(review): hard-coded Google API key — consider loading it from the
# environment instead of committing it to source.
locator = GoogleV3(api_key="AIzaSyACSuQBDhEU_qREpV4NevzKn0bi_W4ra0E")


class AddressToCrs:
    """Geocode a Belgian address and project it from WGS84 to Lambert 72.

    The address is resolved with Google's geocoder, then the resulting
    latitude/longitude (EPSG:4326) is transformed to EPSG:31370.
    """

    def __init__(self, street, city):
        self.street = street
        self.city = city
        self.locator = locator
        # Resolve the full address (country fixed to Belgium).
        full_address = f"{self.street}, {self.city}, Belgique"
        self.toll = self.locator.geocode(full_address)
        # Project WGS84 -> Belgian Lambert 72 once, up front.
        self.transformer = Transformer.from_crs(4326, 31370)
        self.target = self.transformer.transform(self.toll.latitude,
                                                 self.toll.longitude)

    def to_long_latt(self):
        """Return the geocoded (longitude, latitude) pair."""
        return self.toll.longitude, self.toll.latitude

    def to_crs31370(self):
        """Return the projected Lambert 72 coordinates."""
        return self.target

#print(AddressToCrs('Rue de la station 34', '7830 Silly').to_long_latt())
# demo assumes tweets in separate text file! import emoji from datetime import datetime from geopy.geocoders import GoogleV3 from geopy.distance import vincenty from vincenty import vincenty from decimal import * import re tweets_organized = [] entries = [] tweets = {} sources = {} hashtags = {} urls = {} geolocator = GoogleV3() saved_locations = { 'Dallas, TX': (32.7766642, -96.79698789999998), 'New York, NY': (40.7127837, -74.00594130000002), 'Chicago, IL': (41.8781136, -87.62979819999998), 'Flint, MI': (43.0125274, -83.68745619999999), 'Boston, MA': (42.3600825, -71.05888010000001), 'Seattle, WA': (47.6062095, -122.3320708), 'Los Angeles, CA': (34.0522342, -118.2436849), 'London, UK': (34.0522342, -118.2436849), 'Washington, D.C.': (38.9071923, -77.03687070000001), 'Nashville, TN': (38.9071923, -77.03687070000001), 'Orlando, FL': (28.5383355, -81.37923649999999), 'Austin, TX': (30.267153, -97.74306079999997), 'Houston, TX': (29.7604267, -95.3698028),
from geopy.geocoders import GoogleV3
import requests, json, time
import webbrowser


def locate_bus():
    """Fetch the bus's latest GPS fix from ThingSpeak as [lat, lon]."""
    link_bus = 'https://api.thingspeak.com/channels/478427/fields/1.json?api_key=0BGHNMN1HRECL0QU&results=2&location=true'
    bus = json.loads(requests.get(link_bus).text)
    feed = bus['feeds'][0]
    device_id = int(feed['field1'])  # parsed for validation; not used further
    lati_bus = float(feed['latitude'])
    longi_bus = float(feed['longitude'])
    print(lati_bus)
    print(longi_bus)
    return [lati_bus, longi_bus]


whereisBus = locate_bus()

# Reverse-geocode the coordinates into a street address.
geocoder = GoogleV3()
location_list = geocoder.reverse(whereisBus)
location = location_list[0]
address = location.address
print(address)

# Open the spot in Google Maps (satellite view).
open_link = f'https://www.google.com/maps/place/{whereisBus[0]},{whereisBus[1]}/data=!3m1!1e3'
print(open_link)
webbrowser.open(open_link)
def main(location, year):
    """ Run data gathering scripts to fetch weather observation data from Dark
    Sky API for given year and location and save it as JSON at
    ../../data/raw/{location}/{year} in files doy.json, where doy is the day
    of the year.

    :param str location: Name of the location
    :param int year: Observations year to download
    """
    logger = logging.getLogger(__name__)
    logger.info("getting json data for every day of the year")

    # create folder path for saving the JSON data
    output_folder = os.path.join(project_dir, 'data', 'raw', location,
                                 str(year))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # use `geopy` to get the coordinates of the location.
    geocode = GoogleV3(
        api_key=os.environ.get('WTD_MAPS_KEY')).geocode(location)
    latitude = geocode.latitude
    longitude = geocode.longitude

    obs_dates = [
        d.date() for d in pd.date_range(
            start=date(year, 1, 1), end=date(year, 12, 31), normalize=True)
    ]
    click.echo("\nFetching the data from Dark Sky API:")
    with tqdm(total=len(obs_dates)) as pbar:
        for obs_date in obs_dates:
            doy = obs_date.timetuple().tm_yday
            obs_fn = os.path.normpath(
                os.path.join(output_folder, str(doy) + '.json'))
            if not os.path.exists(obs_fn):
                # get the json request for the weather observations for the day
                response = get_weather(latitude, longitude, obs_date)
                if response:
                    # check that response json had the `daily` key
                    try:
                        resp_date = date.fromtimestamp(
                            response['daily']['data'][0]['time'])
                        resp_doy = resp_date.timetuple().tm_yday
                    except KeyError:
                        logger.error("response JSON doesn't have `daily` key")
                        click.echo(response)
                        # FIX: exit non-zero — this is an error, not success
                        sys.exit(1)
                    # FIX: explicit comparison instead of assert/AssertionError
                    # (asserts are stripped when Python runs with -O)
                    if resp_doy != doy:
                        logger.warning(
                            "request day of year ({0:d}) different from in response ({1:d})"
                            .format(doy, resp_doy))
                    else:
                        logger.info("request day of year same as in response")
                    # write json file
                    with open(obs_fn, 'w') as fp:
                        json.dump(response, fp)
                else:
                    # abort the whole run on the first failed fetch
                    logger.error(
                        "doy:{0:d} can't fetch data from API".format(doy))
                    return
            else:
                logger.info("file {0} already exists, skipping".format(obs_fn))
            pbar.update(1)
import csv
from geopy.geocoders import GoogleV3
import Keys

locator = GoogleV3(api_key=Keys.getkey())
# Forward-geocode a place name, look up a timezone, then reverse-geocode
# the coordinates we just obtained.
location = locator.geocode("Mexico")
timezone = locator.timezone("43.695279,7.264738")
location = locator.reverse("%s,%s" % (location.latitude, location.longitude))
# FIX: Python 2 `print location` statement -> print() call (same output).
print(location)
from geopy.geocoders import GoogleV3
import csv
import pandas as pd

geolocator = GoogleV3(api_key="AIzaSyCyjvDR65X330ffLtPwGKQ_X_rjoEo0k4c")
df = pd.read_table('asian_pop.csv', sep=",")

# Output columns (header written on a previous run):
# name,latitude,longitude,indian,chinese,filipino,japanese,korean,vietnamese,others
# FIX: use a context manager so the output file is flushed and closed.
with open('asian_percent.csv', 'a') as f:
    for i in range(0, len(df)):
        print("%d / %d \n" % (i, len(df)))
        county = (df.loc[i, "county"])
        asians = df.loc[i, "asians"]
        total = df.loc[i, "total"]
        percent = df.loc[i, "percent"]
        # FIX: the original swallowed geocoding errors with `except: None`
        # and then wrote the row anyway, using the PREVIOUS row's (stale)
        # coordinates — or crashing on the first row. Skip failed rows.
        try:
            location = geolocator.geocode(county)
        except Exception:
            location = None
        if location is None:
            continue
        f.write('"%s",%.7f,%.7f,%s,%s, %s\n' %
                (county, location.latitude, location.longitude,
                 asians, total, percent))
def geoaddr(cls, addr):
    """Geocode *addr* with Google's geocoder and return the result."""
    return GoogleV3().geocode(addr)
from geopy.exc import GeocoderTimedOut endpoint = 'search-mytestdomain-qnyhs32jjgymxujnd6h75uqwtq.us-east-1.es.amazonaws.com' #estabilish connection es = Elasticsearch(hosts=[endpoint], port=443, use_ssl=True, ca_certs=certifi.where()) #Variables that contains the user credentials to access Twitter API access_token = "837190810621906944-FJj2YqU1tRyDiS2S4WDPeKJfmN6XhUB" access_token_secret = "91fP560AMqSJIeTm4UlZFMA2YTvNcKRMVqZwLj0UDHeLy" consumer_key = "HQIPvIVR26ehH9GibMzOY5zbX" consumer_secret = "ES5WepQeeM10eikswEvH1vdiq6kBIOMdWFxfoDKwc5553xFMqz" geo = GoogleV3() # geo.__init__(api_key='AIzaSyBCQVwa2M37bSINXVZ5ns_ZMTyb9ExaCAU', domain='maps.googleapis.com', scheme='http', client_id=None, secret_key=None, timeout=5, proxies=None, user_agent=None) # location = geo.geocode("Kentucky, USA") # [location.latitude,location.longitude] # print(location.raw) #This is a basic listener that just prints received tweets to stdout. class StdOutListener(StreamListener): def on_data(self, data): def do_geocode(address): try: return geo.geocode(address) except GeocoderTimedOut: return do_geocode(address)
from geopy.geocoders import GoogleV3

google_key = "AIzaSyCYXmN6XNbZCziryf0LjTXilQq4MMPB7dY"
g = GoogleV3(api_key=google_key)
print("ADRES?!")
# Component-filtered lookup for the city of Paris, FR.
locations = g.geocode(components={"city": "Paris", "country": "FR"})
# FIX: the original passed the Location object itself back into geocode();
# geocode expects a string query, so geocode the resolved address instead,
# and tolerate a failed first lookup (None).
geocode_result = g.geocode(locations.address) if locations else None
print(geocode_result)
# select first location
# Normalize neighbourhood names: uppercase, trim, strip accents (NFKD).
df_obitos.BAIRRO = df_obitos.BAIRRO.str.upper()
df_obitos.BAIRRO = df_obitos.BAIRRO.str.strip()
df_obitos.BAIRRO = df_obitos['BAIRRO'].str.normalize('NFKD').str.encode(
    'ascii', errors='ignore').str.decode('utf-8')

# Union of neighbourhoods from the confirmed-cases and deaths datasets.
df_bairros_obitos = pd.DataFrame(df_obitos.BAIRRO.unique())
# FIX: DataFrame.append was deprecated and removed in pandas 2.0.
df_bairros_bases = pd.concat([df_bairros_confirmados, df_bairros_obitos])
df_bairros_bases.rename(columns={0: 'BAIRRO'}, inplace=True)
df_bairros_bases = pd.DataFrame(df_bairros_bases.BAIRRO.unique())
df_bairros_bases.rename(columns={0: 'BAIRRO'}, inplace=True)

# NOTE(review): this is an alias, not a copy — both names point at the
# same DataFrame, as in the original code.
df_bairros_bases_final = df_bairros_bases
df_bairros_bases_final['lat'] = 0.0000000
df_bairros_bases_final['lon'] = 0.0000000

AUTH_KEY = "AIzaSyA-dulzoEbkNMZnor6jAO3UnWDJz4cSjNQ"
geolocator = GoogleV3(api_key=AUTH_KEY)
list_bairros = {}
for i in range(0, df_bairros_bases.shape[0]):
    data = geolocator.geocode("Bairro: " + df_bairros_bases.BAIRRO[i] +
                              " São Gonçalo - RJ").point
    # FIX: `.lat[i] = ...` is chained indexing (SettingWithCopyWarning and
    # possibly a silent no-op); write through .at instead.
    df_bairros_bases_final.at[i, 'lat'] = data[0]
    df_bairros_bases_final.at[i, 'lon'] = data[1]
import json
import os
import urllib.request

from firebase_admin import credentials, firestore, initialize_app
from firebase_admin import storage
from firebase_admin.storage import bucket
from geopy.geocoders import GoogleV3
from itemadapter import ItemAdapter
from scrapy.exceptions import DropItem
import pygeohash as pgh

dir = os.path.dirname(__file__)
output_file = os.path.join(dir, 'zillow.csv')
json_out_file = os.path.join(dir, 'zillow.jsonl')
geo_locator = GoogleV3(api_key=os.environ.get('MAP_KEY'))
cred = credentials.Certificate(os.path.join(dir, 'account.json'))
bucket_name = os.environ.get('BUCKET')
initialize_app(cred, {'storageBucket': bucket_name})


class GeoPipeline:
    """Scrapy pipeline: attach geohash/lat/lng to each scraped item."""

    def process_item(self, item, spider):
        address = item['address']
        # FIX: geocode() returns None for unresolvable addresses, which
        # previously raised AttributeError; drop the item instead.
        result = geo_locator.geocode(address)
        if result is None:
            raise DropItem("could not geocode address: %s" % address)
        location = result.raw['geometry']['location']
        # FIX: local renamed from `hash`, which shadowed the builtin.
        geohash = pgh.encode(location['lat'], location['lng'])
        item['geohash'] = geohash
        item['longitude'] = location['lng']
        item['latitude'] = location['lat']
        return item
class HomeView(TemplateView):
    """Museum-map home page: search by category or by free-text location."""
    template_name = 'index.html'
    geolocator = GoogleV3()
    states_and_abbrevs = ['alabama', 'al', 'alaska', 'ak', 'arizona', 'az',
                          'arkansas', 'ar', 'california', 'ca', 'colorado',
                          'co', 'connecticut', 'ct', 'delaware', 'de',
                          'district of columbia', 'dc', 'florida', 'fl',
                          'georgia', 'ga', 'hawaii', 'hi', 'idaho', 'id',
                          'illinois', 'il', 'indiana', 'in', 'iowa', 'ia',
                          'kansas', 'ks', 'kentucky', 'ky', 'louisiana', 'la',
                          'maine', 'me', 'maryland', 'md', 'massachusetts',
                          'ma', 'michigan', 'mi', 'minnesota', 'mn',
                          'mississippi', 'ms', 'missouri', 'mo', 'montana',
                          'mt', 'nebraska', 'ne', 'nevada', 'nv',
                          'new hampshire', 'nh', 'new jersey', 'nj',
                          'new mexico', 'nm', 'new york', 'ny',
                          'north carolina', 'nc', 'north dakota', 'nd',
                          'ohio', 'oh', 'oklahoma', 'ok', 'oregon', 'or',
                          'pennsylvania', 'pa', 'rhode island', 'ri',
                          'south carolina', 'sc', 'south dakota', 'sd',
                          'tennessee', 'tn', 'texas', 'tx', 'utah', 'ut',
                          'vermont', 'vt', 'virginia', 'va', 'washington',
                          'wa', 'west virginia', 'wv', 'wisconsin', 'wi',
                          'wyoming', 'wy']

    def get(self, request, **kwargs):
        context = self.get_context_data(**kwargs)
        return render(request, self.template_name, context)

    def post(self, request, **kwargs):
        """Handle a search by category or by location string."""
        category = request.POST.get('category')
        location = request.POST.get('location')
        context = self.get_context_data(**kwargs)
        context['map'] = True
        if category:
            search = Search(**{'text': 'Category: {0}'.format(category)})
            search.save()
            museums = Museum.objects.filter(types__code=category)
            geojson = self.get_geojson(**{'name': category,
                                          'museums': museums})
            context["jsonfile"] = category
        elif location:
            search = Search(**{'text': location})
            search.save()
            # Inputs: If just a state/abbrev given, show all items for that
            # state only, no radius. Otherwise, geocode the result, run the
            # vicenty distance
            if location.lower() in self.states_and_abbrevs:
                if len(location) != 2:
                    location = STATES_NORMALIZED.get(location.lower())
                context["jsonfile"] = location
                # TEMPORARY: EXCLUDE 11K GENERAL MUSEUMS FOR NOW -- Can
                # always add them back later
                museums = Museum.objects.filter(
                    state=location).exclude(types__code='GMU')
                if museums.count() > 0:
                    geojson = self.get_geojson(**{'name': location,
                                                  'museums': museums})
                # By this point, location is always a two-letter abbreviation
                address, (latitude, longitude) = self.geolocator.geocode(
                    ''.join([state_tuple[1] for state_tuple in US_STATES
                             if state_tuple[0] == location]))
            else:
                try:
                    museums = []
                    address, (latitude, longitude) = \
                        self.geolocator.geocode(location)
                except Exception:
                    context["jsonfile"] = ""
                else:
                    if latitude and longitude:
                        all_museums = Museum.objects.exclude(types__code='GMU')
                        for museum in all_museums:
                            dist = vincenty(
                                (museum.latitude, museum.longitude),
                                (latitude, longitude)
                            ).miles
                            if dist <= RADIUS:
                                museums.append(museum)
                        # FIX: sha256 requires bytes, not str
                        context["jsonfile"] = hashlib.sha256(
                            location.encode('utf-8')).hexdigest()[:8]
                        geojson = self.get_geojson(
                            **{'name': context["jsonfile"],
                               'museums': museums})
            context["latitude"] = latitude
            context["longitude"] = longitude
        # context["geojson_path"] = PATH_PREFIX
        context['museums'] = museums
        return render(request, self.template_name, context)

    def get_context_data(self, **kwargs):
        # EXCLUDE 11K GENERAL MUSEUMS FOR NOW -- Can always add them back later
        count_museums = Museum.objects.exclude(types__code='GMU').count()
        categories = MuseumType.objects.order_by('name').exclude(code='GMU')
        context = {
            'count_museums': count_museums,
            'categories': categories,
            'map': False,
        }
        return context

    def get_geojson(self, **kwargs):
        """Return the stored GeoJSON for *name*, generating it if missing."""
        name = kwargs.get('name')
        museums = kwargs.get('museums')
        try:
            return GeoJSON.objects.get(name=name)
        except ObjectDoesNotExist:
            # Generate the GeoJSON, save it to a file
            if museums:
                geojson = self.generate_geojson(**{'museums': museums})
                self.save_geojson(**{'geojson': geojson, 'name': name})
                new_geojson = GeoJSON(**{
                    'name': name,
                    'url': "{0}.json".format(name),
                })
                new_geojson.save()
                # FIX: the freshly-created record was never returned,
                # so cache misses always yielded None.
                return new_geojson

    def generate_geojson(self, **kwargs):
        """Serialize *museums* into a GeoJSON FeatureCollection string."""
        museums = kwargs.get('museums')
        museums_json = []
        for museum in museums:
            museums_json.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [museum.longitude, museum.latitude]
                },
                "properties": {
                    "": "<h4>{0}</h4>".format(museum.name),
                    "marker-symbol": "museum",
                }
            })
            if museum.url:
                museums_json[-1]["properties"][""] += '<b>Website:</b> <a href="{0}" target="_blank">{0}</a><br>'.format(museum.url)
            if museum.phone:
                museums_json[-1]["properties"][""] += '<b>Phone:</b> <a href="tel:{0}" target="_blank">{0}</a><br>'.format(museum.phone)
            if museum.address:
                museums_json[-1]["properties"][""] += '{0} {1} {2} {3}'.format(
                    museum.address, museum.city, museum.state, museum.zipcode)
            # NOTE: per-category marker-color support was removed — it
            # doesn't play nicely with Geojson.io or Google maps.
        return json.dumps({
            "type": "FeatureCollection",
            "features": museums_json,
        }, indent=4, sort_keys=True)

    def save_geojson(self, **kwargs):
        """Write the GeoJSON string to <PATH_PREFIX><name>.json."""
        geojson = kwargs.get('geojson')
        name = kwargs.get('name')
        with open("{0}{1}.json".format(PATH_PREFIX, name), 'w') as json_file:
            json_file.write(geojson)

    def get_within_radius(self, **kwargs):
        pass
#https://www.google.com/travel/things-to-do/see-all?dest_src=ut&dest_mid=%2Fm%2F013yq&tcfs=EhMKCC9tLzAxM3lxEgdBdGxhbnRh&hl=en&gl=US&dest_state_type=sattd#ttdm=33.763980_-84.389956_13&ttdmf=%252Fm%252F04jny9
import pandas as pd
pd.set_option("display.max_rows", None, "display.max_columns", None)

#load in xlxs file
df = pd.read_excel("manual/locations.xlsx")

from pathlib import Path
from geopy.geocoders import GoogleV3

path = Path(__file__).resolve().parent
# FIX: the key file was opened without ever being closed, and a trailing
# newline in the file would corrupt the API key; read_text() + strip()
# handles both.
key = (path / "api_key.txt").read_text().strip()
geolocator = GoogleV3(api_key=key, user_agent="cafe_analytics")

#add lat/long columns
df["latitude"] = "x"
df["longitude"] = "x"

#fill in lat/long
for i, row in df.iterrows():
    location = geolocator.geocode(row["address"])
    df.at[i, "latitude"] = location.latitude
    df.at[i, "longitude"] = location.longitude

print(df)

#Connection stuff-----------------------------------------------
host = 'database-1.c8futoifja4g.us-east-2.rds.amazonaws.com'
port = 3306
db_name = 'new_schema'
table_name = 'locations'
def set_location(location_name):
    """Geocode *location_name* and move the player there."""
    geocoder = GoogleV3()
    place = geocoder.geocode(location_name)
    # NOTE(review): .encode() prints as b'...' on Python 3 — kept as-is to
    # preserve behavior; confirm the intended runtime.
    print('[!] Your given location: {}'.format(place.address.encode('utf-8')))
    set_location_coords(place.latitude, place.longitude, place.altitude)
def __init__(self, config):
    """Set up the weather plugin: airports, user table and geocoder."""
    super(BaseWeather, self).__init__(config)
    self.airport_lookup = self.load_airports()
    self.usertable = dataset.connect(config['database'])['users']
    # NOTE(review): the geocoder key is read from config['youtube_key'] —
    # the name looks misleading; confirm it really holds a Google Maps key.
    self.geoloc = GoogleV3(api_key=config['youtube_key'])
# FIX: `futures` (concurrent.futures) was used below but never imported.
from concurrent import futures
import os
import os.path
import pickle
import re
import shutil

from decouple import config
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import GoogleV3
import pandas as pd

DATASET_PATH = os.path.join('data', 'companies.xz')
TEMP_PATH = os.path.join('data', 'companies')
CNPJ_REGEX = r'[./-]'

geolocator = GoogleV3(config('GOOGLE_API_KEY'))


def geocode_companies(companies):
    """Geocode every row of the *companies* DataFrame with a thread pool.

    Successful results are persisted via write_geocoding_info(); failures
    are reported to stdout with the company's CNPJ.
    """
    with futures.ThreadPoolExecutor(max_workers=40) as executor:
        future_to_geocoding = {}
        for index, company in companies.iterrows():
            future = executor.submit(geocode_company, company)
            future_to_geocoding[future] = company
        for future in futures.as_completed(future_to_geocoding):
            company = future_to_geocoding[future]
            if future.exception() is not None:
                print('%r raised an exception: %s' %
                      (company['cnpj'], future.exception()))
            elif future.result() is not None:
                write_geocoding_info(future.result(), company['cnpj'])
def push(user, passwd, baseurl, homes, waittime=0.05,
         geocode_status='failsafe'):
    """Upload *homes* to the property API, optionally geocoding each one.

    :param user/passwd: API credentials used to obtain a JWT token
    :param baseurl: API root URL
    :param homes: iterable of dicts with at least 'raw_address'
    :param waittime: delay between POSTs (rate limiting)
    :param geocode_status: 'failsafe' (give up geocoding after repeated
        failures), 'failsecure' (abort on geocoding failure) or 'none'
    :returns: dict with the run's start timestamp, or an 'errors' dict
        when authentication fails
    """
    # After this many retries in failsafe mode, give up on geocoding
    # client side
    failsafe_retries = 5
    # For every single entry, do this many retries before moving on
    line_retries = 2
    # If you have too many failures in a row without any success, stop.
    # Reset to this count on one success.
    ceiling_overall_retries = 10

    r = requests.post(baseurl + '/token/auth/', data={
        "username": user,
        "password": passwd
    })
    j = r.json()
    if r.status_code != 200:
        if 'non_field_errors' in j.keys():
            errors = j['non_field_errors']
            return {"errors": errors}
        else:
            return {"errors": ['Cannot get token from API']}
    if not ('token' in j.keys()):
        return {
            "errors": "ERROR: Got 200 response but it does not contain a token"
        }
    token = j['token']

    if geocode_status != 'none':
        encoder = GoogleV3()
    overall_retries = ceiling_overall_retries
    fail_count = 0
    giveup_count = 0
    success_count = 0
    result = {"start": str(datetime.datetime.now())}
    i = 0
    for home in homes:
        if geocode_status != 'none' and failsafe_retries >= 0:
            # FIX: was a bare `except:`; narrowed so KeyboardInterrupt
            # and SystemExit are not swallowed.
            try:
                location = encoder.geocode(home['raw_address'])
                home['geocoded_address'] = location.address
                home['lat'] = location.latitude
                home['lon'] = location.longitude
                home['rawjson'] = location.raw
            except Exception:
                failsafe_retries -= 1
                if geocode_status == 'failsafe' and failsafe_retries < 0:
                    geocode_status = 'none'
                if geocode_status == 'failsecure':
                    print("ERROR: Problems with geocoding")
                    sys.exit(1)
        retries = line_retries
        failure = True
        while retries >= 0 and overall_retries >= 0 and failure:
            failure = False
            try:
                headers = {"Authorization": "Bearer " + token}
                time.sleep(waittime)
                p = requests.post(baseurl + '/api/property/', data=home,
                                  headers=headers)
                if p.status_code == 201:
                    success_count += 1
                else:
                    failure = True
                    fail_count += 1
                    overall_retries = ceiling_overall_retries
            except UnboundLocalError:
                print("UnboundLocalError when calling API")
                failure = True
            if failure:
                retries -= 1
                overall_retries -= 1
                print(home)
                msg = "ERROR [line " + str(i) + "]"
                if retries >= 0 and overall_retries >= 0:
                    msg += " (going to retry)\n"
                print(msg)
                terminal_error = False
                # FIX: was a bare `except: pass`; narrowed to Exception.
                try:
                    detail = json.loads(p.content)
                    if 'detail' in detail.keys() and detail[
                            'detail'] == 'You do not have permission to perform this action.':
                        msg = 'Your account does not have write access. Please contact [email protected]'
                        print(msg)
                        terminal_error = True
                except Exception:
                    pass
                if terminal_error:
                    sys.exit(1)
        i += 1
        if failure:
            giveup_count += 1
    if overall_retries < 0:
        msg = "Quitting due to too many failures\n"
        print(msg)
    msg = "Successfully uploaded: " + str(success_count)
    print(msg)
    msg = "Failures experienced: " + str(fail_count)
    print(msg)
    msg = "Unrecovered failures: " + str(giveup_count)
    print(msg)
    return result
yaml_config = yaml.safe_load(f) logging.info("Config opened successful") except FileNotFoundError: logging.info("Can't open config, generating new") geolocator = "Nominatim" api_key = "BpKmlnBpKmlnhdUiJSPAI16qAVqo2Ks2MHV0pKQ" to_yaml = {"api_key": api_key, "geolocator": "Nominatim"} with open("ini2json.yaml", "w") as f: yaml.dump(to_yaml, f, default_flow_style=False) with open("ini2json.yaml") as f: yaml_config = yaml.safe_load(f) if yaml_config["geolocator"] == "GoogleV3": geolocator = GoogleV3(api_key=yaml_config["api_key"]) else: geolocator = Nominatim( user_agent= "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" ) def find_lat_long(address): location = None cutted_address = (address.replace("вул.", "").replace("м.", "").replace( ",", "").replace("не реаг", " ").replace("нереагувати", " ").split()) while location == None: try: del cutted_address[-1] result = cutted_address[:]
def storelocator_index(request, category_slug=None, product_slug=None,
                       variation_slug=None, item_slug=None):
    """Store-locator view: find stores near a geocoded location, optionally
    filtered by category/product/variation/item, with AJAX (JSON) and
    template responses."""
    # Grab location from POST or session
    query = request.GET.get('location', '')
    radius = int(request.GET.get('radius', 25))
    date = request.GET.get('date', None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    locality = int(request.GET.get('locality', 1))
    location = None
    stores = []
    closest = False
    result = None

    # Geocode location
    if query:
        try:
            result = GoogleV3(
                api_key=settings.Site.google_api_key).geocode(query)
        except GeocoderQuotaExceeded:
            error(
                request,
                "Our geocoder is currently overloaded - please try again later!"
            )
        else:
            try:
                location = (result.point.latitude, result.point.longitude)
                query = result.address
            except AttributeError:
                error(request, "We could not find that location!")

    product = None
    variation = None
    item = None
    variations = None
    filterset = None
    filters = None
    category = None
    try:
        from sidecart.products.models import Product, Variation
        from sidecart.products.filters import FilterSet
        if product_slug:
            product = Product.objects.filter(slug=product_slug).first()
            if not product:
                return redirect('storelocator')
            if variation_slug:
                variation = product.variations().filter(
                    slug=variation_slug).first()
                if not variation:
                    return redirect('storelocator', product_slug)
                if item_slug:
                    item = variation.items().filter(slug=item_slug).first()
                    if not item:
                        return redirect('storelocator', product_slug,
                                        variation_slug)
        elif settings.STORELOCATOR_INCLUDE_FILTERS:
            filters = request.GET.get('filters', None)
            if filters == '/':
                filters = None
            variations = None if not filters else Variation.objects.public(
            ).select_related('product').prefetch_related('product__categories')
            try:
                filterset = FilterSet(variations,
                                      filters.split('/') if filters else [])
            except KeyError:
                filters = None
            if filters:
                variations = filterset.apply()
    except (ImportError, FieldError):
        if product_slug:
            return redirect('storelocator')

    if category_slug == 'costco-roadshow':
        category = get_object_or_404(LocatorCategory, slug=category_slug,
                                     active=True)
        stores = CostcoRoadShow.objects.exclude(online=True).filter(
            locality=locality)
        if date:
            stores = stores.filter((Q(start_date__month=date.month)
                                    & Q(start_date__year=date.year))
                                   | (Q(end_date__month=date.month)
                                      & Q(end_date__year=date.year)))
    else:
        # Grab nearby locations
        if location:
            all_stores = Location.objects.active().exclude(online=True)
            if category_slug:
                category = get_object_or_404(LocatorCategory,
                                             slug=category_slug, active=True)
                all_stores = all_stores.filter(
                    categories__in=category.get_descendants(
                        include_self=True))
            stores = all_stores.within_radius(location, radius=radius)
            if product_slug or filters:
                stores = stores.with_inventory(variations, product_slug,
                                               variation_slug, item_slug)
            if not stores:
                # Nothing in range: fall back to the single closest store.
                if not product_slug and not filters:
                    stores = all_stores.order_by_distance_from(location)[:1]
                    closest = True
            else:
                stores = sorted(stores,
                                key=lambda l: l.distance_from(location).km)
        elif settings.STORELOCATOR_ALWAYS_LOAD_ALL_STORES:
            stores = Location.objects.active()

    # Set context
    context = {
        'success': True if location else False,
        'location': {
            'lat': location[0] if location else '39.8282',
            'lng': location[1] if location else '-98.5795',
        },
        'stores': LocationSerializer(
            stores, many=True, origin=location,
            query=query).data if request.is_ajax() else stores,
        'query': query,
        'closest': closest,
        'radius': radius
    }  # json serializer doesn't like addresses and decimals

    # Return response
    if request.is_ajax():
        response = HttpResponse(json.dumps(context),
                                content_type='application/json')
    else:
        # FIX: only compute distances when a location was actually geocoded;
        # the costco and load-all-stores paths reach here with location=None,
        # which previously crashed in distance_from().
        if location:
            for store in context['stores']:
                store.distance = int(store.distance_from(location).miles)
        context['product'] = product
        context['variation'] = variation
        context['item'] = item
        context['filterset'] = filterset
        context['filters'] = filters
        context['categories'] = LocatorCategory.objects.active()
        context[
            'category'] = category if category else LocatorCategory.objects.active(
            ).all().first()
        context['online_stores'] = Location.objects.active().online().order_by(
            'name')
        if category_slug == 'costco-roadshow':
            costco_dates = CostcoRoadShow.objects.exclude(
                online=True).order_by('start_date').values_list(
                    'start_date', 'end_date')
            display_dates = {}
            # FIX: loop variable renamed from `date`, which clobbered the
            # parsed request date above.
            for date_pair in costco_dates:
                display_dates[datetime(year=date_pair[0].year,
                                       month=date_pair[0].month, day=1)] = 1
                display_dates[datetime(year=date_pair[1].year,
                                       month=date_pair[1].month, day=1)] = 1
            context['dates'] = list(
                sorted(display_dates.items(), key=lambda t: t[0]))
            context['date'] = request.GET.get('date', '')
            context['states'] = CostcoRoadShow.objects.exclude(
                online=True).prefetch_related('locality').order_by(
                    'locality').values_list('locality__name',
                                            'locality__id').distinct()
            context['locality'] = locality
        try:
            context['page'] = Page.objects.public().get(slug='find-a-store')
        except Page.DoesNotExist:
            pass
        response = TemplateResponse(request, 'storelocator/index.jinja',
                                    context)
    return response
import csv, os, geopy from geopy.geocoders import GoogleV3 instance = GoogleV3(api_key="", domain='maps.googleapis.com') with open('banks_unique.csv', 'rb') as csvfile: spamreader = csv.reader(csvfile, delimiter=',') rownum = 0 found = 0 array = [] for row in spamreader: if rownum == 0: header = row else: success = 0 if success == 0: try: print("geocoding " + str(rownum)) place_teh = row[0] + ", Pakistan" place_cit = row[1] + ", Pakistan" # Geocode based on tehsil res = instance.geocode(place_teh, exactly_one=True, timeout=15, region="pk") if res is not None: array.append( [row[0], row[1], res.latitude, res.longitude, 1]) print("succesfully completed tehsil " + place_teh) found += 1 else:
def scrapeNotices(notices, dateToScrapeAfter="2016-11-01"):
    """Scrape Ontario Environmental Registry (EBR) instrument notices.

    For each row in *notices* (BeautifulSoup elements from an EBR search
    results page) that is newer than the last recorded scrape, fetch the
    full notice page, extract its metadata, geocode the proponent address
    and the related location(s), and append everything as a new row to the
    ``wks`` Google-Sheets worksheet.

    Relies on module-level globals: ``lastScrape`` (date of the previous
    run), ``wks`` (gspread worksheet), plus ``datetime``, ``requests``,
    ``bs`` (BeautifulSoup), ``re``, ``time`` and geopy's ``GoogleV3``.

    NOTE(review): ``dateToScrapeAfter`` is accepted but never used — the
    freshness check is driven by ``lastScrape`` instead; confirm intent.

    NOTE(review): this body was reconstructed from whitespace-flattened
    source; several ``.replace(" ", ...)`` literals may originally have
    contained multiple spaces — verify against the upstream repository.
    """
    for notice in notices:
        # IF notice is more recent than the day before this script was run last...
        if ((datetime.datetime.strptime(
                notice.select("td.searchResultContent")[4].text.strip(),
                '%B %d, %Y').date() - lastScrape).days) > 1:
            # Fetch the full notice page (the search row only holds a summary).
            fullNotice = requests.get(r"https://www.ebr.gov.on.ca" +
                                      notice.select('a')[0].get('href'))
            href = r"https://www.ebr.gov.on.ca" + notice.select('a')[0].get(
                'href')
            soup = bs(fullNotice.text, "html.parser")
            # If notice is for an instrument, start scraping
            if "Instrument" in soup.select(
                    'h1#h1_notice')[0].get_text().split():
                # START TIMER
                start_time = time.time()
                print("Starting Scrape")
                # Rebinds the loop variable to the notice's title string.
                notice = soup.select('h1#h1_notice')[0].get_text().strip()
                # Try (twice) to associate coordinates with the proponents address
                proponent = soup.select('span.notice-content-sub')[0].get_text(
                    separator=" ").strip().replace("\n", " ").replace(" ", "")
                try:
                    geolocator = GoogleV3()
                    location = geolocator.geocode(proponent +
                                                  " Ontario, Canada")
                    proplat = location.latitude
                    proplong = location.longitude
                except Exception as exc:
                    # Second attempt. NOTE(review): if GoogleV3() itself raised
                    # above, ``geolocator`` is unbound here — confirm upstream.
                    try:
                        location = geolocator.geocode(proponent +
                                                      " Ontario, Canada")
                        proplat = location.latitude
                        proplong = location.longitude
                    except:
                        # Sentinel strings mark un-geocodable addresses.
                        proplat = "Cant Find"
                        proplong = "Cant Find"
                        print(exc)
                # Metadata fields, in the fixed order they appear on the page.
                instrument = soup.select(
                    'span.notice-content-sub')[1].get_text(
                        separator=" ").strip().replace("\n", " ").replace(" ", "")
                ebr_id = soup.select(
                    'span.notice-content-sub')[2].get_text().strip().replace(
                        "\n", " ").replace(" ", "")
                ministry_id = soup.select(
                    'span.notice-content-sub')[3].get_text().strip().replace(
                        "\n", " ").replace(" ", "")
                ministry = soup.select(
                    'span.notice-content-sub')[4].get_text().strip().replace(
                        "\n", " ").replace(" ", "")
                dateproploaded = soup.select(
                    'span.notice-content-sub')[5].get_text().strip().replace(
                        "\n", " ").replace(" ", "")
                # Decision notices carry one extra field: the decision-load date.
                if "Decision" in soup.select(
                        'h1#h1_notice')[0].get_text().split():
                    datedecloaded = soup.select('span.notice-content-sub')[
                        6].get_text().strip().replace("\n", " ").replace(" ", "")
                kws = soup.select('div[aria-label="Keyword(s):"]')
                for kw in kws:
                    # Only the last matching div survives this loop.
                    keywords = kw.get_text().strip().replace(
                        "\n", " ").replace(" ", "").replace("Keyword(s):",
                                                            "").strip()
                if "Proposal" in notice:
                    # Extract the comment-submission URL from the onclick JS.
                    commenthref = soup.select('input')[0]['onclick'].replace(
                        "Javascript:openAddCmtWin('", "").replace(
                            "');", "").replace("¬", "&not")
                    commentperiod = soup.select(
                        'div[aria-label="Comment Period:"]')[0].get_text(
                        ).replace("Comment Period:", "").strip()
                # Try (twice) to associate coordinates with locations relevant to the instrument
                if "Location(s)" in soup.select(
                        'h2.notice-head-b')[1].get_text():
                    try:
                        location = soup.select(
                            'div[aria-label="Location(s) Related to this"]'
                        )[0].select(
                            'div.notice-content'
                        )[0]  #.get_text().replace("<br>", "").replace("\n", " ").replace(" ","").strip()
                        location = str(location) + " " + str(
                            soup.select(
                                'div[aria-label="Location(s) Related to this"]'
                            )[0].select('div.notice-content')[1])
                    except:
                        # Fallback: some notices use the longer aria-label.
                        location = soup.select(
                            'div[aria-label="Location(s) Related to this Instrument"]'
                        )[0].select(
                            'div.notice-content'
                        )[0]  #.get_text().replace("<br>", "").replace("\n", " ").replace(" ","").strip()
                        location = str(location) + " " + str(
                            soup.select(
                                'div[aria-label="Location(s) Related to this Instrument"]'
                            )[0].select('div.notice-content')[1])
                    # Strip HTML tags with a regex, then squeeze whitespace.
                    location = re.sub(r"\<\/?[\w]+[\s\w\"\-\=\/]*\>", " ",
                                      str(location)).strip().replace(
                                          "\n", " ").replace(" ", " ").replace(
                                              " ", " ").replace(" ", " ").strip()
                    try:
                        loc = geolocator.geocode(
                            str(location) + " Ontario, Canada")
                        loclat = loc.latitude
                        loclong = loc.longitude
                    except Exception as exc:
                        loc = geolocator.geocode(location + " Ontario, Canada")
                        try:
                            loclat = loc.latitude
                            loclong = loc.longitude
                        except:
                            loclat = "Cant Find"
                            loclong = "Cant Find"
                            print(exc)
                print("Scrape Complete")
                # Add scraped data to googlespreadsheet
                # First free row = count of non-empty cells in column 1, plus 1.
                nrow = sum(i != "" for i in wks.col_values(1))
                print("Adding data to row #" + str(nrow + 1))
                wks.update_cell(nrow + 1, 1, ebr_id)
                wks.update_cell(nrow + 1, 2, href)
                wks.update_cell(nrow + 1, 3, ministry_id)
                wks.update_cell(nrow + 1, 4, proponent)
                wks.update_cell(nrow + 1, 5, proplat)
                wks.update_cell(nrow + 1, 6, proplong)
                wks.update_cell(nrow + 1, 7, instrument)
                wks.update_cell(nrow + 1, 8, notice)
                wks.update_cell(nrow + 1, 9, dateproploaded)
                if "Decision" in soup.select(
                        'h1#h1_notice')[0].get_text().split():
                    wks.update_cell(nrow + 1, 10, datedecloaded)
                wks.update_cell(nrow + 1, 11, ministry)
                wks.update_cell(nrow + 1, 12, keywords)
                wks.update_cell(nrow + 1, 13, proponent)
                wks.update_cell(nrow + 1, 14, proplat)
                wks.update_cell(nrow + 1, 15, proplong)
                if "Proposal" in notice:
                    wks.update_cell(nrow + 1, 16, commenthref)
                    wks.update_cell(nrow + 1, 17, commentperiod)
                wks.update_cell(nrow + 1, 18, location)
                wks.update_cell(nrow + 1, 19, loclat)
                wks.update_cell(nrow + 1, 20, loclong)
                print("Storage Complete")
                print(str(time.time() - start_time) + " seconds")
                print("-----------------------------")


# Record yesterday's date so the next run only scrapes newer notices.
# NOTE(review): module-level placement reconstructed from flattened source;
# confirm these four lines are not meant to run inside the function.
yesterday = str(datetime.date.today() - datetime.timedelta(1))
lastScrapeFile = open(
    r"C:\Users\mackenzien\Documents\MKN\py\ebrscraper\lastebrscrape.txt", 'w')
lastScrapeFile.write(yesterday)
lastScrapeFile.close()
locations = [] db = kpub.PublicationDB() all_publications = db.get_all() for publication in all_publications: affiliations = publication['aff'] # Use the first three authors for aff in affiliations: # Help the geolocator by only passing on the final components of the address aff_suffix = ",".join(aff.split(";")[-1].split(",")[-2:]).strip(" ;,-") locations.append(aff_suffix) unique_locations = nltk.FreqDist(locations) print("Found {} unique locations".format(len(unique_locations))) # Step 2: initialize the Google geolocator from config import API_KEY geolocator = GoogleV3(api_key=API_KEY) time.sleep(2) # Step 3: geolocate all affiliations out = open(OUTPUT_FN, "w") out.write("lon,lat,count,name\n") fd_aff = nltk.FreqDist(locations) for name, count in tqdm(unique_locations.items()): if name == "-" or name == "": continue try: location = geolocator.geocode(name, timeout=10) outstring = "{},{},{},{}\n".format(location.longitude, location.latitude, count, name.replace(",", ";")) out.write(outstring)
from matplotlib import rcParams from matplotlib.collections import LineCollection from mpl_toolkits.axes_grid.inset_locator import zoomed_inset_axes import shapefile import pycountry import haversine from geolite2 import geolite2 import warnings import matplotlib.cbook warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation) arcgis = ArcGIS(timeout=100) nominatim = Nominatim(timeout=100) googlev3 = GoogleV3(timeout=100) openmapquest = OpenMapQuest(timeout=100) logloc = logging.getLogger('locator') logloc.setLevel(logging.WARNING) ch = logging.StreamHandler() ch.setLevel(logging.WARNING) logloc.addHandler(ch) # choose and order your preference for geocoders here geocoders = [nominatim, arcgis, openmapquest] class Participant(object): def __init__(self, name=None,
def scrapingFL():
    """Scrape Florida 511 traffic-camera listings into list_FL_DOT.txt.

    Pages through http://fl511.com/Cameras.aspx with Selenium, geocodes each
    camera's highway/county with GoogleV3, and writes one
    ``country#state#city#snapshot_url#latitude#longitude`` line per camera.

    Fixes over the original:
      * the output file handle is now closed when scraping finishes, so
        buffered rows are flushed to disk (resource leak);
      * the catch-all handler uses ``except Exception`` instead of a bare
        ``except:`` so KeyboardInterrupt/SystemExit can still stop the run.
    """
    # setup
    fo = open("list_FL_DOT.txt", "w")
    # writing the header to the file
    fo.write("country#state#city#snapshot_url#latitude#longitude\n")
    driver = webdriver.Firefox()
    driver.get("http://fl511.com/Cameras.aspx")
    # NOTE(review): empty API key — relies on keyless GoogleV3 access.
    geolocator = GoogleV3("")
    # find elements in a page
    for p in xrange(1, 203):
        # printing page number
        print(" p." + str(p))
        for x in xrange(2, 12):
            try:
                # printing the order of the picture
                print("----" + str(x - 1))
                # getting the highway name
                highway = driver.find_element_by_xpath(
                    "//*[@id='MainContent_MainContent_CameraGridView']/tbody/tr[" + str(x) + "]/td[3]")
                # getting the county name
                county = driver.find_element_by_xpath(
                    "//*[@id='MainContent_MainContent_CameraGridView']/tbody/tr[" + str(x) + "]/td[2]")
                try:
                    image = driver.find_element_by_id("MainContent_MainContent_CameraGridView_ImageImage_" + str(x - 2))
                except NoSuchElementException:
                    # waiting for the Firefox to load
                    time.sleep(0.5)
                    continue
                # Printing to Screen is intended for debuging and mornitoring purposes only
                print(highway.text)
                print(county.text)
                citywant = county.text
                url = image.get_attribute("src")
                print(url)
                # NOTE(review): no space before "Florida" — the query reads
                # e.g. "I-95 Miami-DadeFlorida"; left as-is pending confirmation.
                location = geolocator.geocode(highway.text + " " + county.text + "Florida")
                print(location.latitude, location.longitude)
                # waiting for the Firefox to load
                time.sleep(0.5)
            except StaleElementReferenceException:
                # showing which kind of error to the screen
                print("->>>>>>>>>--------------------------->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ Stale Error is here")
                continue
            except AttributeError:
                # showing which kind of error to the screen
                print(
                    "->>>>>>>>>--------------------------->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ AttributeError is here")
                continue
            except geopy.exc.GeocoderServiceError:
                # showing which kind of error to the screen
                print(
                    "->>>>>>>>>--------------------------->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ Geopy error")
                continue
            except Exception:  # FIX: was a bare except; still catches all errors
                # showing which kind of error to the screen
                print(
                    "->>>>>>>>>--------------------------->>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-__-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_ others error")
                continue
            fo.write("USA#FL#")
            try:
                # priting city name
                fo.write(citywant + "#")
            except StaleElementReferenceException:
                # showing the error
                fo.write("--------------------------------------------------------------------------------------------------------------->>>>>>\n")
                continue
            fo.write(url + "#" + str(location.latitude) + "#" + str(location.longitude) + "\n")
        # Search for the nextBotton... We will do it untill we found it.
        while True:
            try:
                nextBotton = driver.find_element_by_id("MainContent_MainContent_CameraGridView_NextLinkButton")
                nextBotton.click()
            except StaleElementReferenceException:
                time.sleep(0.5)
                continue
            break
        # wait for the browser to load
        time.sleep(4)
    driver.close()
    # FIX: close the output file so buffered rows are flushed to disk.
    fo.close()
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 5 10:15:43 2016

@author: wooyol
"""

import numpy as np
import pandas as pd
import geopy
from geopy.geocoders import GoogleV3
from geopy.exc import GeocoderTimedOut
from geopy.exc import GeocoderServiceError
# BUG FIX: GeocoderUnavailable was referenced in an except clause below but
# never imported, turning a transient outage into a NameError.
from geopy.exc import GeocoderUnavailable

# NOTE(review): hard-coded API key — consider moving to an environment variable.
geolocator = GoogleV3(api_key="AIzaSyDvwglFuj1Ha8Fu09jYoPnKO_442oXcgKA",
                      timeout=None)
df = pd.read_csv('../refined.csv')


def do_geocode(address):
    """Geocode *address*, retrying until the geocoder answers.

    An iterative retry loop replaces the original self-recursion so that a
    long service outage cannot overflow the call stack (RecursionError).

    Parameters:
        address: free-form address string passed to GoogleV3.

    Returns:
        The geopy Location for *address* (or None if the service returns
        no match).
    """
    while True:
        try:
            return geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError, GeocoderUnavailable):
            # GeocoderUnavailable subclasses GeocoderServiceError in geopy,
            # but it is listed explicitly to preserve the original intent.
            continue
print("Hola ETL") print(os.getcwd()) try: import argparse flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args() except ImportError: flags = None # If modifying these scopes, delete your previously saved credentials # at ~/.credentials/sheets.googleapis.com-python-quickstart.json SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly' CLIENT_SECRET_FILE = 'creds/secreto_cliente.json' APPLICATION_NAME = 'Temblor' geolocator = GoogleV3(api_key=os.environ.get('GM_KEY')) # Dirección debe ser de la forma "Num Calle Ciudad" def dir_correct(calle, numero, ciudad, estado): k = [] k.append('Calle ' + calle + ' ' + numero) k.append(ciudad) k.append(estado + ', ' + 'MX') dirr = ', '.join(k) return dirr def obtain_latlong(dirr): try: location = geolocator.geocode(dirr, region='MX')
def cold_start(start, miles, length_range = None, difficulty = None):
    """Find trails within *miles* of the address/place *start*.

    Geocodes *start* with GoogleV3, builds a lat/lon bounding box of radius
    *miles*, filters the module-level trail DataFrame ``df`` by that box and
    the optional ``length_range`` bucket and ``difficulty`` level, then
    returns the matches sorted by true (vincenty) distance.

    Parameters:
        start: free-form location string to geocode.
        miles: search radius in miles.
        length_range: optional length bucket, one of
            '0-5', '5-10', '10-15', '15-20', '20-25', '25-30', '30+'.
        difficulty: optional 'Green', 'Blue' or 'Black'; adjacent blended
            ratings are included (e.g. 'Blue' also matches 'Green/Blue').

    Returns:
        A renamed, 1-indexed DataFrame sorted by 'Miles Away', or a message
        string when nothing matches.
    """
    earth_radius = 3960.0
    degrees_to_radians = math.pi / 180.0
    radians_to_degrees = 180.0 / math.pi

    # this gets to my api key in my bash rc
    geolocator = GoogleV3(api_key=os.environ['GOOGLE_API'])
    loc = geolocator.geocode(start)
    loc_lat_lon = (loc.latitude, loc.longitude)

    # Bounding box: one degree of longitude shrinks with cos(latitude).
    lat_diff = (miles / earth_radius) * radians_to_degrees
    r = earth_radius * math.cos(loc_lat_lon[0] * degrees_to_radians)
    lon_diff = (miles / r) * radians_to_degrees
    lat_range = (loc_lat_lon[0] - lat_diff, loc_lat_lon[0] + lat_diff)
    lon_range = (loc_lat_lon[1] - lon_diff, loc_lat_lon[1] + lon_diff)

    new_df = df[(df['latitude'] >= lat_range[0])
                & (df['latitude'] <= lat_range[1])]
    new_df = new_df[(new_df['longitude'] >= lon_range[0])
                    & (new_df['longitude'] <= lon_range[1])]

    # FIX: collapsed the seven copy-paste branches — every branch filtered on
    # equality with the selected bucket, so one membership test suffices.
    valid_length_buckets = {'0-5', '5-10', '10-15', '15-20',
                            '20-25', '25-30', '30+'}
    if length_range in valid_length_buckets:
        new_df = new_df[new_df['length_range'] == length_range]

    # Each difficulty level also admits the adjacent blended ratings.
    difficulty_map = {
        'Green': ['Green', 'Green/Blue'],
        'Blue': ['Green/Blue', 'Blue', 'Blue/Black'],
        'Black': ['Blue/Black', 'Black', 'Double Black'],
    }
    if difficulty in difficulty_map:
        new_df = new_df[new_df['difficulty'].isin(difficulty_map[difficulty])]

    if new_df.shape[0] == 0:
        return "There are no trails that meet your requirements. Try expanding your search."

    # Exact great-circle distance for the survivors of the coarse box filter.
    new_df['miles away'] = new_df.apply(get_vincenty, axis = 1,
                                        args = (loc_lat_lon,))
    columns_to_output = ['location', 'difficulty', 'length', 'ascent',
                         'descent', 'stars', 'category', 'miles away',
                         'summary', 'url', 'name']
    new_df = new_df[columns_to_output]
    new_df.sort_values(by = 'miles away', inplace = True)
    new_df = new_df.reset_index(drop=True)
    new_df.index = new_df.index + 1  # 1-based ranking for display
    new_df.columns = ['Location', 'Difficulty', 'Length', 'Ascent', 'Descent',
                      'Rating', 'Category', 'Miles Away', 'Summary', 'Link',
                      'Name']
    return new_df