示例#1
0
def list_zips(df):
    '''
    Assign a zip code to every block.

    Rows without a location are dropped, the remaining rows are grouped by
    block, and each block is given its most common latitude and longitude
    (the mode). When several values tie for the mode, the first one pandas
    reports is used. The find_zip helper is then called on each block's
    coordinates.

    Inputs:
      df (pandas df): data frame with all crime information; the columns
        'location', 'block', 'latitude' and 'longitude' are read

    Returns:
      block_df (pandas df): one row per block with the columns 'block',
        'latitude', 'longitude' and 'zipcode'
    '''
    def first_mode(values):
        # Series.mode() returns every tied value and drops NaN, so it can
        # hold several entries or none at all; reduce it to one scalar so
        # each block keeps its own coordinate.
        modes = values.mode()
        return modes.iloc[0] if len(modes) else np.nan

    df_valid = df[pd.notnull(df['location'])]
    # Columns are selected with a list: tuple-style selection on a groupby
    # is no longer supported by current pandas.
    blocks = df_valid.groupby(['block'])[['latitude', 'longitude']] \
                     .agg(first_mode)
    block_df = blocks.reset_index()

    search = uszipcode.SearchEngine(simple_zipcode=True)
    block_df.loc[:, 'zipcode'] = block_df.apply(
        lambda x: find_zip(search, x['latitude'], x['longitude']), axis=1)

    return block_df
 def get_us_search():
     """Return a simple-zipcode SearchEngine that reads a pre-populated DB directory.

     The engine is pointed at ``../../data/uszipcode`` relative to this
     module, a known path that already contains the database the library
     would otherwise download. This avoids depending on the download host,
     which can be flaky; see
     https://github.com/MacHu-GWU/uszipcode-project/issues/40
     """
     module_dir = os.path.dirname(__file__)
     bundled_db_dir = os.path.join(module_dir, "..", "..", "data", "uszipcode")
     return uszipcode.SearchEngine(
         simple_zipcode=True,
         db_file_dir=bundled_db_dir,
     )
示例#3
0
def zip_find(recipient_type):
    """Ask for a mailing zip code and return a "city, state zip" string.

    Every non-digit character is stripped from the user's answer before the
    lookup. When the lookup result is falsy, the user is asked to type the
    whole city/state/zip line and that answer is returned unchanged.

    Args:
        recipient_type: label inserted into the prompt text.

    Returns:
        The formatted place string, or the user's manual entry.
    """
    prompt = "Enter the " + recipient_type + "'s mailing zip code."
    answer = get_string(prompt)
    code = ''.join([ch for ch in answer if ch.isdigit()])

    engine = uszipcode.SearchEngine()
    match = engine.by_zipcode(code)
    if match:
        return match.city + ", " + match.state + " " + code
    return get_string("Manually enter the city, state, and zip code.")
 def look_up_zip(self, zip, country, formatted=False):
     """Look up a postal code in the US or Canadian database.

     :param zip: the postal code to look up (the name shadows the builtin
         but is kept so keyword callers keep working)
     :param country: "US" or "CA"; any other value yields None
     :param formatted: when true, return
         ``self.format_place(zip, city, region)`` instead of the raw record
     :return: the lookup record, the formatted place, or None for an
         unsupported country
     """
     # Default result so an unsupported country returns None instead of
     # raising UnboundLocalError at the return statement.
     info = None
     if country == "US":
         info = uszipcode.SearchEngine(simple_zipcode=True).by_zipcode(zip)
         # Callers treat a falsy result as "not found"; hand it back as-is
         # rather than reading city/state off a missing record.
         if formatted and info:
             info = self.format_place(zip, info.major_city, info.state)
     elif country == "CA":
         info = PostalCodeDatabase()[zip]
         if formatted:
             info = self.format_place(zip, info.city, info.province)
     return info
示例#5
0
    def validate_zip_by_api(self):
        """Check that weather.verifyLocation gives equal results on repeated calls.

        Returns True straight away when the optional uszipcode package is
        not installed; otherwise runs the assertions and returns None.
        """
        try:
            import uszipcode
        except ModuleNotFoundError:
            # Nothing to check without the optional dependency.
            return True

        # NOTE(review): this engine is built but never handed to
        # verifyLocation -- confirm whether it should be passed along.
        search = uszipcode.SearchEngine(simple_zipcode=True)

        # NOTE(review): each assertion compares one verifyLocation call with
        # a second identical call, so it only checks repeatability.
        for zip_code in ("68046", "99999"):
            first_result = weather.verifyLocation(zip_code)
            self.assertEqual(weather.verifyLocation(zip_code), first_result)
    def _pandas(cls, column, central_zip=None, radius_in_miles=10, **kwargs):
        """Flag each zip code in ``column`` that lies within a radius of ``central_zip``.

        Args:
            column: object whose ``apply`` is called with a per-value
                function; each value is converted with ``int()`` before
                the lookup.
            central_zip: zip code used as the centre of the search.
            radius_in_miles: largest distance that still counts as inside.
            **kwargs: accepted but not used here.

        Returns:
            Whatever ``column.apply`` returns for the per-value
            "distance <= radius_in_miles" check.
        """
        engine = uszipcode.SearchEngine()
        center = engine.by_zipcode(central_zip)

        def _is_within_radius(loc):
            # The centre coordinates are read per value, before the lookup.
            center_lat, center_long = center.lat, center.lng
            candidate = engine.by_zipcode(int(loc))
            distance = candidate.dist_from(lat=center_lat, lng=center_long)
            return distance <= radius_in_miles

        return column.apply(_is_within_radius)
示例#7
0
def Get_Weather(zip_or_city):
    """Build an OpenWeatherMap query for a zip code or city name and print the forecast.

    A five-digit input is treated as a US zip code and converted to
    latitude/longitude with uszipcode; anything else is sent as a city-name
    query. The query parameters are handed to Print_Weather. Any failure
    along the way is reported to the user with a generic message.

    Args:
        zip_or_city: user input, either a five-digit zip code or a city name.

    Returns:
        None.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    const_APP_ID = "4861192b2ae379f9b65947707449278c"

    try:
        # A zip code is exactly five digits; other digit strings fall
        # through to the city-name query.
        is_zip_code = len(zip_or_city) == 5 and zip_or_city.isdigit()
        if is_zip_code:
            # Convert the zip code to coordinates for the API call:
            # api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&APPID=...
            search = uszipcode.SearchEngine()
            zipcode = search.by_zipcode(zip_or_city)
            parameters = {
                "lat": zipcode.lat,
                "lon": zipcode.lng,
                "APPID": const_APP_ID,
            }
        else:
            # City-ID lookup from city.list.json is not implemented yet, so
            # the city name itself is sent in the query.
            parameters = {
                "q": zip_or_city,
                "APPID": const_APP_ID,
            }

        Print_Weather(parameters)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and interpreter exit still
        # propagate while every lookup/network failure is reported.
        print("There was an issue retrieving your weather information. Please try again later.")
示例#8
0
文件: weather.py 项目: choman/dsc510
def getLocation():
    """Repeatedly ask the user for a location and show its weather.

    Each pass asks for a location and a unit type, checks the location
    with verifyLocation and, when that gives a truthy result, fetches and
    displays the weather for it.

    Returns:
        Nothing
    """
    # The zip-code search engine is only built when uszipcode use is enabled.
    search = uszipcode.SearchEngine(simple_zipcode=True) if USE_USZIPCODE else None

    while True:
        place = requestWeatherLocation()
        unit_choice = requestWeatherType()

        verified = verifyLocation(place, search)
        print_debug(f'zipinfo = {verified}')

        if not verified:
            continue

        report = getWeather(verified, unit_choice)
        display_Weather(report, verified, unit_choice)

    # NOTE(review): unreachable as written -- the loop above has no break.
    print_debug(f'location = {place}')
# Copy every non-empty cell of the offline data workbook into the writable copy
for i in range(dataRead.nrows):
    for p in range(dataRead.ncols):
        cell_content = dataRead.cell_value(i, p)
        # Empty cells are skipped because each cell can only be written once
        if cell_content != "":
            dataWrite.write(i, p, cell_content)

# Collect the unix times stored in the first column of the offline data workbook
UnixTimeData = [dataRead.cell_value(row, 0) for row in range(dataRead.nrows)]

print(UnixTimeData)

# Zip code search engine used for the coordinate lookups
search = uszipcode.SearchEngine(simple_zipcode=True)

# Count number written to offline data workbook, used later to determine what line to write to in offline data workbook
WrittenIterations = 0

# Iterate through each row in base workbook (row 0 is skipped)
for i in range(sheetRead.nrows - 1):
    print("StartingRow" + str(i + 1))
    # Look the zip code up once and reuse the record for both coordinates,
    # then write longitude and latitude to the output workbook
    zip_record = search.by_zipcode(str(int(sheetRead.cell_value(i + 1, 0))))
    Longitude = zip_record.lng
    Latitude = zip_record.lat
    sheetWrite.write(i + 1, 4, Longitude)
    sheetWrite.write(i + 1, 5, Latitude)

    # Get Day Month and Year from the workbook
    Day = str(int(sheetRead.cell_value(i + 1, 1)))
#Find which factors' columns are sparsely filled or completely empty
print("Number of null values for each factor:")
print(pd.isnull(data).sum(), '\n')
#Find the number of trips with a trip distance of zero
#(total rows minus the rows whose distance is truthy, i.e. non-zero)
stationary_trips = len(data.index) - (data[["Trip_distance"
                                            ]].astype(bool).sum(axis=0))
#stationary_trips is a one-element Series; take the element explicitly,
#since calling int() on a Series is deprecated in recent pandas
print("Trips of zero distance = ", int(stationary_trips.iloc[0]), '\n')  #20,592
"""Question 2"""
print("*** Question 2 ***\n")
#Cluster the trip starting coordinates into four groups to find pickup hubs
pickup_coords = data[['Pickup_latitude', 'Pickup_longitude']]
kmeans = KMeans(n_clusters=4).fit(pickup_coords)
centroids = kmeans.cluster_centers_
search = usz.SearchEngine()
print("Starting location hubs based on latitude and longitude")
print(centroids, '\n')
#Convert three of the centroids into zip code lookups (radius=1)
#NOTE(review): centroid index 1 is skipped and KMeans is not seeded, so the
#indices may not pick the same hubs on every run -- confirm this is intended
point1, point2, point3 = [
    search.by_coordinates(centroids[k][0], centroids[k][1], radius=1)
    for k in (0, 2, 3)
]
print(point1, '\n', point2, '\n', point3, '\n\n')
print(
    'Three points where many customers request taxi pickups (within a mile): New York (10035), Brooklyn (11205), and'
    ' Elmhurst (11373)\n\n')
#Histogram of trip lengths; the first edge of 0.001 leaves out zero-distance trips
bin_edges = [0.001] + list(range(1, 12))
hist = data.hist("Trip_distance", bins=bin_edges)
plt.title("Green Taxi September 2015 Trip Lengths (#2)")
plt.xlabel("Trip distance (miles)")
示例#11
0
"""
Written for a request from my brother to collate contact information for
Advanced Practice Registered Nurses (ARNPs) in the Miami area, using public
data from https://appsmqa.doh.state.fl.us/downloadnet/Licensure.aspx
"""
from pprint import pprint
import uszipcode

zipsearch = uszipcode.SearchEngine()
# uszipcode limits the number of results a search returns unless told
# otherwise; returns=None asks for every match so no Miami zip is dropped.
# NOTE(review): the lookup is by city name only -- confirm whether it should
# be restricted to Florida.
TARGETZIPS = [res.zipcode for res in zipsearch.by_city("miami", returns=None)]

with open("arnp_data_fl.txt", "r") as f:
    # Lines read from a file keep their newline, so test the stripped text
    # to actually drop blank lines.
    lines = [text for text in (raw.strip() for raw in f) if text]

# First line is the pipe-delimited header; the remaining lines are records.
columns = [colname for colname in lines[0].split("|") if colname]
MAILZIP = next(key for key in columns if key.startswith("Mailing") and "ZIP" in key)
PRACZIP = next(key for key in columns if key.startswith("Practice") and "ZIP" in key)
data = [dict(zip(columns, line.split("|"))) for line in lines[1:]]

# Set membership keeps the filter linear in the number of records; .get()
# tolerates records that are too short to carry both zip columns.
_target_zip_set = set(TARGETZIPS)
filtered = [
    datum
    for datum in data
    if datum.get(MAILZIP) in _target_zip_set
    or datum.get(PRACZIP) in _target_zip_set
]

print("Total records: %d" % len(data))
print("Number of records in Miami: %d" % len(filtered))

with open("miami_ARNPs.txt", "w") as outf:
    def validate_geographic_areas(self, values, db):
        """Validate a JSON-encoded list of geographic areas and group them by country.

        Each entry may be "everywhere", a two-letter U.S. state or Canadian
        province abbreviation, a full Canadian province name, a Canadian or
        U.S. zip code, or a "[city], [state]" / "[county], [state]" pair.
        Anything else is checked with self.find_location_through_registry.
        Entries that are not non-empty strings are skipped.

        :param values: JSON string holding the list of entries
        :param db: passed through to find_location_through_registry
        :return: JSON string of {"US": [...], "CA": [...]} on success;
            otherwise the result of UNKNOWN_LOCATION.detailed(...) for the
            first entry that cannot be located, or the ProblemDetail that
            the registry lookup returned
        """
        # Note: the validator does not recognize data from US territories other than Puerto Rico.

        us_search = uszipcode.SearchEngine(simple_zipcode=True)
        ca_search = PostalCodeDatabase()
        CA_PROVINCES = {
            "AB": "Alberta",
            "BC": "British Columbia",
            "MB": "Manitoba",
            "NB": "New Brunswick",
            "NL": "Newfoundland and Labrador",
            "NT": "Northwest Territories",
            "NS": "Nova Scotia",
            "NU": "Nunavut",
            "ON": "Ontario",
            "PE": "Prince Edward Island",
            "QC": "Quebec",
            "SK": "Saskatchewan",
            "YT": "Yukon Territories"
        }

        locations = {"US": [], "CA": []}

        for value in json.loads(values):
            flagged = False
            if value == "everywhere":
                locations["US"].append(value)
            # Check the type before calling len(): JSON numbers and nulls
            # have no length and would otherwise raise TypeError.
            elif isinstance(value, basestring) and len(value):
                if len(value) == 2:
                    # Is it a US state or Canadian province abbreviation?
                    if value in CA_PROVINCES:
                        locations["CA"].append(CA_PROVINCES[value])
                    elif len(us_search.query(state=value)):
                        locations["US"].append(value)
                    else:
                        return UNKNOWN_LOCATION.detailed(
                            _('"%(value)s" is not a valid U.S. state or Canadian province abbreviation.',
                              value=value))
                elif value in CA_PROVINCES.values():
                    locations["CA"].append(value)
                elif self.is_zip(value, "CA"):
                    # Is it a Canadian zipcode?
                    try:
                        info = self.look_up_zip(value, "CA")
                        formatted = "%s, %s" % (info.city, info.province)
                        # In some cases--mainly involving very small towns--even if the zip code is valid,
                        # the registry won't recognize the name of the place to which it corresponds.
                        registry_response = self.find_location_through_registry(
                            formatted, db)
                        if registry_response:
                            locations["CA"].append(formatted)
                        else:
                            return UNKNOWN_LOCATION.detailed(
                                _('Unable to locate "%(value)s" (%(formatted)s).  Try entering the name of a larger area.',
                                  value=value,
                                  formatted=formatted))
                    except Exception:
                        # Any lookup failure is reported as an invalid
                        # zipcode; Exception (not a bare except) lets
                        # KeyboardInterrupt/SystemExit propagate.
                        return UNKNOWN_LOCATION.detailed(
                            _('"%(value)s" is not a valid Canadian zipcode.',
                              value=value))
                elif len(value.split(", ")) == 2:
                    # Is it in the format "[city], [state abbreviation]" or "[county], [state abbreviation]"?
                    city_or_county, state = value.split(", ")
                    if us_search.by_city_and_state(city_or_county, state):
                        locations["US"].append(value)
                    elif len([
                            x
                            for x in us_search.query(state=state, returns=None)
                            if x.county == city_or_county
                    ]):
                        locations["US"].append(value)
                    else:
                        # Flag this as needing to be checked with the registry
                        flagged = True
                elif self.is_zip(value, "US"):
                    # Is it a US zipcode?
                    info = self.look_up_zip(value, "US")
                    if not info:
                        return UNKNOWN_LOCATION.detailed(
                            _('"%(value)s" is not a valid U.S. zipcode.',
                              value=value))
                    locations["US"].append(value)
                else:
                    flagged = True

                if flagged:
                    registry_response = self.find_location_through_registry(
                        value, db)
                    if registry_response and isinstance(
                            registry_response, ProblemDetail):
                        return registry_response
                    elif registry_response:
                        # A non-problem response is used as the key into
                        # `locations`.
                        locations[registry_response].append(value)
                    else:
                        return UNKNOWN_LOCATION.detailed(
                            _('Unable to locate "%(value)s".', value=value))
        return json.dumps(locations)
示例#13
0
 def __init__(self):
     """Create the zip-code search engine and an empty cache dict."""
     # The engine's database directory is set to /tmp/.
     engine = uszipcode.SearchEngine(
         simple_zipcode=True,
         db_file_dir="/tmp/",
     )
     self._search = engine
     self._cache = {}
示例#14
0
import pandas as pd
import numpy as np
import uszipcode as uz 
# Module-level uszipcode search engine, created with simple_zipcode=False.
# NOTE(review): presumably this selects the library's comprehensive
# (non-simple) zipcode database -- confirm against the installed version.
search = uz.SearchEngine(simple_zipcode=False)

# Names of the spatial features.
# NOTE(review): these look like attribute names on uszipcode zipcode
# records -- confirm against where the list is consumed.
spatial_features = [
    "population_density",
    "population_by_gender",
    "population_by_race",
    "median_household_income",
    "employment_status",
    "household_income",
    "educational_attainment_for_population_25_and_over",
    "school_enrollment_age_3_to_17"
]

temporal_features = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
    "year",
    "day1",