def address(address):
    '''
    Given an address, use the Geocode API to retrieve tract data and
    lat/long data, which are passed on into the respective compute-score
    functions. Also triggers the demographic-distribution graphics.

    Inputs:
        address (str): street address (geocoded within Chicago, IL)

    Returns:
        (lat, longi, ed_score, sa_score) tuple

    NOTE(review): both retry loops are unbounded — an address the geocoder
    can never resolve will spin forever, as in the original.
    '''
    result = None
    tract = None

    # Retry until the geocoder responds. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit, so catch Exception instead.
    while result is None:
        try:
            result = cg.address(address, city='Chicago', state='IL')
        except Exception:
            pass

    # Retry until the response actually contains a tract number.
    while tract is None:
        try:
            tract = int(result[0]['geographies']
                        ['2010 Census Blocks'][0]['TRACT'])
        except (KeyError, IndexError, TypeError, ValueError):
            # Malformed/empty response: re-query, guarding the call so a
            # transient network error does not crash the retry loop
            # (the original re-queried unguarded here).
            try:
                result = cg.address(address, city='Chicago', state='IL')
            except Exception:
                pass

    # Interior-point coordinates of the matched 2010 census block.
    block = result[0]['geographies']['2010 Census Blocks'][0]
    lat = float(block['INTPTLAT'])
    longi = float(block['INTPTLON'])
    ed_score = edu.eduscore(lat, longi, 1000)
    sa_score = sani.get_sanitation_score(longi, lat)
    # Side effects: generate the demographic-distribution graphics.
    racedist(tract, tracttocomm)
    incomedist(tract, tracttocomm)
    agedist(tract, tracttocomm)
    return lat, longi, ed_score, sa_score
示例#2
0
def geocode_util(num, name, typ, city, state, zipcode):
    """
    Geocode one street address and return its census-tract GEOID.

    Inputs:
        num: house number
        name: street name
        typ: street type (e.g. 'Ave', 'St')
        city, state, zipcode: remaining address components

    Returns:
        GEOID string of the matched census tract, or None when the lookup
        raises or returns no match.
    """
    addr = "{} {} {}".format(num, name, typ)
    try:
        result = cg.address(addr,
                            city=city,
                            state=state,
                            zipcode=zipcode,
                            returntype='geographies')
        tract = result[0]['geographies']['Census Tracts'][0]['GEOID']
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt;
        # any lookup/parse failure still maps to None.
        tract = None
    return tract
示例#3
0
def confirm_address():
    """
    Geocode the submitted address form and render a confirmation page.

    On POST, persist the geocoder's matched address (tract name, centroid
    lat/lon, zip) as an Address row and stash the new primary key in the
    session under 'confirm_address_id'.
    """
    data = cg.address(request.form.get('address'),
                      city=request.form.get('city'),
                      state=request.form.get('state'),
                      zipcode=request.form.get('zipcode'))
    context = {
        'data': data,
    }
    if request.method == "POST":
        tract = data[0]['geographies']['Census Tracts'][0]
        address = Address(
            address=data[0]['matchedAddress'],
            census_tract=tract['BASENAME'],
            latitude=tract['CENTLAT'],
            longitude=tract['CENTLON'],
            zip=data[0]['addressComponents']['zip'],
        )
        db.session.add(address)
        db.session.commit()
        # Was `id = [] ... id = address.id`, shadowing the builtin `id`
        # with a dead list; store the committed primary key directly.
        session['confirm_address_id'] = address.id
        print(session['confirm_address_id'])
    return render_template('confirm_address.html', **context)
示例#4
0
def census_geocode(row, i):
    """
    Geocode one dataframe row, adding 'lat'/'lng' keys when a match is found.

    Inputs:
        row: mapping with 'street', 'city', 'state', 'zip' keys
        i (int): row index, printed every 1000 rows as a progress marker

    Returns:
        the same row, with 'lat'/'lng' set to the match's y/x coordinates
        when geocoding succeeded; unchanged otherwise.
    """
    matches = ()
    try:
        matches = cg.address(street=str(row['street']),
                             city=str(row['city']),
                             state=str(row['state']),
                             zipcode=str(row['zip']))
    except Exception:
        # Was a bare `except:`; any failure leaves `matches` empty.
        matches = ()
    if matches:
        coords = matches[0]['coordinates']
        row['lat'] = coords['y']
        row['lng'] = coords['x']
    if i % 1000 == 0:
        print(i)
    return row
示例#5
0
# Pre-create the output columns to be filled per-address below.
geodata['AREALAND'] = ''
geodata['INTPTLON'] = ''
geodata['MTFCC'] = ''
geodata['LWBLKTYP'] = ''
geodata['COUNTY'] = ''

# Measure Starting time
startingtime = time.time()

# Info
print('#######\tstart index: {}\n#######\tstart time: {}\n#######\tdestination folder: {}\n'.format(start, strftime('%H%M', gmtime()), destinationpath))

for i in range(start, n):
    print(i, n)
    # Get address
    searchresult = cg.address(street=geodata['street'].iloc[i], city=geodata['city'].iloc[i], state=geodata['state'].iloc[i], zipcode=geodata['zip'].iloc[i])

    percentage = round((i/n), 5)
    sys.stdout.write("\033[F") # Cursor up one line
    print('address {} downloading...\t{} done'.format(i, percentage))

    # If address search result empty, continue to next, else, save info
    # BUG FIX: `searchresult is []` compared identity against a brand-new
    # list literal and was therefore always False, so the retry branch
    # never ran. Test for emptiness instead.
    if not searchresult:

        # Info
        print('\t\n no results found! Retrying downloading ...')

        # wait 3 sec and retry, if then still empty, continue
        timer.sleep(3)
        searchresult = cg.address(street=geodata['street'].iloc[i], city=geodata['city'].iloc[i], state=geodata['state'].iloc[i], zipcode=geodata['zip'].iloc[i])
示例#6
0
def _parse_price(raw):
    """Parse a listing price like '$1,250,000' to int.

    Returns None when the price is missing or given as a range
    (contains '-'), matching the original inline logic.
    """
    if raw is None or '-' in raw:
        return None
    return int(raw.replace(',', '').strip('$'))


def _parse_sqft(raw):
    """Parse a building size like '12,500 SF' to int; None when missing."""
    if raw is None:
        return None
    return int(raw.replace(',', '').strip('SF'))


def listing_info(url_list):
    """
    Scrape sale listings: for each URL, fetch the page, parse the address
    (geocoding to a 2020 census block group when a postal code is present)
    and the property facts (column or table layout), collecting one dict
    per listing.

    Inputs:
        url_list: iterable of listing-page URLs

    Returns:
        list of dicts with address, price, size, class, year-built and
        bookkeeping fields.
    """
    count = 0
    Buildings = []
    for link in url_list:
        count += 1
        site_facts = {}
        url = "{}".format(link)  # Puts the list link in the loop
        r = requests.get(url, headers=headers)
        page_soup = bs(r.content, features="html.parser")
        # Trailing URL digits serve as the primary key
        id_array = url.split('/')
        site_facts['CS_ID'] = "LN-" + id_array[-2]
        site_facts['url'] = url
        loc = page_soup.find(
            "h1", class_="breadcrumbs__crumb breadcrumbs__crumb-title")
        try:  # If the listing has no address, skip it
            loc = loc.get_text()
        except Exception:
            continue
        # A trailing 5-digit run means the address ends with a postal code
        check = loc[-5:].isdigit()
        if check:
            a1 = loc.split(", ")
            site_facts['Address_Line'] = a1[0]
            site_facts['City'] = a1[1]
            site_facts['State'] = a1[2][0:2]
            site_facts['Postal_Code'] = a1[2][-5:]
            geocode = cg.address(street=site_facts['Address_Line'],
                                 city=site_facts['City'],
                                 state=site_facts['State'],
                                 zipcode=site_facts['Postal_Code'])
            try:
                # First 12 digits of the block GEOID = block-group id
                GEOID = geocode[0]['geographies']['2020 Census Blocks'][0][
                    'GEOID'][0:12]
                site_facts['bg_geo_id'] = GEOID
                print(count, site_facts['Address_Line'], site_facts['City'],
                      GEOID)
            except Exception:
                site_facts['bg_geo_id'] = None
        else:
            site_facts['Address_Line'] = loc
            site_facts["City"] = "N/A"
            site_facts['State'] = "N/A"
            site_facts['Postal_Code'] = "N/A"
            site_facts['bg_geo_id'] = None

        # Listings come in two layouts: label/data columns, or a table.
        is_column = bool(
            page_soup.find("div", {
                "class": "property-facts__labels-one-col"
            }))
        if is_column:
            property_label = []
            property_data = []
            labels = page_soup.find("div", {
                "class": "property-facts__labels-one-col"
            }).find_all('div', recursive=False)
            datas = page_soup.find("div", {
                "class": "property-facts__data-one-col"
            }).find_all('div', recursive=False)
            # Strip tabs/newlines/returns and pair labels with data
            for label in labels:
                property_label.append(
                    re.sub(r"[\n\r\t]*", "", label.get_text()))
            for data in datas:
                property_data.append(
                    re.sub(r"[\n\r\t]*", "", data.get_text()))
            temp_dict1 = dict(zip(property_label, property_data))

            site_facts['Property_Type'] = temp_dict1.get(
                'Property Type', 'N/A')
            site_facts['Price'] = _parse_price(temp_dict1.get('Price', None))
            site_facts['SquareFeet'] = _parse_sqft(
                temp_dict1.get('Building Size', None))
            site_facts['Building_Class'] = temp_dict1.get(
                'Building Class', 'N/A')

            if 'Year Built' in temp_dict1:
                site_facts['Year_Built'] = temp_dict1['Year Built']
            elif 'Year Built/Renovated' in temp_dict1:
                site_facts['Year_Built'] = temp_dict1['Year Built/Renovated']
            else:
                site_facts['Year_Built'] = "N/A"

            site_facts['Sale_Type'] = temp_dict1.get('Sale Type', 'N/A')
            site_facts["Picture_url"] = "N/A"
            site_facts["Upload_Date"] = datetime.now().strftime("%Y-%m-%d")
            ## TODO Add currently listed to dictionary.  How will we store the urls that are not active?
            site_facts["Currently_Listed"] = True
            site_facts["Sale_Leased"] = "Sale"
            Buildings.append(site_facts)
            sleep(randint(5, 10))
        else:
            table = page_soup.table
            table_data = table.find_all('td')
            t_list = []
            for td in table_data:
                t_list.append(re.sub(r"[\n \r\t]*", "", td.get_text()))
            # Alternate cells are label/value pairs
            temp_dict2 = {
                t_list[i]: t_list[i + 1]
                for i in range(0, len(t_list), 2)
            }

            site_facts['Property_Type'] = temp_dict2.get('PropertyType', 'N/A')
            site_facts['Price'] = _parse_price(temp_dict2.get('Price', None))

            # BUG FIX: the original used four independent `if`s with a final
            # `else`, so a size found under BuildingSize/TotalBuildingSize/
            # UnitSize was clobbered back to None whenever
            # RentableBuildingArea was absent. Check the keys in the
            # original's effective precedence order (last write won) and
            # stop at the first hit.
            size_raw = None
            for size_key in ('RentableBuildingArea', 'UnitSize',
                             'TotalBuildingSize', 'BuildingSize'):
                if size_key in temp_dict2:
                    size_raw = temp_dict2[size_key]
                    break
            site_facts['SquareFeet'] = _parse_sqft(size_raw)

            site_facts['Building_Class'] = temp_dict2.get(
                'BuildingClass', 'N/A')

            if 'YearBuilt/Renovated' in temp_dict2:
                site_facts['Year_Built'] = temp_dict2['YearBuilt/Renovated']
            elif 'YearBuilt' in temp_dict2:
                site_facts['Year_Built'] = temp_dict2['YearBuilt']
            else:
                site_facts['Year_Built'] = "N/A"

            site_facts['Sale_Type'] = temp_dict2.get('SaleType', 'N/A')
            site_facts["Picture_url"] = "N/A"
            site_facts["Upload_Date"] = datetime.now().strftime("%Y-%m-%d")
            site_facts["Currently_Listed"] = True
            site_facts["Sale_Leased"] = "Sale"
            Buildings.append(site_facts)
            sleep(randint(5, 10))
    return Buildings
示例#7
0
def get_info(df, address_col, state_col, zip_col, city_muni_col, subset_size,
             seed):
    '''
    Accepts a data frame and four strings specifying column names where
    the full address, state, zip code, and city/municipality are stored.
    Any rows with NAs in these columns are filtered. A random subset of
    `subset_size` rows (drawn with `seed`) is geocoded via censusgeocode,
    and 'GEOID', 'LATITUDE' and 'LONGITUDE' columns are added (np.nan
    where no match was found). Returns the subset frame.
    '''
    # Filter out any rows with NA in address related columns
    f_df = df.dropna(subset=[address_col, state_col, zip_col, city_muni_col])

    # Geocoding is slow, so only a random subset of rows is processed
    subset_f_df = f_df.sample(n=subset_size, random_state=seed)

    # Collect the GEOIDs and lat/lon per row
    geoids = []
    lats = []
    lons = []

    # Takes a long time to run; use a counter to report progress
    total_rows = len(subset_f_df)
    rows_complete = 0

    # Iterate each row as a tuple and pull out the address components.
    # NOTE(review): getattr on itertuples requires the column names to be
    # valid identifiers (no spaces) — as in the original.
    for row in subset_f_df.itertuples():
        full_add = str(getattr(row, address_col))
        state = str(getattr(row, state_col))
        zip_code = str(int(getattr(row, zip_col)))
        city = str(getattr(row, city_muni_col))

        # Call censusgeocode for each row to get the geoid
        result = cg.address(full_add, city=city, state=state, zipcode=zip_code)

        if len(result) == 0:
            # No match: record NaNs. (np.nan — the np.NaN alias was removed
            # in NumPy 2.0.)
            geoid = np.nan
            lat = np.nan
            lon = np.nan
        else:
            block = result[0]['geographies']['2010 Census Blocks'][0]
            # First 11 GEOID digits = state+county+tract
            geoid = block['GEOID'][0:11]
            # float() already accepts a leading '+' sign, so the Census
            # '+41.88...' format needs no manual stripping.
            lat = float(block['INTPTLAT'])
            lon = float(block['INTPTLON'])

        geoids.append(geoid)
        lats.append(lat)
        lons.append(lon)

        rows_complete += 1

        # Report progress
        prog = round(rows_complete / total_rows, 4)
        print(prog * 100, "%")

    # Append the collected columns to the frame
    subset_f_df['GEOID'] = geoids
    subset_f_df['LATITUDE'] = lats
    subset_f_df['LONGITUDE'] = lons

    # Print the NaN proportion
    nan_prop = subset_f_df['GEOID'].isna().sum() / total_rows
    print('Percent NaN GEOIDs:', nan_prop * 100, '%')

    return subset_f_df
# Pre-create the FIPS column so the per-row geocoding loop can fill it in.
df['FIPS'] = ''

def clean_ids(x, desired_len):
    """Left-pad *x* with zeros to *desired_len* characters.

    Inputs:
        x: value to pad (converted to str); FIPS codes lose their leading
           zeros when read as integers, this restores them
        desired_len (int): target width

    Returns:
        str of length >= desired_len (unchanged if already long enough).
    """
    # rjust reproduces the original while-loop prepend exactly
    # (unlike zfill, it does not special-case a leading sign).
    return str(x).rjust(desired_len, "0")
    

# Geocode every row and write its 5-digit FIPS (state + county) back to df.
for i in range(len(df)):
    address = df.iloc[i]['address']
    state = df.iloc[i]['state']
    city = df.iloc[i]['city']

    result = cg.address(address, city=city, state=state)

    county_id = result[0]['geographies']['Counties'][0]['COUNTY']
    state_id = result[0]['geographies']['States'][0]['STATE']

    # Restore leading zeros dropped by the geocoder's integer-like codes
    county_id = clean_ids(county_id, 3)
    state_id = clean_ids(state_id, 2)

    address_fips = state_id + county_id
    # BUG FIX: `df.iloc[i]['FIPS'] = ...` is chained indexing — it assigns
    # into a temporary copy and the frame is never updated. Write through
    # .loc with the row's label instead.
    df.loc[df.index[i], 'FIPS'] = address_fips



import plotly
import plotly.express as px
from plotly.offline import plot
示例#9
0
def building_dict(url_list):
    """
    Scrape every available-space unit from each lease-listing URL.

    For each page, parse the address once (geocoding to a 2020 census
    block group when a postal code is present), then build one dict per
    unit with price, size, term, condition and bookkeeping fields.

    Inputs:
        url_list: iterable of listing-page URLs

    Returns:
        list of dicts, one per available-space unit.
    """
    buildings = []
    progress = 0
    for link in url_list:
        url = "{}".format(link)  # Puts the list link in the loop
        r = requests.get(url, headers=headers)
        page_soup = bs(r.content, features="html.parser")
        # The titles in order are "Space", "Size", "Term", "Rate",
        # "Space_Use", "Condition", "Available"
        units = page_soup.find_all(
            "ul",
            class_=
            "available-spaces__accordion-data no-margin js-as-column-width")
        progress += 1
        counter = 1  # Used to ensure unique CS_ID
        print(progress, url)
        # The page address and URL id are identical for every unit, so
        # resolve them once per page (the original re-queried both inside
        # the per-unit loop).
        loc = page_soup.find(
            "h1", class_="breadcrumbs__crumb breadcrumbs__crumb-title")
        if loc is None:
            # No address on the page: the original skipped each unit
            # individually; skipping the page is equivalent.
            sleep(randint(5, 10))
            continue
        loc = loc.get_text()
        # A trailing 5-digit run means the address ends with a postal code
        check = loc[-5:].isdigit()
        id_array = url.split('/')  # Trailing digits feed the primary key
        for item in units:
            site_facts = {}
            units_txt = item.get_text("|", strip=True).split("|")
            if check:
                a1 = loc.split(", ")
                site_facts['Address_Line'] = a1[0]
                site_facts['City'] = a1[1]
                site_facts['State'] = a1[2][0:2]
                site_facts['Postal_Code'] = a1[2][-5:]
                # A '/'-terminated token means the type wrapped two cells
                if units_txt[-4].endswith("/"):
                    site_facts['Property_Type'] = units_txt[-4] + units_txt[-3]
                else:
                    site_facts['Property_Type'] = units_txt[-3]
                geocode = cg.address(street=site_facts['Address_Line'],
                                     city=site_facts['City'],
                                     state=site_facts['State'],
                                     zipcode=site_facts['Postal_Code'])
                try:
                    # First 12 digits of the block GEOID = block-group id
                    GEOID = geocode[0]['geographies']['2020 Census Blocks'][0][
                        'GEOID'][0:12]
                    site_facts['bg_geo_id'] = GEOID
                    print(site_facts['Address_Line'], site_facts['City'],
                          GEOID)
                except Exception:
                    site_facts['bg_geo_id'] = None
            else:
                site_facts['Address_Line'] = loc
                site_facts["City"] = "N/A"
                site_facts['State'] = "N/A"
                site_facts['Postal_Code'] = "N/A"
                if units_txt[-4].endswith("/"):
                    site_facts['Property_Type'] = units_txt[-4] + units_txt[-3]
                else:
                    site_facts['Property_Type'] = units_txt[-3]
                # The original assigned None here and then immediately
                # overwrote it with "N/A"; keep the value that survived.
                site_facts['bg_geo_id'] = "N/A"
            # CS_ID and URL
            site_facts['CS_ID'] = "LN-" + id_array[-2] + "-" + str(counter)
            site_facts['url'] = url
            # Monthly price: the element before the "/MO" marker
            try:
                monthp_index = units_txt.index("/MO") - 1
                month_price = units_txt[monthp_index].replace(',', '')
                if '.' in month_price:
                    site_facts['Price_month'] = int(
                        month_price.split('.')[0].strip('$'))
                else:
                    site_facts['Price_month'] = int(month_price.strip('$'))
            except ValueError:
                site_facts['Price_month'] = None
            # Yearly price: the element before the "/YR" marker
            try:
                yearp_index = units_txt.index("/YR") - 1
                year_price = units_txt[yearp_index].replace(',', '')
                site_facts['Price_year'] = int(year_price.strip('$'))
            except ValueError:
                site_facts['Price_year'] = None
            # Square feet: a trailing '-' means a "min - max SF" range
            if units_txt[1].endswith("-"):
                ft_int = units_txt[1].replace(',', '')
                site_facts['SquareFeet'] = int(ft_int.strip("-"))
                max_ft = units_txt[2].replace(',', '')
                site_facts['Expansion_SqrFt'] = int(max_ft.strip(' SF'))
            else:
                ft_int = units_txt[1].replace(',', '')
                site_facts['SquareFeet'] = int(ft_int.strip(' SF'))
                site_facts['Expansion_SqrFt'] = None
            # Space
            site_facts['Space'] = units_txt[0]
            # Condition
            if units_txt[-2] == '-':
                site_facts['Condition'] = 'Not Listed'
            else:
                site_facts['Condition'] = units_txt[-2]
            # Available
            site_facts['Available'] = units_txt[-1]
            # Term shifts one slot when a size range consumed an extra cell
            if site_facts['Expansion_SqrFt'] is None:
                site_facts['Term'] = units_txt[2]
            else:
                site_facts['Term'] = units_txt[3]
            site_facts['Upload_Date'] = datetime.now().strftime("%Y-%m-%d")
            site_facts["Currently_Listed"] = True
            site_facts['Sale_Lease'] = "Lease"
            buildings.append(site_facts)
            counter += 1
        sleep(randint(5, 10))
    return buildings
示例#10
0

def clean_ids(x, desired_len):
    """Left-pad *x* with zeros to *desired_len* characters.

    Inputs:
        x: value to pad (converted to str); FIPS codes lose their leading
           zeros when read as integers, this restores them
        desired_len (int): target width

    Returns:
        str of length >= desired_len (unchanged if already long enough).
    """
    # rjust reproduces the original while-loop prepend exactly
    # (unlike zfill, it does not special-case a leading sign).
    return str(x).rjust(desired_len, "0")


# Zero-pad the FIPS identifier columns so string comparisons work
# (leading zeros are lost when the codes are read as integers).
fips['FIPSCd'] = fips['FIPSCd'].apply(lambda x: clean_ids(x, 5))
fips['STATEFP'] = fips['STATEFP'].apply(lambda x: clean_ids(x, 2))
fips['COUNTYFP'] = fips['COUNTYFP'].apply(lambda x: clean_ids(x, 3))

# Geocode a sample address with the Census geocoder.
result = cg.address('75 Dayton Road',
                    city='Redding',
                    state='CT',
                    zipcode='06896')

# County and state codes from the first match in the geocoder response.
county_id = result[0]['geographies']['Counties'][0]['COUNTY']

state_id = result[0]['geographies']['States'][0]['STATE']

county_id = clean_ids(county_id, 3)
state_id = clean_ids(state_id, 2)

# 5-digit FIPS = 2-digit state code + 3-digit county code.
address_fips = state_id + county_id
print(address_fips)

# Bare expressions: their values only display in a notebook/REPL session.
fips[fips.FIPSCd == address_fips].head()

fips.head()