# Example #1
0
def hifld_national():
    """Convert the HIFLD mobile home parks CSV into schema records.

    Reads 'source_files/HIFLD_Mobile_Home_Parks.csv' through
    ``tpcsv.open_source_csv``, skips every row whose ``NAICS_CODE`` is
    '721211', and passes the remaining rows through ``db.apply_schema``.
    Each record is printed, written on its own to "pa_complete.csv" via
    ``tpcsv.write_source_csv``, and collected into the returned list.

    Returns:
        list: the records produced by ``db.apply_schema``, in file order.
    """
    # Columns seen in the source file (the first one is 'X' prefixed by a
    # UTF-8 byte-order mark):
    # ['VAL_DATE', 'NAICS_DESC', 'COUNTRY', 'NAICS_CODE', 'UNITS', 'RevGeoFlag', 'SOURCEDATE', 'MHPID', 'WEBSITE', 'VAL_METHOD', 'SOURCE', 'ZIP4', 'STATUS', 'COUNTYFIPS', 'ZIP', '\xef\xbb\xbfX', 'LONGITUDE', 'COUNTY', 'STATE', 'FID', 'Y', 'SIZE', 'CITY', 'YEARBLT', 'NAME', 'ADDRESS2', 'TELEPHONE', 'ADDRESS', 'LATITUDE', 'TYPE']
    payload = tpcsv.open_source_csv('source_files/HIFLD_Mobile_Home_Parks.csv')

    # payload is indexed as (rows, headers); the headers are not used here.
    rows, _headers = payload[0], payload[1]

    collected = []

    for row in rows:
        # NOTE(review): 721211 looks like the NAICS code for RV parks and
        # campgrounds, i.e. not mobile home parks -- confirm.
        if row['NAICS_CODE'] == '721211':
            continue

        address = row['ADDRESS']
        county = row['COUNTY']
        state = row['STATE']
        zipcode = row['ZIP']
        name = row['NAME']
        city = row['CITY']
        lot_count = row['UNITS']
        phone = row['TELEPHONE']
        website = row['WEBSITE']

        # One "label:value" line per detail, in a fixed order.
        labelled_details = (
            ("total_lots:", lot_count),
            ("phone_number:", phone),
            ("zipcode:", zipcode),
            ("website:", website),
        )
        info = "\n".join(
            [str(label + value) for label, value in labelled_details]
        )

        # The file already carries coordinates, so no geocoding is needed.
        coordinates = ", ".join(
            [str(row['LATITUDE']), str(row['LONGITUDE'])]
        )

        schema_fields = {
            'park_name': name,
            'park_county': county,
            'park_address': address,
            'city': city,
            'park_info': info,
            'other_contact_info': phone,
            'geo_location': coordinates,
            'state': state,
            'total_lots': lot_count,
        }
        record = db.apply_schema(**schema_fields)

        collected.append(record)

        print("  ::  {}   ::    {}".format(record['park_name'], record['geo_location']))

        # Each record is handed to the writer individually as a one-item list.
        # NOTE(review): the output name says "pa" although this is the
        # national data set -- confirm this is intended.
        tpcsv.write_source_csv([record], "pa_complete.csv")

    return collected
# Example #2
0
def mh_village_PA():
    """Convert the PA community report CSV into schema records.

    Reads 'source_files/PACommunityReport.csv' through
    ``tpcsv.open_source_csv``. For every row it builds a multi-line info
    string, resolves a location with ``get_park_geolocation`` and passes
    the fields through ``db.apply_schema``. Each record is printed, written
    on its own to "pa_complete.csv" via ``tpcsv.write_source_csv``, and
    collected into the returned list.

    Returns:
        list: the records produced by ``db.apply_schema``, in file order.
    """
    # Source columns:
    # ID,Name,Address,City,State,Zip,County,"Site Count","Average Monthly Rent","Phone Number","Vacant Sites"
    payload = tpcsv.open_source_csv('source_files/PACommunityReport.csv')

    # payload is indexed as (rows, headers); the headers are not used here.
    rows, _headers = payload[0], payload[1]

    collected = []

    for row in rows:
        address = row['Address']
        county = row['County']
        state = row['State']
        zipcode = row['Zip']
        name = row['Name']
        city = row['City']
        site_count = row['Site Count']
        avg_rent = row['Average Monthly Rent']
        phone = row['Phone Number']
        vacant_sites = row['Vacant Sites']

        # One "label:value" line per detail, in a fixed order.
        labelled_details = (
            ("total_lots:", site_count),
            ("avg_rent:", avg_rent),
            ("phone_number:", phone),
            ("vacant_lots:", vacant_sites),
            ("zipcode:", zipcode),
        )
        info = "\n".join(
            [str(label + value) for label, value in labelled_details]
        )

        # This report has no coordinates, so they are looked up per park.
        location = get_park_geolocation(address, county, state, zipcode)

        schema_fields = {
            'park_name': name,
            'park_county': county,
            'park_address': address,
            'city': city,
            'park_info': info,
            'other_contact_info': phone,
            'geo_location': location,
            'state': state,
            'total_lots': site_count,
        }
        record = db.apply_schema(**schema_fields)

        collected.append(record)

        print("  ::  {}   ::    {}".format(record['park_name'], record['geo_location']))

        # Each record is handed to the writer individually as a one-item list.
        tpcsv.write_source_csv([record], "pa_complete.csv")

    return collected
def get_county_parks(links_array):
    """Scrape the park listings from every county page in *links_array*.

    Each link is opened with the module-level browser ``br`` and parsed with
    BeautifulSoup. Every park ``div`` found inside the ``col2content``
    container is run through the ``get_park_*`` helpers and
    ``db.apply_schema``. After each county, that county's records are passed
    to ``tpcsv.write_source_csv`` with the target "indiana-test.csv"; a
    failure there is reported and the scrape moves on to the next county.

    Args:
        links_array: sized sequence of county page URLs.

    Returns:
        list: the schema records of all parks from all counties, in the
        order they were scraped.
    """
    counties = links_array
    num_counties = len(counties)
    all_parks = []

    # Inline style that marks a park entry on the county page.
    park_div_style = "PADDING-BOTTOM: 0px; PADDING-TOP: 0.5em; PADDING-LEFT: 2em; MARGIN-TOP: 0px; PADDING-RIGHT: 0px"

    for position, county_link in enumerate(counties, 1):
        print("processing {} of {} counties".format(position, num_counties))
        br.open(county_link)
        soup = BeautifulSoup(br.response().read(), "html.parser")
        main_html = soup.find('div', {"id": "col2content"})
        county_name_html = soup.find('div', {"id": "col2"})
        county_parks_html = main_html.find_all("div", style=park_div_style)

        county_parks = []
        # Reset per county so the error report below can never show a name
        # left over from a previous county, or hit an unbound variable when
        # the page lists no parks.
        park_county_name = None

        for park_html in county_parks_html:
            # Re-parse the fragment so the helpers get a standalone document.
            park = BeautifulSoup(str(park_html), "html.parser")

            park_name = get_park_name(park)
            park_county_name = get_park_county_name(county_name_html)
            park_address = get_park_address(park)
            park_info = get_park_info(park)
            # park_features is read below for 'total_lots', 'water', 'waste'.
            park_features = get_park_features(park)
            park_ownerinfo = get_park_ownerinfo(park)
            park_managerinfo = get_park_managerinfo(park)
            park_geolocation = get_park_geolocation(park_address, park_county_name)

            information = db.apply_schema(
                park_name=park_name,
                park_county=park_county_name,
                park_address=park_address,
                park_info=park_info,
                owner_info=park_ownerinfo,
                other_contact_info=park_managerinfo,
                geo_location=park_geolocation,
                state='IN',
                total_lots=park_features['total_lots'],
                water=park_features['water'],
                waste=park_features['waste'],
            )

            county_parks.append(information)
            all_parks.append(information)
            print("  ::  {}   ::    {}".format(information['park_name'], information['geo_location']))

        try:
            tpcsv.write_source_csv(county_parks, "indiana-test.csv")
        except Exception as err:
            # Best effort: a failed write must not abort the whole scrape.
            # Only Exception is caught so Ctrl-C / SystemExit still stop it.
            if park_county_name is not None:
                county_label = park_county_name
            else:
                # No park was parsed, so fall back to the page link.
                county_label = county_link
            print("Error :: could not write {} county ({})".format(county_label, err))

    return all_parks