def hifld_national():
    """Import park records from the HIFLD mobile home parks CSV.

    Rows whose ``NAICS_CODE`` equals ``'721211'`` are skipped.  Every other
    row is mapped through ``db.apply_schema``, echoed to stdout, handed to
    ``tpcsv.write_source_csv`` (target ``"pa_complete.csv"``) and collected.

    Returns:
        list: the records produced by ``db.apply_schema``, in file order.
    """
    # Columns seen in the source file:
    # ['VAL_DATE', 'NAICS_DESC', 'COUNTRY', 'NAICS_CODE', 'UNITS',
    #  'RevGeoFlag', 'SOURCEDATE', 'MHPID', 'WEBSITE', 'VAL_METHOD', 'SOURCE',
    #  'ZIP4', 'STATUS', 'COUNTYFIPS', 'ZIP', '\xef\xbb\xbfX', 'LONGITUDE',
    #  'COUNTY', 'STATE', 'FID', 'Y', 'SIZE', 'CITY', 'YEARBLT', 'NAME',
    #  'ADDRESS2', 'TELEPHONE', 'ADDRESS', 'LATITUDE', 'TYPE']
    source = tpcsv.open_source_csv('source_files/HIFLD_Mobile_Home_Parks.csv')
    # The second element (the header row) is fetched but not used below.
    rows, _column_names = source[0], source[1]

    collected = []
    for row in rows:
        # NOTE(review): 721211 is presumably the NAICS code for RV parks /
        # campgrounds, which are not wanted here -- confirm.
        if row['NAICS_CODE'] == '721211':
            continue

        street = row['ADDRESS']
        county_name = row['COUNTY']
        state_code = row['STATE']
        zip_code = row['ZIP']
        park_title = row['NAME']
        town = row['CITY']
        lot_count = row['UNITS']
        phone = row['TELEPHONE']
        site_url = row['WEBSITE']

        # Free-form "label:value" lines stored in the record's park_info.
        labelled_values = (
            ("total_lots:", lot_count),
            ("phone_number:", phone),
            ("zipcode:", zip_code),
            ("website:", site_url),
        )
        info_text = "\n".join(
            str(label + value) for label, value in labelled_values
        )
        coordinates = ", ".join([str(row['LATITUDE']), str(row['LONGITUDE'])])

        record = db.apply_schema(
            park_name=park_title,
            park_county=county_name,
            park_address=street,
            city=town,
            park_info=info_text,
            other_contact_info=phone,
            geo_location=coordinates,
            state=state_code,
            total_lots=lot_count,
        )
        collected.append(record)
        print(" :: {} :: {}".format(record['park_name'], record['geo_location']))

        # NOTE(review): the original indentation was lost; the one-record
        # write is reconstructed as a per-row step -- confirm.  The target
        # file name is the same one mh_village_PA uses -- confirm intended.
        tpcsv.write_source_csv([record], "pa_complete.csv")

    return collected
def mh_village_PA():
    """Import park records from the PA community report CSV.

    Each row is geolocated with ``get_park_geolocation``, mapped through
    ``db.apply_schema``, echoed to stdout, handed to
    ``tpcsv.write_source_csv`` (target ``"pa_complete.csv"``) and collected.

    Returns:
        list: the records produced by ``db.apply_schema``, in file order.
    """
    # Source columns:
    # ID,Name,Address,City,State,Zip,County,"Site Count",
    # "Average Monthly Rent","Phone Number","Vacant Sites"
    source = tpcsv.open_source_csv('source_files/PACommunityReport.csv')
    # The second element (the header row) is fetched but not used below.
    rows, _column_names = source[0], source[1]

    collected = []
    for row in rows:
        street = row['Address']
        county_name = row['County']
        state_code = row['State']
        zip_code = row['Zip']
        park_title = row['Name']
        town = row['City']
        lot_count = row['Site Count']
        monthly_rent = row['Average Monthly Rent']
        phone = row['Phone Number']
        vacancies = row['Vacant Sites']

        # Free-form "label:value" lines stored in the record's park_info.
        labelled_values = (
            ("total_lots:", lot_count),
            ("avg_rent:", monthly_rent),
            ("phone_number:", phone),
            ("vacant_lots:", vacancies),
            ("zipcode:", zip_code),
        )
        info_text = "\n".join(
            str(label + value) for label, value in labelled_values
        )
        coordinates = get_park_geolocation(
            street, county_name, state_code, zip_code
        )

        record = db.apply_schema(
            park_name=park_title,
            park_county=county_name,
            park_address=street,
            city=town,
            park_info=info_text,
            other_contact_info=phone,
            geo_location=coordinates,
            state=state_code,
            total_lots=lot_count,
        )
        collected.append(record)
        print(" :: {} :: {}".format(record['park_name'], record['geo_location']))

        # NOTE(review): the original indentation was lost; the one-record
        # write is reconstructed as a per-row step -- confirm.
        tpcsv.write_source_csv([record], "pa_complete.csv")

    return collected
def get_county_parks(links_array):
    """Scrape park records from a list of county pages.

    For every link the page is opened with the module-level browser ``br``
    and parsed with BeautifulSoup.  Park ``<div>`` elements are selected from
    the ``col2content`` container by their inline style, each one is turned
    into a record via ``db.apply_schema`` (with ``state='IN'``), and the
    county's records are passed to ``tpcsv.write_source_csv`` with target
    ``"indiana-test.csv"``.

    Args:
        links_array: sequence of county page links accepted by ``br.open``;
            must support ``len()``.

    Returns:
        list: every record built, across all counties, in processing order.

    A failed CSV write is reported on stdout and does not stop the scrape.
    """
    counties = links_array
    num_counties = len(counties)
    all_parks = []

    for position, county_link in enumerate(counties, 1):
        print("processing {} of {} counties".format(position, num_counties))

        br.open(county_link)
        soup = BeautifulSoup(br.response().read(), "html.parser")
        main_html = soup.find('div', {"id" : "col2content"})
        county_name_html = soup.find('div', {"id" : "col2"})
        county_parks_html = main_html.find_all(
            "div",
            style="PADDING-BOTTOM: 0px; PADDING-TOP: 0.5em; PADDING-LEFT: 2em; MARGIN-TOP: 0px; PADDING-RIGHT: 0px",
        )

        # Resolved once per county: it does not depend on the individual
        # park, and the error report below needs it even when the county
        # lists no parks (previously the name was unbound or left over from
        # the preceding county in that case).
        park_county_name = get_park_county_name(county_name_html)

        county_parks = []
        for park_div in county_parks_html:
            # Re-parse the fragment as its own document for the helpers.
            park = BeautifulSoup(str(park_div), "html.parser")

            park_name = get_park_name(park)
            park_address = get_park_address(park)
            park_info = get_park_info(park)
            park_features = get_park_features(park)
            park_ownerinfo = get_park_ownerinfo(park)
            park_managerinfo = get_park_managerinfo(park)
            park_geolocation = get_park_geolocation(park_address, park_county_name)

            information = db.apply_schema(
                park_name=park_name,
                park_county=park_county_name,
                park_address=park_address,
                park_info=park_info,
                owner_info=park_ownerinfo,
                other_contact_info=park_managerinfo,
                geo_location=park_geolocation,
                state='IN',
                total_lots=park_features['total_lots'],
                water=park_features['water'],
                waste=park_features['waste'],
            )
            county_parks.append(information)
            all_parks.append(information)
            print(" :: {} :: {}".format(information['park_name'], information['geo_location']))

        # NOTE(review): the original indentation was lost; the write is
        # reconstructed as a once-per-county step -- confirm.
        try:
            tpcsv.write_source_csv(county_parks, "indiana-test.csv")
        except Exception as err:
            # Best effort: report and move on to the next county.  Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            print("Error :: could not write {} county ({!r})".format(park_county_name, err))

    return all_parks