Пример #1
0
def import_oil_reserve(name):
    """Scrape an oil-reserve HTML table and store it as ``oil_reserve``.

    The page is expected to contain a ``<table class="data1">`` whose
    ``<tr class="DataRow">`` rows each hold one ``DataStub1`` cell, at
    least five ``DataB`` cells and one ``Current2`` cell.

    Args:
        name: URL of the page to download (passed to ``requests.get``).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page does not contain the ``data1`` table.
    """
    page = requests.get(name)
    # Fail on 4xx/5xx here instead of trying to parse an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'lxml')

    table = soup.find('table', attrs={'class': 'data1'})
    if table is None:
        # Without this check the next call fails with an opaque
        # "'NoneType' object has no attribute ..." error.
        raise ValueError(
            "no <table class='data1'> found at {!r}".format(name))

    values = []
    for tr in table.find_all('tr', attrs={'class': 'DataRow'}):
        state = tr.find('td', attrs={'class': 'DataStub1'}).get_text()

        # Only the first five DataB cells are used; fewer than five still
        # raises IndexError, as before.
        year_cells = tr.find_all('td', attrs={'class': 'DataB'})
        y11, y12, y13, y14, y15 = [
            year_cells[i].get_text() for i in range(5)
        ]

        current = tr.find('td', attrs={'class': 'Current2'}).get_text()

        values.append(
            OilReserveData(state, y11, y12, y13, y14, y15, current))

    df = pd.DataFrame.from_records([s.to_dict() for s in values])

    md.create_table(md.connect(), df, 'oil_reserve')
Пример #2
0
def main(file_name):
    """Build the ``zip_lookup`` table with one row per (city, zip code).

    Reads ``resources/<file_name>``, strips spaces from and upper-cases the
    ``city`` column, splits the whitespace-separated ``zip`` column into one
    column per zip code, melts those columns into a single ``zip`` column and
    passes the result to ``md.create_table``.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    cities = pd.read_csv('resources/' + file_name)
    cities['city'] = cities['city'].str.replace(' ', '').str.upper()

    # One new column per whitespace-separated zip code in the original cell.
    zip_columns = cities['zip'].str.strip().str.split(expand=True)

    wide = pd.concat([cities, zip_columns], axis=1)
    wide = wide.drop(['zip'], axis=1)

    id_columns = [
        'city', 'state_id', 'state_name', 'county_name', 'lat', 'lng',
        'population'
    ]

    # Wide -> long: every split zip column becomes rows of a 'zip' column.
    long_form = pd.melt(wide, id_vars=id_columns, value_name='zip')
    long_form = long_form.drop(['variable'], axis=1)

    # Drop rows that have no zip code (cities with fewer zips than the
    # widest row produce nulls after the split).
    has_zip = pd.notnull(long_form['zip'])
    long_form = long_form[has_zip]

    # Zip code lookup table complete, ready to be joined.
    md.create_table(md.connect(), long_form, 'zip_lookup')
    """
Пример #3
0
def import_weather(file_name):
    """Load selected weather columns from a CSV into ``weather_observations``.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    wanted_columns = [
        'StationName', 'Date', 'ObsType', 'Value', 'S-Flag', 'City', 'State'
    ]
    observations = pd.read_csv("resources/" + file_name, low_memory=False)
    observations = observations[wanted_columns]

    md.create_table(md.connect(), observations, 'weather_observations')
Пример #4
0
def import_water_data(file_name):
    """Load monitoring-location type and coordinates into ``water_locations``.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    wanted_columns = [
        'MonitoringLocationTypeName', 'LatitudeMeasure', 'LongitudeMeasure'
    ]
    locations = pd.read_csv('resources/' + file_name)
    locations = locations[wanted_columns]
    md.create_table(md.connect(), locations, 'water_locations')
Пример #5
0
def import_land_prices(file_name):
    """Load the 2015Q4 land-price rows from an Excel file into ``land_prices``.

    Reads columns A-E, H and I of ``resources/<file_name>`` (skipping the
    first row), keeps only rows whose ``Date`` equals ``'2015Q4'`` and removes
    spaces from the ``MSA`` column before storing the result.

    Args:
        file_name: name of the Excel workbook inside ``resources/``.
    """
    # `usecols` replaces the removed `parse_cols` keyword of read_excel.
    df_landprices = pd.read_excel('resources/' + file_name,
                                  skiprows=[0],
                                  usecols="A,B,C,D,E,H,I")
    # .copy() so the column assignment below works on an independent frame
    # instead of a slice of the original (avoids SettingWithCopyWarning).
    df_landprices = df_landprices.loc[df_landprices['Date'] ==
                                      '2015Q4'].copy()
    df_landprices['MSA'] = df_landprices['MSA'].str.replace(' ', '')
    md.create_table(md.connect(), df_landprices, 'land_prices')
Пример #6
0
def import_seaports(file_name):
    """Load seaport coordinates and address columns into ``seaports``.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    wanted_columns = [
        'LATITUDE1', 'LONGITUDE1', 'CITY_OR_TO', 'STATE_POST', 'ZIPCODE',
        'PORT_NAME'
    ]
    ports = pd.read_csv("resources/" + file_name, low_memory=False)
    ports = ports[wanted_columns]

    md.create_table(md.connect(), ports, 'seaports')
Пример #7
0
def import_existing_plants(file_name):
    """Load facility records that have a state into ``plant_locations``.

    Only the columns listed below are kept, and rows whose ``State`` is null
    are discarded before the frame is stored.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    wanted_columns = [
        'Facility Name', 'Deregistered (Yes/No)', 'City', 'State', 'Zip Code',
        'Parent Company', 'Latitude', 'Longitude', 'Number of RMP Submissions'
    ]
    plants = pd.read_csv('resources/' + file_name)
    plants = plants[wanted_columns]

    has_state = plants['State'].notnull()
    plants = plants[has_state]

    md.create_table(md.connect(), plants, 'plant_locations')
Пример #8
0
def import_earthquakes():
    """Combine the US and AK earthquake CSVs into ``earthquake_data``.

    Reads ``resources/USEarthquakes.csv`` and ``resources/AKEarthquakes.csv``,
    keeps the same six columns from each, stacks the rows and stores the
    combined frame.
    """
    columns = ['time', 'latitude', 'longitude', 'mag', 'magType', 'place']

    df_earthquakes = pd.read_csv("resources/USEarthquakes.csv",
                                 low_memory=False)[columns]
    df_earthquakes2 = pd.read_csv("resources/AKEarthquakes.csv",
                                  low_memory=False)[columns]

    # The previous code called DataFrame.append() and discarded its return
    # value, so the AK rows were never stored. concat returns the combined
    # frame; ignore_index avoids duplicate row labels from the two files.
    df_earthquakes = pd.concat([df_earthquakes, df_earthquakes2],
                               ignore_index=True)
    md.create_table(md.connect(), df_earthquakes, 'earthquake_data')
Пример #9
0
def _min_max(series):
    """Return ``(series - min) / (max - min)`` for a pandas Series."""
    low = series.min()
    high = series.max()
    return (series - low) / (high - low)


def normalize_all():
    """Add min-max normalized ``*_norm`` columns to several stored tables.

    Each table is read with ``pd.read_sql_table``, extended with normalized
    columns and written back through ``md.create_table``. All tables are
    written back under the name they were read from, except
    ``population_density``, which is written to ``population_density_final``.
    """
    engine = md.connect()

    # Land prices.
    land = pd.read_sql_table('land_prices_final', engine)
    land['home_value_norm'] = _min_max(land['Home Value'])
    land['structure_cost_norm'] = _min_max(land['Structure Cost'])
    md.create_table(engine, land, 'land_prices_final')

    # Oil reserves: 'year16' holds strings with thousands separators, so
    # strip the commas and convert to float before normalizing.
    oil = pd.read_sql_table('oil_reserve_final', engine)
    oil['year16'] = oil['year16'].str.replace(',', '').astype(float)
    oil['year16_norm'] = _min_max(oil['year16'])
    md.create_table(engine, oil, 'oil_reserve_final')

    # Disaster references.
    disasters = pd.read_sql_table('disaster_data_final', engine)
    disasters['NumFireReferences_norm'] = _min_max(
        disasters['NumFireReferences'])
    disasters['NumFloodReferences_norm'] = _min_max(
        disasters['NumFloodReferences'])
    disasters['NumHurricaneReferences_norm'] = _min_max(
        disasters['NumHurricaneReferences'])
    md.create_table(engine, disasters, 'disaster_data_final')

    # Railroad tonnage.
    railroads = pd.read_sql_table('railroad_data_final', engine)
    railroads['Tons_norm'] = _min_max(railroads['Tons'])
    md.create_table(engine, railroads, 'railroad_data_final')

    # Population density (read from the raw table, stored as the final one).
    density = pd.read_sql_table('population_density', engine)
    density['density_norm'] = _min_max(density['density'])
    md.create_table(engine, density, 'population_density_final')
Пример #10
0
def read_state_codes(file_name):
    """Load a pipe-delimited state-code file into ``state_codes``.

    Prints the first five rows as a quick visual check before storing.

    Args:
        file_name: name of the ``|``-separated file inside ``resources/``.
    """
    codes = pd.read_csv('resources/' + file_name, sep='|')
    preview = codes.head(5)
    print(preview)
    md.create_table(md.connect(), codes, 'state_codes')
Пример #11
0
# Tail of a script that assembles per-state disaster reference counts.
# `lines`, `hurlines` and `floodlines`, as well as the other per-state
# counters used below (california, hurca, floodca, ...), are defined earlier
# in the script, outside this excerpt.
# NOTE(review): presumably these hold the South Dakota fire / hurricane /
# flood matches at this point -- confirm against the preceding code.
southdakota = len(lines)
hursd = len(hurlines)
floodsd = len(floodlines)

# The five lists below are parallel: position i in each list refers to the
# same state, so their order must be kept in sync.
states = [
    'California', 'Louisiana', 'New York', 'Alaska', 'Texas', 'North Carolina',
    'Ohio', 'Massachusetts', 'Utah', 'South Dakota'
]
firelengths = [
    california, louisiana, newyork, alaska, texas, northcarolina, ohio,
    massachusetts, utah, southdakota
]
hurlengths = [
    hurca, hurla, hurny, hurak, hurtx, hurnc, huroh, hurma, hurut, hursd
]
floodlengths = [
    floodca, floodla, floodny, floodak, floodtx, floodnc, floodoh, floodma,
    floodut, floodsd
]
statecodes = ['CA', 'LA', 'NY', 'AK', 'TX', 'NC', 'OH', 'MA', 'UT', 'SD']

# One row per state with its code and the three reference counts.
df_disaster = pd.DataFrame({
    'State': states,
    'StateCode': statecodes,
    'NumFireReferences': firelengths,
    'NumHurricaneReferences': hurlengths,
    'NumFloodReferences': floodlengths
})

# Store the frame under the table name 'disaster_data'.
md.create_table(md.connect(), df_disaster, 'disaster_data')
Пример #12
0
def import_land_prices(file_name):
    """Load an Excel workbook from ``resources/`` into ``railroad_data``.

    NOTE(review): the function name mentions land prices but the target
    table is 'railroad_data' -- looks like a copy-paste leftover; confirm
    with callers before renaming.

    Args:
        file_name: name of the Excel workbook inside ``resources/``.
    """
    workbook_path = 'resources/' + file_name
    sheet = pd.read_excel(workbook_path)
    md.create_table(md.connect(), sheet, 'railroad_data')
Пример #13
0
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 12 13:29:55 2018

@author: Beth
"""

import pandas as pd
import mysqlConnection as md

# Read the comma-separated BadZipCodes file and store it as the
# 'unfavorable_zipcodes' table.
df = pd.read_csv(
    'resources/BadZipCodes.csv',
    sep=",",
)
md.create_table(
    md.connect(),
    df,
    'unfavorable_zipcodes',
)
Пример #14
0
def import_smaller_zips(file_name):
    """Load the City/State/Zip columns of a CSV into ``test_zips``.

    Args:
        file_name: name of the CSV file inside ``resources/``.
    """
    wanted_columns = ['City', 'State', 'Zip']
    zips = pd.read_csv('resources/' + file_name)
    zips = zips[wanted_columns]
    md.create_table(md.connect(), zips, 'test_zips')
Пример #15
0
# -*- coding: utf-8 -*-
"""
Created on Sun Apr  8 16:21:48 2018

@author: Cameron
"""

import pandas as pd
import mysqlConnection as md

# Read the comma-separated population density file and store it as the
# 'population_density' table.
df = pd.read_csv(
    'resources/population_density.csv',
    sep=",",
)
md.create_table(
    md.connect(),
    df,
    'population_density',
)