Пример #1
0
import pandas as pd
from datetime import datetime

from config import Configuration
from helpers import get_metals_and_prices_from_table, soup_url, get_database_engine

if __name__ == '__main__':
    # Fetch the configured page and locate the price table container in it.
    settings = Configuration()
    page = soup_url(settings.url)
    price_table = page.find("div", {"class": "c_mp_price_table"})
    metal_names, metal_prices = get_metals_and_prices_from_table(price_table)

    # Assemble the metal and price columns, then stamp every row with today's
    # date formatted as YYYY-MM-DD.
    snapshot = pd.DataFrame()
    for column, values in (("metal", metal_names), ("price", metal_prices)):
        snapshot[column] = values
    snapshot["date"] = datetime.today().strftime("%Y-%m-%d")

    # Append the rows to the configured table in the "projects" schema,
    # leaving the DataFrame index out of the written columns.
    engine = get_database_engine()
    snapshot.to_sql(
        name=settings.sql_table,
        con=engine,
        schema="projects",
        if_exists="append",
        index=False,
    )
"""Script to add additional H-2A worksites DOL data to our database."""

import helpers
from helpers import make_query, get_database_engine, myprint
import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before the
# database engine is created.
load_dotenv()
# Module-wide database engine. NOTE(review): force_cloud=True presumably
# selects the hosted database rather than a local one - confirm against
# helpers.get_database_engine.
engine = get_database_engine(force_cloud=True)


# worksites is a DataFrame
# year, quarter should be strings - ex: 2020, 4
def manage_worksites(worksites, year, quarter):

    worksites = worksites.rename(
        columns={
            "PLACE_OF_EMPLOYMENT_ADDRESS1": "WORKSITE_ADDRESS",
            "PLACE_OF_EMPLOYMENT_ADDRESS2": "WORKSITE_ADDRESS2",
            "PLACE_OF_EMPLOYMENT_CITY": "WORKSITE_CITY",
            "PLACE_OF_EMPLOYMENT_STATE": "WORKSITE_STATE",
            "PLACE_OF_EMPLOYMENT_POSTAL_CODE": "WORKSITE_POSTAL_CODE",
            "PLACE_OF_EMPLOYMENT_POST_CODE": "WORKSITE_POSTAL_CODE",
            "ADDITONAL_PLACE_OF_EMPLOYMENT_INFO":
            "ADDITIONAL_PLACE_OF_EMPLOYMENT_INFORMATION",
            "JO_ORDER_NUMBER": "JOB_ORDER_NUMBER"
        })

    worksites = helpers.fix_zip_code_columns(worksites,
                                             ["WORKSITE_POSTAL_CODE"])
    worksites["table"], worksites["Source"], worksites[
Пример #3
0
"""This script is used for implementing the most recent address fixes from the PostgreSQL low_accuracies table."""

import os
import helpers
from helpers import myprint, print_red_and_email, make_query, get_database_engine, handle_null
import pandas as pd
from geocodio import GeocodioClient
from dotenv import load_dotenv

# Load variables from a .env file so the API key lookup below can see them.
load_dotenv()

# Geocodio credentials come from the environment (None when the variable is unset).
geocodio_api_key = os.getenv("GEOCODIO_API_KEY")

# Module-wide handles: the database engine is created first, then the geocoding client.
engine = get_database_engine(force_cloud=True)
client = GeocodioClient(geocodio_api_key)


# fixed is a DataFrame of low_accuracies table rows that have been fixed
def implement_fixes(fixed, fix_worksites=False):

    # worksite_or_housing is a string - either "worksite" or "housing"
    # sets the "{worksite_or_housing}_fixed_by", "fixed" columns to failed, False in the i-th row of df
    def mark_as_failed(i, worksite_or_housing, df):
        df.at[i, f"{worksite_or_housing}_fixed_by"] = "failed"
        df.at[i, "fixed"] = False

    # worksite_or_housing is a string - either "worksite" or "housing"
    # overwrites the geocoding results columns of the i-th row in df based on its worksite_or_housing address columns
    # if the geocoding of the new address columns results in accuracy too low or an accuracy type in helpers.bad_accuracy_types, marks the row as failed
    def fix_by_address(i, row, worksite_or_housing, df):
        if worksite_or_housing == "worksite":
            full_address = helpers.create_address_from(
Пример #4
0
import helpers

# Safety guard: abort before anything else is imported or run when helpers is
# configured with force_cloud set, so the tests below never touch that database.
if helpers.force_cloud:
    print("Refusing to run tests. Never run tests on the actual database!!! You are seeing this alert because the force_cloud variable is set to True (or you told it to be set it to true with your answer to the last question) in helpers.py. Set it to False to run tests on the local database.")
    # Raise SystemExit directly instead of calling the exit() helper: exit() is
    # injected by the site module for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S`). The exit status is unchanged.
    raise SystemExit

import unittest
from add_housing import geocode_manage_split_housing
from merge_dol import geocode_manage_split_merge
from implement_fixes import implement_fixes
from helpers import merge_all_data, get_value, myprint, make_query, geocode_table
from colorama import Fore, Style
import os
import pandas as pd
# Local alias for the accuracy types defined in helpers.
bad_accuracy_types = helpers.bad_accuracy_types
# Shared engine used by the helpers below for their to_sql / read_sql calls.
engine = helpers.get_database_engine()

def set_test_database_state(accurates, inaccurates):
    """Replace the contents of job_central and low_accuracies.

    accurates is the DataFrame written to job_central and inaccurates is the
    DataFrame written to low_accuracies. Both tables are emptied before
    either one is refilled, and the previously_geocoded materialized view is
    refreshed at the end.
    """
    seeded_tables = (
        ("job_central", accurates),
        ("low_accuracies", inaccurates),
    )

    # Clear every table first so no old rows survive the reseed.
    for table_name, _ in seeded_tables:
        make_query("DELETE FROM " + table_name)

    # Append the replacement rows using the column types declared in helpers.
    for table_name, frame in seeded_tables:
        frame.to_sql(table_name, engine, if_exists='append', index=False, dtype=helpers.column_types)

    make_query("REFRESH MATERIALIZED VIEW previously_geocoded")

def merge_all_and_get_new_state(accurate_new_jobs, inaccurate_new_jobs):
    """Run merge_all_data on the two inputs and return the resulting tables.

    Returns a tuple (job_central, low_accuracies) of DataFrames read back from
    the database after the merge has run.
    """
    merge_all_data(accurate_new_jobs, inaccurate_new_jobs)
    return (
        pd.read_sql("job_central", con=engine),
        pd.read_sql("low_accuracies", con=engine),
    )
Пример #5
0
import os
import helpers
from helpers import make_query, get_database_engine
import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before the
# database engine is created.
load_dotenv()
# Module-wide database engine. NOTE(review): force_cloud=False presumably
# leaves the choice of database to the helper's default - confirm against
# helpers.get_database_engine.
engine = get_database_engine(force_cloud=False)


# geocodes, splits by accuracy, renames columns, and adds necessary columns for the DataFrame 'housing'.
# year, quarter should be strings - ex: 2020, 4
def geocode_manage_split_housing(housing, year, quarter):

    housing = housing.rename(
        columns={
            "PHYSICAL_LOCATION_ADDRESS_2": "HOUSING_ADDRESS2",
            "JOB_ORDER_NUMBER": "JO_ORDER_NUMBER",
            "PHYSICAL_LOCATION_STATE": "HOUSING_STATE",
            "PHYSICAL_LOCATION_POSTAL_CODE": "HOUSING_POSTAL_CODE",
            "PHYSICAL_LOCATION_CITY": "HOUSING_CITY",
            "HOUSING_STANDARD_STATE": "HOUSING_STANDARDS_STATE",
            "HOUSING_STANDARD_LOCAL": "HOUSING_STANDARDS_LOCAL",
            "HOUSING_STANDARD_FEDERAL": "HOUSING_STANDARDS_FEDERAL",
            "PHYSICAL_LOCATION_COUNTY": "HOUSING_COUNTY",
            "PHYSICAL_LOCATION_ADDRESS_1": "HOUSING_ADDRESS_LOCATION"
        })

    housing = helpers.fix_zip_code_columns(housing, ["HOUSING_POSTAL_CODE"])
    housing["table"], housing["Source"], housing["fixed"], housing[
        "housing_fixed_by"], housing[