Exemplo n.º 1
0
def publish_results(connectstring, df, level, runtype):
    """Write calibrated test results to the database.

    The destination is the ``landed`` schema; the table name is derived
    from *level* and *runtype* (``calibrated_<level>_<runtype>_test``).
    """
    destination_schema = "landed"
    destination_table = "calibrated_{level}_{runtype}_test".format(
        level=level, runtype=runtype)

    dbutils.df_to_db(connectstring, df, destination_schema, destination_table,
                     if_exists="replace", write_index=True)
Exemplo n.º 2
0
def publish_results(connectstring, df, runtype, period):
    """Write pgm results for *runtype*/*period* to the database.

    The destination is the ``landed`` schema; the table name is
    ``cl_pgm_<runtype>_<period>``. Any existing table is replaced and the
    DataFrame index is written as columns.
    """
    destination_schema = "landed"
    destination_table = "cl_pgm_{runtype}_{period}".format(
        runtype=runtype, period=period)

    dbutils.df_to_db(connectstring, df, destination_schema, destination_table,
                     if_exists="replace", write_index=True)
Exemplo n.º 3
0
def getREIGN(schema="dataprep", table="reign", if_exists="replace"):
    """
    Fetch the most recent full REIGN dataset and push it to the database.

    Scrapes the REIGN project page for the current CSV download link,
    loads it into a DataFrame, reverses the log transforms on selected
    columns, adds a log transform of tenure, and writes the result to
    ``schema.table``.

    Parameters
    ----------
    schema : str
        Destination database schema.
    table : str
        Destination table name.
    if_exists : str
        Passed to ``dbutils.df_to_db`` ("replace" drops any existing table).

    NOTE(review): this function reads a module-level ``connectstring``
    that is not defined in this snippet — confirm it is in scope before
    calling.
    """
    #find current download link on the REIGN landing page
    url = 'https://oefdatascience.github.io/REIGN.github.io/menu/reign_current.html'
    html_doc = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(html_doc, 'lxml')
    container = soup.find("div", {"class": "post-container"})
    # first hyperlink in the post container is assumed to be the CSV link
    link = container.find("a", href=True)['href']
    print("downloading data from {}".format(link))

    #fetch the CSV directly from the scraped link
    reign = pd.read_csv(link)

    #drop irrelevant variables
    #reign.drop(['leader'], axis=1, inplace=True)

    #exponentiate variables that have been logged for some unapparent reason
    #not sure about the other floats
    reign['lastelection'] = round(np.exp(reign['lastelection']))
    reign['loss'] = round(np.exp(reign['loss']))
    reign['irregular'] = round(np.exp(reign['irregular']))

    #add transforms (log1p handles tenure_months == 0 safely)
    reign['tenure_log'] = np.log1p(reign.tenure_months)
    nomonths = reign['tenure_months'] == 0
    #reign['regimeduration_log'] = np.log(reign.regimeduration)
    # debug output: rows with zero tenure and a preview of the data
    print(reign[nomonths])
    print(reign.head())
    #push to db
    dbutils.df_to_db(connectstring,
                     reign,
                     schema,
                     table,
                     if_exists,
                     write_index=True)
Exemplo n.º 4
0
                         logit_acled_ones_test_os, rf_canon_ones_test_sb,
                         rf_canon_ones_test_ns, rf_canon_ones_test_os,
                         rf_acled_ones_test_sb, rf_acled_ones_test_ns,
                         rf_acled_ones_test_os)

# Destination for the combined test forecasts.
output_schema = "landed"
output_table = "step_run1_test"

# All fitted model objects to forecast with. These are imported/defined
# earlier in the run script (logit and random-forest variants over
# canon/acled data, full/ones samples, and sb/ns/os outcomes).
models = [
    logit_canon_full_test_sb, logit_canon_full_test_ns,
    logit_canon_full_test_os, logit_acled_full_test_sb,
    logit_acled_full_test_ns, logit_acled_full_test_os,
    logit_canon_ones_test_sb, logit_canon_ones_test_ns,
    logit_canon_ones_test_os, logit_acled_ones_test_sb,
    logit_acled_ones_test_ns, logit_acled_ones_test_os, rf_canon_ones_test_sb,
    rf_canon_ones_test_ns, rf_canon_ones_test_os, rf_acled_ones_test_sb,
    rf_acled_ones_test_ns, rf_acled_ones_test_os
]

# Produce one combined forecast DataFrame across all models.
df_results = nstep.forecast_many(models)

# Write forecast to db, replacing any previous run's table.
dbutils.df_to_db(connectstring,
                 df_results,
                 output_schema,
                 output_table,
                 if_exists="replace",
                 write_index=True)

print("test complete!")
Exemplo n.º 5
0
# Destination table for the FVP dataset.
db_table = "fvp"

# Variable lists: raw column names to keep, and a rename mapping.
path_varlist = "../varlists/fvp/rawnames.txt"
path_renames = "../varlists/fvp/renames.txt"
varlist = datautils.read_varlist(path_varlist)
renames = datautils.read_renames(path_renames)

df = dbutils.file_to_df(path_input)
# Lowercase all columns so matching against the varlist is case-insensitive.
df.columns = df.columns.str.lower()

available = list(df.columns)
wanted_not_in_data = [col for col in varlist if col not in available]
wanted_in_data = [col for col in varlist if col in available]

# Fail loudly if the varlist requests columns the input file lacks,
# printing the offending names first.
if wanted_not_in_data:
    # Fixed: the original message concatenated "aren't in" and the path
    # with no separating space, garbling the error text.
    message = ("There are variables in " + path_varlist +
               " that aren't in " + path_input)
    for v in wanted_not_in_data:
        print("\t", v)
    raise ValueError(message)

# Subset the df to only those requested and existing in the data
df = df[wanted_in_data]

df = datautils.apply_renames(df, renames)

dbutils.df_to_db(db_connectstring,
                 df,
                 db_schema,
                 db_table,
                 if_exists="replace")
Exemplo n.º 6
0
import pandas as pd

sys.path.append("..")
from views_utils import dbutils

# CLI: path to an HDF file plus destination schema and table.
parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, help="dir_data")
parser.add_argument("--schema", type=str, help="schema")
parser.add_argument("--table", type=str, help="table")
args_main = parser.parse_args()

path = args_main.path
table = args_main.table
schema = args_main.schema

# Connection details for the views database.
# NOTE(review): uname/hostname look like placeholders substituted at
# deploy time — confirm before running.
prefix = "postgres"
db = "views"
uname = "VIEWSADMIN"
hostname = "VIEWSHOST"
port = "5432"
connectstring = dbutils.make_connectstring(prefix, db, uname, hostname, port)

# Load the HDF file and push it to schema.table, replacing any existing
# table and writing the index as columns.
df = pd.read_hdf(path)
print(f"read {path}")
dbutils.df_to_db(connectstring=connectstring,
                 df=df,
                 schema=schema,
                 table=table,
                 if_exists="replace",
                 write_index=True)
Exemplo n.º 7
0
# Source and destination tables for the pgm rescaling step.
table_input = "transforms_pgm_imp_1"
schema_output = "landed"
table_output = "rescaled_pgm"

# `timevar`, `groupvar`, `rescales`, `schema_input`, `connectstring` and
# `scale_to_range` are defined earlier in this script (not shown here).
ids = [timevar, groupvar]
vars_to_rescale = [var['name'] for var in rescales]
vars_rescaled = []
cols = ids + vars_to_rescale

# Fetch only the id columns and the variables to be rescaled.
df = dbutils.db_to_df(connectstring,
                      schema_input,
                      table_input,
                      columns=cols,
                      ids=ids)

# For each rescale spec, inject the source column as `x` into its
# options and store the result under "<name>_rescaled".
for rescale in rescales:
    name_new = rescale['name'] + "_rescaled"
    vars_rescaled.append(name_new)
    rescale['opts'].update({'x': df[rescale['name']]})

    df[name_new] = scale_to_range(**rescale['opts'])

# Keep only the rescaled columns for output.
df = df[vars_rescaled]

dbutils.df_to_db(connectstring,
                 df,
                 schema_output,
                 table_output,
                 if_exists="replace",
                 write_index=True)
Exemplo n.º 8
0
import sys

import pandas as pd

sys.path.insert(0, "../..")

from views_utils.dbutils import make_connectstring, df_to_db

# Connection details for the views database.
prefix = "postgres"
db = "views"
uname = "VIEWSADMIN"
hostname = "VIEWSHOST"
port = "5432"
connectstring = make_connectstring(prefix, db, uname, hostname, port)

# Levels of analysis to push: priogrid-month and country-month.
loas = ["pgm", "cm"]

schema = "launched"
if_exists = "replace"
# Push imputations 1 through 5 for each level of analysis.
for loa in loas:
    for imp in range(1, 6):
        table = loa + "_imp_ds_" + str(imp)
        path_input = ("/storage/runs/current/ds/results/"
                      + loa + "_transforms/"
                      + loa + "_imp_" + str(imp) + ".hdf5")
        print(schema, table)
        print(path_input)
        df = pd.read_hdf(path_input)
        df_to_db(connectstring, df, schema, table, if_exists, write_index=True)
Exemplo n.º 9
0
                                           uname="VIEWSADMIN")

# Conflict-event dummy columns to aggregate from pgm to cm level.
cols_actual = [
    "ged_dummy_sb", "ged_dummy_ns", "ged_dummy_os", "acled_dummy_pr"
]

# Get the country_id for each pgm (country↔priogrid-month key table).
df_country_keys = dbutils.db_to_df(connectstring, "staging_test", "cpgm")
df_country_keys.set_index(["month_id", "pg_id"], inplace=True)

# Fetch the pgm-level actuals and prefix their columns with "pgm_".
df = df_country_keys.copy()
df_actuals = dbutils.db_to_df(connectstring,
                              "preflight",
                              "flight_pgm",
                              ids=["month_id", "pg_id"],
                              columns=cols_actual)
df_actuals = df_actuals.add_prefix("pgm_")

# Join actuals onto the key table, then average over each country-month.
df = df.merge(df_actuals, left_index=True, right_index=True)
df_mean = df.groupby(["month_id", "country_id"]).mean()

table_out = "agg_cm_actuals"
print("table_out: {}".format(table_out))

# Write the aggregated actuals, replacing any previous table.
dbutils.df_to_db(connectstring,
                 df_mean,
                 "landed",
                 table_out,
                 if_exists="replace",
                 write_index=True)
Exemplo n.º 10
0
    "ensemble_pgm_fcast_test", "ensemble_pgm_eval_test"
]

# ensemble: re-push archived ensemble result tables from the 2018-08-01 run.
# `tables`, `ids_cm`, `ids_pgm`, `connectstring`, `schema` and `if_exists`
# are defined earlier in this script (not shown here).
for table in tables:
    path = f"/storage/runs/archive/r.2018.08.01/ensemble/results/{table}.hdf5"
    df_prev = pd.read_hdf(path)
    # set index according to the table's level of analysis
    if "cm" in table:
        df_prev.set_index(ids_cm, inplace=True)
    if "pgm" in table:
        df_prev.set_index(ids_pgm, inplace=True)
    # push table
    dbutils.df_to_db(connectstring,
                     df_prev,
                     schema,
                     table,
                     if_exists,
                     write_index=True)

#decay: re-push only the decay-transform columns for each level.
for level in ["cm", "pgm"]:
    path = f"/storage/runs/archive/r.2018.08.01/ds/transforms/{level}_transforms/data/{level}_imp_1.hdf5"
    table = f"transforms_{level}_imp_1"
    df_prev = pd.read_hdf(path)
    # isolate the decay columns
    cols = [col for col in df_prev if "decay" in col]
    # NOTE(review): both branches below are identical — the if/if could
    # collapse to a single `df_prev = df_prev[cols]`.
    if level == "cm":
        df_prev = df_prev[cols]
    if level == "pgm":
        df_prev = df_prev[cols]
    # push table
    # NOTE(review): the df_to_db call for this loop appears truncated in
    # this snippet — nothing is actually pushed here.
Exemplo n.º 11
0
    for year in range((ucdp_update.year + 1), (fcast_end.year + 1)):
        for month in range(1, 13):
            endvalues_base['year'] = year
            endvalues_base['month'] = month
            endvalues_base['month_id'] = endvalues_base['month_id'] + 1
            # concatenate
            endvalues = pd.concat([endvalues, endvalues_base])
    #concatenate that business
    vdem = pd.concat([vdem, endvalues])
    print(endvalues.month_id.values)
    #testfile to inspect
    testcase = vdem[(vdem.country_id == 124) | (vdem.country_id == 125)]
    testcase.to_csv("testcase_long.csv")

    #cleanup and return the values for changemonths only
    return(vdem)

# single: build the monthly VDEM panel (monthifyVDEM defined above),
# index it by country-month, and keep a CSV copy for inspection.
new_vdem = monthifyVDEM()
new_vdem.set_index(['country_id', 'month_id'], inplace=True)
new_vdem.to_csv("vdem_monthly.csv")

#push to DB, replacing any existing dataprep.vdem_regimechange table
connectstring = dbutils.make_connectstring(prefix="postgresql", db="views",
                                           uname="VIEWSADMIN", hostname="VIEWSHOST",
                                           port="5432")
schema = "dataprep"
if_exists = "replace"
table = "vdem_regimechange"
dbutils.df_to_db(connectstring, new_vdem, schema, table, if_exists, write_index=True)