Example #1

import sys

sys.path.append("../..")
import numpy as np

import dbutils

# set up connectstring (the original excerpt begins mid-call;
# host and username reproduced from the other examples)
uname = "VIEWSADMIN"
connectstring = dbutils.make_connectstring(db="views",
                                           hostname="VIEWSHOST",
                                           port="5432",
                                           prefix="postgres",
                                           uname=uname)

# -- prepare pgm data --
# get pgm and subset to 1994-2014
columns = [
    'month_id', 'pg_id', 'ged_dummy_sb', 'ged_dummy_ns', 'ged_dummy_os',
    'l12_ged_dummy_sb', 'l12_ged_dummy_ns', 'l12_ged_dummy_os', 'ln_bdist3',
    'ln_ttime', 'ln_capdist', 'ln_pop', 'gcp_li_mer', 'imr_mean',
    'mountains_mean', 'urban_ih_li', 'excluded_dummy_li',
    'decay_12_cw_ged_dummy_sb_0', 'decay_12_cw_ged_dummy_ns_0',
    'decay_12_cw_ged_dummy_os_0', 'q_1_1_l1_ged_dummy_sb',
    'q_1_1_l1_ged_dummy_ns', 'q_1_1_l1_ged_dummy_os'
]

df = dbutils.db_to_df(connectstring,
                      schema="launched",
                      table="transforms_pgm_imp_1",
                      columns=columns)

limit = (df['month_id'] >= 169) & (df['month_id'] <= 420)
df = df[limit]

# add log imr
df['ln_imr_mean'] = np.log1p(df['imr_mean'])

# add dummy variables for border areas (+2 here due to small ds error)
df['border'] = np.where(df['ln_bdist3'] < np.log(25 + 2), 1, 0)

# set index
df.set_index(['pg_id', 'month_id'], inplace=True)

# print finish
Example #2

import sys

sys.path.append("../..")
import dbutils

uname = "YOUR USERNAME"
schema = "SCHEMA"
table = "TABLE"

columns_example = ["pg_id", "month_id", "ged_dummy_sb"]
# Leave empty to get all columns from the table, or replace with your own list
columns = []

connectstring = dbutils.make_connectstring(db="views",
                                           hostname="VIEWSHOST",
                                           port="5432",
                                           prefix="postgres",
                                           uname=uname)

df = dbutils.db_to_df(connectstring, schema, table, columns)

df.to_csv("my_table.csv")
df.to_stata("my_table.dta")
Example #3

import sys

sys.path.append("..")
import dbutils

# set up connectstring (not shown in the original excerpt;
# reproduced from the other examples)
uname = "VIEWSADMIN"
connectstring = dbutils.make_connectstring(db="views",
                                           hostname="VIEWSHOST",
                                           port="5432",
                                           prefix="postgres",
                                           uname=uname)

schema = "landed"
if_exists = "replace"

# set index on
id_dict = {"cm": ["month_id", "country_id"], "pgm": ["month_id", "pg_id"]}

# inserts
tables = [
    "ensemble_cm_fcast_test", "ensemble_cm_eval_test",
    "ensemble_pgm_fcast_test", "ensemble_pgm_eval_test"
]

# get startmonth
table = tables[1]
df_current = dbutils.db_to_df(connectstring, "landed", table)
df_current.set_index(id_dict["cm"], inplace=True)
startmonth = int(df_current.index.get_level_values('month_id').min())
# get endmonth
table = tables[0]
df_current = dbutils.db_to_df(connectstring, "landed", table)
df_current.set_index(id_dict["cm"], inplace=True)
endmonth = int(df_current.index.get_level_values('month_id').min())

print(f"Getting deltas for decay between {startmonth} and {endmonth}.")

# loop over ensemble tables
for table in tables:
    # set table name for the deltas
    tablename = f"deltapred_{table}"
    # get data from previous and landed
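    # --- sketch of the missing step (not the original code) -------------
    # The excerpt ends here; a plausible continuation, assuming the delta
    # is the element-wise difference between the table as currently landed
    # and a previously stored copy in a hypothetical "previous" schema:
    ids = id_dict["cm"] if "_cm_" in table else id_dict["pgm"]
    df_landed = dbutils.db_to_df(connectstring, "landed", table)
    df_landed.set_index(ids, inplace=True)
    df_previous = dbutils.db_to_df(connectstring, "previous", table)
    df_previous.set_index(ids, inplace=True)
    # element-wise change between the two runs, aligned on the shared index
    df_delta = df_landed.subtract(df_previous, fill_value=0)
    # writing df_delta back to f"{schema}.{tablename}" is left out, since the
    # project's own df-to-db helper is not shown in this excerpt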
Example #4

import sys

sys.path.append("..")
import pandas as pd

import dbutils


def latlon_to_pgid(lat, lon):
    # NOTE: the original excerpt starts mid-function; the signature and the
    # two component lines are reconstructed assuming the standard PRIO-GRID
    # convention (0.5-degree cells, gid 1 at lat -90, lon -180).
    lat_component = int((lat + 90) * 2) * 720
    lon_component = int((lon + 180) * 2)
    gid = lat_component + lon_component + 1
    return int(gid)


# set up connectstring
uname = "VIEWSADMIN"
connectstring = dbutils.make_connectstring(db="views",
                                           hostname="VIEWSHOST",
                                           port="5432",
                                           prefix="postgres",
                                           uname=uname)

try:
    df = pd.read_hdf("geopko_cached.hdf5")
except (FileNotFoundError, OSError):
    # fall back to the database when the local cache is missing
    df = dbutils.db_to_df(connectstring, schema="dataprep", table="geo_pko")

df = df.drop(columns=["index", "month", "year"])

groupvar = "mission_location"
timevar = "month_id"
# Mission location is the groupvar to use as ID
df[groupvar] = df['mission'] + "__" + df['location']
df.set_index([timevar, groupvar], inplace=True)
df.sort_index(inplace=True)

# To keep our extended group dfs in
dfs = []
# group by mission_location (level 1 of the index)
for key, group in df.groupby(groupvar):
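    # --- sketch of the missing loop body (not the original code) --------
    # A plausible continuation, assuming each mission-location series is
    # extended to cover every month between its first and last observation,
    # with gaps forward-filled, before all groups are concatenated again:
    group = group.reset_index(level=groupvar, drop=True).sort_index()
    full_range = range(int(group.index.min()), int(group.index.max()) + 1)
    group = group.reindex(full_range).ffill()
    group[groupvar] = key
    dfs.append(group)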
Example #5

import sys

sys.path.append("..")
import dbutils

# set up connectstring
uname = "VIEWSADMIN"
connectstring = dbutils.make_connectstring(db="views",
                                           hostname="VIEWSHOST",
                                           port="5432",
                                           prefix="postgres",
                                           uname=uname)

## preparation of cm_actuals_preds
# indexes for cm_evalpreds
timevar = "month_id"
groupvar = "country_id"

# get cm evalpreds from db (change to SQL query)
cm_preds = dbutils.db_to_df(connectstring, schema="landed",
                            table="calibrated_cm_eval_test",
                            ids=[timevar, groupvar])
cm_ensemble = dbutils.db_to_df(connectstring, schema="landed",
                               table="ensemble_cm_eval_test",
                               ids=[timevar, groupvar])
cm_actuals = dbutils.db_to_df(connectstring, schema="launched",
                              table="transforms_cm_imp_1",
                              ids=[timevar, groupvar],
                              columns=["ged_dummy_sb", "ged_dummy_ns",
                                       "ged_dummy_os"])
df_cm_actualspreds = cm_actuals.merge(cm_preds, left_index=True, right_index=True)
df_cm_actualspreds = df_cm_actualspreds.merge(cm_ensemble, left_index=True, right_index=True) 

# get country information
df_c = dbutils.db_to_df(connectstring, schema="staging",
                        table="country", columns=["id", "name"])