def _prepare_acled(): log.debug("Started _prepare_acled()") # This was pure sql, not even a parametrised query. db.execute_query(query=io.read_file( path=os.path.join(os.path.dirname(__file__), "prepare_acled.sql"))) log.debug("Finished _prepare_acled()")
def _get_id_dfs() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: """ Get dataframes with ids from database """ db.execute_query(query=io.read_file( path=os.path.join(os.path.dirname(__file__), "pg_ug.sql"))) df_pg_ug = db.db_to_df(fqtable="spei_v2.pg_ug", ids=["pg_id"]) df_m = (db.db_to_df(fqtable="staging.month", cols=["id"], ids=["year_id", "month"]).reset_index().rename(columns={ "year_id": "year", "id": "month_id" }).set_index(["year", "month"])) df_ug_pgm = (db.db_to_df( fqtable="staging.priogrid_month", cols=["id"], ids=["priogrid_gid", "month_id"], ).reset_index().rename(columns={ "id": "priogrid_month_id", "priogrid_gid": "pg_id" }).set_index(["pg_id", "month_id"]).join(df_pg_ug).reset_index().set_index( ["ug_id", "month_id"])[["pg_id", "priogrid_month_id"]]) return df_pg_ug, df_m, df_ug_pgm
def _stage_spei(): """ Stage SPEI """ log.debug("Started staging SPEI") db.execute_query(query=io.read_file( os.path.join(os.path.dirname(__file__), "stage.sql"))) db.execute_query(query=io.read_file( os.path.join(os.path.dirname(__file__), "cleanup.sql"))) log.debug("Finished staging SPEI")
def build_skeleton() -> None: """ Build skeleton schema by executing create_skeleton.sql """ log.info("Started rebuilding skeleton schema.") path_query = os.path.join(os.path.dirname(__file__), "create_skeleton.sql") with open(path_query, "r") as f: query = f.read() db.execute_query(query) log.info("Finished rebuilding skeleton schema.")
def _load_spei_from_path( path: str, df_pg_ug: pd.DataFrame, df_m: pd.DataFrame, df_ug_pgm: pd.DataFrame, ) -> None: """ Load a single SPEI from .nc at path """ spei_num = _spei_num_from_path(path) colname = f"spei_{spei_num}" # Use xarray to load the .nc, it deals with indexing, time etc df = xr.open_dataset(path).to_dataframe().reset_index() log.debug(f"Read {len(df)} rows of data from {path}") # Vectorised pg_id assignment on lat/lon df["pg_id"] = _priogrid_vec(df.lat.to_numpy(), df.lon.to_numpy()) log.debug("Assigned pg_ids") # Get year/month from date for joining df["year"] = df["time"].dt.year df["month"] = df["time"].dt.month df.drop(columns=["time", "lat", "lon"], inplace=True) # Put the SPEI number in the colname df.rename(columns={"spei": colname}, inplace=True) # Drop rows with no data df.dropna(inplace=True) # Join in month_id and drop those with missing df = df.set_index(["year", "month"]).join(df_m).dropna() df.month_id = df.month_id.astype(int) log.debug("Assigned month_id") # Keep only data df = df.reset_index().set_index(["pg_id", "month_id"])[[colname]] # Join in the ug_id df = df.join(df_pg_ug).dropna() df.ug_id = df.ug_id.astype(int) log.debug("Assigned ug_id") # Now reindex to ug_id-month_id so we can join bigly df = df.reset_index().set_index(["ug_id", "month_id"]).sort_index() df = df[[colname]] # Now join in pg_id and priogrid_month_id by ug_id df = df_ug_pgm.join(df) df = df.set_index(["priogrid_month_id"])[[colname]].dropna() log.info(f"Started pushing {path}") # Push it up and create index fqtable = f"spei_v2.spei_{spei_num}" db.df_to_db(fqtable=fqtable, df=df) db.execute_query(query=f"CREATE INDEX ON {fqtable} (priogrid_month_id);") log.info(f"{fqtable} ready")
def _prepare_ged() -> None: """ Recreates preflight.ged_attached and preflight.ged_attached_full """ # Moved into .sql file in this dir. log.debug(f"Preparing preflight.ged_attached(_full)") query = io.read_file( path=os.path.join(os.path.dirname(__file__), "prepare_ged.sql")) db.execute_query(query) log.debug(f"Done preflight.ged_attached(_full)")
def load_ged() -> None: """ Collect imputed and unimputed GED """ log.info("Started loading GED.") load_legacy_ged("20.9.4", 484, 484) # 2020-04 db.drop_schema("ged") db.create_schema("ged") db.execute_query( query=io.read_file( path=os.path.join(os.path.dirname(__file__), "ged.sql") ) ) log.info("Finished loading GED.")
def load_acled() -> None: """ Code that brings acled to staging yet to be merged """ log.info("Started loading ACLED.") load_legacy_acled( from_date="2020-01-01", from_month_id=483, to_month_id=484 ) db.drop_schema("acled") db.create_schema("acled") db.execute_query( query=io.read_file( path=os.path.join(os.path.dirname(__file__), "acled.sql") ) ) log.info("Finished loading ACLED.")