Пример #1
0
def _prepare_acled():

    log.debug("Started _prepare_acled()")
    # This was pure sql, not even a parametrised query.
    db.execute_query(query=io.read_file(
        path=os.path.join(os.path.dirname(__file__), "prepare_acled.sql")))
    log.debug("Finished _prepare_acled()")
Пример #2
0
def _get_id_dfs() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """ Get dataframes with ids from database """
    db.execute_query(query=io.read_file(
        path=os.path.join(os.path.dirname(__file__), "pg_ug.sql")))
    df_pg_ug = db.db_to_df(fqtable="spei_v2.pg_ug", ids=["pg_id"])

    df_m = (db.db_to_df(fqtable="staging.month",
                        cols=["id"],
                        ids=["year_id",
                             "month"]).reset_index().rename(columns={
                                 "year_id": "year",
                                 "id": "month_id"
                             }).set_index(["year", "month"]))

    df_ug_pgm = (db.db_to_df(
        fqtable="staging.priogrid_month",
        cols=["id"],
        ids=["priogrid_gid", "month_id"],
    ).reset_index().rename(columns={
        "id": "priogrid_month_id",
        "priogrid_gid": "pg_id"
    }).set_index(["pg_id", "month_id"]).join(df_pg_ug).reset_index().set_index(
        ["ug_id", "month_id"])[["pg_id", "priogrid_month_id"]])

    return df_pg_ug, df_m, df_ug_pgm
Пример #3
0
def _stage_spei():
    """ Stage SPEI """
    log.debug("Started staging SPEI")
    db.execute_query(query=io.read_file(
        os.path.join(os.path.dirname(__file__), "stage.sql")))
    db.execute_query(query=io.read_file(
        os.path.join(os.path.dirname(__file__), "cleanup.sql")))
    log.debug("Finished staging SPEI")
Пример #4
0
def build_skeleton() -> None:
    """ Build skeleton schema by executing create_skeleton.sql """
    log.info("Started rebuilding skeleton schema.")
    path_query = os.path.join(os.path.dirname(__file__), "create_skeleton.sql")
    with open(path_query, "r") as f:
        query = f.read()
    db.execute_query(query)
    log.info("Finished rebuilding skeleton schema.")
Пример #5
0
def _load_spei_from_path(
    path: str,
    df_pg_ug: pd.DataFrame,
    df_m: pd.DataFrame,
    df_ug_pgm: pd.DataFrame,
) -> None:
    """ Load a single SPEI from .nc at path """

    spei_num = _spei_num_from_path(path)
    colname = f"spei_{spei_num}"

    # Use xarray to load the .nc, it deals with indexing, time etc
    df = xr.open_dataset(path).to_dataframe().reset_index()
    log.debug(f"Read {len(df)} rows of data from {path}")

    # Vectorised pg_id assignment on lat/lon
    df["pg_id"] = _priogrid_vec(df.lat.to_numpy(), df.lon.to_numpy())
    log.debug("Assigned pg_ids")

    # Get year/month from date for joining
    df["year"] = df["time"].dt.year
    df["month"] = df["time"].dt.month
    df.drop(columns=["time", "lat", "lon"], inplace=True)

    # Put the SPEI number in the colname
    df.rename(columns={"spei": colname}, inplace=True)
    # Drop rows with no data
    df.dropna(inplace=True)

    # Join in month_id and drop those with missing
    df = df.set_index(["year", "month"]).join(df_m).dropna()
    df.month_id = df.month_id.astype(int)
    log.debug("Assigned month_id")

    # Keep only data
    df = df.reset_index().set_index(["pg_id", "month_id"])[[colname]]

    # Join in the ug_id
    df = df.join(df_pg_ug).dropna()
    df.ug_id = df.ug_id.astype(int)
    log.debug("Assigned ug_id")

    # Now reindex to ug_id-month_id so we can join bigly
    df = df.reset_index().set_index(["ug_id", "month_id"]).sort_index()
    df = df[[colname]]

    # Now join in pg_id and priogrid_month_id by ug_id
    df = df_ug_pgm.join(df)

    df = df.set_index(["priogrid_month_id"])[[colname]].dropna()

    log.info(f"Started pushing {path}")

    # Push it up and create index
    fqtable = f"spei_v2.spei_{spei_num}"
    db.df_to_db(fqtable=fqtable, df=df)
    db.execute_query(query=f"CREATE INDEX ON {fqtable} (priogrid_month_id);")
    log.info(f"{fqtable} ready")
Пример #6
0
def _prepare_ged() -> None:
    """ Recreates preflight.ged_attached and preflight.ged_attached_full """

    # Moved into .sql file in this dir.
    log.debug(f"Preparing preflight.ged_attached(_full)")
    query = io.read_file(
        path=os.path.join(os.path.dirname(__file__), "prepare_ged.sql"))
    db.execute_query(query)
    log.debug(f"Done preflight.ged_attached(_full)")
Пример #7
0
def load_ged() -> None:
    """ Collect imputed and unimputed GED """

    log.info("Started loading GED.")

    load_legacy_ged("20.9.4", 484, 484)  # 2020-04

    db.drop_schema("ged")
    db.create_schema("ged")
    db.execute_query(
        query=io.read_file(
            path=os.path.join(os.path.dirname(__file__), "ged.sql")
        )
    )
    log.info("Finished loading GED.")
Пример #8
0
def load_acled() -> None:
    """ Code that brings acled to staging yet to be merged """

    log.info("Started loading ACLED.")

    load_legacy_acled(
        from_date="2020-01-01", from_month_id=483, to_month_id=484
    )

    db.drop_schema("acled")
    db.create_schema("acled")

    db.execute_query(
        query=io.read_file(
            path=os.path.join(os.path.dirname(__file__), "acled.sql")
        )
    )
    log.info("Finished loading ACLED.")