Пример #1
0
def _stage_spei():
    """ Run the SPEI staging SQL, then the cleanup SQL.

    Both scripts are read from the directory containing this module and
    executed against the database in that order.
    """
    log.debug("Started staging SPEI")
    sql_dir = os.path.dirname(__file__)
    # Order matters: stage.sql is executed before cleanup.sql.
    for sql_name in ("stage.sql", "cleanup.sql"):
        sql_text = io.read_file(os.path.join(sql_dir, sql_name))
        db.execute_query(query=sql_text)
    log.debug("Finished staging SPEI")
Пример #2
0
def _get_id_dfs() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """ Fetch the id lookup dataframes from the database.

    Executes pg_ug.sql (located next to this module) first, then reads
    three tables.

    Returns:
        A tuple (df_pg_ug, df_m, df_ug_pgm):
          * df_pg_ug: spei_v2.pg_ug read with ids=["pg_id"].
          * df_m: staging.month with "year_id" renamed to "year" and "id"
            renamed to "month_id", indexed by (year, month).
          * df_ug_pgm: staging.priogrid_month joined with df_pg_ug, indexed
            by (ug_id, month_id), keeping only the pg_id and
            priogrid_month_id columns.
    """
    pg_ug_sql = os.path.join(os.path.dirname(__file__), "pg_ug.sql")
    db.execute_query(query=io.read_file(path=pg_ug_sql))
    df_pg_ug = db.db_to_df(fqtable="spei_v2.pg_ug", ids=["pg_id"])

    # Month lookup: (year, month) -> month_id
    months = db.db_to_df(
        fqtable="staging.month",
        cols=["id"],
        ids=["year_id", "month"],
    )
    months = months.reset_index()
    months = months.rename(columns={"year_id": "year", "id": "month_id"})
    df_m = months.set_index(["year", "month"])

    # Priogrid-month lookup: (ug_id, month_id) -> pg_id, priogrid_month_id
    pgm = db.db_to_df(
        fqtable="staging.priogrid_month",
        cols=["id"],
        ids=["priogrid_gid", "month_id"],
    )
    pgm = pgm.reset_index()
    pgm = pgm.rename(
        columns={"id": "priogrid_month_id", "priogrid_gid": "pg_id"}
    )
    # The join attaches the columns of df_pg_ug (presumably ug_id keyed by
    # pg_id -- verify against pg_ug.sql) via the index.
    pgm = pgm.set_index(["pg_id", "month_id"]).join(df_pg_ug)
    pgm = pgm.reset_index().set_index(["ug_id", "month_id"])
    df_ug_pgm = pgm[["pg_id", "priogrid_month_id"]]

    return df_pg_ug, df_m, df_ug_pgm
Пример #3
0
def _prepare_acled():
    """ Execute prepare_acled.sql, located next to this module. """
    log.debug("Started _prepare_acled()")
    # The statements are plain, unparametrised SQL kept in a separate file.
    sql_path = os.path.join(os.path.dirname(__file__), "prepare_acled.sql")
    sql_text = io.read_file(path=sql_path)
    db.execute_query(query=sql_text)
    log.debug("Finished _prepare_acled()")
Пример #4
0
def _prepare_ged() -> None:
    """ Recreates preflight.ged_attached and preflight.ged_attached_full.

    The SQL lives in prepare_ged.sql in the same directory as this module;
    it is read and executed as a single query.
    """
    # Plain string literals: the messages have no placeholders, so the
    # f-prefix was superfluous.
    log.debug("Preparing preflight.ged_attached(_full)")
    query = io.read_file(
        path=os.path.join(os.path.dirname(__file__), "prepare_ged.sql"))
    db.execute_query(query)
    log.debug("Done preflight.ged_attached(_full)")
Пример #5
0
def parse_page(path: str) -> List[Dict[Any, Any]]:
    """ Parse a saved CrisisWatch HTML page into a list of entry dicts.

    Args:
        path: Path to the HTML file to parse.

    Returns:
        One dict per CrisisWatch entry block found on the page, with keys
        "date", "name", "alerts", "opportunities", "deteriorated",
        "improved", "unobserved" and "text". The flag values are 0/1
        integers; "unobserved" is always 0 here.

    Raises:
        AttributeError: If an entry block lacks an <h3> or <time> element.
    """
    # pylint: disable=too-many-locals

    soup = BeautifulSoup(io.read_file(path), "html.parser")

    block_attrs = {
        "class": "c-crisiswatch-entry [ o-container o-container--m u-pr ]"
    }
    text_attrs = {"class": "o-crisis-states__detail [ u-ptserif u-fs18 ]"}

    entries = []
    for block in soup.find_all("div", block_attrs):
        heading = block.find("h3")

        # Trim surrounding whitespace only. The previous regex
        # r"^s+|s+$" had lost its backslashes and therefore removed literal
        # leading/trailing "s" characters (e.g. "Honduras" -> "Hondura").
        countryname = heading.text.strip()
        entrydate = block.find("time").text

        # Entries may have no text block at all; fall back to empty text.
        detail = block.find("div", text_attrs)
        if detail is None:
            entrytext = ""
        else:
            entrytext = detail.text.replace("\n\t", "")

        # The status icons are <use> tags inside the heading; their string
        # representation carries the "#<status>" references we look for.
        markers = str(list(heading.find_all("use")))

        entries.append({
            "date": entrydate,
            "name": countryname,
            "alerts": 1 if "#risk-alert" in markers else 0,
            "opportunities": 1 if "#resolution" in markers else 0,
            "deteriorated": 1 if "#deteriorated" in markers else 0,
            "improved": 1 if "#improved" in markers else 0,
            "unobserved": 0,
            "text": entrytext,
        })

    log.debug(f"Read {len(entries)} entries from {path}")
    return entries
Пример #6
0
def load_ged() -> None:
    """ Load legacy GED, then rebuild the ged schema from ged.sql. """

    log.info("Started loading GED.")

    load_legacy_ged("20.9.4", 484, 484)  # 2020-04

    # Start from an empty schema before running the SQL.
    db.drop_schema("ged")
    db.create_schema("ged")

    sql_path = os.path.join(os.path.dirname(__file__), "ged.sql")
    sql_text = io.read_file(path=sql_path)
    db.execute_query(query=sql_text)

    log.info("Finished loading GED.")
Пример #7
0
def load_acled() -> None:
    """ Load legacy ACLED, then rebuild the acled schema from acled.sql. """

    log.info("Started loading ACLED.")

    load_legacy_acled(
        from_date="2020-01-01",
        from_month_id=483,
        to_month_id=484,
    )

    # Start from an empty schema before running the SQL.
    db.drop_schema("acled")
    db.create_schema("acled")

    sql_path = os.path.join(os.path.dirname(__file__), "acled.sql")
    sql_text = io.read_file(path=sql_path)
    db.execute_query(query=sql_text)

    log.info("Finished loading ACLED.")