Exemplo n.º 1
0
def test_request():
    """A basic DAILY/RECENT climate-summary request must return a non-empty frame."""
    params = dict(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
    )
    result = DWDStationRequest(**params).collect_safe()
    assert not result.empty
Exemplo n.º 2
0
def sql_example():
    """Fetch hourly air-temperature readings and filter them with SQL.

    Acquires one year of hourly air-temperature data for station 1048
    (using the local file cache when available), then selects all readings
    colder than -7.0 degrees via an SQL expression and prints the result.
    """
    request = DWDStationRequest(
        station_ids=[1048],
        parameter=[Parameter.TEMPERATURE_AIR],
        time_resolution=TimeResolution.HOURLY,
        start_date="2019-01-01",
        end_date="2020-01-01",
        tidy_data=True,
        humanize_column_names=True,
        prefer_local=True,
        write_file=True,
    )

    sql = "SELECT * FROM data WHERE element='temperature_air_200' AND value < -7.0;"
    log.info(f"Invoking SQL query '{sql}'")

    df = request.collect_safe()
    # Fix: the pandas accessor is registered as ``dwd`` (as used by the other
    # examples in this file), not ``wd`` — ``df.wd`` raises AttributeError.
    df = df.dwd.lower().io.sql(sql)

    print(df)
Exemplo n.º 3
0
def test_export_crate():
    """Exporting to a CrateDB URL must delegate to ``pandas.DataFrame.to_sql``."""
    request = DWDStationRequest(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
    )

    target = "crate://localhost/?database=test&table=testdrive"
    with mock.patch("pandas.DataFrame.to_sql") as to_sql_mock:
        request.collect_safe().io.export(target)

        # The exporter parses table/connection info out of the URL and
        # forwards everything else as fixed keyword arguments.
        to_sql_mock.assert_called_once_with(
            name="testdrive",
            con=target,
            if_exists="replace",
            index=False,
            method="multi",
            chunksize=5000,
        )
Exemplo n.º 4
0
def test_export_influxdb():
    """Exporting to an InfluxDB URL creates the database and writes points."""
    request = DWDStationRequest(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
    )

    client_mock = mock.MagicMock()
    patcher = mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[client_mock],
        create=True,
    )
    with patcher as connect_mock:
        frame = request.collect_safe()
        frame.dwd.lower().io.export(
            "influxdb://localhost/?database=dwd&table=weather")

        # Connection, database creation and the actual write must each
        # happen exactly once, driven by the URL's query parameters.
        connect_mock.assert_called_once_with(database="dwd")
        client_mock.create_database.assert_called_once_with("dwd")
        client_mock.write_points.assert_called_once()
Exemplo n.º 5
0
def test_export_duckdb():
    """Exporting to a DuckDB URL registers the frame and closes the connection."""
    request = DWDStationRequest(
        station_ids=[1048],
        parameter=Parameter.CLIMATE_SUMMARY,
        time_resolution=TimeResolution.DAILY,
        period_type=PeriodType.RECENT,
    )

    connection_mock = mock.MagicMock()
    patcher = mock.patch(
        "duckdb.connect",
        side_effect=[connection_mock],
        create=True,
    )
    with patcher as connect_mock:
        frame = request.collect_safe()
        frame.io.export("duckdb:///test.duckdb?table=testdrive")

        # File path and table name come from the URL; the connection is
        # opened read-write, used, and closed exactly once.
        connect_mock.assert_called_once_with(
            database="test.duckdb", read_only=False)
        connection_mock.register.assert_called_once()
        connection_mock.execute.assert_called()
        connection_mock.table.assert_called_once_with("testdrive")
        connection_mock.close.assert_called_once()
Exemplo n.º 6
0
def main(cfg: DWDConfig) -> None:
    """Build an hourly DWD weather dataset and split it into train/test CSVs.

    Downloads temperature and precipitation readings for the configured
    stations, pivots them into one wide frame (first station = target,
    remaining stations = suppliers), draws random windows of
    ``cfg.sample_hours`` consecutive gap-free readings, and writes each
    window as a CSV into ``data/DWD/train`` or ``data/DWD/test`` under the
    original (pre-Hydra) working directory.

    :param cfg: Hydra config; reads ``res``, ``stations``, ``start_date``,
                ``end_date``, ``sample_hours`` and ``valid_ratio``.
    :raises NotImplementedError: for any resolution other than "hourly".
    """
    base_path = Path(hydra.utils.get_original_cwd())
    (base_path / "data" / "DWD" / "test").mkdir(exist_ok=True, parents=True)
    (base_path / "data" / "DWD" / "train").mkdir(exist_ok=True, parents=True)

    # hohenpeissenberg, wielenbach, altenstadt
    # target, supplier(s)
    # ids: (2290, 5538, 125)

    # Only hourly resolution is supported; "daily" is recognised but
    # deliberately rejected until implemented.
    if cfg.res == "hourly":
        res = TimeResolution.HOURLY
    elif cfg.res == "daily":
        res = TimeResolution.DAILY
        log.error("Daily data setup valid but not implemented yet")
        raise NotImplementedError
    else:
        log.error("Only hourly time resolution allowed (for now)")
        raise NotImplementedError

    request = DWDStationRequest(
        station_ids=cfg.stations,
        parameter=[Parameter.TEMPERATURE_AIR, Parameter.PRECIPITATION],
        time_resolution=res,
        start_date=cfg.start_date,
        end_date=cfg.end_date,
        tidy_data=True,
        humanize_column_names=True,
        write_file=True,
        prefer_local=True,
    )

    # One pivoted frame per station: DATE index, one column per element.
    dfs = []
    for df in request.collect_data():
        # sid = df.iloc[0]["STATION_ID"]
        df = df[["ELEMENT", "DATE", "VALUE"]]
        df = df.pivot(index="DATE", columns="ELEMENT", values="VALUE")
        dfs.append(df[["PRECIPITATION_HEIGHT", "TEMPERATURE_AIR_200"]])

    # merge and keep indices
    # Outer join on DATE so gaps at any station become NaN rows (dropped
    # later per sample window).
    df = reduce(
        lambda df_left, df_right: pd.merge(
            df_left, df_right, left_index=True, right_index=True, how="outer"),
        dfs,
    )

    # Rename columns positionally: the first frame is the target station,
    # the remaining frames become SRC01, SRC02, ... suppliers.
    target_cols = ["TARGET_PRCP", "TARGET_TEMP"]
    source_cols = [[f"SRC{d:02d}_PRCP", f"SRC{d:02d}_TEMP"]
                   for d in range(1, len(dfs))]
    df.columns = target_cols + list(itertools.chain(*source_cols))

    log.debug(f"Sample:\n{df.head()}")

    # Draw n/3 random window start rows (with replacement, so windows may
    # overlap or repeat); sorted for deterministic output ordering.
    n = len(df)
    rows = range(n)
    row_idx = sorted(random.choices(rows, k=n // 3))

    samples = []
    valid, invalid = 0, 0
    for i in row_idx:
        start_dt = df.index[i]
        end_dt = start_dt + timedelta(days=cfg.sample_hours / 24)

        sample = df[(df.index >= start_dt) & (df.index < end_dt)]
        sample = sample.dropna()
        # A window is valid only if it still has exactly sample_hours rows
        # after dropping NaNs, i.e. it is fully contiguous with no gaps.
        if len(sample) == cfg.sample_hours:
            samples.append(sample)
            valid += 1
        else:
            invalid += 1

    log.info(f"invalid: {invalid}, valid: {valid}")
    # Chronological split: the first (1 - valid_ratio) share of windows goes
    # to train, the rest to test.
    for i, sample in enumerate(samples):
        # if i % 100 == 0: print(i)
        if i < len(samples) * (1 - cfg.valid_ratio):
            sample.to_csv(base_path / "data" / "DWD" / "train" /
                          f"{i:05d}.csv")
        else:
            sample.to_csv(base_path / "data" / "DWD" / "test" / f"{i:05d}.csv")
Exemplo n.º 7
0
def dwd_readings(
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire observation data from DWD and return it as JSON records.

    # TODO: Obtain lat/lon distance/number information.

    :param station:     Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param date:        Optional date or date range filter
    :param sql:         Optional SQL expression for post-filtering
    :return:            JSON response with one record per reading
    """

    # Validate mandatory query arguments up front.
    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    if None in (parameter, resolution, period):
        raise HTTPException(
            status_code=400,
            detail="Query arguments 'parameter', 'resolution' "
            "and 'period' are required",
        )

    # Decode raw query strings into their domain types.
    station_ids = map(int, read_list(station))
    parameter = parse_enumeration_from_template(parameter, Parameter)
    resolution = parse_enumeration_from_template(resolution, TimeResolution)
    period = parse_enumeration_from_template(period, PeriodType)

    # Data acquisition.
    df = DWDStationRequest(
        station_ids=station_ids,
        parameter=parameter,
        time_resolution=resolution,
        period_type=period,
        tidy_data=True,
        humanize_column_names=True,
    ).collect_safe()

    # Postprocessing: optional date filter, lowercase columns, optional SQL.
    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    records = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(records)