예제 #1
0
def test_export_influxdb_tidy():
    """Export tidy observation data through a patched InfluxDB client.

    The ``DataFrameClient`` class is replaced by a mock, so no real
    connection is made. The test verifies that the exporter connects to
    database ``dwd``, creates it, and writes exactly once to measurement
    ``weather`` with the expected tag columns and batch size.
    """
    request = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
        tidy_data=True,
    )

    influx_client = mock.MagicMock()
    # ``create=True`` lets the patch succeed even if the attribute is absent.
    client_patch = mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[influx_client],
        create=True,
    )

    with client_patch as client_factory:
        frame = request.all()
        frame.dwd.lower().io.export(
            "influxdb://localhost/?database=dwd&table=weather"
        )

        client_factory.assert_called_once_with(database="dwd")
        influx_client.create_database.assert_called_once_with("dwd")

        # Exactly one write, carrying the tidy-format tag columns.
        influx_client.write_points.assert_called_once_with(
            dataframe=mock.ANY,
            measurement="weather",
            tag_columns=["station_id", "quality", "parameter_set", "parameter"],
            batch_size=50000,
        )
예제 #2
0
def test_coerce_field_types():
    """Check coercion of meta and parameter columns from raw strings.

    The constructor of ``DWDObservationData`` is replaced by a stub that
    only sets ``tidy_data`` and ``resolution``, so the coercion methods can
    be called without building a real request.
    """
    raw = pd.DataFrame(
        {
            "STATION_ID": ["00001"],
            "QN": ["1"],
            "RS_IND_01": ["1"],
            "DATE": ["1970010100"],
            "END_OF_INTERVAL": ["1970010100:00"],
            "V_VV_I": ["P"],
        }
    )

    def _stub_init(self):
        # Only the attributes assigned here exist on the instance.
        self.tidy_data = False
        self.resolution = DWDObservationResolution.HOURLY

    with patch.object(DWDObservationData, "__init__", _stub_init):
        request = DWDObservationData()
        # Meta fields first, then parameter columns.
        coerced = request._coerce_parameter_types(
            request._coerce_meta_fields(raw)
        )

    nullable_int = pd.Int64Dtype()
    expected = pd.DataFrame(
        {
            "STATION_ID": pd.Series(["00001"], dtype="category"),
            "QN": pd.Series([1], dtype=nullable_int),
            "RS_IND_01": pd.Series([1], dtype=nullable_int),
            "DATE": [pd.Timestamp("1970-01-01").tz_localize("UTC")],
            "END_OF_INTERVAL": [pd.Timestamp("1970-01-01")],
            "V_VV_I": pd.Series(["P"], dtype=pd.StringDtype()),
        }
    )

    assert_frame_equal(coerced, expected)
예제 #3
0
def test_request():
    """Smoke test: collecting a daily climate-summary request is non-empty."""
    request_options = {
        "station_ids": [1048],
        "parameters": DWDObservationParameterSet.CLIMATE_SUMMARY,
        "resolution": DWDObservationResolution.DAILY,
        "periods": DWDObservationPeriod.RECENT,
    }

    frame = DWDObservationData(**request_options).collect_safe()

    assert not frame.empty
def get_weatherdata(station_id, weather_parameters, resolution, periods,
                    start_date, end_date):
    """
    Return a time series of DWD weather-station observations.

    The request is built with ``tidy_data=True`` and
    ``humanize_column_names=True`` and collected via ``collect_safe()``.

    :param station_id: list of station number(s), e.g. ``[4928]``
    :param weather_parameters: list of parameters in the form
        ``DWDObservationParameterSet.PARAMETERNAME``
    :param resolution: time resolution in the form
        ``DWDObservationResolution.RESOLUTION``
    :param periods: list of observation periods in the form
        ``DWDObservationPeriod.PERIOD``
    :param start_date: start date as a string, format ``"YYYY-MM-DD"``
    :param end_date: end date as a string, format ``"YYYY-MM-DD"``
    :return: pandas DataFrame with the selected observations

    Further information on parameter types:
    https://wetterdienst.readthedocs.io/_/downloads/en/latest/pdf/

    Note (from the original author): if the chosen resolution does not
    offer a requested parameter, an AttributeError is the result.
    """
    request = DWDObservationData(
        station_ids=station_id,
        parameters=weather_parameters,
        resolution=resolution,
        periods=periods,
        start_date=start_date,
        end_date=end_date,
        tidy_data=True,
        humanize_column_names=True,
    )

    return request.collect_safe()
예제 #5
0
def sql_example():
    """Query hourly air-temperature observations with SQL and print the result.

    Builds a tidy request for station 1048 covering 2019-01-01 to
    2020-01-01, logs the SQL statement, runs it against the lower-cased
    frame and prints the filtered rows.
    """
    sql = "SELECT * FROM data WHERE parameter='temperature_air_200' AND value < -7.0;"

    request = DWDObservationData(
        station_ids=[1048],
        parameters=[DWDObservationParameterSet.TEMPERATURE_AIR],
        resolution=DWDObservationResolution.HOURLY,
        start_date="2019-01-01",
        end_date="2020-01-01",
        tidy_data=True,
        humanize_parameters=True,
    )

    log.info(f"Invoking SQL query '{sql}'")

    # Fetch everything, then filter through the SQL accessor.
    everything = request.all()
    selection = everything.dwd.lower().io.sql(sql)

    print(selection)
예제 #6
0
def test_export_crate():
    """Export to a ``crate://`` URL and check the ``DataFrame.to_sql`` call.

    ``pandas.DataFrame.to_sql`` is patched, so nothing is written; the test
    only verifies the arguments the exporter passes on.
    """
    target = "crate://localhost/?database=test&table=testdrive"

    request = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    with mock.patch("pandas.DataFrame.to_sql") as to_sql:
        frame = request.collect_safe()
        frame.io.export(target)

        # The full target URL is handed through as the connection.
        to_sql.assert_called_once_with(
            name="testdrive",
            con=target,
            if_exists="replace",
            index=False,
            method="multi",
            chunksize=5000,
        )
예제 #7
0
def test_export_influxdb():
    """Export observation data through a patched InfluxDB client.

    Verifies that the exporter connects to database ``dwd``, creates it and
    issues exactly one ``write_points`` call.
    """
    request = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    influx_client = mock.MagicMock()
    # ``create=True`` lets the patch succeed even if the attribute is absent.
    client_patch = mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[influx_client],
        create=True,
    )

    with client_patch as client_factory:
        frame = request.collect_safe()
        frame.dwd.lower().io.export(
            "influxdb://localhost/?database=dwd&table=weather"
        )

        client_factory.assert_called_once_with(database="dwd")
        influx_client.create_database.assert_called_once_with("dwd")
        influx_client.write_points.assert_called_once()
예제 #8
0
def test_export_duckdb():
    """Export to a ``duckdb://`` URL using a patched ``duckdb.connect``.

    Verifies the connection arguments and that the exporter registers the
    frame, executes at least one statement, looks up table ``testdrive``
    and closes the connection.
    """
    request = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    duck_connection = mock.MagicMock()
    connect_patch = mock.patch(
        "duckdb.connect",
        side_effect=[duck_connection],
        create=True,
    )

    with connect_patch as connect:
        frame = request.collect_safe()
        frame.io.export("duckdb:///test.duckdb?table=testdrive")

        connect.assert_called_once_with(
            database="test.duckdb",
            read_only=False,
        )
        duck_connection.register.assert_called_once()
        duck_connection.execute.assert_called()
        duck_connection.table.assert_called_once_with("testdrive")
        # NOTE: an assertion on ``table(...).to_df`` was disabled in the
        # original test and is intentionally not checked here.
        duck_connection.close.assert_called_once()
예제 #9
0
def test_coerce_field_types_with_nans():
    """Check parameter-type coercion when columns contain missing values.

    Each input column mixes ``pd.NA``, ``np.nan`` and one string value. The
    constructor of ``DWDObservationData`` is stubbed so that only
    ``tidy_data`` is set on the instance.
    """

    def _stub_init(self):
        self.tidy_data = False

    raw = pd.DataFrame(
        {
            "QN": [pd.NA, np.nan, "1"],
            "RS_IND_01": [pd.NA, np.nan, "1"],
            "V_VV_I": [pd.NA, np.nan, "P"],
        }
    )

    with mock.patch.object(DWDObservationData, "__init__", new=_stub_init):
        request = DWDObservationData()
        coerced = request._coerce_parameter_types(raw)

    nullable_int = pd.Int64Dtype()
    expected = pd.DataFrame(
        {
            "QN": pd.Series([pd.NA, np.nan, 1], dtype=nullable_int),
            "RS_IND_01": pd.Series([pd.NA, np.nan, 1], dtype=nullable_int),
            "V_VV_I": pd.Series([pd.NA, np.nan, "P"], dtype=pd.StringDtype()),
        }
    )

    assert_frame_equal(coerced, expected)
예제 #10
0
def dwd_readings(
    product: str,
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None, alias="mosmix-type"),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param product:     string for product, either observations or mosmix
    :param station:     Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param mosmix_type: type of mosmix, either small or large
    :param date:        Date or date range
    :param sql:         SQL expression
    :return:            JSON response built from the resulting records
    :raises HTTPException: 404 if ``product`` is unknown; 400 if a required
                           query argument is missing
    """
    if product not in ["observations", "mosmix"]:
        # Raise rather than return: a returned HTTPException is treated as
        # the handler's result instead of producing a 404 error response.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    # Materialize a list rather than a one-shot ``map`` iterator, so the
    # identifiers can safely be iterated more than once downstream.
    station_ids = [str(station_id) for station_id in read_list(station)]

    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        # Data acquisition.
        readings = DWDObservationData(
            station_ids=station_ids,
            parameters=parameter,
            resolution=resolution,
            periods=period,
            tidy_data=True,
            humanize_parameters=True,
        )
    else:
        if mosmix_type is None:
            raise HTTPException(
                status_code=400, detail="Query argument 'mosmix_type' is required"
            )

        mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)

        readings = DWDMosmixData(station_ids=station_ids, mosmix_type=mosmix_type)

    # Postprocessing.
    df = readings.all()

    if date is not None:
        # NOTE(review): for product "mosmix", ``resolution`` is still the raw
        # query value (possibly None) at this point, because it is only parsed
        # in the observations branch. Confirm filter_by_date copes with that.
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    # Round-trip through JSON text so dates are serialized as ISO strings.
    data = json.loads(df.to_json(orient="records", date_format="iso"))

    return make_json_response(data)
예제 #11
0
def dwd_readings(
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param station:     Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param date:        Date or date range
    :param sql:         SQL expression
    :return:            JSON response built from the resulting records
    :raises HTTPException: 400 if a required query argument is missing or
                           a station identifier is not an integer
    """

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    if parameter is None or resolution is None or period is None:
        raise HTTPException(
            status_code=400,
            detail="Query arguments 'parameter', 'resolution' "
            "and 'period' are required",
        )

    # Convert eagerly: a lazy ``map(int, ...)`` would only fail later, when
    # something iterates it, and surface as an unhandled ValueError instead
    # of a client error.
    try:
        station_ids = [int(station_id) for station_id in read_list(station)]
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="Query argument 'station' must be a comma-separated "
            "list of integers",
        ) from None

    parameter = parse_enumeration_from_template(parameter, DWDObservationParameterSet)
    resolution = parse_enumeration_from_template(resolution, DWDObservationResolution)
    period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    observations = DWDObservationData(
        station_ids=station_ids,
        parameters=parameter,
        resolution=resolution,
        periods=period,
        tidy_data=True,
        humanize_column_names=True,
    )

    # Postprocessing.
    df = observations.collect_safe()

    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    # Round-trip through JSON text so dates are serialized as ISO strings.
    data = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(data)