Example No. 1
def test_IntercompMetrics_calculator():
    """
    Test IntercompMetrics.
    """
    df = make_some_data()
    data = df[['ref', 'k1', 'k2']]

    metriccalc = IntercomparisonMetrics(
        other_names=('k1', 'k2'), calc_tau=True)

    res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res['n_obs'] == np.array([366])

    assert np.isnan(res['R_between_ref_and_k1'])
    assert np.isnan(res['R_between_ref_and_k2'])

    assert np.isnan(res['rho_between_ref_and_k1'])
    assert np.isnan(res['rho_between_ref_and_k2'])

    np.testing.assert_almost_equal(
        res['mse_between_ref_and_k1'], np.array([0.04], dtype=np.float32)
    )
    np.testing.assert_almost_equal(
        res['mse_between_ref_and_k2'], np.array([0.04], dtype=np.float32)
    )

    np.testing.assert_almost_equal(
        res['mse_corr_between_ref_and_k1'], np.array([0], dtype=np.float32)
    )
    np.testing.assert_almost_equal(
        res['mse_corr_between_ref_and_k2'], np.array([0], dtype=np.float32)
    )

    np.testing.assert_almost_equal(
        res['mse_bias_between_ref_and_k1'], np.array([0.04], dtype=np.float32)
    )
    np.testing.assert_almost_equal(
        res['mse_bias_between_ref_and_k2'], np.array([0.04], dtype=np.float32)
    )

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    assert (np.isnan(res['p_R_between_ref_and_k1'])
            or res['p_R_between_ref_and_k1'] == 1.0)
    assert (np.isnan(res['p_R_between_ref_and_k2'])
            or res['p_R_between_ref_and_k2'] == 1.0)

    assert res['RMSD_between_ref_and_k1'] == np.array([0.2], dtype='float32')
    assert res['RMSD_between_ref_and_k2'] == np.array([0.2], dtype='float32')

    assert res['BIAS_between_ref_and_k1'] == np.array([-0.2], dtype='float32')
    assert res['BIAS_between_ref_and_k2'] == np.array([0.2], dtype='float32')

    np.testing.assert_almost_equal(
        res['urmsd_between_ref_and_k1'], np.array([0.], dtype='float32'))
    np.testing.assert_almost_equal(
        res['urmsd_between_ref_and_k2'], np.array([0.], dtype='float32'))

    assert 'RSS_between_ref_and_k1' in res.keys()
    assert 'RSS_between_ref_and_k2' in res.keys()
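
The imports and the make_some_data() helper are not shown on this page. Below is a minimal, hypothetical stand-in that is consistent with the values asserted above (366 daily observations from a leap year, a constant reference, k1 biased by +0.2 and k2 by -0.2); the helper in the actual test suite may look different, and the import path is an assumption based on recent pytesmo versions.

import numpy as np
import pandas as pd

# import path assumed; adjust to the pytesmo version in use
from pytesmo.validation_framework.metric_calculators import IntercomparisonMetrics


def make_some_data():
    """Hypothetical test helper: constant reference plus two biased columns."""
    # the year 2000 is a leap year, so a daily range gives the asserted 366 observations
    index = pd.date_range("2000-01-01", "2000-12-31", freq="D")
    df = pd.DataFrame(
        index=index,
        data={name: np.repeat(0.5, index.size) for name in ["ref", "k1", "k2"]},
    )
    df["k1"] += 0.2  # positive bias w.r.t. the reference
    df["k2"] -= 0.2  # negative bias w.r.t. the reference
    return df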
Example No. 2
def test_IntercompMetrics_calculator():
    """
    Test IntercompMetrics.
    """
    df = make_some_data()
    data = df[["ref", "k1", "k2"]]

    metriccalc = IntercomparisonMetrics(other_names=("k1", "k2"),
                                        calc_tau=True)

    res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res["n_obs"] == np.array([366])

    assert np.isnan(res["R_between_ref_and_k1"])
    assert np.isnan(res["R_between_ref_and_k2"])

    assert np.isnan(res["rho_between_ref_and_k1"])
    assert np.isnan(res["rho_between_ref_and_k2"])

    np.testing.assert_almost_equal(res["mse_between_ref_and_k1"],
                                   np.array([0.04], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_between_ref_and_k2"],
                                   np.array([0.04], dtype=np.float32))

    np.testing.assert_almost_equal(res["mse_corr_between_ref_and_k1"],
                                   np.array([0], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_corr_between_ref_and_k2"],
                                   np.array([0], dtype=np.float32))

    np.testing.assert_almost_equal(res["mse_bias_between_ref_and_k1"],
                                   np.array([0.04], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_bias_between_ref_and_k2"],
                                   np.array([0.04], dtype=np.float32))

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    assert (np.isnan(res["p_R_between_ref_and_k1"])
            or res["p_R_between_ref_and_k1"] == 1.0)
    assert (np.isnan(res["p_R_between_ref_and_k2"])
            or res["p_R_between_ref_and_k2"] == 1.0)

    assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32")
    assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32")

    assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32")
    assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32")

    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k1"],
                                   np.array([0.0], dtype="float32"))
    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k2"],
                                   np.array([0.0], dtype="float32"))

    assert "RSS_between_ref_and_k1" in res.keys()
    assert "RSS_between_ref_and_k2" in res.keys()
Example No. 3
def test_IntercompMetrics_calculator_metadata():
    """
    Test IntercompMetrics with metadata.
    """
    df = make_some_data()
    data = df[['ref', 'k1', 'k2']]

    metadata_dict_template = {'network': np.array(['None'], dtype='U256')}

    metriccalc = IntercomparisonMetrics(other_names=('k1', 'k2'), calc_tau=True,
                                        metadata_template=metadata_dict_template)
    res = metriccalc.calc_metrics(
        data, gpi_info=(0, 0, 0, {'network': 'SOILSCAPE'}))

    assert res['network'] == np.array(['SOILSCAPE'], dtype='U256')
Example No. 4
def test_IntercompMetrics_calculator_metadata():
    """
    Test IntercompMetrics with metadata.
    """
    df = make_some_data()
    data = df[["ref", "k1", "k2"]]

    metadata_dict_template = {"network": np.array(["None"], dtype="U256")}

    metriccalc = IntercomparisonMetrics(
        other_names=("k1", "k2"),
        calc_tau=True,
        metadata_template=metadata_dict_template,
    )
    res = metriccalc.calc_metrics(data,
                                  gpi_info=(0, 0, 0, {
                                      "network": "SOILSCAPE"
                                  }))

    assert res["network"] == np.array(["SOILSCAPE"], dtype="U256")
Example No. 5
def test_PairwiseIntercomparisonMetrics(testdata_generator):
    # This test first compares the PairwiseIntercomparisonMetrics to known
    # results and then confirms that it agrees with IntercomparisonMetrics as
    # expected

    datasets, expected = testdata_generator()

    # for the pairwise intercomparison metrics it's important that we use
    # make_combined_temporal_matcher
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(6, "H")),
        metrics_calculators={
            (4, 2):
            (PairwiseIntercomparisonMetrics(calc_spearman=True,
                                            analytical_cis=False).calc_metrics)
        })
    results_pw = val.calc([1], [1], [1], rename_cols=False)

    # in results_pw, there are four entries with keys of the form
    # (("c1name", "c1"), ("refname", "ref")), and so on.
    # Each value is a single dictionary with the values of the metrics

    expected_metrics = [
        "R", "p_R", "BIAS", "RMSD", "mse", "RSS", "mse_corr", "mse_bias",
        "urmsd", "mse_var", "n_obs", "gpi", "lat", "lon", "rho", "p_rho",
        "tau", "p_tau"
    ]
    for key in results_pw:
        assert isinstance(key, tuple)
        assert len(key) == 2
        assert all(map(lambda x: isinstance(x, tuple), key))
        assert isinstance(results_pw[key], dict)
        assert sorted(expected_metrics) == sorted(results_pw[key].keys())
        for m in expected_metrics:
            if m in expected[key]:
                assert_equal(results_pw[key][m], expected[key][m])

    # preparation of IntercomparisonMetrics run for comparison
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname="reference_name",
        other_names=ds_names[1:],
        calc_tau=True,
    )
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,
        temporal_matcher=None,  # use default here
        metrics_calculators={(4, 4): metrics.calc_metrics})

    results = val.calc(1, 1, 1, rename_cols=False)

    # results is a dictionary with one entry and key
    # (('c1name', 'c1'), ('c2name', 'c2'), ('c3name', 'c3'), ('refname',
    # 'ref')); the value is a dictionary with all the results, where each
    # metric name is joined to the dataset combination with "_between_", and
    # the dataset names themselves are joined with "_and_", e.g. for R between
    # ``refname`` and ``c1name`` the key is "R_between_refname_and_c1name"
    common_metrics = ["n_obs", "gpi", "lat", "lon"]
    pw_metrics = list(set(expected_metrics) - set(common_metrics))
    # pytesmo sorts the dataset names when building the result key, so sort here as well
    oldkey = tuple(sorted([(name, name.split("_")[0]) for name in ds_names]))
    res_old = results[oldkey]
    for key in results_pw:
        res = results_pw[key]
        # handle the full dataset metrics
        for m in common_metrics:
            assert_equal(res[m], res_old[m])
        # now get the metrics and compare to the right combination
        for m in pw_metrics:
            othername = key[0][0]
            refname = key[1][0]
            if othername == "reference_name":
                # sorting might be different, see GH #220
                othername = key[1][0]
                refname = key[0][0]
            old_m_key = f"{m}_between_{refname}_and_{othername}"
            if m == "BIAS":
                # PairwiseIntercomparisonMetrics has the result as (other,
                # ref), and therefore "bias between other and ref", compared to
                # "bias between ref and other" in IntercomparisonMetrics;
                # this is related to issue #220
                assert_equal(np.abs(res[m]), np.abs(res_old[old_m_key]))
            elif m == "urmsd":
                # the old and new implementations of urmsd differ, so the values are not compared
                pass
            else:
                assert_equal(res[m], res_old[old_m_key])
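
testdata_generator is a pytest fixture that is not part of this page. From its use above it returns a pair (datasets, expected): datasets has the structure the Validation class expects (one entry per dataset with a reader instance under "class" and the column names under "columns", compare Examples No. 6 and No. 7), and expected maps result keys, i.e. pairs of (dataset_name, column_name) tuples, to reference metric values. A rough, hypothetical sketch of the expected mapping:

import numpy as np

# dataset and column names here are made up; values are purely illustrative
expected = {
    (("c1name", "c1"), ("reference_name", "ref")): {
        "n_obs": 366,
        "R": np.nan,
        "BIAS": 0.2,
    },
    # ... one entry per candidate/reference pair ...
}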
Example No. 6
def test_temporal_matching_ascat_ismn():
    """
    This test uses a CSV file of ASCAT and ISMN data to test if the temporal
    matching within the validation works as expected in a "real" setup.
    This only tests whether the number of observations matches, because this is
    the main thing the temporal matching influences.
    """

    # test with ASCAT and ISMN data
    here = Path(__file__).resolve().parent
    ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True)
    ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True)
    dfs = {"ASCAT": ascat, "ISMN": ismn}
    columns = {"ASCAT": "sm", "ISMN": "soil_moisture"}
    refname = "ISMN"
    window = pd.Timedelta(12, "H")

    old_matcher = BasicTemporalMatching().combinatory_matcher
    new_matcher = make_combined_temporal_matcher(window)

    datasets = {}
    for key in ["ISMN", "ASCAT"]:
        all_columns = list(dfs[key].columns)
        ds = {"columns": [columns[key]], "class": DummyReader(dfs[key], all_columns)}
        datasets[key] = ds

    new_val = Validation(
        datasets,
        refname,
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=new_matcher,
        metrics_calculators={
            (2, 2): PairwiseIntercomparisonMetrics().calc_metrics
        }
    )
    new_results = new_val.calc(
        1, 1, 1, rename_cols=False, only_with_temporal_ref=True
    )

    # old setup
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname=refname,
        other_names=ds_names[1:],
        calc_tau=True,
    )
    old_val = Validation(
        datasets,
        refname,
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=old_matcher,
        metrics_calculators={
            (2, 2): metrics.calc_metrics
        }
    )
    old_results = old_val.calc(
        1, 1, 1, rename_cols=False
    )

    old_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))
    new_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))

    assert old_results[old_key]["n_obs"] == new_results[new_key]["n_obs"]
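
DummyReader is another test helper that is not shown here. Judging from how it is constructed above, it simply wraps an in-memory DataFrame and exposes it through a reader interface. A minimal sketch follows; the real helper may differ, and whether read or read_ts is called depends on the pytesmo version and the DataManager's read_ts_names setting.

class DummyReader:
    """Hypothetical minimal reader wrapping an in-memory DataFrame."""

    def __init__(self, df, columns):
        # keep only the requested columns, like a pytesmo time-series reader would
        self.data = df[columns]

    def read(self, *args, **kwargs):
        # grid-point arguments are ignored; the same series is returned everywhere
        return self.data

    # alias, in case the data manager is configured to call read_ts instead
    read_ts = read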
Example No. 7
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if ((validation_run.reference_configuration) and
            (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if ((validation_run.reference_configuration) and
            (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
        if ((validation_run.scaling_ref)
                and (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # While pytesmo can't deal with timezones, normalise the validation
        # period to UTC; this can be removed once pytesmo supports timezones.
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    datamanager = DataManager(datasets,
                              ref_name=ref_name,
                              period=period,
                              read_ts_names='read')
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        # if there are 3 or more datasets, do TC and exclude reference metrics
        metrics = TCMetrics(
            dataset_names=ds_names,
            tc_metrics_for_ref=False,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])
    else:
        metrics = IntercomparisonMetrics(
            dataset_names=ds_names,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(datasets=datamanager,
                     spatial_ref=ref_name,
                     temporal_window=0.5,
                     scaling=scaling_method,
                     scaling_ref=scaling_ref_name,
                     metrics_calculators={
                         (ds_num, ds_num): metrics.calc_metrics
                     },
                     period=period)

    return val
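
A hedged sketch of how the returned Validation object is generally used with pytesmo afterwards; the grid point lists and the output path are placeholders, and the results-manager import path is an assumption based on recent pytesmo versions.

# import path assumed; adjust to the pytesmo version in use
from pytesmo.validation_framework.results_manager import netcdf_results_manager

val = create_pytesmo_validation(validation_run)  # validation_run as supplied by the caller

# placeholder job: a single point; real runs iterate over the jobs of the validation run
gpis, lons, lats = [0], [16.37], [48.21]
results = val.calc(gpis, lons, lats)

netcdf_results_manager(results, "/tmp/validation_results")  # hypothetical output directory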