Example No. 1
def test_TC_metrics_calculator_metadata():
    """
    Test TC metrics with metadata.
    """
    df = make_some_data()
    data = df[['ref', 'k1', 'k2']]

    metadata_dict_template = {'network': np.array(['None'], dtype='U256')}

    metriccalc = TCMetrics(other_names=('k1', 'k2'), calc_tau=True,
                           dataset_names=['ref', 'k1', 'k2'], metadata_template=metadata_dict_template)
    res = metriccalc.calc_metrics(
        data, gpi_info=(0, 0, 0, {'network': 'SOILSCAPE'}))

    assert res['network'] == np.array(['SOILSCAPE'], dtype='U256')
Example No. 2
def test_TC_metrics_calculator_metadata():
    """
    Test TC metrics with metadata.
    """
    df = make_some_data()
    data = df[["ref", "k1", "k2"]]

    metadata_dict_template = {"network": np.array(["None"], dtype="U256")}

    metriccalc = TCMetrics(
        other_names=("k1", "k2"),
        calc_tau=True,
        dataset_names=["ref", "k1", "k2"],
        metadata_template=metadata_dict_template,
    )
    res = metriccalc.calc_metrics(data,
                                  gpi_info=(0, 0, 0, {
                                      "network": "SOILSCAPE"
                                  }))

    assert res["network"] == np.array(["SOILSCAPE"], dtype="U256")
Example No. 3
def test_TC_metrics_calculator():
    """
    Test TC metrics.
    """
    # this calculator uses a reference data set that is part of ALL triples.
    df = make_some_data()
    data = df[['ref', 'k1', 'k2', 'k3']]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # many warnings due to test data

        metriccalc = TCMetrics(other_names=('k1', 'k2', 'k3'),
                               calc_tau=True,
                               dataset_names=('ref', 'k1', 'k2', 'k3'))

        res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res['n_obs'] == np.array([366])

    assert np.isnan(res['R_between_ref_and_k1'])
    assert np.isnan(res['R_between_ref_and_k2'])

    assert np.isnan(res['rho_between_ref_and_k1'])
    assert np.isnan(res['rho_between_ref_and_k2'])

    assert np.isnan(res['mse_between_ref_and_k1'])
    assert np.isnan(res['mse_between_ref_and_k2'])

    assert np.isnan(res['mse_corr_between_ref_and_k1'])
    assert np.isnan(res['mse_corr_between_ref_and_k2'])

    np.testing.assert_almost_equal(res['mse_bias_between_ref_and_k1'],
                                   np.array([0.04], dtype='float32'))
    np.testing.assert_almost_equal(res['mse_bias_between_ref_and_k2'],
                                   np.array([0.04], dtype='float32'))

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    assert (np.isnan(res['p_R_between_ref_and_k1'])
            or res['p_R_between_ref_and_k1'] == 1.0)
    assert (np.isnan(res['p_R_between_ref_and_k2'])
            or res['p_R_between_ref_and_k2'] == 1.0)

    assert res['RMSD_between_ref_and_k1'] == np.array([0.2], dtype='float32')
    assert res['RMSD_between_ref_and_k2'] == np.array([0.2], dtype='float32')

    assert res['BIAS_between_ref_and_k1'] == np.array([-0.2], dtype='float32')
    assert res['BIAS_between_ref_and_k2'] == np.array([0.2], dtype='float32')

    np.testing.assert_almost_equal(res['urmsd_between_ref_and_k1'],
                                   np.array([0.], dtype='float32'))
    np.testing.assert_almost_equal(res['urmsd_between_ref_and_k2'],
                                   np.array([0.], dtype='float32'))

    assert 'RSS_between_ref_and_k1' in res.keys()
    assert 'RSS_between_ref_and_k2' in res.keys()
    # each non-reference dataset has its own snr, err_std and beta

    assert np.isnan(res['snr_k1_between_ref_and_k1_and_k2'])
    assert np.isnan(res['snr_k2_between_ref_and_k1_and_k2'])
    assert np.isnan(res['snr_k2_between_ref_and_k2_and_k3'])
    assert np.isnan(res['err_std_k1_between_ref_and_k1_and_k2'])
    np.testing.assert_almost_equal(res['beta_k1_between_ref_and_k1_and_k2'][0],
                                   0.)
    np.testing.assert_almost_equal(res['beta_k2_between_ref_and_k1_and_k2'][0],
                                   0.)
    np.testing.assert_almost_equal(res['beta_k3_between_ref_and_k1_and_k3'][0],
                                   0.)
Example No. 4
def test_TC_metrics_calculator():
    """
    Test TC metrics.
    """
    # this calculator uses a reference data set that is part of ALL triples.
    df = make_some_data()
    data = df[["ref", "k1", "k2", "k3"]]

    metriccalc = TCMetrics(
        other_names=("k1", "k2", "k3"),
        calc_tau=True,
        dataset_names=("ref", "k1", "k2", "k3"),
    )

    res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res["n_obs"] == np.array([366])

    assert np.isnan(res["R_between_ref_and_k1"])
    assert np.isnan(res["R_between_ref_and_k2"])

    assert np.isnan(res["rho_between_ref_and_k1"])
    assert np.isnan(res["rho_between_ref_and_k2"])

    np.testing.assert_almost_equal(res["mse_between_ref_and_k1"],
                                   np.array([0.04], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_between_ref_and_k2"],
                                   np.array([0.04], dtype=np.float32))

    np.testing.assert_almost_equal(res["mse_corr_between_ref_and_k1"],
                                   np.array([0], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_corr_between_ref_and_k2"],
                                   np.array([0], dtype=np.float32))

    np.testing.assert_almost_equal(res["mse_bias_between_ref_and_k1"],
                                   np.array([0.04], dtype=np.float32))
    np.testing.assert_almost_equal(res["mse_bias_between_ref_and_k2"],
                                   np.array([0.04], dtype=np.float32))

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    assert (np.isnan(res["p_R_between_ref_and_k1"])
            or res["p_R_between_ref_and_k1"] == 1.0)
    assert (np.isnan(res["p_R_between_ref_and_k2"])
            or res["p_R_between_ref_and_k2"] == 1.0)

    assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32")
    assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32")

    assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32")
    assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32")

    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k1"],
                                   np.array([0.0], dtype="float32"))
    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k2"],
                                   np.array([0.0], dtype="float32"))

    assert "RSS_between_ref_and_k1" in res.keys()
    assert "RSS_between_ref_and_k2" in res.keys()
    # each non-reference dataset has its own snr, err_std and beta

    assert np.isnan(res["snr_k1_between_ref_and_k1_and_k2"])
    assert np.isnan(res["snr_k2_between_ref_and_k1_and_k2"])
    assert np.isnan(res["snr_k2_between_ref_and_k2_and_k3"])
    assert np.isnan(res["err_std_k1_between_ref_and_k1_and_k2"])
    np.testing.assert_almost_equal(res["beta_k1_between_ref_and_k1_and_k2"][0],
                                   0.0)
    np.testing.assert_almost_equal(res["beta_k2_between_ref_and_k1_and_k2"][0],
                                   0.0)
    np.testing.assert_almost_equal(res["beta_k3_between_ref_and_k1_and_k3"][0],
                                   0.0)
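
The snr_*, err_std_* and beta_* keys are triple-collocation estimates, which TCMetrics computes for every dataset triple containing the reference. pytesmo also exposes the underlying function directly; here is a standalone sketch, assuming it is importable as tcol_metrics from pytesmo.metrics (older releases name it tcol_snr) and using synthetic data, so verify against your installed version:

import numpy as np

from pytesmo.metrics import tcol_metrics  # older pytesmo: tcol_snr

rng = np.random.default_rng(0)
signal = rng.normal(size=1000)

# Three independent noisy measurements of the same signal.
x = signal + rng.normal(scale=0.1, size=1000)        # the "ref" role
y = 0.8 * signal + rng.normal(scale=0.2, size=1000)  # the "k1" role
z = 1.2 * signal + rng.normal(scale=0.3, size=1000)  # the "k2" role

# One snr [dB], error standard deviation and scaling factor (beta) per
# dataset; ref_ind picks the dataset the others are scaled against.
snr, err_std, beta = tcol_metrics(x, y, z, ref_ind=0)
print(snr, err_std, beta)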
Example No. 5
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        elif validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        is_reference = (
            validation_run.reference_configuration is not None
            and dataset_config.id == validation_run.reference_configuration.id)

        if is_reference:
            # the reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if is_reference:
            ref_name = dataset_name
        if (validation_run.scaling_ref is not None
                and dataset_config.id == validation_run.scaling_ref.id):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # pytesmo can't handle timezones yet, so normalise the validation
        # period to UTC; this can be removed once pytesmo supports timezones
        startdate = validation_run.interval_from.astimezone(pytz.UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(pytz.UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    datamanager = DataManager(datasets,
                              ref_name=ref_name,
                              period=period,
                              read_ts_names='read')
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        # with 3 or more datasets, run triple collocation and exclude the
        # TC metrics computed for the reference itself
        metrics = TCMetrics(
            dataset_names=ds_names,
            tc_metrics_for_ref=False,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])
    else:
        metrics = IntercomparisonMetrics(
            dataset_names=ds_names,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(datasets=datamanager,
                     spatial_ref=ref_name,
                     temporal_window=0.5,
                     scaling=scaling_method,
                     scaling_ref=scaling_ref_name,
                     metrics_calculators={
                         (ds_num, ds_num): metrics.calc_metrics
                     },
                     period=period)

    return val
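
For context, here is a sketch of how the returned Validation object is usually driven with pytesmo's standard entry points (get_processing_jobs, calc and the netCDF results writer); the job loop and the save path are illustrative, not this project's actual runner:

from pytesmo.validation_framework.results_manager import netcdf_results_manager

val = create_pytesmo_validation(validation_run)

# One job per batch of reference grid points: (gpis, lons, lats).
for job in val.get_processing_jobs():
    results = val.calc(*job)
    # Appends the metric dicts produced by TCMetrics/IntercomparisonMetrics
    # to a netCDF file, one variable per metric key.
    netcdf_results_manager(results, "/tmp/validation_results")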