Exemplo n.º 1
0
def test_match_metrics_print_statistics(capsys):
    """Check the summary printed by `match_metrics` when `print_statistics=True`.

    One dataset and one metric dataset carrying identical attributes are
    matched, and the captured stdout is searched for the expected summary
    fragments.
    """
    metricname = "metric"

    # Every attribute is set to "a" on both the dataset and the metric.
    shared_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
        ),
        "a",
    )

    dataset = random_ds()
    dataset.attrs = shared_attrs
    metric_dataset = random_ds().rename({"data": metricname})
    metric_dataset.attrs = shared_attrs

    match_metrics(
        {"a": dataset},
        {"aa": metric_dataset},
        [metricname],
        print_statistics=True,
    )

    printed = capsys.readouterr().out

    # Each fragment must appear somewhere in the printed output.
    expected_fragments = (
        "Processed 1 datasets.",
        "Exact matches:{'metric': 1}",
        "Other matches:{'metric': 0}",
        "No match found:{'metric': 0}",
    )
    for fragment in expected_fragments:
        assert fragment in printed
Exemplo n.º 2
0
def test_match_metrics_align_dims():
    """Check `dim_length_conflict="align"` with a metric that has a shorter time axis.

    The metric dataset is built with its last time step sliced off. The test
    expects a `UserWarning` whose first message equals a fixed string, and
    that the matched dataset's `time` coordinate is close to the metric's.
    """
    metricname = "metric"

    # Every attribute is set to "a" on both the dataset and the metric.
    shared_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
            "variable_id",
        ),
        "a",
    )

    ds = random_ds(time_coords=True)
    ds.attrs = shared_attrs

    # Drop the final time step so the metric no longer lines up with `ds`.
    shortened = random_ds(time_coords=True).isel(time=slice(0, -1))
    ds_metric = shortened.rename({"data": metricname})
    ds_metric.attrs = shared_attrs

    expected_msg = "none.none.a.a.a.a.a.a.a:`metric` dimensions ['time:5'] do not match `ds` ['time:6']. Aligning the data on `inner`"

    with pytest.warns(UserWarning) as warninfo:
        ddict_matched = match_metrics(
            {"a": ds},
            {"aa": ds_metric},
            [metricname],
            print_statistics=True,
            dim_length_conflict="align",
        )

    first_warning = warninfo[0]
    assert first_warning.message.args[0] == expected_msg

    xr.testing.assert_allclose(ddict_matched["a"].time, ds_metric.time)
Exemplo n.º 3
0
def test_match_metrics_exceptions():
    """Check that `match_metrics` raises `ValueError` for a dataset with `member_id`.

    The dataset has its `z` renamed to `member_id`; the original author notes
    that a `member_id` dim is an indicator that the dataset was aggregated.
    """
    metricname = "metric"

    # Every attribute is set to "a" on both the dataset and the metric.
    shared_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
        ),
        "a",
    )

    aggregated_like = random_ds().rename({"z": "member_id"})
    aggregated_like.attrs = shared_attrs
    metric_dataset = random_ds().rename({"data": metricname})
    metric_dataset.attrs = shared_attrs

    datasets = {"a": aggregated_like}
    metrics = {"aa": metric_dataset}
    with pytest.raises(ValueError):
        match_metrics(datasets, metrics, [metricname])
Exemplo n.º 4
0
def test_match_metrics_closer(metricname):
    """Check that a metric dataset with more matching attrs is preferred.

    Dataset `c` is matched against two metric datasets: one carrying the
    attributes of `c` itself and one carrying the attributes of `a`. The test
    expects the metric taken from the `"exact_c"` entry to be the one attached.

    `metricname` is supplied by the caller (presumably a pytest fixture or
    parametrization; not visible here).
    """
    # ds_b is built but not passed to `match_metrics` in this test.
    ds_a, ds_b, ds_c = [random_ds() for _ in range(3)]

    # Give them cmip attrs: start from all-"a" and override where they differ.
    base_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
        ),
        "a",
    )
    ds_a.attrs = dict(base_attrs)
    ds_b.attrs = {**base_attrs, "table_id": "b", "variant_label": "b", "version": "b"}
    ds_c.attrs = {**base_attrs, "experiment_id": "b", "table_id": "b", "version": "b"}

    # Metrics are the first time step of a random dataset, with `data` renamed.
    metric_a = random_ds().isel(time=0).rename({"data": metricname})
    metric_a.attrs = ds_a.attrs
    metric_c = random_ds().isel(time=0).rename({"data": metricname})
    metric_c.attrs = ds_c.attrs

    parsed = match_metrics(
        {"c": ds_c},
        {"exact_c": metric_c, "exact_a": metric_a},
        match_variables=[metricname],
    )

    attached = parsed["c"][metricname]
    xr.testing.assert_allclose(
        attached.reset_coords(drop=True),
        metric_c[metricname],
    )
    assert attached.attrs["original_key"] == "exact_c"
Exemplo n.º 5
0
def test_match_metrics():
    """Exercise `match_metrics` under several attribute-matching configurations.

    Five datasets (`a` to `e`) with differing attributes are matched against
    metric datasets. The scenarios cover the default settings,
    `match_attrs="exact"`, custom `match_attrs` lists, a metric carrying the
    attributes of `e`, a metric that keeps its time dimension, and several
    competing metric datasets at once.
    """
    metricname = "metric"

    # create a few different datasets
    ds_a, ds_b, ds_c, ds_d, ds_e = [random_ds() for _ in range(5)]

    # Give them cmip attrs: start from all-"a" and override where they differ.
    base_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
        ),
        "a",
    )
    ds_a.attrs = dict(base_attrs)
    ds_b.attrs = {**base_attrs, "table_id": "b", "variant_label": "b", "version": "b"}
    ds_c.attrs = {**base_attrs, "experiment_id": "b", "table_id": "b", "version": "b"}
    ds_d.attrs = {**base_attrs, "grid_label": "b"}
    ds_e.attrs = {**base_attrs, "source_id": "b"}

    def _all_datasets():
        # Build a new mapping of every dataset for each scenario.
        return {"a": ds_a, "b": ds_b, "c": ds_c, "d": ds_d, "e": ds_e}

    def _static_metric(name):
        # First time step of a random dataset, with `data` renamed to `name`.
        return random_ds().isel(time=0).rename({"data": name})

    def _check_parsed(parsed, reference, keys_with_metric, strict=True):
        # The listed keys must carry the metric, close to `reference`.
        reference = reference.copy()
        for key in keys_with_metric:
            matched = parsed[key]
            assert metricname in list(matched.variables)
            xr.testing.assert_allclose(
                matched[metricname].reset_coords(drop=True),
                reference,
            )
        if not strict:
            return
        # In strict mode every other dataset must NOT carry the metric.
        for key in parsed.keys():
            if key in keys_with_metric:
                continue
            assert metricname not in parsed[key].variables

    # now create a metric (which does not vary in time) which matches ds_a
    ds_metric = _static_metric(metricname)
    ds_metric.attrs = ds_a.attrs

    # With the default options I expect that this gets parsed into a,b,c
    # (all the same source_id and grid_label) but not d and e
    ds_dict = _all_datasets()
    metric_dict = {"something": ds_metric}
    expected = ds_metric[metricname]

    parsed = match_metrics(ds_dict, metric_dict, [metricname])
    _check_parsed(parsed, ds_metric[metricname], ["a", "b", "c"])

    # Now change the matching parameter
    parsed = match_metrics(
        ds_dict,
        metric_dict,
        match_variables=[metricname],
        match_attrs="exact",
    )
    _check_parsed(parsed, expected, ["a"])

    attr_list_cases = (
        (["source_id", "grid_label", "experiment_id"], ["a", "b"]),
        (["source_id", "grid_label", "variant_label"], ["a", "c"]),
    )
    for attrs_to_match, keys_with_metric in attr_list_cases:
        parsed = match_metrics(
            ds_dict,
            metric_dict,
            [metricname],
            match_attrs=attrs_to_match,
        )
        _check_parsed(parsed, expected, keys_with_metric)

    # Now give the metric the attributes of e and check
    ds_metric.attrs = ds_e.attrs
    parsed = match_metrics(
        _all_datasets(),
        {"something": ds_metric},
        match_variables=[metricname],
    )
    _check_parsed(parsed, ds_metric[metricname], ["e"])

    # Check that a metric with time dimension is never parsed anywhere,
    # except an exact match
    ds_metric = random_ds().rename({"data": metricname})
    ds_metric.attrs = ds_a.attrs
    parsed = match_metrics(
        _all_datasets(),
        {"something": ds_metric},
        match_variables=[metricname],
    )
    _check_parsed(parsed, ds_metric[metricname], ["a"])

    # Complex example. Check two with similar attrs (both matching), to see
    # if the exact match is always preferred.
    ds_metric_a = _static_metric(metricname)
    ds_metric_a.attrs = ds_a.attrs
    ds_metric_c = _static_metric(metricname)
    ds_metric_c.attrs = ds_c.attrs

    # Original note: this one should be applied to all datasets
    # (only a, b and c are asserted below).
    ds_metric_a_other = _static_metric("other")
    ds_metric_a_other.attrs = ds_a.attrs

    parsed = match_metrics(
        _all_datasets(),
        {
            "exact_c": ds_metric_c,
            "exact_a": ds_metric_a,
            "other_a": ds_metric_a_other,
        },
        match_variables=[metricname, "other"],
    )

    # (dataset key, metric dataset expected to be attached, its original key)
    preference_cases = (
        ("a", ds_metric_a, "exact_a"),
        ("c", ds_metric_c, "exact_c"),
        # b is a 'closer' match to c
        ("b", ds_metric_c, "exact_c"),
    )
    for key, source_metric, origin in preference_cases:
        _check_parsed(parsed, source_metric[metricname], [key], strict=False)
        assert parsed[key][metricname].attrs["original_key"] == origin
        assert parsed[key]["other"].attrs["original_key"] == "other_a"
Exemplo n.º 6
0
def test_match_metrics(metricname):
    """Exercise `match_metrics` under several attribute-matching configurations.

    Five datasets (`a` to `e`) with differing attributes are matched against a
    single metric dataset. The scenarios cover the default settings,
    `match_attrs="exact"`, custom `match_attrs` lists, a metric carrying the
    attributes of `e`, and a metric that keeps its time dimension.

    `metricname` is supplied by the caller (presumably a pytest fixture or
    parametrization; not visible here).
    """
    # create a few different datasets
    ds_a, ds_b, ds_c, ds_d, ds_e = [random_ds() for _ in range(5)]

    # Give them cmip attrs: start from all-"a" and override where they differ.
    base_attrs = dict.fromkeys(
        (
            "source_id",
            "grid_label",
            "experiment_id",
            "table_id",
            "variant_label",
            "version",
        ),
        "a",
    )
    ds_a.attrs = dict(base_attrs)
    ds_b.attrs = {**base_attrs, "table_id": "b", "variant_label": "b", "version": "b"}
    ds_c.attrs = {**base_attrs, "experiment_id": "b", "table_id": "b", "version": "b"}
    ds_d.attrs = {**base_attrs, "grid_label": "b"}
    ds_e.attrs = {**base_attrs, "source_id": "b"}

    def _all_datasets():
        # Build a new mapping of every dataset for each scenario.
        return {"a": ds_a, "b": ds_b, "c": ds_c, "d": ds_d, "e": ds_e}

    def _check_parsed(parsed, reference, keys_with_metric, strict=True):
        # The listed keys must carry the metric, close to `reference`.
        reference = reference.copy()
        for key in keys_with_metric:
            matched = parsed[key]
            assert metricname in list(matched.variables)
            xr.testing.assert_allclose(
                matched[metricname].reset_coords(drop=True),
                reference,
            )
        if not strict:
            return
        # In strict mode every other dataset must NOT carry the metric.
        for key in parsed.keys():
            if key in keys_with_metric:
                continue
            assert metricname not in parsed[key].variables

    # now create a metric (which does not vary in time) which matches ds_a
    ds_metric = random_ds().isel(time=0).rename({"data": metricname})
    ds_metric.attrs = ds_a.attrs

    # With the default options I expect that this gets parsed into a,b,c
    # (all the same source_id and grid_label) but not d and e
    ds_dict = _all_datasets()
    metric_dict = {"something": ds_metric}
    expected = ds_metric[metricname]

    parsed = match_metrics(ds_dict, metric_dict, [metricname])
    _check_parsed(parsed, ds_metric[metricname], ["a", "b", "c"])

    # Now change the matching parameter
    parsed = match_metrics(
        ds_dict,
        metric_dict,
        match_variables=[metricname],
        match_attrs="exact",
    )
    _check_parsed(parsed, expected, ["a"])

    attr_list_cases = (
        (["source_id", "grid_label", "experiment_id"], ["a", "b"]),
        (["source_id", "grid_label", "variant_label"], ["a", "c"]),
    )
    for attrs_to_match, keys_with_metric in attr_list_cases:
        parsed = match_metrics(
            ds_dict,
            metric_dict,
            [metricname],
            match_attrs=attrs_to_match,
        )
        _check_parsed(parsed, expected, keys_with_metric)

    # Now give the metric the attributes of e and check
    ds_metric.attrs = ds_e.attrs
    parsed = match_metrics(
        _all_datasets(),
        {"something": ds_metric},
        match_variables=[metricname],
    )
    _check_parsed(parsed, ds_metric[metricname], ["e"])

    # Check that a metric with time dimension is never parsed anywhere,
    # except an exact match
    ds_metric = random_ds().rename({"data": metricname})
    ds_metric.attrs = ds_a.attrs
    parsed = match_metrics(
        _all_datasets(),
        {"something": ds_metric},
        match_variables=[metricname],
    )
    _check_parsed(parsed, ds_metric[metricname], ["a"])