Example #1
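These excerpts appear to come from the pytesmo validation framework test suite and omit their module-level imports. A minimal sketch of the preamble they rely on, with module paths inferred from the calls in the examples rather than copied from the original file (the setup_* helpers and MaskingTestDataset are fixtures defined elsewhere in that suite):

# Inferred imports for the examples below; paths are assumptions based on
# the pytesmo, pygeogrids and pygeobase APIs, not verbatim from the source.
import warnings
from pathlib import Path

import numpy as np
import numpy.testing as nptest
import pandas as pd
import pytest

import pygeogrids.grids as grids
from pygeobase.io_base import GriddedTsBase

import pytesmo.validation_framework.metric_calculators as metrics_calculators
import pytesmo.validation_framework.temporal_matchers as temporal_matchers
from pytesmo.validation_framework.data_manager import DataManager
from pytesmo.validation_framework.validation import Validation
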
def test_combinatory_matcher_n2():

    n = 1000
    x = np.arange(n)
    y = np.arange(n) * 0.5
    index = pd.date_range(start="2000-01-01", periods=n, freq="D")

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'], index=index)
    df2 = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'], index=index)
    df3 = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'], index=index)

    df_dict = {'data1': df, 'data2': df2, 'data3': df3}

    temp_matcher = temporal_matchers.BasicTemporalMatching()
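    # combinatory_matcher returns a dict keyed by (reference, other) dataset
    # name pairs; each value is the temporally matched DataFrame whose
    # columns are (dataset, column) tuples, as the asserts below check.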
    matched = temp_matcher.combinatory_matcher(df_dict, 'data1')
    assert sorted(list(matched)) == sorted([('data1', 'data2'),
                                            ('data1', 'data3')])
    assert sorted(list(matched[('data1', 'data2')].columns)) == sorted(
        [('data1', 'x'), ('data1', 'y'), ('data2', 'x'), ('data2', 'y')])

    assert sorted(list(matched[('data1', 'data3')].columns)) == sorted(
        [('data1', 'x'), ('data1', 'y'), ('data3', 'x'), ('data3', 'y')])
Example #2
def test_validation_n3_k2_temporal_matching_no_matches():
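    # setup_two_without_overlap leaves no common time stamps to match, so
    # the expected result dict stays empty.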

    tst_results = {}

    datasets = setup_two_without_overlap()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #3
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    # n less than number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm,
            "DS1",
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0
            ).combinatory_matcher,
            scaling="lin_cdf_match",
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name="k1"
                ).calc_metrics
            },
        )
Example #4
def test_validation_n3_k2_temporal_matching_no_matches2():
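    # Only DS1/DS3 pairs are expected below: with
    # setup_three_with_two_overlapping, DS2 presumably shares no time
    # stamps with the reference, so its combinations produce no matches.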

    tst_results = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
    }

    datasets = setup_three_with_two_overlapping()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #5
def test_validation_n3_k2():

    tst_results = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)}}

    datasets = setup_TestDatasets()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #6
def test_validation_n3_k2_masking_no_data_remains():

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read(1)
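    # mask_dataset internally uses the deprecated read_ts (see Example #12),
    # hence the DeprecationWarning filter.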
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            results = process.calc(*job)
        tst = []
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results),
                                sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
Example #7
    def __init__(self,
                 datasets,
                 spatial_ref,
                 metrics_calculators,
                 temporal_matcher=None,
                 temporal_window=1 / 24.0,
                 temporal_ref=None,
                 masking_datasets=None,
                 period=None,
                 scaling='lin_cdf_match',
                 scaling_ref=None):

        if isinstance(datasets, DataManager):
            self.data_manager = datasets
        else:
            self.data_manager = DataManager(datasets, spatial_ref, period)

        self.temp_matching = temporal_matcher
        if self.temp_matching is None:
            self.temp_matching = temporal_matchers.BasicTemporalMatching(
                window=temporal_window).combinatory_matcher

        self.temporal_ref = temporal_ref
        if self.temporal_ref is None:
            self.temporal_ref = self.data_manager.reference_name

        self.metrics_c = metrics_calculators
        for n, k in self.metrics_c:
            if n < len(self.data_manager.datasets.keys()):
                raise ValueError(
                    'n must not be smaller than the number of datasets')

        self.masking_dm = None
        if masking_datasets is not None:
            # add temporal reference dataset to the masking datasets since it
            # is necessary for temporally matching the masking datasets to the
            # common time stamps. Use _reference here to make a clash with the
            # names of the masking datasets unlikely
            masking_datasets.update(
                {'_reference': datasets[self.temporal_ref]})
            self.masking_dm = DataManager(masking_datasets,
                                          '_reference',
                                          period=period)

        if isinstance(scaling, str):
            self.scaling = DefaultScaler(scaling)
        else:
            self.scaling = scaling
        self.scaling_ref = scaling_ref
        if self.scaling_ref is None:
            self.scaling_ref = self.data_manager.reference_name

        self.luts = self.data_manager.get_luts()
Example #8
    def __init__(self, datasets, spatial_ref, metrics_calculators,
                 temporal_matcher=None, temporal_window=1 / 24.0,
                 temporal_ref=None,
                 masking_datasets=None,
                 period=None,
                 scaling='lin_cdf_match', scaling_ref=None):

        if isinstance(datasets, DataManager):
            self.data_manager = datasets
        else:
            self.data_manager = DataManager(datasets, spatial_ref, period)

        self.temp_matching = temporal_matcher
        if self.temp_matching is None:
            warnings.warn(
                "You are using the default temporal matcher. If you are using one of the"
                " newer metric calculators (PairwiseIntercomparisonMetrics,"
                " TripleCollocationMetrics) you should probably use `make_combined_temporal_matcher`"
                " instead. Have a look at the documentation of the metric calculators for more info."
            )
            self.temp_matching = temporal_matchers.BasicTemporalMatching(
                window=temporal_window).combinatory_matcher

        self.temporal_ref = temporal_ref
        if self.temporal_ref is None:
            self.temporal_ref = self.data_manager.reference_name

        self.metrics_c = metrics_calculators
        for n, k in self.metrics_c:
            if n < len(self.data_manager.datasets.keys()):
                raise ValueError(
                    'n must not be smaller than the number of datasets')

        self.masking_dm = None
        if masking_datasets is not None:
            # add temporal reference dataset to the masking datasets since it
            # is necessary for temporally matching the masking datasets to the
            # common time stamps. Use _reference here to make a clash with the
            # names of the masking datasets unlikely
            masking_datasets.update(
                {'_reference': datasets[self.temporal_ref]})
            self.masking_dm = DataManager(masking_datasets, '_reference',
                                          period=period)

        if isinstance(scaling, str):
            self.scaling = DefaultScaler(scaling)
        else:
            self.scaling = scaling
        self.scaling_ref = scaling_ref
        if self.scaling_ref is None:
            self.scaling_ref = self.data_manager.reference_name

        self.luts = self.data_manager.get_luts()
Example #9
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(datasets, 'DS1', read_ts_names={d: 'read' for d in ['DS1', 'DS2', 'DS3']})

    # n less than number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm, 'DS1',
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0).combinatory_matcher,
            scaling='lin_cdf_match',
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})
Example #10
def test_validation_n2_k2_temporal_matching_no_matches():

    tst_results = {}

    datasets = setup_two_without_overlap()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #11
def test_validation_n3_k2_masking():

    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis so it returns different results.
    tst_results = {1: tst_results_one,
                   2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
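        # job[0] holds the gpis of one cell, so its length selects the
        # expected results for one gpi vs. two gpis (see the comment above).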
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results),
                                sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
Example #12
def test_validation_n3_k2_masking():

    # test result for one gpi in a cell
    tst_results_one = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
    }

    # test result for two gpis in a cell
    tst_results_two = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
    }

    # cell 4 in this example has two gpis so it returns different results.
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 750},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=DeprecationWarning
        )  # read_ts is hard coded when using mask_data
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:

        with warnings.catch_warnings():
            # most warnings here are caused by the read_ts function that cannot
            # be changed when using a masking data set
            warnings.simplefilter("ignore", category=DeprecationWarning)
            results = process.calc(*job)

        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
Example #13
def test_validation_n3_k2_masking_no_data_remains():

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 1000},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        tst = []
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
Example #14
def test_dfdict_combined_temporal_collocation():

    ref_dr = pd.date_range("2000", "2020", freq="YS")
    dr1 = pd.date_range("2000", "2015", freq="YS")
    dr2 = pd.date_range("2005", "2020", freq="YS")

    ref_df = pd.DataFrame({"ref": np.arange(len(ref_dr))}, index=ref_dr)
    df1 = pd.DataFrame(
        {"k1": np.arange(len(dr1)), "k2": np.arange(len(dr1))}, index=dr1
    )
    df2 = pd.DataFrame(
        {"k1": np.arange(len(dr2)), "k2": np.arange(len(dr2))}, index=dr2
    )

    dfs = {"refkey": ref_df, "df1key": df1, "df2key": df2}
    window = pd.Timedelta(days=300)

    matched = temporal_matchers.dfdict_combined_temporal_collocation(
        dfs, "refkey", 2, window=window, n=3, combined_dropna=True)

    # the combined matcher returns a single key: the tuple of all dataset names
    key = ("refkey", "df1key", "df2key")
    assert list(matched.keys()) == [key]

    # overlap is only 11 timestamps
    assert matched[key].shape == (11, 5)

    overlap_dr = pd.date_range("2005", "2015", freq="YS")
    assert np.all(matched[key].index == overlap_dr)

    # test with ASCAT and ISMN data
    here = Path(__file__).resolve().parent
    ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True)
    ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True)

    dfs = {"ASCAT": ascat[["sm"]], "ISMN": ismn[["soil_moisture"]]}
    refname = "ISMN"
    window = pd.Timedelta(12, "H")

    old_matcher = temporal_matchers.BasicTemporalMatching().combinatory_matcher
    new_matcher = temporal_matchers.make_combined_temporal_matcher(window)

    expected = old_matcher(dfs, refname, k=2, n=2)
    new = new_matcher(dfs, refname, k=2, n=2)

    key = ("ISMN", "ASCAT")
    assert list(expected.keys()) == [key]
    assert list(new.keys()) == [key]
    # The old combinatory matcher needs an extra dropna because it doesn't
    # drop incomplete rows itself; that normally happens later, in
    # `get_data_for_result_tuple` in validation.py. The new combined matcher
    # excludes any timestamp at which even a single entry is missing (so
    # that all frames share only common data) already at matching time.
    exp = expected[key].dropna()
    assert exp.shape == new[key].shape
    for col in new[key]:
        np.testing.assert_equal(exp[col].values, new[key][col].values)
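
The deprecation warning in Example #8 recommends make_combined_temporal_matcher for the newer metric calculators. As a closing illustration, here is a minimal sketch of wiring it into Validation, reusing fixtures from the examples above; pairing it with BasicMetrics is only illustrative, since the warning suggests the combined matcher is meant for calculators such as PairwiseIntercomparisonMetrics:

# Sketch only: the Validation setup from Example #5, with the combinatory
# matcher swapped for the combined matcher recommended in Example #8.
# setup_TestDatasets comes from the test suite above; the metrics pairing
# is an assumption, not verified against these excerpts.
window = pd.Timedelta(12, "H")  # same 12-hour window as in Example #14
process = Validation(
    setup_TestDatasets(),
    "DS1",
    temporal_matcher=temporal_matchers.make_combined_temporal_matcher(window),
    scaling="lin_cdf_match",
    metrics_calculators={
        (3, 2): metrics_calculators.BasicMetrics(
            other_name="k1"
        ).calc_metrics
    },
)
for job in process.get_processing_jobs():
    results = process.calc(*job)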