Example #1
    def __init__(self, datasets, spatial_ref, metrics_calculators,
                 temporal_matcher=None, temporal_window=1 / 24.0,
                 temporal_ref=None,
                 masking_datasets=None,
                 period=None,
                 scaling='lin_cdf_match', scaling_ref=None):

        if isinstance(datasets, DataManager):
            self.data_manager = datasets
        else:
            self.data_manager = DataManager(datasets, spatial_ref, period)

        self.temp_matching = temporal_matcher
        if self.temp_matching is None:
            warnings.warn(
                "You are using the default temporal matcher. If you are using one of the"
                " newer metric calculators (PairwiseIntercomparisonMetrics,"
                " TripleCollocationMetrics) you should probably use `make_combined_temporal_matcher`"
                " instead. Have a look at the documentation of the metric calculators for more info."
            )
            self.temp_matching = temporal_matchers.BasicTemporalMatching(
                window=temporal_window).combinatory_matcher

        self.temporal_ref = temporal_ref
        if self.temporal_ref is None:
            self.temporal_ref = self.data_manager.reference_name

        self.metrics_c = metrics_calculators
        for n, k in self.metrics_c:
            if n < len(self.data_manager.datasets.keys()):
                raise ValueError('n must be equal to the number of datasets')

        self.masking_dm = None
        if masking_datasets is not None:
            # add temporal reference dataset to the masking datasets since it
            # is necessary for temporally matching the masking datasets to the
            # common time stamps. Use _reference here to make a clash with the
            # names of the masking datasets unlikely
            masking_datasets.update(
                {'_reference': datasets[self.temporal_ref]})
            self.masking_dm = DataManager(masking_datasets, '_reference',
                                          period=period)

        if type(scaling) == str:
            self.scaling = DefaultScaler(scaling)
        else:
            self.scaling = scaling
        self.scaling_ref = scaling_ref
        if self.scaling_ref is None:
            self.scaling_ref = self.data_manager.reference_name

        self.luts = self.data_manager.get_luts()
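The warning emitted above nudges users of the newer metric calculators toward make_combined_temporal_matcher. A minimal sketch of what that looks like, following the pattern in Example #13 further down; the import paths, the reference name "DS1", and the three-dataset setup are assumptions for illustration, not part of this snippet:

import pandas as pd

from pytesmo.validation_framework.temporal_matchers import make_combined_temporal_matcher
from pytesmo.validation_framework.metric_calculators import PairwiseIntercomparisonMetrics
from pytesmo.validation_framework.validation import Validation

# `datasets` is a dataset dict or DataManager holding three datasets, as in the tests below
val = Validation(
    datasets=datasets,
    spatial_ref="DS1",  # hypothetical reference dataset name
    temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
    metrics_calculators={
        # n equals the number of datasets, as required by the check in __init__ above
        (3, 2): PairwiseIntercomparisonMetrics().calc_metrics
    },
)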
Example #2
    def __init__(self,
                 datasets,
                 spatial_ref,
                 metrics_calculators,
                 temporal_matcher=None,
                 temporal_window=1 / 24.0,
                 temporal_ref=None,
                 masking_datasets=None,
                 period=None,
                 scaling='lin_cdf_match',
                 scaling_ref=None):

        if type(datasets) is DataManager:
            self.data_manager = datasets
        else:
            self.data_manager = DataManager(datasets, spatial_ref, period)

        self.temp_matching = temporal_matcher
        if self.temp_matching is None:
            self.temp_matching = temporal_matchers.BasicTemporalMatching(
                window=temporal_window).combinatory_matcher

        self.temporal_ref = temporal_ref
        if self.temporal_ref is None:
            self.temporal_ref = self.data_manager.reference_name

        self.metrics_c = metrics_calculators
        for n, k in self.metrics_c:
            if n < len(self.data_manager.datasets.keys()):
                raise ValueError('n must be equal to the number of datasets')

        self.masking_dm = None
        if masking_datasets is not None:
            # add temporal reference dataset to the masking datasets since it
            # is necessary for temporally matching the masking datasets to the
            # common time stamps. Use _reference here to make a clash with the
            # names of the masking datasets unlikely
            masking_datasets.update(
                {'_reference': datasets[self.temporal_ref]})
            self.masking_dm = DataManager(masking_datasets,
                                          '_reference',
                                          period=period)

        if type(scaling) == str:
            self.scaling = DefaultScaler(scaling)
        else:
            self.scaling = scaling
        self.scaling_ref = scaling_ref
        if self.scaling_ref is None:
            self.scaling_ref = self.data_manager.reference_name

        self.luts = self.data_manager.get_luts()
Example #3
def setup_TestDataManager():

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDatasetRuntimeError)
    ds2 = GriddedTsBase("", grid, TestDatasetRuntimeError)
    ds3 = GriddedTsBase("", grid, TestDatasetRuntimeError,
                        ioclass_kws={'message': 'Other RuntimeError'})

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {}
        },
        'DS2': {
            'class': ds2,
            'columns': ['sm'],
            'args': [],
            'kwargs': {},
            'grids_compatible': True
        },
        'DS3': {
            'class': ds3,
            'columns': ['sm', 'sm2'],
            'args': [],
            'kwargs': {},
            'grids_compatible': True
        }
    }

    dm = DataManager(datasets, 'DS1')
    return dm
Example #4
def test_validation_n3_k2_temporal_matching_no_matches():

    tst_results = {}

    datasets = setup_two_without_overlap()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #5
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    # n less than number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm,
            "DS1",
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0
            ).combinatory_matcher,
            scaling="lin_cdf_match",
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name="k1"
                ).calc_metrics
            },
        )
Example #6
def test_validation_n3_k2_temporal_matching_no_matches2():

    tst_results = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
    }

    datasets = setup_three_with_two_overlapping()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #7
def test_DataManager_get_data():

    datasets = setup_TestDatasets()
    dm = DataManager(datasets,
                     'DS1',
                     read_ts_names={f'DS{i}': 'read'
                                    for i in range(1, 4)})
    data = dm.get_data(1, 1, 1)
    assert sorted(list(data)) == ['DS1', 'DS2', 'DS3']
Example #8
def test_validation_n2_k2_data_manager_argument():

    tst_results = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)}}

    datasets = setup_TestDatasets()
    dm = DataManager(datasets, 'DS1')

    process = Validation(dm, 'DS1',
                         temporal_matcher=temporal_matchers.BasicTemporalMatching(
                             window=1 / 24.0).combinatory_matcher,
                         scaling='lin_cdf_match',
                         metrics_calculators={
                             (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #9
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(datasets, 'DS1', read_ts_names={d: 'read' for d in ['DS1', 'DS2', 'DS3']})

    # n less than number of datasets is no longer allowed
    with pytest.raises(ValueError):
        process = Validation(
            dm, 'DS1',
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0).combinatory_matcher,
            scaling='lin_cdf_match',
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})
Example #10
def test_DataManager_default_add():

    grid = grids.CellGrid(np.array([1, 2, 3, 4]),
                          np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDataset)

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
        },
        'DS2': {
            'class': ds1,
            'columns': ['soil moisture'],
        }
    }

    dm = DataManager(datasets, 'DS1')
    assert dm.datasets == {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'lut_max_dist': None,
            'grids_compatible': False
        },
        'DS2': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'lut_max_dist': None,
            'grids_compatible': False
        }
    }
Example #11
def test_DataManager_read_ts_method_names():

    ds1 = TestDataset("")

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
        },
        'DS2': {
            'class': ds1,
            'columns': ['soil moisture'],
        }
    }

    read_ts_method_names = {'DS1': 'read_ts', 'DS2': 'read_ts_other'}
    dm = DataManager(datasets, 'DS1', read_ts_names=read_ts_method_names)
    data = dm.read_ds('DS1', 1)
    data_other = dm.read_ds('DS2', 1)
    pdtest.assert_frame_equal(data, ds1.read_ts(1))
    pdtest.assert_frame_equal(data_other, ds1.read_ts_other(1))
Example #12
def getdata():
    """
    Handles the GET request, which should contain the arguments listed
    under Parameters.

    Parameters
    ----------
    station_id: int
        id of station in database
    scaling: string
        chosen scaling method, for available choices see general.time_series.scaling
    snow_depth: float
        mask snow depth greater than this value
    st_l1: float
        mask surface temperature layer1 lower than this value
    air_temp: float
        mask 2m air temperature lower than this value
    ssf_masking: boolean
        use SSF for masking, true or false
    """
    station_id = request.args.get('station_id')
    scaling = request.args.get('scaling')
    if scaling == 'noscale':
        scaling = None
    masking_ids = request.args.getlist('masking_ds[]')
    masking_ops = request.args.getlist('masking_op[]')
    masking_values = request.args.getlist('masking_values[]')
    masking_values = [float(x) for x in masking_values]

    anomaly = request.args.get('anomaly')
    if anomaly == 'none':
        anomaly = None

    (depth_from,
     depth_to,
     sensor_id) = get_station_first_sm_layer(app.config['ISMN_PATH'],
                                             station_id)
    lon, lat = get_station_lonlat(app.config['ISMN_PATH'],
                                  station_id)
    start, end = get_station_start_end(app.config['ISMN_PATH'],
                                       station_id, "soil moisture",
                                       depth_from, depth_to)
    period = [start, end]

    masking_data = {'labels': [], 'data': []}
    masking_meta = get_masking_metadata()
    masking_masked_dict = None
    if len(masking_ids) > 0:
        # prepare masking datasets
        masking_ds_dict = get_masking_ds_dict(masking_ids)
        masking_masked_dict = {}
        for masking_ds, masking_op, masking_value in zip(masking_ids,
                                                         masking_ops,
                                                         masking_values):

            masking_masked_dict[masking_ds] = dict(masking_ds_dict[masking_ds])
            new_cls = MaskingAdapter(masking_masked_dict[masking_ds]['class'],
                                     masking_op,
                                     masking_value)
            masking_masked_dict[masking_ds]['class'] = new_cls

        # use DataManager for reading masking datasets
        masking_dm = DataManager(masking_ds_dict, masking_ids[0],
                                 period=period)
        masking_data = {}
        valid_masking_ids = []
        for mds in masking_ids:
            mdata = masking_dm.read_ds(mds, lon, lat)
            if mdata is not None:
                masking_data[mds] = mdata
                valid_masking_ids.append(mds)
            else:
                masking_data[mds] = pd.DataFrame()
        if len(valid_masking_ids) > 1:
            masking_data = BasicTemporalMatching(window=1.0).combinatory_matcher(
                masking_data, masking_ids[0], n=len(masking_ids))

            if len(masking_data) > 0:
                # dict_keys is not indexable in Python 3; take the first key explicitly
                first_key = list(masking_data)[0]
                labels, values = masking_data[first_key].to_dygraph_format()
        elif len(valid_masking_ids) == 1:
            masking_data = masking_data[valid_masking_ids[0]]
            labels, values = masking_data.to_dygraph_format()
        else:
            labels = [None]
            values = None

        for i, label in enumerate(labels):
            for mid in masking_meta:
                if masking_meta[mid]['variable']['name'] in label:
                    labels[i] = masking_meta[mid]['long_name']

        masking_data = {'labels': labels, 'data': values}

    ismn_iface = prepare_station_interface(app.config['ISMN_PATH'],
                                           station_id,
                                           "soil moisture",
                                           depth_from, depth_to, sensor_id)

    validation_ds_dict = get_validation_ds_dict()
    validation_ds_dict.update({'ISMN': {'class': ismn_iface,
                                        'columns': ['soil moisture']}})

    if anomaly is not None:
        adapter = {'climatology': AnomalyClimAdapter,
                   'average': AnomalyAdapter}
        for dataset in validation_ds_dict:
            validation_ds_dict[dataset]['class'] = adapter[
                anomaly](validation_ds_dict[dataset]['class'],
                         columns=validation_ds_dict[dataset]['columns'])

    mcalc = BasicMetricsPlusMSE(other_name='k1',
                                calc_tau=True).calc_metrics
    process = Validation(validation_ds_dict, 'ISMN',
                         temporal_ref='cci',
                         scaling=scaling,
                         metrics_calculators={(2, 2): mcalc},
                         masking_datasets=masking_masked_dict,
                         period=period,
                         temporal_window=1)

    df_dict = process.data_manager.get_data(1,
                                            lon,
                                            lat)

    matched_data, result, used_data = process.perform_validation(
        df_dict, (1, lon, lat))

    res_key = list(result)[0]
    data = used_data[res_key]
    result = result[res_key][0]

    # rename data to original names
    rename_dict = {}
    f = lambda x: "k{}".format(x) if x > 0 else 'ref'
    for i, r in enumerate(res_key):
        rename_dict[f(i)] = " ".join(r)

    data.rename(columns=rename_dict, inplace=True)

    labels, values = data.to_dygraph_format()

    validation_datasets = {'labels': labels, 'data': values}

    statistics = {'kendall': {'v': '%.2f' % result['tau'], 'p': '%.4f' % result['p_tau']},
                  'spearman': {'v': '%.2f' % result['rho'], 'p': '%.4f' % result['p_rho']},
                  'pearson': {'v': '%.2f' % result['R'], 'p': '%.4f' % result['p_R']},
                  'bias': '%.4f' % result['BIAS'],
                  'rmsd': {'rmsd': '%.4f' % np.sqrt(result['mse']),
                           'rmsd_corr': '%.4f' % np.sqrt(result['mse_corr']),
                           'rmsd_bias': '%.4f' % np.sqrt(result['mse_bias']),
                           'rmsd_var': '%.4f' % np.sqrt(result['mse_var'])},
                  'mse': {'mse': '%.4f' % result['mse'],
                          'mse_corr': '%.4f' % result['mse_corr'],
                          'mse_bias': '%.4f' % result['mse_bias'],
                          'mse_var': '%.4f' % result['mse_var']}}

    scaling_options = {'noscale': 'No scaling',
                       'porosity': 'Scale using porosity',
                       'linreg': 'Linear Regression',
                       'mean_std': 'Mean - standard deviation',
                       'min_max': 'Minimum,maximum',
                       'lin_cdf_match': 'Piecewise <br> linear CDF matching',
                       'cdf_match': 'CDF matching'}

    if scaling is None:
        scaling = 'noscale'

    masking_option_return = {}
    for mid, mops, mval in zip(masking_ids,
                               masking_ops,
                               masking_values):
        masking_option_return[mid] = {'op': mops,
                                      'val': mval,
                                      'name': masking_meta[mid]['long_name']}

    settings = {'scaling': scaling_options[scaling],
                'masking': masking_option_return}

    output_data = {'validation_data': validation_datasets, 'masking_data': masking_data,
                   'statistics': statistics, 'settings': settings}
    status = 1
    if status == -1:
        data = 'Error'
    else:
        data = jsonify(output_data)

    resp = make_response(data)
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
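For orientation, the request parameters read above map onto a GET call of roughly this shape; the route name, host, and the masking dataset id are purely illustrative assumptions:

import requests

resp = requests.get(
    "http://localhost:5000/getdata",      # hypothetical route serving this view
    params={
        "station_id": 42,                 # id of the station in the database
        "scaling": "lin_cdf_match",       # or "noscale" to skip scaling
        "anomaly": "none",
        "masking_ds[]": ["snow_depth"],   # hypothetical masking dataset id
        "masking_op[]": ["<"],            # comparison operator per masking dataset
        "masking_values[]": ["5.0"],      # threshold, converted to float server-side
    },
)
validation_json = resp.json()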
Example #13
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if (validation_run.reference_configuration and
            (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if (validation_run.reference_configuration and
            (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
            ref_short_name = validation_run.reference_configuration.dataset.short_name

        if (validation_run.scaling_ref
                and (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # while pytesmo can't deal with timezones, normalise the validation period to utc; can be removed once pytesmo can do timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    upscale_parms = None
    if validation_run.upscaling_method != "none":
        __logger.debug("Upscaling option is active")
        upscale_parms = {
            "upscaling_method": validation_run.upscaling_method,
            "temporal_stability": validation_run.temporal_stability,
        }
        upscaling_lut = create_upscaling_lut(
            validation_run=validation_run,
            datasets=datasets,
            ref_name=ref_name,
        )
        upscale_parms["upscaling_lut"] = upscaling_lut
        __logger.debug("Lookup table for non-reference datasets " +
                       ", ".join(upscaling_lut.keys()) + " created")
        __logger.debug("{}".format(upscaling_lut))

    datamanager = DataManager(
        datasets,
        ref_name=ref_name,
        period=period,
        read_ts_names='read',
        upscale_parms=upscale_parms,
    )
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    # set value of the metadata template according to what reference dataset is used
    if ref_short_name == 'ISMN':
        metadata_template = METADATA_TEMPLATE['ismn_ref']
    else:
        metadata_template = METADATA_TEMPLATE['other_ref']

    pairwise_metrics = PairwiseIntercomparisonMetrics(
        metadata_template=metadata_template,
        calc_kendall=False,
    )

    metric_calculators = {(ds_num, 2): pairwise_metrics.calc_metrics}

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        tcol_metrics = TripleCollocationMetrics(
            ref_name,
            metadata_template=metadata_template,
        )
        metric_calculators.update({(ds_num, 3): tcol_metrics.calc_metrics})

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(datasets=datamanager,
                     temporal_matcher=make_combined_temporal_matcher(
                         pd.Timedelta(12, "H")),
                     spatial_ref=ref_name,
                     scaling=scaling_method,
                     scaling_ref=scaling_ref_name,
                     metrics_calculators=metric_calculators,
                     period=period)

    return val
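Once create_pytesmo_validation returns, the Validation object would typically be driven with the same jobs/calc loop used in the test examples below; this is only a sketch, and save_path plus the results manager import path are assumptions based on those tests:

from pytesmo.validation_framework.results_manager import netcdf_results_manager

val = create_pytesmo_validation(validation_run)  # validation_run comes from the web app's ORM
jobs = val.get_processing_jobs()
for job in jobs:
    results = val.calc(*job)
    netcdf_results_manager(results, save_path)   # save_path: output directory for the netCDF results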
Example #14
def test_ascat_ismn_validation_metadata_rolling(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {"class": ismn_reader, "columns": ["soil moisture"]},
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.RollingMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(
            results, save_path, ts_vars=["R", "p_R", "RMSD"]
        )

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"gpi",
        u"lon",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]

    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df["network"].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(["R", "p_R", "RMSD"])
    )
Example #15
def test_DataManager_get_data():

    datasets = setup_TestDatasets()
    dm = DataManager(datasets, 'DS1')
    data = dm.get_data(1, 1, 1)
    assert sorted(list(data)) == ['DS1', 'DS2', 'DS3']
Example #16
def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    jobs = []

    ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            "network": metadata["network"],
            "station": metadata["station"],
            "landcover": metadata["landcover_2010"],
            "climate": metadata["climate"],
        }]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           "ISMN",
                           period,
                           read_ts_names=read_ts_names)
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(
                other_name="k1",
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho":
        np.array([
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
                 dtype=np.float32),
        "RMSD":
        np.array([
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
                 dtype=np.float32),
        "network":
        np.array(
            [
                "MAQU",
                "MAQU",
                "SCAN",
                "SCAN",
                "SCAN",
                "SOILSCAPE",
                "SOILSCAPE",
                "SOILSCAPE",
            ],
            dtype="U256",
        )
    }
    vars_should = [
        'BIAS', 'R', 'RMSD', '_row_size', 'climate', 'gpi', 'idx', 'landcover',
        'lat', 'lon', 'n_obs', 'network', 'p_R', 'p_rho', 'p_tau', 'rho',
        'station', 'tau', 'time'
    ]

    check_results(filename=results_fname,
                  target_vars=target_vars,
                  variables=vars_should)
Example #17
def test_ascat_ismn_validation(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    jobs = []

    ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata["longitude"], metadata["latitude"]))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"]
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           "ISMN",
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")
    # targets
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho":
        np.array([
            0.53934574, 0.7002289, 0.62200236, 0.53647155, 0.30413666,
            0.6740655, 0.8418981, 0.74206454
        ],
                 dtype=np.float32),
        "RMSD":
        np.array([
            11.583476, 7.729667, 17.441547, 21.125721, 14.31557, 14.187225,
            13.0622425, 12.903898
        ],
                 dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )
Example #18
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if ((validation_run.reference_configuration) and
            (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if ((validation_run.reference_configuration) and
            (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
        if ((validation_run.scaling_ref)
                and (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # while pytesmo can't deal with timezones, normalise the validation period to utc; can be removed once pytesmo can do timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    datamanager = DataManager(datasets,
                              ref_name=ref_name,
                              period=period,
                              read_ts_names='read')
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        # if there are 3 or more datasets, do TC, exclude ref metrics
        metrics = TCMetrics(
            dataset_names=ds_names,
            tc_metrics_for_ref=False,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])
    else:
        metrics = IntercomparisonMetrics(
            dataset_names=ds_names,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(datasets=datamanager,
                     spatial_ref=ref_name,
                     temporal_window=0.5,
                     scaling=scaling_method,
                     scaling_ref=scaling_ref_name,
                     metrics_calculators={
                         (ds_num, ds_num): metrics.calc_metrics
                     },
                     period=period)

    return val
Example #19
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader

    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

    assert sorted(vars) == sorted(vars_should)
    assert sorted(n_obs) == sorted(n_obs_should)
    nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
    nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
    nptest.assert_equal(sorted(network), sorted(network_should))
Example #20
def test_ascat_ismn_validation_metadata_rolling():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        'network': np.array(['None'], dtype='U256'),
        'station': np.array(['None'], dtype='U256'),
        'landcover': np.float32([np.nan]),
        'climate': np.array(['None'], dtype='U4')
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            'network': metadata['network'],
            'station': metadata['station'],
            'landcover': metadata['landcover_2010'],
            'climate': metadata['climate']
        }]
        jobs.append(
            (idx, metadata['longitude'], metadata['latitude'], metadata_dict))

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.RollingMetrics(
                other_name='k1',
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results,
                               save_path,
                               ts_vars=['R', 'p_R', 'RMSD'])

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'gpi', u'lon', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]

    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array([
        'MAQU', 'MAQU', 'SCAN', 'SCAN', 'SCAN', 'SOILSCAPE', 'SOILSCAPE',
        'SOILSCAPE'
    ],
                              dtype='U256')

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df['network'].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert (reader.read_ts(0).index.size == 357)
    assert np.all(
        reader.read_ts(1).columns.values == np.array(['R', 'p_R', 'RMSD']))
Example #21
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS', u'p_rho',
        u'rho', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454, 0.30299741,
        0.53143877, 0.62204134
    ],
                          dtype=np.float32)

    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ],
                           dtype=np.float32)
    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(list(results.variables.keys())) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
Example #22
def test_validation_with_averager(ascat_reader, ismn_reader):
    """
    Test processing framework with the averaging module. ASCAT and ISMN data are used here with no geographical
    considerations (the lut is provided upstream and already contains this information).
    """
    while hasattr(ascat_reader, 'cls'):
        ascat_reader = ascat_reader.cls
    # lookup table between the ascat and ismn points - not geographically correct
    upscaling_lut = {
        "ISMN": {
            1814367: [(0, 102.1333, 33.8833), (1, 102.1333, 33.6666)],
            1803695: [(2, -86.55, 34.783), (3, -97.083, 37.133),
                      (4, -105.417, 34.25)],
            1856312: [(5, -120.9675, 38.43003), (6, -120.78559, 38.14956),
                      (7, -120.80639, 38.17353)]
        }
    }
    gpis = (1814367, 1803695, 1856312)
    lons, lats = [], []
    for gpi in gpis:
        lon, lat = ascat_reader.grid.gpi2lonlat(gpi)
        lons.append(lon)
        lats.append(lat)

    jobs = [(gpis, lons, lats)]

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            }
        },
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets,
        "ASCAT",
        period,
        read_ts_names=read_ts_names,
        upscale_parms={
            "upscaling_method": "average",
            "temporal_stability": True,
            "upscaling_lut": upscaling_lut,
        },
    )
    process = Validation(
        datasets,
        "ASCAT",
        temporal_ref="ISMN",
        scaling="lin_cdf_match",
        scaling_ref="ISMN",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")

    target_vars = {
        "n_obs": [764, 2392, 904],
        "rho": np.array([-0.012487, 0.255156, 0.635517], dtype=np.float32),
        "RMSD": np.array([0.056428, 0.056508, 0.116294], dtype=np.float32),
        "R": np.array([-0.012335, 0.257671, 0.657239], dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )