Example #1
    def test_timezone_adapter(self):
        c3s_data_folder = path.join(
            Dataset.objects.get(short_name='C3S').storage_path,
            'C3S_V201706/TCDR/063_images_to_ts/combined-daily')
        c3s_reader = c3s_read(c3s_data_folder)

        timezone_reader = TimezoneAdapter(c3s_reader)

        orig_data = c3s_reader.read_ts(-155.42, 19.78)
        data = timezone_reader.read_ts(-155.42, 19.78)
        self.assertTrue(
            np.array_equal(orig_data.index.values, data.index.values))
        self.assertTrue(not hasattr(data.index, 'tz') or data.index.tz is None)

        orig_data = c3s_reader.read(-155.42, 19.78)
        data = timezone_reader.read(-155.42, 19.78)
        self.assertTrue(
            np.array_equal(orig_data.index.values, data.index.values))
        self.assertTrue((not hasattr(data.index, 'tz'))
                        or (data.index.tz is None))

        ismn_data_folder = path.join(
            Dataset.objects.get(short_name='ISMN').storage_path,
            'ISMN_V20191211')
        ismn_reader = ISMN_Interface(ismn_data_folder)

        timezone_reader2 = TimezoneAdapter(ismn_reader)

        orig_data = ismn_reader.read_ts(0)
        data = timezone_reader2.read_ts(0)
        self.assertTrue(
            np.array_equal(orig_data.index.values, data.index.values))
        self.assertTrue((not hasattr(data.index, 'tz'))
                        or (data.index.tz is None))
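# The adapter exercised above strips timezone information so that readers with
# differently localized indices can be compared. A minimal sketch of such an
# adapter (an assumption for illustration, not necessarily the actual qa4sm
# implementation):
class SketchTimezoneAdapter:
    """Wrap a reader and return tz-naive time series (illustrative only)."""

    def __init__(self, reader):
        self.reader = reader

    def _strip_tz(self, data):
        # Drop tz info from a tz-aware DatetimeIndex; leave tz-naive data as is.
        if hasattr(data.index, 'tz') and data.index.tz is not None:
            data.index = data.index.tz_convert(None)
        return data

    def read_ts(self, *args, **kwargs):
        return self._strip_tz(self.reader.read_ts(*args, **kwargs))

    def read(self, *args, **kwargs):
        return self._strip_tz(self.reader.read(*args, **kwargs))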
Example #2
def generate_station_list():
    """ This routine generates a list of available ISMN stations and the EASEv2 grid point they are located in. """

    paths = Paths()

    io = ISMN_Interface(paths.ismn_raw)

    # get metadata indices of all stations that measure soil moisture within the first 10 cm
    idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1)
    df = pd.DataFrame({'network': io.metadata[idx]['network'],
                       'station': io.metadata[idx]['station'],
                       'lat': io.metadata[idx]['latitude'],
                       'lon': io.metadata[idx]['longitude'],
                       'ease2_gpi': np.zeros(len(idx)).astype('int')}, index=idx)

    # merge indices for stations that have multiple sensors within the first 10 cm
    duplicate_idx = df.groupby(df.columns.tolist()).apply(
        lambda x: '-'.join(['%i' % i for i in x.index])).values
    df.drop_duplicates(inplace=True)
    df.index = duplicate_idx

    # create EASEv2 grid domain
    grid = EASE2()
    lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats)
    lons = lons.flatten()
    lats = lats.flatten()

    # find EASEv2 grid points in which the individual stations are located
    for i, (idx, data) in enumerate(df.iterrows()):
        print('%i / %i' % (i, len(df)))
        r = (lons - data.lon) ** 2 + (lats - data.lat) ** 2
        df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0]

    df.to_csv(paths.ismn / 'station_list.csv')
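# The tolerance-based lookup above picks the first flattened index whose
# squared distance matches the minimum, i.e. a nearest-neighbour search.
# A self-contained sketch of the same lookup via np.argmin (made-up values,
# not project data):
import numpy as np

grid_lons = np.array([-100.0, -99.0, -98.0])
grid_lats = np.array([40.0, 40.0, 40.0])
station_lon, station_lat = -98.9, 40.2
r = (grid_lons - station_lon) ** 2 + (grid_lats - station_lat) ** 2
nearest_gpi = int(np.argmin(r))  # first occurrence of the minimum
print(nearest_gpi)  # 1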
Example #3
def generate_station_list():

    paths = Paths()

    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # get metadata indices of all stations that measure soil moisture within the first 10 cm
    idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1)
    df = pd.DataFrame(
        {
            'network': io.metadata[idx]['network'],
            'station': io.metadata[idx]['station'],
            'lat': io.metadata[idx]['latitude'],
            'lon': io.metadata[idx]['longitude'],
            'ease2_gpi': np.zeros(len(idx)).astype('int')
        },
        index=idx)

    # merge indices for stations that have multiple sensors within the first 10 cm
    duplicate_idx = df.groupby(df.columns.tolist()).apply(
        lambda x: '-'.join(['%i' % i for i in x.index])).values
    df.drop_duplicates(inplace=True)
    df.index = duplicate_idx

    grid = EASE2()
    lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats)
    lons = lons.flatten()
    lats = lats.flatten()

    for i, (idx, data) in enumerate(df.iterrows()):
        print('%i / %i' % (i, len(df)))
        r = (lons - data.lon)**2 + (lats - data.lat)**2
        df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0]

    df.to_csv(paths.ismn / 'station_list.csv')
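# The groupby/join step above collapses rows that are identical in all
# metadata columns and keeps the original sensor ids, joined with '-', as the
# new index. A tiny standalone illustration with made-up values:
import pandas as pd

demo = pd.DataFrame({'network': ['SCAN', 'SCAN'],
                     'station': ['A', 'A'],
                     'lat': [40.0, 40.0],
                     'lon': [-100.0, -100.0]},
                    index=[3, 7])
merged_idx = demo.groupby(demo.columns.tolist()).apply(
    lambda x: '-'.join('%i' % i for i in x.index)).values
demo.drop_duplicates(inplace=True)
demo.index = merged_idx
print(demo.index)  # Index(['3-7'], dtype='object')
# Caveat: groupby sorts by group key, so the joined ids only line up with the
# drop_duplicates result when the frame is already ordered by these columns.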
Example #4
def resample_timeseries():

    paths = Paths()

    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(
        lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'

    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)

        idx = indices.split('-')

        # Only one station within grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    df += [
                        ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                    ]
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue

            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # match temporal mean and standard deviation to those of the station with the maximum temporal coverage
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = ((df[col] - df[col].mean()) / df[col].std()
                               * df[ref].std() + df[ref].mean())

            # Average measurements of all stations
            df.mean(axis='columns').tz_convert(None).to_csv(
                fname, float_format='%.4f')
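# The rescaling above is plain mean/standard deviation matching: each series
# is standardized and then mapped onto the moments of the reference series.
# A sketch with made-up numbers:
import pandas as pd

ref = pd.Series([0.20, 0.30, 0.40])    # station with the longest record
other = pd.Series([0.05, 0.10, 0.15])  # station to be matched
matched = (other - other.mean()) / other.std() * ref.std() + ref.mean()
print(matched.round(2).tolist())  # [0.2, 0.3, 0.4]; moments now equal ref's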
Example #5
def main(_) -> None:
    # Make a call to the ISMN_Interface to create metadata required once.
    ISMN_Interface(_ISMN_DATA_PATH_FLAG.value, parallel=True)

    # Soil moisture data released with the paper is stored under
    # soil_moisture_retrieval_data/* under the eoscience-public GCS bucket.
    input_files = [
        f"soil_moisture_retrieval_data/data-{idx:05d}-of-{_TOTAL_FILES:05d}.tfrecord.gz"
        for idx in range(_TOTAL_FILES)
    ]

    with multiprocessing.pool.Pool(_NUM_WORKERS_FLAG.value,
                                   initializer=init_worker,
                                   initargs=(add_sm_labels_to_data, )) as pool:
        stats_list = pool.starmap(
            add_sm_labels_to_data,
            zip(input_files, itertools.repeat(_OUTPUT_DIR_FLAG.value),
                itertools.repeat(_TEMP_STORAGE_DIR_FLAG.value)))
        pool.close()
        pool.join()

        print("Finished execution!")
        total_stats = list(map(sum, zip(*stats_list)))
        print(
            f"Total success: {total_stats[0]}, Total lookup failures: {total_stats[1]}, Total value failures: {total_stats[2]}"
        )
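# Each worker returns one (success, lookup_failure, value_failure) tuple per
# input file; zip(*stats_list) transposes the list of tuples so that
# map(sum, ...) totals each counter across files. With hypothetical counts:
stats_list = [(10, 1, 0), (8, 0, 2), (9, 2, 1)]
total_stats = list(map(sum, zip(*stats_list)))
print(total_stats)  # [27, 3, 3]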
Example #6
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata
Example #7
    def setUpClass(cls):
        super(Test_ISMN_Interface_HeaderValuesZipped, cls).setUpClass()

        testdata_path = os.path.join(testdata_root, "zip_archives", "header")
        testdata_zip_path = os.path.join(
            testdata_path, "Data_seperate_files_header_20170810_20180809.zip")
        # clean up existing metadata
        metadata_path = os.path.join(testdata_path, "python_metadata")
        cleanup(metadata_path)

        ISMN_Interface(testdata_zip_path)

        cls.testdata_zip_path = testdata_zip_path
Example #8
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopZipped, cls).setUpClass()

        testdata_path = os.path.join(testdata_root, 'zip_archives', 'ceop')
        testdata_zip_path = os.path.join(testdata_path,
            'Data_seperate_files_20170810_20180809.zip')
        # clean up existing metadata
        metadata_path = os.path.join(testdata_path, 'python_metadata')
        cleanup(metadata_path)

        ISMN_Interface(testdata_zip_path)

        cls.testdata_zip_path = testdata_zip_path
Example #9
File: readers.py  Project: sheenaze/qa4sm
def create_reader(dataset, version):
    reader = None

    folder_name = path.join(dataset.storage_path, version.short_name)

    if dataset.short_name == globals.ISMN:
        reader = ISMN_Interface(folder_name)

    if dataset.short_name == globals.C3S:
        c3s_data_folder = path.join(folder_name,
                                    'TCDR/063_images_to_ts/combined-daily')
        reader = c3s_read(c3s_data_folder, ioclass_kws={'read_bulk': True})

    if (dataset.short_name == globals.CCI or dataset.short_name == globals.CCIA
            or dataset.short_name == globals.CCIP):
        reader = CCITs(folder_name, ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.GLDAS:
        reader = GLDASTs(folder_name, ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.SMAP:
        smap_data_folder = path.join(folder_name, 'netcdf')
        reader = SMAPTs(smap_data_folder, ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.ASCAT:
        ascat_data_folder = path.join(folder_name, 'data')
        ascat_grid_path = first_file_in(path.join(folder_name, 'grid'), '.nc')
        fn_format = "{:04d}"
        reader = AscatNc(path=ascat_data_folder,
                         fn_format=fn_format,
                         grid_filename=ascat_grid_path,
                         static_layer_path=None,
                         ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.SMOS:
        reader = SMOSTs(folder_name, ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.ERA5:
        reader = ERATs(folder_name, ioclass_kws={'read_bulk': True})

    if dataset.short_name == globals.ERA5_LAND:
        reader = ERATs(folder_name, ioclass_kws={'read_bulk': True})

    if not reader:
        raise ValueError(
            "Reader for dataset '{}' not available".format(dataset))

    reader = TimezoneAdapter(reader)

    return reader
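# A hedged usage sketch: Dataset is the qa4sm Django model queried in
# Example #1; how a DatasetVersion instance is obtained depends on the qa4sm
# models, so the lookup below is only illustrative.
dataset = Dataset.objects.get(short_name='C3S')
version = ...  # a DatasetVersion whose short_name is e.g. 'C3S_V201706'
reader = create_reader(dataset, version)
ts = reader.read_ts(-155.42, 19.78)  # tz-naive thanks to the TimezoneAdapter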
Example #10
    def setUpClass(cls):
        super(Test_ISMN_Interface_HeaderValuesUnzipped, cls).setUpClass()

        testdata_path_unzipped = os.path.join(
            testdata_root, "Data_seperate_files_header_20170810_20180809")
        # clean existing metadata

        metadata_path = os.path.join(testdata_path_unzipped, "python_metadata")

        cleanup(metadata_path)

        ISMN_Interface(testdata_path_unzipped)

        cls.testdata = testdata_path_unzipped
Example #11
def ismn_reader():
    # Initialize ISMN reader

    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    return ismn_reader
Example #12
    def __init__(self, path=None, col_offs=0, row_offs=0):

        self.col_offs = col_offs
        self.row_offs = row_offs

        if path is None:
            self.root = Path(
                '~/data_sets/ISMN/CONUS_20070101_20200101').expanduser()
        else:
            self.root = Path(path)

        self.io = ISMN_Interface(self.root)

        self.list_file = self.root / 'station_list.csv'
        if not self.list_file.exists():
            print('Station list does not exist.')
            self.generate_station_list()
        else:
            self.list = pd.read_csv(self.list_file, index_col=0)
Example #13
def test_metadata_dataframe():
    # make sure that metadata.index represents same values as get_dataset_ids
    testdata = os.path.join(testdata_root,
                            "Data_seperate_files_20170810_20180809")
    metadata_path = os.path.join(testdata, "python_metadata")
    cleanup(metadata_path)
    ds_one = ISMN_Interface(testdata,
                            meta_path=metadata_path,
                            network='FR_Aqui')

    assert np.all(ds_one.metadata.index.values == ds_one.get_dataset_ids(
        None, -np.inf, np.inf))
    ids = ds_one.get_dataset_ids('soil_moisture')
    assert np.all(ids == ds_one.metadata.index.values)
    assert ds_one.metadata.loc[ids[0], 'variable']['val'] == 'soil_moisture'
    assert ds_one.metadata.loc[ids[0], 'network']['val'] == 'FR_Aqui'
    ds_one.close_files()
Example #14
    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])
Example #15
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                     'sat', 'ascat', 'netcdf', '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                     'sat', 'ascat', 'netcdf', 'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__),
                                        '..', 'test-data', 'sat',
                                        'h_saf', 'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder, ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                    'ismn', 'multinetwork', 'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture',
        min_depth=0,
        max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {'mask_frozen_prob': 80,
                       'mask_snow_prob': 80,
                       'mask_ssf': True}
        }}

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets, 'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau',
                   u'BIAS', u'p_rho', u'rho', u'lat', u'R', u'p_R']
    n_obs_should = [384,  357,  482,  141,  251, 1927, 1887, 1652]
    rho_should = np.array([0.70022893, 0.53934574,
                           0.69356072, 0.84189808,
                           0.74206454, 0.30299741,
                           0.53143877, 0.62204134], dtype=np.float32)

    rmsd_should = np.array([7.72966719, 11.58347607,
                            14.57700157, 13.06224251,
                            12.90389824, 14.24668026,
                            21.19682884, 17.3883934], dtype=np.float32)
    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
            n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
Example #16
ascat_reader = AscatSsmCdr(ascat_data_folder, ascat_grid_folder,
                           grid_filename='TUW_WARP5_grid_info_2_1.nc',
                           static_layer_path=static_layers_folder)
ascat_reader.read_bulk = True


# Initialize ISMN reader

# In[4]:


ismn_data_folder = os.path.join(testdata_folder,
                                'ismn/multinetwork/header_values')

ismn_reader = ISMN_Interface(ismn_data_folder)


# The validation is run based on jobs. A job consists of at least three lists or numpy arrays specifying the grid
# point index, its latitude and longitude. In the case of the ISMN we can use the `dataset_ids` that identify every
# time series in the downloaded ISMN data as our grid point index. We can then get longitude and latitude from the
# metadata of the dataset.
# 
# **DO NOT CHANGE** the name ***jobs*** because it will be searched during the parallel processing!

# In[5]:


jobs = []

ids = ismn_reader.get_dataset_ids(variable='soil moisture', min_depth=0, max_depth=0.1)
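# The excerpt breaks off here; judging from Example #15 above, the notebook
# would continue by filling the job list from the station metadata:
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))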
Example #17
def resample_ismn():
    """
    This resamples ISMN data onto the EASE2 grid and stores data for each grid cell into .csv files.
    If single grid cells contain multiple stations, they are averaged.

    A grid look-up table needs to be created first (method: ancillary.grid.create_lut).

    """

    paths = Paths()

    io = ISMN_Interface(paths.ismn_raw)

    # get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'
    if not dir_out.exists():
        dir_out.mkdir()

    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)

        idx = indices.split('-')

        # Only one station within grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture'] # Get only "good" data based on ISMN QC
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    df += [ts[ts['soil moisture_flag'] == 'G']['soil moisture']] # Get only "good" data based on ISMN QC
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue

            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # match temporal mean and standard deviation to those of the station with the maximum temporal coverage
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = (df[col] - df[col].mean()) / df[col].std() * df[ref].std() + df[ref].mean()

            # Average measurements of all stations
            df.mean(axis='columns').tz_convert(None).to_csv(fname, float_format='%.4f')
Example #18
def test_ascat_ismn_validation_metadata_rolling(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {"class": ismn_reader, "columns": ["soil moisture"]},
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.RollingMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(
            results, save_path, ts_vars=["R", "p_R", "RMSD"]
        )

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"gpi",
        u"lon",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]

    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df["network"].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(["R", "p_R", "RMSD"])
    )
Example #19
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader

    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

    assert sorted(vars) == sorted(vars_should)
    assert sorted(n_obs) == sorted(n_obs_should)
    nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
    nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
    nptest.assert_equal(sorted(network), sorted(network_should))
Example #20
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):
    
    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
            'Data_seperate_files_20170810_20180809')
        metadata_path = os.path.join(testdata, 'python_metadata')

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[])
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=['COSMOS'])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()
            
    def test_list(self):
        assert len(self.ds.list_networks()) == 1
        assert len(self.ds.list_stations()) == len(self.ds.list_stations('COSMOS')) == 2
        assert len(self.ds.list_sensors()) == 2
        assert len(self.ds.list_sensors(station='Barrow-ARM')) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station('Barrow-ARM') == 'COSMOS'
        assert self.ds.network_for_station('ARM-1') == 'COSMOS'

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure('soil_moisture'):
            assert s.name in ['ARM-1', 'Barrow-ARM']

        for s in self.ds.stations_that_measure('nonexisting'):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=100, groupby='network')
        assert list(ids.keys()) == ['COSMOS']
        assert ids['COSMOS'] == [0,1]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=99,
                                      filter_meta_dict={'lc_2010': 210,
                                                        'network': 'COSMOS',
                                                        'station': 'Barrow-ARM'})
        assert ids == [1]

        ids = self.ds.get_dataset_ids('novar')
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 0.19) # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.) # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.,
                                      filter_meta_dict={'lc_2010': 210}) # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('nonexisting') # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2 = self.ds.read_ts(1)
        assert not data2.empty

        assert len(data1.index) != len(data2.index) # make sure they are not same

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, 'plot.png')
            self.ds.plot_station_locations('soil_moisture', markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps('soil_moisture', max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks['COSMOS'].stations['ARM-1']
        tmin, tmax = station.get_min_max_obs_timestamp('soil_moisture', 0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals('soil_moisture', max_depth=0.19)
        assert vals == {130: 'Grassland'}

        vals = self.ds.get_landcover_types('soil_moisture', max_depth=100)
        assert len(vals) == 2
        assert vals[130] == 'Grassland'
        assert vals[210] == 'Water'
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types('soil_moisture', max_depth=100,
                                         climate='climate_KG')
        assert len(vals) == 2
        assert vals['ET'] == 'Polar Tundra'
        assert vals['Cfa'] == 'Temperate Without Dry Season, Hot Summer'
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ['soil_moisture']


    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={'network': 'COSMOS'}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check that the network is COSMOS and the station is ARM-1 or Barrow-ARM
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks['COSMOS'].stations['Barrow-ARM'].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0,1),
                filter_meta_dict={'station': ['Barrow-ARM', 'ARM-1']}):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2


        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable='nonexisting'):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat
        gpi, dist = self.ds.collection.grid.find_nearest_gpi(int(should_lon), int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0, return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None
Example #21
def test_ascat_ismn_validation_metadata_rolling():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        'network': np.array(['None'], dtype='U256'),
        'station': np.array(['None'], dtype='U256'),
        'landcover': np.float32([np.nan]),
        'climate': np.array(['None'], dtype='U4')
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            'network': metadata['network'],
            'station': metadata['station'],
            'landcover': metadata['landcover_2010'],
            'climate': metadata['climate']
        }]
        jobs.append(
            (idx, metadata['longitude'], metadata['latitude'], metadata_dict))

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.RollingMetrics(
                other_name='k1',
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results,
                               save_path,
                               ts_vars=['R', 'p_R', 'RMSD'])

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'gpi', u'lon', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]

    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array([
        'MAQU', 'MAQU', 'SCAN', 'SCAN', 'SCAN', 'SOILSCAPE', 'SOILSCAPE',
        'SOILSCAPE'
    ],
                              dtype='U256')

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df['network'].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(['R', 'p_R', 'RMSD']))
Example #22
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS', u'p_rho',
        u'rho', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454, 0.30299741,
        0.53143877, 0.62204134
    ],
                          dtype=np.float32)

    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ],
                           dtype=np.float32)
    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(list(results.variables.keys())) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
Example #23
    def setUp(self) -> None:

        self.ds = ISMN_Interface(self.testdata_zip_path)
Example #24
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        with pytest.deprecated_call():
            assert len(self.ds.list_networks()) == 1
            assert len(self.ds.list_stations()) == len(
                self.ds.list_stations("COSMOS")) == 2
            assert len(self.ds.list_sensors()) == 2
            assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
        assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
            assert s.name in ["ARM-1", "Barrow-ARM"]

        for s in self.ds.stations_that_measure("nonexisting"):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids("soil_moisture",
                                      max_depth=100,
                                      groupby="network")
        assert list(ids.keys()) == ["COSMOS"]
        assert ids["COSMOS"] == [0, 1]

        ids = self.ds.get_dataset_ids("soil_moisture", max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids(
            ["soil_moisture"],
            max_depth=99,
            filter_meta_dict={
                "lc_2010": 210,
                "network": "COSMOS",
                "station": "Barrow-ARM",
            },
        )
        assert ids == [1]

        ids = self.ds.get_dataset_ids("novar")
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids(["soil_moisture", "shouldhavenoeffect"],
                                      0.0, 0.19)  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0,
                                      1.0)  # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids("soil_moisture",
                                      0.0,
                                      1.0,
                                      filter_meta_dict={"lc_2010":
                                                        210})  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("nonexisting")  # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert not data2.empty

    def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

        assert not self.ds.metadata.empty
        assert self.ds.metadata.loc[1]['station']['val'] \
               == self.ds.read_metadata([0,1]).loc[1, ('station', 'val')]

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, "plot.png")
            self.ds.plot_station_locations(["soil_moisture", 'precipitation'],
                                           markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps("soil_moisture",
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks["COSMOS"].stations["ARM-1"]
        tmin, tmax = station.get_min_max_obs_timestamp("soil_moisture", 0,
                                                       0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals("soil_moisture", max_depth=0.19)
        assert vals == {130: "Grassland"}

        vals = self.ds.get_landcover_types("soil_moisture", max_depth=100)
        assert len(vals) == 2
        assert vals[130] == "Grassland"
        assert vals[210] == "Water"
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types("soil_moisture",
                                         max_depth=100,
                                         climate="climate_KG")
        assert len(vals) == 2
        assert vals["ET"] == "Polar Tundra"
        assert vals["Cfa"] == "Temperate Without Dry Season, Hot Summer"
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ["soil_moisture"]

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={"network": "COSMOS"}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check that the network is COSMOS and the station is ARM-1 or Barrow-ARM
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks["COSMOS"].stations[
                "Barrow-ARM"].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={"station": ["Barrow-ARM", "ARM-1"]},
        ):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable="nonexisting"):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat
        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0,
                                                     0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None

    def test_citation(self):
        with TemporaryDirectory() as out_dir:
            out_file = os.path.join(out_dir, 'citation.txt')
            refs = self.ds.collection.export_citations(out_file=out_file)
            assert all([
                net in refs.keys()
                for net in list(self.ds.collection.networks.keys())
            ])
            assert os.path.exists(out_file)
            with open(out_file, mode='r') as f:
                lines = f.readlines()
                assert len(lines) > 0
Example #25
static_layers_folder = os.path.join(testdata_folder, 'sat/h_saf/static_layer')

ascat_reader = AscatSsmCdr(ascat_data_folder,
                           ascat_grid_folder,
                           grid_filename='TUW_WARP5_grid_info_2_1.nc',
                           static_layer_path=static_layers_folder)
ascat_reader.read_bulk = True

# Initialize ISMN reader

# In[4]:

ismn_data_folder = os.path.join(testdata_folder,
                                'ismn/multinetwork/header_values')

ismn_reader = ISMN_Interface(ismn_data_folder)

# The validation is run based on jobs. A job consists of at least three lists or numpy arrays specifying the grid
# point index, its latitude and longitude. In the case of the ISMN we can use the `dataset_ids` that identify every
# time series in the downloaded ISMN data as our grid point index. We can then get longitude and latitude from the
# metadata of the dataset.
#
# **DO NOT CHANGE** the name ***jobs*** because it will be searched during the parallel processing!

# In[5]:

jobs = []

ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                  min_depth=0,
                                  max_depth=0.1)
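# As in Example #16, the excerpt ends before the loop that fills ***jobs***;
# it would plausibly continue as in Example #15:
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))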
Example #26
def init_worker(function: Any):
    """Sets up a worker with the GCS client and the ISMN Data."""
    function.gcs_client = storage.Client.create_anonymous_client()
    function.ismn_data = ISMN_Interface(_ISMN_DATA_PATH_FLAG.value,
                                        parallel=True)
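# Inside the worker, the per-process objects are then reachable as attributes
# of the task function itself; a hypothetical task body mirroring Example #5's
# add_sm_labels_to_data:
def add_sm_labels_to_data(input_file, output_dir, temp_dir):
    client = add_sm_labels_to_data.gcs_client  # created once per worker
    ismn_data = add_sm_labels_to_data.ismn_data
    ...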