Example #1
import os

import xarray

from HUGS.Modules import Datasource
# get_local_bucket is assumed to live alongside get_bucket in HUGS.ObjectStore
from HUGS.ObjectStore import get_local_bucket


def test_load_dataset():
    # Build the path to the test NetCDF file relative to this test module
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filepath = os.path.join(dir_path, test_data, filename)

    ds = xarray.load_dataset(filepath)

    metadata = {"some": "metadata"}

    # Create a Datasource and store the dataset with its metadata
    d = Datasource("dataset_test")

    d.add_data(metadata=metadata, data=ds, data_type="footprint")

    d.save()

    # Retrieve the object store keys for the latest version of the data
    keys = d._data_keys["latest"]["keys"]

    key = list(keys.values())[0]

    bucket = get_local_bucket()

    # Load the dataset back from the object store and check it round-trips
    loaded_ds = Datasource.load_dataset(bucket=bucket, key=key)

    assert loaded_ds.equals(ds)
Example #2
def recombine_sections(data_keys):
    """ Combines separate datasets from the object store into a single
        dataset for processing to NetCDF for output

        Args:
            data_keys (list): List of object store keys for the data
            to be combined
        Returns:
            xarray.Dataset: Combined dataset sorted by time
    """
    from xarray import concat as xr_concat
    from HUGS.ObjectStore import get_bucket
    from HUGS.Modules import Datasource

    bucket = get_bucket()

    # Load each stored section and concatenate along the time dimension
    data = [Datasource.load_dataset(bucket=bucket, key=k) for k in data_keys]

    combined = xr_concat(data, dim="time")

    combined = combined.sortby("time")

    # Possible extension: drop duplicate timestamps, following
    # https://stackoverflow.com/questions/51058379/drop-duplicate-times-in-xarray
    # _, index = np.unique(combined["time"], return_index=True)
    # combined = combined.isel(time=index)

    return combined
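
A minimal usage sketch (not from the source): with an object store populated as in Example #1, the saved keys can be recombined and written out to NetCDF. The key lookup via `d._data_keys` mirrors Example #1, and the output filename is illustrative.

import numpy as np

# Keys gathered as in Example #1 (assumes the Datasource `d` has been saved)
keys = list(d._data_keys["latest"]["keys"].values())

combined = recombine_sections(data_keys=keys)

# Optionally drop duplicate timestamps, as suggested in the comments above
_, index = np.unique(combined["time"], return_index=True)
combined = combined.isel(time=index)

combined.to_netcdf("combined_output.nc")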