Пример #1
0
def test_img2ts_daily_no_resampling_missing_day():
    """
    Test image to time series conversion (no resampling) over the missing
    day 2016-01-01.

    The time series read back for ``var1`` at location 0 must contain neither
    a value nor a time stamp for 2016-01-01.
    """
    # function-scope import: only needed to remove the temporary folder
    import shutil

    input_grid = BasicGrid(
        np.array([0.5, 0.5, -0.5, -0.5]),
        np.array([1, -1, 1, -1]),
    )

    outputpath = tempfile.mkdtemp()
    try:
        start = datetime(2015, 12, 5)
        end = datetime(2016, 1, 10)

        ds_in = TestMultiTemporalImageDatasetDaily()
        img2ts = Img2Ts(ds_in,
                        outputpath,
                        start,
                        end,
                        imgbuffer=15,
                        input_grid=input_grid)

        # `np.float` was removed in NumPy 1.24; the builtin `float` is the
        # same dtype (float64).
        # Expected values are 5..31 followed by 2..10, i.e. the value 1 that
        # would belong to the missing day is absent.
        ts_should = np.concatenate(
            [np.arange(5, 32, dtype=float),
             np.arange(2, 11, dtype=float)])
        dates_should = ds_in.tstamps_for_daterange(start, end)
        dates_should.remove(datetime(2016, 1, 1))
        img2ts.calc()
        ts_file = os.path.join(outputpath, '0000.nc')
        with OrthoMultiTs(ts_file) as ds:
            ts = ds.read_ts('var1', 0)
            nptest.assert_allclose(ts['var1'], ts_should)
            assert dates_should == list(ts['time'])
            nptest.assert_allclose(ds.dataset.variables['location_id'][:],
                                   np.array([0, 1, 2, 3]))
    finally:
        # best-effort cleanup; must never mask the outcome of the assertions
        shutil.rmtree(outputpath, ignore_errors=True)
Пример #2
0
def reshuffle(input_root, outputpath,
              startdate, enddate,
              parameters=None, land_points=True, ignore_meta=False,
              imgbuffer=200):
    """
    Convert ESA CCI SM images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the ESA CCI SM images are stored.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert.
        If None is passed, the variables found via the file in the root path
        are used (without 'lat', 'lon' and 'time').
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on.
        Leads to faster processing and smaller files.
    ignore_meta : bool, optional (default: False)
        If True, no metadata is read via ``read_metadata``; only the
        'product' global attribute is set and no time series attributes
        are passed on.
    imgbuffer: int, optional (default: 200)
        How many images to read at once before writing time series.
    """
    grid = CCILandGrid() if land_points else CCICellGrid()

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    file_args, file_vars = parse_filename(input_root)

    if parameters is None:
        coordinate_names = ['lat', 'lon', 'time']
        parameters = [name for name in file_vars
                      if name not in coordinate_names]

    input_dataset = CCI_SM_025Ds(data_path=input_root,
                                 parameter=parameters,
                                 subgrid=grid,
                                 array_1D=True)

    if ignore_meta:
        global_attr = {'product': 'ESA CCI SM'}
        ts_attributes = None
    else:
        global_attr, ts_attributes = read_metadata(
            sensortype=file_args['sensor_type'],
            version=int(file_args['version']),
            varnames=parameters)
        # record the processed period in the global attributes
        global_attr['time_coverage_start'] = str(startdate)
        global_attr['time_coverage_end'] = str(enddate)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #3
0
def reshuffle(input_root, outputpath,
              startdate, enddate,
              parameters, land_points=True,
              imgbuffer=50):
    """
    Convert GLDAS images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the GLDAS data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    land_points : bool, optional (default: True)
        If True, a GLDAS025LandGrid is used as the grid; otherwise the grid
        is built from the lon/lat of the image read at ``startdate``.
        For grib input a warning is issued when this is True.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    landgrid = GLDAS025LandGrid() if land_points else None

    reading_grib = get_filetype(input_root) == 'grib'
    if reading_grib:
        input_dataset = GLDAS_Noah_v1_025Ds(input_root, parameters,
                                            array_1D=True)
        if land_points:
            warnings.warn('Land Grid is fit to GLDAS 2.x netCDF data')
    else:
        input_dataset = GLDAS_Noah_v21_025Ds(input_root, parameters, landgrid,
                                             array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # the image at the start date provides the time series attributes and,
    # when no land grid is used, the grid coordinates
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = landgrid if landgrid else BasicGrid(first_image.lon,
                                               first_image.lat)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr={'product': 'GLDAS'},
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #4
0
def reshuffle(input_root, outputpath,
              startdate, enddate,
              parameters=None, img_kwargs=None,
              imgbuffer=50):
    """
    Reshuffle method applied to SMOS image data.

    Parameters
    ----------
    input_root: string
        Input path where the SMOS image data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert. If None, the variables of the first
        file are used (without 'lat', 'lon' and 'time').
    img_kwargs: dict, optional (default: None)
        Kwargs that are passed to the image class. None means no additional
        kwargs.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    # avoid a mutable default argument that is shared between calls
    if img_kwargs is None:
        img_kwargs = {}

    ff, file_vars = firstfile(input_root)
    fp, ff = os.path.split(ff)

    grid = EASE25CellGrid()

    if parameters is None:
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    # this is only for reading the ts_attrs
    input_dataset = SMOSImg(filename=os.path.join(fp, ff),
                            parameters=parameters, grid=grid, flatten=True,
                            **img_kwargs)
    data = input_dataset.read()
    ts_attributes = data.metadata
    # the attributes read above are deliberately discarded for now
    ts_attributes = None  # todo: fails for Quality_Flags

    input_dataset = SMOSDs(input_root, parameters, grid=grid, flatten=True,
                           **img_kwargs)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'SMOS_IC'}

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate, input_grid=grid,
                        imgbuffer=imgbuffer, cellsize_lat=5.0,
                        cellsize_lon=5.0, global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)
    reshuffler.calc()
Пример #5
0
def _create_reshuffler(
    dataset_root,
    timeseries_root,
    startdate,
    enddate,
    imgbuffer=365,
    only_land=False,
    bbox=None,
):
    """
    Set up (but do not run) an Img2Ts reshuffler for a GSWPDataset.

    Parameters
    ----------
    dataset_root : str or Path
        Directory containing the data files; all ``*.nc`` files in it are
        passed to the dataset.
    timeseries_root : str or Path
        Directory where the timeseries files are stored. Created (including
        parents) if it does not exist.
    startdate : np.datetime64
        Start date of processing
    enddate : np.datetime64
        End date of processing
    imgbuffer : int, optional (default: 365)
        Number of images to read at once.
    only_land : bool, optional (default: False)
        Use the land mask to reduce the grid to land grid points only.
    bbox : list/tuple, optional (default: None)
        Bounding box parameters in the form [min_lon, min_lat, max_lon,
        max_lat]

    Returns
    -------
    reshuffler : Img2Ts object
    """
    nc_pattern = Path(dataset_root) / "*.nc"
    input_dataset = GSWPDataset(nc_pattern, only_land=only_land, bbox=bbox)

    out_dir = Path(timeseries_root)
    out_dir.mkdir(parents=True, exist_ok=True)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=timeseries_root,
        startdate=startdate,
        enddate=enddate,
        ts_attributes=input_dataset.metadata,
        zlib=True,
        imgbuffer=imgbuffer,
        # passing the cell size explicitly is necessary currently due to a
        # bug in repurpose
        cellsize_lat=input_dataset.cellsize,
        cellsize_lon=input_dataset.cellsize,
    )
    return Img2Ts(**img2ts_settings)
Пример #6
0
def reshuffle(input_root, outputpath,
              startdate, enddate,
              imgbuffer=200, **ds_kwargs):
    """
    Convert SMOS image data into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where smos ic data was downloaded to (yearly folders).
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    imgbuffer: int, optional (default: 200)
        How many images to read at once before writing time series.
    ds_kwargs: dict
        Kwargs that are passed to the image datastack class.
        If 'grid' is missing, ``EASE25CellGrid(None)`` is used. If
        'parameters' is missing or None, the parameters of the first image
        are used.
    """
    first_path, file_vars = firstfile(input_root)
    folder, fname = os.path.split(first_path)

    if 'grid' not in ds_kwargs:
        ds_kwargs['grid'] = EASE25CellGrid(None)
    ds_kwargs.setdefault('parameters', None)

    # a single image is opened only to obtain the attributes (and, if
    # necessary, the parameter names)
    template_img = SMOSImg(filename=os.path.join(folder, fname),
                           parameters=ds_kwargs['parameters'],
                           flatten=True,
                           read_flags=None,
                           grid=ds_kwargs['grid'])
    _, ts_attributes = template_img._read_img()
    global_attr = template_img.get_global_attrs()

    if ds_kwargs['parameters'] is None:
        ds_kwargs['parameters'] = template_img.parameters

    input_dataset = SMOSDs(input_root, flatten=True, **ds_kwargs)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # drop points that are not part of the subset
    subset_grid = ds_kwargs['grid'].cut()

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=subset_grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #7
0
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, overpass=None, crid=None, imgbuffer=50):
    """
    Convert SPL3SMP images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the SPL3SMP data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    overpass : str, optional (default: None)
        Select 'AM' for the descending overpass or 'PM' for the ascending one.
        If the version data does not contain multiple overpasses, this must
        be None. When set, it is also stored as a global attribute.
    crid : int, optional (default: None)
        Search for files with this Composite Release ID for reshuffling only.
        See also https://nsidc.org/data/smap/data_versions#CRID
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root,
                               parameter=parameters,
                               overpass=overpass,
                               crid=crid,
                               flatten=True)

    global_attr = {'product': 'SPL3SMP'}
    if overpass:
        global_attr.update(overpass=overpass)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # time series attributes are taken from the image at the start date
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata

    # build the target grid from the 2D coordinates of the 36 km EASE2 grid
    ease36 = EASE2_grid(36000)
    lon_2d, lat_2d = np.meshgrid(ease36.londim, ease36.latdim)
    grid = BasicGrid(lon_2d.flatten(), lat_2d.flatten())

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #8
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters,
              imgbuffer=50):
    """
    Convert ESACCI SM v0.42 images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the ESACCI SM image data is stored.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    input_dataset = CCI_SM_v042_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # the image at the start date provides the time series attributes and
    # the coordinates of the grid
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr={'product': 'ESACCI'},
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #9
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters,
              imgbuffer=50):
    """
    Convert ERA-Interim images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the ERA-Interim data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    input_dataset = ERAInterimDs(parameters, input_root, expand_grid=False)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # the image at the start date provides the time series attributes and
    # the coordinates of the grid
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype('float32'),
        global_attr={'product': 'ERA Interim'},
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #10
0
def reshuffle(input_root, outputpath,
              startdate, enddate,
              parameters,
              imgbuffer=50):
    """
    Convert SPL3SMP images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the SPL3SMP data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root, parameter=parameters, flatten=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # time series attributes are taken from the image at the start date
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata

    # build the target grid from the 2D coordinates of the 36 km EASE2 grid
    ease36 = EASE2_grid(36000)
    lon_2d, lat_2d = np.meshgrid(ease36.londim, ease36.latdim)
    grid = BasicGrid(lon_2d.flatten(), lat_2d.flatten())

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr={'product': 'SPL3SMP'},
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #11
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters=None,
              land_points=True,
              bbox=None,
              ignore_meta=False,
              imgbuffer=500):
    """
    Convert C3S images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where c3s images were downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert. If None, the variables found via
        the file name parsing of the input path are used (without 'lat',
        'lon' and 'time').
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on.
        Leads to faster processing and smaller files.
    bbox : tuple, optional (default: None)
        Min lon, min lat, max lon, max lat
        BBox to read data for.
    ignore_meta : bool, optional (default: False)
        Ignore metadata and reshuffle only the values. Can be used e.g. if a
        version is not yet supported.
    imgbuffer: int, optional (default: 500)
        How many images to read at once before writing time series.
    """
    grid = SMECV_Grid_v052('land') if land_points else SMECV_Grid_v052(None)

    if bbox:
        grid = grid.subgrid_from_bbox(*bbox)

    if parameters is None:
        file_args, file_vars = parse_filename(input_root)
        coordinate_names = ['lat', 'lon', 'time']
        parameters = [name for name in file_vars
                      if name not in coordinate_names]

    # images are searched in yearly sub folders if a folder for the start
    # year exists in the input path
    start_year_dir = os.path.join(input_root, str(startdate.year))
    subpath_templ = ('%Y', ) if os.path.isdir(start_year_dir) else None

    input_dataset = C3S_Nc_Img_Stack(data_path=input_root,
                                     parameters=parameters,
                                     subgrid=grid,
                                     flatten=True,
                                     fillval=None,
                                     subpath_templ=subpath_templ)

    if ignore_meta:
        global_attributes = None
        ts_attributes = None
    else:
        prod_args = input_dataset.fname_args

        attr_kwargs = {
            'sensor_type': prod_args['prod'].lower(),
            'cdr_type': prod_args['cdr'],
            'product_temp_res': prod_args['temp'],
            'cls': getattr(metadata, f"C3S_SM_TS_Attrs_{prod_args['vers']}")
        }

        if prod_args['temp'].upper() == 'DAILY':
            # the daily attribute class is called without the temporal
            # resolution
            del attr_kwargs['product_temp_res']
            attrs = C3S_daily_tsatt_nc(**attr_kwargs)
        else:
            attrs = C3S_dekmon_tsatt_nc(**attr_kwargs)

        global_attributes = attrs.global_attr

        ts_attributes = {}
        for name in parameters:
            ts_attributes.update(attrs.ts_attributes[name])

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attributes,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #12
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters,
              imgbuffer=50):
    """
    Reshuffle method applied to ERA5 data.

    Parameters
    ----------
    input_root: string
        Input path where the ERA5 data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """

    input_dataset = ECMWF_ERA5_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ERA5'}

    # get time series attributes and grid coordinates from first day of data.
    # NOTE: a leftover debugging access to data['skt'] was removed here; it
    # raised a KeyError whenever 'skt' was not among the parameters.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata
    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset,
                        outputpath=outputpath,
                        startdate=startdate,
                        enddate=enddate,
                        input_grid=grid,
                        imgbuffer=imgbuffer,
                        cellsize_lat=5.0,
                        cellsize_lon=5.0,
                        global_attr=global_attr,
                        zlib=True,
                        unlim_chunksize=1000,
                        ts_attributes=ts_attributes)
    reshuffler.calc()
Пример #13
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters=None,
              land_points=True,
              ignore_meta=False,
              imgbuffer=200):
    """
    Convert ESA CCI SM images into time series (reshuffling).

    Parameters
    ----------
    input_root: string
        Input path where the ESA CCI SM images are stored.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert.
        If None is passed, the variables found via the file in the root path
        are used (without 'lat', 'lon' and 'time').
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on.
        Leads to faster processing and smaller files.
    ignore_meta : bool, optional (default: False)
        If True, no metadata is read via ``read_metadata``; only the
        'product' global attribute is set and no time series attributes
        are passed on.
    imgbuffer: int, optional (default: 200)
        How many images to read at once before writing time series.
    """

    def _unmasked(values):
        # repurpose cannot handle masked arrays, so fill them
        if isinstance(values, np.ma.MaskedArray):
            return values.filled()
        return values

    source_grid = CCILandGrid() if land_points else CCICellGrid()
    gpis, lons, lats, cells = source_grid.get_grid_points()

    # rebuild the grid from plain arrays, with 5 degree cells
    grid = BasicGrid(lon=_unmasked(lons),
                     lat=_unmasked(lats),
                     gpis=_unmasked(gpis)).to_cell_grid(5.)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    file_args, file_vars = parse_filename(input_root)

    if parameters is None:
        coordinate_names = ['lat', 'lon', 'time']
        parameters = [name for name in file_vars
                      if name not in coordinate_names]

    input_dataset = CCI_SM_025Ds(data_path=input_root,
                                 parameter=parameters,
                                 subgrid=grid,
                                 array_1D=True)

    if ignore_meta:
        global_attr = {'product': 'ESA CCI SM'}
        ts_attributes = None
    else:
        global_attr, ts_attributes = read_metadata(
            sensortype=file_args['sensor_type'],
            version=int(file_args['version']),
            varnames=parameters,
            subversion=file_args['sub_version'])

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #14
0
def reshuffle(
        input_root,
        outputpath,
        startdate,
        enddate,
        variables,
        product=None,
        bbox=None,
        h_steps=(0, 6, 12, 18),
        land_points=False,
        imgbuffer=50,
):
    """
    Convert ERA5 / ERA5-Land images into netcdf time series (reshuffling).

    Parameters
    ----------
    input_root: str
        Input path where ERA image data was downloaded to.
    outputpath : str
        Output path, where the reshuffled netcdf time series are stored.
        Created if it does not exist.
    startdate : datetime
        Start date, from which images are read and time series are generated.
    enddate : datetime
        End date, until which images are read and time series are generated.
    variables: tuple or list or str
        Variables to read from the passed images and convert into time
        series format.
    product : str, optional (default: None)
        Either era5 or era5-land, if None is passed we guess the product from
        the downloaded image files.
    bbox: tuple optional (default: None)
        (min_lon, min_lat, max_lon, max_lat) - wgs84.
        To load only a subset of the global grid / file.
    h_steps : list or tuple, optional (default: (0, 6, 12, 18))
        Hours at which images are read for each day and used for reshuffling,
        therefore this defines the sub-daily temporal resolution of the time
        series that are generated. None selects the default.
    land_points: bool, optional (default: False)
        Reshuffle only land points. Uses the ERA5 land mask to create a land
        grid. Only implemented for netcdf files.
        The land grid is fixed to 0.25*0.25 or 0.1*0.1 deg for now.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
        This number affects how many images are stored in memory and should
        be chosen according to the available amount of memory and the size of
        a single image.

    Raises
    ------
    NotImplementedError
        If no grid is implemented for the product, or if land points are
        requested for grib files.
    Exception
        If the file format is neither grib nor netcdf.
    """
    if h_steps is None:
        h_steps = (0, 6, 12, 18)

    filetype = parse_filetype(input_root)
    if not product:
        product = parse_product(input_root)

    # grid resolution (deg) depends on the product only
    if product == "era5":
        resolution = 0.25
    elif product == "era5-land":
        resolution = 0.1
    else:
        if land_points:
            raise NotImplementedError(
                product, "Land grid not implemented for product.")
        raise NotImplementedError(product,
                                  "Grid not implemented for product.")

    grid_cls = ERA5_RegularImgLandGrid if land_points else ERA_RegularImgGrid
    grid = grid_cls(res_lat=resolution, res_lon=resolution, bbox=bbox)

    # both readers take the same arguments, only the class differs
    if filetype == "grib":
        if land_points:
            raise NotImplementedError(
                "Reshuffling land points only implemented for netcdf files")
        dataset_cls = ERA5GrbDs
    elif filetype == "netcdf":
        dataset_cls = ERA5NcDs
    else:
        raise Exception("Unknown file format")

    input_dataset = dataset_cls(
        root_path=input_root,
        parameter=variables,
        subgrid=grid,
        array_1D=True,
        h_steps=h_steps,
        product=product,
        mask_seapoints=False,
    )

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {"product": f"{product.upper()} (from {filetype})"}

    # time series attributes are taken from the first image of the start day
    first_hour = time(h_steps[0], 0)
    first_date_time = datetime.combine(startdate.date(), first_hour)
    first_image = input_dataset.read(first_date_time)
    ts_attributes = first_image.metadata

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype("float32"),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #15
0
def reshuffle(input_root,
              outputpath,
              startdate,
              enddate,
              parameters,
              land_points=False,
              imgbuffer=50):
    """
    Convert ECMWF reanalysis images (grib or netcdf) into time series.

    Parameters
    ----------
    input_root: string
        Input path where the image data was downloaded.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    land_points: bool, optional (default: False)
        Reshuffle land points only (not implemented yet, the value is
        currently not used).
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.

    Raises
    ------
    Exception
        If the file format is neither grib nor netcdf.
    """
    filetype = get_filetype(input_root)

    if filetype == 'netcdf':
        input_dataset = ERANcDs(input_root,
                                parameters,
                                subgrid=False,
                                array_1D=True)
    elif filetype == 'grib':
        input_dataset = ERAGrbDs(input_root, parameters, expand_grid=False)
    else:
        raise Exception('Unknown file format')

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ECMWF Reanalysis from {}'.format(filetype)}

    # the image at the start date provides the time series attributes and
    # the coordinates of the grid
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    img2ts_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype('float32'),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    Img2Ts(**img2ts_settings).calc()
Пример #16
0
def reshuffle(in_path, out_path, start_date, end_date, parameters,
              temporal_sampling=6, img_buffer=50):
    """
    Convert MERRA2 images into time series using Img2Ts.

    Parameters
    ----------
    in_path : string
        Input path where the MERRA2 data was downloaded to.
    out_path : string
        Output path. The directory is created if it does not exist yet.
    start_date : datetime
        Start date. The image at this date is also read once up front to
        obtain the time series attributes and the grid coordinates.
    end_date : datetime
        End date.
    parameters : list
        Parameters to read and convert.
    temporal_sampling : int in range [1, 24], optional (default: 6)
        Get an image every n hours where n=temporal_sampling. For example:
        if 1: return hourly sampled data -> hourly sampling
        if 6: return an image every 6 hours -> 6 hourly sampling
        if 24: return the 00:30 image of each day -> daily sampling
    img_buffer : int, optional (default: 50)
        How many images to read at once before writing the time series.
    """
    # The sampling is handed to the image stack; the img_bulk class in
    # img2ts then iterates through every nth timestamp.
    image_stack = MerraImageStack(data_path=in_path,
                                  parameter=parameters,
                                  temporal_sampling=temporal_sampling,
                                  array_1d=True)

    # make sure the output directory is there before anything is written
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # the image of the first day provides both the time series
    # attributes and the coordinates the grid is built from
    first_image = image_stack.read(start_date)
    attributes = first_image.metadata
    image_grid = BasicGrid(first_image.lon, first_image.lat)

    Img2Ts(input_dataset=image_stack,
           outputpath=out_path,
           startdate=start_date,
           enddate=end_date,
           input_grid=image_grid,
           imgbuffer=img_buffer,
           cellsize_lat=5.0,
           cellsize_lon=6.25,
           global_attr={'product': 'MERRA2_hourly'},
           zlib=True,
           unlim_chunksize=1000,
           ts_attributes=attributes).calc()
Пример #17
0
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              land_points=True, imgbuffer=50):
    """
    Convert C3S images into time series using Img2Ts.

    Parameters
    ----------
    input_root : string
        Input path where the C3S images were downloaded to.
    outputpath : string
        Output path. The directory is created if it does not exist yet.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters : list or None
        Parameters to read and convert. If None is passed, the variables
        returned by ``parse_filename(input_root)`` are used, leaving out
        'lat', 'lon' and 'time'.
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on.
        Leads to faster processing and smaller files.
    imgbuffer : int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    source_grid = C3SLandGrid() if land_points else C3SCellGrid()

    gpis, lons, lats, _ = source_grid.get_grid_points()
    # repurpose cannot handle masked arrays, so they are filled here
    coords = {
        name: values.filled()
        if isinstance(values, np.ma.MaskedArray) else values
        for name, values in (('gpis', gpis), ('lons', lons), ('lats', lats))
    }

    grid = BasicGrid(lon=coords['lons'],
                     lat=coords['lats'],
                     gpis=coords['gpis']).to_cell_grid(5.)

    if parameters is None:
        _, file_vars = parse_filename(input_root)
        parameters = [
            name for name in file_vars
            if name not in ('lat', 'lon', 'time')
        ]

    input_dataset = C3S_Nc_Img_Stack(data_path=input_root,
                                     parameters=parameters,
                                     subgrid=grid,
                                     array_1D=True)

    # product description as provided by the image stack
    fname_args = input_dataset.fname_args

    attr_kwargs = {
        'product_sensor_type': fname_args['sensor_type'].lower(),
        'sub_version': '.' + fname_args['sub_version'],
        'product_sub_type': fname_args['sub_prod'],
    }

    # the attribute class in the metadata module is selected by version
    subattr = getattr(metadata,
                      "C3S_SM_TS_Attrs_%s" % fname_args['version'])

    # daily products get their own attribute reader, all other temporal
    # resolutions share one
    if fname_args['temp_res'] == 'DAILY':
        attr_reader = C3S_daily_tsatt_nc
    else:
        attr_reader = C3S_dekmon_tsatt_nc
    attrs = attr_reader(subattr, **attr_kwargs)

    global_attributes = attrs.global_attr

    # todo: attrs for all vars or only for the ones that TS were created for.
    ts_attributes = {}
    for name in parameters:
        ts_attributes.update(attrs.ts_attributes[name])

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    img2ts_settings = dict(input_dataset=input_dataset,
                           outputpath=outputpath,
                           startdate=startdate,
                           enddate=enddate,
                           input_grid=grid,
                           imgbuffer=imgbuffer,
                           cellsize_lat=5.0,
                           cellsize_lon=5.0,
                           global_attr=global_attributes,
                           zlib=True,
                           unlim_chunksize=1000,
                           ts_attributes=ts_attributes)
    Img2Ts(**img2ts_settings).calc()
Пример #18
0
def reshuffle(
        input_root,
        outputpath,
        startdate,
        enddate,
        variables,
        mask_seapoints=False,
        h_steps=(0, 6, 12, 18),
        imgbuffer=50,
):
    """
    Convert ERA images into netcdf time series using Img2Ts.

    Parameters
    ----------
    input_root : str
        Input path where the ERA image data was downloaded to.
    outputpath : str
        Output path where the reshuffled netcdf time series are stored.
        The directory is created if it does not exist yet.
    startdate : datetime
        Start date of the period that is converted. Only its date part
        is used to pick the image the attributes and grid are taken from.
    enddate : datetime
        End date of the period that is converted.
    variables : list or str or tuple
        Variables to read from the images and convert into time series
        format.
    mask_seapoints : bool, optional (default: False)
        Mask points over sea, replace them with nan.
    h_steps : tuple, optional (default: (0, 6, 12, 18))
        Full hours for which images are available. The first entry is
        the hour of the image that is read up front.
    imgbuffer : int, optional (default: 50)
        How many images to read at once before writing time series. This
        number affects how many images are stored in memory and should be
        chosen according to the available amount of memory and the size
        of a single image.

    Raises
    ------
    Exception
        If the file type found for ``input_root`` is neither "grib" nor
        "netcdf".
    """
    filetype = parse_filetype(input_root)

    # both readers are set up with identical arguments, only the class
    # depends on the file type
    if filetype == "grib":
        reader_class = ERAIntGrbDs
    elif filetype == "netcdf":
        reader_class = ERAIntNcDs
    else:
        raise Exception("Unknown file format")

    image_reader = reader_class(
        root_path=input_root,
        parameter=variables,
        subgrid=None,
        array_1D=True,
        mask_seapoints=mask_seapoints,
        h_steps=h_steps,
    )

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    product_attrs = {"product": "ERA Interim (from {})".format(filetype)}

    # the image at the first hour step of the start day provides the
    # time series attributes and the coordinates for the grid
    first_hour = time(h_steps[0], 0)
    first_image = image_reader.read(
        datetime.combine(startdate.date(), first_hour))
    attributes = first_image.metadata
    image_grid = BasicGrid(first_image.lon, first_image.lat)

    Img2Ts(
        input_dataset=image_reader,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=image_grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype("float32"),
        global_attr=product_attrs,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=attributes,
    ).calc()