Example #1
import os
import warnings

# read_dataset, stack_ds and concatenate_ds are helpers defined in the
# surrounding dimarray code
def read_multinc(fnames,
                 nms=None,
                 axis=None,
                 keys=None,
                 align=False,
                 concatenate_only=False,
                 **kwargs):
    """ read multiple netCDF files 

    Parameters
    ----------
    fnames: list of file names or file handles to be read
    nms: variable names to be read
    axis, optional: string, dimension along which the files are concatenated 
        (created as new dimension if not already existing)
    keys, optional: sequence to be passed to stack_ds, if axis is not part of the dataset
    align, optional: if True, align the axes prior to stacking (default: False)
    concatenate_only, optional: if True, only concatenate along an existing axis (and raise an error if the axis does not exist)
    **kwargs: keyword arguments passed to io.nc.read_variable ('axis' cannot
        be passed this way, but indices can be given as a dictionary if
        needed, e.g. {'time': 2010})

    Returns
    -------
    dimarray's Dataset instance

    This function reads several files and calls stack_ds or concatenate_ds,
    depending on whether `axis` is absent from or present in the dataset,
    respectively.
    """
    variables = None
    dimensions = None

    datasets = []
    for fn in fnames:
        ds = read_dataset(fn, nms, **kwargs)

        # check that the same variables are present in the file
        if variables is None:
            variables = ds.keys()
        else:
            variables_new = ds.keys()
            assert variables == variables_new, \
                ("netCDF files must contain the same set of "
                 "variables to be concatenated/stacked")

        # check that the same dimensions are present in the file
        if dimensions is None:
            dimensions = ds.dims
        else:
            dimensions_new = ds.dims
            assert dimensions == dimensions_new, \
                ("netCDF files must contain the same set of "
                 "dimensions to be concatenated/stacked")

        datasets.append(ds)

    # check that the axis is an existing dimension, if required
    if concatenate_only and axis not in dimensions:
        raise ValueError('required axis {} not found, only got {}'.format(
            axis, dimensions))

    # Join dataset
    if axis in dimensions:
        # the keys argument only applies when stacking along a new axis
        if keys is not None:
            warnings.warn('keys argument is ignored when concatenating '
                          'along an existing axis')
        ds = concatenate_ds(datasets, axis=axis)

    else:
        # use file names as keys by default
        if keys is None:
            keys = [os.path.splitext(fn)[0] for fn in fnames]
        ds = stack_ds(datasets, axis=axis, keys=keys, align=align)

    return ds
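
A minimal usage sketch for the function above. The file names and the 'run' axis are hypothetical, chosen for illustration only: since 'run' is assumed not to be a dimension of the files, the datasets are stacked along a new 'run' axis, with the file names (minus extension) used as default keys.

# hypothetical usage: 'run' is not a dimension of the files, so the
# datasets are stacked along a new 'run' axis keyed by file name
ds = read_multinc(['run1.nc', 'run2.nc'], axis='run')
print(ds.dims)  # now includes the new 'run' dimension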
Example #2
import os

# DatasetOnDisk, stack_ds and concatenate_ds are helpers defined in the
# surrounding dimarray code
def _read_multinc(fnames, names=None, axis=None, keys=None, align=False,
                  sort=False, join='outer', concatenate_only=False, **kwargs):
    """ read multiple netCDF files 

    Parameters
    ----------
    fnames : list of file names or file handles to be read
    names : variable names to be read
    axis : str, optional
        dimension along which the files are concatenated 
        (created as new dimension if not already existing)
    keys : sequence, optional
        to be passed to stack_ds, if axis is not part of the dataset
    align : `bool`, optional
        if True, reindex the axes prior to stacking / concatenating (default: False)
    sort : `bool`, optional
        if True, sort common axes prior to stacking / concatenating (default: False)
    join : `str`, optional
        join method used when aligning (default: 'outer')
    concatenate_only : `bool`, optional
        if True, only concatenate along an existing axis (and raise an error
        if the axis does not exist)

    **kwargs : keyword arguments passed to DatasetOnDisk.read ('axis' cannot
        be passed this way, but indices can be given as a dictionary if
        needed, e.g. {'time': 2010})

    Returns
    -------
    dimarray's Dataset instance

    This function reads several files and calls stack_ds or concatenate_ds,
    depending on whether `axis` is absent from or present in the dataset,
    respectively.

    See `dimarray.read_nc` for more complete documentation.

    See Also
    --------
    read_nc
    """
    variables = None
    dimensions = None

    datasets = []
    for fn in fnames:
        with DatasetOnDisk(fn) as f:
            ds = f.read(names, **kwargs)

        # check that the same variables are present in the file
        if variables is None:
            variables = ds.keys()
        else:
            variables_new = ds.keys()
            assert variables == variables_new, \
                ("netCDF files must contain the same set of "
                 "variables to be concatenated/stacked")

        # check that the same dimensions are present in the file
        if dimensions is None:
            dimensions = ds.dims
        else:
            dimensions_new = ds.dims
            assert dimensions == dimensions_new, \
                ("netCDF files must contain the same set of "
                 "dimensions to be concatenated/stacked")

        datasets.append(ds)

    # check that the axis is an existing dimension, if required
    if concatenate_only and axis not in dimensions:
        raise ValueError('required axis {} not found, only got {}'.format(axis, dimensions))

    # Join dataset
    if axis in dimensions:
        ds = concatenate_ds(datasets, axis=axis, align=align, sort=sort, join=join)
        # when concatenating along an existing axis, keys is used to
        # reindex the result along that axis
        if keys is not None:
            ds = ds.reindex_axis(keys, axis=axis)

    else:
        # use file names as keys by default
        if keys is None:
            keys = [os.path.splitext(fn)[0] for fn in fnames]
        ds = stack_ds(datasets, axis=axis, keys=keys, align=align, sort=sort, join=join)

    return ds
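
A hedged usage sketch for this variant, assuming yearly files that already share a 'time' dimension (the file names and axis are illustrative, not from the source): because 'time' exists in each dataset, the files are concatenated along it, and align, sort and join control how the remaining axes are matched up.

# hypothetical usage: 'time' is an existing dimension, so the files are
# concatenated along it; other axes are aligned with an outer join
ds = _read_multinc(['year2000.nc', 'year2001.nc'],
                   axis='time', align=True, sort=True, join='outer')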
Example #3
import os
import warnings

# read_dataset, stack_ds and concatenate_ds are helpers defined in the
# surrounding dimarray code
def read_multinc(fnames, nms=None, axis=None, keys=None, align=False,
                 concatenate_only=False, **kwargs):
    """ read multiple netCDF files 

    Parameters
    ----------
    fnames: list of file names or file handles to be read
    nms: variable names to be read
    axis, optional: string, dimension along which the files are concatenated 
        (created as new dimension if not already existing)
    keys, optional: sequence to be passed to stack_ds, if axis is not part of the dataset
    align, optional: if True, align the axes prior to stacking (default: False)
    concatenate_only, optional: if True, only concatenate along an existing axis (and raise an error if the axis does not exist)
    **kwargs: keyword arguments passed to io.nc.read_variable ('axis' cannot
        be passed this way, but indices can be given as a dictionary if
        needed, e.g. {'time': 2010})

    Returns
    -------
    dimarray's Dataset instance

    This function reads several files and calls stack_ds or concatenate_ds,
    depending on whether `axis` is absent from or present in the dataset,
    respectively.
    """
    variables = None
    dimensions = None

    datasets = []
    for fn in fnames:
        ds = read_dataset(fn, nms, **kwargs)

        # check that the same variables are present in the file
        if variables is None:
            variables = ds.keys()
        else:
            variables_new = ds.keys()
            assert variables == variables_new, \
                ("netCDF files must contain the same set of "
                 "variables to be concatenated/stacked")

        # check that the same dimensions are present in the file
        if dimensions is None:
            dimensions = ds.dims
        else:
            dimensions_new = ds.dims
            assert dimensions == dimensions_new, \
                ("netCDF files must contain the same set of "
                 "dimensions to be concatenated/stacked")

        datasets.append(ds)

    # check that the axis is an existing dimension, if required
    if concatenate_only and axis not in dimensions:
        raise ValueError('required axis {} not found, only got {}'.format(axis, dimensions))

    # Join dataset
    if axis in dimensions:
        # the keys argument only applies when stacking along a new axis
        if keys is not None:
            warnings.warn('keys argument is ignored when concatenating '
                          'along an existing axis')
        ds = concatenate_ds(datasets, axis=axis)

    else:
        # use file names as keys by default
        if keys is None:
            keys = [os.path.splitext(fn)[0] for fn in fnames]
        ds = stack_ds(datasets, axis=axis, keys=keys, align=align)

    return ds
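
A sketch of the other code path, with hypothetical file names, axis and keys: since 'model' is assumed not to be an existing dimension, the datasets are stacked along a new 'model' axis, with explicit keys overriding the file-name default.

# hypothetical usage: 'model' is a new axis, so the datasets are stacked
# along it, with explicit keys instead of the file-name default
ds = read_multinc(['echam.nc', 'ipsl.nc'], axis='model',
                  keys=['ECHAM', 'IPSL'], align=True)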