Example #1
def test_parse_diagnostics(all_mds_datadirs, layers_mds_datadirs):
    """Make sure we can parse the available_diagnostics.log file."""
    import os
    from xmitgcm.utils import parse_available_diagnostics
    dirname, expected = all_mds_datadirs
    diagnostics_fname = os.path.join(dirname, 'available_diagnostics.log')
    ad = parse_available_diagnostics(diagnostics_fname)

    # a somewhat random sampling of diagnostics
    expected_diags = {
        'UVEL': {
            'dims': ['k', 'j', 'i_g'],
            'attrs': {
                'units': 'm/s',
                'long_name': 'Zonal Component of Velocity (m/s)',
                'standard_name': 'UVEL',
                'mate': 'VVEL'
            }
        },
        'TFLUX': {
            'dims': ['j', 'i'],
            'attrs': {
                'units': 'W/m^2',
                'long_name': 'total heat flux (match heat-content '
                'variations), >0 increases theta',
                'standard_name': 'TFLUX'
            }
        }
    }

    for key, val in expected_diags.items():
        assert ad[key] == val

    # test layers
    dirname, expected = layers_mds_datadirs
    diagnostics_fname = os.path.join(dirname, 'available_diagnostics.log')
    ad = parse_available_diagnostics(diagnostics_fname)

    expected_diags = {
        'LaUH1RHO': {
            'dims': ['_UNKNOWN_', 'j', 'i_g'],
            'attrs': {
                'units': 'm.m/s',
                'long_name': 'Layer Integrated  zonal Transport (UH, m^2/s)',
                'standard_name': 'LaUH1RHO',
                'mate': 'LaVH1RHO'
            }
        },
    }

    for key, val in expected_diags.items():
        assert ad[key] == val
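
A minimal usage sketch of the function exercised above (the run directory is a placeholder): parse_available_diagnostics reads an MITgcm available_diagnostics.log and returns a dict keyed by diagnostic name, each entry carrying the 'dims' and 'attrs' asserted in the test.

import os
from xmitgcm.utils import parse_available_diagnostics

run_dir = "/path/to/mitgcm/run"  # placeholder: any run directory containing the log file
ad = parse_available_diagnostics(os.path.join(run_dir, "available_diagnostics.log"))

print(ad["UVEL"]["dims"])            # e.g. ['k', 'j', 'i_g']
print(ad["UVEL"]["attrs"]["units"])  # e.g. 'm/s'
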
Example #2
def from_catalog(name, catalog_url=None):
    """
    Import oceandataset using a yaml catalog.
    Try to use :py:mod:`intake-xarray`;
    otherwise fall back to
    :py:func:`xmitgcm.open_mdsdataset`.

    Parameters
    ----------
    name: str
        Name of the oceandataset to open.
    catalog_url: str or None
        Path from which to read the catalog.
        If None, use SciServer's catalogs.

    References
    ----------
    | intake-xarray: https://github.com/intake/intake-xarray
    | xmitgcm: https://xmitgcm.readthedocs.io/en/stable/usage.html
    """

    # Message
    print("Opening {}.".format(name))
    cat, entries, url, intake_switch = _find_entries(name, catalog_url)

    # Store all datasets
    datasets = []
    metadata = {}
    for entry in entries:
        if intake_switch:
            # Use intake-xarray

            # Pop metadata
            mtdt = cat[entry].metadata

            # Create ds
            ds = cat[entry].to_dask()
        else:
            # Pop args and metadata
            args = cat[entry].pop("args")
            mtdt = cat[entry].pop("metadata", None)

            # If iters is a string, it needs to be evaluated (likely a range)
            iters = args.pop("iters", None)
            if isinstance(iters, str) and "range" in iters:
                iters = eval(iters)
            if iters is not None:
                args["iters"] = iters

            # Create ds
            with _warnings.catch_warnings():
                # Not sure why: Marcello's catalogs print a lot of warnings, Neil's do not.
                # TODO: this needs to be addressed
                _warnings.simplefilter("ignore")
                ds = _xmitgcm.open_mdsdataset(**args)

        # Rename
        rename = mtdt.pop("rename", None)
        ds = ds.rename(rename)

        # swaps dimension k (index space) to Z (depth) - LLC data
        swap_dims = mtdt.pop("swap_dims", None)
        if swap_dims is not None:
            ds = ds.swap_dims(swap_dims)

        # Fix Z dimensions (Zmd, ...)
        default_Zs = ["Zp1", "Zu", "Zl", "Z"]
        # Make sure they're sorted by decreasing name length
        default_Zs = sorted(default_Zs, key=len, reverse=True)
        for Zdim in default_Zs:  # pragma: no cover
            for dim, size in ds.sizes.items():
                if dim in default_Zs:
                    continue
                elif Zdim in dim:
                    if size == 1:
                        ds = ds.squeeze(dim)
                    else:
                        if Zdim in ds.dims:
                            ds = ds.rename({Zdim: "tmp"})
                            ds = ds.rename({"tmp": Zdim, dim: Zdim})
                        else:
                            ds = ds.rename({dim: Zdim})

        # Original output
        or_out = mtdt.pop("original_output", None)
        if or_out is not None:
            for var in ds.data_vars:
                ds[var].attrs["original_output"] = or_out

        # Select
        isel = mtdt.pop("isel", None)
        if isel is not None:
            isel = {key: eval(value) for key, value in isel.items()}
            ds = ds.isel(isel)

        # Append
        datasets.append(ds)

        # Metadata
        metadata = {**metadata, **mtdt}

    # Merge
    ds = _xr.merge(datasets)

    # Consistent chunking
    chunks = {}
    for var in ds.data_vars:
        if ds[var].chunks is not None:
            for i, dim in enumerate(ds[var].dims):
                chunk = ds[var].chunks[i]
                if dim not in chunks or len(chunks[dim]) < len(chunk):
                    chunks[dim] = chunk
    ds = ds.chunk(chunks)

    # Initialize OceanDataset
    od = _OceanDataset(ds)

    # Shift averages
    shift_averages = metadata.pop("shift_averages", None)
    if shift_averages is not None:
        od = od.shift_averages(**shift_averages)

    # Set OceanSpy stuff
    for var in ["aliases", "parameters", "name", "description", "projection"]:
        val = metadata.pop(var, None)
        if val is not None:
            od = eval("od.set_{}(val)".format(var))

    # Manipulate coordinates
    manipulate_coords = metadata.pop("manipulate_coords", None)
    if manipulate_coords is not None:
        od = od.manipulate_coords(**manipulate_coords)

    grid_periodic = metadata.pop("grid_periodic", None)
    if grid_periodic is not None:
        od = od.set_grid_periodic(**grid_periodic)

    # Set grid coordinates
    grid_coords = metadata.pop("grid_coords", None)
    if grid_coords is not None:
        od = od.set_grid_coords(**grid_coords)

    #  Set grid topology if present (e.g. llc4320)
    face_connections = metadata.pop("face_connections", None)
    if face_connections is not None:
        od = od.set_face_connections(**face_connections)

    # Set attributes (use xmitgcm)
    try:
        from xmitgcm.variables import (
            vertical_coordinates,
            horizontal_grid_variables,
            vertical_grid_variables,
            volume_grid_variables,
            mask_variables,
            state_variables,
            package_state_variables,
            extra_grid_variables,
        )
        from xmitgcm.utils import parse_available_diagnostics
        from xmitgcm import default_diagnostics

        diagnostics = parse_available_diagnostics(default_diagnostics.__file__)
        variables = _OrderedDict(
            list(vertical_coordinates.items())
            + list(horizontal_grid_variables.items())
            + list(vertical_grid_variables.items())
            + list(volume_grid_variables.items())
            + list(mask_variables.items())
            + list(state_variables.items())
            + list(package_state_variables.items())
            + list(extra_grid_variables.items())
        )
        variables = _OrderedDict({**diagnostics, **variables})

        # My extra attributes
        variables["Temp"] = variables.pop("T")
        variables["HFacC"] = variables.pop("hFacC")
        variables["HFacW"] = variables.pop("hFacW")
        variables["HFacS"] = variables.pop("hFacS")

        for var in ["HFacC", "HFacW", "HFacS"]:
            variables[var]["attrs"]["units"] = " "

        variables["phiHyd"] = variables.pop("PHIHYD")
        variables["phiHydLow"] = dict(
            attrs=dict(
                long_name=(
                    "Phi-Hydrostatic at r-lower boundary"
                    "(bottom in z-coordinates,"
                    "top in p-coordinates)"
                ),
                units=variables["phiHyd"]["attrs"]["units"],
            )
        )

        variables["AngleCS"] = dict(
            attrs=dict(
                standard_name="Cos of grid orientation angle",
                long_name="AngleCS",
                units=" ",
                coordinate="YC XC",
            )
        )
        variables["AngleSN"] = dict(
            attrs=dict(
                standard_name="Sin of grid orientation angle",
                long_name="AngleSN",
                units=" ",
            )
        )
        variables["dxF"] = dict(
            attrs=dict(
                standard_name="x cell face separation",
                long_name="cell x size",
                units="m",
            )
        )
        variables["dyF"] = dict(
            attrs=dict(
                standard_name="y cell face separation",
                long_name="cell y size",
                units="m",
            )
        )
        variables["dxV"] = dict(
            attrs=dict(
                standard_name="x v-velocity separation",
                long_name="cell x size",
                units="m",
            )
        )
        variables["dyU"] = dict(
            attrs=dict(
                standard_name="y u-velocity separation",
                long_name="cell y size",
                units="m",
            )
        )
        variables["fCori"] = dict(
            attrs=dict(
                standard_name="Coriolis f at cell center",
                long_name="Coriolis f",
                units="s^-1",
            )
        )
        variables["fCoriG"] = dict(
            attrs=dict(
                standard_name="Coriolis f at cell corner",
                long_name="Coriolis f",
                units="s^-1",
            )
        )

        #  LLC4320 Surface Variables
        variables["SSS"] = dict(
            attrs=dict(
                standard_name=("surface sea_water_salinity"),
                units=variables["S"]["attrs"]["units"],
            )
        )
        variables["SST"] = dict(
            attrs=dict(
                standard_name=("surface sea_water_temperature"),
                units=variables["Temp"]["attrs"]["units"],
            )
        )
        variables["SSU"] = dict(
            attrs=dict(
                standard_name=("surface sea_water_x_velocity"),
                units=variables["U"]["attrs"]["units"],
            )
        )
        variables["SSV"] = dict(
            attrs=dict(
                standard_name=("surface sea_water_y_velocity"),
                units=variables["V"]["attrs"]["units"],
            )
        )

        # Extract variables in dataset only
        variables = _OrderedDict(
            **{var: variables[var] for var in od._ds.variables if var in variables}
        )

        # Add attributes
        for var in variables:
            attrs = variables[var]["attrs"]
            for attr in attrs:
                if attr not in od._ds[var].attrs:
                    od._ds[var].attrs[attr] = attrs[attr]
    except ImportError:  # pragma: no cover
        pass

    # Print message
    toprint = od.description
    for add_str in ["citation", "characteristics", "mates"]:
        thisprint = metadata.pop(add_str, None)
        if thisprint is not None:
            if add_str == "mates":
                add_str = "see also"
            if thisprint[-1:] == "\n":
                thisprint = thisprint[:-1]
            toprint += "\n{}:\n * {}".format(
                add_str.capitalize(), thisprint.replace("\n", "\n * ")
            )
    if toprint is not None:
        print(toprint.replace("\n\n", "\n"))

    return od
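
A minimal usage sketch, assuming this is OceanSpy's open_oceandataset.from_catalog (the entry names below are placeholders for whatever the catalog lists); with catalog_url=None the SciServer catalogs are used, as the docstring states.

from oceanspy.open_oceandataset import from_catalog

od = from_catalog("get_started")                           # hypothetical SciServer entry
# od = from_catalog("my_dataset", "path/to/catalog.yaml")  # hypothetical custom catalog
print(od.description)
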
def from_catalog(name, catalog_url=None):
    """
    Import oceandataset using a yaml catalog.
    Try to use :py:mod:`intake-xarray`;
    otherwise fall back to
    :py:func:`xmitgcm.open_mdsdataset`.

    Parameters
    ----------
    name: str
        Name of the oceandataset to open.
    catalog_url: str or None
        Path from which to read the catalog.
        If None, use SciServer's catalogs.

    References
    ----------
    | intake-xarray: https://github.com/intake/intake-xarray
    | xmitgcm: https://xmitgcm.readthedocs.io/en/stable/usage.html
    """

    # Message
    print('Opening {}.'.format(name))
    cat, entries, url, intake_switch = _find_entries(name, catalog_url)

    # Store all datasets
    datasets = []
    chunks = {}
    metadata = {}
    for entry in entries:
        if intake_switch:
            # Use intake-xarray

            # Pop metadata
            mtdt = cat[entry].metadata

            # Create ds
            ds = cat[entry].to_dask()
        else:
            # Pop args and metadata
            args = cat[entry].pop('args')
            mtdt = cat[entry].pop('metadata', None)

            # If iters is a string, it needs to be evaluated (likely a range)
            iters = args.pop('iters', None)
            if isinstance(iters, str) and 'range' in iters:
                iters = eval(iters)
            if iters is not None:
                args['iters'] = iters

            # Create ds
            with _warnings.catch_warnings():
                # Not sure why: Marcello's catalogs print a lot of warnings, Neil's do not.
                # TODO: this needs to be addressed
                _warnings.simplefilter("ignore")
                ds = _xmitgcm.open_mdsdataset(**args)

        # Rename
        rename = mtdt.pop('rename', None)
        ds = ds.rename(rename)

        # Fix Z dimensions (Zmd, ...)
        default_Zs = ['Zp1', 'Zu', 'Zl', 'Z']
        # Make sure they're sorted by decreasing name length
        default_Zs = sorted(default_Zs, key=len, reverse=True)
        for Zdim in default_Zs:  # pragma: no cover
            for dim, size in ds.sizes.items():
                if dim in default_Zs:
                    continue
                elif Zdim in dim:
                    if size == 1:
                        ds = ds.squeeze(dim)
                    else:
                        if Zdim in ds.dims:
                            ds = ds.rename({Zdim: 'tmp'})
                            ds = ds.rename({'tmp': Zdim, dim: Zdim})
                        else:
                            ds = ds.rename({dim: Zdim})

        # Original output
        or_out = mtdt.pop('original_output', None)
        if or_out is not None:
            for var in ds.data_vars:
                ds[var].attrs['original_output'] = or_out

        # Select
        isel = mtdt.pop('isel', None)
        if isel is not None:
            isel = {key: eval(value) for key, value in isel.items()}
            ds = ds.isel(isel)

        # Append
        datasets.append(ds)

        # Metadata
        metadata = {**metadata, **mtdt}

    # Merge
    ds = _xr.merge(datasets)

    # Consistent chunking
    chunks = {}
    for var in ds.data_vars:
        if ds[var].chunks is not None:
            for i, dim in enumerate(ds[var].dims):
                chunk = ds[var].chunks[i]
                if dim not in chunks or len(chunks[dim]) < len(chunk):
                    chunks[dim] = chunk
    ds = ds.chunk(chunks)

    # Initialize OceanDataset
    od = _OceanDataset(ds)

    # Shift averages
    shift_averages = metadata.pop('shift_averages', None)
    if shift_averages is not None:
        od = od.shift_averages(**shift_averages)

    # Set OceanSpy stuff
    for var in ['aliases', 'parameters', 'name', 'description', 'projection']:
        val = metadata.pop(var, None)
        if val is not None:
            od = eval('od.set_{}(val)'.format(var))

    # Manipulate coordinates
    manipulate_coords = metadata.pop('manipulate_coords', None)
    if manipulate_coords is not None:
        od = od.manipulate_coords(**manipulate_coords)

    # Set grid coordinates
    grid_coords = metadata.pop('grid_coords', None)
    if grid_coords is not None:
        od = od.set_grid_coords(**grid_coords)

    # Set attributes (use xmitgcm)
    try:
        from xmitgcm.variables import (vertical_coordinates,
                                       horizontal_grid_variables,
                                       vertical_grid_variables,
                                       volume_grid_variables,
                                       mask_variables,
                                       state_variables,
                                       package_state_variables,
                                       extra_grid_variables)
        from xmitgcm.utils import parse_available_diagnostics
        from xmitgcm import default_diagnostics
        diagnostics = parse_available_diagnostics(default_diagnostics.__file__)
        variables = _OrderedDict(list(vertical_coordinates.items())
                                 + list(horizontal_grid_variables.items())
                                 + list(vertical_grid_variables.items())
                                 + list(volume_grid_variables.items())
                                 + list(mask_variables.items())
                                 + list(state_variables.items())
                                 + list(package_state_variables.items())
                                 + list(extra_grid_variables.items()))
        variables = _OrderedDict({**diagnostics, **variables})

        # My extra attributes
        variables['Temp'] = variables.pop('T')
        variables['HFacC'] = variables.pop('hFacC')
        variables['HFacW'] = variables.pop('hFacW')
        variables['HFacS'] = variables.pop('hFacS')

        for var in ['HFacC', 'HFacW', 'HFacS']:
            variables[var]['attrs']['units'] = " "

        variables['phiHyd'] = variables.pop('PHIHYD')
        variables['phiHydLow'] = dict(
            attrs=dict(long_name=('Phi-Hydrostatic at r-lower boundary '
                                  '(bottom in z-coordinates, '
                                  'top in p-coordinates)'),
                       units=variables['phiHyd']['attrs']['units']))

        variables['AngleCS'] = dict(
            attrs=dict(standard_name="Cos of grid orientation angle",
                       long_name="AngleCS",
                       units=" ", coordinate="YC XC"))
        variables['AngleSN'] = dict(
            attrs=dict(standard_name="Sin of grid orientation angle",
                       long_name="AngleSN",
                       units=" "))
        variables['dxF'] = dict(
            attrs=dict(standard_name="x cell face separation",
                       long_name="cell x size",
                       units="m"))
        variables['dyF'] = dict(
            attrs=dict(standard_name="y cell face separation",
                       long_name="cell y size",
                       units="m"))
        variables['dxV'] = dict(
            attrs=dict(standard_name="x v-velocity separation",
                       long_name="cell x size",
                       units="m"))
        variables['dyU'] = dict(
            attrs=dict(standard_name="y u-velocity separation",
                       long_name="cell y size",
                       units="m"))
        variables['fCori'] = dict(
            attrs=dict(standard_name="Coriolis f at cell center",
                       long_name="Coriolis f",
                       units="s^-1"))
        variables['fCoriG'] = dict(
            attrs=dict(standard_name="Coriolis f at cell corner",
                       long_name="Coriolis f",
                       units="s^-1"))

        # Extract variables in dataset only
        variables = _OrderedDict(**{var: variables[var]
                                    for var in od._ds.variables
                                    if var in variables})

        # Add attributes
        for var in variables:
            attrs = variables[var]['attrs']
            for attr in attrs:
                if attr not in od._ds[var].attrs:
                    od._ds[var].attrs[attr] = attrs[attr]
    except ImportError:  # pragma: no cover
        pass

    # Print message
    toprint = od.description
    for add_str in ['citation', 'characteristics', 'mates']:
        thisprint = metadata.pop(add_str, None)
        if thisprint is not None:
            if add_str == 'mates':
                add_str = 'see also'
            if thisprint[-1:] == '\n':
                thisprint = thisprint[:-1]
            toprint += '\n{}:\n * {}'.format(add_str.capitalize(),
                                             thisprint.replace('\n', '\n * '))
    if toprint is not None:
        print(toprint.replace('\n\n', '\n'))

    return od
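
Both versions share the chunk-harmonization step after the merge: for each dimension, keep the chunking with the most blocks found among the merged variables, then re-chunk the whole dataset with it. A self-contained sketch on hypothetical dask-backed data (requires dask):

import numpy as np
import xarray as xr

# Two variables sharing dimension "X", merged with different chunkings.
ds = xr.merge([
    xr.Dataset({"a": (("X",), np.zeros(8))}).chunk({"X": 4}),  # chunks (4, 4)
    xr.Dataset({"b": (("X",), np.zeros(8))}).chunk({"X": 2}),  # chunks (2, 2, 2, 2)
])

# Same logic as in from_catalog: pick the finest chunking per dimension.
chunks = {}
for var in ds.data_vars:
    if ds[var].chunks is not None:
        for i, dim in enumerate(ds[var].dims):
            chunk = ds[var].chunks[i]
            if dim not in chunks or len(chunks[dim]) < len(chunk):
                chunks[dim] = chunk

ds = ds.chunk(chunks)  # both "a" and "b" now use the (2, 2, 2, 2) chunking along X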