def main(cfg):
    """Run the diagnostic."""
    cfg = get_default_settings(cfg)
    diag = check_cfg(cfg)
    sns.set(**cfg.get('seaborn_settings', {}))

    # Get input data
    input_data = list(cfg['input_data'].values())
    input_data.extend(io.netcdf_to_metadata(cfg, pattern=cfg.get('pattern')))
    input_data = deepcopy(input_data)
    check_input_data(input_data)
    grouped_data = group_metadata(input_data, 'dataset')

    # Calculate X-axis of emergent constraint
    diag_func = globals()[diag]
    (diag_data, var_attrs, attrs) = diag_func(grouped_data, cfg)
    attrs.update(get_global_attributes(input_data, cfg))

    # Save data
    netcdf_path = get_diagnostic_filename(diag, cfg)
    io.save_scalar_data(diag_data, netcdf_path, var_attrs, attributes=attrs)
    logger.info("Found data:\n%s", pformat(diag_data))

    # Provenance
    provenance_record = ec.get_provenance_record(
        {diag: attrs}, [diag],
        caption=attrs['plot_xlabel'],
        ancestors=[d['filename'] for d in input_data])
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, provenance_record)
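For context, diagnostics like this one are normally launched through ESMValTool's run_diagnostic context manager; a minimal entry-point sketch assuming that standard pattern (not part of the original snippet):

if __name__ == '__main__':
    from esmvaltool.diag_scripts.shared import run_diagnostic

    # run_diagnostic() reads the settings file passed by ESMValTool and
    # yields the recipe configuration dict that main() expects.
    with run_diagnostic() as config:
        main(config)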
def get_input_data(cfg, pattern=None, check_mlr_attributes=True, ignore=None):
    """Get input data and check MLR attributes if desired.

    Use ``input_data`` and ancestors to get all relevant input files.

    Parameters
    ----------
    cfg : dict
        Recipe configuration.
    pattern : str, optional
        Pattern matched against ancestor file names.
    check_mlr_attributes : bool, optional (default: True)
        If ``True``, return only datasets with valid MLR attributes. If
        ``False``, return all found datasets.
    ignore : list of dict, optional
        Ignore specific datasets by specifying multiple :obj:`dict`s of
        metadata. By setting an attribute to ``None``, ignore all datasets
        which do not have that attribute.

    Returns
    -------
    list of dict
        List of input datasets.

    Raises
    ------
    ValueError
        No input data found or at least one dataset has invalid attributes.

    """
    logger.debug("Extracting input files")
    input_data = list(cfg['input_data'].values())
    input_data.extend(io.netcdf_to_metadata(cfg, pattern=pattern))
    input_data = deepcopy(input_data)
    if ignore is not None:
        valid_data = []
        ignored_datasets = []
        logger.info("Ignoring files with %s", ignore)
        for kwargs in ignore:
            ignored_datasets.extend(_get_datasets(input_data, **kwargs))
        for dataset in input_data:
            if dataset not in ignored_datasets:
                valid_data.append(dataset)
    else:
        valid_data = input_data
    if not valid_data:
        raise ValueError("No input data found")
    if check_mlr_attributes:
        if not datasets_have_mlr_attributes(valid_data, log_level='error'):
            raise ValueError("At least one input dataset does not have valid "
                             "MLR attributes")
    logger.debug("Found files:")
    logger.debug(pformat([d['filename'] for d in valid_data]))
    return valid_data
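A hedged usage sketch of the ignore semantics documented above; the pattern and metadata keys are illustrative assumptions:

# Hypothetical call: collect ancestor files matching a pattern, require valid
# MLR attributes, and drop observational datasets as well as any dataset that
# lacks a 'tag' attribute (an ignore value of None matches datasets missing
# that attribute, as described in the docstring).
input_data = get_input_data(
    cfg,
    pattern='*prediction*.nc',
    check_mlr_attributes=True,
    ignore=[
        {'project': 'OBS'},
        {'tag': None},
    ],
)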
Example #3
def get_psi(cfg):
    """Get time-dependent ``psi`` data."""
    psi_cubes = {}
    psi_obs = []
    for (dataset, [data]) in group_metadata(
            io.netcdf_to_metadata(cfg, pattern='psi_*.nc'), 'dataset').items():
        cube = iris.load_cube(data['filename'])
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cubes[dataset] = cube
        if data['project'] == 'OBS':
            psi_obs.append(dataset)
    return (psi_cubes, psi_obs)
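A short caller sketch (hypothetical, not part of the original):

# get_psi() returns one annual-mean cube per dataset plus the names of the
# datasets belonging to the 'OBS' project.
(psi_cubes, psi_obs) = get_psi(cfg)
for obs_name in psi_obs:
    logger.info("Using observational psi dataset: %s", obs_name)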
Example #4
def main(cfg):
    """Run the diagnostic."""
    input_data = (
        select_metadata(cfg['input_data'].values(), short_name='tas') +
        select_metadata(cfg['input_data'].values(), short_name='tasa'))
    if not input_data:
        raise ValueError("This diagnostics needs 'tas' or 'tasa' variable")

    # Get tas data
    tas_cubes = {}
    tas_obs = []
    for (dataset, [data]) in group_metadata(input_data, 'dataset').items():
        cube = iris.load_cube(data['filename'])
        iris.coord_categorisation.add_year(cube, 'time')
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        tas_cubes[dataset] = cube
        if data['project'] == 'OBS':
            tas_obs.append(dataset)

    # Get time-dependent psi data
    psi_cubes = {}
    psi_obs = []
    for (dataset, [data]) in group_metadata(
            io.netcdf_to_metadata(cfg, pattern='psi_*.nc'), 'dataset').items():
        cube = iris.load_cube(data['filename'])
        cube = cube.aggregated_by('year', iris.analysis.MEAN)
        psi_cubes[dataset] = cube
        if data['project'] == 'OBS':
            psi_obs.append(dataset)

    # Get psi, ECS and lambda for models
    (psi_cube, ecs_cube, lambda_cube) = get_external_cubes(cfg)

    # Plots
    for obs_name in tas_obs:
        logger.info("Observation for tas: %s", obs_name)
        plot_temperature_anomaly(cfg, tas_cubes, lambda_cube, obs_name)
    for obs_name in psi_obs:
        logger.info("Observation for psi: %s", obs_name)
        plot_psi(cfg, psi_cubes, lambda_cube, obs_name)
        obs_cube = psi_cubes[obs_name]
        plot_emergent_relationship(cfg, psi_cube, ecs_cube, lambda_cube,
                                   obs_cube)
        plot_pdf(cfg, psi_cube, ecs_cube, obs_cube)
        plot_cdf(cfg, psi_cube, ecs_cube, obs_cube)

        # Print ECS range
        ecs_range = get_ecs_range(cfg, psi_cube, ecs_cube, obs_cube)
        logger.info("Observational constraint: Ψ = (%.2f ± %.2f) K",
                    np.mean(obs_cube.data), np.std(obs_cube.data))
        logger.info(
            "Constrained ECS range: (%.2f - %.2f) K with best "
            "estimate %.2f K", ecs_range[1], ecs_range[2], ecs_range[0])
Example #5
def test_netcdf_to_metadata(mock_walk, mock_load_cube, mock_logger,
                            mock_get_all_ancestors, cubes, walk_out, root,
                            output, n_logger):
    """Test cube to metadata."""
    ancestors = []
    for (files_root, _, files) in walk_out:
        new_files = [os.path.join(files_root, f) for f in files]
        ancestors.extend(new_files)
    mock_get_all_ancestors.return_value = ancestors
    mock_walk.return_value = walk_out
    mock_load_cube.side_effect = cubes
    if isinstance(output, type):
        with pytest.raises(output):
            io.netcdf_to_metadata({}, pattern=root, root=root)
    else:
        for dataset in output:
            dataset['short_name'] = dataset.pop('var_name')
            dataset.setdefault('standard_name', None)
        metadata = io.netcdf_to_metadata({}, pattern=root, root=root)
        assert metadata == output
    assert mock_logger.error.call_count == n_logger
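The mock arguments hint at a stack of mock.patch decorators; a hedged sketch of that ordering, where the dotted patch targets are placeholders and only the bottom-up mapping between decorators and arguments is the point:

import unittest.mock as mock

# With stacked decorators, the @mock.patch closest to the function fills the
# first parameter, so mock_walk maps to the os.walk patch and
# mock_get_all_ancestors to the topmost patch.  The targets below are
# hypothetical, not the module paths used by the real test.
@mock.patch('path.to.io_module.get_all_ancestor_files', autospec=True)
@mock.patch('path.to.io_module.logger', autospec=True)
@mock.patch('path.to.io_module.iris.load_cube', autospec=True)
@mock.patch('path.to.io_module.os.walk', autospec=True)
def test_netcdf_to_metadata_sketch(mock_walk, mock_load_cube, mock_logger,
                                   mock_get_all_ancestors):
    """Illustrative stub; the real test body is shown above."""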
Example #6
def test_netcdf_to_metadata(mock_walk, mock_load_cube, mock_logger,
                            mock_get_all_ancestors, root):
    """Test cube to metadata."""
    attrs = [
        {
            'dataset': 'model',
            'filename': 'path/to/model1.nc',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model1.yml',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model2.nc',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model3.nc',
            'project': 'CMIP42',
        },
        {
            'dataset': 'model',
            'filename': 'path/to/model4.nc',
            'project': 'CMIP42',
        },
    ]
    var_attrs = [
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'standard_name': STANDARD_NAME,
            'units': UNITS,
        },
        {
            'long_name': LONG_NAME,
            'var_name': SHORT_NAME,
            'standard_name': None,
            'units': UNITS,
        },
    ]
    cubes = [
        iris.cube.Cube(0, attributes=attrs[0], **var_attrs[0]),
        iris.cube.Cube(0, attributes=attrs[2], **var_attrs[2]),
        iris.cube.Cube(0, attributes=attrs[3], **var_attrs[3]),
        iris.cube.Cube(0, attributes=attrs[4], **var_attrs[4]),
    ]
    walk_output = [
        ('path/to', [], ['model1.nc', 'model1.yml']),
        ('path/to', ['d'], ['model2.nc', 'model3.nc', 'model4.nc']),
    ]
    output = deepcopy([{**attrs[i], **var_attrs[i]} for i in (0, 3, 4)])
    for out in output:
        out['short_name'] = out.pop('var_name')
        out.setdefault('standard_name', None)
    mock_get_all_ancestors.return_value = [a['filename'] for a in attrs]
    mock_walk.return_value = walk_output
    mock_load_cube.side_effect = cubes
    metadata = io.netcdf_to_metadata({}, pattern=root, root=root)
    assert metadata == output
    mock_logger.warning.assert_called()