def _write_xy_provenance(cfg, cubes, plot_path, title, *attrs):
    """Write provenance information for X-Y plots."""
    cubes = cubes.copy()
    # Accept a single Cube as well as a CubeList
    if isinstance(cubes, iris.cube.Cube):
        cubes = iris.cube.CubeList([cubes])
    # Ancestor files of merged datasets are stored as '|'-separated paths
    ancestors = []
    for attr in attrs:
        ancestors.extend(attr['filename'].split('|'))
    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cubes, netcdf_path)
    long_name = ' and '.join([cube.long_name for cube in cubes])
    caption = f"Line plot of {long_name}"
    if title:
        caption += f" for {title}."
    else:
        caption += '.'
    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': caption,
        'plot_file': plot_path,
        'plot_types': ['line'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
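# A minimal self-contained sketch of the Cube/CubeList handling above (toy
# data; plain iris.save stands in for the io.iris_save helper):
import iris
import numpy as np

cube = iris.cube.Cube(np.arange(3.0), long_name='air temperature anomaly')
cubes = iris.cube.CubeList([cube])
caption = ' and '.join(c.long_name for c in cubes)
iris.save(cubes, 'example.nc')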
Example #2
def postprocess_mean(cfg, cube, data):
    """Postprocess mean prediction cube."""
    logger.info("Postprocessing mean prediction cube %s",
                cube.summary(shorten=True))
    cube = _collapse_regular_cube(cfg, cube)
    _convert_units(cfg, cube)
    new_path = mlr.get_new_path(cfg, data['filename'])
    io.iris_save(cube, new_path)
    logger.info("Mean prediction: %s %s", cube.data, cube.units)
    _write_provenance(cfg, "Postprocessed", cube, new_path, [data['filename']])
Example #3
def _write_provenance(cfg, data_frame, plot_path, title, ancestors,
                      **cube_kwargs):
    """Write provenance information."""
    cube = ec.pandas_object_to_cube(data_frame, **cube_kwargs)
    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cube, netcdf_path)
    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': f"Boxplot of {title}.",
        'plot_file': plot_path,
        'plot_types': ['box'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
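# ec.pandas_object_to_cube comes from the emergent-constraints helpers; as a
# rough stand-in for illustration, plain iris (>= 3.3) can convert pandas
# objects to cubes:
import iris.pandas
import pandas as pd

df = pd.DataFrame({'residual': [0.1, -0.2, 0.05]})  # toy residual values
cube = iris.pandas.as_cubes(df)[0]  # one cube per DataFrame column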
Example #4
def _write_map_provenance(cfg, cube, plot_path, title, *attrs):
    """Write provenance information for map plots."""
    cube = cube.copy()
    ancestors = []
    for attr in attrs:
        ancestors.extend(attr['filename'].split('|'))
    netcdf_path = mlr.get_new_path(cfg, plot_path)
    io.iris_save(cube, netcdf_path)
    record = {
        'ancestors': ancestors,
        'authors': ['schlund_manuel'],
        'caption': f"Geographical distribution of {cube.long_name} for "
        f"{title}.",
        'plot_file': plot_path,
        'plot_types': ['geo'],
        'references': ['schlund20jgr'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(netcdf_path, record)
Example #5
def postprocess_errors(cfg, ref_cube, error_datasets, cov_estim_datasets):
    """Postprocess errors."""
    logger.info(
        "Postprocessing errors using mean prediction cube %s as reference",
        ref_cube.summary(shorten=True))

    # Extract covariance
    (cov_cube,
     error_datasets) = _get_covariance_dataset(error_datasets, ref_cube)

    # Extract squared errors
    squared_error_cube = mlr.get_squared_error_cube(ref_cube, error_datasets)

    # Extract variance from covariance if desired
    if cfg.get('add_var_from_cov', True) and cov_cube is not None:
        var = np.ma.empty(ref_cube.shape, dtype=ref_cube.dtype)
        mask = np.ma.getmaskarray(ref_cube.data)
        var[mask] = np.ma.masked
        # The diagonal of the covariance matrix holds the variance of each
        # unmasked grid cell
        var[~mask] = np.diagonal(cov_cube.data.copy())
        squared_error_cube.data += var
        logger.debug(
            "Added variance calculated from covariance to squared error "
            "datasets")
        # Squared errors are now available even if no explicit error
        # datasets were given
        if not error_datasets:
            error_datasets = True

    # Extract basename for error cubes
    basepath = mlr.get_new_path(cfg, ref_cube.attributes['filename'])
    basepath = basepath.replace('.nc', '_error.nc')

    # Lower and upper error bounds
    if error_datasets:
        _calculate_lower_error_bound(cfg, squared_error_cube, basepath)
        _calculate_upper_error_bound(cfg, squared_error_cube, basepath)

        # Estimated real error using estimated covariance
        if cov_estim_datasets:
            _estimate_real_error(cfg, squared_error_cube,
                                 cov_estim_datasets[0], basepath)

    # Real error
    if cov_cube is not None:
        _calculate_real_error(cfg, ref_cube, cov_cube, basepath)
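# A self-contained sketch of the variance-from-covariance step above: the
# diagonal of a covariance matrix holds the variances of the individual
# elements (toy 2x2 example):
import numpy as np

cov = np.array([[4.0, 1.0],
                [1.0, 9.0]])
var = np.diagonal(cov)  # -> array([4., 9.])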
def rescale_labels(cfg, y_data, y_mean, y_std):
    """Rescale labels."""
    input_data = _get_input_data(cfg)
    labels_to_rescale = select_metadata(input_data,
                                        var_type='label_to_rescale')
    _check_datasets(labels_to_rescale, 'label_to_rescale')

    # Get groups
    groups = []
    for dataset in labels_to_rescale:
        group = _get_group(dataset, cfg['group_by_attributes'])
        groups.append(group)
        dataset['group__for__rescaling'] = group

    groups.sort()
    if set(groups) != set(y_data.index):
        raise ValueError(
            f"Expected identical groups for 'label' and 'label_to_rescale' "
            f"data, got\n'label': {y_data.index.values}\n'label_to_rescale': "
            f"{np.array(groups)}")

    # Rescale data
    ref_cube = _get_ref_cube(labels_to_rescale)
    for dataset in labels_to_rescale:
        cube = iris.load_cube(dataset['filename'])
        # Scale the data so that this group's label matches the constrained
        # estimate y_mean
        rescaling_factor = (
            y_mean / y_data.loc[dataset['group__for__rescaling']].values)
        logger.info("Rescaling '%s' with factor %.2f",
                    dataset['group__for__rescaling'], rescaling_factor)
        rescaled_cube = cube.copy(cube.data * rescaling_factor)

        # Adapt metadata
        rescaled_dataset = deepcopy(dataset)
        rescaled_dataset['var_type'] = 'label'
        rescaled_dataset['rescaled'] = 'using emergent relationship'
        if '_label' in dataset['filename']:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '_label_to_rescale', '_rescaled_label')
        else:
            rescaled_dataset['filename'] = dataset['filename'].replace(
                '.nc', '_rescaled_label.nc')

        # Save data
        rescaled_dataset['filename'] = mlr.get_new_path(
            cfg, rescaled_dataset['filename'])
        io.metadata_to_netcdf(rescaled_cube, rescaled_dataset)

        # Provenance
        record = {
            'ancestors': [dataset['filename']] + _get_ec_ancestors(cfg),
            'authors': ['schlund_manuel'],
            'caption':
            f"Rescaled {rescaled_cube.long_name} for "
            f"{mlr.get_alias(rescaled_dataset)} using emergent "
            f"relationship.",
            'references': ['schlund20jgr'],
            'themes': ['EC'],
        }
        with ProvenanceLogger(cfg) as provenance_logger:
            provenance_logger.log(rescaled_dataset['filename'], record)

    # Rescale MMM to estimate error
    logger.debug("Estimating error using global error %e", y_std)
    mmm_cube = _get_mmm_cube(labels_to_rescale)
    error_cube = ref_cube.copy(mmm_cube.data * y_std / y_data.mean().values)
    error_dataset = _get_error_dataset(cfg, labels_to_rescale)
    io.metadata_to_netcdf(error_cube, error_dataset)

    # Provenance
    record = {
        'ancestors':
        ([d['filename'] for d in labels_to_rescale] + _get_ec_ancestors(cfg)),
        'authors': ['schlund_manuel'],
        'caption':
        f"Rescaled {error_cube.long_name} using emergent "
        f"relationship.",
        'references': ['schlund20jgr'],
        'themes': ['EC'],
    }
    with ProvenanceLogger(cfg) as provenance_logger:
        provenance_logger.log(error_dataset['filename'], record)
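# A toy sketch of the rescaling arithmetic used above (values hypothetical):
import pandas as pd

y_data = pd.Series([2.0, 4.0], index=['MODEL_A', 'MODEL_B'])
y_mean = 3.0  # constrained best estimate
factor = y_mean / y_data.loc['MODEL_A']  # -> 1.5
rescaled = 2.0 * factor  # MODEL_A's label now equals y_mean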