Example No. 1
    def test_dataframe_attributes(self):
        """
        Test the attributes that get created for a dataframe.
        """
        attrs = {'CLASS': 'TABLE',
                 'FIELD_0_NAME': 'index',
                 'FIELD_1_NAME': 'float_data',
                 'FIELD_2_NAME': 'integer_data',
                 'TITLE': 'Table',
                 'VERSION': '0.2',
                 'float_data_dtype': 'float64',
                 'index_dtype': 'int64',
                 'index_names': numpy.array(['index'], dtype=object),
                 'integer_data_dtype': 'int64',
                 'metadata': '`Pandas.DataFrame` converted to HDF5 compound datatype.', # pylint: disable=line-too-long
                 'nrows': 10,
                 'python_type': '`Pandas.DataFrame`'}

        df = pandas.DataFrame(self.table_data)

        fname = 'test_dataframe_attributes.h5'
        with h5py.File(fname, 'w', **self.memory_kwargs) as fid:
            hdf5.write_dataframe(df, 'dataframe', fid)

            test = {k: v for k, v in fid['dataframe'].attrs.items()}
            self.assertDictEqual(test, attrs)
Example No. 2
    def test_dataframe_attributes(self):
        """
        Test the attributes that get created for a dataframe.
        """
        attrs = {
            "CLASS": "TABLE",
            "FIELD_0_NAME": "index",
            "FIELD_1_NAME": "float_data",
            "FIELD_2_NAME": "integer_data",
            "TITLE": "Table",
            "VERSION": "0.2",
            "float_data_dtype": "float64",
            "index_dtype": "int64",
            "index_names": numpy.array(["index"], dtype=object),
            "integer_data_dtype": "int64",
            "metadata":
            "`Pandas.DataFrame` converted to HDF5 compound datatype.",  # pylint: disable=line-too-long # noqa: E501
            "nrows": 10,
            "python_type": "`Pandas.DataFrame`",
        }

        df = pandas.DataFrame(self.table_data)

        fname = "test_dataframe_attributes.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            hdf5.write_dataframe(df, "dataframe", fid)

            test = {k: v for k, v in fid["dataframe"].attrs.items()}
            self.assertDictEqual(test, attrs)
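
Both attribute tests above rely on a `table_data` fixture and `memory_kwargs` that are not shown. A minimal sketch of what such a setUp could look like, assuming 10 rows of float and integer data and h5py's in-memory `core` driver (the fixture names and values are assumptions, not taken from the original test suite):

    def setUp(self):
        # hypothetical fixture: 10 rows of float and integer data
        self.table_data = {
            'float_data': numpy.random.ranf(10),
            'integer_data': numpy.random.randint(0, 10001, (10,)),
        }
        # keep the HDF5 file entirely in memory and discard it on close
        self.memory_kwargs = {'driver': 'core', 'backing_store': False}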
Example No. 3
def convert(aerosol_path, output_filename, compression, filter_opts):
    """
    Converts all the .pix and .cmp files found in `aerosol_path`
    to an HDF5 file.
    """
    # define a case switch
    func = {'pix': read_pix, 'cmp': read_cmp}

    # create the output file
    with h5py.File(output_filename, 'w') as fid:

        pattern = ['*.pix', '*.cmp']
        for p in pattern:
            search = pjoin(aerosol_path, p)
            files = glob.glob(search)
            for fname in files:
                pth, ext = splitext(fname)
                ext = ext.split(".")[-1]
                grp_name = basename(pth)
                out_path = ppjoin(ext, grp_name)

                # read/write
                df, extents = func[ext](fname)
                attrs = {'extents': wkt.dumps(extents),
                         'source filename': fname}
                write_dataframe(df, out_path, fid, compression=compression,
                                attrs=attrs, filter_opts=filter_opts)
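
A hedged usage sketch for the `convert` routine above; the aerosol directory, output filename and compression settings are placeholders:

    # hypothetical invocation; adjust the paths and filter options as required
    convert(aerosol_path='/data/aerosol',
            output_filename='aerosol.h5',
            compression=H5CompressionFilter.LZF,
            filter_opts=None)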
Example No. 4
def table_results(table_group,
                  compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Combine the residual results of each TABLE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    paths = find(table_group, 'TABLE')

    equivalent = []
    products = []
    name = []

    for pth in paths:
        dset = table_group[pth]
        equivalent.append(dset.attrs['equal'])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

    df = pandas.DataFrame({
        'product': products,
        'dataset_name': name,
        'equivalent': equivalent
    })

    # output
    write_dataframe(df,
                    'TABLE-EQUIVALENCY',
                    table_group,
                    compression,
                    title='EQUIVALENCY-RESULTS',
                    filter_opts=filter_opts)
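
The `find` helper used above is not shown in these examples. A minimal sketch of how it could be written with h5py's `visititems`, assuming it matches datasets on their `CLASS` attribute (an assumption, not the project's actual implementation):

    def find(group, dataset_class):
        # hypothetical helper: collect paths of datasets whose CLASS attribute matches
        paths = []

        def _matcher(name, obj):
            # the CLASS attribute may be stored as str or bytes depending on the writer
            if isinstance(obj, h5py.Dataset) and \
                    obj.attrs.get('CLASS') in (dataset_class, dataset_class.encode()):
                paths.append(name)

        group.visititems(_matcher)
        return paths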
Example No. 5
def scalar_results(scalar_group, compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Combine the residual results of each SCALAR Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    paths = find(scalar_group, "SCALAR")

    equivalent = []
    products = []
    name = []

    for pth in paths:
        dset = scalar_group[pth]
        equivalent.append(dset[()])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

    df = pandas.DataFrame(
        {"product": products, "dataset_name": name, "equivalent": equivalent}
    )

    # output
    write_dataframe(
        df,
        "SCALAR-EQUIVALENCY",
        scalar_group,
        compression,
        title="EQUIVALENCY-RESULTS",
        filter_opts=filter_opts,
    )
Example No. 6
def run(aerosol_path, output_filename):
    """
    Converts all the .pix and .cmp files found in `aerosol_path`
    to an HDF5 file.
    """
    # define a case switch
    func = {"pix": read_pix, "cmp": read_cmp}

    # create the output file
    fid = h5py.File(output_filename, "w")

    pattern = ["*.pix", "*.cmp"]
    for p in pattern:
        search = pjoin(aerosol_path, p)
        files = glob.glob(search)
        for fname in files:
            pth, ext = splitext(fname)
            ext = ext.split(".")[-1]
            grp_name = basename(pth)
            out_path = ppjoin(ext, grp_name)

            # read/write
            df, extents = func[ext](fname)
            attrs = {"extents": wkt.dumps(extents), "source filename": fname}
            write_dataframe(df, out_path, fid, attrs=attrs)

    fid.close()
Example No. 7
def image_results(image_group,
                  compression=H5CompressionFilter.LZF,
                  filter_opts=None):
    """
    Combine the residual results of each IMAGE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    img_paths = find(image_group, 'IMAGE')

    min_ = []
    max_ = []
    percent = []
    pct_90 = []
    pct_99 = []
    resid_paths = []
    hist_paths = []
    chist_paths = []
    products = []
    name = []

    for pth in img_paths:
        hist_pth = pth.replace('RESIDUALS', 'FREQUENCY-DISTRIBUTIONS')
        chist_pth = pth.replace('RESIDUALS', 'CUMULATIVE-DISTRIBUTIONS')
        resid_paths.append(ppjoin(image_group.name, pth))
        hist_paths.append(ppjoin(image_group.name, hist_pth))
        chist_paths.append(ppjoin(image_group.name, chist_pth))

        dset = image_group[pth]
        min_.append(dset.attrs['min_residual'])
        max_.append(dset.attrs['max_residual'])
        percent.append(dset.attrs['percent_difference'])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

        dset = image_group[chist_pth]
        pct_90.append(dset.attrs['90th_percentile'])
        pct_99.append(dset.attrs['99th_percentile'])

    df = pandas.DataFrame({
        'product': products,
        'dataset_name': name,
        'min_residual': min_,
        'max_residual': max_,
        'percent_difference': percent,
        '90th_percentile': pct_90,
        '99th_percentile': pct_99,
        'residual_image_pathname': resid_paths,
        'residual_histogram_pathname': hist_paths,
        'residual_cumulative_pathname': chist_paths
    })

    # output
    write_dataframe(df,
                    'IMAGE-RESIDUALS',
                    image_group,
                    compression,
                    title='RESIDUALS-TABLE',
                    filter_opts=filter_opts)
Example No. 8
def query(
    outdir,
    product_name_test,
    product_name_reference,
    db_env_test,
    db_env_reference,
    time,
    lon,
    lat,
    additional_filters,
):
    """
    Database querying of test and reference products.
    """

    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value,
                                LogNames.QUERY.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj),
                            processors=LOG_PROCESSORS)

        results = query_products(
            product_name_test,
            product_name_reference,
            db_env_test,
            db_env_reference,
            time,
            lon,
            lat,
            additional_filters,
        )

        results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                        FileNames.RESULTS.value)
        dataset_name = DatasetNames.QUERY.value

        _LOG.info(
            "saving results of query",
            out_fname=str(results_fname),
            dataset_name=dataset_name,
        )

        with h5py.File(str(results_fname), "w") as fid:
            write_dataframe(results, dataset_name, fid)
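
Reading the saved query results back is symmetrical; a small sketch using `read_h5_table` (the results filename below is a placeholder):

    # hypothetical read-back of the saved query results
    with h5py.File("results.h5", "r") as fid:
        query_df = read_h5_table(fid, DatasetNames.QUERY.value)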
Example No. 9
    def test_dataframe_roundtrip(self):
        """
        Test that the pandas dataframe roundtrips, i.e. it is saved to
        HDF5 and read back into a dataframe seamlessly.
        Float, integer, datetime and string datatypes will be
        tested.
        """
        df = pandas.DataFrame(self.table_data)
        df['timestamps'] = pandas.date_range('1/1/2000', periods=10, freq='D')
        df['string_data'] = ['period {}'.format(i) for i in range(10)]

        fname = 'test_dataframe_roundtrip.h5'
        with h5py.File(fname, 'w', **self.memory_kwargs) as fid:
            hdf5.write_dataframe(df, 'dataframe', fid)
            self.assertTrue(df.equals(hdf5.read_h5_table(fid, 'dataframe')))
Example No. 10
def convert(aerosol_path, out_h5: h5py.Group, compression, filter_opts):
    """
    Converts all the .pix and .cmp files found in `aerosol_path`
    to an HDF5 file.
    """
    # define a case switch
    func = {"pix": read_pix, "cmp": read_cmp}
    dataset_names = []
    metadata = []

    pattern = ["*.pix", "*.cmp"]
    for p in pattern:
        for search_path in aerosol_path.glob(p):
            _path = search_path.resolve()
            fname, ext = _path.stem, _path.suffix[1:]  # exclude the period from ext
            out_path = ppjoin(ext, fname)

            # read/write
            df, extents = func[ext](_path)

            # src checksum; used to help derive fallback uuid
            with _path.open("rb") as src:
                src_checksum = generate_md5sum(src).hexdigest()

            attrs = {
                "extents": wkt.dumps(extents),
                "source filename": str(_path)
            }
            write_dataframe(
                df,
                out_path,
                out_h5,
                compression=compression,
                attrs=attrs,
                filter_opts=filter_opts,
            )
            dataset_names.append(out_path)
            metadata.append({
                "id":
                str(
                    generate_fallback_uuid(PRODUCT_HREF,
                                           path=str(_path.stem),
                                           md5=src_checksum))
            })

    return metadata, dataset_names
Example No. 11
def _convert_4d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the atmospheric data, stored as
    multiples of 37 layers, in the GRIB file to HDF5.
    For a month's worth of data, the dimensions become:
        * (day, atmospheric level, y, x)
    """
    attrs = {
        "geotransform": rds.transform.to_gdal(),
        "crs_wkt": rds.crs.wkt,
        "history": "Converted to HDF5",
    }

    # band groups of 37, nrows to process (ytile)
    band_groups = range(1, rds.count + 1, 37)
    ytile = filter_opts["chunks"][2]
    dims = (len(band_groups), 37, rds.height, rds.width)
    tiles = list(generate_tiles(rds.width, rds.height, rds.width, ytile))  # materialise; iterated once per band group

    # dataset creation options
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    kwargs["shape"] = dims
    kwargs["dtype"] = rds.dtypes[0]

    dataset = fid.create_dataset(dataset_name, **kwargs)
    attach_image_attributes(dataset, attrs)

    # add dimension labels, but should we also include dimension scales?
    dataset.dims[0].label = "Day"
    dataset.dims[1].label = "Atmospheric Level"
    dataset.dims[2].label = "Y"
    dataset.dims[3].label = "X"

    # process by spatial tile containing 37 atmospheric layers for 1 day
    for i, bg in enumerate(band_groups):
        bands = list(range(bg, bg + 37))
        for tile in tiles:
            idx = (
                slice(i, i + 1),  # day index for this band group
                slice(None),
                slice(tile[0][0], tile[0][1]),
                slice(tile[1][0], tile[1][1]),
            )
            dataset[idx] = rds.read(bands, window=tile)

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, "METADATA", fid, compression)
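
A hedged sketch of how `_convert_4d` might be driven; the GRIB filename, dataset name and chunk shape below are placeholders:

    # hypothetical invocation; rasterio exposes each GRIB layer as a band
    with rasterio.open("multi-level.grib") as rds, \
            h5py.File("atmospherics.h5", "w") as fid:
        _convert_4d(rds, fid, "ATMOSPHERIC-DATA",
                    H5CompressionFilter.LZF,
                    filter_opts={"chunks": (1, 1, 128, 128)})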
Example No. 12
    def test_write_dataframe(self):
        """
        Test the write_dataframe function.
        """
        df = pandas.DataFrame(self.table_data)
        fname = "test_write_dataframe.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            self.assertIsNone(hdf5.write_dataframe(df, "dataframe", fid))
Example No. 13
def query_filesystem(
    outdir,
    product_pathname_test,
    product_pathname_reference,
    glob_pattern_test,
    glob_pattern_reference,
):
    """
    Filesystem querying of test and reference products.
    """

    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value,
                                LogNames.QUERY.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj))

        results = query_via_filepath(
            product_pathname_test,
            product_pathname_reference,
            glob_pattern_test,
            glob_pattern_reference,
        )

        results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                        FileNames.RESULTS.value)
        dataset_name = DatasetNames.QUERY.value

        _LOG.info(
            "saving results of query",
            out_fname=str(results_fname),
            dataset_name=dataset_name,
        )

        if not results_fname.parent.exists():
            results_fname.parent.mkdir(parents=True)

        with h5py.File(str(results_fname), "w") as fid:
            write_dataframe(results, dataset_name, fid)
Example No. 14
def _convert_3d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 37 layer atmospheric data
    in the GRIB file to HDF5.
    """
    # basic metadata to attach to the dataset
    attrs = {
        'geotransform': rds.transform.to_gdal(),
        'crs_wkt': rds.crs.wkt,
        'history': 'Converted to HDF5'
    }

    # bands list, nrows to process (ytile)
    bands = list(range(1, rds.count + 1))
    ytile = filter_opts['chunks'][1]
    dims = (rds.count, rds.height, rds.width)

    # dataset creation options
    kwargs = compression.config(**filter_opts).dataset_compression_kwargs()
    kwargs['shape'] = dims
    kwargs['dtype'] = rds.dtypes[0]

    dataset = fid.create_dataset(dataset_name, **kwargs)
    attach_image_attributes(dataset, attrs)

    # add dimension labels, but should we also include dimension scales?
    dataset.dims[0].label = 'Atmospheric Level'
    dataset.dims[1].label = 'Y'
    dataset.dims[2].label = 'X'

    # process by tile
    for tile in generate_tiles(rds.width, rds.height, rds.width, ytile):
        idx = (
            slice(None),
            slice(tile[0][0], tile[0][1]),
            slice(tile[1][0], tile[1][1])
        )
        dataset[idx] = rds.read(bands, window=tile)

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, 'METADATA', fid, compression)
Example No. 15
def _convert_2d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 2D GRIB file to HDF5.
    """
    attrs = {
        'geotransform': rds.transform.to_gdal(),
        'crs_wkt': rds.crs.wkt,
        'history': 'Converted to HDF5'
    }
    data = rds.read(1)
    write_h5_image(data, dataset_name, fid, compression, attrs, filter_opts)

    # add dimension labels, but should we also include dimension scales?
    dataset = fid[dataset_name]
    dataset.dims[0].label = 'Y'
    dataset.dims[1].label = 'X'

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, 'METADATA', fid, compression)
Example No. 16
def _convert_2d(rds, fid, dataset_name, compression, filter_opts):
    """
    Private routine for converting the 2D GRIB file to HDF5.
    """
    attrs = {
        "geotransform": rds.transform.to_gdal(),
        "crs_wkt": rds.crs.wkt,
        "history": "Converted to HDF5",
    }
    data = rds.read(1)
    write_h5_image(data, dataset_name, fid, compression, attrs, filter_opts)

    # add dimension labels, but should we also include dimension scales?
    dataset = fid[dataset_name]
    dataset.dims[0].label = "Y"
    dataset.dims[1].label = "X"

    # metadata
    metadata = metadata_dataframe(rds)
    write_dataframe(metadata, "METADATA", fid, compression)
Example No. 17
    def test_dataframe_roundtrip(self):
        """
        Test that the pandas dataframe roundtrips, i.e. it is saved to
        HDF5 and read back into a dataframe seamlessly.
        Float, integer, datetime and string datatypes will be
        tested.
        """
        df = pandas.DataFrame(self.table_data)
        df["timestamps"] = pandas.date_range("1/1/2000",
                                             periods=10,
                                             freq="D",
                                             tz="UTC")
        df["string_data"] = ["period {}".format(i) for i in range(10)]

        fname = "test_dataframe_roundtrip.h5"
        with h5py.File(fname, "w", **self.memory_kwargs) as fid:
            hdf5.write_dataframe(df, "dataframe", fid)
            # Mirror the conversion to timezone-naive datetimes that occurs
            # during serialisation to HDF5 (numpy is timezone naive; pandas
            # has timezone support)
            df["timestamps"] = df["timestamps"].dt.tz_convert(None)
            self.assertTrue(df.equals(hdf5.read_h5_table(fid, "dataframe")))
Example No. 18
def comparison(outdir: Union[str, Path], proc_info: bool) -> None:
    """
    Test and Reference product intercomparison evaluation.
    """

    outdir = Path(outdir)
    if proc_info:
        log_fname = outdir.joinpath(DirectoryNames.LOGS.value,
                                    LogNames.PROC_INFO_INTERCOMPARISON.value)
    else:
        log_fname = outdir.joinpath(DirectoryNames.LOGS.value,
                                    LogNames.MEASUREMENT_INTERCOMPARISON.value)

    out_stream = MPIStreamIO(str(log_fname))
    structlog.configure(processors=DEFAULT_PROCESSORS,
                        logger_factory=MPILoggerFactory(out_stream))

    # processor info
    rank = COMM.Get_rank()
    n_processors = COMM.Get_size()

    results_fname = outdir.joinpath(DirectoryNames.RESULTS.value,
                                    FileNames.RESULTS.value)

    with h5py.File(str(results_fname), "r") as fid:
        dataframe = read_h5_table(fid, DatasetNames.QUERY.value)

    if rank == 0:
        index = dataframe.index.values.tolist()
        blocks = scatter(index, n_processors)

        # some basic attribute information
        doc: Union[Granule, None] = load_odc_metadata(
            Path(dataframe.iloc[0].yaml_pathname_reference))
        attrs: Dict[str, Any] = {
            "framing": doc.framing,
            "thematic": False,
            "proc-info": False,
        }
    else:
        blocks = None
        doc = None
        attrs = dict()

    COMM.Barrier()

    # equally partition the work across all processors
    indices = COMM.scatter(blocks, root=0)

    if proc_info:
        attrs["proc-info"] = True
        if rank == 0:
            _LOG.info("procssing proc-info documents")

        gqa_dataframe, ancillary_dataframe = _process_proc_info(
            dataframe.iloc[indices], rank)

        if rank == 0:
            _LOG.info("saving gqa dataframe results to tables")

            if not results_fname.parent.exists():
                results_fname.parent.mkdir(parents=True)

            with h5py.File(str(results_fname), "a") as fid:
                dataset_name = PPath(DatasetGroups.INTERCOMPARISON.value,
                                     DatasetNames.GQA_RESULTS.value)

                write_dataframe(gqa_dataframe,
                                str(dataset_name),
                                fid,
                                attrs=attrs)

            _LOG.info("saving ancillary dataframe results to tables")

            if not results_fname.parent.exists():
                results_fname.parent.mkdir(parents=True)

            with h5py.File(str(results_fname), "a") as fid:
                dataset_name = PPath(
                    DatasetGroups.INTERCOMPARISON.value,
                    DatasetNames.ANCILLARY_RESULTS.value,
                )

                write_dataframe(ancillary_dataframe,
                                str(dataset_name),
                                fid,
                                attrs=attrs)

            _LOG.info("saving software versions dataframe to tables")

            with h5py.File(str(results_fname), "a") as fid:
                dataset_name = PPath(DatasetNames.SOFTWARE_VERSIONS.value)

                software_attrs = {
                    "description": "ARD Pipeline software versions"
                }
                software_df = compare_proc_info.compare_software(dataframe)
                write_dataframe(software_df,
                                str(dataset_name),
                                fid,
                                attrs=software_attrs)

    else:
        if rank == 0:
            _LOG.info("processing odc-metadata documents")
        results = _process_odc_doc(dataframe.iloc[indices], rank)

        if rank == 0:
            # save each table
            _LOG.info("saving dataframes to tables")
            with h5py.File(str(results_fname), "a") as fid:

                attrs["thematic"] = False
                write_dataframe(
                    results[0],
                    str(
                        PPath(
                            DatasetGroups.INTERCOMPARISON.value,
                            DatasetNames.GENERAL_RESULTS.value,
                        )),
                    fid,
                    attrs=attrs,
                )

                attrs["thematic"] = True
                write_dataframe(
                    results[1],
                    str(
                        PPath(
                            DatasetGroups.INTERCOMPARISON.value,
                            DatasetNames.FMASK_RESULTS.value,
                        )),
                    fid,
                    attrs=attrs,
                )

                write_dataframe(
                    results[2],
                    str(
                        PPath(
                            DatasetGroups.INTERCOMPARISON.value,
                            DatasetNames.CONTIGUITY_RESULTS.value,
                        )),
                    fid,
                    attrs=attrs,
                )

                write_dataframe(
                    results[3],
                    str(
                        PPath(
                            DatasetGroups.INTERCOMPARISON.value,
                            DatasetNames.SHADOW_RESULTS.value,
                        )),
                    fid,
                    attrs=attrs,
                )

    if rank == 0:
        workflow = "proc-info field" if proc_info else "product measurement"
        msg = f"{workflow} comparison processing finished"
        _LOG.info(msg)
Example No. 19
def image_results(image_group, compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Combine the residual results of each IMAGE Dataset into a
    single TABLE Dataset.
    """
    # potentially could just use visit...
    img_paths = find(image_group, "IMAGE")

    min_ = []
    max_ = []
    percent = []
    pct_90 = []
    pct_99 = []
    resid_paths = []
    hist_paths = []
    chist_paths = []
    products = []
    name = []

    for pth in img_paths:
        hist_pth = pth.replace("RESIDUALS", "FREQUENCY-DISTRIBUTIONS")
        chist_pth = pth.replace("RESIDUALS", "CUMULATIVE-DISTRIBUTIONS")
        resid_paths.append(ppjoin(image_group.name, pth))
        hist_paths.append(ppjoin(image_group.name, hist_pth))
        chist_paths.append(ppjoin(image_group.name, chist_pth))

        dset = image_group[pth]
        min_.append(dset.attrs["min_residual"])
        max_.append(dset.attrs["max_residual"])
        percent.append(dset.attrs["percent_difference"])
        products.append(pbasename(dset.parent.name))
        name.append(pbasename(dset.name))

        dset = image_group[chist_pth]
        pct_90.append(dset.attrs["90th_percentile"])
        pct_99.append(dset.attrs["99th_percentile"])

    df = pandas.DataFrame(
        {
            "product": products,
            "dataset_name": name,
            "min_residual": min_,
            "max_residual": max_,
            "percent_difference": percent,
            "90th_percentile": pct_90,
            "99th_percentile": pct_99,
            "residual_image_pathname": resid_paths,
            "residual_histogram_pathname": hist_paths,
            "residual_cumulative_pathname": chist_paths,
        }
    )

    # output
    write_dataframe(
        df,
        "IMAGE-RESIDUALS",
        image_group,
        compression,
        title="RESIDUALS-TABLE",
        filter_opts=filter_opts,
    )
Example No. 20
File: prwtr.py  Project: sixy6e/swfo
def convert_file(fname,
                 out_h5: h5py.Group,
                 compression,
                 filter_opts: Optional[Dict] = None):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_h5:
        A h5py.Group to write output datasets to.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with rasterio.open(fname) as ds:
        name_fmt = "BAND-{}"

        # global attributes
        # TODO update the history attrs
        # TODO remove the NC_GLOBAL str and just have plain attr names
        g_attrs = ds.tags()

        # get timestamp info
        origin = g_attrs.pop("time#units").replace("hours since ", "")
        hours = json.loads(
            g_attrs.pop("NETCDF_DIM_time_VALUES").replace("{", "[").replace(
                "}", "]"))
        df = pandas.DataFrame({
            "timestamp":
            pandas.to_datetime(hours, unit="h", origin=origin),
            "band_name": [name_fmt.format(i + 1) for i in range(ds.count)],
        })
        df["dataset_name"] = df.timestamp.dt.strftime("%Y/%B-%d/%H%M")
        df["dataset_name"] = df["dataset_name"].str.upper()

        # create a timestamp and band name index table dataset
        desc = "Timestamp and Band Name index information."
        attrs = {"description": desc}
        write_dataframe(df, "INDEX", out_h5, compression, attrs=attrs)

        attach_attributes(out_h5, g_attrs)

        # process every band
        for i in range(1, ds.count + 1):
            ds_name = df.iloc[i - 1].dataset_name

            # create empty or copy the user supplied filter options
            if not filter_opts:
                f_opts = dict()
            else:
                f_opts = filter_opts.copy()

            # band attributes
            # TODO remove NETCDF tags
            # TODO add fillvalue attr
            attrs = ds.tags(i)
            attrs["timestamp"] = df.iloc[i - 1]["timestamp"].replace(
                tzinfo=timezone.utc)
            attrs["band_name"] = df.iloc[i - 1]["band_name"]
            attrs["geotransform"] = ds.transform.to_gdal()
            attrs["crs_wkt"] = CRS.ExportToWkt()

            # use ds native chunks if none are provided
            if "chunks" not in f_opts:
                try:
                    f_opts["chunks"] = ds.block_shapes[i]
                except IndexError:
                    print("Chunk error: {}".format(fname))
                    f_opts["chunks"] = (73, 144)

            # write to disk as an IMAGE Class Dataset
            write_h5_image(
                ds.read(i),
                ds_name,
                out_h5,
                attrs=attrs,
                compression=compression,
                filter_opts=f_opts,
            )
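
A usage sketch for this `convert_file` variant, which writes into an open HDF5 group rather than an output filename (both filenames below are placeholders):

    # hypothetical invocation for a single PR_WTR NetCDF file
    with h5py.File("pr_wtr.eatm.2018.h5", "w") as out_fid:
        convert_file("pr_wtr.eatm.2018.nc", out_fid,
                     compression=H5CompressionFilter.LZF,
                     filter_opts=None)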
Example No. 21
def calculate_coefficients(atmospheric_results_group, out_group,
                           compression=H5CompressionFilter.LZF,
                           filter_opts=None):
    """
    Calculate the atmospheric coefficients from the MODTRAN output
    for use in the BRDF and atmospheric correction.
    Coefficients are computed for each band, each coordinate and
    each atmospheric coefficient. The atmospheric coefficients can be
    found in `Workflow.STANDARD.atmos_coefficients`.

    :param atmospheric_results_group:
        The root HDF5 `Group` that contains the atmospheric results
        from each MODTRAN run.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

        The datasets will be formatted to the HDF5 TABLE specification
        and the dataset names will be as follows:

        * DatasetName.NBAR_COEFFICIENTS (if Workflow.STANDARD or Workflow.NBAR)
        * DatasetName.SBT_COEFFICIENTS (if Workflow.STANDARD or Workflow.SBT)

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    nbar_coefficients = pd.DataFrame()
    sbt_coefficients = pd.DataFrame()

    channel_data = channel_solar_angle = upward = downward = None

    # Initialise the output group/file
    if out_group is None:
        fid = h5py.File('atmospheric-coefficients.h5', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    res = atmospheric_results_group
    npoints = res.attrs['npoints']
    nbar_atmos = res.attrs['nbar_atmospherics']
    sbt_atmos = res.attrs['sbt_atmospherics']

    for point in range(npoints):
        point_grp = res[POINT_FMT.format(p=point)]
        lonlat = point_grp.attrs['lonlat']
        timestamp = pd.to_datetime(point_grp.attrs['datetime'])
        grp_path = ppjoin(POINT_FMT.format(p=point), ALBEDO_FMT)

        if nbar_atmos:
            channel_path = ppjoin(grp_path.format(a=Albedos.ALBEDO_0.value),
                                  DatasetName.CHANNEL.value)
            channel_data = read_h5_table(res, channel_path)

            channel_solar_angle_path = ppjoin(
                grp_path.format(a=Albedos.ALBEDO_0.value),
                DatasetName.SOLAR_ZENITH_CHANNEL.value
            )

            channel_solar_angle = read_h5_table(res, channel_solar_angle_path)

        if sbt_atmos:
            dname = ppjoin(grp_path.format(a=Albedos.ALBEDO_TH.value),
                           DatasetName.UPWARD_RADIATION_CHANNEL.value)
            upward = read_h5_table(res, dname)

            dname = ppjoin(grp_path.format(a=Albedos.ALBEDO_TH.value),
                           DatasetName.DOWNWARD_RADIATION_CHANNEL.value)
            downward = read_h5_table(res, dname)

        kwargs = {'channel_data': channel_data,
                  'solar_zenith_angle': channel_solar_angle,
                  'upward_radiation': upward,
                  'downward_radiation': downward}

        result = coefficients(**kwargs)

        # insert some datetime/geospatial fields
        if result[0] is not None:
            result[0].insert(0, 'POINT', point)
            result[0].insert(1, 'LONGITUDE', lonlat[0])
            result[0].insert(2, 'LATITUDE', lonlat[1])
            result[0].insert(3, 'DATETIME', timestamp)
            nbar_coefficients = nbar_coefficients.append(result[0])

        if result[1] is not None:
            result[1].insert(0, 'POINT', point)
            result[1].insert(1, 'LONGITUDE', lonlat[0])
            result[1].insert(2, 'LATITUDE', lonlat[1])
            result[1].insert(3, 'DATETIME', pd.to_datetime(timestamp))
            sbt_coefficients = sbt_coefficients.append(result[1])

    nbar_coefficients.reset_index(inplace=True)
    sbt_coefficients.reset_index(inplace=True)

    attrs = {'npoints': npoints}
    description = "Coefficients derived from the VNIR solar irradiation."
    attrs['description'] = description
    dname = DatasetName.NBAR_COEFFICIENTS.value

    if GroupName.COEFFICIENTS_GROUP.value not in fid:
        fid.create_group(GroupName.COEFFICIENTS_GROUP.value)

    group = fid[GroupName.COEFFICIENTS_GROUP.value]

    if nbar_atmos:
        write_dataframe(nbar_coefficients, dname, group, compression,
                        attrs=attrs, filter_opts=filter_opts)

    description = "Coefficients derived from the THERMAL solar irradiation."
    attrs['description'] = description
    dname = DatasetName.SBT_COEFFICIENTS.value

    if sbt_atmos:
        write_dataframe(sbt_coefficients, dname, group, compression,
                        attrs=attrs, filter_opts=filter_opts)

    if out_group is None:
        return fid
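
Per the docstring, passing `out_group=None` returns an in-memory HDF5 file holding the coefficient tables; a hedged sketch of calling the routine and reading one table back (assumes `results` is the atmospheric results group):

    # hypothetical usage
    fid = calculate_coefficients(results, out_group=None)
    dname = ppjoin(GroupName.COEFFICIENTS_GROUP.value,
                   DatasetName.NBAR_COEFFICIENTS.value)
    nbar_coefficients = read_h5_table(fid, dname)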
Example No. 22
def run_modtran(acquisitions, atmospherics_group, workflow, npoints, point,
                albedos, modtran_exe, basedir, out_group,
                compression=H5CompressionFilter.LZF, filter_opts=None):
    """
    Run MODTRAN and return the channel results.
    """
    lonlat = atmospherics_group[POINT_FMT.format(p=point)].attrs['lonlat']

    # determine the output group/file
    if out_group is None:
        fid = h5py.File('atmospheric-results.h5', driver='core',
                        backing_store=False)
    else:
        fid = out_group

    # initial attributes
    base_attrs = {'Point': point,
                  'lonlat': lonlat,
                  'datetime': acquisitions[0].acquisition_datetime}

    base_path = ppjoin(GroupName.ATMOSPHERIC_RESULTS_GRP.value,
                       POINT_FMT.format(p=point))

    # what atmospheric calculations have been run and how many points
    group_name = GroupName.ATMOSPHERIC_RESULTS_GRP.value
    if group_name not in fid:
        fid.create_group(group_name)

    fid[group_name].attrs['npoints'] = npoints
    applied = workflow in (Workflow.STANDARD, Workflow.NBAR)
    fid[group_name].attrs['nbar_atmospherics'] = applied
    applied = workflow in (Workflow.STANDARD, Workflow.SBT)
    fid[group_name].attrs['sbt_atmospherics'] = applied

    acqs = acquisitions
    for albedo in albedos:
        base_attrs['Albedo'] = albedo.value
        workpath = pjoin(basedir, POINT_FMT.format(p=point),
                         ALBEDO_FMT.format(a=albedo.value))

        json_mod_infile = pjoin(workpath, ''.join(
            [POINT_ALBEDO_FMT.format(p=point, a=albedo.value), '.json']))

        group_path = ppjoin(base_path, ALBEDO_FMT.format(a=albedo.value))

        subprocess.check_call([modtran_exe, json_mod_infile], cwd=workpath)

        chn_fname = glob.glob(pjoin(workpath, '*.chn'))[0]
        tp6_fname = glob.glob(pjoin(workpath, '*.tp6'))[0]

        if albedo == Albedos.ALBEDO_TH:
            acq = [acq for acq in acqs if acq.band_type == BandType.THERMAL][0]

            channel_data = read_modtran_channel(chn_fname, tp6_fname, acq, albedo)

            attrs = base_attrs.copy()
            dataset_name = DatasetName.UPWARD_RADIATION_CHANNEL.value
            attrs['description'] = ('Upward radiation channel output from '
                                    'MODTRAN')
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[0], dset_name, fid, compression,
                            attrs=attrs, filter_opts=filter_opts)

            # downward radiation
            attrs = base_attrs.copy()
            dataset_name = DatasetName.DOWNWARD_RADIATION_CHANNEL.value
            attrs['description'] = ('Downward radiation channel output from '
                                    'MODTRAN')
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[1], dset_name, fid, compression,
                            attrs=attrs, filter_opts=filter_opts)
        else:
            acq = [acq for acq in acqs if
                   acq.band_type == BandType.REFLECTIVE][0]

            # Will require updating to handle JSON output from modtran
            channel_data = read_modtran_channel(chn_fname, tp6_fname, acq, albedo)

            attrs = base_attrs.copy()
            dataset_name = DatasetName.CHANNEL.value
            attrs['description'] = 'Channel output from MODTRAN'
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[0], dset_name, fid, compression,
                            attrs=attrs, filter_opts=filter_opts)

            # solar zenith angle at surface
            attrs = base_attrs.copy()
            dataset_name = DatasetName.SOLAR_ZENITH_CHANNEL.value
            attrs['description'] = 'Solar zenith angle at different atmosphere levels'
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[1], dset_name, fid, compression,
                            attrs=attrs, filter_opts=filter_opts)

    # metadata for a given point
    alb_vals = [alb.value for alb in workflow.albedos]
    fid[base_path].attrs['lonlat'] = lonlat
    fid[base_path].attrs['datetime'] = acqs[0].acquisition_datetime.isoformat()
    fid[base_path].attrs.create('albedos', data=alb_vals, dtype=VLEN_STRING)

    if out_group is None:
        return fid
Example No. 23
def convert_file(fname, out_fname, compression, filter_opts):
    """
    Convert a PR_WTR NetCDF file into HDF5.

    :param fname:
        A str containing the PR_WTR filename.

    :param out_fname:
        A str containing the output filename for the HDF5 file.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None. Content is written directly to disk.
    """
    with h5py.File(out_fname, 'w') as fid:
        with rasterio.open(fname) as ds:
            name_fmt = 'BAND-{}'

            # global attributes
            # TODO update the history attrs
            # TODO remove the NC_GLOBAL str and just have plain attr names
            g_attrs = ds.tags()

            # get timestamp info
            origin = g_attrs.pop('time#units').replace('hours since ', '')
            hours = json.loads(
                g_attrs.pop('NETCDF_DIM_time_VALUES').replace('{', '[').replace('}', ']')
            )
            df = pandas.DataFrame(
                {
                    'timestamp': pandas.to_datetime(hours, unit='h', origin=origin),
                    'band_name': [name_fmt.format(i+1) for i in range(ds.count)]
                }
            )
            df['dataset_name'] = df.timestamp.dt.strftime('%Y/%B-%d/%H%M')
            df['dataset_name'] = df['dataset_name'].str.upper()

            # create a timestamp and band name index table dataset
            desc = "Timestamp and Band Name index information."
            attrs = {
                'description': desc
            }
            write_dataframe(df, 'INDEX', fid, compression, attrs=attrs)

            attach_attributes(fid, g_attrs)

            # process every band
            for i in range(1, ds.count + 1):
                ds_name = df.iloc[i-1].dataset_name

                # create empty or copy the user supplied filter options
                if not filter_opts:
                    f_opts = dict()
                else:
                    f_opts = filter_opts.copy()


                # band attributes
                # TODO remove NETCDF tags
                # TODO add fillvalue attr
                attrs = ds.tags(i)
                attrs['timestamp'] = df.iloc[i-1]['timestamp']
                attrs['band_name'] = df.iloc[i-1]['band_name']
                attrs['geotransform'] = ds.transform.to_gdal()
                attrs['crs_wkt'] = CRS.ExportToWkt()

                # use ds native chunks if none are provided
                if 'chunks' not in f_opts:
                    try:
                        f_opts['chunks'] = ds.block_shapes[i]
                    except IndexError:
                        print("Chunk error: {}".format(fname))
                        f_opts['chunks'] = (73, 144)

                # write to disk as an IMAGE Class Dataset
                write_h5_image(ds.read(i), ds_name, fid, attrs=attrs,
                               compression=compression, filter_opts=f_opts)
Example No. 24
def table_residual(ref_fid,
                   test_fid,
                   pathname,
                   out_fid,
                   compression=H5CompressionFilter.LZF,
                   save_inputs=False,
                   filter_opts=None):
    """
    Output a residual TABLE of the numerical columns, ignoring
    columns with the dtype `object`.
    An equivalency test using `pandas.DataFrame.equals` is also
    undertaken; if it returns False, further investigation is required
    to determine which column(s) and row(s) differ.

    :param ref_fid:
        A h5py file object (essentially the root Group), containing
        the reference data.

    :param test_fid:
        A h5py file object (essentially the root Group), containing
        the test data.

    :param pathname:
        A `str` containing the pathname to the TABLE Dataset.

    :param out_fid:
        A h5py file object (essentially the root Group), opened for
        writing the output data.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :param save_inputs:
        A `bool` indicating whether or not to save the input datasets
        used for evaluating the residuals alongside the results.
        Default is False.

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        None; this routine only returns None (it may print a message),
        which is essential for the HDF5 visit routine.
    """
    class_name = 'TABLE'
    ref_df = read_h5_table(ref_fid, pathname)
    test_df = read_h5_table(test_fid, pathname)

    # ignore any `object` dtype columns (mostly just strings)
    cols = [
        col for col in ref_df.columns if ref_df[col].dtype.name != 'object'
    ]

    # difference and pandas.DataFrame.equals test
    df = ref_df[cols] - test_df[cols]
    equal = test_df.equals(ref_df)

    # ignored cols
    cols = [
        col for col in ref_df.columns if ref_df[col].dtype.name == 'object'
    ]

    # output
    attrs = {
        'description': 'Residuals of numerical columns only',
        'columns_ignored': numpy.array(cols, VLEN_STRING),
        'equivalent': equal
    }
    base_dname = pbasename(pathname)
    group_name = ref_fid[pathname].parent.name.strip('/')
    dname = ppjoin('RESULTS', class_name, 'RESIDUALS', group_name, base_dname)
    write_dataframe(df,
                    dname,
                    out_fid,
                    compression,
                    attrs=attrs,
                    filter_opts=filter_opts)

    if save_inputs:
        # copy the reference data
        out_grp = out_fid.require_group(ppjoin('REFERENCE-DATA', group_name))
        ref_fid.copy(ref_fid[pathname], out_grp)

        # copy the test data
        out_grp = out_fid.require_group(ppjoin('TEST-DATA', group_name))
        test_fid.copy(test_fid[pathname], out_grp)
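
The docstring notes the routine is written to suit h5py's visit mechanism; a hedged sketch of driving it over every TABLE dataset found in a reference file (the filenames below are placeholders):

    # hypothetical driver: compare every TABLE dataset in the reference file
    with h5py.File("reference.h5", "r") as ref_fid, \
            h5py.File("test.h5", "r") as test_fid, \
            h5py.File("residuals.h5", "w") as out_fid:

        def _residual(pathname):
            obj = ref_fid[pathname]
            # CLASS may be stored as str or bytes depending on the writer
            if isinstance(obj, h5py.Dataset) and \
                    obj.attrs.get("CLASS") in ("TABLE", b"TABLE"):
                table_residual(ref_fid, test_fid, pathname, out_fid)

        ref_fid.visit(_residual)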
Example No. 25
def collate(outdir: Union[str, Path]) -> None:
    """
    Collate the results of the product comparison.
    First the results are merged with the framing geometry, and then
    they are summarised.
    """

    outdir = Path(outdir)
    log_fname = outdir.joinpath(DirectoryNames.LOGS.value,
                                LogNames.COLLATE.value)

    if not log_fname.parent.exists():
        log_fname.parent.mkdir(parents=True)

    with open(log_fname, "w") as fobj:
        structlog.configure(logger_factory=structlog.PrintLoggerFactory(fobj),
                            processors=LOG_PROCESSORS)

        comparison_results_fname = outdir.joinpath(
            DirectoryNames.RESULTS.value, FileNames.RESULTS.value)

        _LOG.info("opening intercomparison results file",
                  fname=str(comparison_results_fname))

        with h5py.File(str(comparison_results_fname), "a") as fid:
            grp = fid[DatasetGroups.INTERCOMPARISON.value]

            for dataset_name in grp:
                _LOG.info("reading dataset", dataset_name=dataset_name)
                dataframe = read_h5_table(grp, dataset_name)

                # some important attributes
                framing = grp[dataset_name].attrs["framing"]
                thematic = grp[dataset_name].attrs["thematic"]
                proc_info = grp[dataset_name].attrs["proc-info"]

                _LOG.info(
                    "merging results with framing",
                    framing=framing,
                    dataset_name=dataset_name,
                )

                geo_dataframe = merge_framing(dataframe, framing)

                out_fname = outdir.joinpath(
                    DirectoryNames.RESULTS.value,
                    FileNames[MergeLookup[DatasetNames(
                        dataset_name).name].value].value,
                )

                _LOG.info("saving as GeoJSON", out_fname=str(out_fname))
                geo_dataframe.to_file(str(out_fname), driver="GeoJSONSeq")

                _LOG.info("summarising")

                summary_dataframe = summarise(geo_dataframe, thematic,
                                              proc_info)

                out_dname = PPath(
                    DatasetGroups.SUMMARY.value,
                    DatasetNames[SummaryLookup[DatasetNames(
                        dataset_name).name].value].value,
                )

                _LOG.info("saving summary table",
                          out_dataset_name=str(out_dname))
                write_dataframe(summary_dataframe, str(out_dname), fid)
Example No. 26
def collect_sbt_ancillary(
    acquisition,
    lonlats,
    ancillary_path,
    invariant_fname=None,
    out_group=None,
    compression=H5CompressionFilter.LZF,
    filter_opts=None,
):
    """
    Collects the ancillary data required for surface brightness
    temperature.

    :param acquisition:
        An instance of an `Acquisition` object.

    :param lonlats:
        A `list` of tuples containing (longitude, latitude) coordinates.

    :param ancillary_path:
        A `str` containing the directory pathname to the ECMWF
        ancillary data.

    :param invariant_fname:
        A `str` containing the file pathname to the invariant geopotential
        data.

    :param out_group:
        If set to None (default) then the results will be returned
        as an in-memory hdf5 file, i.e. the `core` driver. Otherwise,
        a writeable HDF5 `Group` object.

    :param compression:
        The compression filter to use.
        Default is H5CompressionFilter.LZF

    :filter_opts:
        A dict of key value pairs available to the given configuration
        instance of H5CompressionFilter. For example
        H5CompressionFilter.LZF has the keywords *chunks* and *shuffle*
        available.
        Default is None, which will use the default settings for the
        chosen H5CompressionFilter instance.

    :return:
        An opened `h5py.File` object, that is either in-memory using the
        `core` driver, or on disk.
    """
    # Initialise the output files
    if out_group is None:
        fid = h5py.File("sbt-ancillary.h5",
                        "w",
                        driver="core",
                        backing_store=False)
    else:
        fid = out_group

    fid.attrs["sbt-ancillary"] = True

    dt = acquisition.acquisition_datetime

    description = ("Combined Surface and Pressure Layer data retrieved from "
                   "the ECWMF catalogue.")
    attrs = {"description": description, "Date used for querying ECWMF": dt}

    for i, lonlat in enumerate(lonlats):
        pnt = POINT_FMT.format(p=i)
        # get data located at the surface
        dew = ecwmf_dewpoint_temperature(ancillary_path, lonlat, dt)
        t2m = ecwmf_temperature_2metre(ancillary_path, lonlat, dt)
        sfc_prs = ecwmf_surface_pressure(ancillary_path, lonlat, dt)
        sfc_hgt = ecwmf_elevation(invariant_fname, lonlat)
        sfc_rh = relative_humdity(t2m[0], dew[0])

        # output the scalar data along with the attrs
        dname = ppjoin(pnt, DatasetName.DEWPOINT_TEMPERATURE.value)
        write_scalar(dew[0], dname, fid, dew[1])

        dname = ppjoin(pnt, DatasetName.TEMPERATURE_2M.value)
        write_scalar(t2m[0], dname, fid, t2m[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_PRESSURE.value)
        write_scalar(sfc_prs[0], dname, fid, sfc_prs[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_GEOPOTENTIAL.value)
        write_scalar(sfc_hgt[0], dname, fid, sfc_hgt[1])

        dname = ppjoin(pnt, DatasetName.SURFACE_RELATIVE_HUMIDITY.value)
        attrs = {"description": "Relative Humidity calculated at the surface"}
        write_scalar(sfc_rh, dname, fid, attrs)

        # get the data from each of the pressure levels (1 -> 1000 ISBL)
        gph = ecwmf_geo_potential(ancillary_path, lonlat, dt)
        tmp = ecwmf_temperature(ancillary_path, lonlat, dt)
        rh = ecwmf_relative_humidity(ancillary_path, lonlat, dt)

        dname = ppjoin(pnt, DatasetName.GEOPOTENTIAL.value)
        write_dataframe(gph[0],
                        dname,
                        fid,
                        compression,
                        attrs=gph[1],
                        filter_opts=filter_opts)

        dname = ppjoin(pnt, DatasetName.TEMPERATURE.value)
        write_dataframe(tmp[0],
                        dname,
                        fid,
                        compression,
                        attrs=tmp[1],
                        filter_opts=filter_opts)

        dname = ppjoin(pnt, DatasetName.RELATIVE_HUMIDITY.value)
        write_dataframe(rh[0],
                        dname,
                        fid,
                        compression,
                        attrs=rh[1],
                        filter_opts=filter_opts)

        # combine the surface and higher pressure layers into a single array
        cols = [
            "GeoPotential_Height", "Pressure", "Temperature",
            "Relative_Humidity"
        ]
        layers = pandas.DataFrame(columns=cols,
                                  index=range(rh[0].shape[0]),
                                  dtype="float64")

        layers["GeoPotential_Height"] = gph[0]["GeoPotential_Height"].values
        layers["Pressure"] = ECWMF_LEVELS[::-1]
        layers["Temperature"] = tmp[0]["Temperature"].values
        layers["Relative_Humidity"] = rh[0]["Relative_Humidity"].values

        # define the surface level
        df = pandas.DataFrame(
            {
                "GeoPotential_Height": sfc_hgt[0],
                "Pressure": sfc_prs[0],
                "Temperature": kelvin_2_celcius(t2m[0]),
                "Relative_Humidity": sfc_rh,
            },
            index=[0],
        )

        # MODTRAN requires the height to be ascending
        # and the pressure to be descending
        wh = (layers["GeoPotential_Height"] >
              sfc_hgt[0]) & (layers["Pressure"] < sfc_prs[0].round())
        df = df.append(layers[wh])
        df.reset_index(drop=True, inplace=True)

        dname = ppjoin(pnt, DatasetName.ATMOSPHERIC_PROFILE.value)
        write_dataframe(df,
                        dname,
                        fid,
                        compression,
                        attrs=attrs,
                        filter_opts=filter_opts)

        fid[pnt].attrs["lonlat"] = lonlat

    if out_group is None:
        return fid
Example No. 27
def run_modtran(acquisitions,
                atmospherics_group,
                workflow,
                npoints,
                point,
                albedos,
                modtran_exe,
                basedir,
                out_group,
                compression=H5CompressionFilter.LZF,
                filter_opts=None):
    """
    Run MODTRAN and return the flux and channel results.
    """
    lonlat = atmospherics_group[POINT_FMT.format(p=point)].attrs['lonlat']

    # determine the output group/file
    if out_group is None:
        fid = h5py.File('atmospheric-results.h5',
                        driver='core',
                        backing_store=False)
    else:
        fid = out_group

    # initial attributes
    base_attrs = {
        'Point': point,
        'lonlat': lonlat,
        'datetime': acquisitions[0].acquisition_datetime
    }

    base_path = ppjoin(GroupName.ATMOSPHERIC_RESULTS_GRP.value,
                       POINT_FMT.format(p=point))

    # what atmospheric calculations have been run and how many points
    group_name = GroupName.ATMOSPHERIC_RESULTS_GRP.value
    if group_name not in fid:
        fid.create_group(group_name)

    fid[group_name].attrs['npoints'] = npoints
    applied = workflow == Workflow.STANDARD or workflow == Workflow.NBAR
    fid[group_name].attrs['nbar_atmospherics'] = applied
    applied = workflow == Workflow.STANDARD or workflow == Workflow.SBT
    fid[group_name].attrs['sbt_atmospherics'] = applied

    acqs = acquisitions
    for albedo in albedos:
        base_attrs['Albedo'] = albedo.value
        workpath = pjoin(basedir, POINT_FMT.format(p=point),
                         ALBEDO_FMT.format(a=albedo.value))
        group_path = ppjoin(base_path, ALBEDO_FMT.format(a=albedo.value))

        subprocess.check_call([modtran_exe], cwd=workpath)
        chn_fname = glob.glob(pjoin(workpath, '*.chn'))[0]

        if albedo == Albedos.ALBEDO_TH:
            acq = [acq for acq in acqs if acq.band_type == BandType.THERMAL][0]
            channel_data = read_modtran_channel(chn_fname, acq, albedo)

            # upward radiation
            attrs = base_attrs.copy()
            dataset_name = DatasetName.UPWARD_RADIATION_CHANNEL.value
            attrs['description'] = ('Upward radiation channel output from '
                                    'MODTRAN')
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[0],
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)

            # downward radiation
            attrs = base_attrs.copy()
            dataset_name = DatasetName.DOWNWARD_RADIATION_CHANNEL.value
            attrs['description'] = ('Downward radiation channel output from '
                                    'MODTRAN')
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data[1],
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)
        else:
            acq = [
                acq for acq in acqs if acq.band_type == BandType.REFLECTIVE
            ][0]
            flux_fname = glob.glob(pjoin(workpath, '*_b.flx'))[0]
            flux_data, altitudes = read_modtran_flux(flux_fname)
            channel_data = read_modtran_channel(chn_fname, acq, albedo)

            # output the flux data
            attrs = base_attrs.copy()
            dset_name = ppjoin(group_path, DatasetName.FLUX.value)
            attrs['description'] = 'Flux output from MODTRAN'
            write_dataframe(flux_data,
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)

            # output the altitude data
            attrs = base_attrs.copy()
            attrs['description'] = 'Altitudes output from MODTRAN'
            attrs['altitude_levels'] = altitudes.shape[0]
            attrs['units'] = 'km'
            dset_name = ppjoin(group_path, DatasetName.ALTITUDES.value)
            write_dataframe(altitudes,
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)

            # accumulate the solar irradiance
            transmittance = albedo == Albedos.ALBEDO_T
            response = acq.spectral_response()
            accumulated = calculate_solar_radiation(flux_data, response,
                                                    altitudes.shape[0],
                                                    transmittance)

            attrs = base_attrs.copy()
            dset_name = ppjoin(group_path, DatasetName.SOLAR_IRRADIANCE.value)
            description = ("Accumulated solar irradiation for point {} "
                           "and albedo {}.")
            attrs['description'] = description.format(point, albedo.value)
            write_dataframe(accumulated,
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)

            attrs = base_attrs.copy()
            dataset_name = DatasetName.CHANNEL.value
            attrs['description'] = 'Channel output from MODTRAN'
            dset_name = ppjoin(group_path, dataset_name)
            write_dataframe(channel_data,
                            dset_name,
                            fid,
                            compression,
                            attrs=attrs,
                            filter_opts=filter_opts)

    # metadata for a given point
    alb_vals = [alb.value for alb in workflow.albedos]
    fid[base_path].attrs['lonlat'] = lonlat
    fid[base_path].attrs['datetime'] = acqs[0].acquisition_datetime.isoformat()
    fid[base_path].attrs.create('albedos', data=alb_vals, dtype=VLEN_STRING)

    if out_group is None:
        return fid