Example #1
File: test.py  Project: korenmiklos/oak
class TestNodesForTemplate(ut.TestCase):
    def setUp(self):
        os.makedirs('testdata/folder1')
        os.makedirs('testdata/folder2')
        doc0 = open('testdata/document.yaml', 'w')
        doc1 = open('testdata/folder1/document.yaml', 'w')
        doc2 = open('testdata/folder2/document.yaml', 'w')
        data = dict(title='Test document', content='Test data')
        
        for stream in [doc0, doc1, doc2]:
            yaml.dump(data, stream)
            stream.close()

        self.node = DataTree('testdata')

    def tearDown(self):
        rmtree('testdata')

    def test_no_leading_slash(self):
        self.assertEqual(module.get_nodes_for_template(self.node, 'folder1/index.html').values()[0], self.node.root.folder1)

    def test_index_html(self):
        self.assertEqual(module.get_nodes_for_template(self.node, '/folder1/index.html').values()[0], self.node.root.folder1)

    def test_two_deep(self):
        self.assertEqual(module.get_nodes_for_template(self.node, '/folder1/document/index.html').values()[0], self.node.root.folder1.document)

    def test_reverse_url(self):
        self.assertEqual(module.get_nodes_for_template(self.node, '/folder1/document/index.html').values()[0].get_absolute_url(), '/folder1/document')

    def test_paper1_html(self):
        self.assertEqual(module.get_nodes_for_template(self.node, '/folder1.html').values()[0], self.node.root.folder1)

    def test_unknown_html(self):
        self.assertDictEqual(module.get_nodes_for_template(self.node, '/paper3.html'), {'/paper3.html': self.node.root})

    def test_children(self):
        nodes = {'/folder2/index.html': self.node.get_by_url('/folder2'),
            '/folder1/index.html': self.node.get_by_url('/folder1'),
            '/document/index.html': self.node.get_by_url('/document'),
            }
        self.assertDictEqual(module.get_nodes_for_template(self.node, '/_children/index.html'), nodes)

    def test_grand_children(self):
        nodes = {'/folder2/document.html': self.node.get_by_url('/folder2/document'),
            '/folder1/document.html': self.node.get_by_url('/folder1/document'),
            '/document/title.html': self.node.get_by_url('/document/title'),
            '/document/content.html': self.node.get_by_url('/document/content'),
            }
        self.assertDictEqual(module.get_nodes_for_template(self.node, '/_children/_children.html'), nodes)

    def test_children_html(self):
        nodes = {'/folder1/document.html': self.node.get_by_url('/folder1/document')}
        self.assertDictEqual(module.get_nodes_for_template(self.node, '/folder1/_children.html'), nodes)
Example #2
    def test_get_dataset(self, converted_zarr):
        ed = EchoData.from_file(converted_raw_path=converted_zarr)
        node = DataTree()
        result = ed._EchoData__get_dataset(node)

        ed_node = ed._tree['Sonar']
        ed_result = ed._EchoData__get_dataset(ed_node)

        assert result is None
        assert isinstance(ed_result, xr.Dataset)
Example #3
def open_raw(
    raw_file: Optional["PathHint"] = None,
    sonar_model: Optional["SonarModelsHint"] = None,
    xml_path: Optional["PathHint"] = None,
    convert_params: Optional[Dict[str, str]] = None,
    storage_options: Optional[Dict[str, str]] = None,
) -> Optional[EchoData]:
    """Create an EchoData object containing parsed data from a single raw data file.

    The EchoData object can be used to add metadata and ancillary data,
    as well as to serialize the parsed data to zarr or netcdf.

    Parameters
    ----------
    raw_file : str
        path to raw data file
    sonar_model : str
        model of the sonar instrument

        - ``EK60``: Kongsberg Simrad EK60 echosounder
        - ``ES70``: Kongsberg Simrad ES70 echosounder
        - ``EK80``: Kongsberg Simrad EK80 echosounder
        - ``EA640``: Kongsberg EA640 echosounder
        - ``AZFP``: ASL Environmental Sciences AZFP echosounder
        - ``AD2CP``: Nortek Signature series ADCP
          (tested with Signature 500 and Signature 1000)

    xml_path : str
        path to XML config file used by AZFP
    convert_params : dict
        parameters (metadata) that may not exist in the raw file
        and need to be added to the converted file
    storage_options : dict
        options for cloud storage

    Returns
    -------
    EchoData object
    """
    if (sonar_model is None) and (raw_file is None):
        print("Please specify the path to the raw data file and the sonar model.")
        return

    # Check inputs
    if convert_params is None:
        convert_params = {}
    storage_options = storage_options if storage_options is not None else {}

    if sonar_model is None:
        print("Please specify the sonar model.")

        if xml_path is None:
            sonar_model = "EK60"
            warnings.warn(
                "Current behavior is to default sonar_model='EK60' when no XML file is passed in as argument. "  # noqa
                "Specifying sonar_model='EK60' will be required in the future, "
                "since .raw extension is used for many Kongsberg/Simrad sonar systems.",
                DeprecationWarning,
                2,
            )
        else:
            sonar_model = "AZFP"
            warnings.warn(
                "Current behavior is to set sonar_model='AZFP' when an XML file is passed in as argument. "  # noqa
                "Specifying sonar_model='AZFP' will be required in the future.",
                DeprecationWarning,
                2,
            )
    else:
        # Uppercased model in case people use lowercase
        sonar_model = sonar_model.upper()  # type: ignore

        # Check models
        if sonar_model not in SONAR_MODELS:
            raise ValueError(
                f"Unsupported echosounder model: {sonar_model}\nMust be one of: {list(SONAR_MODELS)}"  # noqa
            )

    # Check paths and file types
    if raw_file is None:
        raise FileNotFoundError("Please specify the path to the raw data file.")

    # Check for path type
    if isinstance(raw_file, Path):
        raw_file = str(raw_file)
    if not isinstance(raw_file, str):
        raise TypeError("file must be a string or Path")

    assert sonar_model is not None

    # Check file extension and existence
    file_chk, xml_chk = _check_file(raw_file, sonar_model, xml_path, storage_options)

    # TODO: the if-else below only works for the AZFP vs EK contrast,
    #  but is brittle since it is abusing params by using it implicitly
    if SONAR_MODELS[sonar_model]["xml"]:
        params = xml_chk
    else:
        params = "ALL"  # reserved to control if only wants to parse a certain type of datagram

    # Parse raw file and organize data into groups
    parser = SONAR_MODELS[sonar_model]["parser"](
        file_chk, params=params, storage_options=storage_options
    )
    parser.parse_raw()
    setgrouper = SONAR_MODELS[sonar_model]["set_groups"](
        parser,
        input_file=file_chk,
        output_path=None,
        sonar_model=sonar_model,
        params=_set_convert_params(convert_params),
    )

    # Setup tree dictionary
    tree_dict = {}

    # Top-level date_created varies depending on sonar model
    # Top-level is called "root" within tree
    if sonar_model in ["EK60", "ES70", "EK80", "ES80", "EA640"]:
        tree_dict["root"] = setgrouper.set_toplevel(
            sonar_model=sonar_model,
            date_created=parser.config_datagram["timestamp"],
        )
    else:
        tree_dict["root"] = setgrouper.set_toplevel(
            sonar_model=sonar_model, date_created=parser.ping_time[0]
        )
    tree_dict["Environment"] = setgrouper.set_env()
    tree_dict["Platform"] = setgrouper.set_platform()
    if sonar_model in ["EK60", "ES70", "EK80", "ES80", "EA640"]:
        tree_dict["Platform/NMEA"] = setgrouper.set_nmea()
    tree_dict["Provenance"] = setgrouper.set_provenance()
    # Allocate a tree_dict entry for Sonar? Otherwise, a DataTree error occurs
    tree_dict["Sonar"] = None

    # Set multi beam groups
    beam_groups = setgrouper.set_beam()
    if isinstance(beam_groups, xr.Dataset):
        # if it's a single dataset like the ek60, make into list
        beam_groups = [beam_groups]

    valid_beam_groups_count = 0
    for idx, beam_group in enumerate(beam_groups, start=1):
        if beam_group is not None:
            valid_beam_groups_count += 1
            tree_dict[f"Sonar/Beam_group{idx}"] = beam_group

    if sonar_model in ["EK80", "ES80", "EA640"]:
        tree_dict["Sonar"] = setgrouper.set_sonar(beam_group_count=valid_beam_groups_count)
    else:
        tree_dict["Sonar"] = setgrouper.set_sonar()

    tree_dict["Vendor_specific"] = setgrouper.set_vendor()

    # Create tree and echodata
    # TODO: make the creation of tree dynamically generated from yaml
    tree = DataTree.from_dict(tree_dict)
    echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model)
    echodata._set_tree(tree)
    echodata._load_tree()

    return echodata
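
The docstring above describes the expected inputs and outputs of open_raw; the short sketch below (file names are hypothetical, not taken from the source) shows how the function is typically called and how the resulting EchoData object can be serialized, as the docstring notes, to zarr or netcdf.

# Usage sketch for open_raw (hypothetical file names)
import echopype as ep

# Simrad EK60 .raw file: the sonar model should be given explicitly
ed = ep.open_raw(raw_file="D20170615-T190214.raw", sonar_model="EK60")

# AZFP data additionally requires the instrument XML configuration file
ed_azfp = ep.open_raw(
    raw_file="17082117.01A",
    sonar_model="AZFP",
    xml_path="17041823.XML",
)

# The parsed data can then be serialized, per the docstring, to zarr or netcdf
ed.to_zarr("D20170615-T190214.zarr")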
Example #4
def combine_echodata(echodatas: List[EchoData],
                     combine_attrs="override") -> EchoData:
    """
    Combines multiple `EchoData` objects into a single `EchoData` object.

    Parameters
    ----------
    echodatas: List[EchoData]
        The list of `EchoData` objects to be combined.
    combine_attrs: { "override", "drop", "identical", "no_conflicts", "overwrite_conflicts" }
        String indicating how to combine attrs of the `EchoData` objects being merged.
        This parameter matches the identically named xarray parameter
        (see https://xarray.pydata.org/en/latest/generated/xarray.combine_nested.html)
        with the exception of the "overwrite_conflicts" value.

        * "override": Default. skip comparing and copy attrs from the first `EchoData`
          object to the result.
        * "drop": empty attrs on returned `EchoData` object.
        * "identical": all attrs must be the same on every object.
        * "no_conflicts": attrs from all objects are combined,
          any that have the same name must also have the same value.
        * "overwrite_conflicts": attrs from all `EchoData` objects are combined,
          attrs with conflicting keys will be overwritten by later `EchoData` objects.

    Returns
    -------
    EchoData
        An `EchoData` object with all of the data from the input `EchoData` objects combined.

    Raises
    ------
    ValueError
        If `echodatas` contains `EchoData` objects with different or `None` `sonar_model` values
        (i.e., all `EchoData` objects must have the same non-None `sonar_model` value).
    ValueError
        If EchoData objects have conflicting source file names.

    Warns
    -----
    UserWarning
        If the `sonar_model` of the input `EchoData` objects is `"EK60"` and any `EchoData` objects
        have non-monotonically increasing `ping_time`, `time1` or `time2` values,
        the corresponding values in the output `EchoData` object will be increased starting at the
        timestamp where the reversal occurs such that all values in the output are monotonically
        increasing. Additionally, the original `ping_time`, `time1` or `time2` values
        will be stored in the `Provenance` group, although this behavior may change in future
        versions.

    Warnings
    --------
    Changes in parameters between `EchoData` objects are not currently checked;
    however, they may raise an error in future versions.

    Notes
    -----
    * `EchoData` objects are combined by combining their groups individually.
    * Attributes from all groups before the combination will be stored in the provenance group,
      although this behavior may change in future versions.
    * The `source_file` and `converted_raw_path` attributes will be copied from the first
      `EchoData` object in the given list, but this may change in future versions.

    Examples
    --------
    >>> ed1 = echopype.open_converted("file1.nc")
    >>> ed2 = echopype.open_converted("file2.zarr")
    >>> combined = echopype.combine_echodata([ed1, ed2])
    """

    tree_dict = {}
    result = EchoData()
    if len(echodatas) == 0:
        return result
    result.source_file = echodatas[0].source_file
    result.converted_raw_path = echodatas[0].converted_raw_path

    sonar_model = None
    for echodata in echodatas:
        if echodata.sonar_model is None:
            raise ValueError(
                "all EchoData objects must have non-None sonar_model values")
        elif sonar_model is None:
            sonar_model = echodata.sonar_model
        elif echodata.sonar_model != sonar_model:
            raise ValueError(
                "all EchoData objects must have the same sonar_model value")

    # ping time before reversal correction
    old_ping_time = None
    # ping time after reversal correction
    new_ping_time = None
    # location time before reversal correction
    old_time1 = None
    # location time after reversal correction
    new_time1 = None
    # mru time before reversal correction
    old_time2 = None
    # mru time after reversal correction
    new_time2 = None
    # time3 before reversal correction
    old_time3 = None
    # time3 after reversal correction
    new_time3 = None

    # all attributes before combination
    # { group1: [echodata1 attrs, echodata2 attrs, ...], ... }
    old_attrs: Dict[str, List[Dict[str, Any]]] = dict()

    for group, value in EchoData.group_map.items():
        group_datasets = [
            getattr(echodata, group) for echodata in echodatas
            if getattr(echodata, group) is not None
        ]
        if group in ("top", "sonar"):
            combined_group = getattr(echodatas[0], group)
        elif group == "provenance":
            combined_group = assemble_combined_provenance([
                echodata.source_file if echodata.source_file is not None else
                echodata.converted_raw_path for echodata in echodatas
            ])
        else:
            if len(group_datasets) == 0:
                setattr(result, group, None)
                continue

            concat_dim = SONAR_MODELS[sonar_model]["concat_dims"].get(
                group, SONAR_MODELS[sonar_model]["concat_dims"]["default"])
            concat_data_vars = SONAR_MODELS[sonar_model][
                "concat_data_vars"].get(
                    group,
                    SONAR_MODELS[sonar_model]["concat_data_vars"]["default"])
            combined_group = xr.combine_nested(
                group_datasets,
                [concat_dim],
                data_vars=concat_data_vars,
                coords="minimal",
                combine_attrs="drop"
                if combine_attrs == "overwrite_conflicts" else combine_attrs,
            )
            if combine_attrs == "overwrite_conflicts":
                combined_group.attrs.update(union_attrs(group_datasets))

            if group == "beam":
                if sonar_model == "EK80":
                    combined_group[
                        "transceiver_software_version"] = combined_group[
                            "transceiver_software_version"].astype("<U10")
                    combined_group["channel"] = combined_group[
                        "channel"].astype("<U50")
                elif sonar_model == "EK60":
                    combined_group["gpt_software_version"] = combined_group[
                        "gpt_software_version"].astype("<U10")

                    # TODO: investigate further why we need to do .astype("<U50")
                    combined_group["channel"] = combined_group[
                        "channel"].astype("<U50")

            if sonar_model != "AD2CP":

                combined_group, old_ping_time, new_ping_time = check_and_correct_reversed_time(
                    combined_group, old_ping_time, new_ping_time, "ping_time",
                    sonar_model)

                if group != "nmea":
                    combined_group, old_time1, new_time1 = check_and_correct_reversed_time(
                        combined_group, old_time1, new_time1, "time1",
                        sonar_model)

                combined_group, old_time2, new_time2 = check_and_correct_reversed_time(
                    combined_group, old_time2, new_time2, "time2", sonar_model)

                combined_group, old_time3, new_time3 = check_and_correct_reversed_time(
                    combined_group, old_time3, new_time3, "time3", sonar_model)

        if len(group_datasets) > 1:
            old_attrs[group] = [
                group_dataset.attrs for group_dataset in group_datasets
            ]
        if combined_group is not None:
            # xarray inserts this dimension when concatenating along multiple dimensions
            combined_group = combined_group.drop_dims("concat_dim",
                                                      errors="ignore")

        combined_group = set_encodings(combined_group)
        if value["ep_group"] is None:
            tree_dict["root"] = combined_group
        else:
            tree_dict[value["ep_group"]] = combined_group

    # Set tree into echodata object
    result._set_tree(tree=DataTree.from_dict(tree_dict))
    result._load_tree()

    # save ping time before reversal correction
    if old_ping_time is not None:
        result.provenance["old_ping_time"] = old_ping_time
        result.provenance.attrs["reversed_ping_times"] = 1
    # save location time before reversal correction
    if old_time1 is not None:
        result.provenance["old_time1"] = old_time1
        result.provenance.attrs["reversed_ping_times"] = 1
    # save mru time before reversal correction
    if old_time2 is not None:
        result.provenance["old_time2"] = old_time2
        result.provenance.attrs["reversed_ping_times"] = 1
    # save time3 before reversal correction
    if old_time3 is not None:
        result.provenance["old_time3"] = old_time3
        result.provenance.attrs["reversed_ping_times"] = 1
    # TODO: possible parameter to disable original attributes and original ping_time storage
    # in provenance group?
    # save attrs from before combination
    for group in old_attrs:
        all_group_attrs = set()
        for group_attrs in old_attrs[group]:
            for attr in group_attrs:
                all_group_attrs.add(attr)
        echodata_filenames = []
        for ed in echodatas:
            if ed.source_file is not None:
                filepath = ed.source_file
            elif ed.converted_raw_path is not None:
                filepath = ed.converted_raw_path
            else:
                # unreachable
                raise ValueError("EchoData object does not have a file path")
            filename = Path(filepath).name
            if filename in echodata_filenames:
                raise ValueError("EchoData objects have conflicting filenames")
            echodata_filenames.append(filename)
        attrs = xr.DataArray(
            [[group_attrs.get(attr) for attr in all_group_attrs]
             for group_attrs in old_attrs[group]],
            coords={
                "echodata_filename": echodata_filenames,
                f"{group}_attr_key": list(all_group_attrs),
            },
            dims=["echodata_filename", f"{group}_attr_key"],
        )
        result.provenance = result.provenance.assign({f"{group}_attrs": attrs})

    # Add back sonar model
    result.sonar_model = sonar_model

    return result
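
The combine_attrs options listed in the docstring control how group attributes are merged; a minimal usage sketch, mirroring the docstring's Examples section (the converted file names are hypothetical):

# Usage sketch for combine_echodata (hypothetical converted files)
import echopype

ed1 = echopype.open_converted("file1.nc")
ed2 = echopype.open_converted("file2.zarr")

# Default: skip comparing attrs and copy them from the first EchoData object
combined = echopype.combine_echodata([ed1, ed2], combine_attrs="override")

# "identical": all attrs must be the same on every object being combined
combined_strict = echopype.combine_echodata([ed1, ed2], combine_attrs="identical")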