import os
import unittest as ut
from shutil import rmtree

import yaml

# NOTE: `DataTree` and `module` are provided by the package under test;
# their import lines are not shown in this excerpt.


class TestNodesForTemplate(ut.TestCase):
    def setUp(self):
        os.makedirs('testdata/folder1')
        os.makedirs('testdata/folder2')
        doc0 = open('testdata/document.yaml', 'w')
        doc1 = open('testdata/folder1/document.yaml', 'w')
        doc2 = open('testdata/folder2/document.yaml', 'w')
        data = dict(title='Test document', content='Test data')
        for stream in [doc0, doc1, doc2]:
            yaml.dump(data, stream)
            stream.close()
        self.node = DataTree('testdata')

    def tearDown(self):
        rmtree('testdata')

    def test_no_leading_slash(self):
        self.assertEqual(
            list(module.get_nodes_for_template(self.node, 'folder1/index.html').values())[0],
            self.node.root.folder1)

    def test_index_html(self):
        self.assertEqual(
            list(module.get_nodes_for_template(self.node, '/folder1/index.html').values())[0],
            self.node.root.folder1)

    def test_two_deep(self):
        self.assertEqual(
            list(module.get_nodes_for_template(self.node, '/folder1/document/index.html').values())[0],
            self.node.root.folder1.document)

    def test_reverse_url(self):
        node = list(module.get_nodes_for_template(self.node, '/folder1/document/index.html').values())[0]
        self.assertEqual(node.get_absolute_url(), '/folder1/document')

    def test_paper1_html(self):
        self.assertEqual(
            list(module.get_nodes_for_template(self.node, '/folder1.html').values())[0],
            self.node.root.folder1)

    def test_unknown_html(self):
        self.assertDictEqual(
            module.get_nodes_for_template(self.node, '/paper3.html'),
            {'/paper3.html': self.node.root})

    def test_children(self):
        nodes = {
            '/folder2/index.html': self.node.get_by_url('/folder2'),
            '/folder1/index.html': self.node.get_by_url('/folder1'),
            '/document/index.html': self.node.get_by_url('/document'),
        }
        self.assertDictEqual(
            module.get_nodes_for_template(self.node, '/_children/index.html'),
            nodes)

    def test_grand_children(self):
        nodes = {
            '/folder2/document.html': self.node.get_by_url('/folder2/document'),
            '/folder1/document.html': self.node.get_by_url('/folder1/document'),
            '/document/title.html': self.node.get_by_url('/document/title'),
            '/document/content.html': self.node.get_by_url('/document/content'),
        }
        self.assertDictEqual(
            module.get_nodes_for_template(self.node, '/_children/_children.html'),
            nodes)

    def test_children_html(self):
        nodes = {'/folder1/document.html': self.node.get_by_url('/folder1/document')}
        self.assertDictEqual(
            module.get_nodes_for_template(self.node, '/folder1/_children.html'),
            nodes)
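# Illustration (derived from the assertions above, not new behavior): the special
# "_children" path segment in a template path fans out to one rendered page per
# child node. For the tree built in setUp():
#
#   module.get_nodes_for_template(self.node, '/_children/index.html')
#   # => {'/folder1/index.html': <folder1 node>,
#   #     '/folder2/index.html': <folder2 node>,
#   #     '/document/index.html': <document node>}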
# Excerpted test method (from a pytest-style test class; `converted_zarr` is a
# fixture providing the path to a converted zarr file).
def test_get_dataset(self, converted_zarr):
    ed = EchoData.from_file(converted_raw_path=converted_zarr)
    node = DataTree()
    # `_EchoData__get_dataset` is the name-mangled form of the private
    # `EchoData.__get_dataset` method
    result = ed._EchoData__get_dataset(node)
    ed_node = ed._tree['Sonar']
    ed_result = ed._EchoData__get_dataset(ed_node)
    # a bare DataTree node has no dataset attached; the Sonar node does
    assert result is None
    assert isinstance(ed_result, xr.Dataset)
def open_raw(
    raw_file: Optional["PathHint"] = None,
    sonar_model: Optional["SonarModelsHint"] = None,
    xml_path: Optional["PathHint"] = None,
    convert_params: Optional[Dict[str, str]] = None,
    storage_options: Optional[Dict[str, str]] = None,
) -> Optional[EchoData]:
    """Create an EchoData object containing parsed data from a single raw data file.

    The EchoData object can be used for adding metadata and ancillary data
    as well as to serialize the parsed data to zarr or netcdf.

    Parameters
    ----------
    raw_file : str
        path to raw data file
    sonar_model : str
        model of the sonar instrument

        - ``EK60``: Kongsberg Simrad EK60 echosounder
        - ``ES70``: Kongsberg Simrad ES70 echosounder
        - ``EK80``: Kongsberg Simrad EK80 echosounder
        - ``EA640``: Kongsberg EA640 echosounder
        - ``AZFP``: ASL Environmental Sciences AZFP echosounder
        - ``AD2CP``: Nortek Signature series ADCP
          (tested with Signature 500 and Signature 1000)

    xml_path : str
        path to XML config file used by AZFP
    convert_params : dict
        parameters (metadata) that may not exist in the raw file
        and need to be added to the converted file
    storage_options : dict
        options for cloud storage

    Returns
    -------
    EchoData object
    """
    if (sonar_model is None) and (raw_file is None):
        print("Please specify the path to the raw data file and the sonar model.")
        return

    # Check inputs
    if convert_params is None:
        convert_params = {}
    storage_options = storage_options if storage_options is not None else {}

    if sonar_model is None:
        print("Please specify the sonar model.")
        if xml_path is None:
            sonar_model = "EK60"
            warnings.warn(
                "Current behavior is to default sonar_model='EK60' when no XML file is passed in as argument. "  # noqa
                "Specifying sonar_model='EK60' will be required in the future, "
                "since .raw extension is used for many Kongsberg/Simrad sonar systems.",
                DeprecationWarning,
                2,
            )
        else:
            sonar_model = "AZFP"
            warnings.warn(
                "Current behavior is to set sonar_model='AZFP' when an XML file is passed in as argument. "  # noqa
                "Specifying sonar_model='AZFP' will be required in the future.",
                DeprecationWarning,
                2,
            )
    else:
        # Uppercase the model in case people use lowercase
        sonar_model = sonar_model.upper()  # type: ignore

    # Check models
    if sonar_model not in SONAR_MODELS:
        raise ValueError(
            f"Unsupported echosounder model: {sonar_model}\nMust be one of: {list(SONAR_MODELS)}"  # noqa
        )

    # Check paths and file types
    if raw_file is None:
        raise FileNotFoundError("Please specify the path to the raw data file.")

    # Check for path type
    if isinstance(raw_file, Path):
        raw_file = str(raw_file)
    if not isinstance(raw_file, str):
        raise TypeError("file must be a string or Path")

    assert sonar_model is not None

    # Check file extension and existence
    file_chk, xml_chk = _check_file(raw_file, sonar_model, xml_path, storage_options)

    # TODO: the if-else below only works for the AZFP vs EK contrast,
    #  but is brittle since it is abusing params by using it implicitly
    if SONAR_MODELS[sonar_model]["xml"]:
        params = xml_chk
    else:
        params = "ALL"  # reserved to control parsing only a certain type of datagram

    # Parse raw file and organize data into groups
    parser = SONAR_MODELS[sonar_model]["parser"](
        file_chk, params=params, storage_options=storage_options
    )
    parser.parse_raw()

    setgrouper = SONAR_MODELS[sonar_model]["set_groups"](
        parser,
        input_file=file_chk,
        output_path=None,
        sonar_model=sonar_model,
        params=_set_convert_params(convert_params),
    )

    # Setup tree dictionary
    tree_dict = {}

    # Top-level date_created varies depending on sonar model
    # Top-level is called "root" within tree
    if sonar_model in ["EK60", "ES70", "EK80", "ES80", "EA640"]:
        tree_dict["root"] = setgrouper.set_toplevel(
            sonar_model=sonar_model,
            date_created=parser.config_datagram["timestamp"],
        )
    else:
        tree_dict["root"] = setgrouper.set_toplevel(
            sonar_model=sonar_model, date_created=parser.ping_time[0]
        )
    tree_dict["Environment"] = setgrouper.set_env()
    tree_dict["Platform"] = setgrouper.set_platform()
    if sonar_model in ["EK60", "ES70", "EK80", "ES80", "EA640"]:
        tree_dict["Platform/NMEA"] = setgrouper.set_nmea()
    tree_dict["Provenance"] = setgrouper.set_provenance()
    # Allocate a tree_dict entry for Sonar; otherwise a DataTree error occurs
    tree_dict["Sonar"] = None

    # Set multi beam groups
    beam_groups = setgrouper.set_beam()
    if isinstance(beam_groups, xr.Dataset):
        # if it's a single dataset like the EK60, make it into a list
        beam_groups = [beam_groups]

    valid_beam_groups_count = 0
    for idx, beam_group in enumerate(beam_groups, start=1):
        if beam_group is not None:
            valid_beam_groups_count += 1
            tree_dict[f"Sonar/Beam_group{idx}"] = beam_group

    if sonar_model in ["EK80", "ES80", "EA640"]:
        tree_dict["Sonar"] = setgrouper.set_sonar(beam_group_count=valid_beam_groups_count)
    else:
        tree_dict["Sonar"] = setgrouper.set_sonar()
    tree_dict["Vendor_specific"] = setgrouper.set_vendor()

    # Create tree and echodata
    # TODO: make the creation of tree dynamically generated from yaml
    tree = DataTree.from_dict(tree_dict)
    echodata = EchoData(source_file=file_chk, xml_path=xml_chk, sonar_model=sonar_model)
    echodata._set_tree(tree)
    echodata._load_tree()

    return echodata
def combine_echodata(echodatas: List[EchoData], combine_attrs="override") -> EchoData:
    """
    Combines multiple `EchoData` objects into a single `EchoData` object.

    Parameters
    ----------
    echodatas : List[EchoData]
        The list of `EchoData` objects to be combined.
    combine_attrs : { "override", "drop", "identical", "no_conflicts", "overwrite_conflicts" }
        String indicating how to combine attrs of the `EchoData` objects being merged.
        This parameter matches the identically named xarray parameter
        (see https://xarray.pydata.org/en/latest/generated/xarray.combine_nested.html)
        with the exception of the "overwrite_conflicts" value.

        * "override": Default. Skip comparing and copy attrs from the first
          `EchoData` object to the result.
        * "drop": Empty attrs on the returned `EchoData` object.
        * "identical": All attrs must be the same on every object.
        * "no_conflicts": Attrs from all objects are combined; any that have the
          same name must also have the same value.
        * "overwrite_conflicts": Attrs from all `EchoData` objects are combined;
          attrs with conflicting keys will be overwritten by later `EchoData` objects.

    Returns
    -------
    EchoData
        An `EchoData` object with all of the data from the input `EchoData`
        objects combined.

    Raises
    ------
    ValueError
        If `echodatas` contains `EchoData` objects with different or `None`
        `sonar_model` values (i.e., all `EchoData` objects must have the same
        non-None `sonar_model` value).
    ValueError
        If `EchoData` objects have conflicting source file names.

    Warns
    -----
    UserWarning
        If the `sonar_model` of the input `EchoData` objects is `"EK60"` and any
        `EchoData` objects have non-monotonically increasing `ping_time`, `time1`,
        or `time2` values, the corresponding values in the output `EchoData` object
        will be increased starting at the timestamp where the reversal occurs such
        that all values in the output are monotonically increasing. Additionally,
        the original `ping_time`, `time1`, or `time2` values will be stored in the
        `Provenance` group, although this behavior may change in future versions.

    Warnings
    --------
    Changes in parameters between `EchoData` objects are not currently checked;
    however, they may raise an error in future versions.

    Notes
    -----
    * `EchoData` objects are combined by combining their groups individually.
    * Attributes from all groups before the combination will be stored in the
      `Provenance` group, although this behavior may change in future versions.
    * The `source_file` and `converted_raw_path` attributes will be copied from
      the first `EchoData` object in the given list, but this may change in
      future versions.
    Examples
    --------
    >>> ed1 = echopype.open_converted("file1.nc")
    >>> ed2 = echopype.open_converted("file2.zarr")
    >>> combined = echopype.combine_echodata([ed1, ed2])
    """
    tree_dict = {}
    result = EchoData()
    if len(echodatas) == 0:
        return result
    result.source_file = echodatas[0].source_file
    result.converted_raw_path = echodatas[0].converted_raw_path

    sonar_model = None
    for echodata in echodatas:
        if echodata.sonar_model is None:
            raise ValueError("all EchoData objects must have non-None sonar_model values")
        elif sonar_model is None:
            sonar_model = echodata.sonar_model
        elif echodata.sonar_model != sonar_model:
            raise ValueError("all EchoData objects must have the same sonar_model value")

    # ping time before and after reversal correction
    old_ping_time = None
    new_ping_time = None
    # location time before and after reversal correction
    old_time1 = None
    new_time1 = None
    # mru time before and after reversal correction
    old_time2 = None
    new_time2 = None
    # time3 before and after reversal correction
    old_time3 = None
    new_time3 = None

    # all attributes before combination
    # { group1: [echodata1 attrs, echodata2 attrs, ...], ... }
    old_attrs: Dict[str, List[Dict[str, Any]]] = dict()

    for group, value in EchoData.group_map.items():
        group_datasets = [
            getattr(echodata, group)
            for echodata in echodatas
            if getattr(echodata, group) is not None
        ]
        if group in ("top", "sonar"):
            combined_group = getattr(echodatas[0], group)
        elif group == "provenance":
            combined_group = assemble_combined_provenance(
                [
                    echodata.source_file
                    if echodata.source_file is not None
                    else echodata.converted_raw_path
                    for echodata in echodatas
                ]
            )
        else:
            if len(group_datasets) == 0:
                setattr(result, group, None)
                continue

            concat_dim = SONAR_MODELS[sonar_model]["concat_dims"].get(
                group, SONAR_MODELS[sonar_model]["concat_dims"]["default"]
            )
            concat_data_vars = SONAR_MODELS[sonar_model]["concat_data_vars"].get(
                group, SONAR_MODELS[sonar_model]["concat_data_vars"]["default"]
            )
            combined_group = xr.combine_nested(
                group_datasets,
                [concat_dim],
                data_vars=concat_data_vars,
                coords="minimal",
                combine_attrs="drop"
                if combine_attrs == "overwrite_conflicts"
                else combine_attrs,
            )
            if combine_attrs == "overwrite_conflicts":
                combined_group.attrs.update(union_attrs(group_datasets))

            if group == "beam":
                if sonar_model == "EK80":
                    combined_group["transceiver_software_version"] = combined_group[
                        "transceiver_software_version"
                    ].astype("<U10")
                    combined_group["channel"] = combined_group["channel"].astype("<U50")
                elif sonar_model == "EK60":
                    combined_group["gpt_software_version"] = combined_group[
                        "gpt_software_version"
                    ].astype("<U10")
                    # TODO: investigate further why we need to do .astype("<U50")
                    combined_group["channel"] = combined_group["channel"].astype("<U50")

            if sonar_model != "AD2CP":
                combined_group, old_ping_time, new_ping_time = check_and_correct_reversed_time(
                    combined_group, old_ping_time, new_ping_time, "ping_time", sonar_model
                )
                if group != "nmea":
                    combined_group, old_time1, new_time1 = check_and_correct_reversed_time(
                        combined_group, old_time1, new_time1, "time1", sonar_model
                    )
                combined_group, old_time2, new_time2 = check_and_correct_reversed_time(
                    combined_group, old_time2, new_time2, "time2", sonar_model
                )
                combined_group, old_time3, new_time3 = check_and_correct_reversed_time(
                    combined_group, old_time3, new_time3, "time3", sonar_model
                )

        if len(group_datasets) > 1:
            old_attrs[group] = [group_dataset.attrs for group_dataset in group_datasets]
        if combined_group is not None:
            # xarray inserts this dimension when concatenating along multiple dimensions
            combined_group = combined_group.drop_dims("concat_dim", errors="ignore")
            combined_group = set_encodings(combined_group)
        if value["ep_group"] is None:
            tree_dict["root"] = combined_group
        else:
            tree_dict[value["ep_group"]] = combined_group

    # Set tree into echodata object
    result._set_tree(tree=DataTree.from_dict(tree_dict))
    result._load_tree()

    # save ping time before reversal correction
    if old_ping_time is not None:
        result.provenance["old_ping_time"] = old_ping_time
        result.provenance.attrs["reversed_ping_times"] = 1
    # save location time before reversal correction
    if old_time1 is not None:
        result.provenance["old_time1"] = old_time1
        result.provenance.attrs["reversed_ping_times"] = 1
    # save mru time before reversal correction
    if old_time2 is not None:
        result.provenance["old_time2"] = old_time2
        result.provenance.attrs["reversed_ping_times"] = 1
    # save time3 before reversal correction
    if old_time3 is not None:
        result.provenance["old_time3"] = old_time3
        result.provenance.attrs["reversed_ping_times"] = 1

    # TODO: possible parameter to disable original attributes and original
    #  ping_time storage in provenance group?
    # save attrs from before combination
    for group in old_attrs:
        all_group_attrs = set()
        for group_attrs in old_attrs[group]:
            for attr in group_attrs:
                all_group_attrs.add(attr)
        echodata_filenames = []
        for ed in echodatas:
            if ed.source_file is not None:
                filepath = ed.source_file
            elif ed.converted_raw_path is not None:
                filepath = ed.converted_raw_path
            else:
                # unreachable
                raise ValueError("EchoData object does not have a file path")
            filename = Path(filepath).name
            if filename in echodata_filenames:
                raise ValueError("EchoData objects have conflicting filenames")
            echodata_filenames.append(filename)
        attrs = xr.DataArray(
            [
                [group_attrs.get(attr) for attr in all_group_attrs]
                for group_attrs in old_attrs[group]
            ],
            coords={
                "echodata_filename": echodata_filenames,
                f"{group}_attr_key": list(all_group_attrs),
            },
            dims=["echodata_filename", f"{group}_attr_key"],
        )
        result.provenance = result.provenance.assign({f"{group}_attrs": attrs})

    # Add back sonar model
    result.sonar_model = sonar_model

    return result
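# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): combining two converted
# files and checking whether any time reversals were corrected. File names are
# hypothetical; the provenance variables queried below are the ones written by
# combine_echodata above.
#
#   import echopype
#
#   ed1 = echopype.open_converted("file1.nc")
#   ed2 = echopype.open_converted("file2.zarr")
#   combined = echopype.combine_echodata([ed1, ed2], combine_attrs="override")
#   if combined.provenance.attrs.get("reversed_ping_times"):
#       # original, uncorrected timestamps are kept in the Provenance group
#       print(combined.provenance["old_ping_time"])
# ---------------------------------------------------------------------------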