def __init__(self) -> None:
    from openghg.util import timestamp_now
    from addict import Dict as aDict

    self._creation_datetime = timestamp_now()
    self._stored = False
    # Use an addict Dict here for easy nested data storage
    self._datasource_table = aDict()
    # Keyed by Datasource UUID
    self._datasource_uuids: Dict[str, str] = {}
    # Hashes of previously uploaded files
    self._file_hashes: Dict[str, str] = {}
    # Keyed by UUID
    self._rank_data = aDict()
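
# Illustrative sketch (not part of the class above, standard addict behaviour): an
# addict Dict lets us assign to nested keys without first creating the intermediate
# dictionaries, which is why it is used for the nested tables above. The key names
# below are hypothetical.
from addict import Dict as aDict

_example_table = aDict()
_example_table["site_a"]["ch4"]["100m"]["keys"] = ["datasource-key-1"]
# Convert back to a plain builtin dict before storing or returning it
_example_plain: dict = _example_table.to_dict()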
def to_dashboard_mobile(data: Dict, filename: Optional[Union[str, Path]] = None) -> Union[Dict, None]:
    """Export the Glasgow LICOR data to JSON for the dashboard

    Args:
        data: Data dictionary
        filename: Filename for export of JSON
    Returns:
        dict or None: Dictionary if no filename given
    """
    from addict import Dict as aDict
    from json import dump

    to_export = aDict()

    for species, species_data in data.items():
        spec_data = species_data["data"]
        metadata = species_data["metadata"]

        latitude = spec_data["latitude"].values.tolist()
        longitude = spec_data["longitude"].values.tolist()
        ch4 = spec_data["ch4"].values.tolist()

        to_export[species]["data"] = {"lat": latitude, "lon": longitude, "z": ch4}
        to_export[species]["metadata"] = metadata

    if filename is not None:
        with open(filename, "w") as f:
            dump(to_export, f)
        return None
    else:
        to_return: Dict = to_export.to_dict()
        return to_return
def init_cfg(prgname, prgdir, libdir, dbg):
    """This procedure returns a dictionary containing the important parts of the configuration"""
    # dbg.dprint(256, "in init_cfg")
    cfg = aDict()
    cfg.prgname = prgname
    cfg.prgdir = prgdir

    if prgdir not in sys.path:
        sys.path.insert(0, prgdir)

    files = [
        os.path.join(prgdir, prgname + '_imp.py'),
        os.path.join(prgdir, prgname + "_cfg.py"),
        os.path.join(prgdir, prgname + "_usg.py"),
    ]

    for f in files:
        res = None
        if f.endswith("imp.py"):
            res = try_import_rx(f, cfg, 'imports')
        elif f.endswith("cfg.py"):
            res = try_import_rx(f, cfg, 'data', 'argdefaults', 'guidefs', tp='rx')
        elif f.endswith("usg.py"):
            res = try_import_rx(f, cfg, 'usage')

        if not isinstance(res, list):
            dbg.exitf(res, "in", f)

    return cfg
def parse_glasow_picarro(
    data_filepath: Union[str, Path],
    site: str,
    network: str,
    inlet: str,
    instrument: str = "picarro",
    sampling_period: Optional[str] = None,
    measurement_type: str = "timeseries",
) -> Dict:
    """Read the Glasgow Science Tower Picarro data

    Args:
        data_filepath: Path to data file
        site: Site code
        network: Network name
        inlet: Inlet height
        instrument: Instrument name
        sampling_period: Measurement sampling period
        measurement_type: Type of measurement e.g. timeseries
    Returns:
        dict: Dictionary of processed data
    """
    from addict import Dict as aDict
    from warnings import warn
    import pandas as pd

    warn(message="Temporary function used to read Glasgow Science Tower Picarro data")

    df = pd.read_csv(data_filepath, index_col=[0], parse_dates=True)
    df = df.dropna(axis="rows", how="any")

    # We just want the concentration values for now
    species = ["co2", "ch4"]
    rename_cols = {f" {s}_C": s for s in species}
    df = df.rename(columns=rename_cols)

    site = "GST"
    long_site_name = "Glasgow Science Centre Tower"
    units = {"ch4": "ppb", "co2": "ppm"}

    if sampling_period is None:
        sampling_period = "NOT_SET"

    gas_data = aDict()
    for s in species:
        gas_data[s]["data"] = df[[s]].to_xarray()

        gas_data[s]["metadata"] = {
            "species": s,
            "long_name": long_site_name,
            "latitude": 55.859238,
            "longitude": -4.296180,
            "network": "npl_picarro",
            "inlet": "124m",
            "sampling_period": sampling_period,
            "site": site,
            "instrument": "picarro",
            "units": units[s],
        }

    # TODO - remove this once mypy stubs for addict are added
    to_return: Dict = gas_data.to_dict()

    return to_return
def rankings(self) -> Dict:
    """Return the rank metadata for each site and species in the results

    Returns:
        dict: Dictionary of rank metadata keyed by site and species
    """
    from addict import Dict as aDict

    if not self.ranked_data:
        print("No rank data")

    rank_result = aDict()

    for site, species_data in self.results.items():
        for species, data in species_data.items():
            rank_result[site][species] = data["rank_metadata"]

    to_return: Dict = rank_result.to_dict()

    return to_return
def from_data(cls: Type[T], data: Dict) -> T:
    """Create an object from data

    Args:
        data: JSON data
    Returns:
        cls: Class object of cls type
    """
    from openghg.util import timestamp_tzaware
    from addict import Dict as aDict

    if not data:
        raise ValueError("Unable to create object with empty dictionary")

    c = cls()
    c._creation_datetime = timestamp_tzaware(data["creation_datetime"])
    c._datasource_uuids = data["datasource_uuids"]
    c._file_hashes = data["file_hashes"]
    c._datasource_table = aDict(data["datasource_table"])
    c._rank_data = aDict(data["rank_data"])
    c._stored = False

    return c
def parse_glasow_licor(filepath: Path, sampling_period: Optional[str] = None) -> Dict:
    """Read the Glasgow LICOR data from NPL

    Args:
        filepath: Path to data file
        sampling_period: Measurement sampling period
    Returns:
        dict: Dictionary of data
    """
    from addict import Dict as aDict
    from pandas import read_csv, to_datetime

    date_index = {"time": ["DATE", "TIME"]}
    use_cols = [0, 1, 3, 4, 5]
    nan_values = [",,,"]

    df = read_csv(
        filepath,
        parse_dates=date_index,
        na_values=nan_values,
        infer_datetime_format=True,
        index_col="time",
        usecols=use_cols,
    )

    rename_cols = {
        "LAT": "latitude",
        "LON": "longitude",
        "Methane_Enhancement_Over_Background(ppb)": "ch4",
    }

    df = df.rename(columns=rename_cols).dropna(axis="rows", how="any")
    df.index = to_datetime(df.index)

    ds = df.to_xarray()

    if sampling_period is None:
        sampling_period = "NOT_SET"

    metadata = {
        "units": "ppb",
        "notes": "measurement value is methane enhancement over background",
        "sampling_period": sampling_period,
    }

    data = aDict()
    data["ch4"]["metadata"] = metadata
    data["ch4"]["data"] = ds

    to_return: Dict = data.to_dict()

    return to_return
def retrieve_all(self) -> Dict:
    """Retrieve all the data found during the search

    Returns:
        dict: Dictionary of all data
    """
    from addict import Dict as aDict

    data = aDict()

    # Can we just traverse the dict without looping?
    for site, species_data in self.results.items():
        for species, inlet_data in species_data.items():
            for inlet, keys in inlet_data.items():
                data[site][species][inlet] = self._create_obsdata(site=site, species=species, inlet=inlet)

    # TODO - update this once addict is stubbed
    data_dict: Dict = data.to_dict()

    return data_dict
def inittk():
    top = tk.Tk()
    top.option_add('*tearoff', 0)
    top.option_add('*tearOff', False)

    if pydevprog:
        dbg.entersub()

    cfg.widgets = aDict()
    cfg.widgets['MainWindow'] = top

    import mygui.themestuff as themestuff
    themes = themestuff.load_all_themes(top)

    if pydevprog:
        # dbg.dprint(4, "Available themes", themes)
        cfg.guidefs.available_themes = themes
        cfg.guidefs.loaded = True
        dbg.leavesub()

    return top
def _parse_metadata(filepath: pathType) -> Dict:
    """Parse AQMesh metadata

    Args:
        filepath: Path to metadata CSV
    Returns:
        dict: Dictionary of metadata
    """
    from addict import Dict as aDict
    from pandas import read_csv
    from openghg.util import check_date

    filepath = Path(filepath)
    raw_metadata = read_csv(filepath)

    site_metadata = aDict()

    for _, row in raw_metadata.iterrows():
        site_name = row["location_name"].replace(" ", "").lower()
        site_data = site_metadata[site_name]

        site_data["site"] = site_name
        site_data["pod_id"] = row["pod_id_location"]
        site_data["start_date"] = check_date(row["start_date_UTC"])
        site_data["end_date"] = check_date(row["end_date_UTC"])
        site_data["relocate_date"] = check_date(row["relocate_date_UTC"])
        site_data["long_name"] = row["location_name"]
        site_data["borough"] = row["Borough"]
        site_data["site_type"] = row["Type"]
        site_data["in_ulez"] = row["ULEZ"]
        site_data["latitude"] = row["Latitude"]
        site_data["longitude"] = row["Longitude"]
        site_data["inlet"] = row["Height"]
        site_data["network"] = "aqmesh_glasgow"
        site_data["sampling_period"] = "NA"

    # TODO - I feel this is a bit clunky
    dict_metadata: Dict = site_metadata.to_dict()

    return dict_metadata
def parse_aqmesh(
    data_filepath: pathType,
    metadata_filepath: pathType,
    sampling_period: Optional[str] = None,
) -> Dict:
    """Read AQMesh data files

    Args:
        data_filepath: Data filepath
        metadata_filepath: Metadata filepath
        sampling_period: Measurement sampling period (str)
    Returns:
        dict: Dictionary of data
    """
    from addict import Dict as aDict
    from pandas import read_csv

    if sampling_period is None:
        sampling_period = "NOT_SET"

    use_cols = [0, 1, 4, 6]
    datetime_cols = {"time": ["date_UTC"]}
    na_values = [-999, -999.0]

    df = read_csv(
        data_filepath,
        index_col="time",
        usecols=use_cols,
        parse_dates=datetime_cols,
        na_values=na_values,
    )

    # This might change so we'll read it each time for now
    metadata = _parse_metadata(filepath=metadata_filepath)

    # Species is given in the data column
    orig_species = df.columns[0]
    species_split = orig_species.split("_")
    species = species_split[0]
    units = species_split[1]
    species_lower = species.lower()

    rename_cols = {orig_species: species_lower, "location_name": "site"}
    df = df.rename(columns=rename_cols)
    df = df.dropna(axis="rows", subset=[species_lower])

    # TODO - add in assignment of attributes
    # assign_attributes

    site_groups = df.groupby(df["site"])
    site_data = aDict()

    for site, site_df in site_groups:
        site_name = site.replace(" ", "").lower()
        site_df = site_df.drop("site", axis="columns")
        site_data[site_name]["data"] = site_df.to_xarray()
        site_data[site_name]["metadata"] = metadata[site_name]
        # Add in the species to the metadata
        site_data[site_name]["metadata"]["species"] = species_lower
        site_data[site_name]["metadata"]["units"] = units
        site_data[site_name]["metadata"]["sampling_period"] = sampling_period

    site_dict: Dict = site_data.to_dict()

    return site_dict
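
def _example_parse_aqmesh() -> None:
    # Hedged usage sketch for parse_aqmesh: the file paths below are hypothetical
    # placeholders. The returned dictionary is keyed by site name, with each entry
    # holding an xarray Dataset under "data" and a metadata dict under "metadata".
    result = parse_aqmesh(
        data_filepath="aqmesh_co2_data.csv",  # hypothetical path
        metadata_filepath="aqmesh_site_metadata.csv",  # hypothetical path
        sampling_period="60",
    )

    for site_name, entry in result.items():
        print(site_name, entry["metadata"]["species"], entry["metadata"]["units"])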
def search(**kwargs):  # type: ignore
    """Search for observations data. Any keyword arguments may be passed to the
    function and these keywords will be used to search the metadata associated
    with each Datasource. Example / commonly used arguments are given below.

    Args:
        species: Terms to search for in Datasources
        locations: Where to search for the terms in species
        inlet: Inlet height such as 100m
        instrument: Instrument name such as picarro
        find_all: Require all search terms to be satisfied
        start_date: Start datetime for search. If None a start datetime of UNIX epoch (1970-01-01) is set
        end_date: End datetime for search. If None an end datetime of the current datetime is set
        skip_ranking: If True skip ranking system, defaults to False
    Returns:
        dict or SearchResults: Keys of Datasources matching the search parameters
    """
    from addict import Dict as aDict
    from copy import deepcopy
    from itertools import chain as iter_chain

    from openghg.store import ObsSurface, Footprints, Emissions, EulerianModel
    from openghg.store.base import Datasource
    from openghg.util import (
        timestamp_now,
        timestamp_epoch,
        timestamp_tzaware,
        clean_string,
        closest_daterange,
        find_daterange_gaps,
        split_daterange_str,
        load_json,
    )
    from openghg.dataobjects import SearchResults

    # Get a copy of kwargs as we make some modifications below
    kwargs_copy = deepcopy(kwargs)

    # Do this here otherwise we have to produce them for every datasource
    start_date = kwargs.get("start_date")
    end_date = kwargs.get("end_date")

    if start_date is None:
        start_date = timestamp_epoch()
    else:
        start_date = timestamp_tzaware(start_date)

    if end_date is None:
        end_date = timestamp_now()
    else:
        end_date = timestamp_tzaware(end_date)

    kwargs_copy["start_date"] = start_date
    kwargs_copy["end_date"] = end_date

    skip_ranking = kwargs_copy.get("skip_ranking", False)

    try:
        del kwargs_copy["skip_ranking"]
    except KeyError:
        pass

    # As we might have kwargs that are None we want to get rid of those
    search_kwargs = {k: clean_string(v) for k, v in kwargs_copy.items() if v is not None}

    # Species translation
    species = search_kwargs.get("species")

    if species is not None:
        if not isinstance(species, list):
            species = [species]

        translator = load_json("species_translator.json")

        updated_species = []

        for s in species:
            updated_species.append(s)

            try:
                translated = translator[s]
            except KeyError:
                pass
            else:
                updated_species.extend(translated)

        search_kwargs["species"] = updated_species

    data_type = search_kwargs.get("data_type", "timeseries")

    valid_data_types = ("timeseries", "footprints", "emissions", "eulerian_model")
    if data_type not in valid_data_types:
        raise ValueError(f"{data_type} is not a valid data type, please select one of {valid_data_types}")

    # Assume we want timeseries data
    obj: Union[ObsSurface, Footprints, Emissions, EulerianModel] = ObsSurface.load()

    if data_type == "footprints":
        obj = Footprints.load()
    elif data_type == "emissions":
        obj = Emissions.load()
    elif data_type == "eulerian_model":
        obj = EulerianModel.load()

    datasource_uuids = obj.datasources()

    # Shallow load the Datasources so we can search their metadata
    datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

    # For the time being this will return a dict until we know how best to represent
    # the footprints and emissions results in a SearchResult object
    if data_type in {"emissions", "footprints", "eulerian_model"}:
        sources: Dict = aDict()

        for datasource in datasources:
            if datasource.search_metadata(**search_kwargs):
                uid = datasource.uuid()
                sources[uid]["keys"] = datasource.keys_in_daterange(start_date=start_date, end_date=end_date)
                sources[uid]["metadata"] = datasource.metadata()

        return sources

    # Find the Datasources that contain matching metadata
    matching_sources = {d.uuid(): d for d in datasources if d.search_metadata(**search_kwargs)}

    # TODO - Update this as it only uses the ACRG repo JSON at the moment
    # Check if this site only has one inlet, if so skip ranking
    # if "site" in search_kwargs:
    #     site = search_kwargs["site"]
    #     if not isinstance(site, list) and not multiple_inlets(site=site):
    #         skip_ranking = True

    # If there isn't *any* ranking data at all, skip all the ranking functionality
    if not obj._rank_data:
        skip_ranking = True

    # If only one datasource has been returned, skip all the ranking functionality
    if len(matching_sources) == 1:
        skip_ranking = True

    # If we have the site, inlet and instrument then just return the data
    # TODO - should instrument be added here
    if {"site", "inlet", "species"} <= search_kwargs.keys() or skip_ranking is True:
        specific_sources = aDict()

        for datasource in matching_sources.values():
            specific_keys = datasource.keys_in_daterange(start_date=start_date, end_date=end_date)

            if not specific_keys:
                continue

            metadata = datasource.metadata()

            site = metadata["site"]
            species = metadata["species"]
            inlet = metadata["inlet"]

            specific_sources[site][species][inlet]["keys"] = specific_keys
            specific_sources[site][species][inlet]["metadata"] = metadata

        return SearchResults(results=specific_sources.to_dict(), ranked_data=False)

    highest_ranked = aDict()

    for uid, datasource in matching_sources.items():
        # Find the site and then the ranking
        metadata = datasource.metadata()
        # Get the site inlet and species
        site = metadata["site"]
        species = metadata["species"]

        rank_data = obj.get_rank(uuid=uid, start_date=start_date, end_date=end_date)

        # If this Datasource doesn't have any ranking data skip it and move on
        if not rank_data:
            continue

        # There will only be a single rank key
        rank_value = next(iter(rank_data))
        # Get the daterange this rank covers
        rank_dateranges = rank_data[rank_value]

        # Each match we store gives us the information we need
        # to retrieve the data
        match = {"uuid": uid, "dateranges": rank_dateranges}

        # Need to ensure we get all the dates covered
        if species in highest_ranked[site]:
            species_rank_data = highest_ranked[site][species]

            # If we have a higher (lower number) rank save it
            if rank_value < species_rank_data["rank"]:
                species_rank_data["rank"] = rank_value
                species_rank_data["matching"] = [match]
            # If another Datasource has the same rank for another daterange
            # we want to save that as well
            elif rank_value == species_rank_data["rank"]:
                species_rank_data["matching"].append(match)
        else:
            highest_ranked[site][species]["rank"] = rank_value
            highest_ranked[site][species]["matching"] = [match]

    if not highest_ranked:
        raise ValueError(
            (
                "No ranking data set for the given search parameters."
                " Please refine your search to include a specific site, species and inlet."
            )
        )

    # Now we have the highest ranked data the dateranges there are ranks for
    # we want to fill in the gaps with (currently) the highest inlet from that site

    # We just want some rank_metadata to go along with the final data scheme
    # Can key a key of date - inlet
    data_keys: Dict = aDict()
    for site, species in highest_ranked.items():
        for sp, data in species.items():
            # data_keys[site][sp]["keys"] = []
            species_keys = []
            species_rank_data = {}
            species_metadata = {}

            for match_data in data["matching"]:
                uuid = match_data["uuid"]
                match_dateranges = match_data["dateranges"]
                # Get the datasource as it's already in the dictionary
                # we created earlier
                datasource = matching_sources[uuid]
                metadata = datasource.metadata()
                inlet = metadata["inlet"]

                keys = []
                for dr in match_dateranges:
                    date_keys = datasource.keys_in_daterange_str(daterange=dr)

                    if date_keys:
                        keys.extend(date_keys)
                        # We'll add this to the metadata in the search results we return at the end
                        species_rank_data[dr] = inlet

                species_keys.extend(keys)
                species_metadata[inlet] = metadata

            # Only create the dictionary keys if we have some data keys
            if species_keys:
                data_keys[site][sp]["keys"] = species_keys
                data_keys[site][sp]["rank_metadata"] = species_rank_data
                data_keys[site][sp]["metadata"] = species_metadata
            else:
                continue

            # We now need to retrieve data for the dateranges for which we don't have ranking data
            # To do this find the gaps in the daterange over which the user has requested data
            # and the dates for which we have ranking information

            # Get the dateranges that are covered by ranking information
            daterange_strs = list(iter_chain.from_iterable([m["dateranges"] for m in data["matching"]]))
            # Find the gaps in the ranking coverage
            gap_dateranges = find_daterange_gaps(
                start_search=start_date, end_search=end_date, dateranges=daterange_strs
            )

            # We want the dateranges and inlets for those dateranges
            inlet_dateranges = data_keys[site][sp]["rank_metadata"]
            # These are the dateranges for which we have ranking information for this site and species
            ranked_dateranges = list(data_keys[site][sp]["rank_metadata"].keys())

            for gap_daterange in gap_dateranges:
                # We want to select the inlet that's ranked for dates closest to the ones we have here
                closest_dr = closest_daterange(to_compare=gap_daterange, dateranges=ranked_dateranges)

                gap_start, gap_end = split_daterange_str(gap_daterange)
                # Find the closest ranked inlet by date
                chosen_inlet = inlet_dateranges[closest_dr]

                inlet_metadata = data_keys[site][sp]["metadata"][chosen_inlet]
                inlet_instrument = inlet_metadata["instrument"]
                inlet_sampling_period = inlet_metadata["sampling_period"]

                # Then we want to retrieve the correct metadata for those inlets
                results: SearchResults = search(
                    site=site,
                    species=sp,
                    inlet=chosen_inlet,
                    instrument=inlet_instrument,
                    sampling_period=inlet_sampling_period,
                    start_date=gap_start,
                    end_date=gap_end,
                )  # type: ignore

                if not results:
                    continue

                # Retrieve the data keys
                inlet_data_keys = results.keys(site=site, species=sp, inlet=chosen_inlet)

                data_keys[site][sp]["keys"].extend(inlet_data_keys)

            # Remove any duplicate keys
            data_keys[site][sp]["keys"] = list(set(data_keys[site][sp]["keys"]))

    # TODO - create a stub for addict
    dict_data_keys = data_keys.to_dict()  # type: ignore

    return SearchResults(results=dict_data_keys, ranked_data=True)
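
def _example_search() -> None:
    # Hedged usage sketch for search: the site, species and inlet values below are
    # hypothetical. When site, species and inlet are all supplied (or
    # skip_ranking=True) the ranking logic above is bypassed and a SearchResults
    # object is returned directly.
    results = search(
        site="bsd",  # hypothetical site code
        species="ch4",
        inlet="100m",
        start_date="2019-01-01",
        end_date="2019-12-31",
    )
    print(results)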
def to_dashboard(
    data: Dict, selected_vars: List, downsample_n: int = 3, filename: Optional[str] = None
) -> Union[Dict, None]:
    """Takes a Dataset produced by OpenGHG and outputs it into a JSON format readable
    by the OpenGHG dashboard or a related project.

    This also exports a separate file with the locations of the sites
    for use with the map selector component.

    Note - this function does not currently support export of data from multiple inlets.

    Args:
        data: Dictionary of retrieved data
        selected_vars: The variables we want to export
        downsample_n: Take every nth value from the data
        filename: Filename to write output to
    Returns:
        dict or None: Dictionary of dashboard data if no filename is given
    """
    from addict import Dict as aDict
    from json import dump, loads

    to_export = aDict()

    if not isinstance(selected_vars, list):
        selected_vars = [selected_vars]

    selected_vars = [str(c).lower() for c in selected_vars]

    for site, species_data in data.items():
        for species, inlet_data in species_data.items():
            measurement_data: ObsData
            for inlet, measurement_data in inlet_data.items():
                dataset = measurement_data.data
                metadata = measurement_data.metadata
                attributes = dataset.attrs

                df = dataset.to_dataframe()

                rename_lower = {c: str(c).lower() for c in df.columns}
                df = df.rename(columns=rename_lower)
                # We just want the selected variables
                to_extract = [c for c in df.columns if c in selected_vars]

                if not to_extract:
                    continue

                df = df[to_extract]

                # Downsample the data
                if downsample_n > 1:
                    df = df.iloc[::downsample_n]

                network = metadata["network"]
                instrument = metadata["instrument"]

                # TODO - remove this if we add site location to standard metadata
                location = {
                    "latitude": attributes["station_latitude"],
                    "longitude": attributes["station_longitude"],
                }
                metadata.update(location)

                json_data = loads(df.to_json())

                to_export[species][network][site][inlet][instrument] = {
                    "data": json_data,
                    "metadata": metadata,
                }

    if filename is not None:
        with open(filename, "w") as f:
            dump(obj=to_export, fp=f)
        return None
    else:
        # TODO - remove this once addict is stubbed
        export_dict: Dict = to_export.to_dict()
        return export_dict
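
def _example_to_dashboard(search_results) -> None:
    # Hedged usage sketch: search_results is assumed to be a SearchResults object
    # whose retrieve_all() method (defined earlier) returns the nested
    # {site: {species: {inlet: ObsData}}} dictionary that to_dashboard expects.
    # The selected variables and output filename are hypothetical.
    data = search_results.retrieve_all()
    to_dashboard(data=data, selected_vars=["ch4", "co2"], downsample_n=3, filename="dashboard.json")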
def visualise_store() -> pyvis.network.Network:
    """View the object store using a pyvis force graph.

    This function should only be called from within a notebook

    Returns:
        pyvis.network.Network
    """
    from addict import Dict as aDict

    data = query_store()

    net = pyvis.network.Network("800px", "100%", notebook=True)
    net.force_atlas_2based()

    # Create the ObsSurface node
    net.add_node(0, label="Surface Observations", color="#4e79a7", value=5000)

    network_split = aDict()

    for key, value in data.items():
        # Iterate over Datasources to select the networks
        network = value["network"]
        site = value["site"]
        inlet = value["inlet"]

        network_split[network][site][inlet][key] = value

    for network, sites in network_split.items():
        network_name = network.upper()
        net.add_node(network, label=network_name, color="#59a14f", value=2500)
        net.add_edge(source=0, to=network)

        # Then we want a subnode for each site
        for site, site_data in sites.items():
            # Don't want to use a site here as a site might be in multiple networks
            site_name = site.upper()
            site_id = str(uuid4())
            net.add_node(site_id, label=site_name, color="#e15759", value=1000)
            net.add_edge(source=network, to=site_id)

            for inlet, inlet_data in site_data.items():
                inlet_name = str(inlet).lower()
                inlet_id = str(uuid4())
                net.add_node(n_id=inlet_id, label=inlet_name, color="#808080", value=500)
                net.add_edge(source=site_id, to=inlet_id)

                # Now for each site create the datasource nodes
                for uid, datasource in inlet_data.items():
                    species = datasource["species"]
                    instrument = datasource["instrument"].upper()

                    label = f"{species.upper()} {instrument}"
                    title = "\n".join(
                        [
                            f"Site: {site.upper()}",
                            f"Species : {species.upper()}",
                            f"Instrument: {instrument}",
                        ]
                    )

                    net.add_node(n_id=uid, label=label, title=title, color="#f28e2b", value=100)
                    net.add_edge(source=inlet_id, to=uid)

    return net.show("openghg_objstore.html")
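
def _example_visualise_store() -> None:
    # Hedged usage sketch: visualise_store is intended to be run from a Jupyter
    # notebook, where pyvis renders the returned network inline and also writes
    # openghg_objstore.html next to the notebook.
    net = visualise_store()
    print(type(net))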
def _split_species(
    data: DataFrame,
    site: str,
    instrument: str,
    species: List,
    metadata: Dict,
    units: Dict,
    scale: Dict,
    gc_params: Dict,
) -> Dict:
    """Splits the data into separate sections by species and inlet so each can be
    stored within an individual Datasource

    Args:
        data: DataFrame of raw data
        site: Name of site from which this data originates
        instrument: Name of instrument
        species: List of species contained in data
        metadata: Dictionary of metadata
        units: Dictionary of units for each species
        scale: Dictionary of scales for each species
        gc_params: GCWERKS parameter dictionary
    Returns:
        dict: Dictionary of gas data and metadata
    """
    from addict import Dict as aDict
    from fnmatch import fnmatch
    from openghg.util import load_json, clean_string

    # Load species translator so we can keep species names consistent
    attributes_data = load_json("attributes.json")
    species_translator = attributes_data["species_translation"]

    # Read inlets from the parameters
    expected_inlets = _get_inlets(site_code=site, gc_params=gc_params)

    try:
        data_inlets = data["Inlet"].unique().tolist()
    except KeyError:
        raise KeyError(
            "Unable to read inlets from data, please ensure this data is of the GC type expected by this retrieve module"
        )

    combined_data = aDict()

    for spec in species:
        # Skip this species if the data is all NaNs
        if data[spec].isnull().all():
            continue

        # Here inlet is the inlet in the data and inlet_label is the label we want to use as metadata
        for inlet, inlet_label in expected_inlets.items():
            # Create a copy of metadata for local modification
            spec_metadata = metadata.copy()

            spec_metadata["units"] = units[spec]
            spec_metadata["scale"] = scale[spec]

            # If we've only got a single inlet
            if inlet == "any" or inlet == "air":
                spec_data = data[
                    [
                        spec,
                        spec + " repeatability",
                        spec + " status_flag",
                        spec + " integration_flag",
                        "Inlet",
                    ]
                ]
                spec_data = spec_data.dropna(axis="index", how="any")
                spec_metadata["inlet"] = inlet_label
            elif "date" in inlet:
                dates = inlet.split("_")[1:]
                data_sliced = data.loc[dates[0] : dates[1]]

                spec_data = data_sliced[
                    [
                        spec,
                        spec + " repeatability",
                        spec + " status_flag",
                        spec + " integration_flag",
                        "Inlet",
                    ]
                ]
                spec_data = spec_data.dropna(axis="index", how="any")
                spec_metadata["inlet"] = inlet_label
            else:
                # Find the inlet
                matching_inlets = [i for i in data_inlets if fnmatch(i, inlet)]

                if not matching_inlets:
                    continue

                # Only set the label in metadata when we have the correct label
                spec_metadata["inlet"] = inlet_label
                # There should only be one matching label
                select_inlet = matching_inlets[0]
                # Take only data for this inlet from the dataframe
                inlet_data = data.loc[data["Inlet"] == select_inlet]

                spec_data = inlet_data[
                    [
                        spec,
                        spec + " repeatability",
                        spec + " status_flag",
                        spec + " integration_flag",
                        "Inlet",
                    ]
                ]
                spec_data = spec_data.dropna(axis="index", how="any")

            # Now we drop the inlet column
            spec_data = spec_data.drop("Inlet", axis="columns")

            # Check that the Dataframe has something in it
            if spec_data.empty:
                continue

            attributes = _get_site_attributes(
                site=site, inlet=inlet_label, instrument=instrument, gc_params=gc_params
            )
            attributes = attributes.copy()

            # We want an xarray Dataset
            spec_data = spec_data.to_xarray()

            # Create a standardised / cleaned species label
            try:
                comp_species = species_translator[spec.upper()]["chem"]
            except KeyError:
                comp_species = clean_string(spec.lower())

            # Add the cleaned species name to the metadata and alternative name if present
            spec_metadata["species"] = comp_species
            if comp_species != spec.lower() and comp_species != spec.upper():
                spec_metadata["species_alt"] = spec

            # Rename variables so they have lowercase and alphanumeric names
            to_rename = {}
            for var in spec_data.variables:
                if spec in var:
                    new_name = var.replace(spec, comp_species)
                    to_rename[var] = new_name

            spec_data = spec_data.rename(to_rename)

            # As a single species may have measurements from multiple inlets we
            # use the species and inlet as a key
            data_key = f"{comp_species}_{inlet_label}"

            combined_data[data_key]["metadata"] = spec_metadata
            combined_data[data_key]["data"] = spec_data
            combined_data[data_key]["attributes"] = attributes

    to_return: Dict = combined_data.to_dict()

    return to_return