def test_read_CRDS():
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.dat", data_type="CRDS")

    results = ObsSurface.read_file(filepath=filepath, data_type="CRDS")

    keys = results["bsd.picarro.1minute.248m.dat"].keys()

    expected_keys = sorted(
        [
            "bsd.picarro.1minute.248m_ch4",
            "bsd.picarro.1minute.248m_co",
            "bsd.picarro.1minute.248m_co2",
        ]
    )
    assert sorted(keys) == expected_keys

    # Load up the assigned Datasources and check they contain the correct data
    data = results["bsd.picarro.1minute.248m.dat"]

    ch4_data = Datasource.load(uuid=data["bsd.picarro.1minute.248m_ch4"]).data()
    ch4_data = ch4_data["2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"]

    assert ch4_data.time[0] == Timestamp("2014-01-30T10:52:30")
    assert ch4_data["ch4"][0] == 1960.24
    assert ch4_data["ch4"][-1] == 1952.24
    assert ch4_data["ch4_stdev"][-1] == 0.674
    assert ch4_data["ch4_n_meas"][-1] == 25.0

    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

def test_delete_Datasource():
    bucket = get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    obs = ObsSurface.load()

    datasources = obs.datasources()
    uuid = datasources[0]

    datasource = Datasource.load(uuid=uuid)

    data = datasource.data()["2011-12-07-01:38:00+00:00_2011-12-31-19:57:00+00:00"]

    assert data["co2"][0] == pytest.approx(397.334)
    assert data.time[0] == Timestamp("2011-12-07T01:38:00")

    data_keys = datasource.data_keys()
    key = data_keys[0]

    assert exists(bucket=bucket, key=key)

    obs.delete(uuid=uuid)

    assert uuid not in obs.datasources()
    assert not exists(bucket=bucket, key=key)

def test_read_thames_barrier():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="THAMESBARRIER")

    expected_keys = sorted(
        [
            "thames_test_20190707_CH4",
            "thames_test_20190707_CO2",
            "thames_test_20190707_CO",
        ]
    )

    assert sorted(list(results["thames_test_20190707.csv"].keys())) == expected_keys

    uuid = results["thames_test_20190707.csv"]["thames_test_20190707_CO2"]

    data = Datasource.load(uuid=uuid, shallow=False).data()
    data = data["2019-07-01-00:39:55+00:00_2019-08-01-00:10:30+00:00"]

    assert data.time[0] == Timestamp("2019-07-01T00:39:55")
    assert data.time[-1] == Timestamp("2019-08-01T00:10:30")
    assert data["co2"][0] == pytest.approx(417.97344761)
    assert data["co2"][-1] == pytest.approx(417.80000653)
    assert data["co2_variability"][0] == 0
    assert data["co2_variability"][-1] == 0

    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

def crds_read():
    get_local_bucket(empty=True)

    test_data = "../data/search_data"
    folder_path = os.path.join(os.path.dirname(__file__), test_data)

    ObsSurface.read_folder(folder_path=folder_path, data_type="CRDS", extension="dat")

def test_recombination_CRDS():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    crds = CRDS()

    ObsSurface.read_file(filepath, data_type="CRDS")

    gas_data = crds.read_data(data_filepath=filepath, site="HFD")

    ch4_data_read = gas_data["ch4"]["data"]

    gas_name = "ch4"
    location = "hfd"

    keys = search(species=gas_name, locations=location)

    to_download = keys["ch4_hfd_100m_picarro"]["keys"]["2013-12-04-14:02:30_2019-05-21-15:46:30"]

    ch4_data_recombined = recombine_sections(data_keys=to_download)

    ch4_data_recombined.attrs = {}

    assert ch4_data_read.time.equals(ch4_data_recombined.time)
    assert ch4_data_read["ch4"].equals(ch4_data_recombined["ch4"])

def crds():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    ObsSurface.read_file(filepath=filepath, data_type="CRDS")

def test_read_noaa():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="NOAA")

    uuid = results["co_pocn25_surface-flask_1_ccgg_event.txt"]["co_pocn25_surface-flask_1_ccgg_event_co"]

    co_data = Datasource.load(uuid=uuid, shallow=False).data()

    assert len(co_data.keys()) == 95

    old_data = co_data["1990-12-02-12:23:00+00:00_1990-12-02-12:23:00+00:00"]

    assert old_data.time[0] == Timestamp("1990-12-02T12:23:00")
    assert old_data.time[-1] == Timestamp("1990-12-02T12:23:00")
    assert old_data["co"][0] == 141.61
    assert old_data["co"][-1] == 141.61
    assert old_data["co_repeatability"][0] == -999.99
    assert old_data["co_repeatability"][-1] == -999.99
    assert old_data["co_selection_flag"][0] == 0
    assert old_data["co_selection_flag"][-1] == 0

    obs = ObsSurface.load()

    assert list(obs._datasource_names.keys())[0] == "co_pocn25_surface-flask_1_ccgg_event_co"

def crds():
    get_local_bucket(empty=True)

    dir_path = os.path.dirname(__file__)
    test_data = "../data/proc_test_data/CRDS"
    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = os.path.join(dir_path, test_data, filename)

    ObsSurface.read_file(filepath=filepath, data_type="CRDS")

def test_upload_same_file_twice_raises():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    with pytest.raises(ValueError):
        ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

def gc_read():
    get_local_bucket(empty=True)

    data_file = "capegrim-medusa.18.C"
    prec_file = "capegrim-medusa.18.precisions.C"

    dir_path = os.path.dirname(__file__)
    test_data = "../data/proc_test_data/GC"

    data_filepath = os.path.join(dir_path, test_data, data_file)
    prec_filepath = os.path.join(dir_path, test_data, prec_file)

    ObsSurface.read_file(filepath=(data_filepath, prec_filepath), data_type="GCWERKS")

def rank_sources(self, updated_rankings, data_type):
    """ Assign the precedence of sources for each site.

        This function expects a dictionary of the form

        {'site_string': {'rank': {rank: [daterange_str, ...]}, 'daterange': 'start_end', 'uuid': uuid}, ...}

        Args:
            updated_rankings (dict): Dictionary of ranking data
            data_type (str): Data type e.g. CRDS, GC
        Returns:
            None
    """
    if updated_rankings == self._before_ranking:
        return

    obs = ObsSurface.load()

    for key in updated_rankings:
        uuid = updated_rankings[key]["uuid"]

        for rank, daterange in updated_rankings[key]["rank"].items():
            if rank == 0:
                continue

            for d in daterange:
                obs.set_rank(uuid=uuid, rank=rank, daterange=d)

    obs.save()

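# Illustrative only: a minimal sketch of the rankings dictionary that
# rank_sources() iterates over above. The source name, UUID and daterange
# strings below are hypothetical placeholder values, not real store entries.
example_rankings = {
    "co2_bsd_248m_picarro": {
        "rank": {1: ["2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"]},
        "daterange": "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00",
        "uuid": "4b91f73e-3d57-47e4-aa13-cb28c35d3b3d",
    }
}
# Called on an instance holding _before_ranking state, e.g.:
# self.rank_sources(updated_rankings=example_rankings, data_type="CRDS")
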
def query_store():
    """ Create a dictionary that can be used to visualise the object store

        Returns:
            dict: Dictionary of Datasource metadata keyed by Datasource UUID
    """
    from collections import defaultdict
    from HUGS.Modules import Datasource, ObsSurface

    obs = ObsSurface.load()

    datasource_uuids = obs.datasources()

    datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

    data = defaultdict(dict)

    for d in datasources:
        metadata = d.metadata()
        result = {
            "site": metadata["site"],
            "species": metadata["species"],
            "instrument": metadata.get("instrument", "Unknown"),
            "network": metadata.get("network"),
        }
        data[d.uuid()] = result

    return data


# def visualise_store():
#     """ Visualise the output of the
#     """

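# Illustrative only: one way the query_store() output could be tabulated for
# inspection, assuming pandas is available in the environment. The column set
# matches the metadata keys built in query_store() above.
import pandas as pd

store_data = query_store()
df = pd.DataFrame.from_dict(store_data, orient="index")
print(df[["site", "species", "instrument", "network"]])
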
def rank_sources(args):
    """ Rank Datasources to be primary sources for specific species at specific sites.

        Args:
            args (dict): Dictionary containing ranking data
        Returns:
            None
    """
    try:
        ranking_data = args["ranking"]
    except KeyError:
        raise KeyError("No ranking data passed")

    obs = ObsSurface.load()

    for key in ranking_data:
        uuid = ranking_data[key]["uuid"]

        for rank, daterange in ranking_data[key]["rank"].items():
            if rank == 0:
                continue

            for d in daterange:
                obs.set_rank(uuid=uuid, rank=rank, daterange=d)

    obs.save()

def test_recombination_GC():
    get_local_bucket(empty=True)

    gc = GCWERKS()

    data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data, precision), data_type="GCWERKS")

    data = gc.read_data(data_filepath=data, precision_filepath=precision, site="CGO", instrument="medusa")

    toluene_data = data["toluene"]["data"]

    gas_name = "toluene"
    location = "CGO"

    keys = search(species=gas_name, locations=location)

    to_download = keys["toluene_cgo_75m_4_medusa"]["keys"]["2018-01-01-02:24:00_2018-01-31-23:33:00"]

    toluene_data_recombined = recombine_sections(data_keys=to_download)

    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["toluene"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["toluene repeatability"].equals(toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["toluene status_flag"].equals(toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["toluene integration_flag"].equals(toluene_data_recombined["c6h5ch3_integration_flag"])

def test_read_cranfield():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="THB_hourly_means_test.csv", data_type="Cranfield_CRDS")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="CRANFIELD")

    expected_keys = sorted(
        [
            "THB_hourly_means_test_ch4",
            "THB_hourly_means_test_co2",
            "THB_hourly_means_test_co",
        ]
    )

    assert sorted(results["THB_hourly_means_test.csv"].keys()) == expected_keys

    uuid = results["THB_hourly_means_test.csv"]["THB_hourly_means_test_ch4"]

    ch4_data = Datasource.load(uuid=uuid, shallow=False).data()
    ch4_data = ch4_data["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4_data.time[0] == Timestamp("2018-05-05")
    assert ch4_data.time[-1] == Timestamp("2018-05-13T16:00:00")
    assert ch4_data["ch4"][0] == pytest.approx(2585.651)
    assert ch4_data["ch4"][-1] == pytest.approx(1999.018)
    assert ch4_data["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4_data["ch4 variability"][-1] == pytest.approx(6.48413)

    # Check obs has stored the keys correctly
    obs = ObsSurface.load()

    assert sorted(list(obs._datasource_names.keys())) == expected_keys

def process_files(files, data_type, site=None, network=None, instrument=None, overwrite=False):
    """ Process the passed file(s)

        Args:
            files (str, list): Path of files to be processed
            data_type (str): Type of data to be processed (CRDS, GC etc)
            site (str, default=None): Site code or name
            network (str, default=None): Network name
            instrument (str, default=None): Instrument name
            overwrite (bool, default=False): Should this data overwrite data
            stored for these datasources for existing dateranges
        Returns:
            dict: UUIDs of Datasources storing data of processed files keyed by filename
    """
    data_type = DataTypes[data_type.upper()].name

    if not isinstance(files, list):
        files = [files]

    obs = ObsSurface.load()

    results = {}

    # Ensure we have Paths
    # TODO: Delete this, as we already have the same warning in read_file?
    if data_type == "GCWERKS":
        if not all(isinstance(item, tuple) for item in files):
            raise TypeError(
                "If data type is GC, a list of tuples for data and precision filenames must be passed"
            )
        files = [(Path(f), Path(p)) for f, p in files]
    else:
        files = [Path(f) for f in files]

    r = obs.read_file(
        filepath=files,
        data_type=data_type,
        site=site,
        network=network,
        instrument=instrument,
        overwrite=overwrite,
    )
    results.update(r)

    return results

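# Illustrative only: calling process_files() with a single CRDS file and with a
# GCWERKS (data, precision) pair. The file paths and site codes here are
# hypothetical and would need to point at real files on disk.
crds_results = process_files(files="bsd.picarro.1minute.248m.dat", data_type="CRDS", site="bsd")

gc_results = process_files(
    files=[("capegrim-medusa.18.C", "capegrim-medusa.18.precisions.C")],
    data_type="GCWERKS",
    site="cgo",
)
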
def get_sources(self, site, species, data_type):
    """ Get the datasources for this site and species to allow a ranking to be set

        Args:
            site (str): Three letter site code
            species (str): Species name
            data_type (str): Must be a valid data type i.e. CRDS, GC
            See all valid data types in the DataTypes class
        Returns:
            dict: Dictionary of datasource metadata
    """
    if len(site) != 3 or not valid_site(site):
        # raise InvalidSiteError(f"{site} is not a valid site code")
        raise ValueError(f"{site} is not a valid site code")

    obs = ObsSurface.load()
    datasource_uuids = obs.datasources()

    # Shallow load the Datasources (only get their JSON metadata)
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    matching_sources = [
        d for d in datasources
        if d.search_metadata(search_terms=[site, species], find_all=True)
    ]

    def name_str(d):
        return "_".join([d.species(), d.site(), d.inlet(), d.instrument()])

    rank_info = {
        name_str(d): {
            "rank": d.rank(),
            "data_range": d.daterange_str(),
            "uuid": d.uuid(),
        }
        for d in matching_sources
    }

    self._before_ranking = copy.deepcopy(rank_info)
    self._key_uuids = {key: rank_info[key]["uuid"] for key in rank_info}

    return rank_info

def get_sources(args):
    """ Get the Datasources associated with the specified species at a specified site

        Args:
            args (dict): Dictionary containing site and species keys
        Returns:
            dict: Dictionary of datasource metadata
    """
    try:
        site = args["site"]
    except KeyError:
        # TODO - create a SiteError error type to raise here
        raise KeyError("Site must be specified")

    try:
        species = args["species"]
    except KeyError:
        raise KeyError("Species must be specified")

    obs = ObsSurface.load()
    datasource_uuids = obs.datasources()

    # Shallow load the Datasources (only get their JSON metadata)
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    matching_sources = [
        d for d in datasources
        if d.search_metadata(search_terms=[site, species], find_all=True)
    ]

    def name_str(d):
        return "_".join([d.species(), d.site(), d.inlet(), d.instrument()])

    unranked = {
        name_str(d): {
            "rank": d.rank(),
            "data_range": d.daterange_str(),
            "uuid": d.uuid(),
        }
        for d in matching_sources
    }

    return unranked

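# Illustrative only: how the get_sources() service function above might be
# called. The site and species values are hypothetical and assume matching
# data has already been processed into the object store.
sources = get_sources(args={"site": "hfd", "species": "co2"})

for name, info in sources.items():
    print(name, info["rank"], info["data_range"])
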
def process_data(
    data_file,
    source_name,
    data_type="CRDS",
    site=None,
    instrument_name=None,
    overwrite=False,
):
    """ Passes the given filename(s) to the correct processing
        object depending on the data_type argument.

        Args:
            data_file (str, tuple (str, str)): Paths of file(s) for processing
            source_name (str): Name of source
            data_type (str, default="CRDS"): Type of data to be processed (CRDS, GC etc)
            site (str, default=None): Site code or name
            instrument_name (str, default=None): Instrument name
            overwrite (bool, default=False): Should existing and overlapping data be overwritten
        Returns:
            list: List of Datasources
    """
    from HUGS.Modules import ObsSurface

    processing_obj = ObsSurface.load()

    if data_type == "GC":
        try:
            data, precision = data_file
        except (TypeError, ValueError) as error:
            raise TypeError("Ensure data and precision files are passed as a tuple\n", error)

    result = processing_obj.read_file(
        filepath=data_file, data_type=data_type, site=site, instrument=instrument_name
    )

    return result

def populate_store():
    get_local_bucket(empty=True)

    filepath = hfd_filepath()

    ObsSurface.read_file(filepath=filepath, data_type="CRDS", site="hfd")

def test_read_icos():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    assert list(results["tta.co2.1minute.222m.min.dat"].keys())[0] == "tta.co2.1minute.222m.min_co2"

    uuid = results["tta.co2.1minute.222m.min.dat"]["tta.co2.1minute.222m.min_co2"]

    data = Datasource.load(uuid=uuid, shallow=False).data()

    assert sorted(list(data.keys())) == sorted(
        [
            "2011-12-07-01:38:00+00:00_2011-12-31-19:57:00+00:00",
            "2011-06-01-05:54:00+00:00_2011-08-31-17:58:00+00:00",
            "2011-03-30-08:52:00+00:00_2011-05-31-20:59:00+00:00",
            "2011-09-01-11:20:00+00:00_2011-11-30-03:39:00+00:00",
            "2012-12-01-04:03:00+00:00_2012-12-31-15:41:00+00:00",
            "2012-06-01-11:15:00+00:00_2012-08-07-19:16:00+00:00",
            "2012-04-07-06:20:00+00:00_2012-05-31-18:00:00+00:00",
            "2012-09-05-02:15:00+00:00_2012-11-30-19:08:00+00:00",
            "2013-01-01-00:01:00+00:00_2013-01-17-18:06:00+00:00",
        ]
    )

    co2_data = data["2012-12-01-04:03:00+00:00_2012-12-31-15:41:00+00:00"]

    assert co2_data.time[0] == Timestamp("2012-12-01T04:03:00")
    assert co2_data.time[-1] == Timestamp("2012-12-31T15:41:00")
    assert co2_data["co2"][0] == 397.765
    assert co2_data["co2"][-1] == 398.374
    assert co2_data["co2_variability"][0] == 0.057
    assert co2_data["co2_variability"][-1] == 0.063
    assert co2_data["co2_number_of_observations"][0] == 12
    assert co2_data["co2_number_of_observations"][-1] == 13

    del co2_data.attrs["File created"]

    assert co2_data.attrs == {
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "species": "co2",
        "Calibration_scale": "unknown",
        "station_longitude": -2.98598,
        "station_latitude": 56.55511,
        "station_long_name": "Angus Tower, UK",
        "station_height_masl": 300.0,
    }

    obs = ObsSurface.load()

    assert list(obs._datasource_names.keys())[0] == "tta.co2.1minute.222m.min_co2"

def search(
    locations,
    species=None,
    inlet=None,
    instrument=None,
    find_all=True,
    start_datetime=None,
    end_datetime=None,
):
    """ Search for gas data (optionally within a daterange)

        TODO - review this function - feel like it can be tidied and simplified

        Args:
            species (str or list): Terms to search for in Datasources
            locations (str or list): Where to search for the terms in species
            inlet (str, default=None): Inlet height such as 100m
            instrument (str, default=None): Instrument name such as picarro
            find_all (bool, default=True): Require all search terms to be satisfied
            start_datetime (datetime, default=None): Start datetime for search.
            If None a start datetime of UNIX epoch (1970-01-01) is set
            end_datetime (datetime, default=None): End datetime for search.
            If None an end datetime of the current datetime is set
        Returns:
            dict: List of keys of Datasources matching the search parameters
    """
    from collections import defaultdict
    from json import load
    from HUGS.Modules import Datasource, ObsSurface
    from HUGS.Util import (
        get_datetime_now,
        get_datetime_epoch,
        create_daterange_str,
        timestamp_tzaware,
        get_datapath,
    )

    # if species is not None and not isinstance(species, list):
    if not isinstance(species, list):
        species = [species]

    if not isinstance(locations, list):
        locations = [locations]

    # Allow passing of location names instead of codes
    site_codes_json = get_datapath(filename="site_codes.json")

    with open(site_codes_json, "r") as f:
        d = load(f)
        site_codes = d["name_code"]

    updated_locations = []
    # Check locations, if they're longer than three letters do a lookup
    for loc in locations:
        if len(loc) > 3:
            try:
                site_code = site_codes[loc.lower()]
                updated_locations.append(site_code)
            except KeyError:
                raise ValueError(f"Invalid site {loc} passed")
        else:
            updated_locations.append(loc)

    locations = updated_locations

    if start_datetime is None:
        start_datetime = get_datetime_epoch()
    if end_datetime is None:
        end_datetime = get_datetime_now()

    # Ensure passed datetimes are timezone aware
    start_datetime = timestamp_tzaware(start_datetime)
    end_datetime = timestamp_tzaware(end_datetime)

    # Here we want to load in the ObsSurface module for now
    obs = ObsSurface.load()

    datasource_uuids = obs.datasources()

    # Shallow load the Datasources so we can search their metadata
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    # First we find the Datasources from locations we want to narrow down our search
    location_sources = defaultdict(list)

    # If we have locations to search
    for location in locations:
        for datasource in datasources:
            if datasource.search_metadata(search_terms=location):
                location_sources[location].append(datasource)

    # This is returned to the caller
    results = defaultdict(dict)

    # With both inlet and instrument specified we bypass the ranking system
    if inlet is not None and instrument is not None:
        for site, sources in location_sources.items():
            for sp in species:
                for datasource in sources:
                    # Just match the single source here
                    if datasource.search_metadata(
                        search_terms=[sp, site, inlet, instrument], find_all=True
                    ):
                        daterange_str = create_daterange_str(start=start_datetime, end=end_datetime)
                        # Get the data keys for the data in the matching daterange
                        in_date = datasource.in_daterange(daterange=daterange_str)

                        data_date_str = strip_dates_keys(in_date)

                        key = f"{sp}_{site}_{inlet}_{instrument}".lower()
                        # Find the keys that match the correct data
                        results[key]["keys"] = {data_date_str: in_date}
                        results[key]["metadata"] = datasource.metadata()

        return results

    for location, sources in location_sources.items():
        # Loop over and look for the species
        species_data = defaultdict(list)

        for datasource in sources:
            for s in species:
                search_terms = [x for x in (s, location, inlet, instrument) if x is not None]
                # Check the species and the daterange
                if datasource.search_metadata(search_terms=search_terms, find_all=True):
                    species_data[s].append(datasource)

        # For each location we want to find the highest ranking sources for the selected species
        for sp, sources in species_data.items():
            ranked_sources = {}
            # How to return all the sources if they're all 0?
            for source in sources:
                rank_data = source.get_rank(start_date=start_datetime, end_date=end_datetime)

                # With no rank set we get an empty dictionary
                if not rank_data:
                    ranked_sources[0] = 0
                    continue

                # Just get the highest ranked datasources and return them
                # Find the highest ranked data from this site
                highest_rank = sorted(rank_data.keys())[-1]

                if highest_rank == 0:
                    ranked_sources[0] = 0
                    continue

                ranked_sources[source.uuid()] = {
                    "rank": highest_rank,
                    "dateranges": rank_data[highest_rank],
                    "source": source,
                }

            # If it's all zeroes we want to return all sources
            if list(ranked_sources) == [0]:
                for source in sources:
                    key = f"{source.species()}_{source.site()}_{source.inlet()}_{source.instrument()}".lower()

                    daterange_str = create_daterange_str(start=start_datetime, end=end_datetime)
                    data_keys = source.in_daterange(daterange=daterange_str)

                    if not data_keys:
                        continue

                    # Get a key that covers the daterange of the actual data and not from epoch to now
                    # if no start/end datetimes are passed
                    data_date_str = strip_dates_keys(data_keys)

                    results[key]["keys"] = {data_date_str: data_keys}
                    results[key]["metadata"] = source.metadata()

                continue
            else:
                # TODO - find a cleaner way of doing this
                # We might have a zero rank, delete it as we have higher ranked data
                try:
                    del ranked_sources[0]
                except KeyError:
                    pass

            # Otherwise iterate over the sources that are ranked and extract the keys
            for uid in ranked_sources:
                source = ranked_sources[uid]["source"]
                source_dateranges = ranked_sources[uid]["dateranges"]

                key = f"{source.species()}_{source.site()}_{source.inlet()}_{source.instrument()}".lower()

                data_keys = {}
                # Get the keys for each daterange
                for d in source_dateranges:
                    keys_in_date = source.in_daterange(daterange=d)
                    d = d.replace("+00:00", "")

                    if keys_in_date:
                        data_keys[d] = keys_in_date

                if not data_keys:
                    continue

                results[key]["keys"] = data_keys
                results[key]["metadata"] = source.metadata()

    return results

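# Illustrative only: a search sketch assuming CRDS data for Heathfield has
# already been processed into the object store. The site, inlet, instrument
# and dates are hypothetical; passing naive datetimes assumes timestamp_tzaware
# accepts them, as the defaults above suggest.
from datetime import datetime

example_results = search(
    locations="hfd",
    species="ch4",
    inlet="100m",
    instrument="picarro",
    start_datetime=datetime(2014, 1, 1),
    end_datetime=datetime(2014, 2, 1),
)

for key, entry in example_results.items():
    print(key, list(entry["keys"].keys()))
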
def test_read_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath, precision_filepath), data_type="GCWERKS")

    expected_keys = sorted(
        [
            "capegrim-medusa.18_NF3",
            "capegrim-medusa.18_CF4",
            "capegrim-medusa.18_PFC-116",
            "capegrim-medusa.18_PFC-218",
            "capegrim-medusa.18_PFC-318",
            "capegrim-medusa.18_C4F10",
            "capegrim-medusa.18_C6F14",
            "capegrim-medusa.18_SF6",
            "capegrim-medusa.18_SO2F2",
            "capegrim-medusa.18_SF5CF3",
            "capegrim-medusa.18_HFC-23",
            "capegrim-medusa.18_HFC-32",
            "capegrim-medusa.18_HFC-125",
            "capegrim-medusa.18_HFC-134a",
            "capegrim-medusa.18_HFC-143a",
            "capegrim-medusa.18_HFC-152a",
            "capegrim-medusa.18_HFC-227ea",
            "capegrim-medusa.18_HFC-236fa",
            "capegrim-medusa.18_HFC-245fa",
            "capegrim-medusa.18_HFC-365mfc",
            "capegrim-medusa.18_HFC-4310mee",
            "capegrim-medusa.18_HCFC-22",
            "capegrim-medusa.18_HCFC-124",
            "capegrim-medusa.18_HCFC-132b",
            "capegrim-medusa.18_HCFC-133a",
            "capegrim-medusa.18_HCFC-141b",
            "capegrim-medusa.18_HCFC-142b",
            "capegrim-medusa.18_CFC-11",
            "capegrim-medusa.18_CFC-12",
            "capegrim-medusa.18_CFC-13",
            "capegrim-medusa.18_CFC-112",
            "capegrim-medusa.18_CFC-113",
            "capegrim-medusa.18_CFC-114",
            "capegrim-medusa.18_CFC-115",
            "capegrim-medusa.18_H-1211",
            "capegrim-medusa.18_H-1301",
            "capegrim-medusa.18_H-2402",
            "capegrim-medusa.18_CH3Cl",
            "capegrim-medusa.18_CH3Br",
            "capegrim-medusa.18_CH3I",
            "capegrim-medusa.18_CH2Cl2",
            "capegrim-medusa.18_CHCl3",
            "capegrim-medusa.18_CCl4",
            "capegrim-medusa.18_CH2Br2",
            "capegrim-medusa.18_CHBr3",
            "capegrim-medusa.18_CH3CCl3",
            "capegrim-medusa.18_TCE",
            "capegrim-medusa.18_PCE",
            "capegrim-medusa.18_ethyne",
            "capegrim-medusa.18_ethane",
            "capegrim-medusa.18_propane",
            "capegrim-medusa.18_c-propane",
            "capegrim-medusa.18_benzene",
            "capegrim-medusa.18_toluene",
            "capegrim-medusa.18_COS",
            "capegrim-medusa.18_desflurane",
        ]
    )

    assert sorted(list(results["capegrim-medusa.18.C"].keys())) == expected_keys

    # Load in some data
    uuid = results["capegrim-medusa.18.C"]["capegrim-medusa.18_HFC-152a"]

    hfc152a_data = Datasource.load(uuid=uuid, shallow=False).data()
    hfc152a_data = hfc152a_data["2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"]

    assert hfc152a_data.time[0] == Timestamp("2018-01-01T02:24:00")
    assert hfc152a_data.time[-1] == Timestamp("2018-01-31T23:33:00")
    assert hfc152a_data["hfc152a"][0] == 4.409
    assert hfc152a_data["hfc152a"][-1] == 4.262
    assert hfc152a_data["hfc152a_repeatability"][0] == 0.03557
    assert hfc152a_data["hfc152a_repeatability"][-1] == 0.03271
    assert hfc152a_data["hfc152a_status_flag"][0] == 0
    assert hfc152a_data["hfc152a_status_flag"][-1] == 0
    assert hfc152a_data["hfc152a_integration_flag"][0] == 0
    assert hfc152a_data["hfc152a_integration_flag"][-1] == 0

    # Check we have the Datasource info saved
    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

    del hfc152a_data.attrs["File created"]

    assert hfc152a_data.attrs == {
        "data_owner": "Paul Krummel",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "75m_4",
        "comment": "Medusa measurements. Output from GCWerks. See Miller et al. (2008).",
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "species": "hfc152a",
        "Calibration_scale": "SIO-05",
        "station_longitude": 144.689,
        "station_latitude": -40.683,
        "station_long_name": "Cape Grim, Tasmania",
        "station_height_masl": 94.0,
    }