def test_read_file_thd():
    """Parse the trinidadhead.01 GC file and check its keys and CH3CCl3 values."""
    data_file = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_file = get_datapath(filename="trinidadhead.01.precisions.C", data_type="GC")

    parsed = parse_gcwerks(
        data_filepath=data_file,
        precision_filepath=precision_file,
        site="thd",
        network="agage",
        instrument="gcmd",
    )

    parsed_surface_metachecker(data=parsed)

    # All keys have the form "<species>_10m".
    assert sorted(parsed.keys()) == [
        "ccl4_10m",
        "cfc113_10m",
        "cfc11_10m",
        "cfc12_10m",
        "ch3ccl3_10m",
        "ch4_10m",
        "chcl3_10m",
        "n2o_10m",
    ]

    ch3ccl3 = parsed["ch3ccl3_10m"]["data"]

    # First and last timestamps of the record.
    times = ch3ccl3.time
    assert times[0] == pd.Timestamp("2001-01-01T01:05:22.5")
    assert times[-1] == pd.Timestamp("2001-12-31T23:18:22.5")

    # First and last measured values.
    values = ch3ccl3["ch3ccl3"]
    assert values[0] == 41.537
    assert values[-1] == 34.649
def test_no_precisions_species_raises():
    """parse_gcwerks must raise ValueError for the '.precisions.broke.C' file."""
    parse_kwargs = {
        "data_filepath": get_datapath(filename="capegrim-medusa.18.C", data_type="GC"),
        "precision_filepath": get_datapath(
            filename="capegrim-medusa.18.precisions.broke.C", data_type="GC"
        ),
        "site": "cgo",
        "network": "agage",
    }

    with pytest.raises(ValueError):
        parse_gcwerks(**parse_kwargs)
def test_read_ridgehill_window_inlet_all_NaNs():
    """Parsing the ridgehill-md.11 GC file must yield a falsy (empty) result."""
    ridgehill_file = get_datapath(filename="ridgehill-md.11.C", data_type="GC")
    ridgehill_precisions = get_datapath(filename="ridgehill-md.11.precisions.C", data_type="GC")

    parsed = parse_gcwerks(
        data_filepath=ridgehill_file,
        precision_filepath=ridgehill_precisions,
        site="RGL",
        instrument="gcmd",
        network="agage",
    )

    assert not parsed
def test_read_invalid_instrument_raises():
    """parse_gcwerks must raise ValueError when given instrument="fish"."""
    data_file = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_file = get_datapath(filename="trinidadhead.01.precisions.C", data_type="GC")

    bad_request = dict(
        data_filepath=data_file,
        precision_filepath=precision_file,
        site="CGO",
        instrument="fish",
        network="agage",
    )

    with pytest.raises(ValueError):
        parse_gcwerks(**bad_request)
def cgo_data():
    """Return the parsed capegrim-medusa.18 GC data.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    data_file = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_file = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    return parse_gcwerks(
        data_filepath=data_file,
        precision_filepath=precision_file,
        site="cgo",
        instrument="medusa",
        network="agage",
    )
def thd_data():
    """Return the parsed trinidadhead.01 GC data.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    data_file = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_file = get_datapath(filename="trinidadhead.01.precisions.C", data_type="GC")

    # NOTE(review): other tests in this file parse the same data file with
    # instrument="gcmd"; confirm that "medusa" is intended here.
    return parse_gcwerks(
        data_filepath=data_file,
        precision_filepath=precision_file,
        site="THD",
        instrument="medusa",
        network="agage",
    )
def test_delete_Datasource():
    """Deleting a Datasource drops its UUID from ObsSurface and its key from the bucket."""
    bucket = get_local_bucket(empty=True)

    thames_file = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")
    ObsSurface.read_file(
        filepath=thames_file,
        data_type="THAMESBARRIER",
        site="tmb",
        network="LGHG",
        sampling_period=60,
    )

    store = ObsSurface.load()

    # Take the first registered Datasource and the first key it holds.
    target_uuid = store.datasources()[0]
    stored_key = Datasource.load(uuid=target_uuid).data_keys()[0]

    assert exists(bucket=bucket, key=stored_key)

    store.delete(uuid=target_uuid)

    assert target_uuid not in store.datasources()
    assert not exists(bucket=bucket, key=stored_key)
def test_read_thames_barrier():
    """Store the Thames Barrier CSV and check the CO2 record that comes back."""
    get_local_bucket(empty=True)

    source_name = "thames_test_20190707.csv"
    thames_file = get_datapath(filename=source_name, data_type="THAMESBARRIER")

    results = ObsSurface.read_file(
        filepath=thames_file,
        data_type="THAMESBARRIER",
        site="TMB",
        network="LGHG",
        sampling_period=3600,
    )

    # Already in sorted order.
    expected_keys = ["CH4", "CO", "CO2"]

    processed = results["processed"][source_name]
    assert sorted(processed.keys()) == expected_keys

    co2_uuid = processed["CO2"]
    stored = Datasource.load(uuid=co2_uuid, shallow=False).data()
    co2 = stored["2019-07-01-00:39:55+00:00_2019-08-01-00:10:30+00:00"]

    assert co2.time[0] == Timestamp("2019-07-01T00:39:55")
    assert co2.time[-1] == Timestamp("2019-08-01T00:10:30")
    assert co2["co2"][0] == pytest.approx(417.97344761)
    assert co2["co2"][-1] == pytest.approx(417.80000653)
    assert co2["co2_variability"][0] == 0
    assert co2["co2_variability"][-1] == 0

    # The reloaded ObsSurface must map its Datasource UUIDs to the same names.
    store = ObsSurface.load()
    assert sorted(store._datasource_uuids.values()) == expected_keys
def test_read_noaa_obspack():
    """Store a NOAA ObsPack netCDF file and check the date-range keys and 1998 values."""
    source_name = "ch4_esp_surface-flask_2_representative.nc"
    obspack_file = get_datapath(filename=source_name, data_type="NOAA")

    results = ObsSurface.read_file(
        filepath=obspack_file,
        inlet="flask",
        data_type="NOAA",
        site="esp",
        network="NOAA",
        overwrite=True,
    )

    ch4_uuid = results["processed"][source_name]["ch4"]
    stored = Datasource.load(uuid=ch4_uuid, shallow=False).data()

    # One date-range key per calendar year from 1993 to 2002.
    expected_ranges = [
        "1993-06-17-00:12:30+00:00_1993-11-20-21:50:00+00:00",
        "1994-01-02-22:10:00+00:00_1994-12-24-22:15:00+00:00",
        "1995-02-06-12:00:00+00:00_1995-11-08-19:55:00+00:00",
        "1996-01-21-22:10:00+00:00_1996-12-01-20:00:00+00:00",
        "1997-02-12-19:00:00+00:00_1997-12-20-20:15:00+00:00",
        "1998-01-01-23:10:00+00:00_1998-12-31-19:50:00+00:00",
        "1999-01-14-22:15:00+00:00_1999-12-31-23:35:00+00:00",
        "2000-03-05-00:00:00+00:00_2000-11-04-22:30:00+00:00",
        "2001-01-05-21:45:00+00:00_2001-12-06-12:00:00+00:00",
        "2002-01-12-12:00:00+00:00_2002-01-12-12:00:00+00:00",
    ]
    assert sorted(stored.keys()) == expected_ranges

    year_1998 = stored["1998-01-01-23:10:00+00:00_1998-12-31-19:50:00+00:00"]

    assert year_1998.time[0] == Timestamp("1998-01-01T23:10:00")
    assert year_1998["ch4"][0] == pytest.approx(1.83337e-06)
    assert year_1998["ch4_number_of_observations"][0] == 2.0
    assert year_1998["ch4_variability"][0] == pytest.approx(2.093036e-09)
def test_read_obspack_flask_2021():
    """Parse a file from obspack_multi-species_1_CCGGSurfaceFlask_v2.0_2021-02-09."""
    flask_file = get_datapath(filename="ch4_spf_surface-flask_1_ccgg_Event.nc", data_type="NOAA")

    parsed = parse_noaa(
        data_filepath=flask_file,
        site="SPF",
        inlet="flask",
        measurement_type="flask",
        network="NOAA",
    )

    ch4 = parsed["ch4"]["data"]

    # First and last sample of the record.
    assert ch4.time[0] == Timestamp("1995-01-28T19:20:00")
    assert ch4.time[-1] == Timestamp("2015-12-12T20:15:00")
    assert ch4["ch4"][0] == pytest.approx(1673.89)
    assert ch4["ch4"][-1] == pytest.approx(1785.86)
    assert ch4["ch4_variability"][0] == pytest.approx(2.71)
    assert ch4["ch4_variability"][-1] == pytest.approx(0.91)

    # Sampling period is unset in the attributes, but a positive estimate exists.
    ds_attrs = ch4.attrs
    assert "sampling_period" in ds_attrs
    assert ds_attrs["sampling_period"] == "NOT_SET"
    assert "sampling_period_estimate" in ds_attrs
    assert float(ds_attrs["sampling_period_estimate"]) > 0.0

    # The metadata carries the same two keys.
    metadata = parsed["ch4"]["metadata"]
    assert "sampling_period" in metadata
    assert "sampling_period_estimate" in metadata

    parsed_surface_metachecker(data=parsed)
def test_recombination_CRDS():
    """Stored-then-recombined CRDS CH4 data must equal the directly parsed data."""
    get_local_bucket(empty=True)

    crds_file = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")

    # Store the file, then parse the same file directly for comparison.
    ObsSurface.read_file(crds_file, data_type="CRDS", site="hfd", network="DECC")
    parsed = parse_crds(data_filepath=crds_file, site="HFD", network="AGAGE")
    ch4_parsed = parsed["ch4"]["data"]

    query = {"species": "ch4", "site": "hfd", "inlet": "100m"}
    found = search(**query)
    data_keys = found.keys(**query)

    ch4_recombined = recombine_datasets(keys=data_keys)
    ch4_recombined.attrs = {}

    assert ch4_parsed.time.equals(ch4_recombined.time)
    assert ch4_parsed["ch4"].equals(ch4_recombined["ch4"])
def test_read_raw_file():
    """Parse a NOAA flask text file and check the first/last CO samples and attributes."""
    raw_file = get_datapath(filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    parsed = parse_noaa(
        data_filepath=raw_file,
        inlet="flask",
        site="pocn25",
        measurement_type="flask",
        sampling_period=1200,
    )

    parsed_surface_metachecker(data=parsed)

    co = parsed["co"]["data"]

    # First sample.
    assert co.time[0] == Timestamp("1990-06-29T05:00:00")
    assert co["co"][0] == pytest.approx(94.9)
    assert co["co_repeatability"][0] == pytest.approx(-999.99)
    assert co["co_selection_flag"][0] == 0

    # Last sample.
    assert co.time[-1] == Timestamp("2017-07-15T04:15:00")
    assert co["co"][-1] == pytest.approx(73.16)
    assert co["co_repeatability"][-1] == pytest.approx(-999.99)
    assert co["co_selection_flag"][-1] == 0

    assert attributes_checker_obssurface(attrs=co.attrs, species="co")
def test_read_thd_window_inlet():
    """Parse the trinidadhead.01.window-inlet GC file and check its CH4 record."""
    window_file = get_datapath(filename="trinidadhead.01.window-inlet.C", data_type="GC")
    precision_file = get_datapath(filename="trinidadhead.01.precisions.C", data_type="GC")

    parsed = parse_gcwerks(
        data_filepath=window_file,
        precision_filepath=precision_file,
        site="thd",
        instrument="gcmd",
        network="agage",
    )

    parsed_surface_metachecker(data=parsed)

    ch4 = parsed["ch4_10m"]["data"]

    assert ch4.time[0] == pd.Timestamp("2001-01-01T01:05:22.5")
    assert ch4.time[-1] == pd.Timestamp("2001-01-01T10:25:22.5")
    assert ch4["ch4"][0] == pytest.approx(1818.62)
    assert ch4["ch4"][-1] == pytest.approx(1840.432)
def test_read_obspack_2020():
    """Parse a file from obspack_ch4_1_GLOBALVIEWplus_v2.0_2020-04-24."""
    obspack_file = get_datapath(
        filename="ch4_esp_surface-flask_2_representative.nc", data_type="NOAA"
    )

    parsed = parse_noaa(
        data_filepath=obspack_file,
        site="esp",
        inlet="flask",
        measurement_type="flask",
        network="NOAA",
    )

    ch4 = parsed["ch4"]["data"]

    # First and last sample of the record.
    assert ch4.time[0] == Timestamp("1993-06-17T00:12:30")
    assert ch4.time[-1] == Timestamp("2002-01-12T12:00:00")
    assert ch4["ch4"][0] == pytest.approx(1.76763e-06)
    assert ch4["ch4"][-1] == pytest.approx(1.848995e-06)
    assert ch4["ch4_number_of_observations"][0] == 2.0
    assert ch4["ch4_number_of_observations"][-1] == 2.0
    assert ch4["ch4_variability"][0] == pytest.approx(1.668772e-09)
    assert ch4["ch4_variability"][-1] == pytest.approx(1.5202796e-09)

    # Sampling-period keys must be present in the dataset attributes...
    ds_attrs = ch4.attrs
    assert "sampling_period" in ds_attrs
    assert ds_attrs["sampling_period"] == "NOT_SET"
    assert "sampling_period_estimate" in ds_attrs

    # ...and in the metadata.
    metadata = parsed["ch4"]["metadata"]
    assert "sampling_period" in metadata
    assert "sampling_period_estimate" in metadata
def test_read_cranfield():
    """Store the Cranfield hourly-means CSV and check the stored CH4 record."""
    get_local_bucket(empty=True)

    source_name = "THB_hourly_means_test.csv"
    cranfield_file = get_datapath(filename=source_name, data_type="Cranfield_CRDS")

    results = ObsSurface.read_file(
        filepath=cranfield_file,
        data_type="CRANFIELD",
        site="THB",
        network="CRANFIELD",
    )

    processed = results["processed"][source_name]
    assert sorted(processed.keys()) == ["ch4", "co", "co2"]

    stored = Datasource.load(uuid=processed["ch4"], shallow=False).data()
    ch4 = stored["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4.time[0] == Timestamp("2018-05-05")
    assert ch4.time[-1] == Timestamp("2018-05-13T16:00:00")

    assert ch4["ch4"][0] == pytest.approx(2585.651)
    assert ch4["ch4"][-1] == pytest.approx(1999.018)

    # The variability variable name contains a space rather than an underscore.
    assert ch4["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4["ch4 variability"][-1] == pytest.approx(6.48413)
def test_add_new_data_correct_datasource():
    """Keys shared by two consecutive GCWERKS uploads must map to the same value.

    The compared values are presumably Datasource UUIDs; that is not
    established within this test.
    """
    get_local_bucket(empty=True)

    # First upload: the "05" file pair.
    data_05 = get_datapath(filename="capegrim-medusa.05.C", data_type="GC")
    precisions_05 = get_datapath(filename="capegrim-medusa.05.precisions.C", data_type="GC")

    results = ObsSurface.read_file(
        filepath=(data_05, precisions_05),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    first_results = results["processed"]["capegrim-medusa.05.C"]
    ordered_keys = sorted(first_results.keys())

    assert ordered_keys[:4] == ['c2cl4_10m', 'c2cl4_70m', 'c2f6_10m', 'c2f6_70m']
    assert ordered_keys[-4:] == ['hfc32_70m', 'sf6_70m', 'so2f2_10m', 'so2f2_70m']
    assert len(ordered_keys) == 69

    # Second upload: the "06" file pair.
    data_06 = get_datapath(filename="capegrim-medusa.06.C", data_type="GC")
    precisions_06 = get_datapath(filename="capegrim-medusa.06.precisions.C", data_type="GC")

    new_results = ObsSurface.read_file(
        filepath=(data_06, precisions_06),
        data_type="GCWERKS",
        site="CGO",
        network="AGAGE",
    )

    second_results = new_results["processed"]["capegrim-medusa.06.C"]

    common = []
    for name in first_results:
        if name in second_results:
            common.append(name)

    assert len(common) == 67

    for name in common:
        assert first_results[name] == second_results[name]
def test_incorrect_site_raises():
    """parse_beaco2n must raise ValueError for the Unknown_site.csv file with site="test"."""
    unknown_file = get_datapath(filename="Unknown_site.csv", data_type="BEACO2N")

    placeholders = {"site": "test", "network": "test", "inlet": "test"}

    with pytest.raises(ValueError):
        parse_beaco2n(data_filepath=unknown_file, **placeholders)
def crds_data():
    """Return the parsed hfd.picarro.1minute.100m.min.dat CRDS data.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    crds_file = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")

    return parse_crds(data_filepath=crds_file, site="hfd", network="DECC")
def test_read_incorrect_site_raises():
    """parse_noaa must raise ValueError when called with site="NotASite"."""
    filepath = get_datapath(
        filename="ch4_UNKOWN_surface-flask_1_ccgg_event.txt", data_type="NOAA"
    )

    # The call is expected to raise, so its return value is deliberately not
    # bound to a name.
    with pytest.raises(ValueError):
        parse_noaa(
            data_filepath=filepath,
            site="NotASite",
            inlet="flask",
            measurement_type="flask",
        )
def scsn06_data():
    """Return the parsed ch4_scsn06 NOAA flask data.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    flask_file = get_datapath(
        filename="ch4_scsn06_surface-flask_1_ccgg_event.txt", data_type="NOAA"
    )

    # NOTE(review): sampling_period is passed as the string "1200" here, whereas
    # test_read_raw_file passes the integer 1200 -- confirm which is intended.
    return parse_noaa(
        data_filepath=flask_file,
        site="scsn06",
        inlet="flask",
        measurement_type="flask",
        sampling_period="1200",
    )
def test_read_shangdianzi_ASM_inlet():
    """Parse the shangdianzi-medusa.18 GC file and check the NF3 record at 80m.

    The four value checks were previously bare comparison expressions whose
    results were discarded, so they could never fail. They are now real
    assertions.
    """
    data_path = get_datapath(filename="shangdianzi-medusa.18.C", data_type="GC")
    prec_path = get_datapath(filename="shangdianzi-medusa.18.precisions.C", data_type="GC")

    res = parse_gcwerks(
        data_filepath=data_path,
        precision_filepath=prec_path,
        site="sdz",
        instrument="medusa",
        network="agage",
    )

    parsed_surface_metachecker(data=res)

    data = res["nf3_80m"]["data"]

    # NOTE(review): these expected values were never enforced before; if one
    # fails, check the value against the data file before suspecting the parser.
    assert data.time[0] == pd.Timestamp("2018-01-16T09:10:00")
    assert data.time[-1] == pd.Timestamp("2018-01-16T20:00:00")
    assert data["nf3"][0] == pytest.approx(2.172)
    assert data["nf3"][-1] == pytest.approx(2.061)
def test_read_glasgow_no_valid_data():
    """Parsing 171_UNIVERSITYOFSTRATHCLYDE.csv must yield a falsy (empty) result."""
    strathclyde_file = get_datapath(
        filename="171_UNIVERSITYOFSTRATHCLYDE.csv", data_type="BEACO2N"
    )

    parsed = parse_beaco2n(
        data_filepath=strathclyde_file,
        site="UNIVERSITYOFSTRATHCLYDE",
        network="BEACO2N",
        inlet="99m",
    )

    assert not parsed
def test_recombination_GC():
    """Stored-then-recombined GC toluene data must equal the directly parsed data."""
    get_local_bucket(empty=True)

    data_file = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_file = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    # Store the file pair, then parse the same pair directly for comparison.
    ObsSurface.read_file(
        (data_file, precision_file), data_type="GCWERKS", site="cgo", network="agage"
    )
    parsed = parse_gcwerks(
        data_filepath=data_file,
        precision_filepath=precision_file,
        site="CGO",
        instrument="medusa",
        network="AGAGE",
    )

    toluene_parsed = parsed["c6h5ch3_70m"]["data"]

    query = {"species": "c6h5ch3", "site": "CGO", "inlet": "70m"}
    found = search(**query)
    data_keys = found.keys(**query)

    toluene_recombined = recombine_datasets(keys=data_keys)

    # Attributes are cleared on both sides before comparing.
    toluene_parsed.attrs = {}
    toluene_recombined.attrs = {}

    assert toluene_parsed.time.equals(toluene_recombined.time)

    compared_variables = (
        "c6h5ch3",
        "c6h5ch3_repeatability",
        "c6h5ch3_status_flag",
        "c6h5ch3_integration_flag",
    )
    for variable in compared_variables:
        assert toluene_parsed[variable].equals(toluene_recombined[variable])
def test_read_multiside_aqmesh():
    """Store multi-site AQMesh CO2 data and check the record for the "raith" site.

    The four value checks were previously bare comparison expressions whose
    results were discarded, so they could never fail. They are now real
    assertions.
    """
    datafile = get_datapath(filename="co2_data.csv", data_type="AQMESH")
    metafile = get_datapath(filename="co2_metadata.csv", data_type="AQMESH")

    result = ObsSurface.read_multisite_aqmesh(
        data_filepath=datafile, metadata_filepath=metafile, overwrite=True
    )

    # This crazy structure will be fixed when add_datasources is updated
    raith_uuid = result["raith"]["raith"]

    d = Datasource.load(uuid=raith_uuid, shallow=False)

    data = d.data()["2021-06-18-05:00:00+00:00_2021-06-21-13:00:00+00:00"]

    # NOTE(review): these expected values were never enforced before; if one
    # fails, check the value against the data file before suspecting the reader.
    assert data.time[0] == Timestamp("2021-06-18T05:00:00")
    assert data.co2[0] == 442.64
    assert data.time[-1] == Timestamp("2021-06-21T13:00:00")
    assert data.co2[-1] == 404.84

    expected_attrs = {
        "site": "raith",
        "pod_id": 39245,
        "start_date": "2021-06-15 01:00:00",
        "end_date": "2021-10-04 00:59:00",
        "relocate_date": "NA",
        "long_name": "Raith",
        "borough": "Glasgow",
        "site_type": "Roadside",
        "in_ulez": "No",
        "latitude": 55.798813,
        "longitude": -4.058363,
        "inlet": 1,
        "network": "aqmesh_glasgow",
        "sampling_period": "NOT_SET",
        "species": "co2",
        "units": "ppm",
    }

    assert data.attrs == expected_attrs
def data_read():
    """Fill an emptied local bucket with the TAC observation, emissions and footprint data.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    get_local_bucket(empty=True)

    # Values shared by more than one of the uploads below.
    site = "tac"
    network = "DECC"
    domain = "EUROPE"

    # Observations: TAC Picarro 1-minute file, 100m, 201208.
    obs_path = get_datapath(filename="tac.picarro.1minute.100m.201208.dat", data_type="CRDS")
    ObsSurface.read_file(filepath=obs_path, data_type="CRDS", site=site, network=network)

    # Emissions: anthropogenic CH4 over EUROPE for 2012.
    emissions_path = get_emissions_datapath("ch4-anthro_EUROPE_2012.nc")
    Emissions.read_file(
        filepath=emissions_path,
        species="ch4",
        source="anthro",
        date="2012",
        domain=domain,
        high_time_resolution=False,
    )

    # Footprints: TAC at 100m over EUROPE (file dated 201208).
    footprint_path = get_footprint_datapath("TAC-100magl_EUROPE_201208.nc")
    Footprints.read_file(
        filepath=footprint_path,
        site=site,
        model="NAME",
        network=network,
        height="100m",
        domain=domain,
    )
def load_CRDS():
    """Empty the local bucket and store the TAC, HFD and BSD CRDS test files.

    Presumably registered as a pytest fixture; no decorator is visible here.
    """
    get_local_bucket(empty=True)

    # Resolve all five paths before any upload takes place.
    tac_100m, hfd_50m, bsd_42m, bsd_108m, bsd_248m = [
        get_datapath(name, data_type="CRDS")
        for name in (
            "tac.picarro.1minute.100m.min.dat",
            "hfd.picarro.1minute.50m.min.dat",
            "bsd.picarro.1minute.42m.min.dat",
            "bsd.picarro.1minute.108m.min.dat",
            "bsd.picarro.1minute.248m.min.dat",
        )
    ]

    # One upload per site; the three BSD files go in together as a list.
    uploads = (
        (tac_100m, "tac"),
        (hfd_50m, "hfd"),
        ([bsd_42m, bsd_108m, bsd_248m], "bsd"),
    )
    for filepath, site in uploads:
        ObsSurface.read_file(filepath=filepath, data_type="CRDS", site=site, network="DECC")
def test_aqmesh_read():
    """Parse the AQMesh CO2 files and check data and metadata for "briarroadclydebank"."""
    co2_file = get_datapath(filename="co2_data.csv", data_type="AQMesh")
    co2_meta_file = get_datapath(filename="co2_metadata.csv", data_type="AQMesh")

    parsed = parse_aqmesh(data_filepath=co2_file, metadata_filepath=co2_meta_file)

    briar_road = parsed["briarroadclydebank"]
    measurements = briar_road["data"]
    site_metadata = briar_road["metadata"]

    assert measurements.time[0] == Timestamp("2021-06-16T01:00:00")
    assert measurements.co2[0] == 413.76
    assert measurements.time[-1] == Timestamp("2021-10-01")
    # NOTE(review): this checks index 1 (the second sample), not -1, although it
    # follows the last-timestamp check -- confirm that is intended.
    assert measurements.co2[1] == 415.11

    assert site_metadata == {
        "site": "briarroadclydebank",
        "pod_id": 11245,
        "start_date": "2021-06-16 01:00:00",
        "end_date": "2021-10-04 00:59:00",
        "relocate_date": "NA",
        "long_name": "Briar Road Clydebank",
        "borough": "Glasgow",
        "site_type": "Roadside",
        "in_ulez": "No",
        "latitude": 55.91796,
        "longitude": -4.406231,
        "inlet": 1,
        "network": "aqmesh_glasgow",
        "sampling_period": "NOT_SET",
        "species": "co2",
        "units": "ppm",
    }
def test_read_glasgow_valid_data():
    """Parse 175_BELLAHOUSTONACADEMY.csv and check the species keys and first CO2 sample."""
    bellahouston_file = get_datapath(
        filename="175_BELLAHOUSTONACADEMY.csv", data_type="BEACO2N"
    )

    parsed = parse_beaco2n(
        data_filepath=bellahouston_file,
        site="BELLAHOUSTONACADEMY",
        network="BEACO2N",
        inlet="99m",
    )

    co2 = parsed["co2"]["data"]

    # Already in sorted order.
    assert sorted(parsed.keys()) == ["co", "co2", "pm"]

    assert co2.time[0] == Timestamp("2021-07-15T12:00:00")
    assert co2.co2[0] == 410.7
    # The first QC value is expected to be NaN.
    assert isnan(co2.co2_qc[0])
def test_read_beaco2n():
    """Store the Charlton Community Center BEACO2N file and check the first CO2 sample."""
    source_name = "Charlton_Community_Center.csv"
    beaco2n_file = get_datapath(filename=source_name, data_type="BEACO2N")

    results = ObsSurface.read_file(
        filepath=beaco2n_file,
        data_type="BEACO2N",
        site="CCC",
        network="BEACO2N",
        overwrite=True,
    )

    co2_uuid = results["processed"][source_name]["co2"]

    stored = Datasource.load(uuid=co2_uuid, shallow=False).data()
    co2 = stored["2015-04-18-04:00:00+00:00_2015-04-18-10:00:00+00:00"]

    assert co2.time[0] == Timestamp("2015-04-18T04:00:00")
    assert co2["co2"][0] == 410.4
    assert co2["co2_qc"][0] == 2
def test_upload_same_file_twice_raises():
    """A second, identical ObsSurface.read_file call must raise ValueError."""
    get_local_bucket(empty=True)

    thames_file = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")

    upload_kwargs = {
        "filepath": thames_file,
        "data_type": "THAMESBARRIER",
        "site": "tmb",
        "network": "LGHG",
        "sampling_period": 60,
    }

    # First upload; nothing is asserted about its result.
    ObsSurface.read_file(**upload_kwargs)

    # The identical second upload must raise.
    with pytest.raises(ValueError):
        ObsSurface.read_file(**upload_kwargs)