Example #1
def test_recombination_CRDS():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    ObsSurface.read_file(filepath,
                         data_type="CRDS",
                         site="hfd",
                         network="DECC")

    gas_data = parse_crds(data_filepath=filepath, site="HFD", network="AGAGE")

    ch4_data_read = gas_data["ch4"]["data"]

    species = "ch4"
    site = "hfd"
    inlet = "100m"

    result = search(species=species, site=site, inlet=inlet)

    keys = result.keys(site=site, species=species, inlet=inlet)

    ch4_data_recombined = recombine_datasets(keys=keys)

    ch4_data_recombined.attrs = {}

    assert ch4_data_read.time.equals(ch4_data_recombined.time)
    assert ch4_data_read["ch4"].equals(ch4_data_recombined["ch4"])
Example #2
def test_delete_Datasource():
    bucket = get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv",
                                 data_type="THAMESBARRIER")

    ObsSurface.read_file(filepath=data_filepath,
                         data_type="THAMESBARRIER",
                         site="tmb",
                         network="LGHG",
                         sampling_period=60)

    obs = ObsSurface.load()

    datasources = obs.datasources()

    uuid = datasources[0]

    datasource = Datasource.load(uuid=uuid)

    data_keys = datasource.data_keys()

    key = data_keys[0]

    assert exists(bucket=bucket, key=key)

    obs.delete(uuid=uuid)

    assert uuid not in obs.datasources()

    assert not exists(bucket=bucket, key=key)
Example #3
def test_read_thames_barrier():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv",
                                 data_type="THAMESBARRIER")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="THAMESBARRIER",
                                   site="TMB",
                                   network="LGHG",
                                   sampling_period=3600)

    expected_keys = sorted(["CH4", "CO2", "CO"])

    assert sorted(list(results["processed"]
                       ["thames_test_20190707.csv"].keys())) == expected_keys

    uuid = results["processed"]["thames_test_20190707.csv"]["CO2"]

    data = Datasource.load(uuid=uuid, shallow=False).data()
    data = data["2019-07-01-00:39:55+00:00_2019-08-01-00:10:30+00:00"]

    assert data.time[0] == Timestamp("2019-07-01T00:39:55")
    assert data.time[-1] == Timestamp("2019-08-01T00:10:30")
    assert data["co2"][0] == pytest.approx(417.97344761)
    assert data["co2"][-1] == pytest.approx(417.80000653)
    assert data["co2_variability"][0] == 0
    assert data["co2_variability"][-1] == 0

    obs = ObsSurface.load()

    assert sorted(obs._datasource_uuids.values()) == expected_keys
Example #4
def data_read():
    '''
    Data set up for running tests for these sets of modules.
    '''
    get_local_bucket(empty=True)

    # Files for creating forward model (mf_mod) for methane at TAC site

    # Observation data
    #  - TAC at 100m for 201208
    site = "tac"
    network = "DECC"
    data_type = "CRDS"

    tac_path = get_datapath(filename="tac.picarro.1minute.100m.201208.dat",
                            data_type="CRDS")
    ObsSurface.read_file(filepath=tac_path,
                         data_type=data_type,
                         site=site,
                         network=network)

    # Emissions data
    # Anthropogenic ch4 (methane) data from 2012 for EUROPE
    species = "ch4"
    source = "anthro"
    domain = "EUROPE"

    emissions_datapath = get_emissions_datapath("ch4-anthro_EUROPE_2012.nc")

    Emissions.read_file(
        filepath=emissions_datapath,
        species=species,
        source=source,
        date="2012",
        domain=domain,
        high_time_resolution=False,
    )

    # Footprint data
    # TAC footprint from 2012-08 - 2012-09 at 100m
    height = "100m"
    model = "NAME"

    fp_datapath = get_footprint_datapath("TAC-100magl_EUROPE_201208.nc")

    Footprints.read_file(filepath=fp_datapath,
                         site=site,
                         model=model,
                         network=network,
                         height=height,
                         domain=domain)
Example #5
    def _get_sources_local(self, site: str, species: str) -> Dict:
        site = verify_site(site=site)

        # Save these
        self.site = site
        self.species = species

        obs = ObsSurface.load()
        datasource_uuids = obs.datasources()
        rank_table = obs.rank_data()

        # Shallow load the Datasources (only get their JSON metadata)
        datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

        matching_sources = [d for d in datasources if d.search_metadata(site=site, species=species)]

        if not matching_sources:
            return {}

        self._user_info = {
            d.inlet(): {
                "rank_data": rank_table.get(d.uuid(), "NA"),
                "data_range": d.daterange_str(),
            }
            for d in matching_sources
        }

        self._key_lookup = {d.inlet(): d.uuid() for d in matching_sources}
        self._needs_update = False

        return self._user_info
Example #6
def query_store() -> Dict:
    """Create a dictionary that can be used to visualise the object store

    Returns:
        dict: Dictionary for data to be shown in force graph
    """
    from openghg.store.base import Datasource
    from openghg.store import ObsSurface

    obs = ObsSurface.load()

    datasource_uuids = obs.datasources()
    datasources = (Datasource.load(uuid=uuid, shallow=True)
                   for uuid in datasource_uuids)

    data = {}

    for d in datasources:
        metadata = d.metadata()
        result = {
            "site": metadata["site"],
            "species": metadata["species"],
            "instrument": metadata.get("instrument", "Unknown"),
            "network": metadata.get("network", "Unknown"),
            "inlet": metadata.get("inlet", "Unknown"),
        }
        data[d.uuid()] = result

    return data
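Usage note: query_store returns a plain dictionary keyed by Datasource UUID, so inspecting or visualising the object store only needs a loop over its items. A minimal sketch, assuming the query_store function defined above is in scope:

store_view = query_store()

for uuid, meta in store_view.items():
    # Each entry carries the site, species, instrument, network and inlet
    print(f"{uuid}: {meta['site']} {meta['species']} ({meta['inlet']})")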
Example #7
def get_sources(args: Dict) -> Dict:
    obs = ObsSurface.load()
    datasource_uuids = obs.datasources()
    rank_table = obs.rank_data()

    site = args["site"]
    species = args["species"]

    # Shallow load the Datasources (only get their JSON metadata)
    datasources = (Datasource.load(uuid=uuid, shallow=True)
                   for uuid in datasource_uuids)

    matching_sources = [
        d for d in datasources if d.search_metadata(site=site, species=species)
    ]

    if not matching_sources:
        return {}

    user_info = {
        d.inlet(): {
            "rank_data": rank_table.get(d.uuid(), "NA"),
            "data_range": d.daterange_str(),
        }
        for d in matching_sources
    }

    key_lookup = {d.inlet(): d.uuid() for d in matching_sources}

    return {"user_info": user_info, "key_lookup": key_lookup}
Example #8
def test_read_noaa_obspack():
    data_filepath = get_datapath(
        filename="ch4_esp_surface-flask_2_representative.nc", data_type="NOAA")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   inlet="flask",
                                   data_type="NOAA",
                                   site="esp",
                                   network="NOAA",
                                   overwrite=True)

    uuid = results["processed"]["ch4_esp_surface-flask_2_representative.nc"][
        "ch4"]

    ch4_data = Datasource.load(uuid=uuid, shallow=False).data()

    assert sorted(list(ch4_data.keys())) == [
        "1993-06-17-00:12:30+00:00_1993-11-20-21:50:00+00:00",
        "1994-01-02-22:10:00+00:00_1994-12-24-22:15:00+00:00",
        "1995-02-06-12:00:00+00:00_1995-11-08-19:55:00+00:00",
        "1996-01-21-22:10:00+00:00_1996-12-01-20:00:00+00:00",
        "1997-02-12-19:00:00+00:00_1997-12-20-20:15:00+00:00",
        "1998-01-01-23:10:00+00:00_1998-12-31-19:50:00+00:00",
        "1999-01-14-22:15:00+00:00_1999-12-31-23:35:00+00:00",
        "2000-03-05-00:00:00+00:00_2000-11-04-22:30:00+00:00",
        "2001-01-05-21:45:00+00:00_2001-12-06-12:00:00+00:00",
        "2002-01-12-12:00:00+00:00_2002-01-12-12:00:00+00:00",
    ]

    data = ch4_data["1998-01-01-23:10:00+00:00_1998-12-31-19:50:00+00:00"]

    assert data.time[0] == Timestamp("1998-01-01T23:10:00")
    assert data["ch4"][0] == pytest.approx(1.83337e-06)
    assert data["ch4_number_of_observations"][0] == 2.0
    assert data["ch4_variability"][0] == pytest.approx(2.093036e-09)
Example #9
def test_read_cranfield():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="THB_hourly_means_test.csv",
                                 data_type="Cranfield_CRDS")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="CRANFIELD",
                                   site="THB",
                                   network="CRANFIELD")

    expected_keys = ["ch4", "co", "co2"]

    assert sorted(results["processed"]
                  ["THB_hourly_means_test.csv"].keys()) == expected_keys

    uuid = results["processed"]["THB_hourly_means_test.csv"]["ch4"]

    ch4_data = Datasource.load(uuid=uuid, shallow=False).data()
    ch4_data = ch4_data["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4_data.time[0] == Timestamp("2018-05-05")
    assert ch4_data.time[-1] == Timestamp("2018-05-13T16:00:00")

    assert ch4_data["ch4"][0] == pytest.approx(2585.651)
    assert ch4_data["ch4"][-1] == pytest.approx(1999.018)

    assert ch4_data["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4_data["ch4 variability"][-1] == pytest.approx(6.48413)
Example #10
def test_add_new_data_correct_datasource():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.05.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.05.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    first_results = results["processed"]["capegrim-medusa.05.C"]

    sorted_keys = sorted(
        list(results["processed"]["capegrim-medusa.05.C"].keys()))

    assert sorted_keys[:4] == [
        'c2cl4_10m', 'c2cl4_70m', 'c2f6_10m', 'c2f6_70m'
    ]
    assert sorted_keys[-4:] == [
        'hfc32_70m', 'sf6_70m', 'so2f2_10m', 'so2f2_70m'
    ]
    assert len(sorted_keys) == 69

    data_filepath = get_datapath(filename="capegrim-medusa.06.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.06.precisions.C", data_type="GC")

    new_results = ObsSurface.read_file(filepath=(data_filepath,
                                                 precision_filepath),
                                       data_type="GCWERKS",
                                       site="CGO",
                                       network="AGAGE")

    second_results = new_results["processed"]["capegrim-medusa.06.C"]

    shared_keys = [key for key in first_results if key in second_results]

    assert len(shared_keys) == 67

    for key in shared_keys:
        assert first_results[key] == second_results[key]
Example #11
def test_set_rank_overwrite():
    o = ObsSurface.load()

    o._rank_data.clear()

    test_uid = "test-uid-123"

    daterange_str = create_daterange_str(start="2007-01-01", end="2015-01-01")
    o.set_rank(uuid=test_uid, rank=1, date_range=daterange_str)
    assert o._rank_data["test-uid-123"] == {
        "2007-01-01-00:00:00+00:00_2015-01-01-00:00:00+00:00": 1
    }

    daterange_str = create_daterange_str(start="2008-01-01", end="2009-01-01")
    o.set_rank(uuid=test_uid, rank=2, date_range=daterange_str, overwrite=True)

    expected_ranking = {
        "2007-01-01-00:00:00+00:00_2007-12-31-23:59:59+00:00": 1,
        "2008-01-01-00:00:00+00:00_2008-12-31-23:59:59+00:00": 2,
        "2009-01-01-00:00:01+00:00_2015-01-01-00:00:00+00:00": 1,
    }

    assert o._rank_data["test-uid-123"] == expected_ranking

    daterange_str = create_daterange_str(start="1994-01-01", end="2023-01-01")
    o.set_rank(uuid=test_uid, rank=2, date_range=daterange_str, overwrite=True)

    assert o._rank_data["test-uid-123"] == {
        "1994-01-01-00:00:00+00:00_2023-01-01-00:00:00+00:00": 2
    }

    o._rank_data.clear()

    daterange_str = create_daterange_str(start="2001-01-01", end="2021-01-01")
    o.set_rank(uuid=test_uid, rank=1, date_range=daterange_str)

    assert o._rank_data["test-uid-123"] == {
        "2001-01-01-00:00:00+00:00_2021-01-01-00:00:00+00:00": 1
    }

    daterange_str = create_daterange_str(start="2007-01-01", end="2009-01-01")
    o.set_rank(uuid=test_uid, rank=2, date_range=daterange_str, overwrite=True)

    daterange_str = create_daterange_str(start="2015-01-01", end="2016-01-01")
    o.set_rank(uuid=test_uid, rank=2, date_range=daterange_str, overwrite=True)

    expected = {
        "2001-01-01-00:00:00+00:00_2006-12-31-23:59:59+00:00": 1,
        "2007-01-01-00:00:00+00:00_2008-12-31-23:59:59+00:00": 2,
        "2009-01-01-00:00:01+00:00_2014-12-31-23:59:59+00:00": 1,
        "2015-01-01-00:00:00+00:00_2015-12-31-23:59:59+00:00": 2,
        "2016-01-01-00:00:01+00:00_2021-01-01-00:00:00+00:00": 1,
    }

    assert o._rank_data["test-uid-123"] == expected
Example #12
def co2_setup():
    get_local_bucket(empty=True)

    data_type = "CRDS"

    tac_file = get_datapath(filename="tac.picarro.hourly.100m.test.dat",
                            data_type=data_type)
    tac_footprint = get_fp_datapath("TAC-100magl_UKV_co2_TEST_201407.nc")
    co2_emissions = get_flux_datapath("co2-rtot-cardamom-2hr_TEST_2014.nc")

    site = "tac"
    species = "co2"
    network = "DECC"
    height = "100m"

    domain = "TEST"
    model = "NAME"
    metmodel = "UKV"

    source = "rtot-cardamom"
    date = "2014"

    ObsSurface.read_file(filepath=tac_file,
                         data_type=data_type,
                         site=site,
                         network=network,
                         inlet=height)

    Footprints.read_file(filepath=tac_footprint,
                         site=site,
                         height=height,
                         domain=domain,
                         model=model,
                         metmodel=metmodel,
                         species=species)

    Emissions.read_file(filepath=co2_emissions,
                        species=species,
                        source=source,
                        domain=domain,
                        date=date,
                        high_time_resolution=True)
Example #13
def test_upload_same_file_twice_raises():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv",
                                 data_type="THAMESBARRIER")

    ObsSurface.read_file(filepath=data_filepath,
                         data_type="THAMESBARRIER",
                         site="tmb",
                         network="LGHG",
                         sampling_period=60)

    # assert not res["error"]

    with pytest.raises(ValueError):
        ObsSurface.read_file(filepath=data_filepath,
                             data_type="THAMESBARRIER",
                             site="tmb",
                             network="LGHG",
                             sampling_period=60)
Example #14
def _process_files_local(
    files: filepathType,
    data_type: str,
    site: str,
    network: str,
    inlet: str = None,
    instrument: str = None,
    overwrite: bool = False,
) -> Dict:
    """Process the passed file(s)

    Args:
        files: Path of files to be processed
        data_type: Type of data to be processed (CRDS, GC etc)
        site: Site code or name
        network: Network name
        inlet: Inlet height such as 100m
        instrument: Instrument name
        overwrite: Should this data overwrite data already stored
            for these datasources for existing dateranges
    Returns:
        dict: UUIDs of Datasources storing data of processed files keyed by filename
    """
    data_type = DataTypes[data_type.upper()].name

    if not isinstance(files, list):
        files = [files]

    obs = ObsSurface.load()

    results = {}
    # Ensure we have Paths
    # TODO: Delete this, as we already have the same warning in read_file?
    if data_type == "GCWERKS":
        if not all(isinstance(item, tuple) for item in files):
            raise TypeError(
                "If data type is GC, a list of tuples for data and precision filenames must be passed"
            )
        files = [(Path(f), Path(p)) for f, p in files]
    else:
        files = [Path(f) for f in files]

    r = obs.read_file(
        filepath=files,
        data_type=data_type,
        site=site,
        network=network,
        instrument=instrument,
        inlet=inlet,
        overwrite=overwrite,
    )
    results.update(r)

    return results
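A minimal sketch of calling _process_files_local (defined above), assuming an object store already exists and the file is a CRDS file like those used in the other examples; the path below is purely illustrative:

results = _process_files_local(
    files="/path/to/tac.picarro.1minute.100m.min.dat",  # illustrative path, not shipped with the tests
    data_type="CRDS",
    site="tac",
    network="DECC",
    inlet="100m",
)
# results maps each processed filename to the UUIDs of the Datasources that stored its data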
Example #15
def set_rank(args: Dict) -> None:
    obs = ObsSurface.load()

    rank = args["rank"]
    uuid = args["uuid"]
    dateranges = args["dateranges"]
    overwrite = args["overwrite"]

    obs.set_rank(uuid=uuid,
                 rank=rank,
                 date_range=dateranges,
                 overwrite=overwrite)
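The args dictionary passed to set_rank mirrors the keyword arguments of ObsSurface.set_rank; a hedged sketch using the daterange string format seen in the ranking tests on this page, with set_rank (defined above) in scope:

args = {
    "uuid": "test-uid-123",
    "rank": 1,
    "dateranges": "2012-01-01_2013-01-01",
    "overwrite": False,
}
set_rank(args=args)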
Example #16
def test_recombination_GC():
    get_local_bucket(empty=True)

    data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision = get_datapath(filename="capegrim-medusa.18.precisions.C",
                             data_type="GC")

    ObsSurface.read_file((data, precision),
                         data_type="GCWERKS",
                         site="cgo",
                         network="agage")

    data = parse_gcwerks(data_filepath=data,
                         precision_filepath=precision,
                         site="CGO",
                         instrument="medusa",
                         network="AGAGE")

    toluene_data = data["c6h5ch3_70m"]["data"]

    species = "c6h5ch3"
    site = "CGO"
    inlet = "70m"

    result = search(species=species, site=site, inlet=inlet)
    keys = result.keys(site=site, species=species, inlet=inlet)

    toluene_data_recombined = recombine_datasets(keys=keys)

    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["c6h5ch3"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["c6h5ch3_repeatability"].equals(
        toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["c6h5ch3_status_flag"].equals(
        toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["c6h5ch3_integration_flag"].equals(
        toluene_data_recombined["c6h5ch3_integration_flag"])
Example #17
    def _clear_rank_local(self, inlet: str) -> None:
        """Clear the ranking data for a Datasource

        Args:
            inlet: Inlet of the source to clear
        Returns:
            None
        """
        obs = ObsSurface.load()
        inlet = inlet.lower()
        uuid = self._key_lookup[inlet]
        obs.clear_rank(uuid=uuid)
        self._needs_update = True
Example #18
def test_rank_overlapping_dateranges():
    dateranges = [
        "2014-01-01_2099-06-06", "2014-06-07_2015-09-09",
        "2015-09-10_2019-01-06"
    ]

    o = ObsSurface.load()
    o._rank_data.clear()

    test_uid = "test-uid-123"

    o.set_rank(uuid=test_uid, rank=1, date_range=dateranges)

    with pytest.raises(ValueError):
        o.set_rank(uuid=test_uid, rank=2, date_range=dateranges)
Example #19
    def _set_rank_local(
        self,
        inlet: str,
        rank: Union[int, str],
        start_date: str,
        end_date: str,
        overwrite: bool = False,
    ) -> None:
        obs = ObsSurface.load()

        inlet = inlet.lower()
        uuid = self._key_lookup[inlet]

        daterange = create_daterange_str(start=start_date, end=end_date)

        obs.set_rank(uuid=uuid, rank=rank, date_range=daterange, overwrite=overwrite)

        self._needs_update = True
Example #20
def test_read_beaco2n():
    data_filepath = get_datapath(filename="Charlton_Community_Center.csv",
                                 data_type="BEACO2N")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="BEACO2N",
                                   site="CCC",
                                   network="BEACO2N",
                                   overwrite=True)

    uuid = results["processed"]["Charlton_Community_Center.csv"]["co2"]

    co2_data = Datasource.load(uuid=uuid, shallow=False).data()
    co2_data = co2_data["2015-04-18-04:00:00+00:00_2015-04-18-10:00:00+00:00"]

    assert co2_data.time[0] == Timestamp("2015-04-18T04:00:00")
    assert co2_data["co2"][0] == 410.4
    assert co2_data["co2_qc"][0] == 2
Example #21
def test_rank_same_daterange_doesnt_change():
    o = ObsSurface.load()
    o._rank_data.clear()

    test_uid = "test-uid-123"

    o.set_rank(uuid=test_uid, rank=1, date_range="2012-01-01_2012-06-01")

    assert o._rank_data == {
        "test-uid-123": {
            "2012-01-01-00:00:00+00:00_2012-06-01-00:00:00+00:00": 1
        }
    }

    o.set_rank(uuid=test_uid, rank=1, date_range="2012-01-01_2012-06-01")

    assert o._rank_data == {
        "test-uid-123": {
            "2012-01-01-00:00:00+00:00_2012-06-01-00:00:00+00:00": 1
        }
    }
Example #22
def test_read_multiside_aqmesh():
    datafile = get_datapath(filename="co2_data.csv", data_type="AQMESH")
    metafile = get_datapath(filename="co2_metadata.csv", data_type="AQMESH")

    result = ObsSurface.read_multisite_aqmesh(data_filepath=datafile,
                                              metadata_filepath=metafile,
                                              overwrite=True)

    # This crazy structure will be fixed when add_datasources is updated
    raith_uuid = result["raith"]["raith"]

    d = Datasource.load(uuid=raith_uuid, shallow=False)

    data = d.data()["2021-06-18-05:00:00+00:00_2021-06-21-13:00:00+00:00"]

    assert data.time[0] == Timestamp("2021-06-18T05:00:00")
    assert data.co2[0] == 442.64
    assert data.time[-1] == Timestamp("2021-06-21T13:00:00")
    assert data.co2[-1] == 404.84

    expected_attrs = {
        "site": "raith",
        "pod_id": 39245,
        "start_date": "2021-06-15 01:00:00",
        "end_date": "2021-10-04 00:59:00",
        "relocate_date": "NA",
        "long_name": "Raith",
        "borough": "Glasgow",
        "site_type": "Roadside",
        "in_ulez": "No",
        "latitude": 55.798813,
        "longitude": -4.058363,
        "inlet": 1,
        "network": "aqmesh_glasgow",
        "sampling_period": "NOT_SET",
        "species": "co2",
        "units": "ppm",
    }

    assert data.attrs == expected_attrs
Example #23
def test_set_rank():
    o = ObsSurface.load()

    o._rank_data.clear()

    test_uid = "test-uid-123"

    daterange_str = create_daterange_str(start="2001-01-01", end="2005-01-01")
    o.set_rank(uuid=test_uid, rank=1, date_range=daterange_str)

    assert o._rank_data == {
        "test-uid-123": {
            "2001-01-01-00:00:00+00:00_2005-01-01-00:00:00+00:00": 1
        }
    }

    daterange_str = create_daterange_str(start="2007-01-01", end="2009-01-01")
    o.set_rank(uuid=test_uid, rank=1, date_range=daterange_str)

    assert o._rank_data["test-uid-123"] == {
        "2001-01-01-00:00:00+00:00_2005-01-01-00:00:00+00:00": 1,
        "2007-01-01-00:00:00+00:00_2009-01-01-00:00:00+00:00": 1,
    }

    # Make sure we can't set another rank for the same daterange
    with pytest.raises(ValueError):
        o.set_rank(uuid=test_uid, rank=2, date_range=daterange_str)

    daterange_str = create_daterange_str(start="2008-01-01", end="2009-01-01")

    with pytest.raises(ValueError):
        o.set_rank(uuid=test_uid, rank=3, date_range=daterange_str)

    daterange_str = create_daterange_str(start="2007-01-01", end="2015-01-01")
    o.set_rank(uuid=test_uid, rank=1, date_range=daterange_str)

    assert o._rank_data["test-uid-123"] == {
        "2001-01-01-00:00:00+00:00_2005-01-01-00:00:00+00:00": 1,
        "2007-01-01-00:00:00+00:00_2015-01-01-00:00:00+00:00": 1,
    }
Example #24
def test_rank_daterange_start_overlap_overwrite():
    o = ObsSurface.load()
    o._rank_data.clear()

    test_uid = "test-uid-123"

    o.set_rank(uuid=test_uid, rank=1, date_range="2012-01-01_2013-01-01")

    assert o._rank_data == {
        "test-uid-123": {
            "2012-01-01-00:00:00+00:00_2013-01-01-00:00:00+00:00": 1
        }
    }

    o.set_rank(uuid=test_uid,
               rank=2,
               date_range="2012-01-01_2012-06-01",
               overwrite=True)

    assert o._rank_data == {
        "test-uid-123": {
            "2012-06-01-00:00:01+00:00_2013-01-01-00:00:00+00:00": 1,
            "2012-01-01-00:00:00+00:00_2012-06-01-00:00:00+00:00": 2,
        }
    }

    o.set_rank(uuid=test_uid,
               rank=1,
               date_range="2012-01-01_2013-01-01",
               overwrite=True)

    expected = {
        "test-uid-123": {
            "2012-01-01-00:00:00+00:00_2013-01-01-00:00:00+00:00": 1
        }
    }

    assert o._rank_data == expected
Example #25
def test_read_noaa_raw():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(
        filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    results = ObsSurface.read_file(filepath=data_filepath,
                                   data_type="NOAA",
                                   site="POCN25",
                                   network="NOAA",
                                   inlet="flask")

    uuid = results["processed"]["co_pocn25_surface-flask_1_ccgg_event.txt"][
        "co"]

    co_data = Datasource.load(uuid=uuid, shallow=False).data()

    assert sorted(list(co_data.keys())) == [
        "1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00",
        "2009-06-13-16:32:00+00:00_2009-12-03-00:30:00+00:00",
        "2010-01-10-00:13:00+00:00_2010-12-09-16:05:00+00:00",
        "2011-01-27-04:55:00+00:00_2011-11-11-14:45:00+00:00",
        "2016-12-03-12:37:00+00:00_2016-12-18-05:30:00+00:00",
        "2017-01-27-19:10:00+00:00_2017-07-15-04:15:00+00:00",
    ]

    co_data = co_data["1990-06-29-05:00:00+00:00_1990-07-10-21:28:00+00:00"]

    assert co_data["co"][0] == pytest.approx(94.9)
    assert co_data["co"][-1] == pytest.approx(95.65)

    assert co_data["co_repeatability"][0] == pytest.approx(-999.99)
    assert co_data["co_repeatability"][-1] == pytest.approx(-999.99)

    assert co_data["co_selection_flag"][0] == 0
    assert co_data["co_selection_flag"][-1] == 0
Example #26
def load_CRDS():
    get_local_bucket(empty=True)

    tac_100m = get_datapath("tac.picarro.1minute.100m.min.dat",
                            data_type="CRDS")
    hfd_50m = get_datapath("hfd.picarro.1minute.50m.min.dat", data_type="CRDS")
    bsd_42m = get_datapath("bsd.picarro.1minute.42m.min.dat", data_type="CRDS")
    bsd_108m = get_datapath("bsd.picarro.1minute.108m.min.dat",
                            data_type="CRDS")
    bsd_248m = get_datapath("bsd.picarro.1minute.248m.min.dat",
                            data_type="CRDS")

    ObsSurface.read_file(filepath=tac_100m,
                         data_type="CRDS",
                         site="tac",
                         network="DECC")
    ObsSurface.read_file(filepath=hfd_50m,
                         data_type="CRDS",
                         site="hfd",
                         network="DECC")
    ObsSurface.read_file(filepath=[bsd_42m, bsd_108m, bsd_248m],
                         data_type="CRDS",
                         site="bsd",
                         network="DECC")
Example #27
def search(**kwargs):  # type: ignore
    """Search for observations data. Any keyword arguments may be passed to the
    the function and these keywords will be used to search the metadata associated
    with each Datasource.

    Example / commonly used arguments are given below.

    Args:
        species: Terms to search for in Datasources
        locations: Where to search for the terms in species
        inlet: Inlet height such as 100m
        instrument: Instrument name such as picarro
        find_all: Require all search terms to be satisfied
        start_date: Start datetime for search.
            If None a start datetime of UNIX epoch (1970-01-01) is set
        end_date: End datetime for search.
            If None an end datetime of the current datetime is set
        skip_ranking: If True skip ranking system, defaults to False
    Returns:
        dict: List of keys of Datasources matching the search parameters
    """
    from addict import Dict as aDict
    from copy import deepcopy
    from itertools import chain as iter_chain

    from openghg.store import ObsSurface, Footprints, Emissions, EulerianModel
    from openghg.store.base import Datasource

    from openghg.util import (
        timestamp_now,
        timestamp_epoch,
        timestamp_tzaware,
        clean_string,
        closest_daterange,
        find_daterange_gaps,
        split_daterange_str,
        load_json,
    )
    from openghg.dataobjects import SearchResults

    # Get a copy of kwargs as we make some modifications below
    kwargs_copy = deepcopy(kwargs)

    # Do this here otherwise we have to produce them for every datasource
    start_date = kwargs.get("start_date")
    end_date = kwargs.get("end_date")

    if start_date is None:
        start_date = timestamp_epoch()
    else:
        start_date = timestamp_tzaware(start_date)

    if end_date is None:
        end_date = timestamp_now()
    else:
        end_date = timestamp_tzaware(end_date)

    kwargs_copy["start_date"] = start_date
    kwargs_copy["end_date"] = end_date

    skip_ranking = kwargs_copy.get("skip_ranking", False)

    try:
        del kwargs_copy["skip_ranking"]
    except KeyError:
        pass

    # As we might have kwargs that are None we want to get rid of those
    search_kwargs = {k: clean_string(v) for k, v in kwargs_copy.items() if v is not None}

    # Species translation

    species = search_kwargs.get("species")

    if species is not None:
        if not isinstance(species, list):
            species = [species]

        translator = load_json("species_translator.json")

        updated_species = []

        for s in species:
            updated_species.append(s)

            try:
                translated = translator[s]
            except KeyError:
                pass
            else:
                updated_species.extend(translated)

        search_kwargs["species"] = updated_species

    data_type = search_kwargs.get("data_type", "timeseries")

    valid_data_types = ("timeseries", "footprints", "emissions", "eulerian_model")
    if data_type not in valid_data_types:
        raise ValueError(f"{data_type} is not a valid data type, please select one of {valid_data_types}")

    # Assume we want timeseries data
    obj: Union[ObsSurface, Footprints, Emissions, EulerianModel] = ObsSurface.load()

    if data_type == "footprints":
        obj = Footprints.load()
    elif data_type == "emissions":
        obj = Emissions.load()
    elif data_type == "eulerian_model":
        obj = EulerianModel.load()

    datasource_uuids = obj.datasources()

    # Shallow load the Datasources so we can search their metadata
    datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

    # For the time being this will return a dict until we know how best to represent
    # the footprints and emissions results in a SearchResult object
    if data_type in {"emissions", "footprints", "eulerian_model"}:
        sources: Dict = aDict()
        for datasource in datasources:
            if datasource.search_metadata(**search_kwargs):
                uid = datasource.uuid()
                sources[uid]["keys"] = datasource.keys_in_daterange(start_date=start_date, end_date=end_date)
                sources[uid]["metadata"] = datasource.metadata()

        return sources

    # Find the Datasources that contain matching metadata
    matching_sources = {d.uuid(): d for d in datasources if d.search_metadata(**search_kwargs)}

    # TODO - Update this as it only uses the ACRG repo JSON at the moment
    # Check if this site only has one inlet, if so skip ranking
    # if "site" in search_kwargs:
    #     site = search_kwargs["site"]
    #     if not isinstance(site, list) and not multiple_inlets(site=site):
    #         skip_ranking = True

    # If there isn't *any* ranking data at all, skip all the ranking functionality
    if not obj._rank_data:
        skip_ranking = True

    # If only one datasource has been returned, skip all the ranking functionality
    if len(matching_sources) == 1:
        skip_ranking = True

    # If we have the site, inlet and species then just return the data
    # TODO - should instrument be added here
    if {"site", "inlet", "species"} <= search_kwargs.keys() or skip_ranking is True:
        specific_sources = aDict()
        for datasource in matching_sources.values():
            specific_keys = datasource.keys_in_daterange(start_date=start_date, end_date=end_date)

            if not specific_keys:
                continue

            metadata = datasource.metadata()

            site = metadata["site"]
            species = metadata["species"]
            inlet = metadata["inlet"]

            specific_sources[site][species][inlet]["keys"] = specific_keys
            specific_sources[site][species][inlet]["metadata"] = metadata

        return SearchResults(results=specific_sources.to_dict(), ranked_data=False)

    highest_ranked = aDict()

    for uid, datasource in matching_sources.items():
        # Find the site and then the ranking
        metadata = datasource.metadata()
        # Get the site inlet and species
        site = metadata["site"]
        species = metadata["species"]

        rank_data = obj.get_rank(uuid=uid, start_date=start_date, end_date=end_date)

        # If this Datasource doesn't have any ranking data skip it and move on
        if not rank_data:
            continue

        # There will only be a single rank key
        rank_value = next(iter(rank_data))
        # Get the daterange this rank covers
        rank_dateranges = rank_data[rank_value]

        # Each match we store gives us the information we need
        # to retrieve the data
        match = {"uuid": uid, "dateranges": rank_dateranges}

        # Need to ensure we get all the dates covered
        if species in highest_ranked[site]:
            species_rank_data = highest_ranked[site][species]

            # If we have a higher (lower number) rank save it
            if rank_value < species_rank_data["rank"]:
                species_rank_data["rank"] = rank_value
                species_rank_data["matching"] = [match]
            # If another Datasource has the same rank for another daterange
            # we want to save that as well
            elif rank_value == species_rank_data["rank"]:
                species_rank_data["matching"].append(match)
        else:
            highest_ranked[site][species]["rank"] = rank_value
            highest_ranked[site][species]["matching"] = [match]

    if not highest_ranked:
        raise ValueError(
            (
                "No ranking data set for the given search parameters."
                " Please refine your search to include a specific site, species and inlet."
            )
        )
    # Now that we have the highest ranked data and the dateranges it covers,
    # we want to fill in the gaps with (currently) the highest inlet from that site

    # We just want some rank_metadata to go along with the final data scheme
    # Keyed by daterange, with the ranked inlet as the value
    data_keys: Dict = aDict()
    for site, species in highest_ranked.items():
        for sp, data in species.items():
            # data_keys[site][sp]["keys"] = []

            species_keys = []
            species_rank_data = {}
            species_metadata = {}

            for match_data in data["matching"]:
                uuid = match_data["uuid"]
                match_dateranges = match_data["dateranges"]
                # Get the datasource as it's already in the dictionary
                # we created earlier
                datasource = matching_sources[uuid]
                metadata = datasource.metadata()
                inlet = metadata["inlet"]

                keys = []
                for dr in match_dateranges:
                    date_keys = datasource.keys_in_daterange_str(daterange=dr)

                    if date_keys:
                        keys.extend(date_keys)
                        # We'll add this to the metadata in the search results we return at the end
                        species_rank_data[dr] = inlet

                species_keys.extend(keys)
                species_metadata[inlet] = metadata

            # Only create the dictionary keys if we have some data keys
            if species_keys:
                data_keys[site][sp]["keys"] = species_keys
                data_keys[site][sp]["rank_metadata"] = species_rank_data
                data_keys[site][sp]["metadata"] = species_metadata
            else:
                continue

            # We now need to retrieve data for the dateranges for which we don't have ranking data
            # To do this find the gaps in the daterange over which the user has requested data
            # and the dates for which we have ranking information

            # Get the dateranges that are covered by ranking information
            daterange_strs = list(iter_chain.from_iterable([m["dateranges"] for m in data["matching"]]))
            # Find the gaps in the ranking coverage
            gap_dateranges = find_daterange_gaps(
                start_search=start_date, end_search=end_date, dateranges=daterange_strs
            )

            # We want the dateranges and inlets for those dateranges
            inlet_dateranges = data_keys[site][sp]["rank_metadata"]
            # These are the dateranges for which we have ranking information for this site and species
            ranked_dateranges = list(data_keys[site][sp]["rank_metadata"].keys())

            for gap_daterange in gap_dateranges:
                # We want to select the inlet that's ranked for dates closest to the ones we have here
                closest_dr = closest_daterange(to_compare=gap_daterange, dateranges=ranked_dateranges)

                gap_start, gap_end = split_daterange_str(gap_daterange)
                # Find the closest ranked inlet by date
                chosen_inlet = inlet_dateranges[closest_dr]

                inlet_metadata = data_keys[site][sp]["metadata"][chosen_inlet]
                inlet_instrument = inlet_metadata["instrument"]
                inlet_sampling_period = inlet_metadata["sampling_period"]

                # Then we want to retrieve the correct metadata for those inlets
                results: SearchResults = search(
                    site=site,
                    species=sp,
                    inlet=chosen_inlet,
                    instrument=inlet_instrument,
                    sampling_period=inlet_sampling_period,
                    start_date=gap_start,
                    end_date=gap_end,
                )  # type: ignore

                if not results:
                    continue

                # Retrieve the data keys
                inlet_data_keys = results.keys(site=site, species=sp, inlet=chosen_inlet)

                data_keys[site][sp]["keys"].extend(inlet_data_keys)

            # Remove any duplicate keys
            data_keys[site][sp]["keys"] = list(set(data_keys[site][sp]["keys"]))

    # TODO - create a stub for addict
    dict_data_keys = data_keys.to_dict()  # type: ignore

    return SearchResults(results=dict_data_keys, ranked_data=True)
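Examples 1 and 16 above show the typical calling pattern for this function: search on species, site and inlet, pull the matching keys out of the returned SearchResults and recombine them into a single dataset. A condensed sketch, assuming search and recombine_datasets are imported as in those examples:

result = search(species="ch4", site="hfd", inlet="100m")
keys = result.keys(site="hfd", species="ch4", inlet="100m")

# Recombine the stored data chunks into one dataset
ch4_combined = recombine_datasets(keys=keys)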
Example #28
def test_read_CRDS():
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.min.dat",
                            data_type="CRDS")
    results = ObsSurface.read_file(filepath=filepath,
                                   data_type="CRDS",
                                   site="bsd",
                                   network="DECC")

    keys = results["processed"]["bsd.picarro.1minute.248m.min.dat"].keys()

    assert sorted(keys) == ["ch4", "co", "co2"]

    # Load up the assigned Datasources and check they contain the correct data
    data = results["processed"]["bsd.picarro.1minute.248m.min.dat"]

    ch4_data = Datasource.load(uuid=data["ch4"]).data()
    ch4_data = ch4_data["2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00"]

    assert ch4_data.time[0] == Timestamp("2014-01-30T11:12:30")
    assert ch4_data["ch4"][0] == 1959.55
    assert ch4_data["ch4"][-1] == 1962.8
    assert ch4_data["ch4_variability"][-1] == 1.034
    assert ch4_data["ch4_number_of_observations"][-1] == 26.0

    obs = ObsSurface.load()
    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)

    data_keys = list(datasource.data().keys())

    expected_keys = [
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
    ]

    assert data_keys == expected_keys

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.future.dat",
                            data_type="CRDS")
    results = ObsSurface.read_file(filepath=filepath,
                                   data_type="CRDS",
                                   site="bsd",
                                   network="DECC")

    uuid_one = obs.datasources()[0]
    datasource = Datasource.load(uuid=uuid_one)
    data_keys = sorted(list(datasource.data().keys()))

    new_expected_keys = [
        "2014-01-30-11:12:30+00:00_2014-11-30-11:23:30+00:00",
        "2015-01-30-11:12:30+00:00_2015-11-30-11:23:30+00:00",
        "2016-04-02-06:52:30+00:00_2016-11-02-12:54:30+00:00",
        "2017-02-18-06:36:30+00:00_2017-12-18-15:41:30+00:00",
        "2018-02-18-15:42:30+00:00_2018-12-18-15:42:30+00:00",
        "2019-02-03-17:38:30+00:00_2019-12-09-10:47:30+00:00",
        "2020-02-01-18:08:30+00:00_2020-12-01-22:31:30+00:00",
        "2023-01-30-13:56:30+00:00_2023-01-30-14:20:30+00:00",
    ]

    assert data_keys == new_expected_keys

    table = obs._datasource_table
    assert table["bsd"]["decc"]["ch4"]["248m"]
    assert table["bsd"]["decc"]["co2"]["248m"]
    assert table["bsd"]["decc"]["co"]["248m"]
Example #29
def test_read_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.18.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    # 30/11/2021: Species labels were updated to be standardised in line with variable naming
    # This list of expected labels was updated.
    expected_keys = [
        'c2cl4_70m', 'c2f6_70m', 'c2h2_70m', 'c2h6_70m', 'c2hcl3_70m',
        'c3f8_70m', 'c3h8_70m', 'c4f10_70m', 'c4f8_70m', 'c6f14_70m',
        'c6h5ch3_70m', 'c6h6_70m', 'cc3h8_70m', 'ccl4_70m', 'cf4_70m',
        'cfc112_70m', 'cfc113_70m', 'cfc114_70m', 'cfc115_70m', 'cfc11_70m',
        'cfc12_70m', 'cfc13_70m', 'ch2br2_70m', 'ch2cl2_70m', 'ch3br_70m',
        'ch3ccl3_70m', 'ch3cl_70m', 'ch3i_70m', 'chbr3_70m', 'chcl3_70m',
        'cos_70m', 'desflurane_70m', 'halon1211_70m', 'halon1301_70m',
        'halon2402_70m', 'hcfc124_70m', 'hcfc132b_70m', 'hcfc133a_70m',
        'hcfc141b_70m', 'hcfc142b_70m', 'hcfc22_70m', 'hfc125_70m',
        'hfc134a_70m', 'hfc143a_70m', 'hfc152a_70m', 'hfc227ea_70m',
        'hfc236fa_70m', 'hfc23_70m', 'hfc245fa_70m', 'hfc32_70m',
        'hfc365mfc_70m', 'hfc4310mee_70m', 'nf3_70m', 'sf5cf3_70m', 'sf6_70m',
        'so2f2_70m'
    ]

    assert sorted(list(
        results["processed"]["capegrim-medusa.18.C"].keys())) == expected_keys

    # Load in some data
    uuid = results["processed"]["capegrim-medusa.18.C"]["hfc152a_70m"]

    hfc152a_data = Datasource.load(uuid=uuid, shallow=False).data()
    hfc152a_data = hfc152a_data[
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"]

    assert hfc152a_data.time[0] == Timestamp("2018-01-01T02:24:00")
    assert hfc152a_data.time[-1] == Timestamp("2018-01-31T23:33:00")

    assert hfc152a_data["hfc152a"][0] == 4.409
    assert hfc152a_data["hfc152a"][-1] == 4.262

    assert hfc152a_data["hfc152a_repeatability"][0] == 0.03557
    assert hfc152a_data["hfc152a_repeatability"][-1] == 0.03271

    assert hfc152a_data["hfc152a_status_flag"][0] == 0
    assert hfc152a_data["hfc152a_status_flag"][-1] == 0

    assert hfc152a_data["hfc152a_integration_flag"][0] == 0
    assert hfc152a_data["hfc152a_integration_flag"][-1] == 0

    # Check we have the Datasource info saved
    obs = ObsSurface.load()

    assert sorted(obs._datasource_uuids.values()) == expected_keys

    attrs = hfc152a_data.attrs

    assert attributes_checker_obssurface(attrs=attrs, species="hfc152a")

    # # Now test that if we add more data it adds it to the same Datasource
    uuid_one = obs.datasources()[0]

    datasource = Datasource.load(uuid=uuid_one)

    data_one = datasource.data()
    assert list(data_one.keys()) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"
    ]

    data_filepath = get_datapath(filename="capegrim-medusa.future.C",
                                 data_type="GC")
    precision_filepath = get_datapath(
        filename="capegrim-medusa.future.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath,
                                             precision_filepath),
                                   data_type="GCWERKS",
                                   site="CGO",
                                   network="AGAGE")

    datasource = Datasource.load(uuid=uuid_one)
    data_one = datasource.data()

    assert sorted(list(data_one.keys())) == [
        "2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00",
        "2023-01-01-02:24:00+00:00_2023-01-31-23:33:00+00:00",
    ]

    data_filepath = get_datapath(filename="trinidadhead.01.C", data_type="GC")
    precision_filepath = get_datapath(filename="trinidadhead.01.precisions.C",
                                      data_type="GC")

    ObsSurface.read_file(
        filepath=(data_filepath, precision_filepath),
        data_type="GCWERKS",
        site="THD",
        instrument="gcmd",
        network="AGAGE",
    )

    obs = ObsSurface.load()
    table = obs._datasource_table

    assert table["cgo"]["agage"]["nf3"]["70m"]
    assert table["cgo"]["agage"]["hfc236fa"]["70m"]
    assert table["cgo"]["agage"]["halon1211"]["70m"]

    assert table["thd"]["agage"]["cfc11"]["10m"]
    assert table["thd"]["agage"]["n2o"]["10m"]
    assert table["thd"]["agage"]["ccl4"]["10m"]
Example #30
def data_read():
    get_local_bucket(empty=True)

    # DECC network sites
    network = "DECC"
    bsd_248_path = get_datapath(filename="bsd.picarro.1minute.248m.min.dat", data_type="CRDS")
    bsd_108_path = get_datapath(filename="bsd.picarro.1minute.108m.min.dat", data_type="CRDS")
    bsd_42_path = get_datapath(filename="bsd.picarro.1minute.42m.min.dat", data_type="CRDS")

    bsd_paths = [bsd_248_path, bsd_108_path, bsd_42_path]

    bsd_results = ObsSurface.read_file(filepath=bsd_paths, data_type="CRDS", site="bsd", network=network)

    hfd_100_path = get_datapath(filename="hfd.picarro.1minute.100m.min.dat", data_type="CRDS")
    hfd_50_path = get_datapath(filename="hfd.picarro.1minute.50m.min.dat", data_type="CRDS")
    hfd_paths = [hfd_100_path, hfd_50_path]

    ObsSurface.read_file(filepath=hfd_paths, data_type="CRDS", site="hfd", network=network)

    tac_path = get_datapath(filename="tac.picarro.1minute.100m.test.dat", data_type="CRDS")
    ObsSurface.read_file(filepath=tac_path, data_type="CRDS", site="tac", network=network)

    # GCWERKS data (AGAGE network sites)
    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    prec_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file(filepath=(data_filepath, prec_filepath), site="CGO", data_type="GCWERKS", network="AGAGE")

    mhd_data_filepath = get_datapath(filename="macehead.12.C", data_type="GC")
    mhd_prec_filepath = get_datapath(filename="macehead.12.precisions.C", data_type="GC")

    ObsSurface.read_file(filepath=(mhd_data_filepath, mhd_prec_filepath), site="MHD", data_type="GCWERKS", network="AGAGE", instrument="GCMD")

    # Set ranking information for BSD
    obs = ObsSurface.load()

    uid_248 = bsd_results["processed"]["bsd.picarro.1minute.248m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_248, rank=1, date_range="2012-01-01_2013-01-01")

    uid_108 = bsd_results["processed"]["bsd.picarro.1minute.108m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_108, rank=1, date_range="2014-09-02_2014-11-01")

    obs.set_rank(uuid=uid_248, rank=1, date_range="2015-01-01_2015-11-01")

    obs.set_rank(uuid=uid_108, rank=1, date_range="2016-09-02_2018-11-01")

    uid_42 = bsd_results["processed"]["bsd.picarro.1minute.42m.min.dat"]["ch4"]
    obs.set_rank(uuid=uid_42, rank=1, date_range="2019-01-02_2021-01-01")

    # Emissions data
    test_datapath = get_emissions_datapath("co2-gpp-cardamom-mth_EUROPE_2012.nc")

    Emissions.read_file(
        filepath=test_datapath,
        species="co2",
        source="gpp-cardamom",
        date="2012",
        domain="europe",
        high_time_resolution=False,
    )

    # Footprint data
    datapath = get_footprint_datapath("footprint_test.nc")

    site = "TMB"
    network = "LGHG"
    height = "10m"
    domain = "EUROPE"
    model = "test_model"

    Footprints.read_file(
        filepath=datapath, site=site, model=model, network=network, height=height, domain=domain
    )