def test_get_obs_surface_no_inlet_ranking():
    """Test metadata and attributes returned from get_obs_surface

    - ranking data is set
    - inlet is not specified
    - date range not specified (all dates returned)

    Checks
    - metadata includes expected "rank_metadata" attribute
    - check inlet details have been appropriately updated
    """
    result = get_obs_surface(site="bsd", species="ch4")

    retrieved_data = result.data
    retrieved_metadata = result.metadata

    assert retrieved_data

    # The ranking periods and their associated inlet heights
    expected_rank_metadata = {
        "2015-01-01-00:00:00+00:00_2015-11-01-00:00:00+00:00": "248m",
        "2014-09-02-00:00:00+00:00_2014-11-01-00:00:00+00:00": "108m",
        "2016-09-02-00:00:00+00:00_2018-11-01-00:00:00+00:00": "108m",
        "2019-01-02-00:00:00+00:00_2021-01-01-00:00:00+00:00": "42m",
    }
    assert retrieved_metadata["rank_metadata"] == expected_rank_metadata

    # Data spans several inlets, so inlet details should be marked "multiple"
    assert "inlet" in retrieved_data
    assert retrieved_data.attrs["inlet"] == "multiple"
    assert retrieved_metadata["inlet"] == "multiple"
def test_get_obs_surface_ranking_unique():
    """Test data returned from get_obs_surface data

    - ranking data is set
    - inlet is not specified
    - date range covers multiple inlets

    Covers tests not included in `test_get_obs_surface_no_inlet_ranking`

    TODO: At the moment this fails - unique data is not returned and there
    are multiple entries for some time stamps. This is a bug which will need
    to be fixed.
    """
    result = get_obs_surface(site="bsd", species="ch4")
    dataset = result.data

    # Within the first ranking period every point should come from the 248m inlet
    ranked_period = slice("2015-01-01-00:00:00", "2015-11-01-00:00:00")
    inlets_in_period = dataset["inlet"].sel(time=ranked_period).values
    np.testing.assert_equal(inlets_in_period, np.tile("248m", len(inlets_in_period)))

    # A timestamp within a 108m-ranked period should yield a single 108m value
    point_108m = dataset["mf"].sel(time="2016-04-02T09:07:30")
    assert point_108m.size == 1
    assert point_108m["inlet"] == "108m"

    # A timestamp within a 248m-ranked period should yield a single 248m value
    point_248m = dataset.sel(time="2015-01-30T11:12:30")
    assert point_248m["mf"].size == 1
    assert point_248m["inlet"] == "248m"
def test_timeslice_slices_correctly():
    """Check that passing start_date/end_date restricts the returned time range."""
    result = get_obs_surface(
        site="bsd",
        species="co2",
        inlet="248m",
        start_date="2017-01-01",
        end_date="2018-03-03",
    )
    co2_slice = result.data

    # First and last timestamps must fall inside the requested window
    assert co2_slice.time[0] == Timestamp("2017-02-18T06:36:30")
    assert co2_slice.time[-1] == Timestamp("2018-02-18T15:42:30")
def test_get_obs_surface():
    """Basic retrieval check: expected time range, values, metadata and averaging."""
    result = get_obs_surface(site="bsd", species="co2", inlet="248m")
    co2_data = result.data

    # Endpoints of the full record
    assert co2_data.time[0] == Timestamp("2014-01-30T11:12:30")
    assert co2_data.time[-1] == Timestamp("2020-12-01T22:31:30")
    assert co2_data.mf[0] == 409.55
    assert co2_data.mf[-1] == 417.65

    retrieved_metadata = result.metadata
    assert retrieved_metadata["data_owner"] == "Simon O'Doherty"
    assert retrieved_metadata["inlet_height_magl"] == "248m"

    # Requesting averaged data should change the time axis
    averaged_result = get_obs_surface(
        site="bsd", species="co2", inlet="248m", average="2h"
    )
    assert not result.data.time.equals(averaged_result.data.time)
def test_timeslice_slices_correctly_exclusive():
    """Time slicing for continuous data is exclusive of the end point:
    the range runs up to but not including end_date."""
    result = get_obs_surface(
        site="mhd",
        species="ch4",
        inlet="10m",
        start_date="2012-01-11",
        end_date="2012-02-05",
    )
    mhd_slice = result.data

    # Timestamps are shifted back by half the sampling period
    half_period = Timedelta(75, unit="seconds") / 2.0
    assert mhd_slice.time[0] == Timestamp("2012-01-11T00:13") - half_period
    assert mhd_slice.time[-1] == Timestamp("2012-02-04T23:47") - half_period

    assert mhd_slice.mf[0] == 1849.814
    assert mhd_slice.mf[-1] == 1891.094
def test_get_obs_surface_ranking_single():
    """Test data returned from get_obs_surface data

    - ranking data is set
    - inlet is not specified
    - date range should only include date for one inlet
    """
    result = get_obs_surface(
        site="bsd",
        species="ch4",
        start_date="2015-01-01",
        end_date="2015-11-01",
    )
    dataset = result.data

    assert dataset

    # Only the 248m inlet is ranked within this window
    assert dataset.attrs["inlet"] == "248m"
    assert result.metadata["inlet"] == "248m"

    # A single, de-duplicated value per timestamp
    single_point = dataset["mf"].sel(time="2015-01-30T11:12:30")
    assert single_point.size == 1
def test_no_inlet_no_ranked_data_raises():
    """Omitting the inlet for a species without ranking data is an error."""
    with pytest.raises(ValueError):
        get_obs_surface(site="bsd", species="co2")
def test_averaging_incorrect_period_raises():
    """An unparseable averaging period string must raise a ValueError."""
    with pytest.raises(ValueError):
        get_obs_surface(site="bsd", species="co2", inlet="248m", average="888")
def single_site_footprint(
    site: str,
    height: str,
    network: str,
    domain: str,
    species: str,
    start_date: Union[str, Timestamp],
    end_date: Union[str, Timestamp],
    resample_to: str = "coarsest",
    site_modifier: Optional[str] = None,
    platform: Optional[str] = None,
    instrument: Optional[str] = None,
) -> Dataset:
    """Creates a Dataset for a single site's measurement data and footprints

    Args:
        site: Site name
        height: Height of inlet in metres
        network: Network name
        domain: Footprint domain name
        species: Species type
        start_date: Start of the date range to retrieve
        end_date: End of the date range to retrieve
        resample_to: Resample the data to a given time dataset.
            Valid options are ["obs", "footprints", "coarsest"].
            - "obs" resamples the footprints to the observation time series data
            - "footprints" resamples to the footprints time series
            - "coarsest" resamples to the data with the coarsest time resolution
        site_modifier: The name of the site given in the footprints.
            This is useful for example if the same site footprints are run with a different met and
            they are named slightly differently from the obs file. E.g.
            site="DJI", site_modifier = "DJI-SAM" - station called DJI, footprints site called DJI-SAM
        platform: Observation platform used to decide whether to resample
        instrument: Instrument name
    Returns:
        xarray.Dataset
    """
    from openghg.retrieve import get_obs_surface, get_footprint
    from openghg.util import timestamp_tzaware

    # Normalise the date range to timezone-aware timestamps
    start_date = timestamp_tzaware(start_date)
    end_date = timestamp_tzaware(end_date)

    resample_to = resample_to.lower()
    resample_choices = ("obs", "footprints", "coarsest")
    if resample_to not in resample_choices:
        raise ValueError(
            f"Invalid resample choice {resample_to} passed, please select from one of {resample_choices}"
        )

    # As we're not retrieving any satellite data yet just set tolerance to None
    tolerance = None
    # NOTE(review): the platform argument is deliberately discarded here until
    # satellite data retrieval is supported - confirm this is still intended
    platform = None

    # Here we want to use get_obs_surface
    obs_results = get_obs_surface(
        site=site,
        inlet=height,
        start_date=start_date,
        end_date=end_date,
        species=species,
        instrument=instrument,
    )

    obs_data = obs_results.data

    # Save the observation data units so the footprints can be rescaled to match
    try:
        units: Union[float, None] = float(obs_data.mf.attrs["units"])
    except KeyError:
        # No units recorded - leave the footprints unscaled
        units = None
    except AttributeError as err:
        raise AttributeError("Unable to read mf attribute from observation data.") from err

    # If the site for the footprints has a different name, pass that in
    footprint_site = site_modifier if site_modifier else site

    # Try to find appropriate footprints file first with and then without species name
    try:
        footprint = get_footprint(
            site=footprint_site,
            domain=domain,
            height=height,
            start_date=start_date,
            end_date=end_date,
            species=species,
        )
    except ValueError:
        footprint = get_footprint(
            site=footprint_site,
            domain=domain,
            height=height,
            start_date=start_date,
            end_date=end_date,
        )

    # TODO: Add checks for particular species e.g. co2 and short-lived species
    # which should have a specific footprints available rather than the generic one

    # Extract dataset
    footprint_data = footprint.data

    # Align the two Datasets
    aligned_obs, aligned_footprint = align_datasets(
        obs_data=obs_data,
        footprint_data=footprint_data,
        platform=platform,
        resample_to=resample_to,
    )

    combined_dataset = combine_datasets(
        dataset_A=aligned_obs, dataset_B=aligned_footprint, tolerance=tolerance
    )

    # Transpose to keep time in the last dimension position in case it has been moved in resample
    combined_dataset = combined_dataset.transpose(..., "time")

    # Rescale the footprints into the observation units where known
    if units is not None:
        combined_dataset["fp"].values = combined_dataset["fp"].values / units
        # if HiTRes:
        #     combined_dataset.update({"fp_HiTRes": (combined_dataset.fp_HiTRes.dims, (combined_dataset.fp_HiTRes / units))})

    return combined_dataset