def read_file(self, data_filepath, site=None, overwrite=False):
    """ Reads EUROCOM data files and returns the processed, attribute-assigned
        gas data.

        Args:
            data_filepath (str or Path): Path of file to load
            site (str, default=None): Site code; if not given it is inferred
                from the filename (the text before the first underscore)
            overwrite (bool, default=False): Currently unused here, kept for
                interface compatibility with the other read_file methods
        Returns:
            dict: Gas data (xarray Datasets with metadata) with CF-compliant
            attributes assigned
    """
    from pathlib import Path
    from HUGS.Processing import assign_attributes

    data_filepath = Path(data_filepath)

    if site is None:
        # Filenames are expected to lead with the site code, e.g. SITE_...
        site = data_filepath.stem.split("_")[0]

    # This should return xarray Datasets
    gas_data = self.read_data(data_filepath=data_filepath, site=site)

    # Assigning attributes to the xarray Datasets here makes the data
    # a lot easier to test
    gas_data = assign_attributes(data=gas_data, site=site, sampling_period=self._sampling_period)

    return gas_data
def read_file(
    self, data_filepath, species=None, site=None, overwrite=False,
):
    """ Reads NOAA data files and returns the processed, attribute-assigned
        gas data.

        Args:
            data_filepath (str or Path): Path of file to load
            species (str, default=None): Species name; if not given it is
                inferred from the filename (the text before the first
                underscore, lowercased)
            site (str, default=None): Site code; if not given it is read from
                the processed data's metadata
            overwrite (bool, default=False): Currently unused here, kept for
                interface compatibility with the other read_file methods
        Returns:
            dict: Gas data with attributes assigned
    """
    from HUGS.Processing import assign_attributes
    from pathlib import Path

    data_filepath = Path(data_filepath)

    if species is None:
        species = data_filepath.name.split("_")[0].lower()

    gas_data = self.read_data(data_filepath=data_filepath, species=species)

    if site is None:
        site = gas_data[species.lower()]["metadata"]["site"]

    gas_data = assign_attributes(data=gas_data, site=site)

    return gas_data
def read_file(
    self, data_filepath, precision_filepath, site=None, instrument=None, network=None
):
    """ Reads a GC data file by creating a GC object and associated datasources

        TODO - should this default to GCMD when no instrument is passed?

        Args:
            data_filepath (str, pathlib.Path): Path of data file
            precision_filepath (str, pathlib.Path): Path of precision file
            site (str, default=None): Three letter code or name for site
            instrument (str, default=None): Instrument name
            network (str, default=None): Network name
        Returns:
            dict: Dictionary of source_name : UUIDs
    """
    import re
    from pathlib import Path

    from HUGS.Processing import assign_attributes
    from HUGS.Util import is_number

    data_filepath = Path(data_filepath)

    if site is None:
        # No site passed in - take the first word-like token from the
        # filename and turn it into a site code
        leading_token = re.findall(r"[\w']+", data_filepath.stem)[0]
        site = self.get_site_code(leading_token)

    # Downstream processing needs the 3 character site code, so convert
    # a full site name if one was supplied
    if len(site) != 3:
        site = self.get_site_code(site)

    if instrument is None:
        # The instrument is the second word-like token in the filename,
        # e.g. capegrim-medusa.18.C
        instrument = re.findall(r"[\w']+", str(data_filepath.name))[1]

        if is_number(instrument):
            # The token was the year rather than an instrument name -
            # default to GCMD for this type of file
            instrument = "GCMD"

    gas_data = self.read_data(
        data_filepath=data_filepath,
        precision_filepath=precision_filepath,
        site=site,
        instrument=instrument,
        network=network,
    )

    # Assign attributes to the data for CF compliant NetCDFs
    return assign_attributes(data=gas_data, site=site)
def read_file(self, data_filepath):
    """ Reads THAMESBARRIER data files and returns the UUIDS of the
        Datasources the processed data has been assigned to

        Args:
            data_filepath (str or Path): Path of file to load
        Returns:
            list: UUIDs of Datasources data has been assigned to
    """
    from pathlib import Path
    from HUGS.Processing import assign_attributes

    # All data handled here comes from the single TMB site
    site_code = "TMB"

    parsed_data = self.read_data(data_filepath=Path(data_filepath))

    return assign_attributes(data=parsed_data, site=site_code)
def read_file(self, data_filepath, source_name=None, site=None, network=None):
    """ Reads a CRDS data file and returns the processed, attribute-assigned
        gas data.

        Args:
            data_filepath (str or Path): Path of file to load
            source_name (str, default=None): Name of source; defaults to the
                filename stem
            site (str, default=None): Name of site; defaults to the part of
                the source name before the first dot
                (e.g. "tac" from "tac.picarro.1minute.100m")
            network (str, default=None): Network name
        Returns:
            dict: Gas data (separate Datasets per species) with CF-compliant
            attributes assigned
    """
    from pathlib import Path
    from HUGS.Processing import assign_attributes

    # Path() accepts both str and Path, no isinstance check needed
    data_filepath = Path(data_filepath)

    if not source_name:
        source_name = data_filepath.stem

    if not site:
        site = source_name.split(".")[0]

    # Process the data into separate Datasets
    gas_data = self.read_data(data_filepath=data_filepath, site=site, network=network)

    # Ensure the data is CF compliant
    gas_data = assign_attributes(data=gas_data, site=site, sampling_period=self._sampling_period)

    return gas_data
def test_crds_attributes():
    """ Check that assign_attributes adds the expected global and
        per-variable attributes to CRDS data read from a test file.
    """
    # Start from an empty local bucket so previously stored data cannot
    # interfere with the comparisons below
    _ = get_local_bucket(empty=True)

    crds = CRDS()

    filepath = get_datapath(filename="tac.picarro.1minute.100m.test.dat", data_type="CRDS")

    combined = crds.read_data(data_filepath=filepath, site="tac")

    combined_attributes = assign_attributes(data=combined, site="tac")

    ch4_data = combined_attributes["ch4"]["data"]
    co2_data = combined_attributes["co2"]["data"]

    ch4_attr = ch4_data.attrs
    co2_attr = co2_data.attrs

    # Keep full copies so species can still be checked after the
    # deletions below
    ch4_attr_complete = ch4_attr.copy()
    co2_attr_complete = co2_attr.copy()

    # Remove attributes that vary between runs (creation timestamp) or
    # between species, so the remainder can be compared against a single
    # expected dictionary
    del ch4_attr["File created"]
    del co2_attr["File created"]
    del ch4_attr["species"]
    del co2_attr["species"]
    del ch4_attr["Calibration_scale"]
    del co2_attr["Calibration_scale"]
    del ch4_attr["data_owner_email"]
    del co2_attr["data_owner_email"]
    del ch4_attr["data_owner"]
    del co2_attr["data_owner"]

    # Attributes shared by every species Dataset for this site
    global_attributes = {
        "inlet_height_magl": "100m",
        "comment": "Cavity ring-down measurements. Output from GCWerks",
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "station_longitude": 1.13872,
        "station_latitude": 52.51775,
        "station_long_name": "Tacolneston Tower, UK",
        "station_height_masl": 50.0,
    }

    assert ch4_attr == global_attributes
    assert co2_attr == global_attributes

    assert ch4_attr_complete["species"] == "ch4"
    assert co2_attr_complete["species"] == "co2"

    # Check the individual variables attributes
    time_attributes = {
        "label": "left",
        "standard_name": "time",
        "comment": "Time stamp corresponds to beginning of sampling period. Time since midnight UTC of reference date. Note that sampling periods are approximate.",
    }

    assert ch4_data.time.attrs == time_attributes
    assert co2_data.time.attrs == time_attributes

    # Check individual variables
    assert ch4_data["ch4"].attrs == {
        "long_name": "mole_fraction_of_methane_in_air",
        "units": "1e-9",
    }
    assert ch4_data["ch4_stdev"].attrs == {
        "long_name": "mole_fraction_of_methane_in_air_stdev",
        "units": "1e-9",
    }
    assert ch4_data["ch4_n_meas"].attrs == {
        "long_name": "mole_fraction_of_methane_in_air_n_meas"
    }
    assert co2_data["co2"].attrs == {
        "long_name": "mole_fraction_of_carbon_dioxide_in_air",
        "units": "1e-6",
    }
    assert co2_data["co2_stdev"].attrs == {
        "long_name": "mole_fraction_of_carbon_dioxide_in_air_stdev",
        "units": "1e-6",
    }
    assert co2_data["co2_n_meas"].attrs == {
        "long_name": "mole_fraction_of_carbon_dioxide_in_air_n_meas"
    }