def test_read_CRDS():
    get_local_bucket(empty=True)

    filepath = get_datapath(filename="bsd.picarro.1minute.248m.dat", data_type="CRDS")

    results = ObsSurface.read_file(filepath=filepath, data_type="CRDS")

    keys = results["bsd.picarro.1minute.248m.dat"].keys()

    expected_keys = sorted(
        [
            "bsd.picarro.1minute.248m_ch4",
            "bsd.picarro.1minute.248m_co",
            "bsd.picarro.1minute.248m_co2",
        ]
    )
    assert sorted(keys) == expected_keys

    # Load up the assigned Datasources and check they contain the correct data
    data = results["bsd.picarro.1minute.248m.dat"]

    ch4_data = Datasource.load(uuid=data["bsd.picarro.1minute.248m_ch4"]).data()
    ch4_data = ch4_data["2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"]

    assert ch4_data.time[0] == Timestamp("2014-01-30T10:52:30")
    assert ch4_data["ch4"][0] == 1960.24
    assert ch4_data["ch4"][-1] == 1952.24
    assert ch4_data["ch4_stdev"][-1] == 0.674
    assert ch4_data["ch4_n_meas"][-1] == 25.0

    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

def test_delete_Datasource():
    bucket = get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    obs = ObsSurface.load()

    datasources = obs.datasources()
    uuid = datasources[0]

    datasource = Datasource.load(uuid=uuid)

    data = datasource.data()["2011-12-07-01:38:00+00:00_2011-12-31-19:57:00+00:00"]

    assert data["co2"][0] == pytest.approx(397.334)
    assert data.time[0] == Timestamp("2011-12-07T01:38:00")

    data_keys = datasource.data_keys()
    key = data_keys[0]

    assert exists(bucket=bucket, key=key)

    obs.delete(uuid=uuid)

    assert uuid not in obs.datasources()
    assert not exists(bucket=bucket, key=key)

def test_read_thames_barrier():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="thames_test_20190707.csv", data_type="THAMESBARRIER")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="THAMESBARRIER")

    expected_keys = sorted(
        [
            "thames_test_20190707_CH4",
            "thames_test_20190707_CO2",
            "thames_test_20190707_CO",
        ]
    )

    assert sorted(list(results["thames_test_20190707.csv"].keys())) == expected_keys

    uuid = results["thames_test_20190707.csv"]["thames_test_20190707_CO2"]

    data = Datasource.load(uuid=uuid, shallow=False).data()
    data = data["2019-07-01-00:39:55+00:00_2019-08-01-00:10:30+00:00"]

    assert data.time[0] == Timestamp("2019-07-01T00:39:55")
    assert data.time[-1] == Timestamp("2019-08-01T00:10:30")
    assert data["co2"][0] == pytest.approx(417.97344761)
    assert data["co2"][-1] == pytest.approx(417.80000653)
    assert data["co2_variability"][0] == 0
    assert data["co2_variability"][-1] == 0

    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

def crds_read():
    get_local_bucket(empty=True)

    test_data = "../data/search_data"
    folder_path = os.path.join(os.path.dirname(__file__), test_data)

    ObsSurface.read_folder(folder_path=folder_path, data_type="CRDS", extension="dat")

def test_recombination_CRDS():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    crds = CRDS()

    ObsSurface.read_file(filepath, data_type="CRDS")

    gas_data = crds.read_data(data_filepath=filepath, site="HFD")

    ch4_data_read = gas_data["ch4"]["data"]

    gas_name = "ch4"
    location = "hfd"

    keys = search(species=gas_name, locations=location)

    to_download = keys["ch4_hfd_100m_picarro"]["keys"]["2013-12-04-14:02:30_2019-05-21-15:46:30"]

    ch4_data_recombined = recombine_sections(data_keys=to_download)

    ch4_data_recombined.attrs = {}

    assert ch4_data_read.time.equals(ch4_data_recombined.time)
    assert ch4_data_read["ch4"].equals(ch4_data_recombined["ch4"])

def crds():
    get_local_bucket(empty=True)

    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = get_datapath(filename=filename, data_type="CRDS")

    ObsSurface.read_file(filepath=filepath, data_type="CRDS")

def test_read_noaa():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="co_pocn25_surface-flask_1_ccgg_event.txt", data_type="NOAA")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="NOAA")

    uuid = results["co_pocn25_surface-flask_1_ccgg_event.txt"]["co_pocn25_surface-flask_1_ccgg_event_co"]

    co_data = Datasource.load(uuid=uuid, shallow=False).data()

    assert len(co_data.keys()) == 95

    old_data = co_data["1990-12-02-12:23:00+00:00_1990-12-02-12:23:00+00:00"]

    assert old_data.time[0] == Timestamp("1990-12-02T12:23:00")
    assert old_data.time[-1] == Timestamp("1990-12-02T12:23:00")
    assert old_data["co"][0] == 141.61
    assert old_data["co"][-1] == 141.61
    assert old_data["co_repeatability"][0] == -999.99
    assert old_data["co_repeatability"][-1] == -999.99
    assert old_data["co_selection_flag"][0] == 0
    assert old_data["co_selection_flag"][-1] == 0

    obs = ObsSurface.load()

    assert list(obs._datasource_names.keys())[0] == "co_pocn25_surface-flask_1_ccgg_event_co"

def crds():
    get_local_bucket(empty=True)

    dir_path = os.path.dirname(__file__)
    test_data = "../data/proc_test_data/CRDS"
    filename = "hfd.picarro.1minute.100m.min.dat"
    filepath = os.path.join(dir_path, test_data, filename)

    ObsSurface.read_file(filepath=filepath, data_type="CRDS")

def test_upload_same_file_twice_raises():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    with pytest.raises(ValueError):
        ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

def gc_read():
    get_local_bucket(empty=True)

    data_file = "capegrim-medusa.18.C"
    prec_file = "capegrim-medusa.18.precisions.C"

    dir_path = os.path.dirname(__file__)
    test_data = "../data/proc_test_data/GC"

    data_filepath = os.path.join(dir_path, test_data, data_file)
    prec_filepath = os.path.join(dir_path, test_data, prec_file)

    ObsSurface.read_file(filepath=(data_filepath, prec_filepath), data_type="GCWERKS")

def rank_sources(self, updated_rankings, data_type):
    """ Assign the precedence of sources for each site.

        This function expects a dictionary of the form

        {'site_string': {'rank': {rank: [daterange_str, ...]}, 'daterange': 'start_end', 'uuid': uuid}, ...}

        Args:
            updated_rankings (dict): Dictionary of ranking data
            data_type (str): Data type e.g. CRDS, GC
        Returns:
            None
    """
    if updated_rankings == self._before_ranking:
        return

    obs = ObsSurface.load()

    for key in updated_rankings:
        uuid = updated_rankings[key]["uuid"]

        for rank, daterange in updated_rankings[key]["rank"].items():
            if rank == 0:
                continue

            for d in daterange:
                obs.set_rank(uuid=uuid, rank=rank, daterange=d)

    obs.save()

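# Illustrative only: a minimal sketch of the rankings dictionary that
# rank_sources() iterates over above. The source name, UUID and daterange
# strings below are hypothetical placeholder values, not real store entries.
example_rankings = {
    "co2_bsd_248m_picarro": {
        "rank": {1: ["2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00"]},
        "daterange": "2014-01-30-10:52:30+00:00_2014-01-30-14:20:30+00:00",
        "uuid": "4b91f73e-3d57-47e4-aa13-cb28c35d3b3d",
    }
}
# Called on an instance holding _before_ranking state, e.g.:
# self.rank_sources(updated_rankings=example_rankings, data_type="CRDS")
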
def query_store():
    """ Create a dictionary that can be used to visualise the object store

        Returns:
            dict: Dictionary of Datasource metadata keyed by Datasource UUID
    """
    from collections import defaultdict
    from HUGS.Modules import Datasource, ObsSurface

    obs = ObsSurface.load()

    datasource_uuids = obs.datasources()

    datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

    data = defaultdict(dict)

    for d in datasources:
        metadata = d.metadata()
        result = {
            "site": metadata["site"],
            "species": metadata["species"],
            "instrument": metadata.get("instrument", "Unknown"),
            "network": metadata.get("network"),
        }
        data[d.uuid()] = result

    return data


# def visualise_store():
#     """ Visualise the output of the
#     """

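# Illustrative only: one way the query_store() output could be tabulated for
# inspection, assuming pandas is available in the environment. The column set
# matches the metadata keys built in query_store() above.
import pandas as pd

store_data = query_store()
df = pd.DataFrame.from_dict(store_data, orient="index")
print(df[["site", "species", "instrument", "network"]])
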
def rank_sources(args):
    """ Rank Datasources to be primary sources for specific species at specific sites.

        Args:
            args (dict): Dictionary containing ranking data
        Returns:
            None
    """
    try:
        ranking_data = args["ranking"]
    except KeyError:
        raise KeyError("No ranking data passed")

    obs = ObsSurface.load()

    for key in ranking_data:
        uuid = ranking_data[key]["uuid"]

        for rank, daterange in ranking_data[key]["rank"].items():
            if rank == 0:
                continue

            for d in daterange:
                obs.set_rank(uuid=uuid, rank=rank, daterange=d)

    obs.save()

def test_recombination_GC():
    get_local_bucket(empty=True)

    gc = GCWERKS()

    data = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    ObsSurface.read_file((data, precision), data_type="GCWERKS")

    data = gc.read_data(data_filepath=data, precision_filepath=precision, site="CGO", instrument="medusa")

    toluene_data = data["toluene"]["data"]

    gas_name = "toluene"
    location = "CGO"

    keys = search(species=gas_name, locations=location)

    to_download = keys["toluene_cgo_75m_4_medusa"]["keys"]["2018-01-01-02:24:00_2018-01-31-23:33:00"]

    toluene_data_recombined = recombine_sections(data_keys=to_download)

    toluene_data.attrs = {}
    toluene_data_recombined.attrs = {}

    assert toluene_data.time.equals(toluene_data_recombined.time)
    assert toluene_data["toluene"].equals(toluene_data_recombined["c6h5ch3"])
    assert toluene_data["toluene repeatability"].equals(toluene_data_recombined["c6h5ch3_repeatability"])
    assert toluene_data["toluene status_flag"].equals(toluene_data_recombined["c6h5ch3_status_flag"])
    assert toluene_data["toluene integration_flag"].equals(toluene_data_recombined["c6h5ch3_integration_flag"])

def test_read_cranfield():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="THB_hourly_means_test.csv", data_type="Cranfield_CRDS")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="CRANFIELD")

    expected_keys = sorted(
        [
            "THB_hourly_means_test_ch4",
            "THB_hourly_means_test_co2",
            "THB_hourly_means_test_co",
        ]
    )

    assert sorted(results["THB_hourly_means_test.csv"].keys()) == expected_keys

    uuid = results["THB_hourly_means_test.csv"]["THB_hourly_means_test_ch4"]

    ch4_data = Datasource.load(uuid=uuid, shallow=False).data()
    ch4_data = ch4_data["2018-05-05-00:00:00+00:00_2018-05-13-16:00:00+00:00"]

    assert ch4_data.time[0] == Timestamp("2018-05-05")
    assert ch4_data.time[-1] == Timestamp("2018-05-13T16:00:00")
    assert ch4_data["ch4"][0] == pytest.approx(2585.651)
    assert ch4_data["ch4"][-1] == pytest.approx(1999.018)
    assert ch4_data["ch4 variability"][0] == pytest.approx(75.50218)
    assert ch4_data["ch4 variability"][-1] == pytest.approx(6.48413)

    # Check obs has stored the keys correctly
    obs = ObsSurface.load()

    assert sorted(list(obs._datasource_names.keys())) == expected_keys

def process_files(files, data_type, site=None, network=None, instrument=None, overwrite=False):
    """ Process the passed file(s)

        Args:
            files (str, list): Path of files to be processed
            data_type (str): Type of data to be processed (CRDS, GC etc)
            site (str, default=None): Site code or name
            network (str, default=None): Network name
            instrument (str, default=None): Instrument name
            overwrite (bool, default=False): Should this data overwrite data
            stored for these datasources for existing dateranges
        Returns:
            dict: UUIDs of Datasources storing data of processed files keyed by filename
    """
    data_type = DataTypes[data_type.upper()].name

    if not isinstance(files, list):
        files = [files]

    obs = ObsSurface.load()

    results = {}

    # Ensure we have Paths
    # TODO: Delete this, as we already have the same warning in read_file?
    if data_type == "GCWERKS":
        if not all(isinstance(item, tuple) for item in files):
            raise TypeError(
                "If data type is GC, a list of tuples for data and precision filenames must be passed"
            )
        files = [(Path(f), Path(p)) for f, p in files]
    else:
        files = [Path(f) for f in files]

    r = obs.read_file(
        filepath=files,
        data_type=data_type,
        site=site,
        network=network,
        instrument=instrument,
        overwrite=overwrite,
    )
    results.update(r)

    return results

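# Illustrative only: calling process_files() with a single CRDS file and with a
# GCWERKS (data, precision) pair. The file paths and site codes here are
# hypothetical and would need to point at real files on disk.
crds_results = process_files(files="bsd.picarro.1minute.248m.dat", data_type="CRDS", site="bsd")

gc_results = process_files(
    files=[("capegrim-medusa.18.C", "capegrim-medusa.18.precisions.C")],
    data_type="GCWERKS",
    site="cgo",
)
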
def get_sources(self, site, species, data_type):
    """ Get the datasources for this site and species to allow a ranking to be set

        Args:
            site (str): Three letter site code
            species (str): Species name
            data_type (str): Must be a valid data type i.e. CRDS, GC
            See all valid data types in the DataTypes class
        Returns:
            dict: Dictionary of datasource metadata
    """
    if len(site) != 3 or not valid_site(site):
        # raise InvalidSiteError(f"{site} is not a valid site code")
        raise ValueError(f"{site} is not a valid site code")

    obs = ObsSurface.load()
    datasource_uuids = obs.datasources()

    # Shallow load the Datasources (only get their JSON metadata)
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    matching_sources = [
        d for d in datasources
        if d.search_metadata(search_terms=[site, species], find_all=True)
    ]

    def name_str(d):
        return "_".join([d.species(), d.site(), d.inlet(), d.instrument()])

    rank_info = {
        name_str(d): {
            "rank": d.rank(),
            "data_range": d.daterange_str(),
            "uuid": d.uuid(),
        }
        for d in matching_sources
    }

    self._before_ranking = copy.deepcopy(rank_info)
    self._key_uuids = {key: rank_info[key]["uuid"] for key in rank_info}

    return rank_info

def get_sources(args):
    """ Get the Datasources associated with the specified species at a specified site

        Args:
            args (dict): Dictionary containing site and species keys
        Returns:
            dict: Dictionary of datasource metadata
    """
    try:
        site = args["site"]
    except KeyError:
        # TODO - create a SiteError error type to raise here
        raise KeyError("Site must be specified")

    try:
        species = args["species"]
    except KeyError:
        raise KeyError("Species must be specified")

    obs = ObsSurface.load()
    datasource_uuids = obs.datasources()

    # Shallow load the Datasources (only get their JSON metadata)
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    matching_sources = [
        d for d in datasources
        if d.search_metadata(search_terms=[site, species], find_all=True)
    ]

    def name_str(d):
        return "_".join([d.species(), d.site(), d.inlet(), d.instrument()])

    unranked = {
        name_str(d): {
            "rank": d.rank(),
            "data_range": d.daterange_str(),
            "uuid": d.uuid(),
        }
        for d in matching_sources
    }

    return unranked

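# Illustrative only: how the get_sources() service function above might be
# called. The site and species values are hypothetical and assume matching
# data has already been processed into the object store.
sources = get_sources(args={"site": "hfd", "species": "co2"})

for name, info in sources.items():
    print(name, info["rank"], info["data_range"])
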
def process_data(
    data_file,
    source_name,
    data_type="CRDS",
    site=None,
    instrument_name=None,
    overwrite=False,
):
    """ Passes the given filename(s) to the correct processing
        object depending on the data_type argument.

        Args:
            data_file (str, tuple (str, str)): Paths of file(s) for processing
            source_name (str): Name of source
            data_type (str, default="CRDS"): Type of data to be processed (CRDS, GC etc)
            site (str, default=None): Site code or name
            instrument_name (str, default=None): Instrument name
            overwrite (bool, default=False): Should existing and overlapping data be overwritten
        Returns:
            list: List of Datasources
    """
    from HUGS.Modules import ObsSurface

    processing_obj = ObsSurface.load()

    if data_type == "GC":
        try:
            data, precision = data_file
        except (TypeError, ValueError) as error:
            raise TypeError("Ensure data and precision files are passed as a tuple\n", error)

    result = processing_obj.read_file(
        filepath=data_file, data_type=data_type, site=site, instrument=instrument_name
    )

    return result

def populate_store():
    get_local_bucket(empty=True)

    filepath = hfd_filepath()

    ObsSurface.read_file(filepath=filepath, data_type="CRDS", site="hfd")

def test_read_icos():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="tta.co2.1minute.222m.min.dat", data_type="ICOS")

    results = ObsSurface.read_file(filepath=data_filepath, data_type="ICOS")

    assert list(results["tta.co2.1minute.222m.min.dat"].keys())[0] == "tta.co2.1minute.222m.min_co2"

    uuid = results["tta.co2.1minute.222m.min.dat"]["tta.co2.1minute.222m.min_co2"]

    data = Datasource.load(uuid=uuid, shallow=False).data()

    assert sorted(list(data.keys())) == sorted(
        [
            "2011-12-07-01:38:00+00:00_2011-12-31-19:57:00+00:00",
            "2011-06-01-05:54:00+00:00_2011-08-31-17:58:00+00:00",
            "2011-03-30-08:52:00+00:00_2011-05-31-20:59:00+00:00",
            "2011-09-01-11:20:00+00:00_2011-11-30-03:39:00+00:00",
            "2012-12-01-04:03:00+00:00_2012-12-31-15:41:00+00:00",
            "2012-06-01-11:15:00+00:00_2012-08-07-19:16:00+00:00",
            "2012-04-07-06:20:00+00:00_2012-05-31-18:00:00+00:00",
            "2012-09-05-02:15:00+00:00_2012-11-30-19:08:00+00:00",
            "2013-01-01-00:01:00+00:00_2013-01-17-18:06:00+00:00",
        ]
    )

    co2_data = data["2012-12-01-04:03:00+00:00_2012-12-31-15:41:00+00:00"]

    assert co2_data.time[0] == Timestamp("2012-12-01T04:03:00")
    assert co2_data.time[-1] == Timestamp("2012-12-31T15:41:00")
    assert co2_data["co2"][0] == 397.765
    assert co2_data["co2"][-1] == 398.374
    assert co2_data["co2_variability"][0] == 0.057
    assert co2_data["co2_variability"][-1] == 0.063
    assert co2_data["co2_number_of_observations"][0] == 12
    assert co2_data["co2_number_of_observations"][-1] == 13

    del co2_data.attrs["File created"]

    assert co2_data.attrs == {
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "species": "co2",
        "Calibration_scale": "unknown",
        "station_longitude": -2.98598,
        "station_latitude": 56.55511,
        "station_long_name": "Angus Tower, UK",
        "station_height_masl": 300.0,
    }

    obs = ObsSurface.load()

    assert list(obs._datasource_names.keys())[0] == "tta.co2.1minute.222m.min_co2"

def search(
    locations,
    species=None,
    inlet=None,
    instrument=None,
    find_all=True,
    start_datetime=None,
    end_datetime=None,
):
    """ Search for gas data (optionally within a daterange)

        TODO - review this function - feel like it can be tidied and simplified

        Args:
            species (str or list): Terms to search for in Datasources
            locations (str or list): Where to search for the terms in species
            inlet (str, default=None): Inlet height such as 100m
            instrument (str, default=None): Instrument name such as picarro
            find_all (bool, default=True): Require all search terms to be satisfied
            start_datetime (datetime, default=None): Start datetime for search.
            If None a start datetime of UNIX epoch (1970-01-01) is set
            end_datetime (datetime, default=None): End datetime for search.
            If None an end datetime of the current datetime is set
        Returns:
            dict: List of keys of Datasources matching the search parameters
    """
    from collections import defaultdict
    from json import load
    from HUGS.Modules import Datasource, ObsSurface
    from HUGS.Util import (
        get_datetime_now,
        get_datetime_epoch,
        create_daterange_str,
        timestamp_tzaware,
        get_datapath,
    )

    # if species is not None and not isinstance(species, list):
    if not isinstance(species, list):
        species = [species]

    if not isinstance(locations, list):
        locations = [locations]

    # Allow passing of location names instead of codes
    site_codes_json = get_datapath(filename="site_codes.json")

    with open(site_codes_json, "r") as f:
        d = load(f)
        site_codes = d["name_code"]

    updated_locations = []
    # Check locations, if they're longer than three letters do a lookup
    for loc in locations:
        if len(loc) > 3:
            try:
                site_code = site_codes[loc.lower()]
                updated_locations.append(site_code)
            except KeyError:
                raise ValueError(f"Invalid site {loc} passed")
        else:
            updated_locations.append(loc)

    locations = updated_locations

    if start_datetime is None:
        start_datetime = get_datetime_epoch()
    if end_datetime is None:
        end_datetime = get_datetime_now()

    # Ensure passed datetimes are timezone aware
    start_datetime = timestamp_tzaware(start_datetime)
    end_datetime = timestamp_tzaware(end_datetime)

    # Here we want to load in the ObsSurface module for now
    obs = ObsSurface.load()

    datasource_uuids = obs.datasources()

    # Shallow load the Datasources so we can search their metadata
    datasources = [Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids]

    # First we find the Datasources from locations we want to narrow down our search
    location_sources = defaultdict(list)

    # If we have locations to search
    for location in locations:
        for datasource in datasources:
            if datasource.search_metadata(search_terms=location):
                location_sources[location].append(datasource)

    # This is returned to the caller
    results = defaultdict(dict)

    # With both inlet and instrument specified we bypass the ranking system
    if inlet is not None and instrument is not None:
        for site, sources in location_sources.items():
            for sp in species:
                for datasource in sources:
                    # Just match the single source here
                    if datasource.search_metadata(
                        search_terms=[sp, site, inlet, instrument], find_all=True
                    ):
                        daterange_str = create_daterange_str(start=start_datetime, end=end_datetime)
                        # Get the data keys for the data in the matching daterange
                        in_date = datasource.in_daterange(daterange=daterange_str)

                        data_date_str = strip_dates_keys(in_date)

                        key = f"{sp}_{site}_{inlet}_{instrument}".lower()
                        # Find the keys that match the correct data
                        results[key]["keys"] = {data_date_str: in_date}
                        results[key]["metadata"] = datasource.metadata()

        return results

    for location, sources in location_sources.items():
        # Loop over and look for the species
        species_data = defaultdict(list)

        for datasource in sources:
            for s in species:
                search_terms = [x for x in (s, location, inlet, instrument) if x is not None]
                # Check the species and the daterange
                if datasource.search_metadata(search_terms=search_terms, find_all=True):
                    species_data[s].append(datasource)

        # For each location we want to find the highest ranking sources for the selected species
        for sp, sources in species_data.items():
            ranked_sources = {}
            # How to return all the sources if they're all 0?
            for source in sources:
                rank_data = source.get_rank(start_date=start_datetime, end_date=end_datetime)

                # With no rank set we get an empty dictionary
                if not rank_data:
                    ranked_sources[0] = 0
                    continue

                # Just get the highest ranked datasources and return them
                # Find the highest ranked data from this site
                highest_rank = sorted(rank_data.keys())[-1]

                if highest_rank == 0:
                    ranked_sources[0] = 0
                    continue

                ranked_sources[source.uuid()] = {
                    "rank": highest_rank,
                    "dateranges": rank_data[highest_rank],
                    "source": source,
                }

            # If it's all zeroes we want to return all sources
            if list(ranked_sources) == [0]:
                for source in sources:
                    key = f"{source.species()}_{source.site()}_{source.inlet()}_{source.instrument()}".lower()

                    daterange_str = create_daterange_str(start=start_datetime, end=end_datetime)
                    data_keys = source.in_daterange(daterange=daterange_str)

                    if not data_keys:
                        continue

                    # Get a key that covers the daterange of the actual data and not from epoch to now
                    # if no start/end datetimes are passed
                    data_date_str = strip_dates_keys(data_keys)

                    results[key]["keys"] = {data_date_str: data_keys}
                    results[key]["metadata"] = source.metadata()

                continue
            else:
                # TODO - find a cleaner way of doing this
                # We might have a zero rank, delete it as we have higher ranked data
                try:
                    del ranked_sources[0]
                except KeyError:
                    pass

            # Otherwise iterate over the sources that are ranked and extract the keys
            for uid in ranked_sources:
                source = ranked_sources[uid]["source"]
                source_dateranges = ranked_sources[uid]["dateranges"]

                key = f"{source.species()}_{source.site()}_{source.inlet()}_{source.instrument()}".lower()

                data_keys = {}
                # Get the keys for each daterange
                for d in source_dateranges:
                    keys_in_date = source.in_daterange(daterange=d)
                    d = d.replace("+00:00", "")

                    if keys_in_date:
                        data_keys[d] = keys_in_date

                if not data_keys:
                    continue

                results[key]["keys"] = data_keys
                results[key]["metadata"] = source.metadata()

    return results

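# Illustrative only: a search sketch assuming CRDS data for Heathfield has
# already been processed into the object store. The site, inlet, instrument
# and dates are hypothetical; passing naive datetimes assumes timestamp_tzaware
# accepts them, as the defaults above suggest.
from datetime import datetime

example_results = search(
    locations="hfd",
    species="ch4",
    inlet="100m",
    instrument="picarro",
    start_datetime=datetime(2014, 1, 1),
    end_datetime=datetime(2014, 2, 1),
)

for key, entry in example_results.items():
    print(key, list(entry["keys"].keys()))
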
def test_read_GC():
    get_local_bucket(empty=True)

    data_filepath = get_datapath(filename="capegrim-medusa.18.C", data_type="GC")
    precision_filepath = get_datapath(filename="capegrim-medusa.18.precisions.C", data_type="GC")

    results = ObsSurface.read_file(filepath=(data_filepath, precision_filepath), data_type="GCWERKS")

    expected_keys = sorted(
        [
            "capegrim-medusa.18_NF3",
            "capegrim-medusa.18_CF4",
            "capegrim-medusa.18_PFC-116",
            "capegrim-medusa.18_PFC-218",
            "capegrim-medusa.18_PFC-318",
            "capegrim-medusa.18_C4F10",
            "capegrim-medusa.18_C6F14",
            "capegrim-medusa.18_SF6",
            "capegrim-medusa.18_SO2F2",
            "capegrim-medusa.18_SF5CF3",
            "capegrim-medusa.18_HFC-23",
            "capegrim-medusa.18_HFC-32",
            "capegrim-medusa.18_HFC-125",
            "capegrim-medusa.18_HFC-134a",
            "capegrim-medusa.18_HFC-143a",
            "capegrim-medusa.18_HFC-152a",
            "capegrim-medusa.18_HFC-227ea",
            "capegrim-medusa.18_HFC-236fa",
            "capegrim-medusa.18_HFC-245fa",
            "capegrim-medusa.18_HFC-365mfc",
            "capegrim-medusa.18_HFC-4310mee",
            "capegrim-medusa.18_HCFC-22",
            "capegrim-medusa.18_HCFC-124",
            "capegrim-medusa.18_HCFC-132b",
            "capegrim-medusa.18_HCFC-133a",
            "capegrim-medusa.18_HCFC-141b",
            "capegrim-medusa.18_HCFC-142b",
            "capegrim-medusa.18_CFC-11",
            "capegrim-medusa.18_CFC-12",
            "capegrim-medusa.18_CFC-13",
            "capegrim-medusa.18_CFC-112",
            "capegrim-medusa.18_CFC-113",
            "capegrim-medusa.18_CFC-114",
            "capegrim-medusa.18_CFC-115",
            "capegrim-medusa.18_H-1211",
            "capegrim-medusa.18_H-1301",
            "capegrim-medusa.18_H-2402",
            "capegrim-medusa.18_CH3Cl",
            "capegrim-medusa.18_CH3Br",
            "capegrim-medusa.18_CH3I",
            "capegrim-medusa.18_CH2Cl2",
            "capegrim-medusa.18_CHCl3",
            "capegrim-medusa.18_CCl4",
            "capegrim-medusa.18_CH2Br2",
            "capegrim-medusa.18_CHBr3",
            "capegrim-medusa.18_CH3CCl3",
            "capegrim-medusa.18_TCE",
            "capegrim-medusa.18_PCE",
            "capegrim-medusa.18_ethyne",
            "capegrim-medusa.18_ethane",
            "capegrim-medusa.18_propane",
            "capegrim-medusa.18_c-propane",
            "capegrim-medusa.18_benzene",
            "capegrim-medusa.18_toluene",
            "capegrim-medusa.18_COS",
            "capegrim-medusa.18_desflurane",
        ]
    )

    assert sorted(list(results["capegrim-medusa.18.C"].keys())) == expected_keys

    # Load in some data
    uuid = results["capegrim-medusa.18.C"]["capegrim-medusa.18_HFC-152a"]

    hfc152a_data = Datasource.load(uuid=uuid, shallow=False).data()
    hfc152a_data = hfc152a_data["2018-01-01-02:24:00+00:00_2018-01-31-23:33:00+00:00"]

    assert hfc152a_data.time[0] == Timestamp("2018-01-01T02:24:00")
    assert hfc152a_data.time[-1] == Timestamp("2018-01-31T23:33:00")
    assert hfc152a_data["hfc152a"][0] == 4.409
    assert hfc152a_data["hfc152a"][-1] == 4.262
    assert hfc152a_data["hfc152a_repeatability"][0] == 0.03557
    assert hfc152a_data["hfc152a_repeatability"][-1] == 0.03271
    assert hfc152a_data["hfc152a_status_flag"][0] == 0
    assert hfc152a_data["hfc152a_status_flag"][-1] == 0
    assert hfc152a_data["hfc152a_integration_flag"][0] == 0
    assert hfc152a_data["hfc152a_integration_flag"][-1] == 0

    # Check we have the Datasource info saved
    obs = ObsSurface.load()

    assert sorted(obs._datasource_names.keys()) == expected_keys

    del hfc152a_data.attrs["File created"]

    assert hfc152a_data.attrs == {
        "data_owner": "Paul Krummel",
        "data_owner_email": "*****@*****.**",
        "inlet_height_magl": "75m_4",
        "comment": "Medusa measurements. Output from GCWerks. See Miller et al. (2008).",
        "Conditions of use": "Ensure that you contact the data owner at the outset of your project.",
        "Source": "In situ measurements of air",
        "Conventions": "CF-1.6",
        "Processed by": "*****@*****.**",
        "species": "hfc152a",
        "Calibration_scale": "SIO-05",
        "station_longitude": 144.689,
        "station_latitude": -40.683,
        "station_long_name": "Cape Grim, Tasmania",
        "station_height_masl": 94.0,
    }