Пример #1
0
    def get_metadata_header_values(self, elements=None):
        """
        Build the metadata of a file stored in the "Header Values" format.

        Parameters
        ----------
        elements : dict, optional (default: None)
            Already loaded file elements with the keys "headr", "scnd",
            "last" and "fname". If empty or None, the elements are read with
            ``get_elements_from_file``.

        Returns
        -------
        metadata : MetaData
            Network, station, variable, instrument, time range and location
            collected from the header elements and the file name parts.
        depth : Depth
            Sensor depth, built from header elements 6 and 7.
        """
        if not elements:
            header, first_record, last_record, name_parts = \
                self.get_elements_from_file()
        else:
            header = elements["headr"]
            first_record = elements["scnd"]
            last_record = elements["last"]
            name_parts = elements["fname"]

        # With more than 9 file name parts, the parts from index 6 up to
        # (excluding) the last two are joined to form the instrument name.
        n_parts = len(name_parts)
        instrument = ("_".join(name_parts[6:n_parts - 2])
                      if n_parts > 9 else name_parts[6])

        # Map the variable code through the lookup table when it has an
        # entry there, otherwise use the code unchanged.
        var_code = name_parts[3]
        variable = (const.VARIABLE_LUT[var_code]
                    if var_code in const.VARIABLE_LUT else var_code)

        # The first two elements of the second and of the last line are
        # joined with a blank and parsed as the covered time range.
        start = pd.to_datetime(" ".join(first_record[:2]))
        end = pd.to_datetime(" ".join(last_record[:2]))

        depth = Depth(float(header[6]), float(header[7]))

        entries = [
            MetaVar("network", header[1]),
            MetaVar("station", header[2]),
            MetaVar("variable", variable, depth),
            MetaVar("instrument", instrument, depth),
            MetaVar("timerange_from", start),
            MetaVar("timerange_to", end),
            MetaVar("latitude", float(header[3])),
            MetaVar("longitude", float(header[4])),
            MetaVar("elevation", float(header[5])),
        ]

        return MetaData(entries), depth
Пример #2
0
    def test_MetaVar(self):
        """Check string form, tuple form and comparison of MetaVar objects."""
        positive = MetaVar('myvar', 1.1, Depth(0, 1))
        negative = MetaVar('negmyvar', 1.1, Depth(0, -1))
        depthless = MetaVar('other', 99)

        # variable with a positive depth range
        assert str(positive) == "myvar (0.0 to 1.0 [m]): 1.1"
        assert tuple(positive) == ('myvar', 1.1, 0, 1)
        assert positive == positive

        # variable with a negative depth range
        assert str(negative) == "negmyvar (0.0 to -1.0 [m]): 1.1"
        assert tuple(negative) == ('negmyvar', 1.1, 0, -1)
        assert negative != positive

        # variable without any depth
        assert str(depthless) == "other (no depth): 99"
        assert tuple(depthless) == ('other', 99, None, None)
        assert depthless != positive
Пример #3
0
    def get_metadata_ceop_sep(self, elements=None):
        """
        Build the metadata of a file stored in the "CEOP separate" format.

        Parameters
        ----------
        elements : dict, optional (default: None)
            Already loaded file elements with the keys 'headr', 'last' and
            'fname'. If empty or None, the elements are read with
            ``get_elements_from_file``.

        Returns
        -------
        metadata : MetaData
            Network, station, variable, instrument, time range and location
            collected from the file name parts and the first / last line.
        depth : Depth
            Sensor depth, built from file name parts 4 and 5.
        """
        if not elements:
            # the second returned element is not needed for this format
            first_record, _, last_record, name_parts = \
                self.get_elements_from_file()
        else:
            first_record = elements['headr']
            last_record = elements['last']
            name_parts = elements['fname']

        # With more than 9 file name parts, the parts from index 6 up to
        # (excluding) the last two are joined to form the instrument name.
        n_parts = len(name_parts)
        instr = ('_'.join(name_parts[6:n_parts - 2])
                 if n_parts > 9 else name_parts[6])

        # Map the variable code through the lookup table when it has an
        # entry there, otherwise use the code unchanged.
        var_code = name_parts[3]
        variable = (const.VARIABLE_LUT[var_code]
                    if var_code in const.VARIABLE_LUT else var_code)

        # The first two elements of the first and of the last line are
        # joined with a blank and parsed as the covered time range.
        start = pd.to_datetime(' '.join(first_record[:2]))
        end = pd.to_datetime(' '.join(last_record[:2]))

        depth = Depth(float(name_parts[4]), float(name_parts[5]))

        entries = [
            MetaVar('network', name_parts[1]),
            MetaVar('station', name_parts[2]),
            MetaVar('variable', variable, depth),
            MetaVar('instrument', instr, depth),
            MetaVar('timerange_from', start),
            MetaVar('timerange_to', end),
            MetaVar('latitude', float(first_record[7])),
            MetaVar('longitude', float(first_record[8])),
            MetaVar('elevation', float(first_record[9])),
        ]

        return MetaData(entries), depth
Пример #4
0
    def test_MetaData(self):
        """Check size, membership, indexing and key order of the fixture."""
        meta = self.dat

        # size and membership (by name and by element)
        assert len(meta) == 4
        assert 'second' in meta
        assert meta[1] in meta

        # positional access yields (name, value, depth_from, depth_to)
        first_var, second_var = meta[0], meta[1]
        assert tuple(first_var) == ('first', 1, 0., 1.)
        assert tuple(second_var) == ('second', 0, None, None)

        # access by name and by position return equal elements
        assert meta['dup'] == meta[3]

        assert meta.keys() == ['first', 'second', 'neg', 'dup']

        # selecting with a list of names yields a MetaData again
        expected = MetaData([MetaVar.from_tuple(('first', 1, 0., 1.))])
        assert expected == meta[['first']]
Пример #5
0
    def test_MetaData(self):
        """Verify length, lookups, keys and name-list selection of MetaData."""
        collection = self.dat
        names = ["first", "second", "neg", "dup"]

        assert len(collection) == 4
        assert "second" in collection  # lookup by variable name
        assert collection[1] in collection  # lookup by element

        # elements unpack into (name, value, depth_from, depth_to)
        assert tuple(collection[0]) == ("first", 1, 0.0, 1.0)
        assert tuple(collection[1]) == ("second", 0, None, None)

        # name based and position based access agree
        assert collection["dup"] == collection[3]

        assert collection.keys() == names

        # a list of names selects a sub-collection
        single = MetaData([MetaVar.from_tuple(("first", 1, 0.0, 1.0))])
        assert single == collection[["first"]]
Пример #6
0
    def setUp(self) -> None:
        """Create the two MetaData fixtures ``self.dat`` and ``self.other``."""
        # main fixture: with depth, without depth, negative depth, and a
        # variable ('dup') that also appears in the second fixture
        self.dat = MetaData([
            MetaVar('first', 1, Depth(0, 1)),
            MetaVar('second', 0),
            MetaVar('neg', -99, Depth(-0.25, -1)),
            MetaVar('dup', '3rd', Depth(1, 3)),
        ])

        # second fixture: 'dup' again at another depth, plus a new variable
        duplicate = MetaVar('dup', '3rd', Depth(2, 4))
        additional = MetaVar('4', 4)
        self.other = MetaData([duplicate, additional])
Пример #7
0
    def setUp(self) -> None:
        """Prepare the MetaData objects used by the test methods."""
        # variables of the main collection, one per line: (name, value, depth)
        first = MetaVar("first", 1, Depth(0, 1))
        second = MetaVar("second", 0)
        negative = MetaVar("neg", -99, Depth(-0.25, -1))
        duplicate = MetaVar("dup", "3rd", Depth(1, 3))
        self.dat = MetaData([first, second, negative, duplicate])

        # a second collection sharing the name "dup" at a different depth
        self.other = MetaData([
            MetaVar("dup", "3rd", Depth(2, 4)),
            MetaVar("4", 4),
        ])
Пример #8
0
    def __read_field(data, fieldname: str, new_name=None) -> list:
        """
        Extract a field from the loaded csv metadata.

        Parameters
        ----------
        data : pd.DataFrame
            Loaded csv metadata; must provide the columns 'depth_from[m]',
            'depth_to[m]' and 'value' and be indexed by the field names
            (presumably the frame returned by the csv reader of this class).
        fieldname : str
            Index label of the field to extract.
        new_name : str, optional (default: None)
            Name given to the created variables instead of ``fieldname``.

        Returns
        -------
        field_vars : list of MetaVar
            One variable per entry found for the field, each with its own
            depth. Empty if ``fieldname`` is not in the index of ``data``.
        """
        if fieldname not in data.index:
            return []

        name = fieldname if new_name is None else new_name

        # A single match yields scalars, several matches yield arrays;
        # atleast_1d makes both cases iterable in the same way.
        entries = data.loc[fieldname]
        froms = np.atleast_1d(entries['depth_from[m]'])
        tos = np.atleast_1d(entries['depth_to[m]'])
        vals = np.atleast_1d(entries['value'])

        field_vars = []
        for d_from, d_to, val in zip(froms, tos, vals):
            try:
                val = float(val)
            except ValueError:
                pass  # value is actually a string, that's ok
            field_vars.append(MetaVar(name, val, Depth(d_from, d_to)))

        return field_vars
Пример #9
0
    def read_metadata(self) -> MetaData:
        """
        Read the csv file with the static variables and convert it to metadata.

        If the root is a zip archive, the csv file is first extracted into a
        temporary directory below ``self.temp_root``. Land cover and climate
        classifications and the soil fields (saturation, clay / sand / silt
        fraction, organic carbon) are collected. Everything that is not found
        in the file keeps its placeholder from ``const.CSV_META_TEMPLATE``.

        A missing "climate classification" entry is only logged. A missing
        "land cover classification" entry is not handled here, so the lookup
        error (presumably a KeyError) propagates to the caller.

        Returns
        -------
        metadata : MetaData
            Static metadata read from csv file.
        """
        if self.root.zip:
            if not self.root.isopen:
                self.root.open()
            # the temporary directory is removed again when the block is left
            with TemporaryDirectory(prefix="ismn",
                                    dir=self.temp_root) as tempdir:
                extracted = self.root.extract_file(self.file_path, tempdir)
                data = self.__read_csv(extracted)
        else:
            data = self.__read_csv(self.root.path / self.file_path)

        # read landcover classifications
        lc = data.loc[["land cover classification"
                       ]][["value", "quantity_source_name"]]

        # source name in the csv file -> value, initialised with placeholders
        lc_dict = {
            "CCI_landcover_2000": const.CSV_META_TEMPLATE["lc_2000"],
            "CCI_landcover_2005": const.CSV_META_TEMPLATE["lc_2005"],
            "CCI_landcover_2010": const.CSV_META_TEMPLATE["lc_2010"],
            "insitu": const.CSV_META_TEMPLATE["lc_insitu"],
        }

        cl_dict = {
            "koeppen_geiger_2007": const.CSV_META_TEMPLATE["climate_KG"],
            "insitu": const.CSV_META_TEMPLATE["climate_insitu"],
        }

        for key in lc_dict:
            if key in lc["quantity_source_name"].values:
                # first value reported for this source
                value = lc.loc[lc["quantity_source_name"] ==
                               key]["value"].values[0]
                if key != "insitu":
                    # Builtin int instead of ``np.int``: the alias was
                    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
                    lc_dict[key] = int(value)
                else:
                    lc_dict[key] = value
                    logging.info(
                        f"insitu land cover classification available: {self.file_path}"
                    )

        # read climate classifications
        try:
            cl = data.loc[["climate classification"
                           ]][["value", "quantity_source_name"]]
            for key in cl_dict:
                if key in cl["quantity_source_name"].values:
                    cl_dict[key] = cl.loc[cl["quantity_source_name"] ==
                                          key]["value"].values[0]
                    if key == "insitu":
                        logging.info(
                            f"insitu climate classification available: {self.file_path}"
                        )
        except KeyError:
            logging.info(f"No climate metadata found for {self.file_path}")

        metavars = [
            MetaVar("lc_2000", lc_dict["CCI_landcover_2000"]),
            MetaVar("lc_2005", lc_dict["CCI_landcover_2005"]),
            MetaVar("lc_2010", lc_dict["CCI_landcover_2010"]),
            MetaVar("lc_insitu", lc_dict["insitu"]),
            MetaVar("climate_KG", cl_dict["koeppen_geiger_2007"]),
            MetaVar("climate_insitu", cl_dict["insitu"]),
        ]

        # template name -> variables found in the csv file (possibly none)
        static_meta = {
            "saturation":
            self.__read_field(data, "saturation"),
            "clay_fraction":
            self.__read_field(data, "clay fraction",
                              const.VARIABLE_LUT["cl_h"]),
            "sand_fraction":
            self.__read_field(data, "sand fraction",
                              const.VARIABLE_LUT["sa_h"]),
            "silt_fraction":
            self.__read_field(data, "silt fraction",
                              const.VARIABLE_LUT["si_h"]),
            "organic_carbon":
            self.__read_field(data, "organic carbon",
                              const.VARIABLE_LUT["oc_h"]),
        }

        for name, found in static_meta.items():
            if len(found) > 0:
                metavars += found
            else:
                # field not in the file: fall back to the template value
                metavars.append(MetaVar(name, const.CSV_META_TEMPLATE[name]))

        return MetaData(metavars)
Пример #10
0
def _read_station_dir(
    root: Union[IsmnRoot, Path, str],
    stat_dir: Union[Path, str],
    temp_root: Path,
) -> (list, list):
    """
    Parallelizable function to read metadata for files in station dir

    Parameters
    ----------
    root : IsmnRoot or Path or str
        Data root. Anything that is not an IsmnRoot is wrapped into a new
        IsmnRoot, which is closed again before the function is left.
    stat_dir : Path or str
        Station directory that is searched for "*.csv" and "*.stm" files.
    temp_root : Path
        Passed on to StaticMetaFile and DataFile as ``temp_root``.

    Returns
    -------
    filelist : list
        One tuple (network, station, DataFile) per data file that could be
        loaded.
    infos : list
        Messages on processed files and on problems that were encountered.
    """
    infos = []

    # Only a root that is created here is closed again at the end.
    proc_root = not isinstance(root, IsmnRoot)
    if proc_root:
        root = IsmnRoot(root)

    try:
        csv = root.find_files(stat_dir, "*.csv")

        try:
            if len(csv) == 0:
                raise IsmnFileError(
                    "Expected 1 csv file for station, found 0. "
                    "Use empty static metadata."
                )
            if len(csv) > 1:
                infos.append(
                    f"Expected 1 csv file for station, found {len(csv)}. "
                    f"Use first file in dir."
                )
            static_meta_file = StaticMetaFile(
                root, csv[0], load_metadata=True, temp_root=temp_root
            )
            station_meta = static_meta_file.metadata
        except IsmnFileError as e:
            # fall back to the placeholder values of the template
            infos.append(f"Error loading static meta for station: {e}")
            station_meta = MetaData(
                [MetaVar(k, v) for k, v in CSV_META_TEMPLATE.items()]
            )

        data_files = root.find_files(stat_dir, "*.stm")

        filelist = []

        for file_path in data_files:
            try:
                f = DataFile(root, file_path, temp_root=temp_root)
            except IOError as e:
                infos.append(f"Error loading ismn file: {e}")
                continue

            f.metadata.merge(station_meta, inplace=True)

            # reduce the merged metadata to the depth of the instrument
            instrument_depth = f.metadata["instrument"].depth
            f.metadata = f.metadata.best_meta_for_depth(
                Depth(instrument_depth.start, instrument_depth.end)
            )

            network = f.metadata["network"].val
            station = f.metadata["station"].val

            filelist.append((network, station, f))

            infos.append(f"Processed file {file_path}")
    finally:
        # also release a root opened here when reading fails half-way
        if proc_root:
            root.close()

    return filelist, infos
Пример #11
0
    def from_metadata_csv(
        cls, data_root, meta_csv_file, network=None, temp_root=gettempdir()
    ):
        """
        Load a previously created and stored filelist from
        :func:`ismn.filecollection.IsmnFileCollection.to_metadata_csv`

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered.
            Files of all other networks are skipped and do not appear in the
            file list.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.

        Returns
        -------
        collection : cls
            Object created as ``cls(root, filelist=filelist)``, where
            ``filelist`` maps each network name to its list of DataFile
            objects.

        Notes
        -----
        The metadata data frame (reduced to the selected networks) is stored
        as ``metadata_df`` on ``cls``.
        """
        if network is not None:
            network = np.atleast_1d(network)

        if isinstance(data_root, IsmnRoot):
            root = data_root
        else:
            root = IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        metadata_df = _load_metadata_df(meta_csv_file)

        filelist = OrderedDict([])

        all_networks = metadata_df["network"]["val"].values

        # one flag per row: does the file belong to a selected network?
        if network is None:
            selected = np.ones(len(all_networks), dtype=bool)
        else:
            selected = np.isin(all_networks, network)

        columns = np.array(list(metadata_df.columns))

        # Variable names in column order, computed once for all rows. The
        # last two columns are not metadata variables (they are read below
        # as file path and file type). Unlike np.unique, dict.fromkeys does
        # not sort, so the names stay aligned with the value triples that
        # are taken from the rows by position.
        var_names = list(dict.fromkeys(columns[:-2][:, 0]))

        for row, keep in zip(metadata_df.values, selected):
            if not keep:
                continue

            # every variable takes 3 columns; they are read as
            # (depth_from, depth_to, val)
            triples = row[:-2].reshape(-1, 3)

            metadata = MetaData(
                [
                    MetaVar.from_tuple((name, vals[2], vals[0], vals[1]))
                    for name, vals in zip(var_names, triples)
                ]
            )

            f = DataFile(
                root=root,
                file_path=str(PurePosixPath(row[-2])),
                load_metadata=False,
                temp_root=temp_root,
            )

            f.metadata = metadata
            f.file_type = row[-1]

            this_nw = f.metadata["network"].val
            filelist.setdefault(this_nw, []).append(f)

        if network is None:
            cls.metadata_df = metadata_df
        else:
            cls.metadata_df = metadata_df.loc[selected]

        return cls(root, filelist=filelist)
Пример #12
0
    def read_metadata(self) -> MetaData:
        """
        Read the csv file with the static variables and convert it to metadata.

        If the root is a zip archive, the csv file is first extracted into a
        temporary directory below ``self.temp_root``. Land cover and climate
        classifications and the soil fields (saturation, clay / sand / silt
        fraction, organic carbon) are collected. Everything that is not found
        in the file keeps its placeholder from ``const.CSV_META_TEMPLATE``.

        A missing 'climate classification' entry is only logged. A missing
        'land cover classification' entry is not handled here, so the lookup
        error (presumably a KeyError) propagates to the caller.

        Returns
        -------
        metadata : MetaData
            Static metadata read from csv file.
        """
        if self.root.zip:
            if not self.root.isopen:
                self.root.open()
            # the temporary directory is removed again when the block is left
            with TemporaryDirectory(prefix='ismn',
                                    dir=self.temp_root) as tempdir:
                extracted = self.root.extract_file(self.file_path, tempdir)
                data = self.__read_csv(extracted)
        else:
            data = self.__read_csv(self.root.path / self.file_path)

        # read landcover classifications
        lc = data.loc[['land cover classification'
                       ]][['value', 'quantity_source_name']]

        # source name in the csv file -> value, initialised with placeholders
        lc_dict = {
            'CCI_landcover_2000': const.CSV_META_TEMPLATE['lc_2000'],
            'CCI_landcover_2005': const.CSV_META_TEMPLATE['lc_2005'],
            'CCI_landcover_2010': const.CSV_META_TEMPLATE['lc_2010'],
            'insitu': const.CSV_META_TEMPLATE['lc_insitu']
        }

        cl_dict = {
            'koeppen_geiger_2007': const.CSV_META_TEMPLATE['climate_KG'],
            'insitu': const.CSV_META_TEMPLATE['climate_insitu']
        }

        for key in lc_dict:
            if key in lc['quantity_source_name'].values:
                # first value reported for this source
                value = lc.loc[lc['quantity_source_name'] ==
                               key]['value'].values[0]
                if key != 'insitu':
                    # Builtin int instead of ``np.int``: the alias was
                    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
                    lc_dict[key] = int(value)
                else:
                    lc_dict[key] = value
                    logging.info(
                        f'insitu land cover classification available: {self.file_path}'
                    )

        # read climate classifications; files without such an entry keep the
        # template placeholders instead of aborting the whole read
        try:
            cl = data.loc[['climate classification'
                           ]][['value', 'quantity_source_name']]
            for key in cl_dict:
                if key in cl['quantity_source_name'].values:
                    cl_dict[key] = cl.loc[cl['quantity_source_name'] ==
                                          key]['value'].values[0]
                    if key == 'insitu':
                        logging.info(
                            f'insitu climate classification available: {self.file_path}'
                        )
        except KeyError:
            logging.info(f'No climate metadata found for {self.file_path}')

        metavars = [
            MetaVar('lc_2000', lc_dict['CCI_landcover_2000']),
            MetaVar('lc_2005', lc_dict['CCI_landcover_2005']),
            MetaVar('lc_2010', lc_dict['CCI_landcover_2010']),
            MetaVar('lc_insitu', lc_dict['insitu']),
            MetaVar('climate_KG', cl_dict['koeppen_geiger_2007']),
            MetaVar('climate_insitu', cl_dict['insitu']),
        ]

        # template name -> variables found in the csv file (possibly none)
        static_meta = {
            'saturation':
            self.__read_field(data, 'saturation'),
            'clay_fraction':
            self.__read_field(data, 'clay fraction',
                              const.VARIABLE_LUT['cl_h']),
            'sand_fraction':
            self.__read_field(data, 'sand fraction',
                              const.VARIABLE_LUT['sa_h']),
            'silt_fraction':
            self.__read_field(data, 'silt fraction',
                              const.VARIABLE_LUT['si_h']),
            'organic_carbon':
            self.__read_field(data, 'organic carbon',
                              const.VARIABLE_LUT['oc_h']),
        }

        for name, found in static_meta.items():
            if len(found) > 0:
                metavars += found
            else:
                # field not in the file: fall back to the template value
                metavars.append(MetaVar(name, const.CSV_META_TEMPLATE[name]))

        return MetaData(metavars)
Пример #13
0
    def from_metadata_csv(cls,
                          data_root,
                          meta_csv_file,
                          network=None,
                          temp_root=gettempdir()):
        """
        Load a previously created and stored filelist from a metadata csv file.

        Parameters
        ----------
        data_root : IsmnRoot or str or Path
            Path where the ismn data is stored, can also be a zip file
        meta_csv_file : str or Path
            Csv file where the metadata is stored.
        network : list, optional (default: None)
            List of networks that are considered. Files of all other networks
            are skipped and do not appear in the file list.
        temp_root : str or Path, optional (default: gettempdir())
            Temporary folder where extracted data is copied during reading from
            zip archive.

        Returns
        -------
        collection : cls
            Object created as ``cls(root, filelist=filelist)``, where
            ``filelist`` maps each network name to its list of DataFile
            objects.
        """
        if network is not None:
            network = np.atleast_1d(network)

        if isinstance(data_root, IsmnRoot):
            root = data_root
        else:
            root = IsmnRoot(data_root)

        print(f"Found existing ismn metadata in {meta_csv_file}.")

        # two header rows: variable name and sub-column name
        metadata_df = pd.read_csv(meta_csv_file,
                                  index_col=0,
                                  header=[0, 1],
                                  low_memory=False,
                                  engine='c')

        # parse date cols as datetime
        for col in ['timerange_from', 'timerange_to']:
            metadata_df[col, 'val'] = pd.to_datetime(metadata_df[col, 'val'])

        # variable names in the order in which they appear in the columns
        lvars = []
        for c in metadata_df.columns:
            if c[0] not in lvars:
                lvars.append(c[0])

        # we assume triples for all vars except these, so they must be at the end
        # NOTE(review): as an assert, this check is skipped under python -O.
        assert lvars[-2:] == ['file_path', 'file_type'], \
            "file_type and file_path must be at the end."

        # Names of the metadata variables, computed once for all rows. They
        # keep the column order, so they stay aligned with the value triples
        # that are taken from the rows by position (np.unique would sort).
        var_names = lvars[:-2]

        filelist = OrderedDict([])

        all_networks = metadata_df['network']['val'].values

        # one flag per row: does the file belong to a selected network?
        if network is None:
            selected = np.ones(len(all_networks), dtype=bool)
        else:
            selected = np.isin(all_networks, network)

        for row, keep in zip(metadata_df.values, selected):
            if not keep:
                continue

            # every variable takes 3 columns; they are read as
            # (depth_from, depth_to, val)
            triples = row[:-2].reshape(-1, 3)

            metadata = MetaData([
                MetaVar.from_tuple((name, vals[2], vals[0], vals[1]))
                for name, vals in zip(var_names, triples)
            ])

            f = DataFile(root=root,
                         file_path=str(PurePosixPath(row[-2])),
                         load_metadata=False,
                         temp_root=temp_root)

            f.metadata = metadata
            f.file_type = row[-1]

            this_nw = f.metadata['network'].val
            filelist.setdefault(this_nw, []).append(f)

        return cls(root, filelist=filelist)