Example #1
def copy_log_from_where(rundate, pipeline, args):
    kwargs = dict()
    for a in args.split():
        if "=" in a:
            a = a.split("=", maxsplit=1)
            kwargs[a[0].lstrip("-")] = a[1]

    file_vars = dict(
        **config.program_vars(rundate, pipeline, use_options=False, **kwargs),
        **config.date_vars(rundate))
    log_level = config.where.runner.log_level.str
    current_level = "none"
    try:
        with config.files.open("log", file_vars=file_vars) as fid:
            for line in fid:
                line_level, _, text = line.partition(" ")
                line_level = line_level.strip().lower()
                current_level = line_level if line_level else current_level
                text = text.strip()
                if getattr(LogLevel, current_level) >= getattr(
                        LogLevel, log_level) and text:
                    # strip the 20 date characters from the text
                    log.log(text[20:], current_level)
    except FileNotFoundError as err:
        log.warn(f"'{err}'")
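A minimal usage sketch for the function above. The pipeline name and the option string below are placeholders; the extra "--key=value" options are split on whitespace and forwarded to config.program_vars as keyword arguments:

from datetime import date

# Hypothetical values: replay the Where log of a VLBI run for the given rundate
copy_log_from_where(date(2019, 6, 1), "vlbi", "--session=XA --stage=read")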
Example #2
def list_datasets(rundate, tech, session, stage, **kwargs):
    """List datasets in a given dataset file

    Args:
        rundate:  Datetime, the model run date.
        tech:     String, the technique.
        session:  String, the session name.
        stage:    String, the stage.
        kwargs:   Other arguments are passed to files.open.

    Returns:
        List of strings describing the datasets.
    """
    file_vars = dict(
        config.program_vars(rundate,
                            tech,
                            session=session,
                            stage=stage,
                            **kwargs), **config.date_vars(rundate))

    try:
        with files.open("dataset_json", file_vars=file_vars) as fid:
            json_data = json.load(fid)
    except FileNotFoundError:
        log.fatal(
            f"No data found for {tech.upper()} {stage} {rundate.strftime(config.FMT_date)}"
        )
        return list()

    return sorted(k for k in json_data.keys()
                  if not k.startswith("_") and "/" in k)
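A short usage sketch, assuming the Where configuration for the run is in place; the technique, session and stage values are placeholders:

from datetime import date

# List all datasets written for a (hypothetical) VLBI session "XA" at the "read" stage
for name in list_datasets(date(2019, 6, 1), "vlbi", session="XA", stage="read"):
    print(name)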
Example #3
def parse_dataset_id(rundate, tech, stage, dataset_name, dataset_id, **kwargs):
    """Allow for some advanced handling of dataset_id

    In addition to using regular numbers as dataset_id, some text keywords can be used:

    + 'last': Use the last dataset_id written to file, default 0 if no file is previously written.
    + 'all':  Return a list of all dataset_ids in the file.
    """
    if isinstance(dataset_id, (float, int)):
        return dataset_id

    # Use the JSON-file to find information about the dataset ids
    file_vars = dict(
        config.program_vars(rundate,
                            tech,
                            session=dataset_name,
                            stage=stage,
                            **kwargs), **config.date_vars(rundate))
    try:
        with files.open("dataset_json", file_vars=file_vars) as fid:
            json_data = json.load(fid)
    except FileNotFoundError:
        json_data = dict()

    if dataset_id == "last":
        # If _last_dataset_id is not given, use dataset_id=0 as default
        return json_data.get(dataset_name, dict()).get("_last_dataset_id", 0)

    if dataset_id == "all":
        return [
            int(k.split("/")[-1]) for k in json_data.keys()
            if k.startswith("{}/".format(dataset_name))
        ]
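The keywords described in the docstring can be exercised as in the sketch below (the technique, stage and session names are placeholders):

from datetime import date

rundate = date(2019, 6, 1)
parse_dataset_id(rundate, "vlbi", "read", "XA", 2)       # plain numbers are returned unchanged
parse_dataset_id(rundate, "vlbi", "read", "XA", "last")  # last dataset_id written, 0 if no file exists
parse_dataset_id(rundate, "vlbi", "read", "XA", "all")   # list of all dataset_ids in the file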
Example #4
    def __init__(self,
                 rundate,
                 tech,
                 stage,
                 dataset_name,
                 dataset_id,
                 empty=False,
                 **kwargs):
        """Create a new Dataset or read an existing one

        Note:
            Be aware that the implementation is dependent on ``self._fields`` being the first attribute to be set. See
            :func:`__setattr__` for more information.

        Args:
            rundate:      Date, the model run date.
            tech:         String, the technique.
            stage:        String, the stage.
            dataset_name: String, the name of the dataset.
            dataset_id:   Int, id of the dataset (the keyword 'last' is also accepted).
            empty:        Boolean, if False (default) will read dataset from disk if available.

        """
        self._fields = dict()
        self._data = dict()
        self._num_obs = 0
        self._default_field_suffix = None
        self._kwargs = kwargs
        self._kwargs.setdefault("session", dataset_name)  # TODO: Can this be removed?
        self.vars = dict(
            config.program_vars(**dict(
                kwargs,
                rundate=rundate,
                tech_name=tech,
                stage=stage,
                dataset_name=dataset_name,
                dataset_id=str(dataset_id),
            )))
        self.vars.update(**kwargs)
        self.rundate = rundate
        dataset_id = _data.parse_dataset_id(rundate, tech, stage, dataset_name,
                                            dataset_id)
        self.name = "{name}/{id:04d}".format(name=dataset_name, id=dataset_id)
        self.meta = dict()

        # Try to read dataset from disk unless explicitly told to create an empty dataset
        if not empty:
            try:
                self.read()
            except FileNotFoundError:
                pass
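Construction might look like the sketch below, assuming this __init__ belongs to Where's Dataset class (the technique and session values are placeholders):

from datetime import date

# Read dataset 0 of the (hypothetical) session "XA" from disk if it exists ...
dset = Dataset(date(2019, 6, 1), tech="vlbi", stage="read", dataset_name="XA", dataset_id=0)

# ... or create an empty dataset that is only filled and written later
dset_empty = Dataset(date(2019, 6, 1), tech="vlbi", stage="read", dataset_name="XA",
                     dataset_id=0, empty=True)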
Example #5
def copy_log_from_where(rundate, pipeline, session):
    file_vars = dict(**config.program_vars(rundate, pipeline, session),
                     **config.date_vars(rundate))
    log_level = config.where.runner.log_level.str
    current_level = "none"
    try:
        with files.open("log", file_vars=file_vars) as fid:
            for line in fid:
                line_level, _, text = line.partition(" ")
                line_level = line_level.strip().lower()
                current_level = line_level if line_level else current_level
                text = text.strip()
                if getattr(LogLevel, current_level) >= getattr(
                        LogLevel, log_level) and text:
                    log.log(text, current_level)
    except FileNotFoundError as err:
        log.warn(f"'{err}'")
Example #6
def _concatenate_datasets(from_date: date, to_date: date, dset_vars: Dict[str, str],
                          only_for_rundate: bool) -> "data.Dataset":
    """Concatenate datasets

    Args:
        from_date:         Start date for reading Dataset.
        to_date:           End date for reading Dataset.
        dset_vars:         Common Dataset variables.
        only_for_rundate:  Concatenate only data for given rundate.
    """
    merged_vars = config.program_vars(rundate=from_date,
                                      tech_name=dset_vars["tech"],
                                      **dset_vars)
    merged_vars["id"] += "_concatenated"
    dset_merged = data.Dataset(
        **dict(merged_vars, rundate=from_date, empty=True))

    date_to_read = from_date
    while date_to_read <= to_date:
        dset = data.Dataset(rundate=date_to_read, **dset_vars)

        current_date = date_to_read
        date_to_read += timedelta(days=1)

        if dset.num_obs == 0:
            log.info(f"No data to read for {current_date}")
            continue

        if only_for_rundate:
            _keep_data_only_for_rundate(dset)

            if dset.num_obs == 0:
                log.info(f"No data to read for {current_date}")
                continue

        log.info(f"Reading data for {current_date}")
        if not dset_merged:
            dset_merged.copy_from(dset)
        else:
            dset_merged.extend(dset)

    return dset_merged
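A usage sketch, assuming dset_vars carries the variables that Where's Dataset constructor expects (the keys and values below are placeholders):

from datetime import date

dset_vars = dict(tech="vlbi", stage="read", dataset_name="XA", dataset_id=0, session="XA")
dset_week = _concatenate_datasets(date(2019, 6, 1), date(2019, 6, 7), dset_vars,
                                  only_for_rundate=True)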
Example #7
def concatenate_datasets(from_date, to_date, dset_vars):
    merged_vars = config.program_vars(rundate=from_date, tech_name=dset_vars["tech"], **dset_vars)
    merged_vars["id"] += "_concatenated"
    dset_merged = data.Dataset(**dict(merged_vars, rundate=from_date, empty=True))

    date_to_read = from_date
    while date_to_read <= to_date:
        dset = data.Dataset(rundate=date_to_read, **dset_vars)
        current_date = date_to_read
        date_to_read += timedelta(days=1)
        if dset.num_obs == 0:
            log.info(f"No data to read for {current_date}")
            continue
        log.info(f"Reading data for {current_date}")
        if not dset_merged:
            dset_merged.copy_from(dset)
        else:
            dset_merged.extend(dset)

    return dset_merged