Example #1
File: parser.py Project: yxw027/where
    def __init__(self, rundate=None, file_path=None):
        """Set up the basic information needed by the parser

        Subclasses of Parser should extend this constructor by calling super().__init__(rundate) before setting
        attributes (at the very least self.file_key).

        The `file_path` parameter is mainly intended for running the parsers independently of Where, and can be used
        to specify a file independent of `files.conf`. Note that this should not be done inside the Where program, as
        that loses some of the logging and maintainability.

        Args:
            rundate (date):        The model run date (optional, used to set up date variables).
            file_path (String):    Optional path to file that will be read.
        """
        super().__init__()
        self.file_key = "Overwritten by subclasses"
        self.file_path = file_path
        self.rundate = rundate
        self.data_available = True
        self.dependencies = list()

        # Initialize the data
        self.vars = dict()
        self.meta = dict()
        self.data = dict()

        # Use _parser.Parser and subclasses instead
        log.dev(
            f"parser.Parser is deprecated, let {self.__class__.__name__} subclass one of "
            f"LineParser, ChainParser or SinexParser instead")
Example #2
File: parser.py Project: yxw027/where
    def parse(self):
        """Parse data

        This is a basic implementation that carries out the whole pipeline of reading and parsing data files,
        including calculating secondary data.

        Returns:
            Parser: The parser itself, with the parsed data stored in self.data.
        """
        log.dev(
            f"where.parsers.parser is deprecated. Use where.parsers._parser or one of it's subclasses instead."
        )

        if self.file_path is None:
            self.file_path = config.files.path(self.file_key,
                                               file_vars=self.vars,
                                               download_missing=True)

        parser_package, parser_name = self.__module__.rsplit(".", maxsplit=1)
        with Timer("Finish {} ({}) - {} in".format(parser_name, parser_package,
                                                   self.file_key)):
            if self.data_available:
                self.read_data()

            if not self.data_available:  # May have been set to False by self.read_data()
                log.warn(
                    f"No data found by {self.__class__.__name__} for {self.rundate.strftime(config.FMT_date)} "
                    f"(was looking for {self.file_path})")
                return self

            self.calculate_data()
            dependencies.add(*self.dependencies, label=self.file_key)

        return self
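
Assuming a subclass like the hypothetical MyObsParser sketched under example #1, driving the pipeline would look
roughly like this:

    from datetime import date

    parser = MyObsParser(rundate=date(2020, 1, 1))
    parser.parse()                # read_data() + calculate_data()
    if parser.data_available:
        print(parser.data)        # parsed values end up in the .data dict
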
Example #3
def glob_paths(file_key, file_vars=None, is_zipped=None):
    """Find all filepaths matching a filename pattern

    Using pathlib.Path.glob() here is not trivial, because the file path must be split into a fixed base directory to
    start searching from and a pattern which may span directories. With glob.glob() this would be trivial, but it only
    returns strings and not pathlib.Paths.
    """
    import sys

    caller = sys._getframe(1)
    func_name = caller.f_code.co_name
    file_name = caller.f_code.co_filename
    line_num = caller.f_lineno
    log.dev(
        f"{file_name} ({line_num}) {func_name}: 'lib.files.glob_paths()' is deprecated. Use 'lib.config.files.glob_paths()' instead"
    )

    path_string = str(
        path(file_key, file_vars, default="*", is_zipped=is_zipped))
    glob_path = pathlib.Path(re.sub(r"\*+", "*", path_string))
    idx = min((i for i, p in enumerate(glob_path.parts) if "*" in p),
              default=len(glob_path.parts) - 1)
    glob_base = pathlib.Path(*glob_path.parts[:idx])
    glob_pattern = str(pathlib.Path(*glob_path.parts[idx:]))
    return list(glob_base.glob(glob_pattern))
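
To see the base/pattern split in isolation (the path below is made up):

    import pathlib

    # The first path component containing '*' marks where the fixed base
    # directory ends and the glob pattern begins
    glob_path = pathlib.Path("/data/where/*/obs/station_*.txt")
    idx = min((i for i, p in enumerate(glob_path.parts) if "*" in p),
              default=len(glob_path.parts) - 1)
    print(pathlib.Path(*glob_path.parts[:idx]))    # /data/where
    print(pathlib.Path(*glob_path.parts[idx:]))    # */obs/station_*.txt
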
Example #4
def data_handling(dset):
    """Edits data based on SLR handling file

    Args:
        dset:     A Dataset containing model data.

    Returns:
        Array containing False for observations to throw away
    """
    handling = apriori.get("slr_handling_file", time=dset.time)

    remove_idx = np.zeros(dset.num_obs, dtype=bool)
    for station in dset.unique("station"):
        # TODO: To be implemented
        if "V" in handling.get(station, {}):
            log.dev(
                f"TODO: Station {station}, marked with a V, not sure what that means"
            )

        # X is data to be deleted
        # N is a non-reliable station, not to be used for operational analysis
        # Q is a station in quarantine
        for key in ["X", "N", "Q"]:
            intervals = handling.get(station, {}).get(key, [])
            for interval in intervals:
                start_x, end_x = interval[0]
                int_idx = (dset.filter(station=station) &
                           (dset.time.datetime >= start_x) &
                           (dset.time.datetime <= end_x))
                if np.any(int_idx):
                    log.debug(
                        f"Removed data for station {station} in interval {start_x}-{end_x}, marked with {key}"
                    )
                    remove_idx |= int_idx
    return ~remove_idx
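
A sketch of how the returned keep-mask would be used (assuming the Dataset supports boolean subsetting via a
subset method; the call below is hypothetical):

    keep_idx = data_handling(dset)   # True = keep, False = throw away
    dset.subset(keep_idx)            # hypothetical; drops the flagged observations
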
Example #5
    def __init__(self, text="Elapsed time:", unit=None, logger=log.time):
        """Set up a new timer

        The text to be shown when logging the timer can be customized. Typically, the value of the timer will be added
        at the end of the string (e.g. 'Elapsed time: 0.1234 seconds'). However, this can be customized by adding a
        '{}' to the text. For example `text='Used {} to run the code'` will produce something like 'Used 0.1234 seconds
        to run the code'.

        Args:
            text (String):      Text used when logging the timer (see above).
            unit (String):      Unit used for logging the timer (Default is seconds).
            logger (Function):  Function used to do the logging.
        """
        super().__init__()
        self._start = None
        self._end = None
        self.text = text if "{}" in text else (text + " {}").strip()
        self.unit_name = "seconds" if unit is None else unit
        self.unit_factor = 1 if unit is None else Unit("seconds", unit)
        self.logger = logger

        # Use midgard instead
        caller = sys._getframe(1)
        func_name = caller.f_code.co_name
        file_name = caller.f_code.co_filename
        line_num = caller.f_lineno
        log.dev(
            f"{file_name} ({line_num}) {func_name}: where.lib.timer is deprecated, use midgard.dev.timer instead"
        )
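
A short sketch of the text customization described in the docstring, using the context-manager style seen in
example #2 (do_work is a hypothetical workload; output values are illustrative):

    # Default text: value appended -> 'Elapsed time: 0.1234 seconds'
    with Timer():
        do_work()

    # Custom placement via '{}' -> 'Used 0.1234 seconds to run the code'
    with Timer(text="Used {} to run the code"):
        do_work()
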
Example #6
File: parser.py Project: yxw027/where
    def process_data(self):
        """Deprecate this method

        Can be removed when all references to process_data() are gone
        """
        name = self.__class__.__name__
        log.dev(f"{name}.process_data is deprecated. Use {name}.parse instead")
        self.parse()
Example #7
def open(file_key,
         file_vars=None,
         create_dirs=False,
         is_zipped=None,
         download_missing=True,
         **kwargs):
    """Open a Where file

    Open a Where file based on file key which is looked up in the Where file list.

    The function automatically handles reading from gzipped files if the filename is specified with the special
    {gz}-ending (including the curly braces) in the file list. In that case, the mode should be specified to be 'rt' if
    the contents of the file should be treated as text. If both a zipped and an unzipped version are available, the
    zipped version is used. This can be overridden by specifying True or False for the is_zipped-parameter.

    This function behaves similarly to the built-in open-function, and should typically be used with a context manager
    as follows:

    Example:
        with files.open('eopc04_iau', mode='rt') as fid:
            for line in fid:
                print(line.strip())

    Args:
        file_key:    String that is looked up in the Where file list.
        file_vars:   Dict, used to replace variables in file name and path.
        create_dirs: True or False, if True missing directories are created.
        is_zipped:   True, False or None. If True, open with gzip. If None, decide automatically.
        download_missing: True or False, if True missing files are downloaded when opening for reading.
        kwargs:      All keyword arguments are passed on to open_path.

    Returns:
        File object representing the file.
    """
    import sys

    caller = sys._getframe(2)
    func_name = caller.f_code.co_name
    file_name = caller.f_code.co_filename
    line_num = caller.f_lineno
    log.dev(
        f"{file_name} ({line_num}) {func_name}: 'lib.files.open()' is deprecated. Use 'lib.config.files.open()' instead"
    )

    download_missing = download_missing and "r" in kwargs.get("mode", "r")
    file_path = path(file_key,
                     file_vars,
                     is_zipped=is_zipped,
                     download_missing=download_missing)
    kwargs.setdefault("encoding", encoding(file_key))
    with open_path(file_path,
                   description=file_key,
                   create_dirs=create_dirs,
                   is_zipped=is_path_zipped(file_path),
                   **kwargs) as fid:
        yield fid
Example #8
def _interpolate_meteorological_data(dset, data, rundate):
    """Calculate temperature, humidity and pressure at observation epochs

    Meteorological data are calculated at observation epochs by interpolating the data given in the observation
    file for each station.

    Missing meteorological data are currently not handled.
    """
    rundate = datetime(rundate.year, rundate.month, rundate.day)
    for field, station in [(f, f[4:]) for f in data.keys()
                           if f.startswith("met_")]:
        log.debug(f"Meteorological data available for station {station}")

        met_time = data[field].pop("met_time")
        flat_list = [item for sublist in met_time for item in sublist]
        met_time_float = np.array([(t - rundate).total_seconds()
                                   for t in flat_list])
        met_time_unique, met_index = np.unique(met_time_float,
                                               return_index=True)

        diff = len(met_time_float) - len(met_time_unique)
        if diff > 0:
            log.dev(f"Removed duplicate met data for station {station}")
            log.dev("Do this for the actual obs data also!")
        if len(met_time_unique) == 1:
            for met_type in data[field].keys():
                data[field][met_type] = np.repeat(data[field][met_type][0],
                                                  dset.num_obs)
            continue

        # Extrapolation one month before/after
        # (this is overkill, most of these values will be removed later when taking the diagonal)
        min_time = min(met_time_unique) - 31 * 86400
        max_time = max(met_time_unique) + 31 * 86400
        met_time_unique = np.hstack(
            (np.array(min_time), met_time_unique, np.array(max_time)))

        for met_type in data[field].keys():
            met_data_array = data[field][met_type]
            flat_list = [
                item for sublist in met_data_array for item in sublist
            ]
            met_data_array = np.array([flat_list[i] for i in met_index])
            met_data_array = np.hstack(
                (met_data_array[0], met_data_array, met_data_array[-1]))
            data[field][met_type] = interpolation.interpolate(met_time_unique,
                                                              met_data_array,
                                                              dset.obs_time,
                                                              kind="cubic")

    return data
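
The one-month endpoint padding in isolation, with made-up numbers (scipy stands in here for the interpolation
module used by the source):

    import numpy as np
    from scipy.interpolate import interp1d

    t = np.array([0.0, 3600.0, 7200.0, 10800.0])   # seconds since rundate
    v = np.array([12.1, 12.4, 11.9, 12.0])         # e.g. temperature
    t_pad = np.hstack((t[0] - 31 * 86400, t, t[-1] + 31 * 86400))
    v_pad = np.hstack((v[0], v, v[-1]))            # repeat the edge values
    f = interp1d(t_pad, v_pad, kind="cubic")
    print(f(-600.0))                               # usable slightly before t[0]
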
Example #9
def register(func, name=None, sort_value=0):
    """Register a plug-in

    Plug-ins are registered based on the name of the module (file) they are defined in, as well as the package
    (directory) which contains them. Typically all plug-ins of a given type are collected in a package, e.g. models,
    techniques, parsers, etc. The path to the source code file is also stored, so that the source code can be added
    as a dependency file when the plug-in is called.

    If `name` is given, the plug-in is registered based on this name instead of the name of the module. The name of the
    module is still registered as a part that can be used to distinguish between similar plug-ins in different files
    (see for instance how `session` is used in `where.techniques`).

    Args:
        func (Function):       The function that is being registered.
        name (String):         Alternative name of plug-in. Used by `register_named`.
        sort_value (Number):   The value used when sorting plug-ins. Used by `register_ordered`.

    Returns:
        Function: The function that is being registered.
    """
    # Get information from the function being registered
    package_name, _, plugin_name = func.__module__.rpartition(".")
    file_path = pathlib.Path(sys.modules[func.__module__].__file__)

    # Store Plugin-object in _PLUGINS dictionary
    plugin_info = _PLUGINS.setdefault(package_name,
                                      dict()).setdefault(plugin_name, dict())
    if name is None:
        name = func.__name__  # Name of function is used as default name
        plugin_info.setdefault("__parts__", list()).append(
            name)  # Only unnamed parts are added to list

    plugin = Plugin("{}.{}".format(plugin_name, name), func, file_path,
                    sort_value)
    plugin_info[name] = plugin
    log.debug(
        f"Registering {plugin.name} as a {package_name}-plugin from {plugin.file_path}"
    )

    # Add first registered unnamed part as default
    if "__parts__" in plugin_info:
        plugin_info["__default__"] = plugin_info[plugin_info["__parts__"][0]]

    # Use midgard instead
    log.dev(
        f"{package_name}.{plugin_name}: where.lib.plugins is deprecated, use midgard.dev.plugins instead"
    )

    return func
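
Since register() returns the function, it can be used directly as a decorator. A sketch (the module and function
names are made up):

    # hypothetical file: where/models/my_model.py
    from where.lib import plugins

    @plugins.register
    def my_model(dset):
        # stored in _PLUGINS['where.models']['my_model']['my_model'];
        # as the first unnamed registration it also becomes '__default__'
        ...
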
Example #10
    def parse_matrix_func(self, data, lower_upper, type=""):
        """Parser for {marker} data

        Converts the input data to a symmetric matrix and adds it to self.data['{marker}'].

        The NEQ-matrix row/column numbers correspond to the estimated parameter indices in the {size_marker} block.
        Missing elements in the matrix are assumed to be zero (0); consequently, zero elements may be omitted to reduce
        the size of this block.

        Args:
            data (numpy.array):    Input data, raw data for {marker} block.
            lower_upper (String):  Either 'L' or 'U', indicating whether the matrix is given in lower or upper form.
            type (String):         Information about the type of matrix, optional

        Returns:
            Numpy array:  Symmetric matrix.
        """
        # Size of matrix is given by {size_marker}-block, initialize to all zeros
        try:
            n = len(self._sinex[size_marker])
        except KeyError:
            n = max(data["row_idx"])
            log.dev(
                f"{size_marker!r}-block was not parsed. Guessing at size of normal equation matrix (n={n})."
            )
        matrix = np.zeros((n, n))

        # Loop through each line of values and put it in the correct place in the matrix (cannot simply reshape as
        # elements may have been omitted)
        values = np.stack((data["value_0"], data["value_1"], data["value_2"]),
                          axis=1)
        for row, col, vals in zip(data["row_idx"], data["column_idx"], values):
            vals = vals[~np.isnan(vals)]
            idx = slice(row - 1, row), slice(col - 1, col - 1 + len(vals))
            matrix[idx] = vals

        # Add symmetric elements, depending on whether the matrix is represented in lower or upper form
        if lower_upper.upper() == "L":
            matrix = np.tril(matrix) + np.tril(matrix, k=-1).T
        elif lower_upper.upper() == "U":
            matrix = np.triu(matrix) + np.triu(matrix, k=1).T
        else:
            log.warn(
                f"'L' or 'U' not specified for {marker}. Trying to create a symmetric matrix anyway."
            )
            matrix = matrix + matrix.T - np.diag(np.diag(matrix))

        return {"matrix": matrix, "type": type}
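
The mirroring step in isolation:

    import numpy as np

    M = np.array([[1.0, 0.0],
                  [2.0, 3.0]])                  # lower form, upper part omitted
    S = np.tril(M) + np.tril(M, k=-1).T         # copy strict lower onto upper
    print(S)                                    # [[1. 2.] [2. 3.]]
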
Example #11
File: time.py Project: vpuenteg/where
    def data(self):
        """Temporary warning about removing of data field

        Remove this method when all references to time.data are gone.
        """
        import sys
        from where.lib import log

        caller = sys._getframe(1)
        func_name = caller.f_code.co_name
        file_name = caller.f_code.co_filename
        line_num = caller.f_lineno
        log.dev(
            "'time.data' is deprecated. Use 'time' instead in '{}' ({}:{})",
            func_name, file_name, line_num)

        return self
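
The caller lookup used throughout these deprecation warnings, shown in isolation:

    import sys

    def who_called_me():
        caller = sys._getframe(1)     # one frame up: the code calling us
        return (caller.f_code.co_name, caller.f_code.co_filename,
                caller.f_lineno)

    def some_function():
        print(who_called_me())        # -> ('some_function', <file>, <line>)
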
Example #12
def rotate_z(angle):
    """Rotation matrix around Z-axis

    Positive (counterclockwise) rotation about the Z-axis, as viewed from the positive end of the rotation axis
    towards the origin.

    Args:
        angle (float64):    Rotation angle in [rad]

    Return:
        numpy.ndarray:      Rotation matrix
    """
    log.dev("lib.mathp.rotate_z is deprecated. Use lib.rotation.R3 instead.")

    cosA = np.cos(angle)
    sinA = np.sin(angle)
    R = np.array([[cosA, sinA, 0], [-sinA, cosA, 0], [0, 0, 1]])
    return R
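
A quick numeric check of the sign convention: this is the geodetic R3 form, which expresses a fixed vector in a
frame rotated counterclockwise about Z (equivalently, it rotates the vector clockwise), so for a 90-degree angle
the x unit vector ends up along -y:

    import numpy as np

    R = rotate_z(np.pi / 2)
    print(R @ np.array([1.0, 0.0, 0.0]))   # -> approximately [0., -1., 0.]
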
Example #13
def glob_variable(file_key, variable, pattern, file_vars=None):
    """Find all possible values of variable
    """
    import sys

    caller = sys._getframe(1)
    func_name = caller.f_code.co_name
    file_name = caller.f_code.co_filename
    line_num = caller.f_lineno
    log.dev(
        f"{file_name} ({line_num}) {func_name}: 'lib.files.glob_variable()' is deprecated. Use 'lib.config.files.glob_variable()' instead"
    )

    # Find available paths
    file_vars = dict() if file_vars is None else dict(file_vars)
    file_vars[variable] = "*"
    search_paths = glob_paths(file_key, file_vars)

    # Set up the regular expression
    re_vars = {**file_vars, variable: f"(?P<{variable}>__pattern__)"}
    path_pattern = str(path(file_key, file_vars=re_vars,
                            default=".*")).replace("\\", "\\\\")
    for i in itertools.count():
        # Give unique names to each occurrence of the variable
        path_pattern = path_pattern.replace(f"<{variable}>",
                                            f"<{variable}__{i}>", 1)
        if f"<{variable}>" not in path_pattern:
            break
    re_pattern = re.compile(path_pattern.replace("__pattern__", pattern))

    # Find each match
    values = set()
    for search_path in search_paths:
        match = re_pattern.search(str(search_path))
        if match:
            matches = set(match.groupdict().values())
            if len(matches) > 1:
                log.warn(
                    f"Found multiple values for {variable!r} in {search_path}: {', '.join(matches)}"
                )
            values |= matches
    return values
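
The group-renaming loop in isolation: Python's re module rejects duplicate group names, so each occurrence gets a
unique suffix before the pattern is compiled (the path below is made up):

    import itertools

    variable = "station"
    path_pattern = r"/data/(?P<station>.*)/obs/(?P<station>.*)\.txt"
    for i in itertools.count():
        path_pattern = path_pattern.replace(f"<{variable}>",
                                            f"<{variable}__{i}>", 1)
        if f"<{variable}>" not in path_pattern:
            break
    print(path_pattern)
    # /data/(?P<station__0>.*)/obs/(?P<station__1>.*)\.txt
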
Example #14
def get(datasource_name, **kwargs):
    """Read data from the given data source

    Simple data sources that only return data directly from a parser do not need an explicit apriori-file. This is
    handled by looking in the parser directory if a data source is not found in the apriori directory.

    The import of where.parsers is done locally to avoid circular imports.

    Args:
        datasource_name (String):   Name of apriori data source
        kwargs:                     Input arguments to the data source

    Returns:
        The data from the data source (data type depends on source)
    """
    try:
        return plugins.call_one(package_name=__name__,
                                plugin_name=datasource_name,
                                **kwargs)
    except exceptions.UnknownPluginError as apriori_err:
        from where import parsers

        try:
            data = parsers.parse_key(file_key=datasource_name,
                                     **kwargs).as_dict()
            log.dev(
                f"Called parsers.parse_key({datasource_name}) in apriori.get()"
            )
            return data
        except AttributeError:
            try:
                data = parsers.parse(datasource_name, **kwargs)
                log.dev(
                    f"Called parsers.parse({datasource_name}) in apriori.get()"
                )
                return data
            except exceptions.UnknownPluginError:
                raise apriori_err from None
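
A hedged usage sketch (the data source name and arguments are illustrative): the same call works whether the name
resolves to an apriori plug-in or falls through to a parser:

    eop = get("eop", time=dset.time)   # hypothetical data source and arguments
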
Example #15
def data_handling(dset):
    """Edits data based on SLR handling file

    Args:
        dset:     A Dataset containing model data.

    Returns:
        None: The Dataset is edited in place (bias values and estimation flags are updated).
    """
    handling = apriori.get("slr_handling_file", time=dset.time)

    for station in dset.unique("station"):
        # Estimate range bias E
        intervals = handling.get(station, {}).get("E", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.info(
                    f"ILRS handling: Estimating range bias for station {station} in interval {start_x}-{end_x}"
                )
                log.dev(
                    "ILRS Data Handling: What if there is a break in the middle of a pass?"
                )
                dset.estimate_range[:] = np.logical_or(int_idx,
                                                       dset.estimate_range)
        # Apply range bias R
        intervals = handling.get(station, {}).get("R", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.info(
                    f"ILRS handling: Applying range bias for station {station} in interval {start_x}-{end_x}"
                )
                RB = info["e_value"]
                if info["unit"] == "mm":
                    dset.range_bias[:] += int_idx * RB * Unit.mm2m
                elif info["unit"] == "ms":
                    dset.range_bias[:] += int_idx * RB * Unit.millisec2seconds * constant.c
                else:
                    log.fatal(
                        "Unknown unit on ILRS Data handling file for range bias applied"
                    )
        # Estimate time bias U
        intervals = handling.get(station, {}).get("U", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.warn(
                    f"ILRS handling: Estimating time bias for station {station} in interval {start_x}-{end_x}"
                )
                dset.estimate_time |= int_idx
        # Apply time bias T
        intervals = handling.get(station, {}).get("T", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.info(
                    f"ILRS handling: Applying time bias for station {station} in interval {start_x}-{end_x}"
                )
                t_midInterval = Time(start_x + 1 / 2 * (end_x - start_x),
                                     format="datetime")
                TB = info["e_value"]
                drift = info["e_rate"]
                if info["unit"] == "us":
                    time_drifted = (dset.time - t_midInterval).jd * drift
                    dset.time_bias[:] += int_idx * (
                        -np.repeat(TB, dset.num_obs) -
                        time_drifted) * Unit.microsec2sec
                else:
                    log.fatal(
                        "Unknown unit on ILRS Data handling file for time bias applied"
                    )
        # Apply pressure bias P
        intervals = handling.get(station, {}).get("P", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.fatal("ILRS handling: TODO: Implement pressure bias!")
        # Target signature bias C
        intervals = handling.get(station, {}).get("C", [])
        for interval, info in intervals:
            start_x, end_x = interval
            int_idx = dset.filter(station=station) & (dset.time >= start_x) & (
                dset.time <= end_x)
            if np.any(int_idx):
                log.fatal(
                    "ILRS handling: TODO: Implement target signature bias!")
    return
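
The two range-bias unit conversions in isolation (the numbers are made up): a bias in millimetres scales straight
to metres, while a bias in milliseconds is a time delay converted to metres through the speed of light:

    c = 299792458.0          # speed of light in m/s
    rb_mm = 25.0             # range bias reported in mm
    rb_ms = 1e-4             # range bias reported in ms
    print(rb_mm * 1e-3)      # 0.025 m   (what Unit.mm2m does)
    print(rb_ms * 1e-3 * c)  # ~29.98 m  (what Unit.millisec2seconds * c does)
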
Example #16
def path(file_key,
         file_vars=None,
         default=None,
         is_zipped=None,
         download_missing=False,
         use_aliases=True):
    """Construct a filepath for a given file with variables

    If `is_zipped` is None, and the file_path contains `<filename>{gz}`, the file will be assumed to be a gzip-file if
    there exists a file named `<filename>.gz`.

    When setting `use_aliases` to True, the aliases as specified in the files configuration file represent alternative
    filenames. In particular,

        + if directory / file_name exists it is returned
        + otherwise the first directory / alias that exists is returned
        + if none of these exist, directory / file_name is returned

    Args:
        file_key (String):        Key that is looked up in the Where file list.
        file_vars (Dict):         Values used to replace variables in file name and path.
        default (String):         Value to use for variables that are not in file_vars.
        is_zipped (Bool/None):    True, False or None. If True, open with gzip. If None automatically decide.
        download_missing (Bool):  Whether to try to download missing files.
        use_aliases (Bool):       Fall back on aliases if file does not exist.

    Return:
        Path: Full path with replaced variables in file name and path.
    """
    import sys

    caller = sys._getframe(1)
    func_name = caller.f_code.co_name
    file_name = caller.f_code.co_filename
    line_num = caller.f_lineno
    log.dev(
        f"{file_name} ({line_num}) {func_name}: 'lib.files.path()' is deprecated. Use 'lib.config.files.path()' instead"
    )

    file_vars = dict() if file_vars is None else file_vars
    directory = config.files[file_key].directory.replace(default=default,
                                                         **file_vars).path
    file_name = config.files[file_key].filename.replace(default=default,
                                                        **file_vars).path
    file_path = _replace_gz(directory / file_name)

    # Check for aliases
    if use_aliases and not path_exists(file_path):
        aliases = config.files.get("aliases", section=file_key,
                                   default="").replace(default=default,
                                                       **file_vars).list
        for alias in aliases:
            aliased_path = _replace_gz(file_path.with_name(alias))
            if path_exists(aliased_path):
                return aliased_path

    # Try to download the file if it is missing
    if download_missing and not path_exists(file_path):
        downloaded_path = download_file(file_key, file_vars)
        if downloaded_path is not None:
            file_path = downloaded_path

    return file_path
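
A comment-only sketch of the alias resolution order for use_aliases=True (the file and alias names are made up):

    # files.conf (hypothetical):
    #     filename = obs_{station}.txt
    #     aliases  = obs_{station}.dat, {station}.obs
    #
    # Lookup for station 'nyal':
    #     1. directory / obs_nyal.txt exists      -> returned
    #     2. else directory / obs_nyal.dat exists -> returned
    #     3. else directory / nyal.obs exists     -> returned
    #     4. nothing exists                       -> directory / obs_nyal.txt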