Example #1
def get_preproc(type, input_adapter, output_handler, cfg):
    """
    A function returning the pre-processor class corresponding to the type definition
    :param type: type of the pre-processor (`orbit_segment`)
    :param input_adapter: A class that returns an L1bData object for a given input product file
    :param output_handler: A class that creates a pysiral l1p product from the merged L1bData object
    :param cfg: a treedict of options for the pre-processor
    :return: Initialized pre-processor class
    """

    # A lookup dictionary for the appropriate class
    preproc_class_lookup_dict = {
        "custom_orbit_segment": L1PreProcCustomOrbitSegment,
        "half_orbit": L1PreProcHalfOrbit,
        "full_orbit": L1PreProcFullOrbit,
    }

    # Try to get the class
    cls = preproc_class_lookup_dict.get(type, None)

    # Error handling
    if cls is None:
        msg = "Unrecognized Level-1 Pre-Processor class type: %s" % (str(type))
        msg += "\nKnown types:"
        for key in preproc_class_lookup_dict.keys():
            msg += "\n - %s" % key
        error = ErrorStatus(caller_id="Level1PreProcessor")
        error.add_error("invalid-l1preproc-class", msg)
        error.raise_on_error()

    # Return the initialized class
    return cls(input_adapter, output_handler, cfg)
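The lookup-or-fail pattern used above can be sketched in isolation; the following is a minimal, self-contained illustration with placeholder classes standing in for the pysiral pre-processor classes:

# Minimal illustration of the lookup-or-fail pattern (placeholder classes,
# not the pysiral pre-processor classes).
lookup = {"half_orbit": dict, "full_orbit": list}
requested_type = "half_orbit"
cls = lookup.get(requested_type)
if cls is None:
    known = "\n - ".join(lookup)
    raise ValueError(f"Unrecognized type: {requested_type}\nKnown types:\n - {known}")
instance = cls()  # the real call would be cls(input_adapter, output_handler, cfg)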
Example #2
class Level2ProductDefinition(DefaultLoggingClass):
    """ Main configuration class for the Level-2 Processor """

    def __init__(self, run_tag, l2_settings_file):

        super(Level2ProductDefinition, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(self.__class__.__name__)

        # Mandatory parameter
        self._run_tag = run_tag
        self._l2_settings_file = l2_settings_file
        self._parse_l2_settings()

        # Optional parameters (may be set to default values if not specified)
        self._output_handler = []

    def add_output_definition(self, output_def_file, period="default", overwrite_protection=True):

        # Set given or default output handler
        self._output_handler.append(DefaultLevel2OutputHandler(
            output_def=output_def_file, subdirectory=self.run_tag,
            period=period, overwrite_protection=overwrite_protection))

    def _parse_l2_settings(self):
        try:
            self._l2def = get_yaml_config(self._l2_settings_file)
        except Exception as msg:
            self.error.add_error("invalid-l2-settings", msg)
            self.error.raise_on_error()
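A hedged usage sketch of this class; the run tag and file paths below are placeholders, not shipped pysiral files:

# Hypothetical usage: the settings and output definition files are placeholders.
l2_def = Level2ProductDefinition("my-run-tag", "l2_settings.yaml")
l2_def.add_output_definition("l2i_output_def.yaml", period="default")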
Example #3
    def get_files_from_time_range(self, time_range: DatePeriod) -> List[str]:
        """
        Query l1p files for a given time range.
        :param time_range: a dateperiods.DatePeriod instance
        :return:
        """

        # Validate time_range (needs to be of type DatePeriod)
        if not isinstance(time_range, DatePeriod):
            error = ErrorStatus()
            msg = "Invalid type of time_range, required: dateperiods.DatePeriod, was %s" % (type(time_range))
            error.add_error("invalid-timerange-type", msg)
            error.raise_on_error()

        # 1) get list of all files for monthly folders
        yyyy, mm = "%04g" % time_range.tcs.year, "%02g" % time_range.tcs.month
        directory = Path(self.l1p_base_dir)
        if self._file_version is not None:
            directory = directory / self._file_version
        directory = directory / self._hemisphere / yyyy / mm
        all_l1p_files = sorted(list(directory.rglob("*.nc")))

        # 2) Check if files are in requested time range
        # This serves two purposes: a) filter out files with timestamps that do
        # not belong in the directory. b) get a subset if required
        l1p_filepaths = [l1p_file for l1p_file in all_l1p_files if self.l1p_in_trange(l1p_file, time_range)]

        # Save last search directory
        self._last_directory = directory

        # Done
        return l1p_filepaths
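A hedged query example, assuming the handler instance (`l1p_handler`) has been constructed elsewhere and that `DatePeriod` accepts `[year, month]` definitions for the period start and end:

from dateperiods import DatePeriod

# Hypothetical call: l1p_handler is a placeholder handler instance.
period = DatePeriod([2019, 3], [2019, 3])
l1p_files = l1p_handler.get_files_from_time_range(period)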
Example #4
def get_local_l1bdata_files(mission_id, time_range, hemisphere, config=None,
                            version="default", allow_multiple_baselines=True):
    """
    Returns a list of l1bdata files for a given mission, hemisphere, version
    and time range
    XXX: Note: this function will slowly replace `get_l1bdata_files`, which
         is limited to full month
    """

    # parse config data (if not provided)
    if config is None or not isinstance(config, ConfigInfo):
        config = ConfigInfo()

    # Validate time_range (needs to be of type TimeRangeIteration)
    try:
        time_range_is_correct_object = time_range.base_period == "monthly"
    except AttributeError:
        time_range_is_correct_object = False
    if not time_range_is_correct_object:
        error = ErrorStatus()
        msg = "Invalid type of time_range, required: %s, was %s" % (
            TimeRangeIteration, type(time_range))
        error.add_error("invalid-timerange-type", msg)
        error.raise_on_error()

    # 1) get list of all files for monthly folders
    yyyy, mm = "%04g" % time_range.start.year, "%02g" % time_range.start.month
    l1b_repo = config.local_machine.l1b_repository[mission_id][version].l1bdata
    directory = os.path.join(l1b_repo, hemisphere, yyyy, mm)
    all_l1bdata_files = sorted(glob.glob(os.path.join(directory, "*.nc")))

    # 2) First filtering step: Check if different algorithm baseline values
    # exist in the list of l1bdata files
    algorithm_baselines = [l1bdata_get_baseline(f) for f in all_l1bdata_files]
    baselines = np.unique(np.array(algorithm_baselines))
    n_baselines = len(baselines)
    if not allow_multiple_baselines and n_baselines > 1:
        error = ErrorStatus()
        baseline_str_list = ", ".join(baselines)
        msg = "Multiple l1bdata baselines (%g) [%s] found in directory: %s" % (
                n_baselines, baseline_str_list, directory)
        error.add_error("multiple-l1b-baselines", msg)
        error.raise_on_error()

    # 3) Check if files are in requested time range
    # This serves two purposes: a) filter out files with timestamps that do
    # not belong in the directory. b) get a subset if required
    l1bdata_files_checked = [l1bdata_file for l1bdata_file in all_l1bdata_files
                             if l1bdata_in_trange(l1bdata_file, time_range)]

    # Done return list (empty or not)
    return l1bdata_files_checked, directory
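A sketch of a call; the mission id is an example and `monthly_iteration` is a placeholder for a `TimeRangeIteration`-like object with `base_period == "monthly"`:

# Hypothetical call: monthly_iteration stands in for a monthly TimeRangeIteration.
l1b_files, lookup_dir = get_local_l1bdata_files(
    "cryosat2", monthly_iteration, "north", version="default")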
Example #5
class Level2PreProcessor(DefaultLoggingClass):
    def __init__(self, product_def):
        super(Level2PreProcessor, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Sanity check of product definition object
        if not isinstance(product_def, Level2PreProcProductDefinition):
            msg = "Invalid Level-2 PreProcessor product definition: %s" % \
                type(product_def)
            self.error.add_error("invalid-l2preproc-def", msg)
            self.error.raise_on_error()
        self._job = product_def

    def process_l2i_files(self, l2i_files, period):
        """ Reads all l2i files and merges the valid data into a l2p
        summary file """

        # l2p: Container for storing l2i objects
        l2p = Level2PContainer(period)

        # Add all l2i objects to the l2p container.
        # NOTE: Only memory is the limit
        for l2i_file in l2i_files:
            try:
                l2i = L2iNCFileImport(l2i_file)
            except Exception as ex:
                msg = "Error (%s) in l2i file: %s"
                msg = msg % (ex, Path(l2i_file).name)
                self.log.error(msg)
                continue
            l2p.append_l2i(l2i)

        # Merge the l2i object to a single L2Data object
        l2 = l2p.get_merged_l2()
        if l2 is None:
            self.log.warning("- No valid freeboard data found for, skip day")
            return

        # Write output
        output = Level2Output(l2, self.job.output_handler)
        self.log.info("- Wrote %s data file: %s" %
                      (self.job.output_handler.id, output.export_filename))

    @property
    def job(self):
        return self._job
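A sketch of the intended call sequence; the product definition, file list, and period objects are placeholders:

# Hypothetical usage: product_def must be a Level2PreProcProductDefinition,
# l2i_files a list of l2i netCDF file paths, period a dateperiods-style object.
l2preproc = Level2PreProcessor(product_def)
l2preproc.process_l2i_files(l2i_files, period)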
Example #6
def get_local_l1bdata_files(mission_id,
                            time_range,
                            hemisphere,
                            config=None,
                            version="default",
                            allow_multiple_baselines=True):
    """
    Returns a list of l1bdata files for a given mission, hemisphere, version
    and time range
    XXX: Note: this function will slowly replace `get_l1bdata_files`, which
         is limited to full month
    """

    # parse config data (if not provided)
    if config is None or not isinstance(config, psrlcfg):
        config = psrlcfg

    # Validate time_range (needs to be of type dateperiods.DatePeriod)
    if not isinstance(time_range, DatePeriod):
        error = ErrorStatus()
        msg = "Invalid type of time_range, required: dateperiods.DatePeriod, was %s" % (
            type(time_range))
        error.add_error("invalid-timerange-type", msg)
        error.raise_on_error()

    # 1) get list of all files for monthly folders
    yyyy, mm = "%04g" % time_range.tcs.year, "%02g" % time_range.tcs.month

    repo_branch = config.local_machine.l1b_repository[mission_id][version]
    directory = Path(repo_branch["l1p"]) / hemisphere / yyyy / mm
    all_l1bdata_files = sorted(directory.glob("*.nc"))

    # 2) Check if files are in requested time range
    # This serves two purposes: a) filter out files with timestamps that do
    # not belong in the directory. b) get a subset if required
    l1bdata_files_checked = [
        l1bdata_file for l1bdata_file in all_l1bdata_files
        if l1bdata_in_trange(l1bdata_file, time_range)
    ]

    # Done return list (empty or not)
    return l1bdata_files_checked, directory
Example #7
def MaskSourceFile(mask_name, mask_cfg):
    """ Wrapper method for different mask source file classes """

    error = ErrorStatus(caller_id="MaskSourceFile")

    try:
        mask_dir = psrlcfg.local_machine.auxdata_repository.mask[mask_name]
    except KeyError:
        mask_dir = None
        msg = "path to mask %s not in local_machine_def.yaml" % mask_name
        error.add_error("missing-lmd-def", msg)
        error.raise_on_error()

    # Return the Dataset class
    try:
        return globals()[mask_cfg.pyclass_name](mask_dir, mask_name, mask_cfg)
    except KeyError:
        msg = "pysiral.mask.%s not implemented" % str(mask_cfg.pyclass_name)
        error.add_error("missing-mask-class", msg)
        error.raise_on_error()
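A hedged example of resolving a mask class; the mask name is a placeholder and `mask_cfg` must provide a `pyclass_name` attribute matching a class defined in the module:

# Hypothetical call: "landsea" is a placeholder mask name defined in
# local_machine_def.yaml, mask_cfg a configuration object with pyclass_name.
mask = MaskSourceFile("landsea", mask_cfg)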
Example #8
class ICESatGLAH13Repository(DefaultLoggingClass):

    _GLAH13_SEARCH = r"GLAH13_*.H5"

    def __init__(self, local_repository_path):

        # Init class and error handler
        class_name = self.__class__.__name__
        super(ICESatGLAH13Repository, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)

        # Sanity check on path to local repository
        if os.path.isdir(str(local_repository_path)):
            self._local_repository_path = local_repository_path
        else:
            msg = "Invalid GLAH13 directory: %s" % str(local_repository_path)
            self.error.add_error("invalid-dir", msg)
            self.error.raise_on_error()

    def get_glah13_hdfs(self, time_range):
        search_folder = self._get_full_path(time_range)
        search = os.path.join(search_folder, self._GLAH13_SEARCH)
        return sorted(glob(search))

    def _get_full_path(self, time_range):
        """ Assuming the time range monthly """
        folder = self.local_repository_path
        subfolders = [
            "%04g" % time_range.start.year,
            "%02g" % time_range.start.month
        ]
        return os.path.join(folder, *subfolders)

    @property
    def local_repository_path(self):
        return self._local_repository_path
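A sketch of a typical lookup; the repository path is a placeholder and `time_range.start` is assumed to expose `year` and `month` attributes as used in `_get_full_path`:

# Hypothetical usage: the directory is a placeholder and must exist.
repo = ICESatGLAH13Repository("/data/icesat/glah13")
hdf_files = repo.get_glah13_hdfs(time_range)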
Example #9
class Warren99AMSR2ClimDataContainer(object):
    """
    A dedicated data container for the merged W99/AMSR2 snow climatology. This class has been introduced
    with the use of daily scaling, which requires data to be loaded also from the months adjacent to the month
    of the current Level-2 data object
    """
    def __init__(self, cfg, use_daily_scaling):
        """
        Init the class
        :param cfg: A copy of the auxdata class configuration
        :param use_daily_scaling:
        """

        # Properties
        self.cfg = cfg
        self.use_daily_scaling = use_daily_scaling
        self.data = None
        self.filepaths = []
        self.error = ErrorStatus()

    def load(self):
        """
        Load the required data. This will load the data for all winter months into memory, so that `get_var`
        can return either a weighted field (if `use_daily_scaling` is True) or just the field from the corresponding month
        :return:
        """

        # Check if data is already loaded
        if self.has_data_loaded:
            return

        # Load the data of all months
        self.data = []
        for month_num in self.month_nums:

            # Get the target file path
            filepath = self.get_filepath(month_num)

            # Read the data set (and raise hard error if input is missing)
            try:
                nc = open_dataset(filepath)
                self.data.append(nc)
                self.filepaths.append(filepath)
            except FileNotFoundError:
                msg = "Could not locate file: {}".format(filepath)
                self.error.add_error("invalid-filepath", msg)
                self.error.raise_on_error()

    def get_lonlat(self):
        """
        Return longitude and latitude variables
        :return:
        """

        # The grid is the same for all months, therefore we can just retrieve the fields
        # from the first data set
        dset = self.data[0]
        return dset.longitude.values, dset.latitude.values

    def get_var(self, parameter_name, date_tuple):
        """
        Get a geophysical variable from the netCDF(s). If daily scaling is activated, the date information
        given by date_tuple will be used to create output fields that are interpolated between adjacent months.
        :param parameter_name:
        :param date_tuple:
        :return:
        """

        # There are three cases that require different handling:
        #
        # 1. daily scaling is off
        #    -> return the single field of the single data set for the corresponding month
        if not self.use_daily_scaling:
            return self.get_monthly_field(date_tuple[1], parameter_name)

        # 2. daily scaling is on and requested date is a reference date
        #    -> return the field of the single data set for the reference date
        is_reference_date = date_tuple[1:] in self.reference_dates
        if self.use_daily_scaling and is_reference_date:
            return self.get_monthly_field(date_tuple[1], parameter_name)

        # 3. daily scaling is on and requested date is between reference dates
        #   -> return a linear interpolated field based on the distance to the two enclosing
        #      reference dates
        if self.use_daily_scaling and not is_reference_date:
            return self.get_weighted_variable(date_tuple, parameter_name)

    def get_filepath(self, month_num):
        """
        Return the file path for a given month
        :param month_num: Number of month (1-12)
        :return:
        """

        # Create a dictionary for automatic filepath completion
        date_dict = dict(month="{:02g}".format(month_num))

        # Main directory
        path = Path(self.cfg.local_repository)

        # Add the subfolders
        for subfolder_tag in self.cfg.subfolders:
            subfolder = date_dict[subfolder_tag]
            path = path / subfolder

        # Get the period dict (will be constructed from filenaming)
        period_dict = {}
        attrs = re.findall("{.*?}", self.cfg.filenaming)
        for attr_def in attrs:
            attr_name = attr_def[1:-1]
            period_dict[attr_name] = date_dict[attr_name]
        filename = self.cfg.filenaming.format(**period_dict)
        path = path / filename
        return path

    def get_monthly_field(self, month_num, parameter_name):
        """
        Return the monthly field for given parameter name
        :param month_num:
        :param parameter_name:
        :return:
        """
        index = self.month_nums.index(month_num)
        variable = getattr(self.data[index], parameter_name, None)
        if variable is None:
            msg = "Dataset has no variable: {}".format(parameter_name)
            self.error.add_error("invalid-variable", msg)
            self.error.raise_on_error()
        return variable.values

    def get_reference_month_nums(self, date_tuple):
        """
        Return the two months required for the interpolation.
        :param date_tuple: [year, month, day] as integer
        :return: month_left, month_right, weight_factor
        """

        # Compute the difference in days between the requested date and the reference dates
        requested_date_dt = datetime(*date_tuple)
        ref_datetimes = self.get_reference_datetimes(date_tuple)
        ref_date_offset = [(requested_date_dt - dt).days
                           for dt in ref_datetimes]

        # Find the index of the first month where the difference in days is negative (right boundary)
        month_right_index = int(np.argmax(np.array(ref_date_offset) < 0))
        month_left_index = month_right_index - 1
        month_left, month_right = self.month_nums[
            month_left_index], self.month_nums[month_right_index]

        # Check solution
        if month_left_index < 0:
            logger.warning(
                "Target month is outside data coverage, weighting factor -> NaN"
            )
            return 10, 11, np.nan
            # msg = "Month not found, check input or bug in code"
            # self.error.add_error("unspecified-error", msg)
            # self.error.raise_on_error()

        # Compute the weighting factor
        period_n_days = (ref_datetimes[month_right_index] -
                         ref_datetimes[month_left_index]).days
        weight_factor = float(
            ref_date_offset[month_left_index]) / float(period_n_days)

        # All done
        return month_left, month_right, weight_factor

    def get_reference_datetimes(self, date_tuple):
        """
        Creates datetime objects for the reference dates of the actual winter season
        :param date_tuple:
        :return:
        """

        # Get the winter id (year of October for October - April winter)
        winter_id = date_tuple[0] - int(date_tuple[1] < 10)
        year_vals = [winter_id] * 3 + [winter_id + 1] * 4
        ref_dts = [
            datetime(yyyy, mm, dd)
            for yyyy, (mm, dd) in zip(year_vals, self.reference_dates)
        ]
        return ref_dts

    def get_weighted_variable(self, date_tuple, parameter_name):
        """
        Compute the weighted variable between two reference dates
        :param date_tuple:
        :param parameter_name:
        :return:
        """

        # Get the fields of both reference months
        month_num_left, month_num_right, weight_factor = self.get_reference_month_nums(
            date_tuple)
        var_left = self.get_monthly_field(month_num_left, parameter_name)
        var_right = self.get_monthly_field(month_num_right, parameter_name)

        # Get the relative distance (0: var_left, 1: var_right)
        var = var_left + weight_factor * (var_right - var_left)

        # Done
        return var

    @property
    def w99_weight(self):
        """
        Return the static regional mask for the merged climatology
        :return:
        """
        return self.data[0].w99_weight.values

    @property
    def has_data_loaded(self):
        """
        Status flag if data is present for the current data period
        :return:
        """
        return self.data is not None

    @property
    def month_nums(self):
        return [10, 11, 12, 1, 2, 3, 4]

    @property
    def reference_dates(self):
        """
        Return the reference dates (month, day) for the winter season
        :return:
        """
        return [
            [10, 1],  # October 1st (to get full coverage of October)
            [11, 15],
            [12, 15],
            [1, 15],
            [2, 15],
            [3, 15],
            [4, 30]
        ]  # April 30th (to get full coverage of April)
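The weighting in `get_reference_month_nums` can be illustrated with a small self-contained computation, using the reference dates defined above (date chosen for illustration):

from datetime import datetime

# Illustration of the interpolation weight for 2019-11-30 with the
# reference dates [11, 15] and [12, 15] from the class above.
requested = datetime(2019, 11, 30)
ref_left, ref_right = datetime(2019, 11, 15), datetime(2019, 12, 15)
weight = (requested - ref_left).days / (ref_right - ref_left).days
print(weight)  # 0.5 -> weighted field = var_nov + 0.5 * (var_dec - var_nov)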
Example #10
class ESACryoSat2PDSBaselineD(DefaultLoggingClass):
    def __init__(self, cfg, raise_on_error=False):

        cls_name = self.__class__.__name__
        super(ESACryoSat2PDSBaselineD, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None

    @staticmethod
    def translate_opmode2radar_mode(op_mode):
        """ Converts the ESA operation mode str in the pysiral compliant version """
        translate_dict = {"sar": "sar", "lrm": "lrm", "sarin": "sin"}
        return translate_dict.get(op_mode, None)

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Main entry point to the CryoSat-2 Baseline-D Input Adapter
        :param filepath: path to the input netCDF file
        :param polar_ocean_check: optional check for the presence of polar ocean data (the file is skipped if there is none)
        :return: the Level-1 data object (or None if the file is invalid or skipped)
        """

        timer = StopWatch()
        timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not os.path.isfile(filepath):
            msg = "Not a valid file: %s" % filepath
            self.log.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse the input file
        self._read_input_netcdf(filepath, attributes_only=True)

        if self.error.status:
            return self.empty

        # Get metadata
        self._set_input_file_metadata()

        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(
                self.l1.info)
            if not has_polar_ocean_data:
                timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        timer.stop()
        self.log.info("- Created L1 object in %.3f seconds" %
                      timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def get_wfm_range(window_delay, n_range_bins):
        """
        Returns the range for each waveform bin based on the window delay and the number of range bins
        :param window_delay: The two-way delay to the center of the range window in seconds
        :param n_range_bins: The number of range bins (256: sar, 512: sin)
        :return: The range for each waveform bin as array (time, ns)
        """
        lightspeed = 299792458.0
        bandwidth = 320000000.0
        # The two-way delay time gives the distance to the central bin
        central_window_range = window_delay * lightspeed / 2.0
        # Calculate the offset from the center to the first range bin
        window_size = (n_range_bins * lightspeed) / (4.0 * bandwidth)
        first_bin_offset = window_size / 2.0
        # Calculate the range increment for each bin
        range_increment = np.arange(n_range_bins) * lightspeed / (4.0 *
                                                                  bandwidth)

        # Reshape the arrays
        range_offset = np.tile(range_increment,
                               (window_delay.shape[0], 1)) - first_bin_offset
        window_range = np.tile(central_window_range,
                               (n_range_bins, 1)).transpose()

        # Compute the range for each bin and return
        wfm_range = window_range + range_offset
        return wfm_range

    @staticmethod
    def interp_1Hz_to_20Hz(variable_1Hz, time_1Hz, time_20Hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1Hz: an 1Hz variable array
        :param time_1Hz: 1Hz reference time
        :param time_20Hz: 20 Hz reference time
        :return: the interpolated 20Hz variable
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1Hz,
                                     variable_1Hz,
                                     bounds_error=False,
                                     **kwargs)
            variable_20Hz = f(time_20Hz)
        except ValueError:
            fill_value = np.nan
            variable_20Hz = np.full(time_20Hz.shape, fill_value)
            error_status = True
        return variable_20Hz, error_status

    def _read_input_netcdf(self, filepath, attributes_only=False):
        """ Read the netCDF file via xarray """
        try:
            self.nc = xarray.open_dataset(filepath,
                                          decode_times=False,
                                          mask_and_scale=True)
        except Exception:
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.log.warning(msg)
            return

    def _set_input_file_metadata(self):
        """ Fill the product info """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        info.set_attribute("mission", "cryosat2")
        info.set_attribute("mission_sensor", "siral")
        info.set_attribute("mission_data_version", "D")
        info.set_attribute("orbit", metadata["abs_orbit_start"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source",
                           filename_from_path(self.filepath))
        info.set_attribute(
            "timeliness",
            cs2_procstage2timeliness(metadata["processing_stage"]))

        # Time-Orbit Metadata
        lats = [
            float(metadata["first_record_lat"]) * 1e-6,
            float(metadata["last_record_lat"]) * 1e-6
        ]
        lons = [
            float(metadata["first_record_lon"]) * 1e-6,
            float(metadata["last_record_lon"]) * 1e-6
        ]
        info.set_attribute("start_time",
                           parse_datetime_str(
                               metadata["first_record_time"][4:]))  # TAI=....
        info.set_attribute("stop_time",
                           parse_datetime_str(
                               metadata["last_record_time"][4:]))  # TAI=....
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 0.0
            if metadata["sir_op_mode"].strip().lower() == mode:
                percent_value = 100.
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent",
                           float(metadata["open_ocean_percent"]) * 0.01)

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that the xarray does not automatically decodes time since it is
        #       difficult to work with the numpy datetime64 date format. Better to compute datetimes using
        #       a know num2date conversion
        tai_datetime = num2date(self.nc.time_20_ku.values,
                                units=self.nc.time_20_ku.units)
        converter = UTCTAIConverter()
        utc_timestamp = converter.tai2utc(tai_datetime, check_all=False)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(self.nc.lon_20_ku.values,
                                        self.nc.lat_20_ku.values,
                                        self.nc.alt_20_ku.values,
                                        self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        self.l1.time_orbit.set_antenna_attitude(
            self.nc.off_nadir_pitch_angle_str_20_ku.values,
            self.nc.off_nadir_roll_angle_str_20_ku.values,
            self.nc.off_nadir_yaw_angle_str_20_ku.values)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag
        :return: None
        """

        # Get the waveform
        # NOTE: Convert the waveform units to Watts. From the documentation, the scaling is applied as follows:
        #       pwr_waveform_20_ku(time, ns) * echo_scale_factor_20_ku(time, ns) * 2 ^ echo_scale_pwr_20_ku(time)
        wfm_linear = self.nc.pwr_waveform_20_ku.values

        # Get the shape of the waveform array
        dim_time, dim_ns = wfm_linear.shape

        # Scaling parameters are 1D -> Replicate to same shape as waveform array
        echo_scale_factor = self.nc.echo_scale_factor_20_ku.values
        echo_scale_pwr = self.nc.echo_scale_pwr_20_ku.values
        echo_scale_factor = np.tile(echo_scale_factor, (dim_ns, 1)).transpose()
        echo_scale_pwr = np.tile(echo_scale_pwr, (dim_ns, 1)).transpose()

        # Convert the waveform from linear counts to Watts
        wfm_power = wfm_linear * echo_scale_factor * 2.0**echo_scale_pwr

        # Get the window delay
        # From the documentation:
        #   Calibrated 2-way window delay: distance from CoM to middle range window (at sample ns/2 from 0).
        #   It includes all the range corrections given in the variable instr_cor_range and in the
        #   variable uso_cor_20_ku. This is a 2-way time and 2-way corrections are applied.
        window_delay = self.nc.window_del_20_ku.values

        # Convert window delay to range for each waveform range bin
        wfm_range = self.get_wfm_range(window_delay, dim_ns)

        # Make sure that parameters are float and not double
        # -> Important for cythonized algorithm parts (ctfrma specifically uses floats)
        wfm_power = wfm_power.astype(np.float32)
        wfm_range = wfm_range.astype(np.float32)

        # Set the waveform
        op_mode = str(self.nc.attrs["sir_op_mode"].strip().lower())
        radar_mode = self.translate_opmode2radar_mode(op_mode)
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # Get the valid flags
        measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        valid_flag = measurement_confident_flag == 0
        self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1Hz = self.nc.time_cor_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            pds_var_name = self.cfg.range_correction_targets[key]
            variable_1Hz = getattr(self.nc, pds_var_name)
            variable_20Hz, error_status = self.interp_1Hz_to_20Hz(
                variable_1Hz.values, time_1Hz, time_20Hz)
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % pds_var_name
                self.log.warning(msg)
            self.l1.correction.set_parameter(key, variable_20Hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object
        NOTE: In the current state (TEST dataset), the surface type flag is only 1 Hz. A nearest neighbour
              interpolation is used to get the 20Hz surface type flag.
        :return: None
        """

        # Get the reference times for interpolating the flag from 1Hz -> 20Hz
        time_1Hz = self.nc.time_cor_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Interpolate 1Hz surface type flag to 20 Hz
        surface_type_1Hz = self.nc.surf_type_01.values
        surface_type_20Hz, error_status = self.interp_1Hz_to_20Hz(
            surface_type_1Hz, time_1Hz, time_20Hz, kind="nearest")
        if error_status:
            msg = "- Error in 20Hz interpolation for variable `surf_type_01` -> set only dummy"
            self.log.warning(msg)

        # Set the flag
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = surface_type_20Hz == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz
        In addition, a few legacy parameters are computed based on the waveform counts that are only available at
        this stage. Computation of other parameters such as sigma_0, leading_edge_width, ... is moved to the
        post-processing
        :return: None
        """
        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20Hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20Hz, key)

        # Calculate Parameters from waveform counts
        # XXX: This is a legacy of the CS2AWI IDL processor
        #      Threshold defined for waveform counts not power in dB
        wfm_counts = self.nc.pwr_waveform_20_ku.values

        # Calculate the OCOG Parameter (CryoSat-2 notation)
        ocog = CS2OCOGParameter(wfm_counts)
        self.l1.classifier.add(ocog.width, "ocog_width")
        self.l1.classifier.add(ocog.amplitude, "ocog_amplitude")

        # Calculate the Peakiness (CryoSat-2 notation)
        pulse = CS2PulsePeakiness(wfm_counts)
        self.l1.classifier.add(pulse.peakiness, "peakiness")
        self.l1.classifier.add(pulse.peakiness_r, "peakiness_r")
        self.l1.classifier.add(pulse.peakiness_l, "peakiness_l")

        # fmi version: Calculate the LTPP
        ltpp = CS2LTPP(wfm_counts)
        self.l1.classifier.add(ltpp.ltpp, "late_tail_to_peak_power")

        # Get satellite velocity vector (classifier needs to be vector -> manual extraction needed)
        satellite_velocity_vector = self.nc.sat_vel_vec_20_ku.values
        self.l1.classifier.add(satellite_velocity_vector[:, 0],
                               "satellite_velocity_x")
        self.l1.classifier.add(satellite_velocity_vector[:, 1],
                               "satellite_velocity_y")
        self.l1.classifier.add(satellite_velocity_vector[:, 2],
                               "satellite_velocity_z")

    @property
    def empty(self):
        return None
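The 1 Hz to 20 Hz interpolation helper is a static method and can be exercised in isolation with synthetic time arrays (for illustration only):

import numpy as np

# Synthetic demo of the linear 1Hz -> 20Hz interpolation used above.
time_1Hz = np.arange(0.0, 10.0, 1.0)      # 1 Hz reference time
time_20Hz = np.arange(0.0, 9.0, 0.05)     # 20 Hz reference time
variable_1Hz = np.sin(time_1Hz)
variable_20Hz, error_status = ESACryoSat2PDSBaselineD.interp_1Hz_to_20Hz(
    variable_1Hz, time_1Hz, time_20Hz)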
Example #11
class DefaultAuxdataClassHandler(DefaultLoggingClass):
    """ Class for retrieving handler classes for auxiliary data
    (mss, sic, sitype, snow). The classes are initialized with directory
    information from the local machine definition and the auxdata information
    from the `auxdata_def.yaml` configuration file.
    """
    def __init__(self):
        super(DefaultAuxdataClassHandler,
              self).__init__(self.__class__.__name__)
        self.pysiral_config = ConfigInfo()
        self.error = ErrorStatus(caller_id=self.__class__.__name__)

    def get_pyclass(self, auxdata_class, auxdata_id, l2_procdef_opt):
        """
        Returns a class for handling auxiliary data files, that is initialized
        with auxdata settings in `config/auxdata_def.yaml` and with the
        directory specified in `local_machine_def.yaml`

        Args:
            auxdata_class (str): Auxdata class (e.g. mss, sic, sitype, snow)
            auxdata_id (str): Auxdata class identifier (e.g. osisaf)

        Returns:
            class: The initialized auxdata handler class
        """

        # Clear errors
        self.error.reset()

        # Initialize the class with information from auxdata_def.yaml
        auxdata_def = self.get_auxdata_def(auxdata_class, auxdata_id)
        if auxdata_def is None:
            error_id = "auxdata_missing_definition"
            error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class,
                                                             auxdata_id)
            self.error.add_error(error_id, error_message)
            self.error.raise_on_error()

        # Set the auxdata config
        cfg = AuxClassConfig()

        # connect to repository on local machine
        if "local_repository" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class,
                                                   local_repository_id)
            if local_repo is None and local_repository_id is not None:
                error_id = "auxdata_missing_localrepo_def"
                error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class,
                                                                 auxdata_id)
                self.error.add_error(error_id, error_message)
                self.error.raise_on_error()
            cfg.set_local_repository(local_repo)

        # set doc str (should be mandatory for all auxdata handlers)
        if "long_name" in auxdata_def:
            cfg.set_long_name(auxdata_def.long_name)

        # set filename (e.g. for mss)
        if "filename" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class,
                                                   local_repository_id)
            filename = os.path.join(local_repo, auxdata_def.filename)
            cfg.set_filename(filename)

        # set filenaming (e.g. for sic, sitype, snow)
        if "filenaming" in auxdata_def:
            cfg.set_filenaming(auxdata_def.filenaming)

        # set subfolders (e.g. for sic, sitype, snow)
        if "subfolders" in auxdata_def:
            cfg.set_subfolder(auxdata_def.subfolders)

        # Set the default options from the auxiliary definition file
        if "options" in auxdata_def:
            options = auxdata_def.get("options", None)
            if options is not None:
                cfg.set_options(**options)

        # Override option with definition from the l2 processor settings
        if l2_procdef_opt is not None:
            cfg.set_options(**l2_procdef_opt)

        # Get the auxiliary data class
        module_name, class_name = "pysiral.auxdata.%s" % (
            auxdata_class), auxdata_def["pyclass"]
        auxclass = get_cls(module_name, class_name)
        if auxclass is None:
            error_id = "auxdata_invalid_class_name"
            msg = "Invalid Auxdata class: %s.%s" % (module_name, class_name)
            self.error.add_error(PYSIRAL_ERROR_CODES[error_id], msg)
            self.error.raise_on_error()

        # Init the auxiliary class
        # Note: This will trigger any action defined in the subclasses, such as reading static background files
        auxdata_handler = auxclass(cfg)

        # All done, return
        return auxdata_handler

    def get_local_repository(self, auxdata_class, auxdata_id):
        """ Get the local repository for the the auxdata type and id """
        if auxdata_id is None:
            return None
        aux_repo_defs = self.pysiral_config.local_machine.auxdata_repository
        try:
            local_repo_auxclass = aux_repo_defs[auxdata_class]
        except KeyError:
            msg = "Missing auxdata definition in local_machine_def.yaml: auxdata_repository.%s" % auxdata_class
            self.error.add_error("missing-localmachinedef-tag", msg)
            self.error.raise_on_error()
        return local_repo_auxclass.get(auxdata_id, None)

    def get_auxdata_def(self, auxdata_class, auxdata_id):
        """ Returns the definition in `config/auxdata_def.yaml` for
        specified auxdata class and id """
        try:
            auxdata_class_def = self.pysiral_config.auxdata[auxdata_class]
        except KeyError:
            msg = "Invalid auxdata class [%s] in auxdata_def.yaml" % auxdata_class
            self.error.add_error("invalid-auxdata-class", msg)
            self.error.raise_on_error()
        return auxdata_class_def.get(auxdata_id, None)
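A sketch of resolving an auxiliary data handler; the identifiers follow the examples in the docstring and a configured pysiral installation is assumed:

# Hypothetical call: "sic"/"osisaf" follow the docstring examples; passing
# None keeps the default options from auxdata_def.yaml.
handler = DefaultAuxdataClassHandler()
sic_handler = handler.get_pyclass("sic", "osisaf", None)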
Example #12
class L2iDataHandler(DefaultLoggingClass):
    """ Class for retrieving default l1b directories and filenames """
    def __init__(self, base_directory, force_l2i_subfolder=True):
        super(L2iDataHandler, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(caller_id=self.__class__.__name__)
        self._base_directory = base_directory
        self._force_l2i_subfolder = force_l2i_subfolder
        self._subdirectory_list = self.get_subdirectory_list()
        self._validate_base_directory()

    def get_files_from_time_range(self, time_range):
        """ Get all files that fall into time range (May be spread over
        the different year/ month subfolders """
        l2i_files = []
        for year, month, day in time_range.days_list:
            lookup_directory = self.get_lookup_directory(year, month)
            if not os.path.isdir(lookup_directory):
                continue
            l2i_pattern = self.get_l2i_search_str(year=year,
                                                  month=month,
                                                  day=day)
            result = glob.glob(os.path.join(lookup_directory, l2i_pattern))
            l2i_files.extend(sorted(result))
        return l2i_files

    def get_files_for_day(self, day_dt):
        """ Retrieve a list of l2i files with data points for a given day.
        Also specifically looks for files with had a start time on the
        previous day """

        # Get the lookup directory
        lookup_directory = self.get_lookup_directory(day_dt.year, day_dt.month)

        # XXX: We are not evaluating the netCDF attributes at this point
        #      but assuming that the filename contains start and stop
        #      time. This is a pretty safe assumption, but this approach
        #      should be replaced as soon as a proper inspection tool is
        #      available
        day_search = self.get_l2i_search_str(year=day_dt.year,
                                             month=day_dt.month,
                                             day=day_dt.day)
        search_str = os.path.join(lookup_directory, day_search)
        l2i_files = glob.glob(search_str)

        # Check if day is the first day of the month
        # yes -> check last file of previous month which might have data
        #        for the target day
        if day_dt.day == 1:
            previous_day = day_dt - timedelta(days=1)
            lookup_directory = self.get_lookup_directory(
                previous_day.year, previous_day.month)
            search_str = os.path.join(lookup_directory, day_search)
            additional_l2i_files = glob.glob(search_str)
            l2i_files.extend(additional_l2i_files)

        # All done, return sorted output
        return sorted(l2i_files)

    def _validate_base_directory(self):
        """ Performs sanity checks and enforces the l2i subfolder """
        # 1. Path must exist
        if not os.path.isdir(self._base_directory):
            msg = "Invalid l2i product directory: %s"
            msg = msg % str(self._base_directory)
            self.error.add_error("invalid-l2i-productdir", msg)
            self.error.raise_on_error()

    def get_lookup_directory(self, year, month):
        """ Return the sub folders for a given time (datetime object) """
        subfolders = ["%4g" % year, "%02g" % month]
        lookup_directory = os.path.join(self.product_basedir, *subfolders)
        return lookup_directory

    def get_subdirectory_list(self):
        """ Returns a list of all subdirectories of type yyyy/mm """
        subdirectory_list = list()
        try:
            years = sorted(next(os.walk(self.product_basedir))[1])
        except StopIteration:
            self.log.warning("No subdirectories in %s" % self.product_basedir)
            return []
        # filter any invalid directories
        years = [y for y in years if re.match(r'[1-3][0-9]{3}', y)]
        for year in years:
            subdir_year = os.path.join(self.product_basedir, year)
            months = sorted(next(os.walk(subdir_year))[1])
            # filter any invalid directories
            months = [m for m in months if re.match(r'[0-1][0-9]', m)]
            subdirectory_list.extend([[year, m] for m in months])
        return subdirectory_list

    def get_l2i_search_str(self, year=None, month=None, day=None):
        """ Returns a search pattern for l2i files with optional refined
        search for year, month, day. Note: month & day can only be set,
        if the year & year + month respectively is set
        Examples:
            l2i*.nc
            l2i*2017*.nc
            l2i*201704*.nc
            l2i*20170401*.nc
        """
        date_str = "*"
        if year is not None:
            date_str += "%04g" % year
        if month is not None:
            if year is None:
                raise ValueError("year must be set if month is set")
            date_str += "%02g" % month
        if day is not None:
            if month is None:
                raise ValueError("year & month must be set if day is set")
            date_str += "%02g" % day
        if len(date_str) > 1:
            date_str += "*"
        l2i_file_pattern = "l2i%s.nc" % date_str
        return l2i_file_pattern

    @property
    def product_basedir(self):
        return self._base_directory

    @property
    def subdirectory_list(self):
        return self._subdirectory_list

    @property
    def start_month(self):
        """ Returns a date time object for the first month of the l2i
        product repository """
        first_month = self.subdirectory_list[0]
        return datetime(int(first_month[0]), int(first_month[1]), 1)

    @property
    def stop_month(self):
        """ Returns a date time object for the last month of the l2i
        product repository """
        last_month = self.subdirectory_list[-1]
        return datetime(int(last_month[0]), int(last_month[1]),
                        1) + relativedelta(months=1, microseconds=-1)
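A hedged usage sketch; the base directory is a placeholder that must exist, and the search pattern comment follows the docstring examples of `get_l2i_search_str`:

from datetime import datetime

# Hypothetical usage: the l2i product directory is a placeholder.
l2i_handler = L2iDataHandler("/data/pysiral/l2i/north")
l2i_files = l2i_handler.get_files_for_day(datetime(2017, 4, 1))
# get_l2i_search_str(year=2017, month=4) yields the pattern "l2i*201704*.nc"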
Example #13
class DefaultAuxdataClassHandler(DefaultLoggingClass):
    """ Class for retrieving handler classes for auxiliary data
    (mss, sic, sitype, snow). The classes are initialized with directory
    information from the local machine definition and the auxdata information
    from the `auxdata_def.yaml` configuration file.
    """

    def __init__(self):
        super(DefaultAuxdataClassHandler, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(caller_id=self.__class__.__name__)

    def get_pyclass(self, auxdata_class, auxdata_id, l2_procdef_opt):
        """
        Returns a class for handling auxiliary data files, that is initialized
        with auxdata settings in `config/auxdata_def.yaml` and with the
        directory specified in `local_machine_def.yaml`

        Args:
            auxdata_class (str): Auxdata class (e.g. mss, sic, sitype, snow)
            auxdata_id (str): Auxdata class identifier (e.g. osisaf)

        Returns:
            class: The initialized auxdata handler class
        """

        # Clear errors
        self.error.reset()

        # Initialize the class with information from auxdata_def.yaml
        auxdata_def = self.get_auxdata_def(auxdata_class, auxdata_id)
        if auxdata_def is None:
            error_id = "auxdata_missing_definition"
            error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class, auxdata_id)
            self.error.add_error(error_id, error_message)
            self.error.raise_on_error()

        # Set the auxdata config
        cfg = AuxClassConfig()

        # connect to repository on local machine
        if "local_repository" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            if local_repo is None and local_repository_id is not None:
                error_id = "auxdata_missing_localrepo_def"
                error_message = f"Missing entry `auxdata_repository.{auxdata_class}.{auxdata_id}` in " + \
                    f"local_machine_def ({psrlcfg.local_machine_def_filepath})"
                self.error.add_error(error_id, error_message)
                self.error.raise_on_error()
            empty_str = len(local_repo) == 0 if local_repo is not None else False
            if empty_str:
                msg = "Path definition for {}.{} exists in local_machine_def.yaml, but is empty string"
                msg = msg.format(auxdata_class, auxdata_id)
                logger.warning(msg)
            cfg.set_local_repository(local_repo)

        # set doc str (should be mandatory for all auxdata handlers)
        if "long_name" in auxdata_def:
            cfg.set_long_name(auxdata_def.long_name)

        # set filename (e.g. for mss)
        if "filename" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            filename = Path(local_repo) / auxdata_def.filename
            cfg.set_filename(filename)

        # set filenaming (e.g. for sic, sitype, snow)
        if "filenaming" in auxdata_def:
            cfg.set_filenaming(auxdata_def.filenaming)

        # set subfolders (e.g. for sic, sitype, snow)
        if "subfolders" in auxdata_def:
            cfg.set_subfolder(auxdata_def.subfolders)

        # Set the default options from the auxiliary definition file
        if "options" in auxdata_def:
            options = auxdata_def.get("options", None)
            if options is not None:
                cfg.set_options(**options)

        # Override option with definition from the l2 processor settings
        if l2_procdef_opt is not None:
            cfg.set_options(**l2_procdef_opt)

        # Get the auxiliary data class
        module_name, class_name = f"pysiral.auxdata.{auxdata_class}", auxdata_def["pyclass"]
        auxclass = get_cls(module_name, class_name)
        if auxclass is None:
            error_id = "auxdata_invalid_class_name"
            msg = "Invalid Auxdata class: %s.%s" % (module_name, class_name)
            self.error.add_error(PYSIRAL_ERROR_CODES[error_id], msg)
            self.error.raise_on_error()

        # Init the auxiliary class
        # Note: This will trigger any action defined in the subclasses, such as reading static background files
        auxdata_handler = auxclass(cfg)

        # All done, return
        return auxdata_handler

    def get_local_repository(self, auxdata_class, auxdata_id):
        """ Get the local repository for the the auxdata type and id """
        if auxdata_id is None:
            return None
        aux_repo_defs = psrlcfg.local_machine.auxdata_repository
        try:
            local_repo_auxclass = aux_repo_defs[auxdata_class]
        except KeyError:
            local_repo_auxclass = {}
            msg = "Missing auxdata definition in local_machine_def.yaml: auxdata_repository.%s" % auxdata_class
            self.error.add_error("missing-localmachinedef-tag", msg)
            self.error.raise_on_error()
        return local_repo_auxclass.get(auxdata_id, None)

    def get_auxdata_def(self, auxdata_class: str, auxdata_id: str) -> "AttrDict":
        """
        Returns the definition in `config/auxdata_def.yaml` for specified auxdata class and id.
        Raises an error if the entry is not found.
        :param auxdata_class: The code for auxiliary data type (sic, mss, sitype, snow, ...)
        :param auxdata_id: The id of a specific data set for the auxiliary data class (e.g. sic:osisaf-operational)
        :return: The configuration dictionary
        """

        auxdata_def = psrlcfg.auxdef.get_definition(auxdata_class, auxdata_id)
        if auxdata_def is None:
            msg = f"Cannot find entry for auxiliary data set {auxdata_class}:{auxdata_id} in auxdata_def.yaml"
            self.error.add_error("invalid-auxdata-class", msg)
            self.error.raise_on_error()
        return auxdata_def.attrdict
Example #14
class NCDataFile(DefaultLoggingClass):

    def __init__(self, output_handler):
        """
        Init the netCDF output parent class.
        NOTE: This class should only be used as a parent class.
        :param output_handler: An output handler class for the different processing level
        """

        # Init parent
        class_name = self.__class__.__name__
        super(NCDataFile, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)

        # Output handler property
        self.output_handler = output_handler

        # Class attributes
        self.data = None
        self.filename = None
        self.base_export_path = None
        self.parameter_attributes = None

        self.time_def = NCDateNumDef()

        # TODO: Make this an option?
        self.zlib = True

        self._rootgrp = None
        self._options = None
        self._proc_settings = None
        self.verbose = False

    def set_options(self, **opt_dict):
        self._options = AttrDict(**opt_dict)

    def set_processor_settings(self, proc_settings):
        self._proc_settings = proc_settings

    def set_base_export_path(self, path):
        self.base_export_path = path

    def _set_doi(self):
        if self.output_handler.has_doi:
            self.data.set_doi(self.output_handler.doi)

    def _set_data_record_type(self):
        if self.output_handler.has_doi:
            self.data.set_data_record_type(self.output_handler.data_record_type)

    def _write_global_attributes(self):
        attr_dict = self.output_handler.get_global_attribute_dict(self.data)
        self._set_global_attributes(attr_dict)

    def _populate_data_groups(self, level3=False, flip_yc=False):

        lonlat_parameter_names = ["lon", "lat", "longitude", "latitude"]

        dimdict = self.data.dimdict
        dims = dimdict.keys()

        for key in dims:
            self._rootgrp.createDimension(key, dimdict[key])

        for parameter_name, attribute_dict in self.output_handler.variable_def:

            # Check if the parameter name is also the name of the source
            # parameter
            if "var_source_name" in attribute_dict.keys():
                attribute_dict = dict(attribute_dict)
                var_source_name = attribute_dict.pop("var_source_name")
            else:
                var_source_name = parameter_name

            # Get the data container
            data = self.data.get_parameter_by_name(var_source_name, raise_on_error=False)

            # Check if the data exists
            if data is None:
                msg = "Invalid parameter name for data object: %s"
                msg = msg % parameter_name
                logger.error(msg)
                self.error.add_error("invalid-paramater", msg)
                self.error.raise_on_error()

            # Convert datetime objects to number
            if isinstance(data[0], (datetime, cftime.datetime, cftime.real_datetime)):
                data = date2num(data, self.time_def.units, self.time_def.calendar)

            # Convert bool objects to integer
            if data.dtype.str == "|b1":
                data = np.int8(data)

            # Set dimensions (dependent on product level)
            if level3:
                if flip_yc:
                    data = np.flipud(data)
                if parameter_name not in lonlat_parameter_names:
                    data = np.array([data])
                    dimensions = tuple(list(dims)[0:len(data.shape)])
                else:
                    dimensions = tuple(list(dims)[1:len(data.shape)+1])
            else:
                if len(data.shape) == 1:
                    dimensions = tuple(list(dims)[0:len(data.shape)])
                else:

                    # Register the additional dimension
                    aux_dimdict = self.data.get_multidim_auxdata_dimdict(parameter_name)
                    for dim_name, dim_value in aux_dimdict["new_dims"]:
                        self._rootgrp.createDimension(dim_name, dim_value)

                    # Add the dimension variable
                    for name, dim_data in aux_dimdict["add_dims"]:
                        dimvar = self._rootgrp.createVariable(name, dim_data.dtype.str, name, zlib=self.zlib)
                        dimvar[:] = dim_data

                    # The full dimension
                    dimensions = aux_dimdict["dimensions"]

            dtype = np.byte
            flag_mask_vals = []

            # flag_mask attributes need special handling
            if 'flag_masks' in attribute_dict.keys():
                # Widen the data type if the flag values do not fit into a signed byte
                flag_mask_vals = [int(x) for x in str(attribute_dict['flag_masks']).split(sep=',')]
                if max(flag_mask_vals) >= 128:
                    dtype = np.short
                if max(flag_mask_vals) >= 32768:
                    dtype = np.int32
                # Create and set the variable with the wider type
                var = self._rootgrp.createVariable(parameter_name, dtype, dimensions, zlib=self.zlib)
                var[:] = data.astype(dtype)
            else:
                # Create and set the variable
                var = self._rootgrp.createVariable(parameter_name, data.dtype.str, dimensions, zlib=self.zlib)
                var[:] = data

            # Add Parameter Attributes
            # NOTE: The parameter attributes may be template strings and there are special cases with
            #       flags when the data type of the attribute is not a string
            for key in sorted(attribute_dict.keys()):
                attribute = attribute_dict[key]
                attribute = self.output_handler.fill_template_string(attribute, self.data)
                if key == 'flag_masks':
                    # Use values pre-computed above
                    attribute = np.asarray(flag_mask_vals, dtype=dtype)
                elif key == 'flag_values':
                    # The flag_values attribute also needs to be converted to a list of the correct datatype
                    flag_values = [int(x) for x in attribute.split(sep=',')]
                    attribute = np.asarray(flag_values, dtype=data.dtype)
                setattr(var, key, attribute)
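
            # Worked illustration (editor's note, derived from the logic above):
            # for flag_masks = "1, 2, 4, 8, 16, 32, 64, 128" the parsed maximum is 128,
            # which does not fit into a signed byte, so the variable is created with
            # dtype np.short; still larger flag values trigger np.int32.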

    def _create_root_group(self, attdict, **global_attr_keyw):
        """
        Create the root group and add l1b metadata as global attributes
        """
        self._convert_datetime_attributes(attdict)
        self._convert_bool_attributes(attdict)
        self._convert_nonetype_attributes(attdict)
        self._set_global_attributes(attdict, **global_attr_keyw)

    def _convert_datetime_attributes(self, attdict):
        """
        Replace l1b info parameters of type datetime.datetime by a double
        representation to match requirements for netCDF attribute data type
        rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if isinstance(content, (datetime, cftime.datetime, cftime.real_datetime)):
                attdict[key] = date2num(content, self.time_def.units, self.time_def.calendar)

    @staticmethod
    def _convert_bool_attributes(attdict):
        """
        Replace l1b info parameters of type bool ['b1'] by an integer
        representation to match requirements for netCDF attribute data type
        rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if type(content) is bool:
                attdict[key] = int(content)

    @staticmethod
    def _convert_nonetype_attributes(attdict):
        """
        Replace l1b info parameters of type bool ['b1'] by a integer
        representation to match requirements for netCDF attribute data type
        rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if content is None:
                attdict[key] = ""

    def _set_global_attributes(self, attdict, prefix=""):
        """ Save l1b.info dictionary as global attributes """
        for key in attdict.keys():
            self._rootgrp.setncattr(prefix+key, attdict[key])

    def _get_variable_attr_dict(self, parameter):
        """ Retrieve the parameter attributes """
        default_attrs = {
            "long_name": parameter,
            "standard_name": parameter,
            "scale_factor": 1.0,
            "add_offset": 0.0}
        if parameter not in self.parameter_attributes:
            # self._missing_parameters.append(parameter)
            return default_attrs
        else:
            return dict(self.parameter_attributes[parameter])

    def _write_processor_settings(self):
        if self._proc_settings is None:
            return
        settings = self._proc_settings
        for item in settings.keys():
            self._rootgrp.setncattr(item, str(settings[item]))

    def _open_file(self):
        try:
            self._rootgrp = Dataset(self.full_path, "w")
        except RuntimeError:
            msg = "Unable to create netCDF file: %s" % self.full_path
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

    def _write_to_file(self):
        self._rootgrp.close()

    @property
    def export_path(self):
        """ Evoking this property will also create the directory if it
        does not already exists """
        return self.output_handler.get_directory_from_data(self.data, create=True)

    @property
    def export_filename(self):
        """ Returns the filename for the level2 output file """
        return self.output_handler.get_filename_from_data(self.data)

    @property
    def full_path(self):
        return Path(self.export_path) / self.export_filename
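
# Editor's note: NCDataFile is only a parent class; the following is a minimal,
# hedged sketch of how a subclass is expected to drive the export. The subclass
# name and the `data` object are assumptions for illustration, not part of the
# original code.
class _ExampleOutputFile(NCDataFile):

    def write_to_file(self, data):
        # `data` must provide dimdict / get_parameter_by_name as used above
        self.data = data
        self._open_file()                 # creates the netCDF4.Dataset at self.full_path
        self._write_global_attributes()   # global attributes from the output handler
        self._populate_data_groups()      # dimensions and variables (incl. flag handling)
        self._write_to_file()             # closes the netCDF file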
Exemplo n.º 15
0
class TimeRangeRequest(DefaultLoggingClass):

    # Definition of periods:
    #
    # monthly:
    #   from 00:00:00.000000 of first day in month to 23:59:59.99999 of
    #   last day per month for each month in time range
    #
    # weekly:
    #   from Monday 00:00:00.00000 to Sunday 23:59:59.99999 for all weeks
    #   in the time range including partially covered weeks
    #
    # daily:
    #   from 00:00:00.000000 to 23:59:59.99999 for each day in time range
    #
    # custom:
    #   from 00:00:00.000000 of first day to 23:59:59.99999 of last day
    #   in time range

    _PERIODS = ["monthly", "weekly", "daily", "custom"]

    # TODO: Future planned option (weekly: 7 days from start day) and
    #       (week_of_year: self explanatory)

    def __init__(self,
                 start_dt,
                 stop_dt,
                 period="monthly",
                 exclude_month=None,
                 raise_if_empty=False):
        super(TimeRangeRequest, self).__init__(self.__class__.__name__)
        self.pysiral_config = ConfigInfo()
        self.error = ErrorStatus()
        self.set_range(start_dt, stop_dt)
        self.set_period(period)
        self.set_exclude_month(exclude_month)
        if raise_if_empty:
            self.raise_if_empty()

    def __repr__(self):
        output = "TimeRangeRequest object:\n"
        for field in ["_start_dt", "_stop_dt", "_period", "_exclude_month"]:
            output += "%12s: %s" % (field, getattr(self, field))
            output += "\n"
        return output

    def clip_to_mission(self, mission_id):
        mission_info = self.pysiral_config.get_mission_info(mission_id)
        start = mission_info.data_period.start
        stop = mission_info.data_period.stop
        is_clipped = self.clip_to_range(start, stop)
        if is_clipped:
            self.log.info("Clipped to mission time range: %s till %s" %
                          (mission_info.data_period.start,
                           mission_info.data_period.stop))

    def raise_if_empty(self):
        message = ""
        if self._start_dt is None:
            message += "start time is invalid"
        if self._stop_dt is None:
            message += "; stop time is invalid"
        if len(message) > 0:
            self.error.add_error("empty-time-range", message)
            self.error.raise_on_error()

    def set_range(self, start_date, stop_date):
        """ Set the range of the request, start_date and stop_data can
        be either int lists (year, month, [day]) or datetime objects """

        # 1. Check if datetime objects
        valid_start, valid_stop = False, False
        if isinstance(start_date, datetime):
            self._start_dt = start_date
            valid_start = True
        if isinstance(stop_date, datetime):
            self._stop_dt = stop_date
            valid_stop = True

        if valid_start and valid_stop:
            self._validate_range()
            return

        # 2. Check and decode integer lists
        msg_template = "invalid %s time (not integer list or datetime)"
        if isinstance(start_date, list):
            if all(isinstance(item, int) for item in start_date):
                self._start_dt = self._decode_int_list(start_date, "start")
            else:
                error_message = msg_template % "start"
                self.error.add_error("invalid-timedef", error_message)

        if isinstance(stop_date, list):
            if all(isinstance(item, int) for item in stop_date):
                self._stop_dt = self._decode_int_list(stop_date, "stop")
            else:
                error_message = msg_template % "stop"
                self.error.add_error("invalid-timedef", error_message)

        # 3. Raise on parsing errors
        self.error.raise_on_error()

        # 4. Check range
        self._validate_range()

    def clip_to_range(self, range_start, range_stop):
        """ Clip the current time range to an defined time range """

        is_clipped = False

        if self._start_dt < range_start and self._stop_dt > range_start:
            is_clipped = True
            self._start_dt = range_start
        elif self._start_dt < range_start and self._stop_dt < range_start:
            is_clipped = True
            self._start_dt = None
            self._stop_dt = None

        if self._stop_dt > range_stop and self._start_dt < range_stop:
            is_clipped = True
            self._stop_dt = range_stop
        elif self._stop_dt > range_stop and self._start_dt > range_stop:
            is_clipped = True
            self._start_dt = None
            self._stop_dt = None

        return is_clipped

    def set_period(self, period):
        """ Set the period (monthly, weekly, etc) for the generation of
        iterations for the time range """
        if period in self._PERIODS:
            self._period = period
        else:
            raise ValueError("Invalid TimeRangeRequest period: %s" % period)

    def set_exclude_month(self, exclude_month_list):
        """ Set a list of month, that shall be ignored during the generation of
        iterations for the time range """
        if exclude_month_list is None:
            exclude_month_list = []
        self._exclude_month = exclude_month_list

    def get_id(self, dt_fmt="%Y%m%dT%H%M%S"):
        return self.start_dt.strftime(dt_fmt) + "_" + self.stop_dt.strftime(
            dt_fmt)

    def _get_iterations(self):
        """ Return a list of iterations for the number of periods in the
        time range """

        # Return empty list if no start/stop are set
        if self._start_dt is None or self._stop_dt is None:
            return []

        # monthly periods: return a list of time ranges that cover the full
        # month from the first to the last month
        if self._period == "monthly":
            iterations = self._get_monthly_iterations()

        # default week periods: return a list of time ranges for each default
        # week definition (from Monday to Sunday)
        elif self._period == "weekly":
            iterations = self._get_weekly_iterations()

        # daily periods: return a list of time ranges for each day
        # in the requested period (exclude_month still applies)
        elif self._period == "daily":
            iterations = self._get_daily_iterations()

        # Just return one iteration with custom time range
        elif self._period == "custom":
            time_range = TimeRangeIteration(base_period="custom")
            time_range.set_range(self.start_dt, self.stop_dt)
            time_range.set_indices(1, 1)
            iterations = [time_range]

        # This should be caught before, but always terminate
        # an if-elif-else
        else:
            msg = "Invalid period: %s" % str(self._period)
            self.error.add_error("invalid-period", msg)
            self.error.raise_on_error()

        return iterations

    def _decode_int_list(self, int_list, start_or_stop):

        # XXX: Currently only yyyy mm [dd] (day is optional) are allowed
        n_entries = len(int_list)
        if n_entries < 2 or n_entries > 3:
            error_message = "%s date integer list must be yyyy mm [dd]"
            self.error.add_error("invalid-date-int-list", error_message)
            return None

        # Set the day
        day = 1 if n_entries == 2 else int_list[2]

        # Set the datetime object (as if it were the start date)
        # Raise an error and return None if unsuccessful
        try:
            dt = datetime(int_list[0], int_list[1], day)
        except ValueError:
            error_message = "cannot convert integer list to datetime: %s" % (
                str(int_list))
            self.error.add_error("invalid-date-int-list", error_message)
            return None

        # if stop time: add one period
        if start_or_stop == "stop":
            if n_entries == 2:
                extra_period = relativedelta(months=1, microseconds=-1)
            else:
                extra_period = relativedelta(days=1, microseconds=-1)
            dt = dt + extra_period

        return dt
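
    # Worked illustration (editor's note), derived from the decoding logic above:
    #   _decode_int_list([2015, 4], "start")    -> datetime(2015, 4, 1, 0, 0, 0)
    #   _decode_int_list([2015, 4], "stop")     -> datetime(2015, 4, 30, 23, 59, 59, 999999)
    #   _decode_int_list([2015, 4, 15], "stop") -> datetime(2015, 4, 15, 23, 59, 59, 999999)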

    def _validate_range(self):
        # Check if start and stop are in the right order
        if self.stop_dt <= self.start_dt:
            msg = "stop [%s] before start [%s]"
            msg = msg % (str(self.stop_dt), str(self.start_dt))
            self.error.add_error("invalid-period", msg)
            self.error.raise_on_error()

    def _get_monthly_iterations(self):
        """ Create iterator with monthly period """
        # Create Iterations
        iterations = []
        n_iterations = len(self.month_list)
        index = 1
        for year, month in self.month_list:

            # By default, get the full month
            period_start, period_stop = get_month_time_range(year, month)

            # Clip time range to actual days for first and last iteration
            # (only if the first and the last month are not in the
            #  exclude_month list)
            first_month = self._start_dt.month
            first_month_excluded = first_month in self._exclude_month
            if index == 1 and not first_month_excluded:
                period_start = self.start_dt

            last_month = self._stop_dt.month
            last_month_excluded = last_month in self._exclude_month
            if index == n_iterations and not last_month_excluded:
                period_stop = self.stop_dt

            # set final time range
            # iteration will be of type TimeRangeIteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(period_start, period_stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)
            index += 1

        return iterations

    def _get_weekly_iterations(self):
        """ Create iterator with weekly (Monday throught Sunday)
        period """

        # Start with empty iteration
        iterations = []
        index = 1

        # Get the start date: the period start date (if it is a Monday) or the
        # previous Monday. If the day is not a Monday, the isoweekday
        # (Monday=1, Sunday=7) gives the number of days to subtract
        # from the start day of the period
        start_offset_days = self.start_dt.isoweekday() - 1
        week_start_day = self.start_dt - relativedelta(days=start_offset_days)

        # Same for the stop date: Make sure the end date is either a Sunday
        # already or the Sunday after the stop date of the period
        stop_offset_days = 7 - self.stop_dt.isoweekday()
        week_stop_day = self.stop_dt + relativedelta(days=stop_offset_days)

        # Get the list of weeks
        weeks = weeks_list(week_start_day, week_stop_day, self._exclude_month)
        n_iterations = len(weeks)

        for start_day, stop_day in weeks:

            # The weeks list provides only integer dates (year, month, day);
            # the stop time is recomputed from the start of the week
            start = datetime(start_day[0], start_day[1], start_day[2])
            stop = start + relativedelta(days=7, microseconds=-1)

            # set final time range
            # iteration will be of type TimeRangeIteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(start, stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)
            index += 1

        return iterations

    def _get_daily_iterations(self):
        """ Create iterator with daily period """

        # Get list of days
        day_list = self.days_list
        iterations = []
        n_iterations = len(day_list)
        index = 1

        # Loop over days
        for year, month, day in day_list:

            # Start and stop are beginning/end of day
            start = datetime(year, month, day)
            stop = start + relativedelta(days=1, microseconds=-1)

            # Create the iteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(start, stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)
            index += 1

        return iterations

    @property
    def month_list(self):
        return month_list(self.start_dt, self.stop_dt, self._exclude_month)

    @property
    def days_list(self):
        return days_list(self.start_dt, self.stop_dt, self._exclude_month)

    @property
    def _default_period(self):
        return self._PERIODS[0]

    @property
    def start_dt(self):
        return self._start_dt

    @property
    def stop_dt(self):
        return self._stop_dt

    @property
    def label(self):
        return str(self.start_dt) + " till " + str(self.stop_dt)

    @property
    def iterations(self):
        return self._get_iterations()

    @property
    def base_period(self):
        return self._period

    @property
    def base_duration(self):
        """ Return a duration object """
        if self.base_period == "monthly":
            return Duration(months=1)
        elif self.base_period == "daily":
            return Duration(days=1)
        else:
            timedelta = relativedelta(dt1=self.stop_dt, dt2=self.start_dt)
            return Duration(months=timedelta.months,
                            days=timedelta.days,
                            hours=timedelta.hours,
                            minutes=timedelta.minutes,
                            seconds=timedelta.seconds)

    @property
    def base_duration_isoformat(self):
        return duration_isoformat(self.base_duration)
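
# Editor's note: a minimal, hedged usage sketch of the request/iteration pattern
# implemented above. It assumes that the pysiral helpers used by this class
# (ConfigInfo, TimeRangeIteration, month_list, ...) are importable; the dates are
# illustrative only.
if __name__ == "__main__":
    request = TimeRangeRequest([2015, 3], [2015, 4], period="monthly")
    for monthly_range in request.iterations:
        # each item is a TimeRangeIteration covering one full month
        print(monthly_range)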
Exemplo n.º 16
0
class Sentinel3CODAL2Wat(DefaultLoggingClass):
    def __init__(self, cfg, raise_on_error=False):
        """
        Input handler for Sentinel-3 L2WAT netCDF files from the CODA.
        :param cfg: A treedict object (root.input_handler.options) from the corresponding Level-1 pre-processor
                    config file
        :param raise_on_error: Boolean value if the class should raise an exception upon an error (default: False)
        """

        cls_name = self.__class__.__name__
        super(Sentinel3CODAL2Wat, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None

        # Debug variables
        self.timer = None

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Create a Level-1 data container from Sentinel-3 CODA L2WAT files
        :param filepath: The full file path to the netCDF file
        :param polar_ocean_check:
        :return: The parsed (or empty) Level-1 data container
        """

        #  for debug purposes
        self.timer = StopWatch()
        self.timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not os.path.isfile(filepath):
            msg = "Not a valid file: %s" % filepath
            self.log.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse xml header file
        self._parse_xml_manifest(filepath)

        # Parse the input netCDF file
        self._read_input_netcdf(filepath)
        if self.error.status:
            return self.empty

        # Get metadata
        self._set_input_file_metadata()

        # Test if input file contains data over polar oceans (optional)
        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(
                self.l1.info)
            if not has_polar_ocean_data:
                self.timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        self.timer.stop()
        self.log.info("- Created L1 object in %.3f seconds" %
                      self.timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def interp_1Hz_to_20Hz(variable_1Hz, time_1Hz, time_20Hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1Hz: a 1Hz variable array
        :param time_1Hz: 1Hz reference time
        :param time_20Hz: 20 Hz reference time
        :return: the interpolated 20Hz variable and an error status flag
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1Hz,
                                     variable_1Hz,
                                     bounds_error=False,
                                     **kwargs)
            variable_20Hz = f(time_20Hz)
        except ValueError:
            fill_value = np.nan
            variable_20Hz = np.full(time_20Hz.shape, fill_value)
            error_status = True
        return variable_20Hz, error_status
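
    # Editor's note: a hedged illustration of the helper above with synthetic
    # numpy arrays (values chosen for clarity, not taken from real data):
    #   time_1hz  = np.array([0.0, 1.0, 2.0])
    #   var_1hz   = np.array([10.0, 20.0, 30.0])
    #   time_20hz = np.linspace(0.0, 2.0, 41)
    #   var_20hz, error = Sentinel3CODAL2Wat.interp_1Hz_to_20Hz(var_1hz, time_1hz, time_20hz)
    #   # -> var_20hz is the linear interpolation at the 20Hz timestamps, error is False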

    @staticmethod
    def parse_sentinel3_l1b_xml_header(filename):
        """
        Reads the XML header file of a Sentinel 3 L1b Data set
        and returns the contents as an OrderedDict
        """
        with open(filename) as fd:
            content_ordereddict = xmltodict.parse(fd.read())
        return content_ordereddict[u'xfdu:XFDU']

    def _parse_xml_manifest(self, filepath):
        """
        Parse the Sentinel-3 XML header file and extract key attributes for filtering
        :param filepath: the filepath for the netcdf
        :return: None
        """
        # Retrieve header information from mission settings
        xml_header_file = self.cfg.xml_manifest
        dataset_folder = folder_from_filename(filepath)
        filename_header = os.path.join(dataset_folder, xml_header_file)
        self._xmlh = self.parse_sentinel3_l1b_xml_header(filename_header)

    def _get_xml_content(self, section_name, tag):
        """ Returns the generalProductInformation content of the xml manifest
        :return: dictionary
        """

        # Extract Metadata
        metadata = self._xmlh["metadataSection"]["metadataObject"]

        # Extract General Product Info
        index = self.cfg.xml_metadata_object_index[section_name]
        product_info = metadata[index]["metadataWrap"]["xmlData"]
        product_info = product_info[tag]

        return product_info

    def _read_input_netcdf(self, filepath):
        """
        Read the netCDF file via xarray
        :param filepath: The full filepath to the netCDF file
        :return: none
        """
        try:
            self.nc = xarray.open_dataset(filepath,
                                          decode_times=False,
                                          mask_and_scale=True)
        except Exception:
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.log.warning(msg)
            return

    def _set_input_file_metadata(self):
        """
        Populates the product info segment of the Level1Data object with information from
        the global attributes of the netCDF and content of the xml manifest
        :return: None
        """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Get xml manifest content
        product_info = self._get_xml_content(
            "generalProductInformation", "sentinel3:generalProductInformation")
        sral_info = self._get_xml_content("sralProductInformation",
                                          "sralProductInformation")

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        mission = metadata["mission_name"].lower().replace(" ", "")
        info.set_attribute("mission", str(mission))
        info.set_attribute("mission_sensor", "sral")
        info.set_attribute("mission_data_version", metadata["source"])
        info.set_attribute("orbit", metadata["absolute_rev_number"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source", metadata["product_name"])
        info.set_attribute(
            "timeliness", self.cfg.timeliness_dict[str(
                product_info["sentinel3:timeliness"])])

        # Time-Orbit Metadata
        lats = [
            float(metadata["first_meas_lat"]),
            float(metadata["last_meas_lat"])
        ]
        lons = [
            float(metadata["first_meas_lon"]),
            float(metadata["last_meas_lon"])
        ]
        info.set_attribute("start_time",
                           parse_datetime_str(metadata["first_meas_time"][4:]))
        info.set_attribute("stop_time",
                           parse_datetime_str(metadata["last_meas_time"][4:]))
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 0.0
            if mode == "sar":
                percent_value = 100.
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent",
                           float(sral_info["sral:openOceanPercentage"]))

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that xarray does not automatically decode time, since the
        #       numpy datetime64 format is difficult to work with. It is better to compute datetimes
        #       using a known num2date conversion
        utc_timestamp = num2date(self.nc.time_20_ku.values,
                                 units=self.nc.time_20_ku.units)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(self.nc.lon_20_ku.values,
                                        self.nc.lat_20_ku.values,
                                        self.nc.alt_20_ku.values,
                                        self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        # NOTE: These are only available at 1Hz and need to be interpolated
        time_01, time_20 = self.nc.time_01.values, self.nc.time_20_ku.values
        pitch_angle_20, stat = self.interp_1Hz_to_20Hz(
            self.nc.off_nadir_pitch_angle_pf_01.values, time_01, time_20)
        roll_angle_20, stat = self.interp_1Hz_to_20Hz(
            self.nc.off_nadir_roll_angle_pf_01.values, time_01, time_20)
        yaw_angle_20, stat = self.interp_1Hz_to_20Hz(
            self.nc.off_nadir_yaw_angle_pf_01.values, time_01, time_20)
        self.l1.time_orbit.set_antenna_attitude(pitch_angle_20, roll_angle_20,
                                                yaw_angle_20)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag
        :return: None
        """

        # Get the waveform
        # NOTE: The waveform is given in counts
        wfm_counts = self.nc.waveform_20_ku.values
        n_records, n_range_bins = wfm_counts.shape

        # Convert the waveform to power
        # TODO: This needs to be verified. Currently using the scale factor; the documentation in the netCDF is unclear
        # From the documentation:
        # "This scaling factor represents the backscatter coefficient for a waveform amplitude equal to 1.
        #  It is corrected for AGC instrumental errors (agc_cor_20_ku) and internal calibration (sig0_cal_20_ku)"
        # NOTE: Make sure type of waveform is float and not double
        #       (double will cause issues with cythonized retrackers)
        wfm_power = np.ndarray(shape=wfm_counts.shape, dtype=np.float32)
        waveform_scale_factor = self.nc.scale_factor_20_ku.values
        for record in np.arange(n_records):
            wfm_power[record, :] = waveform_scale_factor[record] * wfm_counts[
                record, :].astype(float)

        # Get the window delay
        # "The tracker_range_20hz is the range measured by the onboard tracker
        #  as the window delay, corrected for instrumental effects and
        #  CoG offset"
        tracker_range_20hz = self.nc.tracker_range_20_ku.values
        wfm_range = np.ndarray(shape=wfm_counts.shape, dtype=np.float32)
        range_bin_index = np.arange(n_range_bins)
        for record in np.arange(n_records):
            wfm_range[record, :] = tracker_range_20hz[record] + \
                (range_bin_index*self.cfg.range_bin_width) - \
                (self.cfg.nominal_tracking_bin*self.cfg.range_bin_width)

        # Set the operation mode
        op_mode = self.nc.instr_op_mode_20_ku.values
        op_mode_translator = self.cfg.instr_op_mode_list
        radar_mode = np.array(
            [op_mode_translator[int(val)] for val in op_mode]).astype("int8")

        # Set the waveform
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # Get the valid flags
        # TODO: Find a way to get a valid flag
        # measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        # valid_flag = measurement_confident_flag == 0
        # self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1Hz = self.nc.time_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            var_name = self.cfg.range_correction_targets[key]
            variable_1Hz = getattr(self.nc, var_name)
            variable_20Hz, error_status = self.interp_1Hz_to_20Hz(
                variable_1Hz.values, time_1Hz, time_20Hz)
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % var_name
                self.log.warning(msg)
            self.l1.correction.set_parameter(key, variable_20Hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object
        NOTE: In the current state (TEST dataset), the surface type flag is only 1 Hz. A nearest neighbour
              interpolation is used to get the 20Hz surface type flag.
        :return: None
        """

        # Set the flag
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = self.nc.surf_type_20_ku.values == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz
        In addition, a few legacy parameters are computed based on the waveform counts, which are only available at
        this stage. Computation of other parameters such as sigma_0, leading_edge_width, ... is moved to the
        post-processing
        :return: None
        """
        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20Hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20Hz, key)

    @property
    def empty(self):
        """
        Default return object if no data should be returned
        :return: Representation of an empty object (None)
        """
        return None
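
# Editor's note: a minimal, hedged sketch of how this input adapter is typically
# used. The configuration object normally comes from the Level-1 pre-processor
# definition file and the file path below is a placeholder, not an actual product.
#
#   input_adapter = Sentinel3CODAL2Wat(cfg)
#   l1 = input_adapter.get_l1("<path to S3 CODA L2WAT netCDF file>")
#   if l1 is not None:
#       ...  # l1 is a Level1bData container ready for the pre-processor chain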
Exemplo n.º 17
0
class Level2Data(object):

    _L2_DATA_ITEMS = [
        "range", "ssa", "elev", "afrb", "frb", "sit", "radar_mode"
    ]

    _HEMISPHERE_CODES = {"north": "nh", "south": "sh"}

    # These are only the standard Level-2 parameters
    # NOTE: Auxiliary parameter are handled differently
    _PARAMETER_CATALOG = {
        "time": "time",
        "longitude": "longitude",
        "latitude": "latitude",
        "surface_type": "surface_type_flag",
        "radar_mode": "radar_mode",
        "elevation": "elev",
        "sea_surface_anomaly": "ssa",
        "radar_freeboard": "afrb",
        "freeboard": "frb",
        "sea_ice_thickness": "sit",
    }

    _PROPERTY_CATALOG = {"sea_surface_height": "ssh"}

    def __init__(self, metadata, time_orbit, period=None):

        # Copy necessary fields from l1b
        self.error = ErrorStatus()
        self._n_records = metadata.n_records
        self.info = metadata
        self.track = time_orbit
        self.period = period

        self._auto_auxvar_num = 0

        # A dictionary similar to the parameter catalog
        # To be filled during the set auxdata method
        self._auxiliary_catalog = {}

        # Metadata
        self._auxdata_source_dict = {}
        self._source_primary_filename = "unkown"
        self._l2_algorithm_id = "unkown"
        self._l2_version_tag = "unkown"
        self._doi = ""

        # Define time of dataset creation as the time of object initialization
        # to avoid slightly different timestamps for repeated calls of datetime.now()
        self._creation_time = datetime.now()

        # Other Class properties
        self._is_evenly_spaced = time_orbit.is_evenly_spaced

        # Create Level2 Data Groups
        self._create_l2_data_items()

    def set_surface_type(self, surface_type):
        self.surface_type = surface_type

    def set_radar_mode(self, radar_mode):
        self.radar_mode = radar_mode

    def set_parameter(self, target, value, uncertainty=None, bias=None):
        """ Convienience method to safely add a parameter with optional
        uncertainty and/or bias to the level-2 data structure """

        # Sanity checks
        is_l2_default = self._check_if_valid_parameter(target)

        # If the target is not a standard level-2 item, check whether the full
        # (catalog) name has been passed and translate it to the internal name
        if not is_l2_default:
            if target in self.parameter_catalog.keys():
                target = self.parameter_catalog[target]
            else:
                # Unknown name -> add as auxiliary parameter
                # TODO: Need to figure something out for the auxvar id (not known if reinstated from l2i)
                par_name = self.auto_auxvar_id
                self.set_auxiliary_parameter(par_name, target, value, uncertainty)
                return

        # Next check: Needs to be of correct shape
        is_correct_size = self._check_valid_size(value)
        if not is_correct_size:
            msg = "Invalid parameter dimension: %s (See self._L2_DATA_ITEMS)"
            msg = msg % str(target)
            self.error.add_error("l2-invalid-parameter_name", msg)
            self.error.raise_on_error()

        # Test if parameter exists
        # (older l2i files might not have all parameters)
        try:
            parameter = getattr(self, target)
        except AttributeError:
            return

        # Set values, uncertainty bias
        parameter.set_value(value)
        if uncertainty is not None:
            uncertainty_value = self._get_as_array(uncertainty)
            parameter.set_uncertainty(uncertainty_value)
        if bias is not None:
            bias_value = self._get_as_array(bias)
            parameter.set_bias(bias_value)
        setattr(self, target, parameter)

    def set_auxiliary_parameter(self,
                                var_id,
                                var_name,
                                value,
                                uncertainty=None):
        """ Adds an auxiliary parameter to the data object"""

        # Use L2Elevation Array
        # TODO: This is too cumbersome, replace by xarray in due time
        param = L2ElevationArray(shape=(self.n_records))
        # Allow value to be None
        # NOTE: In this case an empty value will be generated
        if value is None:
            value = np.full((self.n_records), np.nan)
        param.set_value(value)
        if uncertainty is not None:
            param.set_uncertainty(uncertainty)
        setattr(self, var_id, param)

        # Register auxiliary parameter (this allows the parameter to be found
        # by its long name)
        self._auxiliary_catalog[var_name] = var_id

    def set_data_record_type(self, data_record_type):
        self._data_record_type = data_record_type

    def update_retracked_range(self, retracker):
        # Update only for indices (surface type) supplied by retracker class
        # XXX: should get an overhaul
        ii = retracker.indices
        self.range[ii] = retracker.range[ii]
        self.range.uncertainty[ii] = retracker.uncertainty[ii]
        self.elev[ii] = self.altitude[ii] - retracker.range[ii]
        self.elev.uncertainty[ii] = retracker.uncertainty[ii]

        # Register potential auxiliary data
        for var_id, var_name, value, uncertainty in retracker.auxdata_output:

            # --- Check if output variable already exists ---

            # Create if new
            if var_name not in self.auxvar_names:
                self.set_auxiliary_parameter(var_id, var_name, value,
                                             uncertainty)

            # Transfer values for indices if already exists
            else:
                auxdata = getattr(self, var_id)
                auxdata[ii] = value[ii]
                if uncertainty is not None:
                    auxdata.uncertainty[ii] = uncertainty[ii]
                setattr(self, var_id, auxdata)

    def set_metadata(self,
                     auxdata_source_dict=None,
                     source_primary_filename=None,
                     l2_algorithm_id=None,
                     l2_version_tag=None):
        if auxdata_source_dict is not None:
            self._auxdata_source_dict = auxdata_source_dict
        if source_primary_filename is not None:
            self._source_primary_filename = source_primary_filename
        if l2_algorithm_id is not None:
            self._l2_algorithm_id = l2_algorithm_id
        if l2_version_tag is not None:
            self._l2_version_tag = l2_version_tag

    def set_doi(self, doi):
        self._doi = doi

    def get_parameter_by_name(self, parameter_name):
        """ Method to retrieve a level-2 parameter """

        # Combine parameter and property catalogs
        catalog = self.parameter_catalog
        catalog.update(self.property_catalog)
        catalog.update(self._auxiliary_catalog)

        if "_uncertainty" in parameter_name:
            parameter_name = parameter_name.replace("_uncertainty", "")
            source = catalog[parameter_name]
            parameter = getattr(self, source)
            return parameter.uncertainty

        elif "_bias" in parameter_name:
            parameter_name = parameter_name.replace("_bias", "")
            source = catalog[parameter_name]
            parameter = getattr(self, source)
            return parameter.bias

        else:
            try:
                source = catalog[parameter_name]
            except KeyError:
                msg = "Variable name `%s` is not in the catalog of this l2 object" % parameter_name
                self.error.add_error("l2data-missing-variable", msg)
                self.error.raise_on_error()
            parameter = getattr(self, source)
            return parameter

    def get_attribute(self, attribute_name, *args):
        """ Return a string for a given attribute name. This method is
        required for the output data handler """

        try:
            attr_getter = getattr(self, "_get_attr_" + attribute_name)
            attribute = attr_getter(*args)
            return attribute
        except AttributeError:
            return "unkown"

    def _create_l2_data_items(self):
        for item in self._L2_DATA_ITEMS:
            setattr(self, item, L2ElevationArray(shape=(self.n_records)))

    def _check_if_valid_parameter(self, parameter_name):
        """ Performs a test if parameter name is a valid level-2 parameter
        name. Adds error if result negative and returns flag (valid: True,
        invalid: False) """
        if parameter_name not in self._L2_DATA_ITEMS:
            return False
        else:
            return True

    def _check_valid_size(self, array, name=""):
        """ Test if array has the correct size shape=(n_records). Adds error
        if not and returns flag (valid: True, invalid: False) """
        condition = array.ndim == 1 and len(array) == self._n_records
        if condition:
            return True
        else:
            self.error.add_error("Invalid array added to level-2 class")
            return False

    def _get_as_array(self, value, dtype=np.float32):
        """ Create an output array from values that is of length n_records.
        Value can be scalar or array of length n_records. If value is any other
        length or dimension, an error will be added and a nan array of length
        n_records will be returned

        Arguments:
            value (integer, float or )

        Note: This method is mostly used to allow scalar uncertainty and
              bias values. It also makes sure that uncertainty and bias
              are of the same shape than the value, which is not guaranteed
              in L2ElevationArray. If a wrong uncertainty, bias shape is
              passed, the result will be nan uncertainties/biases throughout
              the processing chain and the start of NaN occurences can be used
              to trace the origin of the error.
        """

        # Check if value is either float or integer
        is_numeric = np.asarray(value).dtype.kind in "if"
        if not is_numeric:
            return np.full(self.arrshape, np.nan)

        # Check if value is scalar or array
        if np.isscalar(value):
            return np.full(self.arrshape, value).astype(dtype)

        # if array, check if correct size
        else:
            is_np_array = isinstance(value, np.ndarray)
            is_correct_size = self._check_valid_size(value)
            if is_np_array and is_correct_size:
                return value
            else:
                return np.full(self.arrshape, np.nan)
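
        # Worked illustration (editor's note), derived from the logic above: with
        # n_records = 100, _get_as_array(0.1) returns a float32 array of shape (100,)
        # filled with 0.1, while a non-numeric value or an array of the wrong length
        # returns an all-NaN array of the same shape.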

    def _get_attr_pysiral_version(self, *args):
        return psrlcfg.version

    def _get_attr_mission_id(self, *args):
        # XXX: Deprecated
        return self.info.mission

    def _get_attr_source_mission_id(self, *args):
        mission_id = self.info.mission
        if args[0] == "uppercase":
            mission_id = mission_id.upper()
        if args[0] == "select":
            for entry in args[1:]:
                mission_id_code, label = entry.split(":")
                if mission_id == mission_id_code:
                    return label
            return "Error (mission id %s not in select statement)" % mission_id
        return mission_id

    def _get_attr_source_mission_name(self, *args):
        mission_name = psrlcfg.platforms.get_name(self.info.mission)
        if args[0] == "uppercase":
            mission_name = mission_name.upper()
        return mission_name

    def _get_attr_source_mission_sensor(self, *args):
        mission_sensor = psrlcfg.platforms.get_sensor(self.info.mission)
        if args[0] == "uppercase":
            mission_sensor = mission_sensor.upper()
        return mission_sensor

    def _get_attr_source_mission_sensor_fn(self, *args):
        """ Same as source mission sensor, only a sanitized version for filenames """
        mission_sensor = psrlcfg.platforms.get_sensor(self.info.mission)
        for character in ["-"]:
            mission_sensor = mission_sensor.replace(character, "")
        if args[0] == "uppercase":
            mission_sensor = mission_sensor.upper()
        return mission_sensor

    def _get_attr_source_hemisphere(self, *args):
        if args[0] == "select":
            choices = {"north": args[1], "south": args[2]}
            return choices.get(self.hemisphere, "n/a")
        return self.hemisphere

    def _get_attr_hemisphere(self, *args):
        # XXX: Deprecated
        return self.hemisphere

    def _get_attr_hemisphere_code(self, *args):
        hemisphere_code = self.hemisphere_code
        if args[0] == "uppercase":
            hemisphere_code = hemisphere_code.upper()
        return hemisphere_code

    def _get_attr_startdt(self, dtfmt):
        # XXX: Deprecated
        return self.info.start_time.strftime(dtfmt)

    def _get_attr_stopdt(self, dtfmt):
        # XXX: Deprecated
        return self.info.stop_time.strftime(dtfmt)

    def _get_attr_geospatial_lat_min(self, *args):
        return self._get_attr_geospatial_str(np.nanmin(self.latitude))

    def _get_attr_geospatial_lat_max(self, *args):
        return self._get_attr_geospatial_str(np.nanmax(self.latitude))

    def _get_attr_geospatial_lon_min(self, *args):
        return self._get_attr_geospatial_str(np.nanmin(self.longitude))

    def _get_attr_geospatial_lon_max(self, *args):
        return self._get_attr_geospatial_str(np.nanmax(self.longitude))

    def _get_attr_geospatial_str(self, value):
        return "%.4f" % value

    def _get_attr_source_auxdata_sic(self, *args):
        value = self._auxdata_source_dict.get("sic", "unknown")
        if value == "unknown":
            value = self.info.source_auxdata_sic
        return value

    def _get_attr_source_auxdata_sitype(self, *args):
        value = self._auxdata_source_dict.get("sitype", "unknown")
        if value == "unknown":
            value = self.info.source_auxdata_sitype
        return value

    def _get_attr_source_auxdata_mss(self, *args):
        value = self._auxdata_source_dict.get("mss", "unknown")
        if value == "unknown":
            value = self.info.source_auxdata_mss
        return value

    def _get_attr_source_auxdata_snow(self, *args):
        value = self._auxdata_source_dict.get("snow", "unknown")
        if value == "unknown":
            value = self.info.source_auxdata_snow
        return value

    def _get_attr_source_sic(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("sic", "unknown")

    def _get_attr_source_sitype(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("sitype", "unknown")

    def _get_attr_source_mss(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("mss", "unknown")

    def _get_attr_source_snow(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("snow", "unknown")

    def _get_attr_source_primary(self, *args):
        return self._source_primary_filename

    def _get_attr_l2_algorithm_id(self, *args):
        return self._l2_algorithm_id

    def _get_attr_l2_version_tag(self, *args):
        return self._l2_version_tag

    def _get_attr_utcnow(self, *args):
        datetime = self._creation_time
        if re.match("%", args[0]):
            time_string = datetime.strftime(args[0])
        else:
            time_string = datetime.isoformat()
        return time_string

    def _get_attr_time_coverage_start(self, *args):
        datetime = self.period.start
        if re.match("%", args[0]):
            time_string = datetime.strftime(args[0])
        else:
            time_string = datetime.isoformat()
        return time_string

    def _get_attr_time_coverage_end(self, *args):
        datetime = self.period.stop
        if re.match("%", args[0]):
            time_string = datetime.strftime(args[0])
        else:
            time_string = datetime.isoformat()
        return time_string

    def _get_attr_time_coverage_duration(self, *args):
        return self.period.duration_isoformat

    def _get_attr_time_resolution(self, *args):
        tdelta = self.time[-1] - self.time[0]
        seconds = tdelta.total_seconds()
        resolution = seconds / self.n_records
        return "%.2f seconds" % resolution

    def _get_attr_source_timeliness(self, *args):
        """ Return the timeliness of the l1b source data. Set default to
        NTC for backward compatibility """
        try:
            timeliness = self.info.timeliness
        except AttributeError:
            timeliness = "NTC"
        if timeliness is None:
            timeliness = "NTC"
        if args[0] == "lowercase":
            timeliness = timeliness.lower()
        return timeliness

    def _get_attr_uuid(self, *args):
        """ Provide an uuid code (for tracking id's) """
        return str(uuid.uuid4())

    def _get_attr_doi(self, *args):
        return self._doi

    @property
    def parameter_catalog(self):
        return dict(self._PARAMETER_CATALOG)

    @property
    def property_catalog(self):
        return dict(self._PROPERTY_CATALOG)

    @property
    def auxvar_names(self):
        return sorted(self._auxiliary_catalog.keys())

    @property
    def auto_auxvar_id(self):
        name = "auxvar%02g" % self._auto_auxvar_num
        self._auto_auxvar_num += 1
        return name

    @property
    def arrshape(self):
        return (self.n_records)

    @property
    def n_records(self):
        return self._n_records

    @property
    def hemisphere(self):
        return self.info.subset_region_name

    @property
    def hemisphere_code(self):
        return self._HEMISPHERE_CODES[self.hemisphere]

    @property
    def footprint_spacing(self):
        spacing = great_circle((self.latitude[1], self.longitude[1]),
                               (self.latitude[0], self.longitude[0])).meters

        if np.isclose(spacing, 0.0):
            spacing = great_circle(
                (self.latitude[-2], self.longitude[-2]),
                (self.latitude[-1], self.longitude[-1])).meters

        return spacing

    @property
    def dimdict(self):
        """ Returns dictionary with dimensions"""
        dimdict = OrderedDict([("time", self.n_records)])
        return dimdict

    @property
    def time(self):
        try:
            time = self.track.time
        except AttributeError:
            time = self.track.timestamp
        return time

    @property
    def longitude(self):
        return self.track.longitude

    @property
    def latitude(self):
        return self.track.latitude

    @property
    def altitude(self):
        return self.track.altitude

    @property
    def surface_type_flag(self):
        return self.surface_type.flag

    @property
    def ssh(self):
        ssh = L2ElevationArray(shape=self._n_records)
        ssh.set_value(self.mss + self.ssa)
        ssh.set_uncertainty(self.ssa.uncertainty)
        return ssh
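
# Editor's note: a hedged sketch of the typical update pattern for this container
# in the Level-2 processor. The `l1b` object (providing .info and .time_orbit) and
# the freeboard/classifier arrays are assumptions; names are illustrative only.
#
#   l2 = Level2Data(l1b.info, l1b.time_orbit, period=period)
#   l2.set_parameter("freeboard", frb_values, uncertainty=frb_uncertainty)
#   l2.set_parameter("my_classifier", classifier_values)   # unknown name -> auxiliary parameter
#   sit = l2.get_parameter_by_name("sea_ice_thickness")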
Exemplo n.º 18
0
class Level1PreProcJobDef(DefaultLoggingClass):
    """ A class that contains the information for the Level-1 pre-processor JOB (not the pre-processor class!) """
    def __init__(self,
                 l1p_settings_id_or_file,
                 tcs,
                 tce,
                 exclude_month=None,
                 hemisphere="global",
                 platform=None,
                 output_handler_cfg=None,
                 source_repo_id=None):
        """
        The settings for the Level-1 pre-processor job
        :param l1p_settings_id_or_file: An id of a proc/l1 processor config file (filename excluding the .yaml
                                        extension) or a full filepath to a yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD])
        :param exclude_month: [int list] A list of months that will be ignored
        :param hemisphere: [str] The target hemisphere (`north`, `south`, `global`:default).
        :param platform: [str] The target platform (pysiral id). Required if l1p settings files is valid for
                               multiple platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] An optional dictionary with options of the output handler
                                   (`overwrite_protection`: [True, False], `remove_old`: [True, False])
        :param source_repo_id: [str] The tag in local_machine_def.yaml (l1b_repository.<platform>.<source_repo_id>)
                                  -> Overwrites the default source repo in the l1p settings
                                     (input_handler.options.local_machine_def_tag &
                                      output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Get pysiral configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Store command line options
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Parse the l1p settings file
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Get full requested time range
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Store the data handler options
        if output_handler_cfg is None:
            output_handler_cfg = {}
        self._output_handler_cfg = output_handler_cfg

        # Measure execution time
        self.stopwatch = StopWatch()

    @classmethod
    def from_args(cls, args):
        """ Init the Processor Definition from the pysiral-l1preproc command line argument object """

        # Optional Keywords
        kwargs = {}
        if args.exclude_month is not None:
            kwargs["exclude_month"] = args.exclude_month
        data_handler_cfg = dict()
        data_handler_cfg["overwrite_protection"] = args.overwrite_protection
        data_handler_cfg["remove_old"] = args.remove_old
        if args.source_repo_id is not None:
            data_handler_cfg["local_machine_def_tag"] = args.source_repo_id
        kwargs["output_handler_cfg"] = data_handler_cfg
        kwargs["hemisphere"] = args.hemisphere
        kwargs["platform"] = args.platform
        kwargs["source_repo_id"] = args.source_repo_id

        # Return the initialized class
        return cls(args.l1p_settings, args.start_date, args.stop_date,
                   **kwargs)

    def set_l1p_processor_def(self, l1p_settings_id_or_file):
        """ Parse the content of the processor definition file """

        # 1. Resolve the absolute file path
        procdef_file_path = self.get_l1p_proc_def_filename(
            l1p_settings_id_or_file)

        # 2. Read the content
        logger.info("Parsing L1P processor definition file: %s" %
                    procdef_file_path)
        self._l1pprocdef = get_yaml_config(procdef_file_path)
        self._check_if_unambiguous_platform()

        # 3. Expand info (input data lookup directories)
        self._get_local_input_directory()

        # 4. update hemisphere for input adapter
        self._l1pprocdef.level1_preprocessor.options.polar_ocean.target_hemisphere = self.target_hemisphere

    def get_l1p_proc_def_filename(self, l1p_settings_id_or_file):
        """ Query pysiral config to obtain filename for processor definition file """

        # A. Check if already filename
        if Path(l1p_settings_id_or_file).is_file():
            return l1p_settings_id_or_file

        # B. Not a file, try to resolve filename via pysiral config
        filename = self.pysiral_cfg.get_settings_file("proc", "l1",
                                                      l1p_settings_id_or_file)
        if filename is None:
            msg = "Invalid Level-1 pre-processor definition filename or id: %s\n" % l1p_settings_id_or_file
            msg = msg + " \nRecognized Level-1 pre-processor definitions ids:\n"
            ids = self.pysiral_cfg.get_setting_ids("proc", "l1")
            for id in ids:
                msg = msg + "    - " + id + "\n"
            self.error.add_error("invalid-l1p-outputdef", msg)
            self.error.raise_on_error()
        return filename

    def _get_local_input_directory(self):
        """ Replace the tag for local machine def with the actual path info """

        input_handler_cfg = self.l1pprocdef.input_handler.options
        local_machine_def_tag = input_handler_cfg.local_machine_def_tag
        primary_input_def = self.pysiral_cfg.local_machine.l1b_repository
        platform, tag = self.platform, local_machine_def_tag

        # Overwrite the tag if specifically supplied
        if self._source_repo_id is not None:
            tag = self._source_repo_id

        # Get the value
        expected_branch_name = "root.l1b_repository.%s.%s" % (platform, tag)
        try:
            branch = AttrDict(primary_input_def[platform][tag])
        except KeyError:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Sanity Checks
        # TODO: Obsolete?
        if branch is None:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Validity checks
        # TODO: These checks are probably better located in a separate method?
        for key in ["source", "l1p"]:

            # 1. Branch must have specific keys for input and output
            if key not in branch:
                msg = "Missing definition in `local_machine_def.yaml`. Expected value: %s.%s"
                msg = msg % (expected_branch_name, key)
                self.error.add_error("local-machine-def-missing-tag", msg)
                self.error.raise_on_error()

            # 2. The value of each branch must be a valid directory or an
            #    attrdict (e.g. for different radar modes) with a list of directories
            directory_or_attrdict = branch[key]
            try:
                directories = directory_or_attrdict.values()
            except AttributeError:
                directories = [directory_or_attrdict]

            for directory in directories:
                if not Path(directory).is_dir():
                    msg = "Invalid directory in `local_machine_def.yaml`: %s is not a valid directory"
                    msg = msg % directory
                    self.error.add_error("local-machine-def-invalid-dir", msg)
                    self.error.raise_on_error()

        # Update the lookup dir parameter
        self.l1pprocdef.input_handler["options"]["lookup_dir"] = branch.source

    def _check_if_unambiguous_platform(self):
        """ Checks if the platform is unique, since some l1 processor definitions are valid for a series of
        platforms, such as ERS-1/2, Sentinel-3A/B, etc. The indicator is that the platform tag in the
        l1 preprocessor settings is comma separated list.

        For the location of the source data, it is however necessary that the exact platform is known. It must
        therefore be specified explicitly by the -platform argument """

        settings_is_ambigous = "," in self._l1pprocdef.platform
        platform_is_known = self.platform is not None

        # Test if platform is given if the settings file is valid for more than 1 platform
        if settings_is_ambigous and not platform_is_known:
            msg = "Error: platform in l1p settings is ambiguous (%s), but no platform has been given (-platform)"
            msg = msg % self._l1pprocdef.platform
            sys.exit(msg)

        # Test if platform provided matches the platform list in the settings file
        if settings_is_ambigous and platform_is_known:
            if not self.platform in str(self._l1pprocdef.platform):
                msg = "Error: platform in l1p settings (%s) and given platform (%s) do not match"
                msg = msg % (self._l1pprocdef.platform, self.platform)
                sys.exit(msg)

        # If platform in settings is unambigous, but not provided -> get platform from settings
        if not settings_is_ambigous and not platform_is_known:
            self._platform = self._l1pprocdef.platform
            logger.info("- get platform from l1p settings -> %s" %
                        self.platform)

    @property
    def hemisphere(self):
        return self._hemisphere

    @property
    def target_hemisphere(self):
        values = {
            "north": ["north"],
            "south": ["south"],
            "global": ["north", "south"]
        }
        return values[self.hemisphere]

    @property
    def pysiral_cfg(self):
        return self._cfg

    @property
    def l1pprocdef(self):
        return self._l1pprocdef

    @property
    def time_range(self):
        return self._time_range

    @property
    def period_segments(self):
        segments = self._time_range.get_segments("month", crop_to_period=True)
        return segments

    @property
    def output_handler_cfg(self):
        return self._output_handler_cfg

    @property
    def platform(self):
        return self._platform
Exemplo n.º 19
0
class L1PreProcBase(DefaultLoggingClass):
    def __init__(self, cls_name, input_adapter, output_handler, cfg):

        # Make sure the logger/error handler has the name of the parent class
        super(L1PreProcBase, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # The class that translates a given input file into an L1BData object
        self.input_adapter = input_adapter

        # Output data handler that creates l1p netCDF files from l1 data objects
        self.output_handler = output_handler

        # The configuration for the pre-processor
        self.cfg = cfg

        # The stack of Level-1 objects is a simple list
        self.l1_stack = []

    def process_input_files(self, input_file_list):
        """
        Main entry point for the Level-1 pre-processor.
        :param input_file_list: A list of full file paths for the pre-processor
        :return: None
        """

        # Validity Check
        n_input_files = len(input_file_list)
        if n_input_files == 0:
            logger.warning(
                "Passed empty input file list to process_input_files()")
            return

        # Init helpers
        prgs = ProgressIndicator(n_input_files)

        # A class that is passed to the input adapter to check if the pre-processor wants the
        # content of the current file
        polar_ocean_check = L1PreProcPolarOceanCheck(self.__class__.__name__,
                                                     self.polar_ocean_props)

        # Orbit segments may or may not be connected; therefore, the list of input files
        # needs to be processed sequentially.
        for i, input_file in enumerate(input_file_list):

            # Step 1: Read Input
            # Map the entire orbit segment into one Level-1 data object. This is the task
            # of the input adapter. The input adapter gets only the filename and the target
            # region to assess whether it is necessary to parse and transform the file content
            # for the sake of computational efficiency.
            logger.info("+ Process input file %s" % prgs.get_status_report(i))
            l1 = self.input_adapter.get_l1(input_file, polar_ocean_check)
            if l1 is None:
                logger.info(
                    "- No polar ocean data for curent job -> skip file")
                continue

            # Step 2: Extract and subset
            # The input files may contain unwanted data (low latitude/land segments). It is the job of the
            # L1PreProc child classes to return only the relevant segments over polar ocean as a list of l1 objects.
            l1_segments = self.extract_polar_ocean_segments(l1)

            # Step 3: Post-processing
            # Computationally expensive post-processing (e.g. computation of waveform shape parameters) can now be
            # executed, as the Level-1 segments are cropped to the minimal length.
            self.l1_post_processing(l1_segments)

            # Step 4: Merge orbit segments
            # Add the list of orbit segments to the l1 data stack and merge those that are connected
            # (e.g. two half orbits connected at the pole) into a single l1 object. Orbit segments that
            # are unconnected from other segments in the stack will be exported to netCDF files.
            self.l1_stack_merge_and_export(l1_segments)

        # Step 5: Export the last item in the stack
        l1_merged = self.l1_get_merged_stack()
        self.l1_export_to_netcdf(l1_merged)

    def l1_post_processing(self, l1_segments):
        """
        Apply the post-processing procedures defined in the l1p processor definition file.

        :param l1_segments: A list of Level-1 data objects
        :return: None, the l1_segments are changed in place
        """

        # Get the post processing options
        pre_processing_items = self.cfg.get("pre_processing_items", None)
        if pre_processing_items is None:
            logger.info("No pre processing items defined")
            return

        # Measure time for the different post processors
        timer = StopWatch()

        # Get the list of post-processing items
        for pp_item in pre_processing_items:
            timer.start()
            pp_class = get_cls(pp_item["module_name"],
                               pp_item["class_name"],
                               relaxed=False)
            post_processor = pp_class(**pp_item["options"])
            for l1 in l1_segments:
                post_processor.apply(l1)
            timer.stop()
            msg = "- L1 pre-processing item `%s` applied in %.3f seconds" % (
                pp_item["label"], timer.get_seconds())
            logger.info(msg)

    def l1_stack_merge_and_export(self, l1_segments):
        """
        Add the input Level-1 segments to the l1 stack and export the unconnected ones as l1p netCDF products
        :param l1_segments:
        :return: None
        """

        # Loop over all input segments
        for l1 in l1_segments:

            # Test if l1 segment is connected to stack
            is_connected = self.l1_is_connected_to_stack(l1)

            # Case 1: Segment is connected
            # -> Add the l1 segment to the stack and check the next segment.
            if is_connected:
                logger.info("- L1 segment connected -> add to stack")
                self.l1_stack.append(l1)

            # Case 2: Segment is not connected
            # -> In this case all items in the l1 stack will be merged and the merged l1 object will be
            #    exported to a l1p netCDF product. The current l1 segment that was unconnected to the stack
            #    will become the next stack
            else:
                logger.info(
                    "- L1 segment unconnected -> exporting current stack")
                l1_merged = self.l1_get_merged_stack()
                self.l1_export_to_netcdf(l1_merged)
                self.l1_stack = [l1]

    def l1_is_connected_to_stack(self, l1):
        """
        Check if the start time of file i and the stop time of file i-1 indicate neighbouring orbit segments
        (e.g. due to radar mode change, or two half-orbits).
        :param l1:
        :return: Flag if l1 is connected (True or False)
        """

        # Stack is empty (return True -> create a new stack)
        if self.stack_len == 0:
            return True

        # Test if segments are adjacent based on the time gap between them
        # NOTE: use `total_seconds()` (and avoid shadowing `datetime.timedelta`)
        #       so that gaps of more than one day are handled correctly
        time_gap = l1.info.start_time - self.last_stack_item.info.stop_time
        threshold = self.cfg.orbit_segment_connectivity.max_connected_segment_timedelta_seconds
        is_connected = time_gap.total_seconds() <= threshold

        return is_connected
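        # Hedged example (hypothetical values): with a configured threshold of 10 seconds,
        # a gap of 1.5 seconds between the stop time of the last stack item and the start
        # time of the new segment yields is_connected = True, while a gap of 60 seconds
        # causes the current stack to be exported and a new stack to be started.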

    def l1_get_merged_stack(self):
        """
        Concatenates all items in the l1 stack and returns the merged Level-1 data object.
        Note: This operation leaves the state of the Level-1 stack untouched
        :return: Level-1 data object
        """
        l1_merged = self.l1_stack[0]
        for l1 in self.l1_stack[1:]:
            l1_merged.append(l1)
        return l1_merged

    def l1_export_to_netcdf(self, l1):
        """
        Exports the Level-1 object as an l1p netCDF file
        :param l1: The Level-1 object to be exported
        :return:
        """

        if "export_minimum_n_records" in self.cfg:
            minimum_n_records = self.cfg.export_minimum_n_records
        else:
            minimum_n_records = 0

        if l1.n_records >= minimum_n_records:
            self.output_handler.export_to_netcdf(l1)
            logger.info("- Written l1p product: %s" %
                        self.output_handler.last_written_file)
        else:
            logger.info("- Orbit segment below minimum size (%g), skipping" %
                        l1.n_records)

    def trim_single_hemisphere_segment_to_polar_region(self, l1):
        """
        Extract polar region of interest from a segment that is either north or south (not global)

        :param l1: Input Level-1 object
        :return: Trimmed Input Level-1 object
        """
        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        is_polar = np.abs(l1.time_orbit.latitude) >= polar_threshold
        polar_subset = np.where(is_polar)[0]
        if len(polar_subset) != l1.n_records:
            l1.trim_to_subset(polar_subset)
        return l1

    def trim_two_hemisphere_segment_to_polar_regions(self, l1):
        """
        Extract polar regions of interest from a segment that is either north, south or both. The method will
        preserve the order of the hemispheres

        :param l1: Input Level-1 object
        :return: List of Trimmed Input Level-1 objects
        """

        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        l1_list = []

        # Loop over the two hemispheres
        for hemisphere in self.cfg.polar_ocean.target_hemisphere:

            if hemisphere == "north":
                is_polar = l1.time_orbit.latitude >= polar_threshold

            elif hemisphere == "south":
                is_polar = l1.time_orbit.latitude <= (-1.0 * polar_threshold)

            else:
                msg = "Unknown hemisphere: %s [north|south]" % hemisphere
                self.error.add_error("invalid-hemisphere", msg)
                self.error.raise_on_error()

            # Extract the subset (if applicable)
            polar_subset = np.where(is_polar)[0]
            n_records_subset = len(polar_subset)

            # is true subset -> add subset to output list
            if n_records_subset != l1.n_records and n_records_subset > 0:
                l1_segment = l1.extract_subset(polar_subset)
                l1_list.append(l1_segment)

            # entire segment in polar region -> add full segment to output list
            elif n_records_subset == l1.n_records:
                l1_list.append(l1)

            # no coverage in target hemisphere -> remove segment from list
            else:
                pass

        # Last step: Sort the list to maintain temporal order
        # (only if more than 1 segment)
        if len(l1_list) > 1:
            l1_list = sorted(l1_list, key=attrgetter("tcs"))

        return l1_list

    def trim_full_orbit_segment_to_polar_regions(self, l1):
        """
        Extract polar regions of interest from a segment that is either north, south or both. The method will
        preserve the order of the hemispheres

        :param l1: Input Level-1 object
        :return: List of Trimmed Input Level-1 objects
        """

        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        l1_list = []

        # Loop over the two hemispheres
        for hemisphere in self.cfg.polar_ocean.target_hemisphere:

            # Compute full polar subset range
            if hemisphere == "north":
                is_polar = l1.time_orbit.latitude >= polar_threshold

            elif hemisphere == "south":
                is_polar = l1.time_orbit.latitude <= (-1.0 * polar_threshold)

            else:
                msg = "Unknown hemisphere: %s [north|south]" % hemisphere
                self.error.add_error("invalid-hemisphere", msg)
                self.error.raise_on_error()

            # Step: Extract the polar ocean segment for the given hemisphere
            polar_subset = np.where(is_polar)[0]
            n_records_subset = len(polar_subset)

            # Safety check
            if n_records_subset == 0:
                continue
            l1_segment = l1.extract_subset(polar_subset)

            # Step: Trim non-ocean segments
            l1_segment = self.trim_non_ocean_data(l1_segment)

            # Step: Split the polar subset to its marine regions
            l1_segment_list = self.split_at_large_non_ocean_segments(
                l1_segment)

            # Step: append the ocean segments
            l1_list.extend(l1_segment_list)

        # Last step: Sort the list to maintain temporal order
        # (only if more than 1 segment)
        if len(l1_list) > 1:
            l1_list = sorted(l1_list, key=attrgetter("tcs"))

        return l1_list

    def filter_small_ocean_segments(self, l1):
        """
        This method sets the surface type flag of very small ocean segments to land. This action should prevent
        large portions of land staying in the l1 segment if a small fjord etc. is crossed. It should also filter
        out smaller ocean segments that do not have a realistic chance of freeboard retrieval.

        :param l1: A pysiral.l1bdata.Level1bData instance
        :return: filtered l1 object
        """

        # Minimum size for valid ocean segments
        ocean_mininum_size_nrecords = self.cfg.polar_ocean.ocean_mininum_size_nrecords

        # Get the clusters of ocean parts in the l1 object
        ocean_flag = l1.surface_type.get_by_name("ocean").flag
        land_flag = l1.surface_type.get_by_name("land").flag
        segments_len, segments_start, not_ocean = rle(ocean_flag)
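        # Hedged illustration of the assumed run-length encoding output: for an input
        # flag array [F, T, T, F, F, T] the runs would be described by
        #   segments_len   -> [1, 2, 2, 1]
        #   segments_start -> [0, 1, 3, 5]
        # with the third return value giving the flag value of each run.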

        # Find smaller than threshold ocean segments
        small_cluster_indices = np.where(
            segments_len < ocean_mininum_size_nrecords)[0]

        # Do not mess with the l1 object if not necessary
        if len(small_cluster_indices) == 0:
            return l1

        # Set land flag -> True for small ocean segments
        for small_cluster_index in small_cluster_indices:
            i0 = segments_start[small_cluster_index]
            i1 = i0 + segments_len[small_cluster_index]
            land_flag[i0:i1] = True

        # Update the l1 surface type flag by re-setting the land flag
        l1.surface_type.add_flag(land_flag, "land")

        # All done
        return l1

    def trim_non_ocean_data(self, l1):
        """
        Remove leading and trailing data that is not of type ocean.
        :param l1: The input Level-1 object
        :return: The trimmed Level-1 object, or None if the segment contains no ocean data
        """

        ocean = l1.surface_type.get_by_name("ocean")
        first_ocean_index = get_first_array_index(ocean.flag, True)
        last_ocean_index = get_last_array_index(ocean.flag, True)
        if first_ocean_index is None or last_ocean_index is None:
            return None
        n = l1.info.n_records - 1
        is_full_ocean = first_ocean_index == 0 and last_ocean_index == n
        if not is_full_ocean:
            ocean_subset = np.arange(first_ocean_index, last_ocean_index + 1)
            l1.trim_to_subset(ocean_subset)
        return l1
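        # Hedged example (hypothetical flag array): for surface types
        # [land, land, ocean, ocean, land, ocean, land] the method keeps the
        # index range 2..5, i.e. leading and trailing non-ocean records are
        # removed while interior land records are preserved.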

    def split_at_large_non_ocean_segments(self, l1):
        """
        Identify larger segments that are not ocean (land, land ice) and split the segments if necessary.
        The return value will always be a list of Level-1 object instances, even if no non-ocean data
        segment is present in the input data file
        :param l1: Input Level-1 object
        :return: a list of Level-1 objects.
        """

        # Identify connected non-ocean segments within the orbit
        ocean = l1.surface_type.get_by_name("ocean")
        not_ocean_flag = np.logical_not(ocean.flag)
        segments_len, segments_start, not_ocean = rle(not_ocean_flag)
        landseg_index = np.where(not_ocean)[0]

        # no non-ocean segments, return full segment
        if len(landseg_index) == 0:
            return [l1]

        # Test if non-ocean segments are above the size threshold that will require a split of the segment.
        # The motivation behind this step is to keep l1p data files as small as possible, while tolerating
        # smaller non-ocean sections
        threshold = self.cfg.polar_ocean.allow_nonocean_segment_nrecords
        large_landsegs_index = np.where(
            segments_len[landseg_index] > threshold)[0]
        large_landsegs_index = landseg_index[large_landsegs_index]

        # no segment split necessary, return full segment
        if len(large_landsegs_index) == 0:
            return [l1]

        # Split of orbit segment required, generate individual Level-1 segments from the ocean segments
        l1_segments = []
        start_index = 0
        for index in large_landsegs_index:
            stop_index = segments_start[index]
            subset_list = np.arange(start_index, stop_index)
            l1_segments.append(l1.extract_subset(subset_list))
            start_index = segments_start[index + 1]

        # Extract the last subset
        last_subset_list = np.arange(start_index, len(ocean.flag))
        l1_segments.append(l1.extract_subset(last_subset_list))

        # Return a list of segments
        return l1_segments

    def split_at_time_discontinuities(self, l1_list):
        """
        Split l1 object(s) at discontinuities of the timestamp value and return the expanded list with l1 segments.

        :param l1_list: [list] a list of Level-1 data objects
        :return: expanded list
        """

        # Get the time gap threshold from the processor definition
        seconds_threshold = self.cfg.timestamp_discontinuities.split_at_time_gap_seconds
        dt_threshold = timedelta(seconds=seconds_threshold)

        # Output (list with l1b segments)
        l1_segments = []

        for l1 in l1_list:

            # Get timestamp discontinuities (if any)
            time = l1.time_orbit.timestamp

            # Get start/stop index pairs
            segments_start = np.array([0])
            segments_start_indices = np.where(
                np.ediff1d(time) > dt_threshold)[0] + 1
            segments_start = np.append(segments_start, segments_start_indices)

            segments_stop = segments_start[1:] - 1
            segments_stop = np.append(segments_stop, len(time) - 1)

            # Check if only one segment found
            if len(segments_start) == 1:
                l1_segments.append(l1)
                continue

            # Extract subsets
            segment_indices = zip(segments_start, segments_stop)
            for start_index, stop_index in segment_indices:
                subset_indices = np.arange(start_index, stop_index + 1)
                l1_segment = l1.extract_subset(subset_indices)
                l1_segments.append(l1_segment)

        return l1_segments
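        # Hedged example (hypothetical timestamps): with a 10-second threshold and
        # time = [t0, t0+1s, t0+2s, t0+60s, t0+61s] the discontinuity after index 2
        # yields segments_start = [0, 3] and segments_stop = [2, 4], and therefore
        # two Level-1 segments (indices 0-2 and 3-4).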

    @property
    def target_region_def(self):
        if not "polar_ocean" in self.cfg:
            msg = "Missing configuration key `polar_ocean` in Level-1 Pre-Processor Options"
            self.error.add_error("l1preproc-missing-option", msg)
            self.error.raise_on_error()
        return self.cfg.polar_ocean

    @property
    def polar_ocean_props(self):
        if not "polar_ocean" in self.cfg:
            msg = "Missing configuration key `polar_ocean` in Level-1 Pre-Processor Options"
            self.error.add_error("l1preproc-missing-option", msg)
            self.error.raise_on_error()
        return self.cfg.polar_ocean

    @property
    def orbit_segment_connectivity_props(self):
        if not "orbit_segment_connectivity" in self.cfg:
            msg = "Missing configuration key `orbit_segment_connectivity` in Level-1 Pre-Processor Options"
            self.error.add_error("l1preproc-missing-option", msg)
            self.error.raise_on_error()
        return self.cfg.orbit_segment_connectivity

    @property
    def stack_len(self):
        return len(self.l1_stack)

    @property
    def last_stack_item(self):
        return self.l1_stack[-1]
Exemplo n.º 20
0
class ESACryoSat2PDSBaselineD(DefaultLoggingClass):
    def __init__(self, cfg, raise_on_error=False):

        cls_name = self.__class__.__name__
        super(ESACryoSat2PDSBaselineD, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None
        self.filepath = None
        self.l1 = None

    @staticmethod
    def translate_opmode2radar_mode(op_mode):
        """ Converts the ESA operation mode str in the pysiral compliant version """
        translate_dict = {"sar": "sar", "lrm": "lrm", "sarin": "sin"}
        return translate_dict.get(op_mode, None)

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Main entry point to the CryoSat-2 Baseline-D Input Adapter
        :param filepath:
        :param polar_ocean_check:
        :return:
        """

        timer = StopWatch()
        timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not Path(filepath).is_file():
            msg = "Not a valid file: %s" % filepath
            logger.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse the input file
        self._read_input_netcdf(filepath, attributes_only=True)
        if self.nc is None:
            return self.empty

        # CAVEAT: An issue has been identified with baseline-D L1b data when the orbit solution
        # is based on predicted orbits and not the DORIS solution (Nov 2020).
        # The source of the orbit data can be identified by the `vector_source` global attribute
        # in the L1b source files. This can take/should take the following values:
        #
        #     nrt:  "fos predicted" (predicted orbit)
        #           "doris_navigator" (DORIS Nav solution)
        #
        #     rep:  "doris_precise" (final and precise DORIS solution)
        #
        # To prevent l1 data with erroneous orbit solution entering the processing chain, l1 data
        # with the predicted orbit can be excluded here. The process of exclusion requires setting
        # a flag in the l1 processor definition for the input handler:
        #
        #   exclude_predicted_orbits: True
        #
        exclude_predicted_orbits = self.cfg.get("exclude_predicted_orbits",
                                                False)
        is_predicted_orbit = self.nc.vector_source.lower().strip() == "fos predicted"
        logger.debug(self.nc.vector_source.lower().strip())
        if is_predicted_orbit and exclude_predicted_orbits:
            logger.warning("Predicted orbit solution detected -> skip file")
            return self.empty

        # Get metadata
        self._set_input_file_metadata()
        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(
                self.l1.info)
            if not has_polar_ocean_data:
                timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        timer.stop()
        logger.info("- Created L1 object in %.3f seconds" %
                    timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def get_wfm_range(window_delay, n_range_bins):
        """
        Returns the range for each waveform bin based on the window delay and the number of range bins
        :param window_delay: The two-way delay to the center of the range window in seconds
        :param n_range_bins: The number of range bins (256: sar, 512: sin)
        :return: The range for each waveform bin as array (time, ns)
        """
        lightspeed = 299792458.0
        bandwidth = 320000000.0
        # The two-way delay time gives the distance to the central bin
        central_window_range = window_delay * lightspeed / 2.0
        # Calculate the offset from the center to the first range bin
        window_size = (n_range_bins * lightspeed) / (4.0 * bandwidth)
        first_bin_offset = window_size / 2.0
        # Calculate the range increment for each bin
        range_increment = np.arange(n_range_bins) * lightspeed / (4.0 *
                                                                  bandwidth)

        # Reshape the arrays
        range_offset = np.tile(range_increment,
                               (window_delay.shape[0], 1)) - first_bin_offset
        window_range = np.tile(central_window_range,
                               (n_range_bins, 1)).transpose()

        # Compute the range for each bin and return
        wfm_range = window_range + range_offset
        return wfm_range
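        # Hedged usage sketch (hypothetical values): for a SAR waveform with 256 range
        # bins and a window delay array of shape (n_records,), the returned array has
        # shape (n_records, 256), with each row spanning the range window centred on
        # window_delay * lightspeed / 2.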

    @staticmethod
    def interp_1hz_to_20hz(variable_1hz, time_1hz, time_20hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1hz: a 1Hz variable array
        :param time_1hz: 1Hz reference time
        :param time_20hz: 20 Hz reference time
        :return: the interpolated 20Hz variable and an error status flag
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1hz,
                                     variable_1hz,
                                     bounds_error=False,
                                     **kwargs)
            variable_20hz = f(time_20hz)
        except ValueError:
            fill_value = np.nan
            variable_20hz = np.full(time_20hz.shape, fill_value)
            error_status = True
        return variable_20hz, error_status
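        # Hedged usage sketch (hypothetical arrays, not from the source):
        #
        #   var_20hz, err = self.interp_1hz_to_20hz(
        #       np.array([0.0, 1.0, 2.0]),        # 1Hz variable
        #       np.array([0.0, 10.0, 20.0]),      # 1Hz reference time
        #       np.linspace(0.0, 20.0, 41))       # 20Hz reference time
        #
        # `err` is True only if the interpolation raised a ValueError, in which case
        # the output is filled with NaN.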

    def _read_input_netcdf(self, filepath, **kwargs):
        """ Read the netCDF file via xarray """
        try:
            self.nc = xarray.open_dataset(filepath,
                                          decode_times=False,
                                          mask_and_scale=True)
        except Exception:
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.nc = None
            logger.warning(msg)
            return

    def _set_input_file_metadata(self):
        """ Fill the product info """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        info.set_attribute("mission", "cryosat2")
        info.set_attribute("mission_sensor", "siral")
        info.set_attribute("mission_data_version", "D")
        info.set_attribute("orbit", metadata["abs_orbit_start"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source", Path(self.filepath).name)
        info.set_attribute(
            "timeliness",
            cs2_procstage2timeliness(metadata["processing_stage"]))

        # Time-Orbit Metadata
        lats = [
            float(metadata["first_record_lat"]) * 1e-6,
            float(metadata["last_record_lat"]) * 1e-6
        ]
        lons = [
            float(metadata["first_record_lon"]) * 1e-6,
            float(metadata["last_record_lon"]) * 1e-6
        ]
        info.set_attribute("start_time",
                           parse_datetime_str(
                               metadata["first_record_time"][4:]))  # TAI=....
        info.set_attribute("stop_time",
                           parse_datetime_str(
                               metadata["last_record_time"][4:]))  # TAI=....
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 0.0
            if metadata["sir_op_mode"].strip().lower() == mode:
                percent_value = 100.
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent",
                           float(metadata["open_ocean_percent"]) * 0.01)

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that xarray does not automatically decode time, since it is
        #       difficult to work with the numpy datetime64 date format. Better to compute datetimes using
        #       a known num2pydate conversion
        tai_datetime = num2pydate(self.nc.time_20_ku.values,
                                  units=self.nc.time_20_ku.units)
        converter = UTCTAIConverter()
        utc_timestamp = converter.tai2utc(tai_datetime, check_all=False)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(self.nc.lon_20_ku.values,
                                        self.nc.lat_20_ku.values,
                                        self.nc.alt_20_ku.values,
                                        self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        self.l1.time_orbit.set_antenna_attitude(
            self.nc.off_nadir_pitch_angle_str_20_ku.values,
            self.nc.off_nadir_roll_angle_str_20_ku.values,
            self.nc.off_nadir_yaw_angle_str_20_ku.values)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag
        :return: None
        """

        # Get the waveform
        # NOTE: Convert the waveform units to Watts. From the documentation, the scaling is applied as follows:
        #       pwr_waveform_20_ku(time, ns) * echo_scale_factor_20_ku(time, ns) * 2 ^ echo_scale_pwr_20_ku(time)
        wfm_linear = self.nc.pwr_waveform_20_ku.values

        # Get the shape of the waveform array
        dim_time, dim_ns = wfm_linear.shape

        # Scaling parameters are 1D -> replicate to the same shape as the waveform array
        echo_scale_factor = self.nc.echo_scale_factor_20_ku.values
        echo_scale_pwr = self.nc.echo_scale_pwr_20_ku.values
        echo_scale_factor = np.tile(echo_scale_factor, (dim_ns, 1)).transpose()
        echo_scale_pwr = np.tile(echo_scale_pwr, (dim_ns, 1)).transpose()

        # Convert the waveform from linear counts to Watts
        wfm_power = wfm_linear * echo_scale_factor * 2.0**echo_scale_pwr

        # Get the window delay
        # From the documentation:
        #   Calibrated 2-way window delay: distance from CoM to middle range window (at sample ns/2 from 0).
        #   It includes all the range corrections given in the variable instr_cor_range and in the
        #   variable uso_cor_20_ku. This is a 2-way time and 2-way corrections are applied.
        window_delay = self.nc.window_del_20_ku.values

        # Convert window delay to range for each waveform range bin
        wfm_range = self.get_wfm_range(window_delay, dim_ns)

        # Make sure that parameters are float and not double
        # -> Important for cythonized algorithm parts (ctfrma specifically uses floats)
        wfm_power = wfm_power.astype(np.float32)
        wfm_range = wfm_range.astype(np.float32)

        # Set the waveform
        op_mode = str(self.nc.attrs["sir_op_mode"].strip().lower())
        radar_mode = self.translate_opmode2radar_mode(op_mode)
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # --- Get the valid flag ---
        #
        # From the documentation
        # :comment = "Measurement confidence flags. Generally the MCD flags indicate problems when set.
        #             If the whole MCD is 0 then no problems or non-nominal conditions were detected.
        #             Serious errors are indicated by setting the most significant bit, i.e. block_degraded,
        #             in which case the block must not be processed. Other error settings can be regarded
        #             as warnings.";
        #
        # :flag_masks = -2147483648, block_degraded        <- most severe error
        #                1073741824, blank_block
        #                536870912, datation_degraded
        #                268435456, orbit_prop_error
        #                134217728, orbit_file_change
        #                67108864, orbit_gap
        #                33554432, echo_saturated
        #                16777216, other_echo_error
        #                8388608, sarin_rx1_error
        #                4194304, sarin_rx2_error
        #                2097152, window_delay_error
        #                1048576, agc_error
        #                524288, cal1_missing
        #                262144, cal1_default
        #                131072, doris_uso_missing
        #                65536, ccal1_default
        #                32768, trk_echo_error
        #                16384, echo_rx1_error
        #                8192, echo_rx2_error
        #                4096, npm_error                   <- Defined as maximum permissible error level
        #                2048, cal1_pwr_corr_type
        #                128, phase_pert_cor_missing       <- Seems to be always set for SARin
        #                64, cal2_missing
        #                32, cal2_default
        #                16, power_scale_error
        #                8, attitude_cor_missing
        #                1, phase_pert_cor_default
        measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        valid_flag = (measurement_confident_flag >= 0) & (measurement_confident_flag <= 4096)
        self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1hz = self.nc.time_cor_01.values
        time_20hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            pds_var_name = self.cfg.range_correction_targets[key]
            variable_1hz = getattr(self.nc, pds_var_name)
            variable_20hz, error_status = self.interp_1hz_to_20hz(
                variable_1hz.values, time_1hz, time_20hz)
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % pds_var_name
                logger.warning(msg)
            self.l1.correction.set_parameter(key, variable_20hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object
        NOTE: In the current state (TEST dataset), the surface type flag is only 1 Hz. A nearest neighbour
              interpolation is used to get the 20Hz surface type flag.
        :return: None
        """

        # Get the reference times for interpolating the flag from 1Hz -> 20Hz
        time_1hz = self.nc.time_cor_01.values
        time_20hz = self.nc.time_20_ku.values

        # Interpolate 1Hz surface type flag to 20 Hz
        surface_type_1hz = self.nc.surf_type_01.values
        surface_type_20hz, error_status = self.interp_1hz_to_20hz(
            surface_type_1hz, time_1hz, time_20hz, kind="nearest")
        if error_status:
            msg = "- Error in 20Hz interpolation for variable `surf_type_01` -> set only dummy"
            logger.warning(msg)

        # Set the flag
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = surface_type_20hz == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz
        In addition, a few legacy parameters are computed based on the waveform counts, which are only available at
        this stage. Computation of other parameters such as sigma_0, leading_edge_width, ... is moved to the
        post-processing.
        :return: None
        """
        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20hz, key)

        # Calculate Parameters from waveform counts
        # XXX: This is a legacy of the CS2AWI IDL processor
        #      Threshold defined for waveform counts not power in dB
        wfm_counts = self.nc.pwr_waveform_20_ku.values

        # Calculate the OCOG Parameter (CryoSat-2 notation)
        ocog = CS2OCOGParameter(wfm_counts)
        self.l1.classifier.add(ocog.width, "ocog_width")
        self.l1.classifier.add(ocog.amplitude, "ocog_amplitude")

        # Calculate the Peakiness (CryoSat-2 notation)
        pulse = CS2PulsePeakiness(wfm_counts)
        self.l1.classifier.add(pulse.peakiness, "peakiness")
        self.l1.classifier.add(pulse.peakiness_r, "peakiness_r")
        self.l1.classifier.add(pulse.peakiness_l, "peakiness_l")

        # fmi version: Calculate the LTPP
        ltpp = CS2LTPP(wfm_counts)
        self.l1.classifier.add(ltpp.ltpp, "late_tail_to_peak_power")

        # Get satellite velocity vector (classifier needs to be vector -> manual extraction needed)
        satellite_velocity_vector = self.nc.sat_vel_vec_20_ku.values
        self.l1.classifier.add(satellite_velocity_vector[:, 0],
                               "satellite_velocity_x")
        self.l1.classifier.add(satellite_velocity_vector[:, 1],
                               "satellite_velocity_y")
        self.l1.classifier.add(satellite_velocity_vector[:, 2],
                               "satellite_velocity_z")

    @property
    def empty(self):
        return None
Exemplo n.º 21
0
class Level1POutputHandler(DefaultLoggingClass):
    """
    The output handler for l1p product files
    NOTE: This is not a subclass of OutputHandlerBase due to the special nature of pysiral l1p products
    """
    def __init__(self, cfg):
        cls_name = self.__class__.__name__
        super(Level1POutputHandler, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)
        self.cfg = cfg

        self.pysiral_cfg = psrlcfg

        # Init class properties
        self._path = None
        self._filename = None

    def remove_old_if_applicable(self, period):
        logger.warning("Not implemented: self.remove_old_if_applicable")
        return

    def export_to_netcdf(self, l1):
        """
        Workflow to export a Level-1 object to l1p netCDF product. The workflow includes the generation of the
        output path (if applicable).
        :param l1: The Level-1 object to be exported
        :return: None
        """

        # Get filename and path
        self.set_output_filepath(l1)

        # Check if path exists
        Path(self.path).mkdir(exist_ok=True, parents=True)

        # Export the data object
        ncfile = L1bDataNC()
        ncfile.l1b = l1
        ncfile.output_folder = self.path
        ncfile.filename = self.filename
        ncfile.export()

    def set_output_filepath(self, l1):
        """
        Sets the class properties required for the file export
        :param l1: The Level-1 object
        :return: None
        """

        local_machine_def_tag = self.cfg.get("local_machine_def_tag", None)
        if local_machine_def_tag is None:
            msg = "Missing mandatory option %s in l1p processor definition file -> aborting"
            msg = msg % "root.output_handler.options.local_machine_def_tag"
            msg = msg + "\nOptions: \n" + self.cfg.makeReport()
            self.error.add_error("missing-option", msg)
            self.error.raise_on_error()

        # TODO: This is work in progress
        filename_template = "pysiral-l1p-{platform}-{source}-{timeliness}-{hemisphere}-{tcs}-{tce}-{file_version}.nc"
        time_fmt = "%Y%m%dT%H%M%S"
        values = {
            "platform": l1.info.mission,
            "source": self.cfg.version.source_file_tag,
            "timeliness": l1.info.timeliness,
            "hemisphere": l1.info.hemisphere,
            "tcs": l1.time_orbit.timestamp[0].strftime(time_fmt),
            "tce": l1.time_orbit.timestamp[-1].strftime(time_fmt),
            "file_version": self.cfg.version.version_file_tag
        }
        self._filename = filename_template.format(**values)

        local_repository = self.pysiral_cfg.local_machine.l1b_repository
        export_folder = Path(
            local_repository[l1.info.mission][local_machine_def_tag]["l1p"])
        yyyy = "%04g" % l1.time_orbit.timestamp[0].year
        mm = "%02g" % l1.time_orbit.timestamp[0].month
        self._path = export_folder / l1.info.hemisphere / yyyy / mm
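        # Hedged example (hypothetical values): for a CryoSat-2 NRT northern-hemisphere
        # segment starting 2020-03-05 12:00:00 the template could resolve to e.g.
        #   pysiral-l1p-cryosat2-<source>-nrt-north-20200305T120000-<tce>-<file_version>.nc
        # placed in <l1p repository>/north/2020/03/.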

    @property
    def path(self):
        return Path(self._path)

    @property
    def filename(self):
        return self._filename

    @property
    def last_written_file(self):
        return self.path / self.filename
Exemplo n.º 22
0
class Level2ProcArgParser(DefaultLoggingClass):
    def __init__(self):
        super(Level2ProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        self.pysiral_config = psrlcfg
        self._args = None

    def parse_command_line_arguments(self):
        # use python module argparse to parse the command line arguments
        # (first validation of required options and data types)
        self._args = self.parser.parse_args()

        # Add additional check to make sure either `l1b-files` or
        # `start` and `stop` are set
        l1b_file_preset_is_set = self._args.l1b_files_preset is not None
        start_and_stop_is_set = self._args.start_date is not None and \
            self._args.stop_date is not None

        if l1b_file_preset_is_set and start_and_stop_is_set:
            self.parser.error("-start & -stop and -l1b-files are exclusive")

        if not l1b_file_preset_is_set and not start_and_stop_is_set:
            self.parser.error("either -start & -stop or -l1b-files required")

    def critical_prompt_confirmation(self):

        # Any confirmation prompts can be overridden by --no-critical-prompt
        no_prompt = self._args.no_critical_prompt

        # If --remove_old is set, all previous l2 files will be
        # erased for all months
        if self._args.remove_old and not no_prompt:
            message = "You have selected to remove all previous " + \
                "l2 files for the requested period\n" + \
                "(Note: use --no-critical-prompt to skip confirmation)\n" + \
                "Enter \"YES\" to confirm and continue: "
            result = input(message)

            if result != "YES":
                sys.exit(1)

    @property
    def parser(self):
        # XXX: Move back to caller

        # Take the command line options from default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # List of command line options required for the Level-2 processor
        # (argname, argtype (see config module), destination, required flag)
        options = [("-l2-settings", "l2-settings", "l2_settings", True),
                   ("-run-tag", "run-tag", "run_tag", False),
                   ("-start", "date", "start_date", False),
                   ("-stop", "date", "stop_date", False),
                   ("-l1b-files", "l1b_files", "l1b_files_preset", False),
                   ("-exclude-month", "exclude-month", "exclude_month", False),
                   ("-input-version", "input-version", "input_version", False),
                   ("-l2-output", "l2-output", "l2_output", False),
                   ("--remove-old", "remove-old", "remove_old", False),
                   ("--no-critical-prompt", "no-critical-prompt",
                    "no_critical_prompt", False),
                   ("--no-overwrite-protection", "no-overwrite-protection",
                    "overwrite_protection", False),
                   ("--overwrite-protection", "overwrite-protection",
                    "overwrite_protection", False)]

        # create the parser
        parser = argparse.ArgumentParser()
        for option in options:
            argname, argtype, destination, required = option
            argparse_dict = clargs.get_argparse_dict(argtype, destination,
                                                     required)
            parser.add_argument(argname, **argparse_dict)

        parser.set_defaults(overwrite_protection=True)

        return parser

    @property
    def arg_dict(self):
        """ Return the arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        return self._args.start_date

    @property
    def stop(self):
        return self._args.stop_date

    @property
    def run_tag(self):
        """ run_tag is a str or relative path that determines the output directory for
        the Level-2 processor. If the -run-tag option is not specified, the output
        directory will be the `product_repository` specification in `local_machine_def`
        with the l2 settings file basename as subfolder.

        One can however specify a custom string, or a relative path, with subfolders
        defined by `\` or `/`, e.g.

        Examples:
            -run-tag cs2awi_v2p0_nrt
            -run-tag c3s/cdr/cryosat2/v1p0/nh
        """

        # Get from command line arguments (default: None)
        run_tag = self._args.run_tag

        # If argument is empty use the basename of the l2 settings file
        if run_tag is None:
            run_tag = self._args.l2_settings
            # Settings file may be specified as full path and not just the id
            if Path(run_tag).is_file():
                run_tag = Path(run_tag).stem

        # split the run-tag on potential path separators
        run_tag = re.split(r'[\\|/]', run_tag)
        return run_tag
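        # Hedged examples (taken from the docstring above):
        #   "cs2awi_v2p0_nrt"          -> ["cs2awi_v2p0_nrt"]
        #   "c3s/cdr/cryosat2/v1p0/nh" -> ["c3s", "cdr", "cryosat2", "v1p0", "nh"]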

    @property
    def exclude_month(self):
        return self._args.exclude_month

    @property
    def overwrite_protection(self):
        return self._args.overwrite_protection

    @property
    def l2_settings_file(self):
        l2_settings = self._args.l2_settings
        filename = self.pysiral_config.get_settings_file(
            "proc", "l2", l2_settings)
        if filename is None:
            msg = "Invalid l2 settings filename or id: %s\n" % l2_settings
            msg = msg + " \nRecognized Level-2 processor setting ids:\n"
            for l2_settings_id in self.pysiral_config.get_setting_ids(
                    "proc", "l2"):
                msg = msg + "  " + l2_settings_id + "\n"
            self.error.add_error("invalid-l2-settings", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l1b_version(self):
        return self._args.input_version

    @property
    def l1b_predef_files(self):
        l1b_files = glob.glob(self._args.l1b_files_preset)
        return l1b_files

    @property
    def l2_output(self):
        l2_output = self._args.l2_output
        filename = self.pysiral_config.get_settings_file(
            "output", "l2i", l2_output)
        if filename is None:
            msg = "Invalid l2 outputdef filename or id: %s\n" % l2_output
            msg = msg + " \nRecognized Level-2 output definitions ids:\n"
            l2_output_ids = self.pysiral_config.get_setting_ids(
                "output", "l2i")
            for l2_output_id in l2_output_ids:
                msg = msg + "    - " + l2_output_id + "\n"
            self.error.add_error("invalid-l2-outputdef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def is_time_range_request(self):
        return self._args.l1b_files_preset is None

    @property
    def remove_old(self):
        return self._args.remove_old and not self._args.overwrite_protection
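# Usage sketch: minimal illustration of how the argument-parser class above is
# typically driven. The call order is inferred from the properties shown; the
# run-tag value is invented for illustration.
#
#   job_args.parse_command_line_arguments()
#   settings_file = job_args.l2_settings_file   # resolved filename (or error raised)
#   run_tag_dirs = job_args.run_tag             # e.g. ["c3s", "cdr", "cryosat2", "v1p0", "nh"]
#   if job_args.is_time_range_request:
#       period = (job_args.start, job_args.stop)
#   clean_up = job_args.remove_old              # only True if overwrite protection is off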
Exemplo n.º 23
0
class OutputHandlerBase(DefaultLoggingClass):
    """
    A class that defines properties of output files (content, location, format)
    based on the output definition, data container and other processor settings
    """

    subfolder_format = {"month": "%02g", "year": "%04g", "day": "%02g"}

    def __init__(self,
                 output_def,
                 applicable_data_level=None,
                 subfolder_tags=None,
                 default_file_location=None):
        """
        Init the output handler with the content of the output definition file and
        keywords specific for the data processing levels. These keywords have to be
        set during the initialization of the parent (this) class.

        TODO: Move applicable data level and subfolder tags to output definition file
        TODO: Remove default_file_location

        :param output_def: (str or pathlib.Path): The full file path to the output definition file
        :param applicable_data_level: (int) Intended processing level for validation of output definition
        :param subfolder_tags: (str list) A list of intended sub-folders and their meaning
            (example ["year", "month"])
        :param default_file_location: A list, relative to the pysiral resource directory, of sub-directories
            and filenames that link to the default output definition for the respective
            data processing level (deprecated)
        """

        # Init the parent
        super(OutputHandlerBase, self).__init__(self.__class__.__name__)
        self.pysiral_config = psrlcfg
        self.error = ErrorStatus()
        self._basedir = "n/a"

        # Attributes
        self._doi = None
        self.subfolders = None
        self.subfolder_tags = subfolder_tags
        self.applicable_data_level = applicable_data_level
        self.default_file_location = default_file_location

        self._init_from_output_def(output_def)
        self.output_def_filename = output_def

    def fill_template_string(self, template, dataset):
        """ Fill an template string with information of a dataset
        object (in this case Level2Data) """
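        # Example (hypothetical template and attribute values):
        #   template = "l2i_{mission_id}_{startdt:%Y%m%dT%H%M%S}.nc"
        #   with mission_id -> "cryosat2" and startdt -> "20190401T120000"
        #   returns "l2i_cryosat2_20190401T120000.nc"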
        attributes = self.get_template_attrs(template)
        result = str(template)
        for attribute in attributes:
            attribute_name, option, placeholder = attribute
            attribute = dataset.get_attribute(attribute_name, *option)
            if attribute is None:
                attribute = "unknown"
            result = result.replace(placeholder, attribute)
        return result

    def get_dt_subfolders(self, dt, subfolder_tags):
        """ Returns a list of subdirectories based on a datetime object
        (usually the start time of data collection) """
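        # Example: dt = datetime(2019, 4, 15) and subfolder_tags = ["year", "month"]
        # yields ["2019", "04"] (number formats taken from `subfolder_format`)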
        subfolders = []
        for subfolder_tag in subfolder_tags:
            parameter = getattr(dt, subfolder_tag)
            subfolder = self.subfolder_format[subfolder_tag] % parameter
            subfolders.append(subfolder)
        return subfolders

    @staticmethod
    def get_template_attrs(template):
        """ Extract attribute names and options (if defined) for a
        give template string """
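        # Example: the template "{source_mission_id:uppercase}" yields the
        # attribute name "source_mission_id", the option list ["uppercase"]
        # and the placeholder "{source_mission_id:uppercase}" (options are ";"-separated)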
        try:
            template = template.encode('utf-8').strip()
        except AttributeError:
            template = str(template)
        attr_defs = re.findall("{.*?}", str(template))
        attrs, options = [], []
        for attr_def in attr_defs:
            attr_name, _, optstr = attr_def[1:-1].partition(":")
            attrs.append(attr_name)
            options.append(optstr.split(";"))
        return zip(attrs, options, attr_defs)

    def _init_from_output_def(self, output_def):
        """ Adds the information for the output def yaml files (either
        full filename or treedict structure) """
        if Path(output_def).is_file():
            try:
                self._output_def = get_yaml_config(output_def)
            except Exception as ex:
                self.error.add_error("outputdef-parser-error", ex)
                self.error.raise_on_error()
        else:
            self._output_def = output_def
        self._validate_outputdef()

    def _set_basedir(self, basedir, create=True):
        """ Sets and and (per default) creates the main output directory """
        self._basedir = basedir
        if create:
            self._create_directory(self._basedir)

    def _create_directory(self, directory):
        """ Convinience method to create a directory and add an error
        when failed """
        Path(directory).mkdir(exist_ok=True, parents=True)
        if not Path(directory).is_dir():
            msg = "Unable to create directory: %s" % str(directory)
            self.error.add_error("directory-error", msg)

    def _get_subdirectories(self, dt):
        directory = Path(self.basedir)
        for subfolder_tag in self.subfolders:
            parameter = getattr(dt, subfolder_tag)
            subfolder = self.subfolder_format[subfolder_tag] % parameter
            directory = directory / subfolder
        return directory

    def _get_directory_from_dt(self, dt):
        subfolders = self.get_dt_subfolders(dt, self.subfolder_tags)
        return Path(self.basedir) / Path(*subfolders)

    def _validate_outputdef(self):
        """ Run a series of tests to check if a valid output definition
        has been passed. Note: these tests will only check existing
        items of the output definition. If the requested item is missing
        a separate exception will be raised """
        # Test 1: Applicable data level needs to match the output definition
        if self.applicable_data_level != self.data_level:
            msg = "outputdef data level (%g) does not match %s requirement (%g)"
            msg = msg % (self.data_level, self.__class__.__name__,
                         self.applicable_data_level)
            self.error.add_error("datalevel-mismatch", msg)
            self.error.raise_on_error()

    @property
    def has_doi(self):
        try:
            return self._doi is not None
        except AttributeError:
            return False

    @property
    def id(self):
        try:
            return self._output_def.metadata.output_id
        except (AttributeError, KeyError):
            return None

    @property
    def product_level_subfolder(self):
        subfolder = self._output_def.product_level_subfolder
        if type(subfolder) is not str:
            msg = "root.product_level_subfolder (str) missing or wrong dtype"
            self.error.add_error("outputdef-invalid", msg)
            self.error.raise_on_error()
        return subfolder

    @property
    def data_level(self):
        data_level = self._output_def.metadata.data_level
        if type(data_level) is not int:
            msg = "root.metadata.data_level (int) missing or wrong dtype"
            self.error.add_error("outputdef-invalid", msg)
            self.error.raise_on_error()
        return data_level

    @property
    def basedir(self):
        return self._basedir

    @property
    def output_def(self):
        return self._output_def

    @property
    def now_directory(self):
        """ Returns a directory suitable string with the current time """
        return datetime.now().strftime("%Y%m%dT%H%M%S")

    @property
    def variable_def(self):
        variables = sorted(list(self.output_def.variables.keys()))
        attribute_dicts = [self.output_def.variables[a] for a in variables]
        return zip(variables, attribute_dicts)
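# Usage sketch: OutputHandlerBase is meant to be subclassed per data processing
# level. The subclass name, output definition path and keyword values below are
# invented for illustration; only the constructor signature is taken from the
# sample above.
#
#   class Level2iOutputHandler(OutputHandlerBase):
#       def __init__(self, output_def):
#           super(Level2iOutputHandler, self).__init__(
#               output_def, applicable_data_level=2,
#               subfolder_tags=["year", "month"])
#
#   handler = Level2iOutputHandler("/path/to/l2i_default.yaml")
#   for variable_name, attrs in handler.variable_def:
#       print(variable_name)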
Exemplo n.º 24
0
class Level2PreProcArgParser(DefaultLoggingClass):
    def __init__(self):
        super(Level2PreProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        self._args = None

    def parse_command_line_arguments(self):
        # use python module argparse to parse the command line arguments
        # (first validation of required options and data types)
        self._args = self.parser.parse_args()

    def critical_prompt_confirmation(self):

        # Any confirmation prompts can be overridden by --no-critical-prompt
        no_prompt = self._args.no_critical_prompt

        # if --remove-old is set, all previous l2p files will be
        # erased for all months
        if self._args.remove_old and not no_prompt:
            message = "You have selected to remove all previous " + \
                "l2p files for the requested period\n" + \
                "(Note: use --no-critical-prompt to skip confirmation)\n" + \
                "Enter \"YES\" to confirm and continue: "
            result = input(message)

            if result != "YES":
                sys.exit(1)

    @property
    def parser(self):
        # XXX: Move back to caller

        # Take the command line options from default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # List of command line options required for the Level-2 pre-processor
        # (argname, argtype (see config module), destination, required flag)
        options = [("-start", "date", "start_date", False),
                   ("-stop", "date", "stop_date", False),
                   ("-l2i-product-dir", "l2i-product-dir", "l2i_product_dir",
                    True), ("-l2p-output", "l2p-output", "l2p_output", False),
                   ("-exclude-month", "exclude-month", "exclude_month", False),
                   ("-doi", "doi", "doi", False),
                   ("--remove-old", "remove-old", "remove_old", False),
                   ("--no-critical-prompt", "no-critical-prompt",
                    "no_critical_prompt", False),
                   ("--no-overwrite-protection", "no-overwrite-protection",
                    "overwrite_protection", False),
                   ("--overwrite-protection", "overwrite-protection",
                    "overwrite_protection", False)]

        # create the parser
        parser = argparse.ArgumentParser()
        for option in options:
            argname, argtype, destination, required = option
            argparse_dict = clargs.get_argparse_dict(argtype, destination,
                                                     required)
            parser.add_argument(argname, **argparse_dict)

        parser.set_defaults(overwrite_protection=False)

        return parser

    @property
    def arg_dict(self):
        """ Return the arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        return self._args.start_date

    @property
    def stop(self):
        return self._args.stop_date

    @property
    def exclude_month(self):
        return self._args.exclude_month

    @property
    def doi(self):
        return self._args.doi

    @property
    def overwrite_protection(self):
        return self._args.overwrite_protection

    @property
    def l2i_product_dir(self):
        l2i_product_dir = self._args.l2i_product_dir
        if Path(l2i_product_dir).is_dir():
            return Path(l2i_product_dir).resolve(strict=False)
        else:
            msg = "Invalid l2i product dir: %s" % str(l2i_product_dir)
            self.error.add_error("invalid-l2i-product-dir", msg)
            self.error.raise_on_error()

    @property
    def l2p_output(self):
        l2p_output = self._args.l2p_output
        filename = psrlcfg.get_settings_file("output", "l2p", l2p_output)
        if filename is None:
            msg = "Invalid l2p outputdef filename or id: %s\n" % l2p_output
            msg = msg + " \nRecognized Level-2 output definitions ids:\n"
            l2p_output_ids = psrlcfg.get_setting_ids("output", "l2p")
            for l2p_output_id in l2p_output_ids:
                msg = msg + "    - " + l2p_output_id + "\n"
            self.error.add_error("invalid-l2p-outputdef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def remove_old(self):
        return self._args.remove_old and not self._args.overwrite_protection
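# Usage sketch: typical call sequence for the Level-2 pre-processor argument
# parser above. The command line shown (including the script name) is invented
# for illustration.
#
#   $ python l2preproc_script.py -start 2019-03 -stop 2019-04 \
#         -l2i-product-dir /data/l2i -l2p-output l2p_default
#
#   job_args = Level2PreProcArgParser()
#   job_args.parse_command_line_arguments()
#   job_args.critical_prompt_confirmation()   # only prompts if --remove-old is set
#   l2i_dir = job_args.l2i_product_dir        # validated, resolved directory path
#   outputdef = job_args.l2p_output           # resolved output definition file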
Exemplo n.º 25
0
class Level3ProcArgParser(DefaultLoggingClass):
    def __init__(self):
        super(Level3ProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        self.pysiral_config = ConfigInfo()
        self._args = None

    def parse_command_line_arguments(self):
        # use python module argparse to parse the command line arguments
        # (first validation of required options and data types)
        self._args = self.parser.parse_args()

        # Add an additional check to make sure that either `l1b-files` or
        # `start` and `stop` are set


#        l1b_file_preset_is_set = self._args.l1b_files_preset is not None
#        start_and_stop_is_set = self._args.start_date is not None and \
#            self._args.stop_date is not None
#
#        if l1b_file_preset_is_set and start_and_stop_is_set:
#            self.parser.error("-start & -stop and -l1b-files are exclusive")
#
#        if not l1b_file_preset_is_set and not start_and_stop_is_set:
#            self.parser.error("either -start & -stop or -l1b-files required")

    def critical_prompt_confirmation(self):

        # Any confirmation prompts can be overridden by --no-critical-prompt
        no_prompt = self._args.no_critical_prompt

        # if --remove-old is set, all previous l3 files will be
        # erased for all months
        if self._args.remove_old and not no_prompt:
            message = "You have selected to remove all previous " + \
                "l3 files for the requested period\n" + \
                "(Note: use --no-critical-prompt to skip confirmation)\n" + \
                "Enter \"YES\" to confirm and continue: "
            result = input(message)

            if result != "YES":
                sys.exit(1)

    @property
    def parser(self):
        # XXX: Move back to caller

        # Take the command line options from default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # List of command line options required for the Level-3 processor
        # (argname, argtype (see config module), destination, required flag)
        options = [("-l2i-product-dir", "l2i-product-dir",
                    "l2i_basedir", True),
                   ("-l3-settings", "l3-settings", "l3_settings", False),
                   ("-l3-griddef", "l3-griddef", "l3_griddef", True),
                   ("-l3-output", "l3-output", "l3_output", True),
                   ("-start", "date", "start_date", True),
                   ("-stop", "date", "stop_date", True),
                   ("-period", "period", "period", False),
                   ("-doi", "doi", "doi", False),
                   ("-data-record-type", "data_record_type",
                    "data_record_type", False),
                   ("--remove-old", "remove-old", "remove_old", False),
                   ("--no-critical-prompt", "no-critical-prompt",
                    "no_critical_prompt", False)]

        # create the parser
        parser = argparse.ArgumentParser()
        for option in options:
            argname, argtype, destination, required = option
            argparse_dict = clargs.get_argparse_dict(argtype, destination,
                                                     required)
            parser.add_argument(argname, **argparse_dict)

        return parser

    @property
    def arg_dict(self):
        """ Return the arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        return self._args.start_date

    @property
    def stop(self):
        return self._args.stop_date

    @property
    def period(self):
        return self._args.period

    @property
    def doi(self):
        return self._args.doi

    @property
    def data_record_type(self):
        return self._args.data_record_type

    @property
    def l2i_product_directory(self):
        return os.path.join(self.l3_product_basedir, "l2i")

    @property
    def l3_settings_file(self):
        l3_settings = self._args.l3_settings
        filename = self.pysiral_config.get_settings_file(
            "proc", "l3", l3_settings)
        if filename is None:
            msg = "Invalid l3 settings filename or id: %s\n" % l3_settings
            msg = msg + " \nRecognized Level-3 processor setting ids:\n"
            for l3_settings_id in self.pysiral_config.get_setting_ids(
                    "proc", "l3"):
                msg = msg + "  " + l3_settings_id + "\n"
            self.error.add_error("invalid-l3-settings", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_griddef(self):
        l3_griddef = self._args.l3_griddef
        filename = self.pysiral_config.get_settings_file(
            "grid", None, l3_griddef)
        if filename is None:
            msg = "Invalid griddef filename or id: %s\n" % l3_griddef
            msg = msg + "    Recognized grid definition ids:\n"
            for griddef_id in self.pysiral_config.get_setting_ids("griddef"):
                msg = msg + "    - " + griddef_id + "\n"
            self.error.add_error("invalid-griddef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_output_file(self):
        l3_output = self._args.l3_output
        filename = self.pysiral_config.get_settings_file(
            "output", "l3", l3_output)
        if filename is None:
            msg = "Invalid output definition filename or id: %s\n" % l3_output
            msg = msg + "    Recognized output definition ids:\n"
            for output_id in self.pysiral_config.get_setting_ids(
                    "output", "l3"):
                msg = msg + "    - " + output_id + "\n"
            self.error.add_error("invalid-outputdef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_product_basedir(self):
        """ Returns the base directory (one level below l2i) """
        # 1. Clean up the path
        product_basedir = os.path.abspath(self._args.l2i_basedir)
        dirs = os.path.split(product_basedir)
        if dirs[1] == "l2i":
            return dirs[0]
        else:
            return product_basedir

    @property
    def remove_old(self):
        # Note: the Level-3 parser above does not define an overwrite-protection
        # flag, hence the defensive getattr with a False default
        return self._args.remove_old and not getattr(self._args, "overwrite_protection", False)
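# Usage sketch: the Level-3 parser above resolves setting ids to filenames and
# derives the product base directory from the l2i input directory. All values
# below (directory, results of the id lookups) are invented for illustration.
#
#   job_args = Level3ProcArgParser()
#   job_args.parse_command_line_arguments()
#   griddef_file = job_args.l3_griddef           # resolved from an id or full path
#   basedir = job_args.l3_product_basedir        # "/data/product" for "/data/product/l2i"
#   l2i_dir = job_args.l2i_product_directory     # basedir joined with "l2i"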
Exemplo n.º 26
0
class OutputHandlerBase(DefaultLoggingClass):

    subfolder_format = {"month": "%02g", "year": "%04g", "day": "%02g"}

    def __init__(self, output_def):
        super(OutputHandlerBase, self).__init__(self.__class__.__name__)
        self.pysiral_config = ConfigInfo()
        self.error = ErrorStatus()
        self._basedir = "n/a"
        self._init_from_output_def(output_def)
        self.output_def_filename = output_def

    def fill_template_string(self, template, dataset):
        """ Fill an template string with information of a dataset
        object (in this case Level2Data) """
        attributes = self.get_template_attrs(template)
        try:
            result = template.encode("utf-8")
        except AttributeError:
            result = str(template)
        for attribute in attributes:
            attribute_name, option, placeholder = attribute
            attribute = dataset.get_attribute(attribute_name, *option)
            if attribute is None:
                attribute = "unknown"
            result = result.replace(placeholder, attribute)
        return result

    def get_dt_subfolders(self, dt, subfolder_tags):
        """ Returns a list of subdirectories based on a datetime object
        (usually the start time of data collection) """
        subfolders = []
        for subfolder_tag in subfolder_tags:
            parameter = getattr(dt, subfolder_tag)
            subfolder = self.subfolder_format[subfolder_tag] % parameter
            subfolders.append(subfolder)
        return subfolders

    def get_template_attrs(self, template):
        """ Extract attribute names and options (if defined) for a
        give template string """
        try:
            template = template.encode('utf-8').strip()
        except AttributeError:
            template = str(template)
        attr_defs = re.findall("{.*?}", template)
        attrs, options = [], []
        for attr_def in attr_defs:
            attr_name, _, optstr = attr_def[1:-1].partition(":")
            attrs.append(attr_name)
            options.append(optstr.split(";"))
        return zip(attrs, options, attr_defs)

    def _init_from_output_def(self, output_def):
        """ Adds the information for the output def yaml files (either
        full filename or treedict structure) """
        if os.path.isfile(output_def):
            try:
                self._output_def = get_yaml_config(output_def)
            except Exception as msg:
                self.error.add_error("outputdef-parser-error", msg)
                self.error.raise_on_error()
        else:
            self._output_def = output_def
Exemplo n.º 27
0
class AuxdataBaseClass(object):
    """
    Base class for all sub-type auxdata base classes (e.g. SICBaseClass).
    This class defines the mandatory set of methods and properties for all
    auxdata classes
    """
    def __init__(self, auxclass_cfg):
        """ This class should not be called directly, only its subclasses. auxclass_cfg needs to be of type
        AuxClassConfig """

        # Error handler
        self.error = ErrorStatus(self.pyclass)

        # Auxiliary class options
        if not isinstance(auxclass_cfg, AuxClassConfig):
            msg = "Invalid config object: %s (needs to be of type pysiral.auxdata.AuxClassConfig"
            msg = msg % str(auxclass_cfg)
            self.error.add_error("invalid-auxclasscfg-type", msg)
            self.error.raise_on_error()
        self._cfg = auxclass_cfg

        # Main properties
        self._data = None  # Data container for external data
        self._auxvars = []  # List of auxiliary variables generated by the child class

        # General messages
        self.msgs = []

        # --- Class internals ---

        # This is for auxiliary data handlers that need to read external product files for
        # a defined period (daily, monthly, ...). The implementation currently keeps only one
        # external product in memory at a time. The period (date list: yyyy, mm, dd) of the
        # currently loaded product is designated as current_date. This date is compared to the
        # requested date and a new product is loaded upon mismatch of current & requested date.
        # NOTE: This will be bypassed by static auxiliary data classes
        # TODO: Load all auxiliary products for processing period in memory (allow parallel processing)
        self._current_date = [0, 0, 0]
        self._requested_date = [-1, -1, -1]

    def set_requested_date(self, year, month, day):
        """ Use first timestamp as reference, date changes are ignored """
        self._requested_date = [year, month, day]

    def set_requested_date_from_l2(self, l2):
        """ Convenience method, Use first timestamp as reference, date changes are ignored """
        year = l2.track.timestamp[0].year
        month = l2.track.timestamp[0].month
        day = l2.track.timestamp[0].day
        self.set_requested_date(year, month, day)

    def check_data_availability(self, data_container_name="_data"):
        """
        Checks if data is loaded. If data container is None, raise an Error
        :param data_container_name:
        :return:
        """
        data_container = getattr(self, data_container_name, None)
        if data_container is None:
            msg = "%s: Data not loaded [%s]"
            msg = msg % (self.__class__.__name__,
                         self.year + "-" + self.month + "-" + self.day)
            self.add_handler_message(msg)
            self.error.add_error("auxdata_missing", msg)

    def reset_auxvars(self):
        """ Empties the auxiliary data store. To be executed during class initialization and
        before retrieving data (e.g. since the Level-2 processor calls this instance repeatedly) """
        self._auxvars = []

    def reset_handler_messages(self):
        """ Empties the message list. To be executed during class initialization and
        before retrieving data (e.g. since the Level-2 processor calls this instance repeatedly) """
        self.msgs = []

    def add_variables_to_l2(self, l2):
        """ Main Access points for the Level-2 Processor """

        # Call the API get_track method. This is the mandatory method of all auxiliary subclasses
        # (independent of type). Test if this is indeed the case
        if not self.has_mandatory_track_method:
            msg = "Mandatory subclass method `get_l2_track_vars` not implemented for %s " % self.pyclass
            self.error.add_error("not-implemented", msg)
            self.error.raise_on_error()

        # Before calling the get_track_vars of the subclass, we must empty any existing data from a potential
        # previous execution
        self.reset_auxvars()
        self.reset_handler_messages()

        # Call the mandatory track extraction method. Each subclass should register its output via the
        # `register_auxvar` method of the parent class
        self.get_l2_track_vars(l2)

        # Check on errors
        if self.error.status and self.exception_on_error:
            self.error.raise_on_error()

        # Update the Level-2 object
        try:
            self.update_l2(l2)
        except KeyError:
            msg = "Invalid auxiliary parameter return from class %s" % self.pyclass
            self.error.add_error("invalid-auxvar-return", msg)
            self.error.raise_on_error()

    def register_auxvar(self, var_id, var_name, value, uncertainty=None):
        """ Register an auxiliary variable. The different parameters are necessary for the L2 data object.
        When it will be added to the l2 object in self.update_l2, the variable will be accessible from the l2 with
        the following expressions:

            value = l2.%var_id%
            uncertainty = l2.%var_id%.uncertainty

        or

            value = l2.get_parameter_by_name(%var_name%)
            uncertainty = l2.get_parameter_by_name(%var_name%_uncertainty)
        """
        auxvar_dict = dict(id=var_id,
                           name=var_name,
                           value=value,
                           uncertainty=uncertainty)
        self._auxvars.append(auxvar_dict)

    def add_handler_message(self, msg):
        self.msgs.append(msg)

    @staticmethod
    def get_empty_array(l2, empty_val=np.nan):
        return np.full(l2.n_records, empty_val)

    def update_external_data(self):
        """ This method will check if the requested date matches current data
        and call the subclass data loader method if not """
        # Check if data for day is already loaded
        if self._requested_date != self._current_date:
            # NOTE: The implementation of this method needs to be in the subclass
            self.load_requested_auxdata()
            self._current_date = self._requested_date
            if self.has_data_loaded:
                self.add_handler_message(self.__class__.__name__ + ": Load " +
                                         str(self.requested_filepath))
        else:
            if self.has_data_loaded:
                self.add_handler_message(self.__class__.__name__ +
                                         ": Data already present")
            else:
                msg = ": No Data: Loading failed in an earlier attempt"
                self.add_handler_message(self.__class__.__name__ + msg)

    def load_requested_auxdata(self):
        """
        This method raises a NotImplementedError if it is not overwritten by the child class
        :return:
        """
        msg = """
        This Exception is caused because the auxiliary data class ({}) is missing the method `load_requested_auxdata`
        or AuxdataBaseClass was called directly (which it should not)"""
        msg = msg.format(self.__class__.__name__)
        raise NotImplementedError(msg)

    def get_l2_track_vars(self, *args):
        """
        This method raises a NotImplementedError if it is not overwritten by the child class
        :return:
        """
        msg = """
        This Exception is caused because the auxiliary data class ({}) is missing the method `get_l2_track_vars`
        or AuxdataBaseClass was called directly (which it should not)"""
        msg = msg.format(self.__class__.__name__)
        raise NotImplementedError(msg)

    def update_l2(self, l2):
        """ Automatically add all auxiliary variables to a Level-2 data object"""
        for auxvar in self._auxvars:
            uncertainty = auxvar.get("uncertainty", None)
            l2.set_auxiliary_parameter(auxvar["id"], auxvar["name"],
                                       auxvar["value"], uncertainty)

    @property
    def pyclass(self):
        return self.__class__.__name__

    @property
    def cfg(self):
        return self._cfg

    @property
    def has_data_loaded(self):
        if not hasattr(self, "_data"):
            return False
        return self._data is not None

    @property
    def exception_on_error(self):
        if "exception_on_error" in self.cfg.options:
            exception_on_error = self.cfg.options.exception_on_error
        else:
            exception_on_error = False
        return exception_on_error

    @property
    def requested_filepath(self):
        """ Returns the local file path for the requested date"""

        # Main directory
        path = Path(self.cfg.local_repository)

        # Add the subfolders
        for subfolder_tag in self.cfg.subfolders:
            subfolder = getattr(self, subfolder_tag)
            path = path / subfolder

        # Get the period dict (will be constructed from filenaming)
        period_dict = {}
        attrs = re.findall("{.*?}", self.cfg.filenaming)
        for attr_def in attrs:
            attr_name = attr_def[1:-1]
            period_dict[attr_name] = getattr(self, attr_name)
        filename = self.cfg.filenaming.format(**period_dict)
        path = path / filename
        return path

    @property
    def year(self):
        return "%04g" % self._requested_date[0]

    @property
    def month(self):
        return "%02g" % self._requested_date[1]

    @property
    def day(self):
        return "%02g" % self._requested_date[2]

    @property
    def has_mandatory_track_method(self):
        """ Test if this object instance has the mandatory method for extracting track data. This method
        is named get_l2_track_vars() and needs to be present in any auxiliary subclass"""
        has_method = False
        get_track_children_method = getattr(self, "get_l2_track_vars", None)
        if callable(get_track_children_method):
            has_method = True
        return has_method

    @property
    def auxvar_names(self):
        return list([auxvar["name"] for auxvar in self._auxvars])
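# Usage sketch: a minimal hypothetical subclass demonstrating the contract of
# AuxdataBaseClass. The only mandatory method is `get_l2_track_vars`, which
# registers its output via `register_auxvar`. Variable id/name and the constant
# value are invented; a real handler would sample its data along the l2 track.

class StaticExampleAuxdata(AuxdataBaseClass):

    def get_l2_track_vars(self, l2):
        # One value per Level-2 record (here: a constant placeholder)
        value = self.get_empty_array(l2, empty_val=0.0)
        self.register_auxvar("exaux", "example_auxiliary_parameter", value)

# The Level-2 processor would then call `add_variables_to_l2(l2)`, which runs
# get_l2_track_vars() and copies the registered variables onto the l2 object.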
Exemplo n.º 28
0
class ReadNC(object):
    """
    Quick & dirty method to parse content of netCDF file into a python object
    with attributes from file variables
    """
    def __init__(self, filename, verbose=False, autoscale=True,
                 nan_fill_value=False, global_attrs_only=False):
        self.error = ErrorStatus()
        self.time_def = NCDateNumDef()
        self.parameters = []
        self.attributes = []
        self.verbose = verbose
        self.autoscale = autoscale
        self.global_attrs_only = global_attrs_only
        self.nan_fill_value = nan_fill_value
        self.filename = filename
        self.parameters = []
        self.read_globals()
        self.read_content()

    def read_globals(self):
        pass
#        self.gobal_attributes = {}
#        f = Dataset(self.filename)
#        print f.ncattrs()
#        f.close()

    def read_content(self):

        self.keys = []

        # Open the file
        try:
            f = Dataset(self.filename)
        except RuntimeError:
            msg = "Cannot read netCDF file: %s" % self.filename
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

        f.set_auto_scale(self.autoscale)

        # Get the global attributes
        for attribute_name in f.ncattrs():

            self.attributes.append(attribute_name)
            attribute_value = getattr(f, attribute_name)

            # Convert timestamps back to datetime objects
            # TODO: This needs to be handled better
            if attribute_name in ["start_time", "stop_time"]:
                attribute_value = num2date(
                    attribute_value, self.time_def.units,
                    calendar=self.time_def.calendar)
            setattr(self, attribute_name, attribute_value)

        # Get the variables
        if not self.global_attrs_only:
            for key in f.variables.keys():

                variable = f.variables[key][:]

                try:
                    is_float = variable.dtype in ["float32", "float64"]
                    has_mask = hasattr(variable, "mask")
                except AttributeError:
                    is_float, has_mask = False, False

                if self.nan_fill_value and has_mask and is_float:
                    is_fill_value = np.where(variable.mask)
                    variable[is_fill_value] = np.nan

                setattr(self, key, variable)
                self.keys.append(key)
                self.parameters.append(key)
                if self.verbose:
                    print(key)
            self.parameters = list(f.variables.keys())
        f.close()
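# Usage sketch: reading a netCDF file into a plain python object with ReadNC.
# The filename is invented for illustration; variables and global attributes
# become attributes of the returned object.
#
#   nc = ReadNC("l1p_example_file.nc", nan_fill_value=True)
#   print(nc.parameters)                  # list of variable names in the file
#   print(nc.attributes)                  # list of global attribute names
#   time = getattr(nc, "time", None)      # a variable, if present in the file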
Exemplo n.º 29
0
class ConfigInfo(DefaultLoggingClass):
    """
    Container for the content of the pysiral definition files
    (in pysiral/configuration) and the local machine definition file
    (local_machine_definition.yaml)
    """

    # Global variables
    _DEFINITION_FILES = {
        "mission": "mission_def.yaml",
        "auxdata": "auxdata_def.yaml",
    }

    # FIXME: This is only a quick fix for a bug that was caused by the removal of `parameter_def.yaml` in v0.6.1
    # (This list was implemented to ensure consistent naming of geophysical range corrections through all
    # platform pre-processors.
    CORRECTION_LIST = [
        "dry_troposphere", "wet_troposphere", "inverse_barometric",
        "dynamic_atmosphere", "ionospheric", "ocean_tide_elastic",
        "ocean_tide_long_period", "ocean_loading_tide", "solid_earth_tide",
        "geocentric_polar_tide"
    ]

    _LOCAL_MACHINE_DEF_FILE = "local_machine_def.yaml"

    VALID_SETTING_TYPES = ["proc", "output", "grid"]
    VALID_DATA_LEVEL_IDS = ["l1", "l2", "l2i", "l2p", "l3", None]

    def __init__(self):
        """ Read all definition files """
        super(ConfigInfo, self).__init__(self.__class__.__name__)

        self.error = ErrorStatus(self.__class__.__name__)
        # read the definition files in the config folder
        self._read_config_files()
        # read the local machine definition file
        self._read_local_machine_file()

    @property
    def mission_ids(self):
        return self.mission.missions

    def get_mission_defaults(self, mission):
        mission_options = self.mission[mission].options
        defaults = {}
        names, options = td_branches(mission_options)
        for name, option in zip(names, options):
            defaults[name] = option.default
        return defaults

    def get_mission_options(self, mission):
        mission_options = self.mission[mission].options
        return mission_options

    def get_mission_settings(self, mission):
        mission_options = self.mission[mission].settings
        return mission_options

    def get_mission_info(self, mission):
        mission_info = self.mission[mission]
        if mission_info.data_period.start is None:
            mission_info.data_period.start = datetime.utcnow()
        if mission_info.data_period.stop is None:
            mission_info.data_period.stop = datetime.utcnow()
        return mission_info

    def get_setting_ids(self, type, data_level=None):
        lookup_directory = self.get_local_setting_path(type, data_level)
        ids, files = self.get_yaml_setting_filelist(lookup_directory)
        return ids

    def get_settings_file(self, type, data_level, setting_id_or_filename):
        """ Returns a processor settings file for a given data level.
        (data level: l2 or l3). The second argument can either be a
        direct filename (whose validity will be checked) or an id, for
        which the corresponding file (id.yaml) will be looked up in
        the default directory """

        if type not in self.VALID_SETTING_TYPES:
            return None

        if data_level not in self.VALID_DATA_LEVEL_IDS:
            return None

        # Check if filename
        if os.path.isfile(setting_id_or_filename):
            return setting_id_or_filename

        # Get all settings files in settings/{data_level} and its
        # subdirectories
        lookup_directory = self.get_local_setting_path(type, data_level)
        ids, files = self.get_yaml_setting_filelist(lookup_directory)

        # Test if ids are unique and return error for the moment
        if len(np.unique(ids)) != len(ids):
            msg = "Non-unique %-%s setting filename" % (type, str(data_level))
            self.error.add_error("ambiguous-setting-files", msg)
            self.error.raise_on_error()

        # Find filename to setting_id
        try:
            index = ids.index(setting_id_or_filename)
            return files[index]
        except ValueError:
            return None

    def get_yaml_setting_filelist(self, directory, ignore_obsolete=True):
        """ Retrieve all yaml files from a given directory (including
        subdirectories). Directories named "obsolete" are ignored if
        ignore_obsolete=True (default) """
        setting_ids = []
        setting_files = []
        for root, dirs, files in os.walk(directory):
            if os.path.split(root)[-1] == "obsolete" and ignore_obsolete:
                continue
            for filename in files:
                if re.search("yaml$", filename):
                    setting_ids.append(filename.replace(".yaml", ""))
                    setting_files.append(os.path.join(root, filename))
        return setting_ids, setting_files

    def get_local_setting_path(self, type, data_level):
        if type in self.VALID_SETTING_TYPES and data_level in self.VALID_DATA_LEVEL_IDS:
            args = [type]
            if data_level is not None:
                args.append(data_level)
            return os.path.join(USER_CONFIG_PATH, *args)
        else:
            return None

    def _read_config_files(self):
        for key in self._DEFINITION_FILES.keys():
            filename = os.path.join(USER_CONFIG_PATH,
                                    self._DEFINITION_FILES[key])
            setattr(self, key, get_yaml_config(filename))

    def _read_local_machine_file(self):
        filename = os.path.join(USER_CONFIG_PATH, self._LOCAL_MACHINE_DEF_FILE)
        try:
            local_machine_def = get_yaml_config(filename)
        except IOError:
            msg = "local_machine_def.yaml not found (expected: %s)" % filename
            self.error.add_error("local-machine-def-missing", msg)
            self.error.raise_on_error()
        setattr(self, "local_machine", local_machine_def)

    def _return_path(self, subfolder):
        return os.path.join(USER_CONFIG_PATH, subfolder)
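# Usage sketch: ConfigInfo provides access to the pysiral definition files and
# resolves setting ids to filenames. The ids returned below depend on the local
# installation and are therefore not hard-coded here.
#
#   config = ConfigInfo()
#   l2_ids = config.get_setting_ids("proc", "l2")
#   if l2_ids:
#       settings_file = config.get_settings_file("proc", "l2", l2_ids[0])
#   local_machine = config.local_machine   # content of local_machine_def.yaml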
Exemplo n.º 30
0
class NCDataFile(DefaultLoggingClass):
    def __init__(self):
        class_name = self.__class__.__name__
        super(NCDataFile, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)
        self.filename = None
        self.time_def = NCDateNumDef()
        self.zlib = True
        self._rootgrp = None
        self._options = None
        self._proc_settings = None
        self.verbose = False

    def set_options(self, **opt_dict):
        self._options = AttrDict(**opt_dict)

    def set_processor_settings(self, proc_settings):
        self._proc_settings = proc_settings

    def set_base_export_path(self, path):
        self.base_export_path = path

    def get_full_export_path(self, startdt):
        self._get_full_export_path(startdt)
        return self.export_path

    def _set_doi(self):
        if self.output_handler.has_doi:
            self.data.set_doi(self.output_handler.doi)

    def _set_data_record_type(self):
        if self.output_handler.has_doi:
            self.data.set_data_record_type(
                self.output_handler.data_record_type)

    def _write_global_attributes(self):
        attr_dict = self.output_handler.get_global_attribute_dict(self.data)
        self._set_global_attributes(attr_dict)

    def _populate_data_groups(self, level3=False, flip_yc=False):

        lonlat_parameter_names = ["lon", "lat", "longitude", "latitude"]

        dimdict = self.data.dimdict
        dims = dimdict.keys()

        for key in dims:
            self._rootgrp.createDimension(key, dimdict[key])

        for parameter_name, attribute_dict in self.output_handler.variable_def:
            # Check if the output parameter name differs from the name of the
            # source parameter in the data object

            if "var_source_name" in attribute_dict.keys():
                attribute_dict = dict(attribute_dict)
                var_source_name = attribute_dict.pop("var_source_name")
            else:
                var_source_name = parameter_name

            data = self.data.get_parameter_by_name(var_source_name)

            if data is None:
                msg = "Invalid parameter name for data object: %s"
                msg = msg % parameter_name
                self.log.error(msg)
                self.error.add_error("invalid-paramater", msg)
                self.error.raise_on_error()

            # Convert datetime objects to number
            if type(data[0]) is datetime:
                data = date2num(data, self.time_def.units,
                                self.time_def.calendar)

            # Convert bool objects to integer
            if data.dtype.str == "|b1":
                data = np.int8(data)

            # Set dimensions (dependent on product level)
            if level3:
                if flip_yc:
                    data = np.flipud(data)
                if parameter_name not in lonlat_parameter_names:
                    data = np.array([data])
                    dimensions = tuple(list(dims)[0:len(data.shape)])
                else:
                    dimensions = tuple(list(dims)[1:len(data.shape) + 1])
            else:
                dimensions = tuple(list(dims)[0:len(data.shape)])
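            # Example (hypothetical Level-3 grid): with dims ("time", "yc", "xc")
            # a 2D grid variable of shape (ny, nx) is wrapped to (1, ny, nx) and
            # written with dimensions ("time", "yc", "xc"), while lon/lat keep
            # their 2D shape and get the dimensions ("yc", "xc")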

            # Create and set the variable
            var = self._rootgrp.createVariable(parameter_name,
                                               data.dtype.str,
                                               dimensions,
                                               zlib=self.zlib)
            var[:] = data

            # Add Parameter Attributes
            for key in sorted(attribute_dict.keys()):
                attribute = attribute_dict[key]
                attribute = self.output_handler.fill_template_string(
                    attribute, self.data)
                setattr(var, key, attribute)

    def _create_root_group(self, attdict, **global_attr_keyw):
        """
        Create the root group and add l1b metadata as global attributes
        """
        self._convert_datetime_attributes(attdict)
        self._convert_bool_attributes(attdict)
        self._convert_nonetype_attributes(attdict)
        self._set_global_attributes(attdict, **global_attr_keyw)

    def _convert_datetime_attributes(self, attdict):
        """
        Replace l1b info parameters of type datetime.datetime by a double
        representation to match requirements for netCDF attribute data type
        rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if type(content) is datetime:
                attdict[key] = date2num(content, self.time_def.units,
                                        self.time_def.calendar)

    def _convert_bool_attributes(self, attdict):
        """
        Replace l1b info parameters of type bool ['b1'] by an integer
        representation to match requirements for netCDF attribute data type
        rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if type(content) is bool:
                attdict[key] = int(content)

    def _convert_nonetype_attributes(self, attdict):
        """
        Replace l1b info parameters of type NoneType by an empty string
        to match requirements for netCDF attribute data type rules
        """
        for key in attdict.keys():
            content = attdict[key]
            if content is None:
                attdict[key] = ""

    def _set_global_attributes(self, attdict, prefix=""):
        """ Save l1b.info dictionary as global attributes """
        for key in attdict.keys():
            self._rootgrp.setncattr(prefix + key, attdict[key])

    def _get_variable_attr_dict(self, parameter):
        """ Retrieve the parameter attributes """
        default_attrs = {
            "long_name": parameter,
            "standard_name": parameter,
            "scale_factor": 1.0,
            "add_offset": 0.0
        }
        if parameter not in self.parameter_attributes:
            # self._missing_parameters.append(parameter)
            return default_attrs
        else:
            return dict(self.parameter_attributes[parameter])

    def _write_processor_settings(self):
        if self._proc_settings is None:
            return
        settings = self._proc_settings
        for item in settings.keys():
            self._rootgrp.setncattr(item, str(settings[item]))

    def _open_file(self):
        try:
            self._rootgrp = Dataset(self.full_path, "w")
        except RuntimeError:
            msg = "Unable to create netCDF file: %s" % self.full_path
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

    def _write_to_file(self):
        self._rootgrp.close()

    @property
    def export_path(self):
        """ Evoking this property will also create the directory if it
        does not already exists """
        return self.output_handler.get_directory_from_data(self.data,
                                                           create=True)

    @property
    def export_filename(self):
        """ Returns the filename for the level2 output file """
        return self.output_handler.get_filename_from_data(self.data)

    @property
    def full_path(self):
        return Path(self.export_path) / self.export_filename