示例#1
0
def pysiral_l2proc_time_range_job(args):
    """ Run the Level-2 processor for a given time range.

    The requested period is clipped to the period returned by
    `psrlcfg.get_platform_period` for the mission of the Level-2 settings
    and is then processed in monthly segments.

    :param args: The job arguments. The attributes read here are `run_tag`,
        `l2_settings_file`, `l2_output`, `overwrite_protection`, `start`,
        `stop`, `exclude_month`, `l1b_version` and `remove_old`.
    """

    # Get start time of processor run
    # NOTE: `time.clock()` has been removed in Python 3.8. `perf_counter`
    #       is the documented replacement for measuring elapsed time.
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2ProductDefinition(args.run_tag, args.l2_settings_file)
    mission_id = product_def.l2def.mission.id
    hemisphere = product_def.l2def.hemisphere

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2_output, overwrite_protection=args.overwrite_protection)

    # --- Get the period for the Level-2 Processor ---
    # Evaluate the input arguments
    period = DatePeriod(args.start, args.stop)

    # Clip the time range to the valid time range of the target platform
    period = period.intersect(psrlcfg.get_platform_period(mission_id))

    # The Level-2 processor operates in monthly iterations
    # -> Break down the full period into monthly segments and
    #    filter specific months that should not be processed
    period_segments = period.get_segments("month", crop_to_period=True)
    if args.exclude_month is not None:
        period_segments.filter_month(args.exclude_month)

    # Prepare DataHandler
    l1b_data_handler = DefaultL1bDataHandler(mission_id,
                                             hemisphere,
                                             version=args.l1b_version)

    # Processor Initialization
    l2proc = Level2Processor(product_def)

    # Now loop over the months
    for time_range in period_segments:

        # Do some extra logging
        l2proc.log.info("Processing period: %s" % time_range.label)

        # Product Data Management
        if args.remove_old:
            for output_handler in product_def.output_handler:
                output_handler.remove_old(time_range)

        # Get input files
        # (the count is an integer -> `%d`, since `%g` switches to
        #  scientific notation for large numbers)
        l1b_files = l1b_data_handler.get_files_from_time_range(time_range)
        l2proc.log.info("Found %d files in %s" %
                        (len(l1b_files), l1b_data_handler.last_directory))

        # Process the orbits
        l2proc.process_l1b_files(l1b_files)

    # All done, report the elapsed time in full seconds
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    l2proc.log.info("Run completed in %s" % str(timedelta(seconds=seconds)))
示例#2
0
def pysiral_l3proc():
    """ Caller for the Level-3 processor.

    Parses the command line arguments, splits the requested time range into
    period segments (or uses the full range as one segment if the period is
    `custom`) and runs the Level-3 processor on the l2i files found for
    each segment. Segments without l2i files are skipped.
    """

    # parse command line arguments
    args = Level3ProcArgParser()
    args.parse_command_line_arguments()

    # Get start time of processor run
    # NOTE: `time.clock()` has been removed in Python 3.8. `perf_counter`
    #       is the documented replacement for measuring elapsed time.
    t0 = time.perf_counter()

    # --- Get the period segments for the Level-3 processor ---
    # NOTE: These depend on the chosen total time range and the duration period for the grid.
    period = DatePeriod(args.start, args.stop)
    if args.period == "custom":
        period_segments = [period]
        n_periods = 1
    else:
        period_segments = period.get_segments(args.period)
        n_periods = period_segments.n_periods

    # Get the output grid
    grid = Level3GridDefinition(args.l3_griddef)

    # Initialize the interface to the l2i products
    l2i_handler = L2iDataHandler(args.l2i_product_directory)

    # Initialize the output handler (one per output definition)
    # Currently the overwrite protection is disabled per default
    output = []
    for l3_output_file in args.l3_output_file:
        output_handler = Level3OutputHandler(output_def=l3_output_file,
                                             base_directory=args.l3_product_basedir,
                                             period=args.period,
                                             doi=args.doi,
                                             data_record_type=args.data_record_type,
                                             overwrite_protection=False)
        output.append(output_handler)

    # Compile the product def
    product_def = Level3ProductDefinition(args.l3_settings_file, grid, output, period)

    # Initialize the Processor
    l3proc = Level3Processor(product_def)

    # Loop over all iterations
    for i, time_range in enumerate(period_segments):

        # Report processing period
        # (counters are integers -> `%d`, since `%g` switches to
        #  scientific notation for large numbers)
        msg = "# Processing %s period (%d of %d): %s"
        msg = msg % (args.period, i+1, n_periods, time_range.date_label)
        logger.info(msg)

        # Retrieve files
        l2i_files = l2i_handler.get_files_from_time_range(time_range)
        logger.info("Num l2i files: %d" % len(l2i_files))
        if len(l2i_files) == 0:
            logger.info("Skip data period")
            continue

        # Start the Level-3 processing
        l3proc.process_l2i_files(l2i_files, time_range)

    # Final reporting of the elapsed time in full seconds
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
示例#3
0
def pysiral_l2preproc():
    """ Caller for converting Level-2 Intermediate (l2i) into
    Level-2 Pre-Processed (l2p) data products.
    NOTE: At the moment that only means summary of valid freeboard/thickness
          data points into daily summary files. """

    # Collect job settings from pysiral configuration data and
    # command line arguments
    args = Level2PreProcArgParser()

    # Parse and validate the command line arguments
    args.parse_command_line_arguments()

    # Get confirmation for critical choices (if necessary)
    args.critical_prompt_confirmation()

    # Start the level-2 pre-processor
    # Get start time of processor run
    # NOTE: `time.clock()` has been removed in Python 3.8. `perf_counter`
    #       is the documented replacement for measuring elapsed time.
    t0 = time.perf_counter()

    # Get the product definition
    product_def = Level2PreProcProductDefinition()

    # Specifically add an output handler
    product_def.add_output_definition(
        args.l2i_product_dir,
        args.l2p_output,
        period="daily",
        doi=args.doi,
        overwrite_protection=args.overwrite_protection)

    # Prepare DataHandler
    # The l2 pre-processor requires l2i input files
    l2i_handler = L2iDataHandler(args.l2i_product_dir)

    # Get list of days for processing
    # start and/or stop can be omitted. In this case fall back to the
    # start and/or stop month reported by the l2i data handler
    start = args.start if args.start is not None else l2i_handler.start_month
    stop = args.stop if args.stop is not None else l2i_handler.stop_month
    period = DatePeriod(start, stop)
    days = period.get_segments("day")
    if args.exclude_month is not None:
        days.filter_month(args.exclude_month)

    # Processor Initialization
    # NOTE: This is only for later cases. Not much is done here at this
    #       point
    l2preproc = Level2PreProcessor(product_def)

    # Loop over iterations (one per day)
    for day in days:

        # Do some extra logging
        logger.info("Processing Day [%s]" % day.label)

        # TODO: Product data management (removal of old output files via
        #       `args.remove_old`) is not implemented for the daily
        #       iterations yet. This needs a bit more thought.

        # Get input files
        l2i_daily_files = l2i_handler.get_files_for_day(day.tcs.dt)
        if len(l2i_daily_files) == 0:
            logger.info("- no l2i products, skip day")
            continue
        # (the count is an integer -> `%d`, since `%g` switches to
        #  scientific notation for large numbers)
        logger.info("- Found %d l2i product files" % len(l2i_daily_files))

        # Process the orbits
        l2preproc.process_l2i_files(l2i_daily_files, day)

    # All done, log processor time in full seconds
    t1 = time.perf_counter()
    seconds = int(t1 - t0)
    logger.info("Run completed in %s" % str(timedelta(seconds=seconds)))
示例#4
0
class Level1PreProcJobDef(DefaultLoggingClass):
    """ A class that contains the information for the Level-1 pre-processor JOB (not the pre-processor class!)

    The job definition bundles the parsed l1p processor definition, the requested time range (and its monthly
    segments), the target hemisphere/platform and the options for the output handler.
    """

    def __init__(self,
                 l1p_settings_id_or_file,
                 tcs,
                 tce,
                 exclude_month=None,
                 hemisphere="global",
                 platform=None,
                 output_handler_cfg=None,
                 source_repo_id=None):
        """
        The settings for the Level-1 pre-processor job
        :param l1p_settings_id_or_file: An id of a proc/l1 processor config file (filename excluding the .yaml
                                        extension) or a full filepath to a yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD])
        :param exclude_month: [int list] A list of months that will be ignored (applied to `period_segments`)
        :param hemisphere: [str] The target hemisphere (`north`, `south`, `global`:default).
        :param platform: [str] The target platform (pysiral id). Required if l1p settings files is valid for
                               multiple platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] An optional dictionary with options of the output handler
                                   (`overwrite_protection`: [True, False], `remove_old`: [True, False])
        :param source_repo_id: [str] The tag in local_machine_def.yaml (l1b_repository.<platform>.<source_repo_id>)
                                  -> Overwrites the default source repo in the l1p settings
                                     (input_handler.options.local_machine_def_tag &
                                      output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Get pysiral configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Store command line options
        # NOTE: These need to be set before parsing the l1p settings file, since
        #       the parsing evaluates platform, hemisphere and source repo id
        self._exclude_month = exclude_month
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Parse the l1p settings file
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Get full requested time range
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Store the data handler options
        if output_handler_cfg is None:
            output_handler_cfg = {}
        self._output_handler_cfg = output_handler_cfg

        # Measure execution time
        self.stopwatch = StopWatch()

    @classmethod
    def from_args(cls, args):
        """ Init the Processor Definition from the pysiral-l1preproc command line argument object

        :param args: The command line argument object. The attributes read here are `exclude_month`,
                     `overwrite_protection`, `remove_old`, `source_repo_id`, `hemisphere`, `platform`,
                     `l1p_settings`, `start_date` and `stop_date`.
        :return: The initialized job definition
        """

        # Options for the output handler
        data_handler_cfg = {
            "overwrite_protection": args.overwrite_protection,
            "remove_old": args.remove_old
        }
        if args.source_repo_id is not None:
            data_handler_cfg["local_machine_def_tag"] = args.source_repo_id

        # Optional Keywords
        kwargs = {
            "output_handler_cfg": data_handler_cfg,
            "hemisphere": args.hemisphere,
            "platform": args.platform,
            "source_repo_id": args.source_repo_id
        }
        if args.exclude_month is not None:
            kwargs["exclude_month"] = args.exclude_month

        # Return the initialized class
        return cls(args.l1p_settings, args.start_date, args.stop_date,
                   **kwargs)

    def set_l1p_processor_def(self, l1p_settings_id_or_file):
        """ Parse the content of the processor definition file

        :param l1p_settings_id_or_file: An id of a proc/l1 processor config file or a full filepath
        """

        # 1. Resolve the absolute file path
        procdef_file_path = self.get_l1p_proc_def_filename(
            l1p_settings_id_or_file)

        # 2. Read the content
        logger.info("Parsing L1P processor definition file: %s" %
                    procdef_file_path)
        self._l1pprocdef = get_yaml_config(procdef_file_path)
        self._check_if_unambiguous_platform()

        # 3. Expand info (input data lookup directories)
        self._get_local_input_directory()

        # 4. update hemisphere for input adapter
        self._l1pprocdef.level1_preprocessor.options.polar_ocean.target_hemisphere = self.target_hemisphere

    def get_l1p_proc_def_filename(self, l1p_settings_id_or_file):
        """ Query pysiral config to obtain filename for processor definition file

        :param l1p_settings_id_or_file: An id of a proc/l1 processor config file or a full filepath
        :return: The input itself if it points to an existing file, otherwise the filename
                 resolved by the pysiral configuration. An error is registered and raised via
                 the error status object if the id cannot be resolved.
        """

        # A. Check if already filename
        if Path(l1p_settings_id_or_file).is_file():
            return l1p_settings_id_or_file

        # B. Not a file, try to resolve filename via pysiral config
        filename = self.pysiral_cfg.get_settings_file("proc", "l1",
                                                      l1p_settings_id_or_file)
        if filename is None:
            # Compile an error message that lists all known ids
            msg_parts = [
                "Invalid Level-1 pre-processor definition filename or id: %s\n" % l1p_settings_id_or_file,
                " \nRecognized Level-1 pre-processor definitions ids:\n"
            ]
            for settings_id in self.pysiral_cfg.get_setting_ids("proc", "l1"):
                msg_parts.append("    - " + settings_id + "\n")
            self.error.add_error("invalid-l1p-outputdef", "".join(msg_parts))
            self.error.raise_on_error()
        return filename

    def _get_local_input_directory(self):
        """ Replace the tag for local machine def with the actual path info

        Looks up the branch `l1b_repository.<platform>.<tag>` of the local machine definition, verifies
        that it has the entries `source` and `l1p` pointing to existing directories and sets the `source`
        entry as `lookup_dir` option of the input handler.
        """

        input_handler_cfg = self.l1pprocdef.input_handler.options
        local_machine_def_tag = input_handler_cfg.local_machine_def_tag
        primary_input_def = self.pysiral_cfg.local_machine.l1b_repository
        platform, tag = self.platform, local_machine_def_tag

        # Overwrite the tag if specifically supplied
        if self._source_repo_id is not None:
            tag = self._source_repo_id

        # Get the value
        # NOTE: A missing key as well as an empty (None) entry in the yaml
        #       structure both mean that the definition is missing.
        expected_branch_name = "root.l1b_repository.%s.%s" % (platform, tag)
        try:
            branch = AttrDict(primary_input_def[platform][tag])
        except (KeyError, TypeError):
            branch = None

        if branch is None:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()

        # Validity checks
        # TODO: These checks are probably better located in a separate method?
        for key in ["source", "l1p"]:

            # 1. Branch must have specific keys for input and output
            if key not in branch:
                msg = "Missing definition in `local_machine_def.yaml`. Expected value: %s.%s"
                msg = msg % (expected_branch_name, key)
                self.error.add_error("local-machine-def-missing-tag", msg)
                self.error.raise_on_error()

            # 2. The value of each branch must be a valid directory or a
            #    attr (e.g. for different radar modes) with a list of directories
            directory_or_attrdict = branch[key]
            try:
                directories = directory_or_attrdict.values()
            except AttributeError:
                directories = [directory_or_attrdict]

            for directory in directories:
                if not Path(directory).is_dir():
                    msg = "Invalid directory in `local_machine_def.yaml`: %s is not a valid directory"
                    msg = msg % directory
                    self.error.add_error("local-machine-def-invalid-dir", msg)
                    self.error.raise_on_error()

        # Update the lookup dir parameter
        self.l1pprocdef.input_handler["options"]["lookup_dir"] = branch.source

    def _check_if_unambiguous_platform(self):
        """ Checks if the platform is unique, since some l1 processor definitions are valid for a series of
        platforms, such as ERS-1/2, Sentinel-3A/B, etc. The indicator is that the platform tag in the
        l1 preprocessor settings is comma separated list.

        For the location of the source data, it is however necessary that the exact platform is known. It must
        therefore be specified explicitly by the -platform argument.

        The program is terminated (`sys.exit`) if the platform is required but not given or if the given
        platform is not an entry of the platform list in the settings. """

        settings_platform = self._l1pprocdef.platform
        settings_is_ambigous = "," in settings_platform
        platform_is_known = self.platform is not None

        # Test if platform is given if the settings file is valid for more than 1 platform
        if settings_is_ambigous and not platform_is_known:
            msg = "Error: platform in l1p settings is ambiguous (%s), but no platform has been given (-platform)"
            msg = msg % settings_platform
            sys.exit(msg)

        # Test if platform provided matches the platform list in the settings file
        # NOTE: The given platform must be equal to one of the list entries. A plain
        #       substring test would also accept partial ids (e.g. `ers` for `ers1,ers2`)
        if settings_is_ambigous and platform_is_known:
            valid_platforms = [entry.strip() for entry in str(settings_platform).split(",")]
            if self.platform not in valid_platforms:
                msg = "Error: platform in l1p settings (%s) and given platform (%s) do not match"
                msg = msg % (settings_platform, self.platform)
                sys.exit(msg)

        # If platform in settings is unambigous, but not provided -> get platform from settings
        if not settings_is_ambigous and not platform_is_known:
            self._platform = settings_platform
            logger.info("- get platform from l1p settings -> %s" %
                        self.platform)

    @property
    def hemisphere(self):
        """ The hemisphere option as given at initialization """
        return self._hemisphere

    @property
    def target_hemisphere(self):
        """ The list of hemisphere names for the hemisphere option (`global` -> both hemispheres) """
        values = {
            "north": ["north"],
            "south": ["south"],
            "global": ["north", "south"]
        }
        return values[self.hemisphere]

    @property
    def pysiral_cfg(self):
        """ The pysiral configuration object """
        return self._cfg

    @property
    def l1pprocdef(self):
        """ The content of the parsed l1p processor definition file """
        return self._l1pprocdef

    @property
    def time_range(self):
        """ The full requested time range """
        return self._time_range

    @property
    def exclude_month(self):
        """ The list of months to be ignored (or None) """
        return self._exclude_month

    @property
    def period_segments(self):
        """ The monthly segments of the requested time range without the excluded months """
        segments = self._time_range.get_segments("month", crop_to_period=True)
        if self._exclude_month is not None:
            segments.filter_month(self._exclude_month)
        return segments

    @property
    def output_handler_cfg(self):
        """ The dictionary with the options for the output handler """
        return self._output_handler_cfg

    @property
    def platform(self):
        """ The target platform (given explicitly or taken from the l1p settings) """
        return self._platform