Пример #1
0
    def _add_db_handler(self):
        """Add DB logging handler.

        Registers a DbLogger handler (configured with the catalog's
        ``ip_parent_identifier``) on the global Logger and opens a session
        on the manager's DB file (``self._db_file``).

        :raises ConnectionError: when the logging DB cannot be opened
        """
        try:
            Logger.addHandler(
                DbLogger(self.config['catalog']['ip_parent_identifier'])
            )
            Logger.db_handler().set_session(
                self._db_file
            )
        except DbConnectionError as e:
            # re-raise as a builtin ConnectionError with the DB path attached
            raise ConnectionError('{}: {}'.format(e, self._db_file))
Пример #2
0
    def _cleanup(self, job_id=None):
        """Perform manager clean up.

        Removes the logging DB and logging directory entirely, or only the
        artifacts (DB rows, log file, log directory) of a single job.

        :param int job_id: remove only selected job; ``None`` or a negative
            value removes everything
        """
        # BUG FIX: the default job_id is None, but the original code compared
        # "job_id < 0" / "job_id > 0" directly, which raises TypeError on
        # Python 3 -- treat None the same as "remove all"
        remove_all = job_id is None or job_id < 0

        # logging DB
        if remove_all:
            # all
            log_db = self.config['logging']['db']
            if os.path.exists(log_db):
                os.remove(log_db)
                Logger.debug("Logging DB {} removed".format(
                    log_db
                ))
        else:
            # single job
            self._add_db_handler()
            Logger.db_handler().delete_job(job_id)

        # logging dir
        if remove_all:
            # all
            log_dir = self.config['logging']['directory']
            if os.path.exists(log_dir):
                shutil.rmtree(log_dir)
                Logger.debug("Logging directory {} removed".format(
                    log_dir
                ))
        else:
            # single job
            log_file = self._get_log_file(job_id)
            if os.path.exists(log_file):
                os.remove(log_file)
                Logger.debug("Logging file {} removed".format(
                    log_file
                ))
            # the job's per-IP output directory shares the log file basename
            log_dir = os.path.splitext(log_file)[0]
            if os.path.exists(log_dir):
                shutil.rmtree(log_dir)
                Logger.debug("Logging directory {} removed".format(
                    log_dir
                ))

        # single-job cleanup stops here
        if not remove_all and job_id > 0:
            return
Пример #3
0
    def run(self):
        """Run all registered processors from queue.

        Logs job start, runs each processor in order (best effort: a failed
        processor does not stop the job), stores the JSON response after
        each one, and logs the elapsed time on completion.

        :raises ConfigError: when no processors are defined in config
        """
        # check if processors defined
        if not self._processors:
            raise ConfigError(self._config_files,
                              "list of processors not defined"
            )

        # determine current/previous job id
        job_id = Logger.db_handler().job_id()
        prev_job_id = Logger.db_handler().last_job_id(
            self.config['processors'][0])
        Logger.info("Job started (id {})".format(job_id))
        Logger.db_handler().job_started()
        if prev_job_id:
            Logger.debug("Previous job found (id {})".format(prev_job_id))
        else:
            Logger.debug("No previous job found. Starting from scratch")

        start = time.time()
        # NOTE: removed an unused loop counter (`idx`) from the original
        for proc in self._processors:
            try:
                # run the processor
                proc.run()
                try:
                    Logger.info('{} processor result: {}'.format(
                        proc.identifier, proc.result()
                    ))
                except ProcessorResultError:
                    # processor produced no printable result
                    pass

                # store JSON after each processor
                self.save_response(proc)
            except ProcessorFailedError:
                # best effort: a failed processor must not stop the job
                pass

        Logger.success(
            "Job {} successfully finished in {:.6f} sec".format(
                job_id, time.time() - start
        ))
Пример #4
0
    def _get_log_file(self, job_id=None):
        """Get log file filepath

        :param int job_id: selected job id or None for current
        """
        if not job_id:
            job_id = Logger.db_handler().job_id()

        return os.path.abspath(
            os.path.join(
                self.config['logging']['directory'],
                '{0:05d}.log'.format(job_id)
        ))
Пример #5
0
    def get_last_response(self, ip, full=False):
        """
        Get QI metadata response from previous job.

        :param str ip: image product
        :param bool full: True for full data otherwise only relevant part

        :return dict: QI metadata
        """
        # resolve the last job of the first configured processor
        try:
            job_id = Logger.db_handler().last_job_id(
                self.config['processors'][0])
        except KeyError:
            raise ProcessorCriticalError(self,
                                         "No processors defined in config")

        if not job_id:
            Logger.debug(
                "First run? Unable to get last response from JSON file")
            return None

        # per-job response file: <log_dir>/<00000 job id>/<ip>.json
        json_file = os.path.join(self.config['logging']['directory'],
                                 '{0:05d}'.format(job_id), ip + '.json')
        if not os.path.exists(json_file):
            raise ProcessorCriticalError(
                self, "Response file {} not found".format(json_file))

        data = JsonIO.read(json_file, response=True)
        if full:
            return data

        relevant_part = QCResponse(data).get(self.isMeasurementOf)
        if not relevant_part:
            if not self.config['strict']['enabled']:
                return {}
            raise ProcessorCriticalError(
                self, "Unable to get relevant part for {} ({})".format(
                    self.isMeasurementOf, ip))

        if hasattr(self, "isMeasurementOfSection"):
            # keep only the keys relevant to this processor's section
            wanted = ("isMeasurementOf", "value", "lineage",
                      self.isMeasurementOfSection)
            relevant_part = {key: value
                             for key, value in relevant_part.items()
                             if key in wanted}

        return relevant_part
Пример #6
0
    def get_last_ip_status(self, ip, status):
        """Get status for last image products.

        :param str ip: image product
        :param DbIpOperationStatus status: status

        :return DbIpOperationStatus: image product status
        """
        if status != DbIpOperationStatus.unchanged:
            return status

        # unchanged: consult the status stored for the last IP processed
        # by this particular processor
        return Logger.db_handler().processed_ip_status(self.identifier, ip)
Пример #7
0
    def __init__(self, directory):
        """Initialize response handling.

        Loads the JSON schema shipped next to this module and creates the
        per-job target directory for output JSON files.

        :param str directory: base directory where output JSON files go
        """
        # switch to safe mode when template processor is used
        # NOTE(review): originally intended to be conditional on
        # QCProcessorTemplate; kept unconditionally enabled as in the source
        self.safe = True

        # read JSON schema
        schema_path = os.path.join(os.path.dirname(__file__), 'schema.json')
        with open(schema_path) as fd:
            self._schema = json.load(fd)

        # create directory where output JSON file will be stored
        self.target_dir = os.path.join(
            directory, '{0:05d}'.format(Logger.db_handler().job_id()))
        # exist_ok avoids the race between an exists() check and makedirs()
        os.makedirs(self.target_dir, exist_ok=True)
Пример #8
0
    def run(self):
        """Run processor tasks.

        For every image product (IP) processed by the previous processor,
        decide from its DB operation status and the presence of results on
        disk whether to re-run the computation or reuse the response from
        the previous job, then record the IP operation in the logging DB.

        :return int: response counter value
        """
        # log start computation
        self._run_start()

        # loop through image products (IP)
        processor_previous = self.previous()
        processed_ips = Logger.db_handler().processed_ips(
            processor_previous, platform_type=self.platform_type)
        ip_count = len(processed_ips)
        if ip_count < 1:
            Logger.warning(
                "No IP products to process (previous processor: {})".format(
                    processor_previous))
        counter = 1
        for ip, status in processed_ips:
            # increment counter
            self._current_response_idx += 1

            Logger.info("({}) Processing {}... ({}/{})".format(
                self.identifier, ip, counter, ip_count))
            counter += 1

            # get last IP status
            ip_status = self.get_last_ip_status(ip, status)

            # skip rejected IP (QA not passed)
            if ip_status == DbIpOperationStatus.rejected:
                self.ip_operation(ip, ip_status)
                response_data = self.get_last_response(ip)
                if response_data:
                    self.update_response(response_data)
                continue

            # set current response status from DB
            self.set_response_status(status)

            # read metadata
            meta_data = self.get_meta_data(ip)

            # define output path
            # check whether results exists
            if self.output_path is None:
                # output path not defined, assuming QI results (level2)
                try:
                    output_path = self._get_qi_results_path(
                        self.get_processing_level2(meta_data)['title'])
                except TypeError:
                    # presumably get_processing_level2() returned None
                    # (no level2 product) -- TODO confirm
                    Logger.warning(
                        "Level2 product not found, switching back to level1!")
                    output_path = self._get_qi_results_path(meta_data['title'])
                results_exist = self.check_qi_results(output_path)
            else:
                output_path = self.output_path
                results_exist = os.path.exists(self._get_ip_output_path(ip))

            # force absolute path
            try:
                output_path = os.path.abspath(output_path)
            except TypeError:
                # output_path is None -> abspath() raises TypeError
                raise ProcessorCriticalError(self,
                                             "Output directory not defined!")

            # determine whether to force the computation
            # ip_status is None -> no previous processor run detected
            # NOTE(review): `and` binds tighter than `or`, so the last line
            # reads as (unchanged AND results missing)
            force = status == DbIpOperationStatus.forced or \
                ip_status is None or \
                status == DbIpOperationStatus.unchanged and not results_exist

            # perform processor operations if requested
            if status in (DbIpOperationStatus.added,
                          DbIpOperationStatus.updated,
                          DbIpOperationStatus.failed) or force:
                if force:
                    # change status from unchanged to updated
                    if not results_exist:
                        Logger.debug("Missing results")
                    Logger.debug("Operation forced")

                # create processor result directory if not exists
                if output_path and not os.path.exists(output_path):
                    os.makedirs(output_path)

                # run processor computation if requested
                down_path = self.get_data_dir()
                if self.level2_data:
                    try:
                        ip_dd = self.get_processing_level2(meta_data)['title']
                    except TypeError:
                        # switch back to L1
                        ip_dd = ip
                else:
                    ip_dd = ip
                data_dir = os.path.join(
                    down_path, '{}{}'.format(ip_dd, self.data_dir_suf))
                Logger.debug("Data dir: {}".format(data_dir))
                Logger.debug("Output dir: {}".format(output_path))

                # run computation
                response_data = self._run(meta_data, data_dir, output_path)
            else:
                # no change, get response data from previous run
                response_data = self.get_last_response(ip)

            # update response
            if response_data:
                self.update_response(response_data)

            # log IP operation
            self.ip_operation(
                ip, self._response[self._current_response_idx].status)

        # log computation finished
        self._run_done()

        return self._current_response_idx
Пример #9
0
    def compute_coverage(self):
        """Compute vpx coverage from input valid pixel masks.

        Collects 'valid_pixels' rasters per year from the last IP processor
        and merges them into one coverage file per year via count_vpx().

        :return: path to output file
        """
        # collect years
        years = {}
        for yr in self.get_years():
            years[yr] = []

        # collect input files from last IP processor
        processed_ips = Logger.db_handler().processed_ips_last('valid_pixels')

        ip_idx = 1
        ip_count = len(processed_ips)
        if ip_count == 0:
            # create empty vpx_coverage file
            from osgeo import gdal, gdalconst

            im_reference = self.config.abs_path(
                self.config['geometry']['reference_image'])
            ids = gdal.Open(im_reference, gdalconst.GA_ReadOnly)
            iproj = ids.GetProjection()
            itrans = ids.GetGeoTransform()
            vpx_band = ids.GetRasterBand(1)

            # one empty, georeferenced output per configured year, sized
            # and projected like the reference image
            for yr in years.keys():
                out_file = self.get_output_file(yr)
                driver = gdal.GetDriverByName('GTiff')
                ods = driver.Create(out_file,
                                    vpx_band.XSize,
                                    vpx_band.YSize,
                                    eType=vpx_band.DataType)
                ods.SetGeoTransform(itrans)
                ods.SetProjection(iproj)

                # closing the dataset flushes it to disk
                ods = None

                self.tif2jpg(out_file)

            ids = None

            raise ProcessorFailedError(self, "No input valid layers found")

        for ip, platform_type, status in processed_ips:
            Logger.info("Processing {}... ({}/{})".format(
                ip, ip_idx, ip_count))
            ip_idx += 1

            # set current platform type
            self.platform_type = QCPlatformType(platform_type)
            if self.config['image_products'].get('{}_processing_level2'.format(
                    self.get_platform_type())) == 'S2MSI2A':
                self.data_dir_suf = '.SAFE'
            else:
                self.data_dir_suf = ''

            # delete previous results if needed
            # NOTE(review): do_run is assigned but never read -- looks like
            # a leftover flag for a missing "if do_run:" guard; confirm
            if status not in (DbIpOperationStatus.unchanged,
                              DbIpOperationStatus.rejected):
                do_run = True

            if self.get_last_ip_status(ip,
                                       status) == DbIpOperationStatus.rejected:
                Logger.info("{} skipped - rejected".format(ip))
                continue

            yr = self.get_ip_year(ip)
            data_dir = self.get_data_dir(ip)

            try:
                years[yr] += self.filter_files(
                    data_dir, 'valid_pixels_{}m.tif'.format(
                        self.config['land_product']['geometric_resolution']))
            except KeyError:
                # yr not present in the years dict built from the config
                raise ProcessorFailedError(
                    self,
                    'Inconsistency between years in metadata and years in the '
                    'config file. Years from the config file are {}, but you '
                    'are querying year {}'.format(years, yr))

        vpx_files = {}
        for yr, input_files in years.items():
            if len(input_files) < 1:
                Logger.warning(
                    "No Vpx layers to be processed for {}".format(yr))
                continue

            # define output file
            output_file = self.get_output_file(yr)
            vpx_files[yr] = output_file

            if os.path.exists(output_file):
                # run processor if output file does not exist
                continue

            # NOTE(review): after the continue above, the file never exists
            # here, so status is always "added"; the value feeds only the
            # commented-out lp_operation() below -- confirm before cleanup
            status = DbIpOperationStatus.updated if os.path.exists(output_file) \
                else DbIpOperationStatus.added

            Logger.info("Running countVpx for {}: {} layers".format(
                yr, len(input_files)))
            # run processor
            try:
                self.count_vpx(input_files, output_file)
            except ProcessorFailedError:
                pass

            # log processor IP operation
            if os.path.exists(output_file):
                timestamp = self.file_timestamp(output_file)
            else:
                timestamp = None
            # TBD
            ### self.lp_operation(status, timestamp=timestamp)

        return vpx_files