def _add_db_handler(self):
    """Add DB logging handler.

    The handler session is opened on the DB file given by ``self._db_file``.
    """
    try:
        Logger.addHandler(
            DbLogger(self.config['catalog']['ip_parent_identifier'])
        )
        Logger.db_handler().set_session(
            self._db_file
        )
    except DbConnectionError as e:
        raise ConnectionError('{}: {}'.format(e, self._db_file))
def _cleanup(self, job_id=None):
    """Perform manager clean up.

    :param int job_id: remove only selected job
        (None or a negative value removes all jobs)
    """
    # None or a negative job id means "remove everything"
    remove_all = job_id is None or job_id < 0

    # logging DB
    if remove_all:
        log_db = self.config['logging']['db']
        if os.path.exists(log_db):
            os.remove(log_db)
            Logger.debug("Logging DB {} removed".format(log_db))
    else:
        # single job
        self._add_db_handler()
        Logger.db_handler().delete_job(job_id)

    # logging dir
    if remove_all:
        log_dir = self.config['logging']['directory']
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
            Logger.debug("Logging directory {} removed".format(log_dir))
    else:
        # single job
        log_file = self._get_log_file(job_id)
        if os.path.exists(log_file):
            os.remove(log_file)
            Logger.debug("Logging file {} removed".format(log_file))
        log_dir = os.path.splitext(log_file)[0]
        if os.path.exists(log_dir):
            shutil.rmtree(log_dir)
            Logger.debug("Logging directory {} removed".format(log_dir))

    if not remove_all and job_id > 0:
        return
def run(self):
    """Run all registered processors from queue."""
    # check if processors defined
    if not self._processors:
        raise ConfigError(
            self._config_files,
            "list of processors not defined"
        )

    # determine current/previous job id
    job_id = Logger.db_handler().job_id()
    prev_job_id = Logger.db_handler().last_job_id(
        self.config['processors'][0])
    Logger.info("Job started (id {})".format(job_id))
    Logger.db_handler().job_started()
    if prev_job_id:
        Logger.debug("Previous job found (id {})".format(prev_job_id))
    else:
        Logger.debug("No previous job found. Starting from scratch")

    start = time.time()
    for proc in self._processors:
        try:
            # run the processor
            proc.run()
            try:
                Logger.info('{} processor result: {}'.format(
                    proc.identifier, proc.result()
                ))
            except ProcessorResultError:
                pass
            # store JSON after each processor
            self.save_response(proc)
        except ProcessorFailedError:
            pass

    Logger.success(
        "Job {} successfully finished in {:.6f} sec".format(
            job_id, time.time() - start
        ))
def _get_log_file(self, job_id=None):
    """Get log file filepath.

    :param int job_id: selected job id or None for current
    """
    if not job_id:
        job_id = Logger.db_handler().job_id()

    return os.path.abspath(
        os.path.join(
            self.config['logging']['directory'],
            '{0:05d}.log'.format(job_id)
        ))
def get_last_response(self, ip, full=False):
    """Get QI metadata response from previous job.

    :param str ip: image product
    :param bool full: True for full data, otherwise only relevant part

    :return dict: QI metadata
    """
    try:
        job_id = Logger.db_handler().last_job_id(
            self.config['processors'][0])
    except KeyError:
        raise ProcessorCriticalError(
            self, "No processors defined in config")

    if not job_id:
        Logger.debug(
            "First run? Unable to get last response from JSON file")
        return None

    json_file = os.path.join(
        self.config['logging']['directory'],
        '{0:05d}'.format(job_id),
        ip + '.json')
    if not os.path.exists(json_file):
        raise ProcessorCriticalError(
            self, "Response file {} not found".format(json_file))

    data = JsonIO.read(json_file, response=True)
    if full:
        return data

    relevant_part = QCResponse(data).get(self.isMeasurementOf)
    if not relevant_part:
        if self.config['strict']['enabled']:
            raise ProcessorCriticalError(
                self, "Unable to get relevant part for {} ({})".format(
                    self.isMeasurementOf, ip))
        else:
            return {}

    if hasattr(self, "isMeasurementOfSection"):
        relevant_part_tmp = {}
        for key in relevant_part.keys():
            if key in ("isMeasurementOf", "value", "lineage",
                       self.isMeasurementOfSection):
                relevant_part_tmp[key] = relevant_part[key]
        relevant_part = relevant_part_tmp

    return relevant_part
def get_last_ip_status(self, ip, status):
    """Get status for last image product.

    :param str ip: image product
    :param DbIpOperationStatus status: status

    :return DbIpOperationStatus: image product status
    """
    ip_status = status
    if status == DbIpOperationStatus.unchanged:
        # check also status of last IP processed by this processor
        ip_status = Logger.db_handler().processed_ip_status(
            self.identifier, ip)

    return ip_status
def __init__(self, directory):
    """Read JSON schema and create the target directory for output JSON files.

    :param str directory: directory where the output JSON file will be stored
    """
    # switch to safe mode when template processor is used
    self.safe = True
    # if self.processors()[0].__class__.__name__ == 'QCProcessorTemplate' else False

    # read JSON schema
    with open(os.path.join(os.path.dirname(__file__), 'schema.json')) as fd:
        self._schema = json.load(fd)

    # create directory where output JSON file will be stored
    self.target_dir = os.path.join(
        directory,
        '{0:05d}'.format(Logger.db_handler().job_id()))
    if not os.path.exists(self.target_dir):
        os.makedirs(self.target_dir)
def run(self):
    """Run processor tasks.

    :return int: response counter value
    """
    # log start computation
    self._run_start()

    # loop through image products (IP)
    processor_previous = self.previous()
    processed_ips = Logger.db_handler().processed_ips(
        processor_previous, platform_type=self.platform_type)
    ip_count = len(processed_ips)
    if ip_count < 1:
        Logger.warning(
            "No IP products to process (previous processor: {})".format(
                processor_previous))

    counter = 1
    for ip, status in processed_ips:
        # increment counter
        self._current_response_idx += 1

        Logger.info("({}) Processing {}... ({}/{})".format(
            self.identifier, ip, counter, ip_count))
        counter += 1

        # get last IP status
        ip_status = self.get_last_ip_status(ip, status)

        # skip rejected IP (QA not passed)
        if ip_status == DbIpOperationStatus.rejected:
            self.ip_operation(ip, ip_status)
            response_data = self.get_last_response(ip)
            if response_data:
                self.update_response(response_data)
            continue

        # set current response status from DB
        self.set_response_status(status)

        # read metadata
        meta_data = self.get_meta_data(ip)

        # define output path
        # check whether results exist
        if self.output_path is None:
            # output path not defined, assuming QI results (level2)
            try:
                output_path = self._get_qi_results_path(
                    self.get_processing_level2(meta_data)['title'])
            except TypeError:
                Logger.warning(
                    "Level2 product not found, switching back to level1!")
                output_path = self._get_qi_results_path(meta_data['title'])
            results_exist = self.check_qi_results(output_path)
        else:
            output_path = self.output_path
            results_exist = os.path.exists(self._get_ip_output_path(ip))

        # force absolute path
        try:
            output_path = os.path.abspath(output_path)
        except TypeError:
            raise ProcessorCriticalError(
                self, "Output directory not defined!")

        # determine whether to force the computation
        # ip_status is None -> no previous processor run detected
        force = status == DbIpOperationStatus.forced or \
            ip_status is None or \
            (status == DbIpOperationStatus.unchanged and not results_exist)

        # perform processor operations if requested
        if status in (DbIpOperationStatus.added,
                      DbIpOperationStatus.updated,
                      DbIpOperationStatus.failed) or force:
            if force:
                # change status from unchanged to updated
                if not results_exist:
                    Logger.debug("Missing results")
                Logger.debug("Operation forced")

            # create processor result directory if not exists
            if output_path and not os.path.exists(output_path):
                os.makedirs(output_path)

            # run processor computation if requested
            down_path = self.get_data_dir()
            if self.level2_data:
                try:
                    ip_dd = self.get_processing_level2(meta_data)['title']
                except TypeError:
                    # switch back to L1
                    ip_dd = ip
            else:
                ip_dd = ip
            data_dir = os.path.join(
                down_path, '{}{}'.format(ip_dd, self.data_dir_suf))
            Logger.debug("Data dir: {}".format(data_dir))
            Logger.debug("Output dir: {}".format(output_path))

            # run computation
            response_data = self._run(meta_data, data_dir, output_path)
        else:
            # no change, get response data from previous run
            response_data = self.get_last_response(ip)

        # update response
        if response_data:
            self.update_response(response_data)

        # log IP operation
        self.ip_operation(
            ip, self._response[self._current_response_idx].status)

    # log computation finished
    self._run_done()

    return self._current_response_idx
def compute_coverage(self):
    """Compute vpx coverage from input valid pixel masks.

    :return dict: output vpx coverage files by year
    """
    # collect years
    years = {}
    for yr in self.get_years():
        years[yr] = []

    # collect input files from last IP processor
    processed_ips = Logger.db_handler().processed_ips_last('valid_pixels')
    ip_idx = 1
    ip_count = len(processed_ips)
    if ip_count == 0:
        # create empty vpx_coverage file
        from osgeo import gdal, gdalconst
        im_reference = self.config.abs_path(
            self.config['geometry']['reference_image'])
        ids = gdal.Open(im_reference, gdalconst.GA_ReadOnly)
        iproj = ids.GetProjection()
        itrans = ids.GetGeoTransform()
        vpx_band = ids.GetRasterBand(1)
        for yr in years.keys():
            out_file = self.get_output_file(yr)
            driver = gdal.GetDriverByName('GTiff')
            ods = driver.Create(out_file, vpx_band.XSize, vpx_band.YSize,
                                eType=vpx_band.DataType)
            ods.SetGeoTransform(itrans)
            ods.SetProjection(iproj)
            ods = None
            self.tif2jpg(out_file)
        ids = None

        raise ProcessorFailedError(self, "No input valid layers found")

    for ip, platform_type, status in processed_ips:
        Logger.info("Processing {}... ({}/{})".format(
            ip, ip_idx, ip_count))
        ip_idx += 1

        # set current platform type
        self.platform_type = QCPlatformType(platform_type)
        if self.config['image_products'].get('{}_processing_level2'.format(
                self.get_platform_type())) == 'S2MSI2A':
            self.data_dir_suf = '.SAFE'
        else:
            self.data_dir_suf = ''

        # delete previous results if needed
        if status not in (DbIpOperationStatus.unchanged,
                          DbIpOperationStatus.rejected):
            do_run = True

        if self.get_last_ip_status(ip, status) == DbIpOperationStatus.rejected:
            Logger.info("{} skipped - rejected".format(ip))
            continue

        yr = self.get_ip_year(ip)
        data_dir = self.get_data_dir(ip)
        try:
            years[yr] += self.filter_files(
                data_dir, 'valid_pixels_{}m.tif'.format(
                    self.config['land_product']['geometric_resolution']))
        except KeyError:
            raise ProcessorFailedError(
                self,
                'Inconsistency between years in metadata and years in the '
                'config file. Years from the config file are {}, but you '
                'are querying year {}'.format(years, yr))

    vpx_files = {}
    for yr, input_files in years.items():
        if len(input_files) < 1:
            Logger.warning(
                "No Vpx layers to be processed for {}".format(yr))
            continue

        # define output file
        output_file = self.get_output_file(yr)
        vpx_files[yr] = output_file
        if os.path.exists(output_file):
            # run processor only if output file does not exist
            continue
        status = DbIpOperationStatus.updated if os.path.exists(output_file) \
            else DbIpOperationStatus.added

        Logger.info("Running countVpx for {}: {} layers".format(
            yr, len(input_files)))

        # run processor
        try:
            self.count_vpx(input_files, output_file)
        except ProcessorFailedError:
            pass

        # log processor IP operation
        if os.path.exists(output_file):
            timestamp = self.file_timestamp(output_file)
        else:
            timestamp = None  # TBD
        self.lp_operation(status, timestamp=timestamp)

    return vpx_files