def get_meta_data(self, ip):
    """Get provider-based metadata for given image product.

    :param str ip: image product

    :return dict: metadata
    """
    return JsonIO.read(
        os.path.join(self.config['project']['path'],
                     self.config['project']['metapath'],
                     ip + ".geojson"))
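# Usage sketch: reading provider metadata for an image product. Assumes
# `proc` is an instance of this class with a valid project config and that
# the per-product geojson already exists under <project path>/<metapath>;
# the title below is purely illustrative:
#
#   meta = proc.get_meta_data(
#       'S2A_MSIL1C_20200101T101021_N0208_R022_T33UVR_20200101T121021')
#   print(meta['title'], meta['Sensing start'])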
def _is_updated(self, data, json_file):
    """Check if data are updated.

    :param dict data: data to be checked
    :param str json_file: filename with already stored data

    :return bool: True if updated otherwise False
    """
    updated = False
    json_data = JsonIO.read(json_file)
    is_l2a = json_data['title'].find('MSIL2A') > -1

    # check for updated items first
    for k, v in json_data.items():
        if k in data.keys() and data[k] != v:
            if isinstance(v, datetime.datetime):
                dt = datetime.datetime.strptime(
                    data[k], '%Y-%m-%dT%H:%M:%S.%f')
                if abs((dt - v).total_seconds()) < 0.01:
                    # timedelta under threshold
                    continue
            Logger.info("Change in file {} detected ({}: {} -> {})".format(
                os.path.basename(json_file), k, v, data[k]))
            updated = True

    # check for added/deleted items
    if len(data.keys()) != len(json_data.keys()):
        # keys present in the new data but missing in the stored file
        # were added
        for k in data.keys():
            if k not in json_data:
                Logger.info(
                    "Change in file {} detected ({} added)".format(
                        os.path.basename(json_file), k))
                updated = True
        # keys present in the stored file but missing in the new data
        # were removed
        for k in json_data.keys():
            if k == 'qcmms':
                # ignore QCMMS metadata if any
                continue
            if is_l2a and k in ('Tile Identifier horizontal order',
                                'Datatake sensing start',
                                'Instrument mode',
                                'Tile Identifier'):
                # ignore extra metadata items for L2A products
                continue
            if k not in data:
                Logger.info(
                    "Change in file {} detected ({} removed)".format(
                        os.path.basename(json_file), k))
                updated = True

    if not updated:
        Logger.debug("No changes in file {} detected".format(
            os.path.basename(json_file)))

    return updated
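# Comparison sketch for the timestamp threshold above. Assumes provider
# values arrive as ISO strings with microseconds while stored values are
# datetime objects (as JsonIO.read returns them); values are illustrative:
#
#   stored = datetime.datetime(2020, 1, 1, 10, 10, 21, 24000)
#   new = '2020-01-01T10:10:21.024'
#   dt = datetime.datetime.strptime(new, '%Y-%m-%dT%H:%M:%S.%f')
#   abs((dt - stored).total_seconds()) < 0.01  # True -> treated as unchanged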
def get_ip_year(self, ip):
    """Get image product sensing year.

    :param str ip: image product title

    :return int: year of the sensing start
    """
    meta_data = JsonIO.read(
        os.path.join(self.config['project']['path'],
                     self.config['project']['metapath'],
                     ip + ".geojson"))

    return meta_data['Sensing start'].year
def get_last_response(self, ip, full=False):
    """Get QI metadata response from previous job.

    :param str ip: image product
    :param bool full: True for full data otherwise only relevant part

    :return dict: QI metadata
    """
    try:
        job_id = Logger.db_handler().last_job_id(
            self.config['processors'][0])
    except KeyError:
        raise ProcessorCriticalError(
            self, "No processors defined in config")

    if not job_id:
        Logger.debug(
            "First run? Unable to get last response from JSON file")
        return None

    json_file = os.path.join(self.config['logging']['directory'],
                             '{0:05d}'.format(job_id),
                             ip + '.json')
    if not os.path.exists(json_file):
        raise ProcessorCriticalError(
            self, "Response file {} not found".format(json_file))

    data = JsonIO.read(json_file, response=True)
    if full:
        return data

    relevant_part = QCResponse(data).get(self.isMeasurementOf)
    if not relevant_part:
        if self.config['strict']['enabled']:
            raise ProcessorCriticalError(
                self, "Unable to get relevant part for {} ({})".format(
                    self.isMeasurementOf, ip))
        else:
            return {}

    if hasattr(self, "isMeasurementOfSection"):
        relevant_part_tmp = {}
        for key in relevant_part.keys():
            if key in ("isMeasurementOf", "value", "lineage",
                       self.isMeasurementOfSection):
                relevant_part_tmp[key] = relevant_part[key]
        relevant_part = relevant_part_tmp

    return relevant_part
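# Usage sketch. Assumes a previous job already logged a response for the
# given title and that the concrete processor defines `isMeasurementOf`;
# the titles are illustrative:
#
#   full = proc.get_last_response('S2A_MSIL1C_...', full=True)  # whole response
#   part = proc.get_last_response('S2A_MSIL1C_...')
#   # `part` holds only the section matching proc.isMeasurementOf; when the
#   # processor also defines `isMeasurementOfSection`, it is reduced to the
#   # keys "isMeasurementOf", "value", "lineage" and that section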
def _read_interpretation_qi(self):
    """Read LP interpretation quality indicators.

    :return dict: quality indicators
    """
    try:
        lp_interpretation_fn = os.path.join(
            self.config['map_product']['path'],
            self.config['map_product']['map_interpretation_qi'])
    except KeyError:
        Logger.info("{} is not defined".format('map_interpretation_qi'))
        return None

    if not os.path.isfile(lp_interpretation_fn):
        Logger.info("File {} not found".format(lp_interpretation_fn))
        return None

    return JsonIO.read(lp_interpretation_fn)
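# Config sketch (assumption: the relevant configuration entries look
# roughly like this; the key names are taken from the code above, the
# values are made up):
#
#   map_product:
#     path: /data/lp
#     map_interpretation_qi: interpretation_qi.json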
def _run(self, meta_file, data_dir, output_dir):
    """Perform processor's tasks.

    Check processors.download.base for a real example.

    :param meta_file: path to JSON metafile
    :param str data_dir: path to data directory
    :param str output_dir: path to output processor directory

    :return dict: QI metadata
    """
    # get IP metadata
    data = JsonIO.read(meta_file)

    # print log message
    Logger.info("Processing IP title: {}".format(data['title']))

    # modify response attributes
    response = {'qcmms_delivery_status': 'finished'}

    return response
def _run(self, meta_data, data_dir, output_dir):
    """Perform processor tasks.

    :param meta_data: IP metadata
    :param str data_dir: path to data directory
    :param str output_dir: path to output processor directory
    """
    response_data = {}

    metapath = os.path.join(self.config['project']['path'],
                            self.config['project']['metapath'])
    json_file = os.path.join(metapath, meta_data['title'] + '.geojson')
    output_dir = self._get_ip_output_path(meta_data['title'])

    # check if L2 product is available
    try:
        title = meta_data['qcmms']['processing_level2']['title']
        l2_file = os.path.join(os.path.dirname(data_dir),
                               title + self.extension)
        if os.path.exists(l2_file):
            Logger.debug(
                'L2 product already downloaded ({}); '
                'no local calibration done'.format(l2_file))
            return response_data
    except KeyError:
        pass

    # unarchive L1C product if needed
    if self.data_dir_suf:
        filepath = data_dir.replace(self.data_dir_suf, self.extension)
    else:
        filepath = data_dir + self.extension
    try:
        _ = self.unarchive(filepath)
    except ProcessorFailedError:
        return response_data

    if not os.path.exists(output_dir) or \
       len(self.filter_files(output_dir, self.img_extension)) < 1:
        # no files found, do calibration
        Logger.debug(
            'L2 products will be created in {}'.format(output_dir))
        if os.path.exists(output_dir):
            # remove broken product if available
            shutil.rmtree(output_dir)
        response_data.update(self.calibrate(data_dir, output_dir))
    else:
        Logger.debug("Level2 product found: {}".format(output_dir))

    # title to be written into metadata
    title = os.path.split(output_dir)[-1]
    if len(self.data_dir_suf) > 0:
        title = title.split(self.data_dir_suf)[0]
    meta_data.update({'qcmms': {'processing_level2': {'title': title}}})
    JsonIO.write(json_file, meta_data)

    return response_data
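# Metadata sketch: after a successful run the per-product geojson gains a
# 'qcmms' block pairing the L1C product with its locally calibrated L2
# title (titles are illustrative):
#
#   {
#     "title": "S2A_MSIL1C_...",
#     "qcmms": {
#       "processing_level2": {
#         "title": "S2A_MSIL2A_..."
#       }
#     }
#   }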
def run(self):
    """Run processor tasks.

    :return int: number of responses
    """
    self._run_start()

    metapath = os.path.join(self.config['project']['path'],
                            self.config['project']['metapath'])
    if not os.path.isdir(metapath):
        self.create_dir(metapath)

    # connect API
    try:
        self.connector = self.connect()
    except ProcessorFailedError:
        self.set_response_status(DbIpOperationStatus.failed)
        return len(self._response)

    # filter products
    kwargs = self.get_query_params()
    products = self.query(kwargs)

    # search also for secondary product type if defined
    level2_products = None
    if self.level2_data:
        level2_producttype = self.config['image_products'].get(
            '{}_processing_level2'.format(self.platform_type.name.lower()))
        if level2_producttype:
            kwargs['producttype'] = level2_producttype
            Logger.debug("Query: {}".format(kwargs))
            level2_products = self.query(kwargs)

    products_count = len(products.keys())
    Logger.debug('{} products found'.format(products_count))

    # define mission archive for odata
    mission_archive = self.config['image_products'][
        '{}_mission_archive'.format(self.platform_type.name.lower())]
    if mission_archive == 'https://scihub.copernicus.eu/dhus':
        mission_archive = 'EOP:ESA:SCIHUB:S2'

    # store metadata to CSV overview file and individual JSON files
    csv_data = []
    collected_files = []
    i = 0
    count = len(products)
    for uuid in products:
        i += 1
        Logger.info("Querying {} ({}/{})".format(uuid, i, count))

        # get full metadata
        if products[uuid] is None:
            continue
        odata = self.get_product_data(uuid, products[uuid])
        csv_data.append(odata)

        # compare downloaded data with already stored
        json_file = os.path.abspath(
            os.path.join(metapath,
                         '{}.geojson'.format(odata[self.identifier_key])))
        collected_files.append(json_file)

        # determine IP operation status
        is_new_or_updated = not os.path.exists(json_file) or \
            self._is_updated(odata, json_file)
        if not is_new_or_updated and \
           self.get_last_response(odata['title']) is None:
            # unchanged, but unable to get response
            is_new_or_updated = True
        if is_new_or_updated:
            # -> new IP or updated IP
            # add extended metadata
            selected_for_delivery_control, response_data = \
                self.get_response_data(
                    odata, extra_data={'mission_archive': mission_archive})
            if selected_for_delivery_control:
                # QA passed -> added or updated
                if os.path.exists(json_file):
                    if self.get_last_response(odata['title']):
                        status = DbIpOperationStatus.updated
                    else:
                        # special case, unchanged, but unable to get response
                        status = DbIpOperationStatus.forced
                else:
                    status = DbIpOperationStatus.added
            else:
                # QA not passed -> rejected
                status = DbIpOperationStatus.rejected

            # prepare response (must be called before saving)
            timestamp = datetime.datetime.now()
            if not os.path.exists(json_file):
                search_date = timestamp
            else:
                try:
                    search_date = JsonIO.read(
                        json_file)['qcmms_search_date']
                except KeyError:
                    # appears when updated, but unable to get response
                    search_date = timestamp
            response_data["properties"]["productInformation"][
                "qualityInformation"]["qualityIndicators"].append({
                    "searchDate": search_date,
                    "searchDateUpdate": timestamp,
                    "isMeasurementOf": "{}/#{}".format(
                        self._measurement_prefix, self.isMeasurementOf),
                    "generatedAtTime": datetime.datetime.now(),
                    "value": selected_for_delivery_control,
                    "lineage":
                        'http://qcmms.esa.int/QCMMS_QCManager_v{}'.format(
                            __version__)
                })

            # pair level1 products with level2 if exists
            if selected_for_delivery_control and level2_products:
                l2_found = False
                # separate loop variable avoids shadowing the outer `uuid`
                for l2_uuid in level2_products:
                    try:
                        title_items_s = \
                            level2_products[l2_uuid]['title'].split('_')
                    except TypeError:
                        continue
                    title_items_p = odata['title'].replace(
                        'L1C', 'L2A').split('_')
                    # version field differs between levels; neutralize it
                    # before comparison
                    title_items_s[3] = title_items_p[3] = 'N'
                    if title_items_s == title_items_p:
                        Logger.debug("Level2 product found: {}".format(
                            level2_products[l2_uuid]['title']))
                        odata['qcmms'] = {
                            'processing_level2': {
                                'id': l2_uuid,
                                'title': level2_products[l2_uuid]['title']
                            }
                        }
                        l2_found = True
                        break
                if not l2_found:
                    Logger.info("Secondary product not found")

            # save searched products into individual geojsons
            JsonIO.write(json_file, odata)
        else:
            # -> unchanged, read last response
            response_data = self.get_last_response(odata['title'])
            selected_for_delivery = QCResponse(response_data).get_value(
                self.isMeasurementOf)
            status = DbIpOperationStatus.unchanged \
                if selected_for_delivery else DbIpOperationStatus.rejected

        # add response from file
        self.add_response(response_data)

        # log processor IP operation
        timestamp = self.file_timestamp(json_file)
        self.ip_operation(odata[self.identifier_key], status,
                          timestamp=timestamp)

    # check for deleted
    # processed_ids_delete = Logger.db_handler().processed_ips(
    #     self.identifier, prev=True, platform_type=self.platform_type
    # )
    # for ip, status in processed_ids_delete:
    #     if status == DbIpOperationStatus.deleted:
    #         # already deleted, skip
    #         continue
    #     json_file = os.path.abspath(
    #         os.path.join(metapath, '{}.geojson'.format(ip))
    #     )
    #     if json_file in collected_files:
    #         continue
    #     # to be removed
    #     os.remove(json_file)
    #     self.ip_operation(ip, DbIpOperationStatus.deleted)

    if len(csv_data) > 0:
        csv_file = os.path.join(
            metapath,
            '{}_fullmetadata.csv'.format(
                self.config['land_product']['product_abbrev']))
        CsvIO.write(csv_file, csv_data, append=True)

    if len(collected_files) < 1:
        Logger.warning("No products found")

    self._run_done()

    return len(self._response)
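# Pairing sketch: the L1C/L2A match above compares underscore-separated
# title fields after switching the level tag and neutralizing the version
# field (index 3). The titles below are made up for illustration:
#
#   l2a_title = 'S2A_MSIL2A_20200101T101021_N0214_R022_T33UVR'
#   l1c_title = 'S2A_MSIL1C_20200101T101021_N0208_R022_T33UVR'
#   s = l2a_title.split('_')
#   p = l1c_title.replace('L1C', 'L2A').split('_')
#   s[3] = p[3] = 'N'  # ignore differing version field
#   s == p             # -> True, products are paired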