def do_activity(self, data=None):
    """Verify that the article's figures are reachable through the IIIF endpoint.

    Reads ``article_id`` and ``version`` for ``data['run']`` from the session,
    lists the article's resources in the CDN bucket, selects figures with
    ``article_structure.get_figures_for_iiif`` and hands them, together with
    the article's IIIF path, to ``self.retrieve_endpoints_check``.

    :param data: workflow data; must provide the key ``'run'``.
    :returns: ``ACTIVITY_SUCCESS`` when no check result has a first element
        equal to ``False``; ``ACTIVITY_PERMANENT_FAILURE`` when at least one
        has, or when any step raises.
    """
    try:
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')
    except Exception as e:
        # No monitor event is emitted here: article_id/version may not be known.
        # The logger is optional (see the guard above), so guard it here too;
        # otherwise a missing logger would replace the failure result with an
        # AttributeError.
        if self.logger:
            self.logger.exception(str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    try:
        storage_context = StorageContext(self.settings)
        bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        images_resource = "".join(
            (self.settings.storage_provider, "://", bucket, "/", article_id))
        files_in_bucket = storage_context.list_resources(images_resource)
        original_figures = article_structure.get_figures_for_iiif(files_in_bucket)
        iiif_path_for_article = self.settings.iiif_resolver.replace(
            '{article_id}', article_id)

        results = self.retrieve_endpoints_check(original_figures, iiif_path_for_article)
        # `== False` is kept on purpose: a first element that is merely falsy
        # (e.g. None) is not counted as a failed endpoint.
        bad_images = [result for result in results if result[0] == False]  # noqa: E712

        if bad_images:
            # report the endpoints that did not work
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name, "error",
                "Some images are not available through the IIIF endpoint: " +
                str(bad_images))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished Verification. All endpoints work. Article: " + article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        if self.logger:
            self.logger.exception(str(e))
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "An error occurred when checking IIIF endpoint. Article " +
            article_id + '; message: ' + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Generate JPG versions of the article's figures and publish them to the CDN.

    Lists the expanded article folder, keeps the files accepted by
    ``article_structure.article_figure``, fetches each one to the activity's
    temporary directory and passes it to ``image_conversion.generate_images``
    with a single "Original" format spec (source ``tif``, output ``jpg``).
    The only publish location is ``<cdn bucket>/<article_id>/``.

    :param data: workflow data; must provide the key ``'run'``.
    :returns: ``ACTIVITY_SUCCESS`` after all figures were processed,
        ``ACTIVITY_PERMANENT_FAILURE`` if any step inside the ``try`` raises.
    """
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')

    self.emit_monitor_event(
        self.settings, article_id, version, run, self.pretty_name, "start",
        "Starting submission convert images to jpg for article " + article_id)

    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name

        storage_context = StorageContext(self.settings)
        files_in_bucket = storage_context.list_resources(orig_resource)

        # Materialise the selection: it is iterated below and then counted with
        # len(), which a lazy filter object (Python 3) does not support.
        figures = list(filter(article_structure.article_figure, files_in_bucket))

        # download is not a IIIF asset but is currently kept for compatibility
        # download may become obsolete in future
        formats = {"Original": {
            "sources": "tif",
            "format": "jpg",
            "download": "yes"
        }}

        # The destination does not depend on the individual figure.
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"

        for file_name in figures:
            figure_resource = orig_resource + "/" + file_name
            file_path = self.get_tmp_dir() + os.sep + file_name
            file_pointer = storage_context.get_resource_to_file_pointer(
                figure_resource, file_path)

            # A fresh list per call, as before, in case the callee mutates it.
            publish_locations = [cdn_resource_path]

            image_conversion.generate_images(
                self.settings, formats, file_pointer,
                article_structure.ArticleInfo(file_name),
                publish_locations, self.logger)

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished converting images for " + article_id + ": " +
            str(len(figures)) + " images processed ")
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        # The logger is optional (see the guard at the top of the method).
        if self.logger:
            self.logger.exception("An error occurred during " + self.pretty_name)
        # str(e) instead of e.message: the attribute does not exist on
        # Python 3 exceptions, which would make this handler itself raise.
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "Error converting images to JPG for article" + article_id +
            " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Copy the article's pre-ingest assets from the expanded folder to the CDN.

    The files to copy are those returned by
    ``article_structure.pre_ingest_assets`` for the listing of the expanded
    article folder; each is copied to ``<cdn bucket>/<article_id>/<file_name>``.

    :param data: workflow data; must provide the key ``'run'``.
    :returns: ``ACTIVITY_SUCCESS`` once every asset has been copied,
        ``ACTIVITY_PERMANENT_FAILURE`` if any step inside the ``try`` raises.
    """
    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')

    self.emit_monitor_event(
        self.settings, article_id, version, run, self.pretty_name, "start",
        "Depositing Ingest assets for " + article_id)

    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
        files_in_bucket = storage_context.list_resources(orig_resource)

        pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)

        # Source and destination prefixes are the same for every file.
        source_prefix = orig_resource + "/"
        dest_prefix = storage_provider + cdn_bucket_name + "/" + article_id + "/"

        for file_name in pre_ingest_assets:
            storage_context.copy_resource(source_prefix + file_name,
                                          dest_prefix + file_name)
            if self.logger:
                self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Deposited Ingest assets for article " + article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        # The logger is optional (it is guarded inside the loop above).
        if self.logger:
            self.logger.exception("Exception when Depositing Ingest assets")
        # str(e) instead of e.message: the attribute does not exist on
        # Python 3 exceptions, which would make this handler itself raise.
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "Error depositing Ingest assets for article " + article_id +
            " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Check every IIIF figure of the article against the IIIF endpoint.

    The article is identified through the session values ``article_id`` and
    ``version`` stored for ``data['run']``. Its CDN resources are listed,
    filtered with ``article_structure.get_figures_for_iiif`` and checked via
    ``self.retrieve_endpoints_check`` using the article's IIIF path.

    :param data: workflow data; must provide the key ``'run'``.
    :returns: ``ACTIVITY_PERMANENT_FAILURE`` if reading the session fails, if
        any check result has a first element equal to ``False``, or if the
        verification raises; ``ACTIVITY_SUCCESS`` otherwise.
    """
    failure = activity.activity.ACTIVITY_PERMANENT_FAILURE

    try:
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')
    except Exception as e:
        # The logger is optional (guarded above); without this guard a missing
        # logger would raise AttributeError instead of returning the failure.
        if self.logger:
            self.logger.exception(str(e))
        return failure

    try:
        storage_context = StorageContext(self.settings)
        bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        images_resource = "".join(
            (self.settings.storage_provider, "://", bucket, "/", article_id))
        files_in_bucket = storage_context.list_resources(images_resource)

        original_figures = article_structure.get_figures_for_iiif(files_in_bucket)
        iiif_path_for_article = self.settings.iiif_resolver.replace(
            '{article_id}', article_id)
        results = self.retrieve_endpoints_check(original_figures, iiif_path_for_article)

        # Equality with False is intentional: only results explicitly flagged
        # False count as broken, other falsy values do not.
        bad_images = [result for result in results if result[0] == False]  # noqa: E712

        if bad_images:
            # report the endpoints that did not work
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name, "error",
                "Some images are not available through the IIIF endpoint: " +
                str(bad_images))
            return failure

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished Verification. All endpoints work. Article: " + article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        if self.logger:
            self.logger.exception(str(e))
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "An error occurred when checking IIIF endpoint. Article " +
            article_id + '; message: ' + str(e))
        return failure
def list_files_from_cdn(self, article_id):
    """Return the resources listed for ``article_id`` in the CDN bucket.

    The location is assembled from ``self.settings`` as
    ``<storage_provider>://<publishing_buckets_prefix><ppp_cdn_bucket>/<article_id>``
    and passed to ``StorageContext.list_resources``.
    """
    context = StorageContext(self.settings)
    settings = self.settings
    scheme = settings.storage_provider + "://"
    cdn_bucket = settings.publishing_buckets_prefix + settings.ppp_cdn_bucket
    location = scheme + cdn_bucket + "/" + article_id
    return context.list_resources(location)
def do_activity(self, data=None):
    """Copy the article's remaining assets from the expanded folder to the CDN.

    Every file in the expanded article folder that is not among
    ``article_structure.pre_ingest_assets`` is copied to
    ``<cdn bucket>/<article_id>/``. Files whose extension is not in the
    no-download extensions are copied a second time as
    ``<name>-download.<extension>`` with extra Content-Disposition and
    Content-Type metadata.

    :param data: workflow data; must provide the key ``'run'``.
    :returns: ``ACTIVITY_SUCCESS`` once every asset has been copied,
        ``ACTIVITY_PERMANENT_FAILURE`` if any step inside the ``try`` raises.
    """
    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')

    self.emit_monitor_event(
        self.settings, article_id, version, run, self.pretty_name, "start",
        "Depositing assets for " + article_id)

    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)

        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
        files_in_bucket = storage_context.list_resources(orig_resource)

        # filter figures that have already been copied (see DepositIngestAssets activity)
        pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)
        other_assets = [asset for asset in files_in_bucket
                        if asset not in pre_ingest_assets]

        # assets bucket
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

        no_download_extensions = self.get_no_download_extensions(
            self.settings.no_download_extensions)

        # Source and destination prefixes are the same for every file.
        source_prefix = orig_resource + "/"
        dest_prefix = storage_provider + cdn_bucket_name + "/" + article_id + "/"

        for file_name in other_assets:
            storage_context.copy_resource(source_prefix + file_name,
                                          dest_prefix + file_name)

            if self.logger:
                self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))

            # NOTE(review): a file name without a '.' makes this unpacking raise
            # ValueError, which the handler below turns into a permanent
            # failure - confirm that is the intended outcome.
            file_name_no_extension, extension = file_name.rsplit('.', 1)
            if extension not in no_download_extensions:
                content_type = self.content_type_from_file_name(file_name)
                # NOTE(review): the header value repeats the header name -
                # confirm this is what the storage backend expects.
                dict_metadata = {
                    'Content-Disposition':
                        str("Content-Disposition: attachment; filename=" + file_name + ";"),
                    'Content-Type': content_type,
                }
                file_download = file_name_no_extension + "-download." + extension

                # file is copied with additional metadata
                storage_context.copy_resource(source_prefix + file_name,
                                              dest_prefix + file_download,
                                              additional_dict_metadata=dict_metadata)

        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Deposited assets for article " + article_id)
    except Exception as e:
        # The logger is optional (it is guarded inside the loop above).
        if self.logger:
            self.logger.exception("Exception when Depositing assets")
        # str(e) instead of e.message: the attribute does not exist on
        # Python 3 exceptions, which would make this handler itself raise.
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "Error depositing assets for article " + article_id +
            " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    return activity.activity.ACTIVITY_SUCCESS
def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
    """List the resources under ``expanded_folder_name`` in ``xml_bucket``.

    The location passed to ``StorageContext.list_resources`` is
    ``<settings.storage_provider>://<xml_bucket>/<expanded_folder_name>``.
    """
    context = StorageContext(settings)
    location = "".join(
        (settings.storage_provider, "://", xml_bucket, "/", expanded_folder_name))
    return context.list_resources(location)
def list_files_from_cdn(self, article_id):
    """List what is stored for ``article_id`` in the CDN bucket.

    Builds the article's storage path from ``self.settings``
    (storage provider, publishing buckets prefix, CDN bucket name) and
    returns the result of ``StorageContext.list_resources`` for it.
    """
    storage = StorageContext(self.settings)
    path_parts = (
        self.settings.storage_provider,
        "://",
        self.settings.publishing_buckets_prefix,
        self.settings.ppp_cdn_bucket,
        "/",
        article_id,
    )
    return storage.list_resources("".join(path_parts))
def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
    """Return the listing of the expanded article folder inside ``xml_bucket``.

    The folder is addressed through ``settings.storage_provider`` and listed
    with ``StorageContext.list_resources``.
    """
    storage = StorageContext(settings)
    bucket_root = settings.storage_provider + "://" + xml_bucket
    folder_resource = bucket_root + "/" + expanded_folder_name
    return storage.list_resources(folder_resource)