def do_activity(self, data=None):
        """
        Check that the article's figures in the CDN bucket are reachable
        through the IIIF endpoint.

        The run identifier is taken from ``data['run']`` and used to read
        ``article_id`` and ``version`` from the session. The files listed
        under the article's CDN resource are narrowed down with
        ``article_structure.get_figures_for_iiif`` and handed to
        ``self.retrieve_endpoints_check`` together with the IIIF path built
        from ``settings.iiif_resolver``.

        :param data: workflow data; must support ``data['run']``
        :returns: ``ACTIVITY_SUCCESS`` when no check result has a first
            element equal to ``False``; ``ACTIVITY_PERMANENT_FAILURE`` when
            at least one does, or when any step raises
        """
        # Stage 1: session lookup. A failure here is logged only, since the
        # values needed for a monitor event are not available yet.
        try:
            if self.logger:
                pretty_data = json.dumps(data, sort_keys=True, indent=4)
                self.logger.info('data: %s' % pretty_data)

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')
        except Exception as error:
            self.logger.exception(str(error))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        # Stage 2: list the figures and test each one against IIIF.
        try:
            storage_context = StorageContext(self.settings)
            cdn_bucket = (self.settings.publishing_buckets_prefix +
                          self.settings.ppp_cdn_bucket)
            article_resource = "".join([
                self.settings.storage_provider, "://",
                cdn_bucket, "/", article_id])

            listing = storage_context.list_resources(article_resource)
            figures = article_structure.get_figures_for_iiif(listing)

            iiif_base = self.settings.iiif_resolver.replace(
                '{article_id}', article_id)

            checks = self.retrieve_endpoints_check(figures, iiif_base)

            # Keep the results whose first element compares equal to False.
            failed = [check for check in checks if check[0] == False]

            if failed:
                # report the endpoints that did not work
                self.emit_monitor_event(
                    self.settings, article_id, version, run,
                    self.pretty_name, "error",
                    "Some images are not available through the IIIF endpoint: "
                    + str(failed))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version, run,
                self.pretty_name, "end",
                "Finished Verification. All endpoints work. Article: " +
                article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as error:
            self.logger.exception(str(error))
            self.emit_monitor_event(
                self.settings, article_id, version, run,
                self.pretty_name, "error",
                "An error occurred when checking IIIF endpoint. Article " +
                article_id + '; message: ' + str(error))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #2
0
    def do_activity(self, data=None):
        """
        Convert the figures of an expanded article and publish them to the
        CDN bucket.

        Reads ``version``, ``article_id`` and ``expanded_folder`` from the
        session for ``data['run']``, lists the files under the expanded
        folder resource, keeps those accepted by
        ``article_structure.article_figure`` and passes each one to
        ``image_conversion.generate_images`` with the article's CDN path as
        the only publish location.

        Errors raised while reading ``run``/``version``/``article_id`` or
        while emitting the "start" event are not caught here.

        :param data: workflow data; must support ``data['run']``
        :returns: ``ACTIVITY_SUCCESS`` once every figure has been processed,
            ``ACTIVITY_PERMANENT_FAILURE`` if any step inside the conversion
            stage raises
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting submission convert images to jpg for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)
            storage_provider = self.settings.storage_provider + "://"
            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name

            storage_context = StorageContext(self.settings)
            files_in_bucket = storage_context.list_resources(orig_resource)

            # Materialise the filter result: on Python 3 ``filter`` returns an
            # iterator, and the "end" event below needs ``len(figures)``.
            figures = list(filter(article_structure.article_figure, files_in_bucket))

            # download is not a IIIF asset but is currently kept for compatibility
            # download may become obsolete in future
            formats = {"Original": {
                            "sources": "tif",
                            "format": "jpg",
                            "download": "yes"
                        }}

            # The destination is the same for every figure of the article.
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in figures:
                figure_resource = orig_resource + "/" + file_name
                file_path = self.get_tmp_dir() + os.sep + file_name
                file_pointer = storage_context.get_resource_to_file_pointer(figure_resource, file_path)

                # A fresh list per call, so the callee never shares state
                # between figures.
                publish_locations = [cdn_resource_path]

                image_conversion.generate_images(self.settings, formats, file_pointer,
                                                 article_structure.ArticleInfo(file_name),
                                                 publish_locations, self.logger)

            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished converting images for " + article_id + ": " +
                                    str(len(figures)) + " images processed ")
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("An error occurred during " + self.pretty_name)
            # ``str(e)`` rather than ``e.message``: the latter does not exist
            # on Python 3 exceptions and would raise inside this handler.
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error converting images to JPG for article" + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
    def do_activity(self, data=None):
        """
        Copy the pre-ingest assets of an expanded article to the CDN bucket.

        Reads ``version``, ``article_id`` and ``expanded_folder`` from the
        session for ``data['run']``, lists the files under the expanded
        folder resource, selects them with
        ``article_structure.pre_ingest_assets`` and copies each selected
        file to ``<cdn bucket>/<article_id>/<file_name>``.

        Errors raised while reading ``run``/``version``/``article_id`` or
        while emitting the "start" event are not caught here.

        :param data: workflow data; must support ``data['run']``
        :returns: ``ACTIVITY_SUCCESS`` when every asset was copied,
            ``ACTIVITY_PERMANENT_FAILURE`` if any step of the copy stage
            raises
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run,
                                self.pretty_name, "start",
                                "Depositing Ingest assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(orig_resource)

            pre_ingest_assets = article_structure.pre_ingest_assets(
                files_in_bucket)

            # Both prefixes are identical for every file, so build them once.
            source_prefix = orig_resource + "/"
            dest_prefix = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in pre_ingest_assets:
                storage_context.copy_resource(source_prefix + file_name,
                                              dest_prefix + file_name)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" %
                                     (file_name, cdn_bucket_name))

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Deposited Ingest assets for article " + article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when Depositing Ingest assets")
            # ``str(e)`` rather than ``e.message``: the latter does not exist
            # on Python 3 exceptions and would raise inside this handler.
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error depositing Ingest assets for article " +
                article_id + " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #4
0
    def do_activity(self, data=None):
        """
        Verify the IIIF availability of an article's figures.

        ``article_id`` and ``version`` are read from the session keyed by
        ``data['run']``. The resources listed under the article's folder in
        the CDN bucket are filtered with
        ``article_structure.get_figures_for_iiif`` and each is checked via
        ``self.retrieve_endpoints_check``.

        :param data: workflow data; must support ``data['run']``
        :returns: ``ACTIVITY_SUCCESS`` if no result's first element equals
            ``False``, otherwise ``ACTIVITY_PERMANENT_FAILURE`` (also
            returned when an exception is raised at any point)
        """

        # Session values first; without them no monitor event can be sent,
        # so a failure at this point is only logged.
        try:
            if self.logger:
                dumped = json.dumps(data, sort_keys=True, indent=4)
                self.logger.info('data: %s' % dumped)

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')

        except Exception as exc:
            self.logger.exception(str(exc))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        try:
            storage_context = StorageContext(self.settings)
            bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            resource_parts = (self.settings.storage_provider, "://", bucket_name, "/", article_id)
            images_resource = "".join(resource_parts)

            bucket_files = storage_context.list_resources(images_resource)
            iiif_figures = article_structure.get_figures_for_iiif(bucket_files)

            article_iiif_path = self.settings.iiif_resolver.replace('{article_id}', article_id)

            outcomes = self.retrieve_endpoints_check(iiif_figures, article_iiif_path)

            # Collect every outcome flagged as failed (first element == False).
            failures = []
            for outcome in outcomes:
                if outcome[0] == False:
                    failures.append(outcome)

            if not failures:
                self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                        "Finished Verification. All endpoints work. Article: " + article_id)
                return activity.activity.ACTIVITY_SUCCESS

            # report endpoints that did not work
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "Some images are not available through the IIIF endpoint: " + str(failures))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        except Exception as exc:
            self.logger.exception(str(exc))
            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "error",
                                    "An error occurred when checking IIIF endpoint. Article " +
                                    article_id + '; message: ' + str(exc))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
 def list_files_from_cdn(self, article_id):
     """
     List the resources stored for an article in the CDN bucket.

     :param article_id: article identifier appended to the bucket path
     :returns: whatever ``StorageContext.list_resources`` returns for
         ``<storage_provider>://<prefix><ppp_cdn_bucket>/<article_id>``
     """
     context = StorageContext(self.settings)
     cdn_bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
     cdn_article_path = self.settings.storage_provider + "://" + cdn_bucket + "/" + article_id
     listing = context.list_resources(cdn_article_path)
     return listing
Пример #6
0
    def do_activity(self, data=None):
        """
        Copy the remaining (non pre-ingest) assets of an expanded article to
        the CDN bucket.

        Reads ``version``, ``article_id`` and ``expanded_folder`` from the
        session for ``data['run']`` and lists the files under the expanded
        folder resource. Files reported by
        ``article_structure.pre_ingest_assets`` are skipped; every other
        file is copied to ``<cdn bucket>/<article_id>/<file_name>``. Unless
        its extension is in the configured no-download extensions, the file
        is copied a second time as ``<name>-download.<ext>`` with additional
        metadata.

        Errors raised while reading ``run``/``version``/``article_id`` or
        while emitting the "start" event are not caught here.

        :param data: workflow data; must support ``data['run']``
        :returns: ``ACTIVITY_SUCCESS`` when all copies complete,
            ``ACTIVITY_PERMANENT_FAILURE`` if any step of the copy stage
            raises
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Depositing assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(orig_resource)

            # filter figures that have already been copied (see DepositIngestAssets activity)
            pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)

            other_assets = filter(lambda asset: asset not in pre_ingest_assets, files_in_bucket)

            # assets bucket
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)

            # Both prefixes are identical for every file, so build them once.
            source_prefix = orig_resource + "/"
            dest_prefix = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in other_assets:
                storage_context.copy_resource(source_prefix + file_name, dest_prefix + file_name)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))

                # NOTE(review): a file name without a '.' makes this unpacking
                # raise ValueError, which fails the whole activity through the
                # handler below - confirm such names cannot occur.
                file_name_no_extension, extension = file_name.rsplit('.', 1)
                if extension not in no_download_extensions:
                    content_type = self.content_type_from_file_name(file_name)
                    # NOTE(review): the value repeats the header name
                    # ("Content-Disposition: ...") - kept as is; confirm this
                    # is what the storage layer expects.
                    dict_metadata = {'Content-Disposition':
                                     str("Content-Disposition: attachment; filename=" + file_name + ";"),
                                     'Content-Type': content_type}
                    file_download = file_name_no_extension + "-download." + extension

                    # file is copied with additional metadata
                    storage_context.copy_resource(source_prefix + file_name,
                                                  dest_prefix + file_download,
                                                  additional_dict_metadata=dict_metadata)

            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "end",
                                    "Deposited assets for article " + article_id)

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            # ``str(e)`` rather than ``e.message``: the latter does not exist
            # on Python 3 exceptions and would raise inside this handler.
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error depositing assets for article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return activity.activity.ACTIVITY_SUCCESS
Пример #7
0
 def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
     """
     List the resources under an expanded folder in the given bucket.

     :param settings: settings object providing ``storage_provider``; also
         passed to ``StorageContext``
     :param expanded_folder_name: folder path appended after the bucket
     :param xml_bucket: bucket name used in the resource address
     :returns: the result of ``StorageContext.list_resources`` for
         ``<storage_provider>://<xml_bucket>/<expanded_folder_name>``
     """
     context = StorageContext(settings)
     folder_resource = settings.storage_provider + "://" + xml_bucket + "/" + expanded_folder_name
     return context.list_resources(folder_resource)
Пример #8
0
 def list_files_from_cdn(self, article_id):
     """
     Return the storage listing of an article's folder in the CDN bucket.

     The resource address is built from ``settings.storage_provider``,
     ``settings.publishing_buckets_prefix``, ``settings.ppp_cdn_bucket``
     and ``article_id``.

     :param article_id: article identifier used as the folder name
     :returns: the value returned by ``StorageContext.list_resources``
     """
     # The callee expression is evaluated before its argument, so the
     # StorageContext is still created before the path is built.
     return StorageContext(self.settings).list_resources(
         self.settings.storage_provider + "://" +
         self.settings.publishing_buckets_prefix +
         self.settings.ppp_cdn_bucket + "/" +
         article_id)
Пример #9
0
 def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
     """
     Return the files found in ``xml_bucket`` under ``expanded_folder_name``.

     :param settings: settings object providing ``storage_provider``; also
         used to create the ``StorageContext``
     :param expanded_folder_name: folder path inside the bucket
     :param xml_bucket: name of the bucket to list
     :returns: the listing produced by ``StorageContext.list_resources``
     """
     # The callee expression is evaluated before its argument, so the
     # StorageContext is still created before the address is built.
     return StorageContext(settings).list_resources(
         settings.storage_provider + "://" + xml_bucket + "/" + expanded_folder_name)