Пример #1
0
    def store_in_cdn(self, filename, image, cdn_path, download):
        """
        Upload an image to the CDN bucket, optionally with a download copy.

        The image is stored at
        <storage provider>://<buckets prefix + CDN bucket>/<cdn_path>/<filename>
        with a Content-Type guessed from the file name. When download is
        truthy the stored resource is also copied to
        "<name>-download.<extension>" with extra Content-Disposition metadata.
        The image is closed before returning, even if an error is raised.
        """
        try:
            storage = StorageContext(self.settings)
            provider_prefix = self.settings.storage_provider + "://"
            bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            cdn_folder = provider_prefix + bucket + "/" + cdn_path + "/"
            image_resource = cdn_folder + filename

            # rewind the image, then add it to the bucket
            image.seek(0)
            mime_type = guess_type(filename)[0]
            storage.set_resource_from_file(image_resource, image,
                                           metadata={'Content-Type': mime_type})

            if not download:
                return

            download_metadata = {
                'Content-Disposition':
                    str("Content-Disposition: attachment; filename=" + filename + ";"),
                'Content-Type': mime_type,
            }
            stem, extension = filename.rsplit('.', 1)
            download_resource = cdn_folder + stem + "-download." + extension

            # the stored file is copied with the additional metadata
            storage.copy_resource(image_resource, download_resource,
                                  additional_dict_metadata=download_metadata)
        finally:
            image.close()
    def do_activity(self, data=None):
        """
        Check that the article's figures are available through the IIIF endpoint.

        Reads article_id and version from the session of data['run'], lists the
        article's files in the CDN bucket and runs retrieve_endpoints_check on
        the figures selected by article_structure.get_figures_for_iiif.

        Returns ACTIVITY_SUCCESS when no check result has a first element equal
        to False; otherwise, or when any exception is raised, returns
        ACTIVITY_PERMANENT_FAILURE.
        """
        # Phase 1: read the run details; nothing can be reported to the
        # monitor without them, so failures here are only logged.
        try:
            if self.logger:
                dumped_data = json.dumps(data, sort_keys=True, indent=4)
                self.logger.info('data: %s' % dumped_data)

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')
        except Exception as error:
            self.logger.exception(str(error))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        # Phase 2: check the IIIF endpoint for each figure of the article.
        try:
            storage = StorageContext(self.settings)
            cdn_bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            resource_parts = (self.settings.storage_provider, "://",
                              cdn_bucket, "/", article_id)

            article_files = storage.list_resources("".join(resource_parts))
            figures = article_structure.get_figures_for_iiif(article_files)
            iiif_path = self.settings.iiif_resolver.replace('{article_id}', article_id)

            checks = self.retrieve_endpoints_check(figures, iiif_path)
            failed_checks = [check for check in checks if check[0] == False]

            if failed_checks:
                # report the endpoints that did not work
                self.emit_monitor_event(
                    self.settings, article_id, version, run, self.pretty_name,
                    "error",
                    "Some images are not available through the IIIF endpoint: "
                    + str(failed_checks))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end",
                "Finished Verification. All endpoints work. Article: " + article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as error:
            self.logger.exception(str(error))
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error",
                "An error occurred when checking IIIF endpoint. Article " +
                article_id + '; message: ' + str(error))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #3
0
    def do_activity(self, data=None):
        """
        Convert the article's figures to JPG and publish them to the CDN bucket.

        Lists the files in the run's expanded folder, keeps those accepted by
        article_structure.article_figure and hands each one to
        image_conversion.generate_images, with the article's CDN folder as the
        only publish location.

        :param data: workflow data; must contain the 'run' identifier
        :returns: ACTIVITY_SUCCESS when every figure was processed,
            ACTIVITY_PERMANENT_FAILURE if any step raised an exception
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Starting submission convert images to jpg for article " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)
            storage_provider = self.settings.storage_provider + "://"
            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name

            storage_context = StorageContext(self.settings)
            files_in_bucket = storage_context.list_resources(orig_resource)

            # Materialise the result: on Python 3 filter() returns an iterator,
            # which has no len() for the summary message below.
            figures = list(filter(article_structure.article_figure, files_in_bucket))

            # download is not a IIIF asset but is currently kept for compatibility
            # download may become obsolete in future
            formats = {"Original": {
                            "sources": "tif",
                            "format": "jpg",
                            "download": "yes"
                        }}

            # every figure of the article is published to the same CDN folder
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in figures:
                figure_resource = orig_resource + "/" + file_name
                file_path = self.get_tmp_dir() + os.sep + file_name
                file_pointer = storage_context.get_resource_to_file_pointer(figure_resource, file_path)

                # a fresh list per call, as generate_images is defined elsewhere
                publish_locations = [cdn_resource_path]

                image_conversion.generate_images(self.settings, formats, file_pointer,
                                                 article_structure.ArticleInfo(file_name),
                                                 publish_locations, self.logger)

            self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "end",
                                    "Finished converting images for " + article_id + ": " +
                                    str(len(figures)) + " images processed ")
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("An error occurred during " + self.pretty_name)
            # str(e) works on Python 2 and 3; e.message does not exist on Python 3
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error converting images to JPG for article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #4
0
    def do_activity(self, data=None):
        """
        Check that Glencoe video metadata is available for the article.

        The article XML is read from the run's expanded folder. If
        glencoe_check.has_videos reports videos in it, the Glencoe metadata for
        the article is fetched and passed to glencoe_check.validate_sources.

        Returns True when the check passes or the XML has no videos. An
        AssertionError is reported, followed by a 60 second sleep and
        ACTIVITY_TEMPORARY_FAILURE; any other exception gives
        ACTIVITY_PERMANENT_FAILURE.
        """
        if self.logger:
            dumped_data = json.dumps(data, sort_keys=True, indent=4)
            self.logger.info('data: %s' % dumped_data)

        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')

        def report(event, message):
            # all monitor events of this activity share the same leading arguments
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, event, message)

        report("start", "Starting Glencoe video check for " + article_id)
        try:
            folder = session.get_value(run, 'expanded_folder')
            if folder is None:
                raise RuntimeError("No session value for expanded folder")

            bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
            self.logger.info("expanded_bucket: " + bucket)

            xml_name = lax_provider.get_xml_file_name(self.settings, folder, bucket, version)
            if xml_name is None:
                raise RuntimeError("No xml_filename found.")

            xml_location = "".join((self.settings.storage_provider, "://", bucket, "/",
                                    folder, '/', xml_name))
            storage = StorageContext(self.settings)
            xml_content = storage.get_resource_as_string(xml_location)

            if glencoe_check.has_videos(xml_content):
                msid = glencoe_check.check_msid(article_id)
                video_metadata = glencoe_check.metadata(msid, self.settings)
                glencoe_check.validate_sources(video_metadata)
                outcome = "Finished Verification. Glencoe is available. Article: " + article_id
            else:
                outcome = ("Finished Verification. No Glencoe media tags found in xml. "
                           "Article: " + article_id)

            report("end", outcome)
            return True
        except AssertionError as err:
            self.logger.info(err)
            report("error",
                   "Glencoe video is not available for article " + article_id +
                   '; message: ' + str(err))
            time.sleep(60)
            return activity.activity.ACTIVITY_TEMPORARY_FAILURE
        except Exception as e:
            self.logger.exception(str(e))
            report("error",
                   "An error occurred when checking for Glencoe video. Article " +
                   article_id + '; message: ' + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
 def store_file(self, path, article_id):
     """
     Download the file at path and store its content as a storage resource.

     The destination comes from self.s3_resources(path, article_id) and the
     content type is taken from the response headers. Returns the last path
     component of the destination resource. Raises RuntimeError when the
     response status code is not 200.
     """
     storage = StorageContext(self.settings)
     response = requests.get(path)
     if response.status_code != 200:
         raise RuntimeError("Glencoe returned a %s status code for %s" % (response.status_code, path))

     destination = self.s3_resources(path, article_id)
     self.logger.info("S3 resource: " + destination)
     storage.set_resource_from_string(destination, response.content,
                                      content_type=response.headers['content-type'])
     return os.path.split(destination)[-1]
Пример #6
0
 def store_file(self, path, article_id):
     """
     Fetch path over HTTP and save the response body to storage.

     The target resource is given by self.s3_resources(path, article_id); the
     body is stored with the content type reported by the response. Returns
     the file name part of the target resource. A status code other than 200
     raises RuntimeError.
     """
     context = StorageContext(self.settings)
     reply = requests.get(path)

     # guard clause: anything but a 200 reply is an error
     if reply.status_code != 200:
         raise RuntimeError("Glencoe returned a %s status code for %s" %
                            (reply.status_code, path))

     target = self.s3_resources(path, article_id)
     self.logger.info("S3 resource: " + target)
     stored_name = os.path.basename(target)
     context.set_resource_from_string(
         target, reply.content, content_type=reply.headers['content-type'])
     return stored_name
    def do_activity(self, data=None):
        """
        Copy the article's pre-ingest assets from the expanded folder to the CDN.

        Lists the files in the run's expanded folder, selects those returned by
        article_structure.pre_ingest_assets and copies each one to the
        article's folder in the CDN bucket.

        :param data: workflow data; must contain the 'run' identifier
        :returns: ACTIVITY_SUCCESS when all assets were copied,
            ACTIVITY_PERMANENT_FAILURE if any step raised an exception
        """
        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run,
                                self.pretty_name, "start",
                                "Depositing Ingest assets for " + article_id)

        try:
            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            expanded_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(expanded_resource)

            pre_ingest_assets = article_structure.pre_ingest_assets(
                files_in_bucket)

            # source and destination folders are the same for every asset
            orig_folder = expanded_resource + "/"
            dest_folder = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in pre_ingest_assets:
                storage_context.copy_resource(orig_folder + file_name,
                                              dest_folder + file_name)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" %
                                     (file_name, cdn_bucket_name))

            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "end", "Deposited Ingest assets for article " + article_id)
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when Depositing Ingest assets")
            # str(e) works on Python 2 and 3; e.message does not exist on Python 3
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name,
                "error", "Error depositing Ingest assets for article " +
                article_id + " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #8
0
    def do_activity(self, data=None):
        """
        Verify the IIIF endpoint for each figure the article has in the CDN bucket.

        Session values article_id and version are read for data['run']; a
        failure at that stage is only logged. The figures chosen by
        article_structure.get_figures_for_iiif are then checked with
        retrieve_endpoints_check.

        Returns ACTIVITY_SUCCESS if no result starts with a value equal to
        False, and ACTIVITY_PERMANENT_FAILURE in every other case.
        """
        try:
            if self.logger:
                pretty_data = json.dumps(data, sort_keys=True, indent=4)
                self.logger.info('data: %s' % pretty_data)

            run = data['run']
            session = Session(self.settings)
            article_id = session.get_value(run, 'article_id')
            version = session.get_value(run, 'version')
        except Exception as exc:
            self.logger.exception(str(exc))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        def notify(event, message):
            # shared leading arguments for every monitor event sent below
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, event, message)

        try:
            storage = StorageContext(self.settings)
            bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            article_resource = "".join(
                (self.settings.storage_provider, "://", bucket_name, "/", article_id))

            bucket_files = storage.list_resources(article_resource)
            figures = article_structure.get_figures_for_iiif(bucket_files)
            iiif_base = self.settings.iiif_resolver.replace('{article_id}', article_id)
            outcomes = self.retrieve_endpoints_check(figures, iiif_base)

            unavailable = [outcome for outcome in outcomes if outcome[0] == False]
            if not unavailable:
                notify("end", "Finished Verification. All endpoints work. Article: " + article_id)
                return activity.activity.ACTIVITY_SUCCESS

            # list the endpoints that did not work
            notify("error",
                   "Some images are not available through the IIIF endpoint: " + str(unavailable))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        except Exception as exc:
            self.logger.exception(str(exc))
            notify("error",
                   "An error occurred when checking IIIF endpoint. Article " +
                   article_id + '; message: ' + str(exc))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #9
0
def store_in_publish_locations(settings, filename, image, publish_locations, download):
    """
    Write an image to every publish location, optionally with a download copy.

    For each location the image is rewound and stored as location + filename
    with a Content-Type guessed from the file name. When download is truthy
    the stored resource is also copied to "<name>-download.<extension>" in the
    same location with extra Content-Disposition metadata. The image is closed
    before returning, even if an error is raised.
    """
    try:
        storage = StorageContext(settings)

        for location in publish_locations:
            image.seek(0)
            mime_type = guess_type(filename)[0]
            published = location + filename
            storage.set_resource_from_file(published, image,
                                           metadata={'Content-Type': mime_type})

            if not download:
                continue

            extra_metadata = {
                'Content-Disposition':
                    str("Content-Disposition: attachment; filename=" + filename + ";"),
                'Content-Type': mime_type,
            }
            stem, extension = filename.rsplit('.', 1)
            storage.copy_resource(published,
                                  location + stem + "-download." + extension,
                                  additional_dict_metadata=extra_metadata)
    finally:
        image.close()
Пример #10
0
    def do_activity(self, data):
        """
        Set the article's EIF to publish.

        Reads eif_location from the session of data['run'], builds the resource
        location of that file in the EIF bucket and passes it to
        set_eif_to_publish.

        :param data: workflow data with 'run', 'article_id' and 'version'
        :returns: ACTIVITY_SUCCESS when set_eif_to_publish reports success,
            ACTIVITY_PERMANENT_FAILURE when eif_location is not a string, when
            preparing the resource location fails, or when
            set_eif_to_publish reports an error
        """
        session = Session(self.settings)
        eif_location = session.get_value(data['run'], 'eif_location')

        self.emit_monitor_event(self.settings, data['article_id'],
                                data['version'], data['run'], self.pretty_name,
                                "start", "Starting to set EIF to publish")

        # basestring only exists on Python 2; fall back to str on Python 3 so
        # the type check below does not fail with a NameError
        try:
            string_types = basestring
        except NameError:
            string_types = str

        try:

            if not isinstance(eif_location, string_types):
                self.logger.error(self.pretty_name +
                                  " error. eif_location must be string")
                raise Exception("eif_location not available")

            storage_context = StorageContext(self.settings)

            eif_origin = "".join((self.settings.storage_provider, "://",
                                  self.settings.publishing_buckets_prefix +
                                  self.settings.eif_bucket, "/", eif_location))
        except Exception as e:
            # str(e) works on Python 2 and 3; e.message does not exist on Python 3
            self.emit_monitor_event(self.settings, data['article_id'],
                                    data['version'], data['run'],
                                    self.pretty_name, "error", str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        success, error = self.set_eif_to_publish(storage_context, eif_origin)

        if success:
            self.emit_monitor_event(self.settings, data['article_id'],
                                    data['version'], data['run'],
                                    self.pretty_name, "end",
                                    "Finished to set EIF to publish")
            return activity.activity.ACTIVITY_SUCCESS

        self.logger.error(error)
        self.emit_monitor_event(self.settings, data['article_id'],
                                data['version'], data['run'], self.pretty_name,
                                "error", error)
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
Пример #11
0
    def store_in_cdn(self, filename, image, cdn_path, download):
        """
        Store an image under cdn_path in the CDN bucket.

        The image is rewound and written with a Content-Type guessed from
        filename. If download is truthy, the stored resource is copied next to
        it as "<name>-download.<extension>" with additional
        Content-Disposition metadata. The image is always closed at the end.
        """
        try:
            context = StorageContext(self.settings)
            scheme = self.settings.storage_provider + "://"
            cdn_bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
            base_resource = scheme + cdn_bucket + "/" + cdn_path + "/"
            source = base_resource + filename

            # adds image to bucket
            image.seek(0)
            content_type, _ = guess_type(filename)
            upload_metadata = {'Content-Type': content_type}
            context.set_resource_from_file(source, image, metadata=upload_metadata)

            if download:
                disposition = str("Content-Disposition: attachment; filename=" +
                                  filename + ";")
                basename, extension = filename.rsplit('.', 1)
                download_name = basename + "-download." + extension

                # file is copied with additional metadata
                context.copy_resource(
                    source,
                    base_resource + download_name,
                    additional_dict_metadata={
                        'Content-Disposition': disposition,
                        'Content-Type': content_type,
                    })
        finally:
            image.close()
Пример #12
0
    def do_activity(self, data=None):
        """
        Copy the article's remaining assets from the expanded folder to the CDN.

        Files returned by article_structure.pre_ingest_assets are skipped; every
        other file in the run's expanded folder is copied to the article's
        folder in the CDN bucket. Files whose extension is not in the
        no-download list also get a "<name>-download.<extension>" copy with
        Content-Disposition metadata.

        :param data: workflow data; must contain the 'run' identifier
        :returns: ACTIVITY_SUCCESS when all assets were copied,
            ACTIVITY_PERMANENT_FAILURE if any step raised an exception
        """

        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        article_id = session.get_value(run, 'article_id')

        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name, "start",
                                "Depositing assets for " + article_id)

        try:

            expanded_folder_name = session.get_value(run, 'expanded_folder')
            expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                      self.settings.expanded_bucket)

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"

            expanded_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
            files_in_bucket = storage_context.list_resources(expanded_resource)

            # filter figures that have already been copied (see DepositIngestAssets activity)
            pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)

            other_assets = [asset for asset in files_in_bucket
                            if asset not in pre_ingest_assets]

            # assets bucket
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)

            # source and destination folders are the same for every asset
            orig_folder = expanded_resource + "/"
            dest_folder = storage_provider + cdn_bucket_name + "/" + article_id + "/"

            for file_name in other_assets:
                storage_context.copy_resource(orig_folder + file_name, dest_folder + file_name)

                if self.logger:
                    self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))

                file_name_no_extension, extension = file_name.rsplit('.', 1)
                if extension not in no_download_extensions:
                    content_type = self.content_type_from_file_name(file_name)
                    dict_metadata = {'Content-Disposition':
                                     str("Content-Disposition: attachment; filename=" + file_name + ";"),
                                     'Content-Type': content_type}
                    file_download = file_name_no_extension + "-download." + extension

                    # file is copied with additional metadata
                    storage_context.copy_resource(orig_folder + file_name,
                                                  dest_folder + file_download,
                                                  additional_dict_metadata=dict_metadata)

            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "end",
                                    "Deposited assets for article " + article_id)

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            # str(e) works on Python 2 and 3; e.message does not exist on Python 3
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Error depositing assets for article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return activity.activity.ACTIVITY_SUCCESS
Пример #13
0
 def list_files_from_cdn(self, article_id):
     """Return the resources listed under the article's folder in the CDN bucket."""
     storage = StorageContext(self.settings)
     cdn_bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
     article_folder = self.settings.storage_provider + "://" + cdn_bucket + "/" + article_id
     return storage.list_resources(article_folder)
Пример #14
0
 def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
     """Return the resources listed under expanded_folder_name in xml_bucket."""
     storage = StorageContext(settings)
     folder_location = settings.storage_provider + "://" + xml_bucket + "/" + expanded_folder_name
     return storage.list_resources(folder_location)
    def do_activity(self, data=None):
        """
        Copy the expanded article files to the CDN and published buckets.

        Every key returned by get_keys for the run's expanded folder is copied
        to the article's folder in the CDN bucket and in the published
        bucket's "articles" folder. Files whose extension is not in the
        no-download list also get a "<name>-download.<extension>" copy, with
        Content-Disposition metadata, in both destinations.

        :param data: workflow data; must contain the "run" identifier
        :returns: True on success, False if any step raised an exception
        """

        run = data["run"]
        session = Session(self.settings)
        version = session.get_value(run, "version")
        article_id = session.get_value(run, "article_id")

        self.emit_monitor_event(
            self.settings, article_id, version, run, "Deposit assets", "start", "Depositing assets for " + article_id
        )

        try:
            conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)

            expanded_folder_name = session.get_value(run, "expanded_folder")
            expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

            expanded_bucket = conn.get_bucket(expanded_folder_bucket)
            cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket

            no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"
            published_bucket_path = (
                self.settings.publishing_buckets_prefix + self.settings.published_bucket + "/articles"
            )

            keys = self.get_keys(expanded_bucket, expanded_folder_name)

            # folder prefixes do not change between files
            orig_folder = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/"
            cdn_folder = storage_provider + cdn_bucket_name + "/" + article_id + "/"
            published_folder = storage_provider + published_bucket_path + "/" + article_id + "/"

            # only the file name of each (key, name) pair is needed here
            for _file_key, file_name in keys:
                orig_resource = orig_folder + file_name
                dest_resource = cdn_folder + file_name
                storage_context.copy_resource(orig_resource, dest_resource)
                storage_context.copy_resource(orig_resource, published_folder + file_name)

                if self.logger:
                    self.logger.info("Uploaded key %s to %s" % (file_name, cdn_bucket_name))

                file_name_no_extension, extension = file_name.rsplit(".", 1)
                if extension not in no_download_extensions:
                    content_type = self.content_type_from_file_name(file_name)
                    dict_metadata = {
                        "Content-Disposition": str("Content-Disposition: attachment; filename=" + file_name + ";"),
                        "Content-Type": content_type,
                    }
                    file_download = file_name_no_extension + "-download." + extension
                    dest_resource_download = cdn_folder + file_download

                    # the CDN copy is the origin; it is copied with additional metadata
                    storage_context.copy_resource(
                        dest_resource, dest_resource_download, additional_dict_metadata=dict_metadata
                    )
                    # additional metadata is already set in origin resource so it will be copied across by default
                    storage_context.copy_resource(dest_resource_download, published_folder + file_download)

            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Deposit assets",
                "end",
                "Deposited assets for article " + article_id,
            )

        except Exception as e:
            self.logger.exception("Exception when Depositing assets")
            # str(e) works on Python 2 and 3; e.message does not exist on Python 3
            self.emit_monitor_event(
                self.settings,
                article_id,
                version,
                run,
                "Deposit assets",
                "error",
                "Error depositing assets for article " + article_id + " message:" + str(e),
            )
            return False

        return True
    def do_activity(self, data=None):
        """
        Push the article XML stored in the CDN bucket to the Github repository.

        When any Github setting is None the activity fails permanently if the
        settings are an instance of the live, prod or end2end settings, and is
        skipped (returning True) otherwise. Otherwise the XML content is read
        from the CDN bucket and passed to update_github.

        :param data: workflow data with 'article_id', 'version' and 'run'
        :returns: True when the repository was updated or the activity was
            skipped, ACTIVITY_TEMPORARY_FAILURE on RetryException,
            ACTIVITY_PERMANENT_FAILURE on any other error
        """
        self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                                self.pretty_name, "start",
                                "Starting Updating repository for article " + data['article_id'])

        # assert all Github settings are not None when live
        # if Github settings are null and we are testing, skip activity
        if None in (self.settings.git_repo_path, self.settings.git_repo_name, self.settings.github_token):
            import settings as settingsLib
            if isinstance(self.settings(), settingsLib.live) or isinstance(self.settings(), settingsLib.prod) or \
                    isinstance(self.settings(), settingsLib.end2end):
                self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                                        self.pretty_name, "error",
                                        "Error Updating repository for article. Github settings are unavailable.")
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                                    self.pretty_name, "end",
                                    "UpdateRepository got skipped as there are no Github "
                                    "settings (Test enviroment).")
            return True

        try:
            xml_file = lax_provider.get_xml_file_name(self.settings,
                                                      data['article_id'],
                                                      self.settings.publishing_buckets_prefix +
                                                      self.settings.ppp_cdn_bucket,
                                                      data['version'])
            # fail with a clear message rather than a TypeError from the
            # string concatenation below
            if xml_file is None:
                raise RuntimeError("No xml_filename found.")
            s3_file_path = data['article_id'] + "/" + xml_file

            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"
            published_path = storage_provider + self.settings.publishing_buckets_prefix + \
                self.settings.ppp_cdn_bucket

            resource = published_path + "/" + s3_file_path

            # The content is read once, straight into a string; a second copy
            # in a temporary file was never read and is no longer downloaded.
            file_content = storage_context.get_resource_as_string(resource)

            message = self.update_github(self.settings.git_repo_path + xml_file, file_content)

            self.logger.info(message)
            self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                                    self.pretty_name, "end",
                                    "Finished Updating repository for article. Details: " + message)
            return True

        except RetryException as e:
            # RetryException is declared elsewhere; its message attribute is used as-is
            self.logger.info(e.message)
            return activity.activity.ACTIVITY_TEMPORARY_FAILURE

        except Exception as e:
            self.logger.exception("Exception in do_activity")
            self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                                    self.pretty_name, "error",
                                    "Error Updating repository for article. Details: " + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
    def do_activity(self, data=None):
        """
        Expand an article zip file into the expanded bucket.

        Downloads the zip referenced by the S3 notification in ``data`` into
        the activity's temporary directory, extracts it, and uploads every
        regular file found directly in the extracted folder (names starting
        with '.' or '_' are skipped) to ``<article_id>.<version>/<run>/`` in
        the expanded bucket. Progress is reported through
        ``emit_monitor_event`` and several values (article_id, file_name,
        article_version_id, run, status, expanded_folder) are stored in the
        session under ``run``.

        :param data: workflow data; must contain 'run' and is also handed to
            S3NotificationInfo.from_dict (presumably it carries the bucket and
            file name of the uploaded zip -- confirm against the caller).
        :returns: True on success, or
            activity.activity.ACTIVITY_PERMANENT_FAILURE when the article
            status is not 'vor'/'poa' or when any step of the expansion raises.
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)
        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status not in ('vor', 'poa'):
            # status could not be determined, exit workflow.
            if self.logger:
                self.logger.error("Name '%s' did not match expected pattern for status" %
                                  filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)

        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            try:
                storage_resource_origin = (self.settings.storage_provider + "://" +
                                           info.bucket_name + "/" + info.file_name)
                storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            finally:
                # release the handle even when the download fails, and make
                # sure the data is flushed before the zip is reopened below
                local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            # only regular files directly inside the folder are uploaded;
            # names starting with '.' or '_' are skipped
            upload_filenames = [f for f in listdir(content_folder)
                                if isfile(join(content_folder, f))
                                and not f.startswith(('.', '_'))]

            bucket_folder_name = article_version_id + '/' + run
            # the destination prefix is the same for every file, build it once
            dest_prefix = (self.settings.storage_provider + "://" +
                           self.settings.publishing_buckets_prefix +
                           self.settings.expanded_bucket + "/" +
                           bucket_folder_name + '/')
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                storage_context.set_resource_from_filename(dest_prefix + filename, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            if self.logger:
                self.logger.exception("Exception when expanding article")
            # str(e) rather than e.message: the attribute does not exist on
            # Python 3 exceptions, and failing here would hide the real error
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
Пример #18
0
 def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
     """
     List the resources stored under a folder of a bucket.

     The resource address is built as
     ``<storage_provider>://<xml_bucket>/<expanded_folder_name>`` and handed
     to ``StorageContext.list_resources``; whatever that call returns is
     returned unchanged.
     """
     context = StorageContext(settings)
     scheme = settings.storage_provider + "://"
     folder_resource = scheme + xml_bucket + "/" + expanded_folder_name
     return context.list_resources(folder_resource)
 def list_files_from_cdn(self, article_id):
     """
     List the resources stored for an article in the CDN bucket.

     The bucket name is ``publishing_buckets_prefix + ppp_cdn_bucket`` from
     the settings, and the listing is done on
     ``<storage_provider>://<bucket>/<article_id>`` through
     ``StorageContext.list_resources``.
     """
     context = StorageContext(self.settings)
     scheme = self.settings.storage_provider + "://"
     cdn_bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
     cdn_article_resource = scheme + cdn_bucket + "/" + article_id
     return context.list_resources(cdn_article_resource)
Пример #20
0
    def do_activity(self, data=None):
        """
        Expand an article zip file into the expanded bucket.

        Downloads the zip referenced by the S3 notification in ``data`` into
        the activity's temporary directory, extracts it, passes the list of
        candidate file names to ``self.check_filenames`` and then uploads
        every regular file found directly in the extracted folder (names
        starting with '.' or '_' are skipped) to
        ``<article_id>.<version>/<run>/`` in the expanded bucket. Progress is
        reported through ``emit_monitor_event`` and several values
        (article_id, file_name, article_version_id, run, status,
        expanded_folder) are stored in the session under ``run``.

        :param data: workflow data; must contain 'run' and is also handed to
            S3NotificationInfo.from_dict (presumably it carries the bucket and
            file name of the uploaded zip -- confirm against the caller).
        :returns: True on success, or
            activity.activity.ACTIVITY_PERMANENT_FAILURE when the article
            status is not 'vor'/'poa' or when any step of the expansion raises.
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)
        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status not in ('vor', 'poa'):
            # status could not be determined, exit workflow.
            if self.logger:
                self.logger.error("Name '%s' did not match expected pattern for status" %
                                  filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)

        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            try:
                storage_resource_origin = (self.settings.storage_provider + "://" +
                                           info.bucket_name + "/" + info.file_name)
                storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            finally:
                # release the handle even when the download fails, and make
                # sure the data is flushed before the zip is reopened below
                local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            # only regular files directly inside the folder are uploaded;
            # names starting with '.' or '_' are skipped
            upload_filenames = [f for f in listdir(content_folder)
                                if isfile(join(content_folder, f))
                                and not f.startswith(('.', '_'))]
            # NOTE(review): check_filenames is defined elsewhere; presumably it
            # raises on unacceptable names, which would be reported as a
            # permanent failure by the handler below -- confirm.
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            # the destination prefix is the same for every file, build it once
            dest_prefix = (self.settings.storage_provider + "://" +
                           self.settings.publishing_buckets_prefix +
                           self.settings.expanded_bucket + "/" +
                           bucket_folder_name + '/')
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                storage_context.set_resource_from_filename(dest_prefix + filename, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            if self.logger:
                self.logger.exception("Exception when expanding article")
            # str(e) rather than e.message: the attribute does not exist on
            # Python 3 exceptions, and failing here would hide the real error
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True