def get_article_xml_key(bucket, expanded_folder_name):
     """Find the article XML file directly inside an expanded folder.

     Lists the bucket under ``expanded_folder_name + "/"`` using "/" as the
     delimiter, and inspects each listed entry with ``ArticleInfo``.

     Returns a ``(key, filename)`` tuple for the first entry whose
     ``file_type`` is ``'ArticleXML'``, where ``filename`` is the key name
     with its folder part removed.  Returns ``None`` when nothing matches.
     """
     prefix = expanded_folder_name + "/"
     for listed in bucket.list(prefix, "/"):
         xml_key = bucket.get_key(listed.key)
         # keep only the part after the last "/" (the bare file name)
         base_name = xml_key.name.rsplit('/', 1)[1]
         if ArticleInfo(base_name).file_type != 'ArticleXML':
             continue
         return xml_key, base_name
     return None
Exemplo n.º 2
0
    def process_key(self, key, cdn_path):
        """Generate images for one key when its file type has formats.

        The file name (key name without its folder) is parsed with
        ``ArticleInfo``; its ``file_type`` is passed to ``self.get_formats``.
        When that returns ``None`` nothing else happens.  Otherwise a file
        pointer is obtained for the key and handed, with the formats, the
        ``ArticleInfo`` instance and ``cdn_path``, to ``self.generate_images``.
        """
        # drop the folder part of the key name
        base_name = key.name.rsplit('/', 1)[1]
        article_info = ArticleInfo(base_name)

        formats = self.get_formats(article_info.file_type)
        if formats is None:
            # no formats configured for this file type: nothing to generate
            return

        file_pointer = self.get_file_pointer(key)
        self.generate_images(formats, file_pointer, article_info, cdn_path)
Exemplo n.º 3
0
 def get_xml_file_name(self, settings, expanded_folder_name, xml_bucket, version):
     """Return the name of the article XML file in an expanded folder.

     File names come from ``self._get_bucket_files``.  Only names whose
     ``ArticleInfo.file_type`` is ``'ArticleXML'`` are considered.  With
     ``version`` set to ``None`` the first such name is returned; otherwise
     the first one containing ``'-v' + version + '.'`` is returned.
     Returns ``None`` when no name qualifies.
     """
     candidates = self._get_bucket_files(settings, expanded_folder_name, xml_bucket)
     for candidate in candidates:
         if ArticleInfo(candidate).file_type != 'ArticleXML':
             continue
         # the version marker is only built when a version was requested
         if version is None or ('-v' + version + '.') in candidate:
             return candidate
     return None
Exemplo n.º 4
0
    def do_activity(self, data=None):
        """Look up the date of an article version and store it in the session.

        Reads ``version`` and ``filename_last_element`` from the session for
        ``data['run']``, parses the file name with ``ArticleInfo`` and calls
        ``self.get_version``.  On success the returned date is stored in the
        session as ``update_date`` and an "end" monitor event is emitted.

        Returns ``activity.activity.ACTIVITY_SUCCESS`` on success, or
        ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the lookup
        reports an error or any exception is raised.
        """
        # Pre-bind everything the except handler reports, so that a failure
        # before these are assigned (bad ``data``, session error, unparsable
        # file name) is reported instead of raising UnboundLocalError and
        # masking the original exception.
        run = None
        version = None
        article_id = None

        try:
            run = data['run']
            session = Session(self.settings)
            version = session.get_value(run, 'version')
            filename = session.get_value(run, 'filename_last_element')

            article_structure = ArticleInfo(filename)
            article_id = article_structure.article_id

            version_date, error = self.get_version(
                self.settings, article_structure, article_id, version)

            if error is not None:
                self.logger.error(error)
                self.emit_monitor_event(
                    self.settings, article_id, version,
                    run, self.pretty_name, "error", " ".join(
                        ("Error Looking up version article",
                         article_id, "message:", error)))
                return activity.activity.ACTIVITY_PERMANENT_FAILURE

            self.emit_monitor_event(
                self.settings, article_id, version,
                run, self.pretty_name, "end", " ".join(
                    ("Finished Version Lookup for article",
                     article_id, "version:", version)))

            session.store_value(run, 'update_date', version_date)

            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception(
                "Exception when trying to Lookup next version")
            # article_id may still be None here; str() keeps join() from
            # raising a second exception inside the handler
            self.emit_monitor_event(
                self.settings, article_id, version,
                run, self.pretty_name, "error", " ".join(
                    ("Error looking up version for article",
                     str(article_id), "message:", str(e))))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
 def find_xml_filename_in_map(self, file_name_map):
     """Return the first mapped file name that is an article XML file.

     ``file_name_map`` maps old file names to new file names; only the new
     names (the values) are inspected.  A name qualifies when its
     ``ArticleInfo.file_type`` is ``'ArticleXML'``.

     Returns the matching new name, or ``None`` when there is none.
     """
     # values() exists on both Python 2 and 3 dicts (iteritems() does not),
     # and the old names are never needed here
     for new_name in file_name_map.values():
         if ArticleInfo(new_name).file_type == 'ArticleXML':
             return new_name
     return None
Exemplo n.º 6
0
    def do_activity(self, data=None):
        """
        Expand an article zip file into the expanded bucket.

        Steps, in order:

        * read ``filename_last_element`` and ``version`` from the session
          for ``data['run']`` and parse the file name with ``ArticleInfo``;
        * store ``article_id``, ``file_name``, ``article_version_id``,
          ``run`` and ``status`` in the session;
        * download the zip to the temp dir, extract it, and upload every
          regular file whose name does not start with '.' or '_';
        * clean the temp dir, store ``expanded_folder`` in the session and
          emit start/end/error monitor events along the way.

        Returns ``True`` on success, or
        ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the status is
        not 'vor' or 'poa' or when any step of the expansion raises.
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status not in ('vor', 'poa'):
            # guarded like the other logger calls in this method
            if self.logger:
                self.logger.error("Name '%s' did not match expected pattern for status" %
                                  filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)

        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            try:
                storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            finally:
                # always release the handle, even when the download fails
                local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            # keep regular files only, skipping hidden ('.') and '_' prefixed names
            upload_filenames = [
                f for f in listdir(content_folder)
                if isfile(join(content_folder, f)) and not f.startswith(('.', '_'))
            ]
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            if self.logger:
                self.logger.exception("Exception when expanding article")
            # str(e) instead of e.message: the attribute is deprecated and
            # does not exist on Python 3 exceptions
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
Exemplo n.º 7
0
 def test_get_version_from_zip_filename(self, input, expected):
     # Parse the given file name and check the version that
     # get_version_from_zip_filename() reports for it.
     article_info = ArticleInfo(input)
     self.articleinfo = article_info
     self.assertEqual(article_info.get_version_from_zip_filename(), expected)
Exemplo n.º 8
0
 def test_get_file_type_from_zip_filename(self, input, expected):
     # Parse the given file name and check the file_type attribute
     # that ArticleInfo derived from it.
     article_info = ArticleInfo(input)
     self.articleinfo = article_info
     self.assertEqual(article_info.file_type, expected)