Example #1
    def test_ingest_article_zip_starter_(self, fake_boto_conn, fake_logger):
        fake_boto_conn.return_value = FakeBotoConnection()
        self.stater_ingest_article_zip.start(
            settings=settings_mock,
            run=run_example,
            info=S3NotificationInfo.from_dict(
                test_data.ingest_article_zip_data))
Example #2
    def start(self, ENV="dev", info=None, run=None):

        # TODO : much of this is common to many starters and could probably be streamlined

        # Specify run environment settings
        settings = settingsLib.get_settings(ENV)

        # Log
        identity = "starter_%s" % int(random.random() * 1000)
        log_file = "starter.log"
        # logFile = None
        logger = log.logger(log_file, settings.setLevel, identity)

        filename = info.file_name

        if filename is None:
            logger.error("Did not get a filename")
            return

        # Simple connect
        conn = boto.swf.layer1.Layer1(settings.aws_access_key_id,
                                      settings.aws_secret_access_key)

        # Start a workflow execution
        workflow_id = "PublishPerfectArticle_%s" % filename.replace(
            '/', '_') + str(int(random.random() * 1000))
        workflow_name = "PublishPerfectArticle"
        workflow_version = "1"
        child_policy = None
        execution_start_to_close_timeout = str(60 * 30)
        workflow_input = S3NotificationInfo.to_dict(info)
        workflow_input['run'] = run
        workflow_input = json.dumps(workflow_input,
                                    default=lambda ob: ob.__dict__)

        try:
            response = conn.start_workflow_execution(
                settings.domain, workflow_id, workflow_name, workflow_version,
                settings.default_task_list, child_policy,
                execution_start_to_close_timeout, workflow_input)

            logger.info('got response: \n%s' %
                        json.dumps(response, sort_keys=True, indent=4))

        except boto.swf.exceptions.SWFWorkflowExecutionAlreadyStartedError:
            # There is already a running workflow with that ID, cannot start another
            message = 'SWFWorkflowExecutionAlreadyStartedError: There is already a running workflow with ID %s' % workflow_id
            logger.info(message)
    def do_activity(self, data=None):
        """
        Do the work
        """

        self.expanded_bucket_name = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket

        info = S3NotificationInfo.from_dict(data)
        session = Session(self.settings)
        version = session.get_value(self.get_workflowId(), 'version')
        article_id = session.get_value(self.get_workflowId(), 'article_id')
        article_version_id = article_id + '.' + version
        run = session.get_value(self.get_workflowId(), 'run')

        self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "start",
                                "Starting applying version number to files for " + article_id)

        try:

            if self.logger:
                self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
                
            # Do not rename files if a version number is in the file_name
            m = re.search(ur'-v([0-9]*?)[\.|-]', info.file_name)
            
            if m is not None:
                # Nothing to do
                pass
            
            elif m is None and version is not None:
                expanded_folder_name = session.get_value(self.get_workflowId(), 'expanded_folder')
                bucket_folder_name = expanded_folder_name.replace(os.sep, '/')
                self.rename_article_s3_objects(bucket_folder_name, version)
                
            self.emit_monitor_event(self.settings, article_id, version, run, "Apply Version Number", "end",
                        "Finished applying version number to article " + article_id +
                        " for version " + version + " run " + str(run))


        except Exception as e:
            self.logger.exception("Exception when applying version number to article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Convert JATS", "error",
                                    "Error in applying version number to files for " + article_id +
                                    " message:" + e.message)

        return True
Example #4
    def start(self, settings, info=None, run=None):

        # Log
        logger = helper.get_starter_logger(
            settings.setLevel, helper.get_starter_identity(self.const_name))

        if not hasattr(info, 'file_name') or info.file_name is None:
            raise NullRequiredDataException(
                "filename is Null / Did not get a filename.")

        input = S3NotificationInfo.to_dict(info)
        input['run'] = run
        input['version_lookup_function'] = "article_highest_version"
        input['force'] = True

        workflow_id, \
        workflow_name, \
        workflow_version, \
        child_policy, \
        execution_start_to_close_timeout, \
        workflow_input = helper.set_workflow_information(self.const_name, "1", None, input,
                                                         info.file_name.replace('/', '_'))

        # Simple connect
        conn = boto.swf.layer1.Layer1(settings.aws_access_key_id,
                                      settings.aws_secret_access_key)

        try:
            response = conn.start_workflow_execution(
                settings.domain, workflow_id, workflow_name, workflow_version,
                settings.default_task_list, child_policy,
                execution_start_to_close_timeout, workflow_input)

            logger.info('got response: \n%s' %
                        json.dumps(response, sort_keys=True, indent=4))

        except NullRequiredDataException as e:
            logger.exception(e.message)
            raise

        except boto.swf.exceptions.SWFWorkflowExecutionAlreadyStartedError:
            # There is already a running workflow with that ID, cannot start another
            message = 'SWFWorkflowExecutionAlreadyStartedError: There is already a running workflow with ID %s' % workflow_id
            logger.info(message)
    def start(self, ENV="dev", info=None, run=None):

        # TODO : much of this is common to many starters and could probably be streamlined

        # Specify run environment settings
        settings = settingsLib.get_settings(ENV)

        # Log
        identity = "starter_%s" % int(random.random() * 1000)
        log_file = "starter.log"
        # logFile = None
        logger = log.logger(log_file, settings.setLevel, identity)

        filename = info.file_name

        if filename is None:
            logger.error("Did not get a filename")
            return

        # Simple connect
        conn = boto.swf.layer1.Layer1(settings.aws_access_key_id, settings.aws_secret_access_key)

        # Start a workflow execution
        workflow_id = "PublishPerfectArticle_%s" % filename.replace('/', '_') + str(int(random.random() * 1000))
        workflow_name = "PublishPerfectArticle"
        workflow_version = "1"
        child_policy = None
        execution_start_to_close_timeout = str(60 * 30)
        workflow_input = S3NotificationInfo.to_dict(info)
        workflow_input['run'] = run
        workflow_input = json.dumps(workflow_input, default=lambda ob: ob.__dict__)

        try:
            response = conn.start_workflow_execution(settings.domain, workflow_id, workflow_name, workflow_version,
                                                     settings.default_task_list, child_policy,
                                                     execution_start_to_close_timeout, workflow_input)

            logger.info('got response: \n%s' % json.dumps(response, sort_keys=True, indent=4))

        except boto.swf.exceptions.SWFWorkflowExecutionAlreadyStartedError:
            # There is already a running workflow with that ID, cannot start another
            message = 'SWFWorkflowExecutionAlreadyStartedError: There is already a running workflow with ID %s' % workflow_id
            logger.info(message)
Example #6
    def start(self, settings, run, info):

        # Log
        logger = helper.get_starter_logger(settings.setLevel, helper.get_starter_identity(self.const_name))

        if not hasattr(info, 'file_name') or info.file_name is None:
            raise NullRequiredDataException("filename is Null. Did not get a filename.")

        input = S3NotificationInfo.to_dict(info)
        input['run'] = run
        input['version_lookup_function'] = "article_next_version"

        workflow_id, \
        workflow_name, \
        workflow_version, \
        child_policy, \
        execution_start_to_close_timeout, \
        workflow_input = helper.set_workflow_information(self.const_name, "1", None, input,
                                                         info.file_name.replace('/', '_'),
                                                         start_to_close_timeout=str(60 * 60 * 5))

        # Simple connect
        conn = boto.swf.layer1.Layer1(settings.aws_access_key_id, settings.aws_secret_access_key)

        try:
            response = conn.start_workflow_execution(settings.domain, workflow_id, workflow_name, workflow_version,
                                                     settings.default_task_list, child_policy,
                                                     execution_start_to_close_timeout, workflow_input)

            logger.info('got response: \n%s' % json.dumps(response, sort_keys=True, indent=4))

        except NullRequiredDataException as e:
            logger.exception(e.message)
            raise

        except boto.swf.exceptions.SWFWorkflowExecutionAlreadyStartedError:
            # There is already a running workflow with that ID, cannot start another
            message = 'SWFWorkflowExecutionAlreadyStartedError: ' \
                      'There is already a running workflow with ID %s' % workflow_id
            logger.info(message)
Example #7
    def do_activity(self, data=None):

        try:

            info = S3NotificationInfo.from_dict(data)
            filename = info.file_name[info.file_name.rfind('/')+1:]
            session = Session(self.settings)
            session.store_value(data['run'], 'filename_last_element', filename)

            article_structure = ArticleInfo(filename)

            if article_structure.article_id is None:
                self.logger.error("Name '%s' did not match expected pattern for article id" % filename)
                raise RuntimeError("article_structure.article_id is None. File pattern problem.")

            version = self.get_version(self.settings, article_structure, data['version_lookup_function'])
            session.store_value(data['run'], 'version', version)
            article_id = article_structure.article_id

            self.emit_monitor_event(self.settings, article_id, version, data['run'],
                                    self.pretty_name, "start",
                                    " ".join(("Version Lookup for article", article_id, "version:", version)))

            self.set_monitor_property(self.settings, article_id, "article-id", article_id, "text")
            self.set_monitor_property(self.settings, article_id, "publication-status", "publication in progress",
                                      "text",
                                      version=version)

            self.emit_monitor_event(self.settings, article_structure.article_id, version, data['run'],
                                    self.pretty_name, "end",
                                    " ".join(("Finished Version Lookup for article", article_structure.article_id,
                                              "version:", version)))
            return activity.activity.ACTIVITY_SUCCESS

        except Exception as e:
            self.logger.exception("Exception when trying to Lookup Version. Error: " + str(e))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
Example #8
    parser.add_option("-e",
                      "--env",
                      default="dev",
                      action="store",
                      type="string",
                      dest="env",
                      help="set the environment to run, either dev or live")
    parser.add_option("-f",
                      "--filename",
                      default=None,
                      action="store",
                      type="string",
                      dest="filename",
                      help="specify the DOI id the article to process")

    (options, args) = parser.parse_args()
    if options.env:
        ENV = options.env
    if options.filename:
        filename = options.filename

    import settings as settingsLib
    settings = settingsLib.get_settings(ENV)

    o = starter_NewS3File()

    info = S3NotificationInfo("S3Event", "", "xxawsxx-drop-bucket",
                              "elife-kitchen-sink.xml",
                              "3f53f5c808dd58973cd93a368be739b4", "1")
    o.start(settings=settings, info=info)
Example #9
def process_data_publishperfectarticle(workflow_name, workflow_data):
    data = {
        'info': S3NotificationInfo.from_dict(workflow_data),
        'run': str(uuid.uuid4())
    }
    return data
def process_data_ingestarticlezip(workflow_name, workflow_data):
    data = {'info': S3NotificationInfo.from_dict(workflow_data),
            'run': str(uuid.uuid4())}
    return data
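These process_data_* helpers all share the same shape: wrap the S3 notification in an info object and attach a fresh run UUID. A minimal sketch of how a workflow-starter consumer might dispatch to them by workflow name; the process_data_ name lookup and the pass-through fallback are assumptions for illustration, not taken from the examples above:

    def build_workflow_data(workflow_name, workflow_data):
        # Hypothetical dispatcher: call process_data_<workflowname> if such a
        # helper is defined in this module, otherwise pass the data through.
        handler = globals().get('process_data_' + workflow_name.lower())
        if handler is not None:
            return handler(workflow_name, workflow_data)
        return workflow_data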
Example #12
    def do_activity(self, data=None):

        """
        Do the work
        """

        run = data['run']

        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        storage_context = StorageContext(self.settings)

        session = Session(self.settings)

        filename_last_element = session.get_value(run, 'filename_last_element')
        # zip name contains version information for previously archived zip files
        article_structure = ArticleInfo(filename_last_element)
        article_id = article_structure.article_id
        session.store_value(run, 'article_id', article_id)
        session.store_value(run, 'file_name', info.file_name)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        version = session.get_value(run, 'version')

        status = article_structure.status
        if status is None or (status != 'vor' and status != 'poa'):
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE  # status could not be determined, exit workflow.

        article_version_id = article_id + '.' + version
        session.store_value(run, 'article_version_id', article_version_id)
        session.store_value(run, 'run', run)
        session.store_value(run, 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)


        try:
            # download zip to temp folder
            tmp = self.get_tmp_dir()
            local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
            local_zip_file.close()

            # extract zip contents
            folder_name = path.join(article_version_id, run)
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, filename_last_element)) as zf:
                zf.extractall(content_folder)

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)
            self.check_filenames(upload_filenames)

            bucket_folder_name = article_version_id + '/' + run
            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                        self.settings.expanded_bucket + "/" + dest_path
                storage_context.set_resource_from_filename(storage_resource_dest, source_path)

            self.clean_tmp_dir()

            session.store_value(run, 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "end", "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) +
                                    " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                    "error", "Error expanding article " + article_id +
                                    " message:" + e.message)
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        return True
Example #15
def work(ENV, flag):
    # Specify run environment settings
    settings = settings_lib.get_settings(ENV)

    # Log
    identity = "queue_worker_%s" % os.getpid()
    log_file = "queue_worker.log"
    # logFile = None
    logger = log.logger(log_file, settings.setLevel, identity)

    # Simple connect
    conn = boto.sqs.connect_to_region(settings.sqs_region,
                                      aws_access_key_id=settings.aws_access_key_id,
                                      aws_secret_access_key=settings.aws_secret_access_key)
    queue = conn.get_queue(settings.S3_monitor_queue)
    queue.set_message_class(S3SQSMessage)

    rules = load_rules()
    application = newrelic.agent.application()

    # Poll for an activity task indefinitely
    if queue is not None:
        while flag.green():

            logger.info('reading message')
            queue_message = queue.read(30)
            # TODO : check for more-than-once delivery
            # ( Dynamo conditional write? http://tinyurl.com/of3tmop )

            if queue_message is None:
                logger.info('no messages available')
            else:
                with newrelic.agent.BackgroundTask(application, name=queue_message.notification_type, group='queue_worker.py'):
                    logger.info('got message id: %s' % queue_message.id)
                    if queue_message.notification_type == 'S3Event':
                        info = S3NotificationInfo.from_S3SQSMessage(queue_message)
                        logger.info("S3NotificationInfo: %s", info.to_dict())
                        workflow_name = get_starter_name(rules, info)
                        if workflow_name is None:
                            logger.info("Could not handle file %s in bucket %s" % (info.file_name, info.bucket_name))
                            return False

                        # build message
                        message = {
                            'workflow_name': workflow_name,
                            'workflow_data': info.to_dict()
                        }

                        # send workflow initiation message
                        out_queue = conn.get_queue(settings.workflow_starter_queue)
                        m = Message()
                        m.set_body(json.dumps(message))
                        out_queue.write(m)

                        # cancel incoming message
                        logger.info("cancelling message")
                        queue.delete_message(queue_message)
                        logger.info("message cancelled")
                    else:
                        # TODO : log
                        pass
            time.sleep(10)

        logger.info("graceful shutdown")

    else:
        logger.error('error obtaining queue')
Example #16
def work(ENV, flag):
    # Specify run environment settings
    settings = settings_lib.get_settings(ENV)

    # Log
    identity = "queue_worker_%s" % os.getpid()
    log_file = "queue_worker.log"
    # logFile = None
    logger = log.logger(log_file, settings.setLevel, identity)

    # Simple connect
    conn = boto.sqs.connect_to_region(
        settings.sqs_region,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    queue = conn.get_queue(settings.S3_monitor_queue)
    queue.set_message_class(S3SQSMessage)

    rules = load_rules()
    application = newrelic.agent.application()

    # Poll for an activity task indefinitely
    if queue is not None:
        while flag.green():

            logger.info('reading message')
            queue_message = queue.read(30)
            # TODO : check for more-than-once delivery
            # ( Dynamo conditional write? http://tinyurl.com/of3tmop )

            if queue_message is None:
                logger.info('no messages available')
            else:
                with newrelic.agent.BackgroundTask(
                        application,
                        name=queue_message.notification_type,
                        group='queue_worker.py'):
                    logger.info('got message id: %s' % queue_message.id)
                    if queue_message.notification_type == 'S3Event':
                        info = S3NotificationInfo.from_S3SQSMessage(
                            queue_message)
                        logger.info("S3NotificationInfo: %s", info.to_dict())
                        workflow_name = get_starter_name(rules, info)
                        if workflow_name is None:
                            logger.info(
                                "Could not handle file %s in bucket %s" %
                                (info.file_name, info.bucket_name))
                            return False

                        # build message
                        message = {
                            'workflow_name': workflow_name,
                            'workflow_data': info.to_dict()
                        }

                        # send workflow initiation message
                        out_queue = conn.get_queue(
                            settings.workflow_starter_queue)
                        m = Message()
                        m.set_body(json.dumps(message))
                        out_queue.write(m)

                        # cancel incoming message
                        logger.info("cancelling message")
                        queue.delete_message(queue_message)
                        logger.info("message cancelled")
                    else:
                        # TODO : log
                        pass
            time.sleep(10)

        logger.info("graceful shutdown")

    else:
        logger.error('error obtaining queue')
Example #17
    def do_activity(self, data=None):

        """
        Do the work
        """
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        info = S3NotificationInfo.from_dict(data)

        # set up required connections
        conn = S3Connection(self.settings.aws_access_key_id, self.settings.aws_secret_access_key)
        source_bucket = conn.get_bucket(info.bucket_name)
        dest_bucket = conn.get_bucket(self.settings.publishing_buckets_prefix + self.settings.expanded_bucket)
        session = Session(self.settings)

        article_id_match = re.match(ur'elife-(.*?)-', info.file_name)
        article_id = article_id_match.group(1)
        session.store_value(self.get_workflowId(), 'article_id', article_id)

        if self.logger:
            self.logger.info("Expanding file %s" % info.file_name)

        # extract any doi, version and updated date information from the filename
        version = None
        # zip name contains version information for previously archived zip files
        m = re.search(ur'-v([0-9]*?)[\.|-]', info.file_name)
        if m is not None:
            version = m.group(1)
        if version is None:
            version = self.get_next_version(article_id)
        if version == '-1':
            return False  # version could not be determined, exit workflow. Can't emit event as no version.

        status = None
        sm = re.search(ur'.*?-.*?-(.*?)-', info.file_name)
        if sm is not None:
            status = sm.group(1)
        if status is None:
            return False  # status could not be determined, exit workflow.
        run = str(uuid.uuid4())
        # store version for other activities in this workflow execution
        session.store_value(self.get_workflowId(), 'version', version)

        # TODO : extract and store updated date if supplied

        article_version_id = article_id + '.' + version
        session.store_value(self.get_workflowId(), 'article_version_id', article_version_id)
        session.store_value(self.get_workflowId(), 'run', run)
        session.store_value(self.get_workflowId(), 'status', status)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                                "Starting expansion of article " + article_id)
        self.set_monitor_property(self.settings, article_id, "article_id", article_id, "text")
        try:

            # download zip to temp folder
            tmp = self.get_tmp_dir()
            key = Key(source_bucket)
            key.key = info.file_name
            local_zip_file = self.open_file_from_tmp_dir(info.file_name, mode='wb')
            key.get_contents_to_file(local_zip_file)
            local_zip_file.close()

            bucket_folder_name = article_version_id + '/' + run
            folder_name = path.join(article_version_id, run)

            # extract zip contents
            content_folder = path.join(tmp, folder_name)
            makedirs(content_folder)
            with ZipFile(path.join(tmp, info.file_name)) as zf:
                zf.extractall(content_folder)

            # TODO : rename files (versions!)

            # TODO : edit xml and rename references

            upload_filenames = []
            for f in listdir(content_folder):
                if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                    upload_filenames.append(f)

            for filename in upload_filenames:
                source_path = path.join(content_folder, filename)
                dest_path = bucket_folder_name + '/' + filename
                k = Key(dest_bucket)
                k.key = dest_path
                k.set_contents_from_filename(source_path)

            session.store_value(self.get_workflowId(), 'expanded_folder', bucket_folder_name)
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "end",
                                    "Finished expansion of article " + article_id +
                                    " for version " + version + " run " + str(run) + " into " + bucket_folder_name)
        except Exception as e:
            self.logger.exception("Exception when expanding article")
            self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "error",
                                    "Error expanding article " + article_id + " message:" + e.message)
            return False

        return True
Example #18
def process_data_ingestarticlezip(workflow_name, workflow_data):
    data = {
        'info': S3NotificationInfo.from_dict(workflow_data),
        'run': str(uuid.uuid4())
    }
    return data