Exemplo n.º 1
0
class Author(object):
    """Workflow definition for ingesting HEPNames/Authors records.

    ``workflow`` is the ordered list of tasks. After the schema is set and
    the record validated, it branches on the ``is-update`` mark: updates
    only trigger the update notification, everything else goes through
    review and is then handled according to the reviewer's decision.
    """

    name = "Author"
    data_type = "authors"

    workflow = [
        # The schema must be set so the Holding Pen can index the record.
        set_schema,
        validate_record('authors'),
        IF_ELSE(
            is_marked('is-update'),
            # Marked as an update.
            SEND_UPDATE_NOTIFICATION,
            # Not an update: ask for review, then act on the outcome.
            ASK_FOR_REVIEW + [
                IF_ELSE(
                    is_record_accepted,
                    (
                        SEND_TO_LEGACY +
                        NOTIFY_ACCEPTED +
                        [
                            # TODO: once legacy is out, store_record should
                            # run unconditionally and the SEND_TO_LEGACY
                            # steps should be removed.
                            IF_NOT(in_production_mode, [store_record]),
                        ] +
                        CLOSE_TICKET_IF_NEEDED
                    ),
                    NOTIFY_NOT_ACCEPTED,
                ),
            ],
        ),
    ]
Exemplo n.º 2
0
class Article(object):
    """Workflow definition for ingesting articles into Literature.

    ``workflow`` concatenates step sequences defined elsewhere and ends
    with two inline steps: a branch on whether the record was accepted,
    and a ticket close that only applies to submissions.
    """

    name = "HEP"
    data_type = "hep"

    workflow = (
        PRE_PROCESSING + NOTIFY_IF_SUBMISSION + MARK_IF_MATCH_IN_HOLDINGPEN +
        CHECK_IS_UPDATE + STOP_IF_EXISTING_SUBMISSION + CHECK_AUTO_APPROVE +
        PROCESS_HOLDINGPEN_MATCHES + ENHANCE_RECORD +
        HALT_FOR_APPROVAL_IF_NEW_OR_STOP_IF_NOT_RELEVANT + [
            IF_ELSE(
                is_record_accepted,
                # Accepted: post-process, store, push to legacy and notify.
                (POSTENHANCE_RECORD + STORE_RECORD + SEND_TO_LEGACY +
                 WAIT_FOR_LEGACY_WEBCOLL + NOTIFY_ACCEPTED +
                 NOTIFY_CURATOR_IF_CORE),
                # Not accepted: only notify.
                NOTIFY_NOT_ACCEPTED,
            ),
            IF(is_submission, close_ticket(ticket_id_key="ticket_id")),
        ]
    )
Exemplo n.º 3
0
    def test_current_taskname_resolution(self):
        """Check ``current_taskname`` for named, lambda and nested callbacks.

        Three callback shapes are installed in turn under ``self.key``:

        * the task built by ``m('test')``, expected to be reported as
          ``'string appender'``;
        * a bare lambda, expected to be reported as ``'<lambda>'``;
        * an ``IF_ELSE`` construct, whose reported name is expected to look
          like the ``repr`` of a nested list of functions.
        """
        workflow = [m('test')]
        self.wfe.callbacks.replace(workflow, self.key)
        self.wfe.process(self.tokens)
        assert self.wfe.current_taskname == 'string appender'

        workflow = [lambda obj, eng: 1]
        self.wfe.callbacks.replace(workflow, self.key)
        self.wfe.process(self.tokens)
        assert self.wfe.current_taskname == '<lambda>'

        workflow = [
            IF_ELSE(
                lambda obj, eng: True,
                [lambda obj, eng: 1],
                [lambda obj, eng: 2],
            )
        ]
        self.wfe.callbacks.replace(workflow, self.key)
        # This test will break if someone changes IF_ELSE. TODO: Mock
        # Note: Python3 has much stronger introspection, thus the `.*`.
        # Function reprs embed a memory address, matched here as genuine
        # hexadecimal digits in either case. A range such as ``[0-f]`` would
        # also accept punctuation and every upper-case letter, because it
        # spans the ASCII codes from '0' to 'f'.
        assert re.match(
            r'\[<function IF_ELSE.* at 0x[0-9a-fA-F]+>, '
            r'\[<function .*<lambda> at 0x[0-9a-fA-F]+>\], '
            r'<function BREAK.* at 0x[0-9a-fA-F]+>, '
            r'\[<function .*<lambda> at 0x[0-9a-fA-F]+>\]\]',
            self.wfe.current_taskname)
Exemplo n.º 4
0
class Article(object):
    """Workflow definition for ingesting articles into Literature.

    ``workflow`` starts by setting the schema, chains step sequences
    defined elsewhere, and finishes with a branch on the approval decision
    followed by a ticket close for submissions.
    """

    name = "HEP"
    data_type = "hep"

    workflow = (
        [
            # The schema must be set so the Holding Pen can index the record.
            set_schema,
        ] +
        ADD_MARKS +
        DELETE_AND_STOP_IF_NEEDED +
        ENHANCE_RECORD +
        # TODO: Once we have a way to resolve merges, we should
        # use that instead of stopping
        CHECK_IF_MERGE_AND_STOP_IF_SO +
        CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL +
        [
            IF_ELSE(
                is_record_accepted,
                (
                    POSTENHANCE_RECORD +
                    SEND_TO_LEGACY_AND_WAIT +
                    NOTIFY_USER_OR_CURATOR +
                    [
                        # TODO: once legacy is out, store_record should run
                        # unconditionally and the SEND_TO_LEGACY_AND_WAIT
                        # steps should be removed.
                        IF_NOT(in_production_mode, [store_record]),
                    ]
                ),
                NOTIFY_NOT_ACCEPTED,
            ),
            IF(is_submission, [close_ticket(ticket_id_key="ticket_id")]),
        ]
    )
Exemplo n.º 5
0
class Author(object):
    """Workflow definition for ingesting HEPNames/Authors records.

    After the schema is set and record signals are emitted, ``workflow``
    branches on the ``is-update`` mark. Updates are uploaded in
    ``holdingpen`` mode and a curator ticket is created. Anything else
    opens a ticket, replies to the user, halts for approval and then
    either uploads the record in ``insert`` mode or just closes the ticket.
    """

    name = "Author"
    data_type = "authors"

    workflow = [
        # The schema must be set so the Holding Pen can index the record.
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        IF_ELSE(
            is_marked('is-update'),
            [
                send_robotupload(
                    marcxml_processor=hepnames2marc,
                    mode="holdingpen",
                ),
                create_ticket(
                    template="authors/tickets/curator_update.html",
                    queue="Authors_cor_user",
                    context_factory=update_ticket_context,
                ),
            ],
            [
                create_ticket(
                    template="authors/tickets/curator_new.html",
                    queue="Authors_add_user",
                    context_factory=new_ticket_context,
                ),
                reply_ticket(
                    template="authors/tickets/user_new.html",
                    context_factory=reply_ticket_context,
                    keep_new=True,
                ),
                halt_record(
                    action="author_approval",
                    message="Accept submission?",
                ),
                IF_ELSE(
                    is_record_accepted,
                    [
                        send_robotupload(
                            marcxml_processor=hepnames2marc,
                            mode="insert",
                        ),
                        reply_ticket(
                            template="authors/tickets/user_accepted.html",
                            context_factory=reply_ticket_context,
                        ),
                        close_ticket(ticket_id_key="ticket_id"),
                        IF(
                            curation_ticket_needed,
                            [
                                create_ticket(
                                    template="authors/tickets/curation_needed.html",
                                    queue="AUTHORS_curation",
                                    context_factory=curation_ticket_context,
                                    ticket_id_key="curation_ticket_id",
                                ),
                            ],
                        ),
                    ],
                    [
                        close_ticket(ticket_id_key="ticket_id"),
                    ],
                ),
            ],
        ),
    ]
Exemplo n.º 6
0
class Author(object):
    """Workflow definition for ingesting HEPNames/Authors records.

    ``workflow`` loads the source data, sets the schema, validates the
    record and then branches on the ``is-update`` mark. Updates are sent
    to legacy and an update notification goes out. Anything else goes
    through review and is then handled according to the decision.
    """

    name = "Author"
    data_type = "authors"

    workflow = [
        load_from_source_data,
        # The schema must be set so the Holding Pen can index the record.
        set_schema,
        validate_record('authors'),
        IF_ELSE(
            is_marked('is-update'),
            [SEND_TO_LEGACY, SEND_UPDATE_NOTIFICATION],
            [
                ASK_FOR_REVIEW,
                IF_ELSE(
                    is_record_accepted,
                    (
                        [store_record] +
                        SEND_TO_LEGACY +
                        NOTIFY_ACCEPTED +
                        CLOSE_TICKET_IF_NEEDED
                    ),
                    NOTIFY_NOT_ACCEPTED,
                ),
            ],
        ),
    ]
Exemplo n.º 7
0
class Article(object):
    """Workflow definition for ingesting articles into Literature.

    ``workflow`` chains step sequences defined elsewhere and ends with a
    branch on the approval decision plus a ticket close for submissions.
    """

    name = "HEP"
    data_type = "hep"

    workflow = (
        PRE_PROCESSING +
        STOP_IF_ALREADY_HARVESTED_ON_LEGACY_OR_TOO_OLD +
        NOTIFY_IF_SUBMISSION +
        MARK_IF_MATCH_IN_HOLDINGPEN +
        PROCESS_HOLDINGPEN_MATCHES +
        MARK_IF_UPDATE +
        ENHANCE_RECORD +
        STOP_IF_EXISTING_SUBMISSION +
        HALT_FOR_APPROVAL +
        [
            IF_ELSE(
                is_record_accepted,
                (
                    POSTENHANCE_RECORD +
                    STORE_RECORD +
                    SEND_TO_LEGACY_AND_WAIT +
                    NOTIFY_ACCEPTED +
                    NOTIFY_CURATOR_IF_CORE
                ),
                NOTIFY_NOT_ACCEPTED,
            ),
            IF(is_submission, close_ticket(ticket_id_key="ticket_id")),
        ]
    )
Exemplo n.º 8
0
    do_not_repeat('reply_ticket_user_new_submission')(
        reply_ticket(
            template="literaturesuggest/tickets/user_submitted.html",
            context_factory=reply_ticket_context,
            keep_new=True
        ),
    )
]

# Sets the 'auto-approved' mark. Submissions are always marked False;
# other records are marked True (and ``set_core_in_extra_data`` runs) only
# on the ``auto_approve`` branch, otherwise they are marked False as well.
CHECK_AUTO_APPROVE = [
    IF_ELSE(
        is_submission,
        mark('auto-approved', False),
        IF_ELSE(
            auto_approve,
            [mark('auto-approved', True), set_core_in_extra_data],
            mark('auto-approved', False),
        ),
    ),
]

ENHANCE_RECORD = [
    IF(
        is_arxiv_paper,
        [
            populate_arxiv_document,
            arxiv_package_download,
            arxiv_plot_extract,
            arxiv_derive_inspire_categories,
Exemplo n.º 9
0
    schema_data = requests_retry_session().get(obj.data['$schema']).content
    schema_data = json.loads(schema_data)

    try:
        validate(obj.data, schema_data)
    except ValidationError as err:
        __halt_and_notify('Invalid record: %s' % err, eng)
    except SchemaError as err:
        __halt_and_notify('SchemaError during record validation! %s' % err,
                          eng)


# Persist the record: run ``update_record`` on the ``is_record_in_db``
# branch and ``store_record`` otherwise.
STORE_REC = [
    IF_ELSE(
        is_record_in_db,
        [update_record],
        [store_record],
    ),
]


class ArticlesUpload(object):
    """Article ingestion workflow for Records collection."""
    name = "HEP"
    data_type = "harvesting"
    workflow = [
        delete_older_workflows,
        set_schema,
        add_arxiv_category,
        add_nations,
        remove_orcid_prefix,
Exemplo n.º 10
0
class Article(object):
    """Workflow definition for ingesting articles into Literature.

    ``workflow`` is one flat task list with four phases: matching against
    existing data (different for submissions and harvested records),
    article processing and classification, the halt-or-reject decision,
    and finally the handling of accepted and rejected records.
    """

    name = "HEP"
    data_type = "hep"

    workflow = [
        # The schema must be set so the Holding Pen can index the record.
        set_schema,
        # Emit record signals to receive metadata enrichment
        emit_record_signals,
        # Query locally or via legacy search API to see if article
        # is already ingested and this is an update
        IF(
            article_exists,
            [mark('match-found', True)],
        ),
        IF_ELSE(
            is_submission,
            [
                # Article matching for submissions
                # ================================
                IF(
                    pending_in_holding_pen,
                    [mark('already-in-holding-pen', True)],
                ),
                # Special RT integration for submissions
                # ======================================
                create_ticket(
                    template="literaturesuggest/tickets/curator_submitted.html",
                    queue="HEP_add_user",
                    context_factory=new_ticket_context,
                    ticket_id_key="ticket_id",
                ),
                reply_ticket(
                    template="literaturesuggest/tickets/user_submitted.html",
                    context_factory=reply_ticket_context,
                    keep_new=True,
                ),
            ],
            [
                # Article matching for non-submissions
                # ====================================
                # Query holding pen to see if we already have this article
                # ingested
                #
                # NOTE on updates:
                #     If the same article has been harvested before and the
                #     ingestion has been completed, process is continued
                #     to allow for updates.
                IF(
                    pending_in_holding_pen,
                    [
                        mark('already-in-holding-pen', True),
                        mark('delete', True),
                    ],
                ),
                IF(
                    is_arxiv_paper,
                    [
                        # FIXME: This filtering step should be removed when
                        #        this workflow includes arXiv CORE harvesting
                        IF(
                            already_harvested,
                            [
                                mark('already-ingested', True),
                                mark('stop', True),
                            ],
                        ),
                        # FIXME: This filtering step should be removed when:
                        #        old previously rejected records are treated
                        #        differently e.g. good auto-reject heuristics
                        #        or better time based filtering (5 days is
                        #        quite random now).
                        IF(
                            previously_rejected(),
                            [
                                mark('already-ingested', True),
                                mark('stop', True),
                            ],
                        ),
                    ],
                ),
                IF(
                    is_marked('delete'),
                    [update_old_object, delete_self_and_stop_processing],
                ),
                IF(
                    is_marked('stop'),
                    [stop_processing],
                ),
            ],
        ),
        #
        # Article Processing
        # ==================
        IF(
            is_arxiv_paper,
            [
                arxiv_fulltext_download,
                arxiv_plot_extract,
                arxiv_refextract,
                arxiv_author_list("authorlist2marcxml.xsl"),
            ],
        ),
        extract_journal_info,
        classify_paper(
            taxonomy="HEPont.rdf",
            only_core_tags=False,
            spires=True,
            with_author_keywords=True,
        ),
        filter_core_keywords,
        guess_categories,
        IF(
            is_experimental_paper,
            [guess_experiments],
        ),
        guess_keywords,
        # Predict action for a generic HEP paper based only on title
        # and abstract.
        guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle")
        # Check if we shall halt or auto-reject
        # =====================================
        # NOTE: User submissions are always relevant
        IF_ELSE(
            is_record_relevant,
            [
                halt_record(action="hep_approval"),
            ],
            [
                reject_record("Article automatically rejected"),
                stop_processing,
            ],
        ),
        IF_ELSE(
            is_record_accepted,
            [
                IF(
                    article_exists,
                    [
                        IF_ELSE(
                            is_submission,
                            [
                                reject_record(
                                    'Article was already found on INSPIRE'
                                ),
                                # NOTE(review): stop_processing comes before
                                # the ticket reply/close below; if it aborts
                                # the run immediately those two steps never
                                # execute -- confirm the intended order.
                                stop_processing,
                                reply_ticket(
                                    template="literaturesuggest/tickets/user_rejected_exists.html",
                                    context_factory=reply_ticket_context,
                                ),
                                close_ticket(ticket_id_key="ticket_id"),
                            ],
                            [
                                halt_record(action="merge_approval"),
                            ],
                        ),
                    ],
                ),
                add_core,
                add_note_entry,
                filter_keywords,
                user_pdf_get,
                IF_ELSE(
                    shall_push_remotely,
                    [
                        IF_ELSE(
                            article_exists,
                            [
                                prepare_update_payload(
                                    extra_data_key="update_payload",
                                ),
                                send_robotupload(
                                    marcxml_processor=hep2marc,
                                    mode="correct",
                                    extra_data_key="update_payload",
                                ),
                            ],
                            [
                                send_robotupload(
                                    marcxml_processor=hep2marc,
                                    mode="insert",
                                ),
                            ],
                        ),
                    ],
                    [store_record],
                ),
                IF(
                    is_submission,
                    [
                        IF(
                            curation_ticket_needed,
                            [
                                create_ticket(
                                    template="literaturesuggest/tickets/curation_core.html",
                                    queue="HEP_curation",
                                    context_factory=curation_ticket_context,
                                    ticket_id_key="curation_ticket_id",
                                ),
                            ],
                        ),
                        reply_ticket(
                            template="literaturesuggest/tickets/user_accepted.html",
                            context_factory=reply_ticket_context,
                        ),
                    ],
                ),
            ],
            [
                IF(
                    is_submission,
                    [reply_ticket(context_factory=reply_ticket_context)],
                ),
            ],
        ),
        close_ticket(ticket_id_key="ticket_id"),
    ]
Exemplo n.º 11
0
        ticket_id_key="ticket_id"
    ),
    reply_ticket(
        template="literaturesuggest/tickets/user_submitted.html",
        context_factory=reply_ticket_context,
        keep_new=True
    ),
]

# Decide the value of the 'auto-approved' mark. The submission branch
# marks it False; the other branch marks it True and runs
# ``set_core_in_extra_data`` under ``auto_approve``, else marks it False.
CHECK_AUTO_APPROVE = [
    IF_ELSE(
        is_submission,
        mark('auto-approved', False),
        IF_ELSE(
            auto_approve,
            [mark('auto-approved', True), set_core_in_extra_data],
            mark('auto-approved', False),
        ),
    ),
]

ENHANCE_RECORD = [
    IF(
        is_arxiv_paper,
        [
            populate_arxiv_document,
            arxiv_package_download,
            arxiv_plot_extract,
            arxiv_derive_inspire_categories,
Exemplo n.º 12
0
    ),
    filter_core_keywords,
    guess_categories,
    IF(is_experimental_paper, [guess_experiments]),
    guess_keywords,
    # Predict action for a generic HEP paper based only on title
    # and abstract.
    guess_coreness,  # ("arxiv_skip_astro_title_abstract.pickle)
    # Check if we shall halt or auto-reject
    # =====================================
]

# On the ``is_record_relevant`` branch the record is halted with the
# "hep_approval" action; otherwise it is rejected and stop_processing runs.
CHECK_IF_SUBMISSION_AND_ASK_FOR_APPROVAL = [
    IF_ELSE(
        is_record_relevant,
        [
            halt_record(
                action="hep_approval",
                message="Submission halted for curator approval.",
            ),
        ],
        [
            reject_record("Article automatically rejected"),
            stop_processing,
        ],
    ),
]

# For submissions only: reply on the ticket using reply_ticket_context.
NOTIFY_NOT_ACCEPTED = [
    IF(
        is_submission,
        [
            reply_ticket(context_factory=reply_ticket_context),
        ],
    ),
]

NOTIFY_ALREADY_EXISTING = [
    reject_record('Article was already found on INSPIRE'),
    stop_processing,
    reply_ticket(template=("literaturesuggest/tickets/"
                           "user_rejected_exists.html"),
                 context_factory=reply_ticket_context),
    close_ticket(ticket_id_key="ticket_id"),
Exemplo n.º 13
0
class AuthorNew(WorkflowBase):
    """Workflow for new author information.

    ``workflow`` converts the submitted data, opens a curator ticket,
    replies to the user and halts for approval. On the
    ``shall_upload_record`` branch the record is uploaded in ``insert``
    mode, the user is notified and the ticket closed (with an optional
    curation ticket); otherwise the ticket is simply closed.
    """

    object_type = "Author New"

    workflow = [
        convert_data_to_model(),
        create_marcxml_record(),
        create_curator_ticket_new(template="authors/tickets/curator_new.html",
                                  queue="Authors_add_user"),
        reply_ticket(template="authors/tickets/user_new.html", keep_new=True),
        halt_record_with_action(action="author_approval",
                                message="Accept submission?"),
        IF_ELSE(shall_upload_record, [
            IF(recreate_data,
               [convert_data_to_model(),
                create_marcxml_record()]),
            send_robotupload(mode="insert"),
            reply_ticket(template="authors/tickets/user_accepted.html"),
            close_ticket(ticket_id_key="ticket_id"),
            IF(curation_ticket_needed, [
                create_curation_ticket(
                    template="authors/tickets/curation_needed.html",
                    queue="AUTHORS_curation",
                    ticket_id_key="curation_ticket_id"),
            ]),
        ], [
            close_ticket(ticket_id_key="ticket_id"),
        ])
    ]

    @staticmethod
    def _get_user_email(bwo):
        """Return the e-mail of the user referenced by ``bwo.id_user``.

        An empty string is returned when reading ``.email`` from the query
        result raises ``AttributeError`` (for instance when the lookup
        yields ``None``).
        """
        # Read outside the ``try`` so a missing ``id_user`` attribute on
        # ``bwo`` is still reported instead of being silently swallowed.
        id_user = bwo.id_user
        try:
            return User.query.get(id_user).email
        except AttributeError:
            return ''

    @staticmethod
    def get_title(bwo):
        """Return title of object."""
        return u"New Author by: {0}".format(AuthorNew._get_user_email(bwo))

    @staticmethod
    def get_description(bwo):
        """Return description of object."""
        return bwo.data.get("name", {}).get("preferred_name", "No name found")

    @staticmethod
    def formatter(bwo, **kwargs):
        """Return formatted data of object.

        :param bwo: workflow object providing ``data``, ``extra_data`` and
            ``id_user``.
        :param kwargs: ``of`` selects the output format (default ``"hp"``).
        :return: the ``marcxml`` entry of ``extra_data`` when ``of`` is
            ``"xm"``, otherwise the rendered author template.
        """
        of = kwargs.get("of", "hp")

        if of == "xm":
            # The raw XML needs neither the user lookup nor the ticket URL,
            # so return before doing that work.
            return bwo.extra_data.get("marcxml")

        user_email = AuthorNew._get_user_email(bwo)
        ticket_id = bwo.extra_data.get("ticket_id")
        ticket_url = (
            "https://rt.inspirehep.net/Ticket/Display.html?id={0}".format(
                ticket_id)
        )

        # FIXME add a template for the author display in the HP
        return render_template("authors/workflows/authorupdate.html",
                               record=bwo.data,
                               user_email=user_email,
                               ticket_url=ticket_url,
                               comments=bwo.extra_data.get("comments"))
Exemplo n.º 14
0
from workflow.patterns.controlflow import (FOR, IF_ELSE, CMP)

# Broker URL for Celery. The host is read from the REDIS_HOST environment
# variable and falls back to "localhost" when it is unset: os.getenv would
# otherwise return None and the string concatenation would raise TypeError.
redisUrl = "redis://" + os.getenv('REDIS_HOST', 'localhost') + ":6379/0"
app = Celery('core', broker=redisUrl)

# Task list executed by runWorkflow. After the store is initialised and the
# agent fetched, CMP compares o['data']['agent']['sex'] with 'Male' using
# '=='; one branch logs the result, the other fetches the current time.
# The result is logged at the end in both cases.
flow = [
    library.utils.initializeStore,
    library.agents.getAgent,
    IF_ELSE(
        CMP(lambda o, e: o['data']['agent']['sex'], 'Male', '=='),
        [library.logger.logResult],
        [library.clock.getCurrentTime],
    ),
    library.logger.logResult,
]


@app.task
def runWorkflow(data):
    """Run the module-level ``flow`` over ``data`` as a Celery task.

    :param data: the objects handed to the engine's ``process`` call.
    """
    # The engine class is imported at call time rather than at module load.
    from workflow.engine import GenericWorkflowEngine

    engine = GenericWorkflowEngine()
    engine.setWorkflow(flow)
    engine.process(data)


if __name__ == "__main__":
    # When executed as a script, queue a single run of the workflow task
    # with one starter token.
    initial_tokens = [{"start": True}]
    runWorkflow.delay(initial_tokens)
Exemplo n.º 15
0
        with_author_keywords=True,
    ),
    filter_core_keywords,
    guess_categories,
    IF(
        is_experimental_paper,
        guess_experiments,
    ),
    guess_keywords,
    guess_coreness,
    IF_ELSE(
        is_submission,
        mark('auto-approved', False),
        IF_ELSE(
            belongs_to_relevant_category,
            [
                mark('auto-approved', True),
                set_coreness_in_extra_data,
            ],
            mark('auto-approved', False),
        ),
    ),
]

# For submissions only: reply on the ticket using reply_ticket_context.
NOTIFY_NOT_ACCEPTED = [
    IF(is_submission, reply_ticket(context_factory=reply_ticket_context)),
]

NOTIFY_ALREADY_EXISTING = [