Exemplo n.º 1
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update matching a rejected workflow is flagged as such.

    A first workflow is started, rejected (``approved`` set to ``False``) and
    completed.  A second workflow for the same record, with a changed title,
    must then list the first one in ``previously_rejected_matches``.
    """
    record = generate_record()

    # First run: start the workflow, reject the record and let it finish.
    first_wf_id = build_workflow(record).id
    first_engine = WorkflowEngine.from_uuid(
        start("article", object_id=first_wf_id)
    )
    obj_id = first_engine.objects[0].id
    rejected_wf = workflow_object_class.get(obj_id)
    rejected_wf.extra_data["approved"] = False  # reject record
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(obj_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    # Second run: this workflow matches in the holdingpen and stops because
    # the matched one was rejected.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    second_wf_id = build_workflow(record).id
    second_engine = WorkflowEngine.from_uuid(
        start("article", object_id=second_wf_id)
    )
    obj2 = second_engine.objects[0]

    assert obj2.extra_data["previously_rejected"] is True
    assert obj2.extra_data["previously_rejected_matches"] == [obj_id]
Exemplo n.º 2
0
def test_workflow_checks_affiliations_if_record_is_not_important(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the ``_collections`` sent out for authors with known affiliations.

    The record's first two authors get IN2P3, Cern and Fermilab raw
    affiliations.  The JSON body of the first request recorded by
    ``mocked_external_services`` must then carry the hidden collections and
    must not carry ``Literature``.
    """
    record = generate_record()
    authors = record['authors']
    authors[0]['raw_affiliations'] = [{"value": "IN2P3"}, {"value": "Cern"}]
    authors[1]['raw_affiliations'] = [{"value": "Fermilab"}]
    workflow_id = build_workflow(record).id

    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']
    for expected_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert expected_collection in collections_in_record
    assert "Literature" not in collections_in_record
Exemplo n.º 3
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
):
    """Check that an update matching a rejected workflow is flagged as such.

    The record is submitted, rejected and completed; submitting it again with
    a new title must report ``previously_rejected`` and reference the first
    workflow, while ``already-in-holding-pen`` stays ``False``.
    """
    record = generate_record()

    # First submission: reject the record and let the workflow complete.
    first_engine = WorkflowEngine.from_uuid(start('article', [record]))
    obj_id = first_engine.objects[0].id
    rejected_wf = workflow_object_class.get(obj_id)
    rejected_wf.extra_data["approved"] = False  # reject record
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(obj_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    # Second submission: this workflow matches in the holdingpen and stops
    # because the matched one was rejected.
    updated_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = updated_title
    second_engine = WorkflowEngine.from_uuid(start('article', [record]))
    obj2 = second_engine.objects[0]

    assert obj2.extra_data['already-in-holding-pen'] is False
    assert obj2.extra_data['previously_rejected'] is True
    assert obj2.extra_data['previously_rejected_matches'] == [obj_id]
Exemplo n.º 4
0
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually accepted.

    The halted workflow is accepted as core, must then be waiting, and must
    be completed with ``approved`` set once the robotupload callback has
    been received.
    """
    # The docstring above must be the first statement of the function;
    # placed after this assignment it was only a discarded string expression.
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    # After acceptance the workflow waits for the robotupload callback.
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
Exemplo n.º 5
0
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that ``restart-count`` is 0 on first start and 1 after a restart.

    The counter is checked both in ``extra_data`` and in the persistent
    ``marks`` stored under ``source_data``.
    """

    def persisted_restart_count(wf_obj):
        # Re-read the whole path on every call so that a replaced nested
        # dictionary is never read from a cached reference.
        marks = wf_obj.extra_data['source_data']['persistent_data']['marks']
        return marks['restart-count']

    record = generate_record()

    with workflow_app.app_context():
        obj_id = build_workflow(record).id
        start('article', object_id=obj_id)

        obj = workflow_object_class.get(obj_id)

        assert persisted_restart_count(obj) == 0
        assert obj.extra_data['restart-count'] == 0

        # Rewind the workflow to its first step before starting it again.
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()

        start('article', object_id=obj_id)

        assert persisted_restart_count(obj) == 1
        assert obj.extra_data['restart-count'] == 1
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually rejected.

    The halted workflow gets ``approved`` set to ``False`` and is continued;
    it must end up completed and still not approved.
    """
    record = generate_record()
    extra_config = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
    )

    workflow_uuid, _engine, halted_obj = get_halted_workflow(
        app=workflow_app,
        extra_config=extra_config,
        record=record,
    )

    # Record the rejection and persist it before resuming.
    halted_obj.extra_data["approved"] = False
    halted_obj.save()
    db.session.commit()

    engine = WorkflowEngine.from_uuid(workflow_uuid)
    processed_obj = engine.processed_objects[0]
    obj_id = processed_obj.id
    processed_obj.continue_workflow()

    final_obj = workflow_object_class.get(obj_id)
    # It was rejected
    assert final_obj.status == ObjectStatus.COMPLETED
    assert final_obj.extra_data["approved"] is False
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually accepted.

    The halted workflow is accepted as core and must stay waiting through
    the robotupload callback; only after the webcoll callback must it be
    completed with ``approved`` set.
    """
    # The docstring above must be the first statement of the function;
    # placed after this assignment it was only a discarded string expression.
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(app=workflow_app, record=record)

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(app=workflow_app, workflow_id=obj.id, recids=[12345])

    # Still waiting: the webcoll callback has not arrived yet.
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    do_webcoll_callback(app=workflow_app, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that ``restart-count`` starts at 0 and becomes 1 after a restart.

    Both the ``extra_data`` counter and the copy kept in the persistent
    ``marks`` under ``source_data`` are verified.
    """
    record = generate_record()

    with workflow_app.app_context():
        obj_id = build_workflow(record).id
        start('article', object_id=obj_id)

        obj = workflow_object_class.get(obj_id)

        # First run: nothing has been restarted yet.
        persistent_data = obj.extra_data['source_data']['persistent_data']
        assert persistent_data['marks']['restart-count'] == 0
        assert obj.extra_data['restart-count'] == 0

        # Reset the position so the next start runs the workflow again.
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()

        start('article', object_id=obj_id)

        # Second run: look the data up again instead of reusing the old
        # reference.
        persistent_data = obj.extra_data['source_data']['persistent_data']
        assert persistent_data['marks']['restart-count'] == 1
        assert obj.extra_data['restart-count'] == 1
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a holdingpen match from another source is not stopped.

    A first workflow is left halted.  An update with a different title and a
    different acquisition source must report the halted workflow in
    ``holdingpen_matches`` without ``stopped-matched-holdingpen-wf`` set.
    """
    record = generate_record()

    pending_wf_id = build_workflow(record).id
    pending_engine_uuid = start('article', object_id=pending_wf_id)
    es.indices.refresh('holdingpen-hep')
    pending_engine = WorkflowEngine.from_uuid(pending_engine_uuid)
    wf_to_match = pending_engine.objects[0].id
    pending_wf = workflow_object_class.get(wf_to_match)
    # generated wf pending in holdingpen
    assert pending_wf.status == ObjectStatus.HALTED

    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    record['acquisition_source']['source'] = 'but not the source'
    # this workflow matches in the holdingpen but continues because has a
    # different source
    update_wf_id = build_workflow(record).id
    update_engine = WorkflowEngine.from_uuid(
        start('article', object_id=update_wf_id)
    )
    update_wf = update_engine.objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['holdingpen_matches'] == [wf_to_match]
    assert update_wf.extra_data['previously_rejected'] is False
    assert not update_wf.extra_data.get('stopped-matched-holdingpen-wf')
Exemplo n.º 10
0
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update from another source continues despite a match.

    The update must be flagged as already in the holdingpen and reference
    the halted workflow, but must not be marked as stopped or as previously
    rejected.
    """
    record = generate_record()

    # Create the workflow that will sit halted in the holdingpen.
    first_id = build_workflow(record).id
    first_uuid = start('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')
    first_eng = WorkflowEngine.from_uuid(first_uuid)
    wf_to_match = first_eng.objects[0].id
    halted_wf = workflow_object_class.get(wf_to_match)
    assert halted_wf.status == ObjectStatus.HALTED
    # generated wf pending in holdingpen

    title_entry = record['titles'][0]
    title_entry['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'
    # this workflow matches in the holdingpen but continues because has a
    # different source
    second_id = build_workflow(record).id
    second_uuid = start('article', object_id=second_id)
    second_eng = WorkflowEngine.from_uuid(second_uuid)
    matched_wf = second_eng.objects[0]

    extra_data = matched_wf.extra_data
    assert extra_data['already-in-holding-pen'] is True
    assert extra_data['holdingpen_matches'] == [wf_to_match]
    assert extra_data['previously_rejected'] is False
    assert not extra_data.get('stopped-matched-holdingpen-wf')
Exemplo n.º 11
0
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow whose record is manually rejected.

    After storing ``approved = False`` on the halted workflow and resuming
    it, the workflow must be completed and remain not approved.
    """
    record = generate_record()
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    halted = get_halted_workflow(
        app=workflow_app, extra_config=extra_config, record=record
    )
    workflow_uuid, eng, obj = halted

    # Persist the curator's rejection.
    obj.extra_data["approved"] = False
    obj.save()
    db.session.commit()

    # Reload the object through the engine and resume the workflow.
    eng = WorkflowEngine.from_uuid(workflow_uuid)
    resumed = eng.processed_objects[0]
    obj_id = resumed.id
    resumed.continue_workflow()

    result = workflow_object_class.get(obj_id)
    # It was rejected
    assert result.status == ObjectStatus.COMPLETED
    assert result.extra_data["approved"] is False
Exemplo n.º 12
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Check that resubmitting a rejected record is reported as rejected.

    The second submission must have ``previously_rejected`` set, reference
    the first workflow in ``previously_rejected_matches`` and keep
    ``already-in-holding-pen`` ``False``.
    """
    record = generate_record()

    initial_uuid = start('article', [record])
    initial_eng = WorkflowEngine.from_uuid(initial_uuid)
    obj_id = initial_eng.objects[0].id

    wf_obj = workflow_object_class.get(obj_id)
    wf_obj.extra_data["approved"] = False  # reject record
    wf_obj.continue_workflow()

    wf_obj = workflow_object_class.get(obj_id)
    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    update_uuid = start('article', [record])
    update_eng = WorkflowEngine.from_uuid(update_uuid)
    obj2 = update_eng.objects[0]

    update_data = obj2.extra_data
    assert update_data['already-in-holding-pen'] is False
    assert update_data['previously_rejected'] is True
    assert update_data['previously_rejected_matches'] == [obj_id]
Exemplo n.º 13
0
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an approved, unmodified record stays in ``Literature`` only.

    The generated record is used as is, approved and continued.  The JSON
    body of the first request recorded by ``mocked_external_services`` must
    have ``_collections`` equal to ``["Literature"]``.
    """
    record = generate_record()
    workflow_id = build_workflow(record).id

    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)
        # Approve the record and resume the workflow synchronously.
        wf = workflow_object_class.get(workflow_id)
        wf.extra_data['approved'] = True
        wf.save()
        wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']
    for hidden_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert hidden_collection not in collections_in_record
    assert ["Literature"] == collections_in_record
Exemplo n.º 14
0
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
):
    """Test a full harvesting workflow that is manually accepted.

    The halted workflow is accepted as core and must stay waiting through
    the robotupload callback; after the webcoll callback it must be
    completed.  Both callbacks must answer with HTTP 200.
    """
    # The docstring above must be the first statement of the function;
    # placed after this assignment it was only a discarded string expression.
    record = generate_record()
    with requests_mock.Mocker() as requests_mocker:
        # Requests to URLs containing "indexer" or "localhost" go through
        # as real HTTP.
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        # POSTs to localhost:1234 get a canned successful answer.
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*'),
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, eng, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        response = do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200

        # Still waiting: the webcoll callback has not arrived yet.
        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        response = do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
Exemplo n.º 15
0
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Check that starting a workflow without ``source_data`` raises ``ValueError``."""
    workflow = build_workflow(generate_record())

    # Strip the source data and persist the change before starting.
    del workflow.extra_data["source_data"]
    workflow.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=workflow.id)
Exemplo n.º 16
0
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Check that ``start`` raises ``ValueError`` when ``source_data`` is missing.

    The ``source_data`` key is deleted from the workflow's ``extra_data``
    and the change is committed before the workflow is started.
    """
    record = generate_record()
    wf_without_source = build_workflow(record)

    del wf_without_source.extra_data["source_data"]
    wf_without_source.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=wf_without_source.id)
Exemplo n.º 17
0
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Check that matching a workflow in ``INITIAL`` state raises ``WorkflowsError``.

    A workflow object for the record is stored with ``INITIAL`` status and
    indexed; starting a new workflow for the same record must then fail.
    """
    record = generate_record()

    existing_wf = workflow_object_class.create(data=record, data_type='hep')
    existing_wf.status = ObjectStatus.INITIAL
    existing_wf.save()
    # Refresh the index so the stored workflow is searchable.
    es.indices.refresh('holdingpen-hep')

    with pytest.raises(WorkflowsError):
        start('article', record)
Exemplo n.º 18
0
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Check that matching a workflow in ``INITIAL`` state raises ``WorkflowsError``.

    Both building and starting the new workflow happen inside the
    ``pytest.raises`` block, so an error from either call satisfies the test.
    """
    record = generate_record()

    stored_wf = workflow_object_class.create(data=record, data_type="hep")
    stored_wf.status = ObjectStatus.INITIAL
    stored_wf.save()
    es.indices.refresh("holdingpen-hep")

    with pytest.raises(WorkflowsError):
        new_wf_id = build_workflow(record).id
        start("article", object_id=new_wf_id)
Exemplo n.º 19
0
def test_match_wf_in_error_goes_in_error_state(workflow_app):
    """Check that matching a workflow in ``ERROR`` state raises ``WorkflowsError``.

    A workflow object for the record is stored with ``ERROR`` status and
    indexed; building and starting a new workflow for the same record must
    then fail.
    """
    record = generate_record()

    errored_wf = workflow_object_class.create(data=record, data_type='hep')
    errored_wf.status = ObjectStatus.ERROR
    errored_wf.save()
    # Refresh the index so the stored workflow is searchable.
    es.indices.refresh('holdingpen-hep')

    with pytest.raises(WorkflowsError):
        new_wf_id = build_workflow(record).id
        start('article', object_id=new_wf_id)
Exemplo n.º 20
0
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Check that a new workflow fails when one in ``INITIAL`` state matches.

    The pre-existing workflow object is saved with ``INITIAL`` status and the
    ``holdingpen-hep`` index is refreshed before the new workflow is built
    and started.
    """
    record = generate_record()

    initial_wf = workflow_object_class.create(data=record, data_type="hep")
    initial_wf.status = ObjectStatus.INITIAL
    initial_wf.save()

    es.indices.refresh("holdingpen-hep")

    with pytest.raises(WorkflowsError):
        matching_wf_id = build_workflow(record).id
        start("article", object_id=matching_wf_id)
Exemplo n.º 21
0
def test_match_wf_in_error_goes_in_error_state(workflow_app):
    """Check that a new workflow fails when one in ``ERROR`` state matches.

    The pre-existing workflow object is saved with ``ERROR`` status and the
    ``holdingpen-hep`` index is flushed and refreshed before the new
    workflow is built and started.
    """
    record = generate_record()

    failed_wf = workflow_object_class.create(data=record, data_type="hep")
    failed_wf.status = ObjectStatus.ERROR
    failed_wf.save()

    current_search.flush_and_refresh("holdingpen-hep")

    with pytest.raises(WorkflowsError):
        matching_wf_id = build_workflow(record).id
        start("article", object_id=matching_wf_id)
Exemplo n.º 22
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update stops the pending workflow it matches.

    The first workflow is left halted.  After an update with a new title is
    started, the update must be halted and flagged as matching, while the
    old workflow must be completed and record the update's id in
    ``stopped-by-wf``.
    """
    record = generate_record()

    pending_id = build_workflow(record).id
    pending_uuid = start("article", object_id=pending_id)
    es.indices.refresh("holdingpen-hep")
    pending_engine = WorkflowEngine.from_uuid(pending_uuid)
    old_wf = pending_engine.objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # The update is the same record object with only its title changed.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    record2_workflow = build_workflow(record).id
    start("article", object_id=record2_workflow)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(record2_workflow)
    update_data = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    #  As workflow stops (in error) before setting this
    assert update_data["previously_rejected"] is False
    assert update_data['already-in-holding-pen'] is True
    assert update_data["stopped-matched-holdingpen-wf"] is True
    assert update_data["is-update"] is False

    old_wf = workflow_object_class.get(obj_id)
    old_data = old_wf.extra_data
    assert old_data['already-in-holding-pen'] is False
    assert old_data['previously_rejected'] is False
    assert old_data['stopped-by-wf'] == update_wf.id
    assert old_data.get('approved') is None
    assert old_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
Exemplo n.º 23
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a matching update takes over from the pending workflow.

    The pending workflow must end up completed with ``stopped-by-wf``
    pointing at the update, and the update must be halted with
    ``stopped-matched-holdingpen-wf`` set.
    """
    index_name = "holdingpen-hep"
    record = generate_record()

    # First workflow: left halted in the holdingpen.
    workflow_id = build_workflow(record).id
    first_uuid = start("article", object_id=workflow_id)
    current_search.flush_and_refresh(index_name)
    first_eng = WorkflowEngine.from_uuid(first_uuid)
    old_wf = first_eng.objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # Second workflow: the same record object with an updated title.
    updated_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = updated_title
    record2_workflow = build_workflow(record).id
    start("article", object_id=record2_workflow)
    current_search.flush_and_refresh(index_name)

    update_wf = workflow_object_class.get(record2_workflow)

    assert update_wf.status == ObjectStatus.HALTED
    #  As workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    # Reload the old workflow to see what the update did to it.
    stopped_wf = workflow_object_class.get(obj_id)
    assert stopped_wf.extra_data['already-in-holding-pen'] is False
    assert stopped_wf.extra_data['previously_rejected'] is False
    assert stopped_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_wf.extra_data.get('approved') is None
    assert stopped_wf.extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
Exemplo n.º 24
0
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is manually accepted.

    The halted workflow is accepted as core and must stay waiting through
    the robotupload callback; after the webcoll callback it must be
    completed.  Both callbacks must answer with HTTP 200.
    """
    # The docstring above must be the first statement of the function;
    # placed after this assignment it was only a discarded string expression.
    record = generate_record()

    workflow_uuid, eng, obj = get_halted_workflow(
        app=workflow_app,
        record=record,
    )

    do_accept_core(
        app=workflow_app,
        workflow_id=obj.id,
    )

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    response = do_robotupload_callback(
        app=workflow_app,
        workflow_id=obj.id,
        recids=[12345],
    )
    assert response.status_code == 200

    # Still waiting: the webcoll callback has not arrived yet.
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    response = do_webcoll_callback(app=workflow_app, recids=[12345])
    assert response.status_code == 200

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
Exemplo n.º 25
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update stops the pending workflow it matches.

    The update, read back through its engine, must be halted and flagged as
    matching a holdingpen workflow; the old workflow must be completed with
    ``stopped-by-wf`` set to the update's id.
    """
    record = generate_record()

    old_wf_id = build_workflow(record).id
    old_engine = WorkflowEngine.from_uuid(
        start('article', object_id=old_wf_id)
    )
    es.indices.refresh('holdingpen-hep')
    old_wf = old_engine.objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data['previously_rejected'] is False

    # The update is the same record object with only its title changed.
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    record2_workflow = build_workflow(record).id
    eng_uuid2 = start('article', object_id=record2_workflow)
    es.indices.refresh('holdingpen-hep')
    update_engine = WorkflowEngine.from_uuid(eng_uuid2)
    update_wf = update_engine.objects[0]

    update_data = update_wf.extra_data
    assert update_wf.status == ObjectStatus.HALTED
    assert update_data['already-in-holding-pen'] is True
    assert update_data['previously_rejected'] is False
    assert update_data['stopped-matched-holdingpen-wf'] is True
    assert update_data['is-update'] is False

    old_wf = workflow_object_class.get(obj_id)
    old_data = old_wf.extra_data
    assert old_data['already-in-holding-pen'] is False
    assert old_data['previously_rejected'] is False
    assert old_data['stopped-by-wf'] == update_wf.id
    assert old_data.get('approved') is None
    assert old_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
Exemplo n.º 26
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Check that resubmitting a pending record stops the older workflow.

    The record is submitted twice through ``start('article', [record])``,
    the second time with a new title.  The second workflow must be halted
    and flagged as matching; the first must be completed with
    ``stopped-by-wf`` set to the second workflow's id.
    """
    index_name = 'holdingpen-hep'
    record = generate_record()

    first_uuid = start('article', [record])
    es.indices.refresh(index_name)
    first_eng = WorkflowEngine.from_uuid(first_uuid)
    old_wf = first_eng.objects[0]
    obj_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data['previously_rejected'] is False

    # Same record object, new title.
    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title
    eng_uuid2 = start('article', [record])
    es.indices.refresh(index_name)
    eng2 = WorkflowEngine.from_uuid(eng_uuid2)
    update_wf = eng2.objects[0]

    assert update_wf.status == ObjectStatus.HALTED
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['previously_rejected'] is False
    assert update_wf.extra_data['stopped-matched-holdingpen-wf'] is True
    assert update_wf.extra_data['is-update'] is False

    # Reload the first workflow to observe the effect of the update.
    stopped_wf = workflow_object_class.get(obj_id)
    assert stopped_wf.extra_data['already-in-holding-pen'] is False
    assert stopped_wf.extra_data['previously_rejected'] is False
    assert stopped_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_wf.extra_data.get('approved') is None
    assert stopped_wf.extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
Exemplo n.º 27
0
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    small_app,
):
    """Test a full harvesting workflow whose record is manually rejected.

    Inside the application context, the halted workflow has its action
    removed and ``approved`` set to ``False``; after continuing, it must be
    completed.
    """
    record = generate_record()
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    with small_app.app_context():
        workflow_uuid, _engine, halted_obj = get_halted_workflow(
            app=small_app,
            extra_config=extra_config,
            record=record,
        )

        # Resolve the halted workflow as rejected and continue.
        # FIXME Should be accept, but record validation prevents us.
        halted_obj.remove_action()
        halted_obj.extra_data["approved"] = False
        halted_obj.save()

        db.session.commit()

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed_obj = engine.processed_objects[0]
        obj_id = processed_obj.id
        processed_obj.continue_workflow()

        final_obj = workflow_object_class.get(obj_id)
        # It was rejected
        assert final_obj.status == ObjectStatus.COMPLETED
Exemplo n.º 28
0
def test_workflow_checks_affiliations_if_record_is_rejected_by_curator(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the ``_collections`` sent out for a record that is not approved.

    The raw affiliations contain the institution names with punctuation and
    mixed case.  After ``approved`` is set to ``False`` and the workflow is
    continued, the JSON body of the first request recorded by
    ``mocked_external_services`` must carry the hidden collections and must
    not carry ``Literature``.
    """
    record = generate_record()
    authors = record['authors']
    authors[0]['raw_affiliations'] = [
        {"value": "IN2P3."},
        {"value": "Some words with CErN, inside."},
    ]
    authors[1]['raw_affiliations'] = [{"value": "Fermilab?"}]
    workflow_id = build_workflow(record).id

    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)
        # Mark the record as not approved and resume synchronously.
        wf = workflow_object_class.get(workflow_id)
        wf.extra_data['approved'] = False
        wf.save()
        wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']
    for expected_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert expected_collection in collections_in_record
    assert "Literature" not in collections_in_record
Exemplo n.º 29
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update of a rejected record is reported as rejected.

    The first workflow is rejected and completed.  The second one, built from
    the same record with a new title, must have ``previously_rejected`` set
    and list the first workflow in ``previously_rejected_matches``.
    """
    record = generate_record()

    rejected_wf_id = build_workflow(record).id
    rejected_uuid = start("article", object_id=rejected_wf_id)
    rejected_eng = WorkflowEngine.from_uuid(rejected_uuid)
    obj_id = rejected_eng.objects[0].id

    wf_obj = workflow_object_class.get(obj_id)
    wf_obj.extra_data["approved"] = False  # reject record
    wf_obj.continue_workflow()

    wf_obj = workflow_object_class.get(obj_id)
    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    updated_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = updated_title
    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    update_wf_id = build_workflow(record).id
    update_uuid = start("article", object_id=update_wf_id)
    update_eng = WorkflowEngine.from_uuid(update_uuid)
    obj2 = update_eng.objects[0]

    update_data = obj2.extra_data
    assert update_data["previously_rejected"] is True
    assert update_data["previously_rejected_matches"] == [obj_id]
Exemplo n.º 30
0
def test_do_not_repeat(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that ``do_not_repeat`` steps are not executed a second time.

    The first custom run stores the results of steps ``one`` and ``two`` in
    the persistent data.  The second run offers different values for the
    same step ids plus a new step ``three``; ``one`` and ``two`` must keep
    their first-run results and only ``three`` must add a new one.
    """

    def return_value(val):
        # Build a workflow step that records ``val`` and returns it.
        def _return_value(obj, eng):
            obj.extra_data["id"] = val
            obj.save()
            return {"id": val}

        return _return_value

    custom_wf_steps = [
        load_from_source_data,
        do_not_repeat("one")(return_value(1)),
        do_not_repeat("two")(return_value(2)),
    ]

    custom_wf_steps_to_repeat = [
        load_from_source_data,
        do_not_repeat("one")(return_value(41)),
        do_not_repeat("two")(return_value(42)),
        do_not_repeat("three")(return_value(43)),
    ]

    # ``viewitems()`` (Python 2 dict views) allows the subset comparison
    # with ``<=`` used in the assertions below.
    first_run_results = {'one': {'id': 1}, 'two': {'id': 2}}
    expected_persistent_data_first_run = first_run_results.viewitems()

    second_run_results = dict(first_run_results, three={'id': 43})
    expected_persistent_data_second_run = second_run_results.viewitems()

    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=wf_id)

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        # First custom run.
        eng = WorkflowEngine.from_uuid(obj.id_workflow)
        eng.callbacks.replace(custom_wf_steps)
        eng.process([obj])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        source_data = obj.extra_data['source_data']
        persistent_data = source_data['persistent_data'].viewitems()
        assert expected_persistent_data_first_run <= persistent_data
        assert obj.extra_data['id'] == 2
        assert obj.status == ObjectStatus.COMPLETED

        # Second custom run, restarted from the first step.
        eng = WorkflowEngine.from_uuid(obj.id_workflow)
        eng.callbacks.replace(custom_wf_steps_to_repeat)
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()
        eng.process([obj])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        source_data = obj.extra_data['source_data']
        persistent_data = source_data['persistent_data'].viewitems()
        assert expected_persistent_data_second_run <= persistent_data
        assert obj.extra_data['id'] == 43
Exemplo n.º 31
0
def test_do_not_repeat(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that results of ``do_not_repeat`` steps survive a second run.

    After the first custom run the persistent data must contain the results
    of steps ``one`` and ``two``.  After the second run, which offers new
    values for those ids and adds step ``three``, the persistent data must
    still hold the first-run values plus the result of ``three``.
    """

    def return_value(val):
        # Factory for a step that stores ``val`` on the object and returns it.
        def _return_value(obj, eng):
            obj.extra_data["id"] = val
            obj.save()
            return {"id": val}

        return _return_value

    custom_wf_steps = [load_from_source_data]
    custom_wf_steps.append(do_not_repeat("one")(return_value(1)))
    custom_wf_steps.append(do_not_repeat("two")(return_value(2)))

    custom_wf_steps_to_repeat = [load_from_source_data]
    custom_wf_steps_to_repeat.append(do_not_repeat("one")(return_value(41)))
    custom_wf_steps_to_repeat.append(do_not_repeat("two")(return_value(42)))
    custom_wf_steps_to_repeat.append(do_not_repeat("three")(return_value(43)))

    # Dict views (Python 2 ``viewitems``) support the ``<=`` subset check.
    expected_persistent_data_first_run = {
        'one': {'id': 1},
        'two': {'id': 2},
    }.viewitems()

    expected_persistent_data_second_run = {
        'one': {'id': 1},
        'two': {'id': 2},
        'three': {'id': 43},
    }.viewitems()

    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=wf_id)

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        # Run the first set of custom steps on the processed object.
        eng = WorkflowEngine.from_uuid(obj.id_workflow)
        eng.callbacks.replace(custom_wf_steps)
        eng.process([obj])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        stored = obj.extra_data['source_data']['persistent_data']
        persistent_data = stored.viewitems()
        assert expected_persistent_data_first_run <= persistent_data
        assert obj.extra_data['id'] == 2
        assert obj.status == ObjectStatus.COMPLETED

        # Rewind to the first step and run the second set of custom steps.
        eng = WorkflowEngine.from_uuid(obj.id_workflow)
        eng.callbacks.replace(custom_wf_steps_to_repeat)
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()
        eng.process([obj])

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]

        stored = obj.extra_data['source_data']['persistent_data']
        persistent_data = stored.viewitems()
        assert expected_persistent_data_second_run <= persistent_data
        assert obj.extra_data['id'] == 43