def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid article record makes ``start`` raise and leaves the object in ERROR."""
    invalid_record = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    workflow_id = workflow_object_class.create(
        data=invalid_record,
        data_type='hep',
        id_user=1,
    ).id

    with pytest.raises(ValidationError):
        start('article', invalid_record, workflow_id)

    # Re-read the object to inspect what the failed run stored on it.
    failed = workflow_object_class.get(workflow_id)

    assert failed.status == ObjectStatus.ERROR
    assert '_error_msg' in failed.extra_data
    assert 'required' in failed.extra_data['_error_msg']
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A holdingpen match coming from a different source does not stop the new workflow."""
    record = generate_record()

    first_uuid = start('article', object_id=build_workflow(record).id)
    es.indices.refresh('holdingpen-hep')
    wf_to_match = WorkflowEngine.from_uuid(first_uuid).objects[0].id

    # the first workflow is now pending in the holdingpen
    pending = workflow_object_class.get(wf_to_match)
    assert pending.status == ObjectStatus.HALTED

    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'
    record['acquisition_source']['source'] = 'but not the source'

    # the second workflow matches the pending one, yet it keeps going
    # because its source is different
    second_uuid = start('article', object_id=build_workflow(record).id)
    update_wf = WorkflowEngine.from_uuid(second_uuid).objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['holdingpen_matches'] == [wf_to_match]
    assert update_wf.extra_data['previously_rejected'] is False
    assert not update_wf.extra_data.get('stopped-matched-holdingpen-wf')
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """The restart counter starts at 0 and reads 1 after the workflow is started again."""
    record = generate_record()

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        start('article', object_id=workflow_id)

        wf = workflow_object_class.get(workflow_id)

        assert wf.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 0
        assert wf.extra_data['restart-count'] == 0

        # Rewind the callback position and persist it before starting again.
        wf.callback_pos = [0]
        wf.save()
        db.session.commit()

        start('article', object_id=workflow_id)

        assert wf.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 1
        assert wf.extra_data['restart-count'] == 1
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback answers 400 / VALIDATION_ERROR for a still-invalid record."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }

    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed = workflow_object_class.get(workflow_id)
    assert failed.status == ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, failed.id, failed.data, failed.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == body["error_code"]
    assert "Validation error." == body["message"]

    returned_extra_data = body["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update matching two distinct records exactly halts for manual resolution."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Use a context manager so the fixture file handle is closed deterministically.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2
    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """A new workflow matching a previously rejected one is flagged as previously rejected."""
    record = generate_record()

    first_uuid = start('article', [record])
    rejected_id = WorkflowEngine.from_uuid(first_uuid).objects[0].id

    rejected = workflow_object_class.get(rejected_id)
    rejected.extra_data["approved"] = False  # reject record
    rejected.continue_workflow()

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    record['titles'][0]['title'] = 'This is an update that will match the wf in the holdingpen'

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    second_uuid = start('article', [record])
    update_wf = WorkflowEngine.from_uuid(second_uuid).objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is False
    assert update_wf.extra_data['previously_rejected'] is True
    assert update_wf.extra_data['previously_rejected_matches'] == [rejected_id]
def test_halt(app, halt_workflow, halt_workflow_conditional):
    """Test halt task."""
    assert 'halttest' in app.extensions['invenio-workflows'].workflows
    assert 'halttestcond' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        data = [{'foo': 'bar'}]

        # The conditional workflow is deliberately given the bare dict rather
        # than a list, to check that form of input as well.
        scenarios = (
            ('halttest', data),
            ('halttestcond', data[0]),
        )

        for workflow_name, payload in scenarios:
            engine = WorkflowEngine.from_uuid(start(workflow_name, payload))
            halted = engine.processed_objects[0]

            assert halted.known_statuses.WAITING == halted.status
            assert WorkflowStatus.HALTED == engine.status

            halted_id = halted.id
            halted.continue_workflow()

            resumed = WorkflowObject.get(halted_id)
            assert resumed.known_statuses.COMPLETED == resumed.status
def test_task_info(app, halt_workflow):
    """Check the current task info reported after running the ``halttest`` workflow."""
    with app.app_context():
        created = WorkflowObject.create({"x": 22})
        start("halttest", created)

        # Reload the object by id before asking for its current task.
        reloaded = WorkflowObject.get(created.id)
        current_task = reloaded.get_current_task_info()

        assert current_task["name"] == "halt_engine"
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Starting an article workflow whose ``source_data`` was removed raises ValueError."""
    wf = build_workflow(generate_record())

    # Drop the source data and persist the change before starting.
    del wf.extra_data["source_data"]
    wf.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=wf.id)
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """A workflow matching a holdingpen object in INITIAL status raises WorkflowsError."""
    record = generate_record()

    existing = workflow_object_class.create(data=record, data_type="hep")
    existing.status = ObjectStatus.INITIAL
    existing.save()
    es.indices.refresh("holdingpen-hep")

    with pytest.raises(WorkflowsError):
        new_workflow_id = build_workflow(record).id
        start("article", object_id=new_workflow_id)
def start_merger(head_id, update_id, current_user_id=None):
    """Start a new ManualMerge workflow to merge two records manually.

    Args:
        head_id: the id of the first record to merge. This record is the one
            that will be updated with the new information.
        update_id: the id of the second record to merge. This record is the
            one that is going to be deleted and replaced by `head`.
        current_user_id: Id of the current user provided by the Flask app.

    Returns:
        (int): the current workflow object's id.
    """
    head = get_db_record('lit', head_id)
    update = get_db_record('lit', update_id)

    workflow_object = workflow_object_class.create(
        data=None,
        id_user=current_user_id,
        data_type='hep'
    )
    wf_id = workflow_object.id  # to retrieve it later

    extra_data = workflow_object.extra_data
    extra_data.update({
        'pid_type': 'lit',  # TODO: support
        'recid_head': head_id,
        'recid_update': update_id,
    })

    # Fall back to 'arxiv' when the update record carries no source.
    update_source = LiteratureReader(update).source or 'arxiv'

    extra_data['update_source'] = update_source.lower()
    extra_data['head_control_number'] = head_id
    extra_data['update_control_number'] = update_id
    extra_data['head_uuid'] = str(head.id)
    extra_data['update_uuid'] = str(update.id)
    extra_data['head'] = head
    extra_data['update'] = update

    workflow_object.save()

    start('manual_merge', object_id=wf_id)

    return wf_id
def test_errors(app, error_workflow):
    """Check the exceptions raised by ``start`` and the ERROR state of a failing object."""
    assert 'errortest' in app.extensions['invenio-workflows'].workflows

    with app.app_context():
        # No data and no object id at all.
        with pytest.raises(WorkflowsMissingData):
            start('errortest')

        # Unknown workflow name.
        with pytest.raises(WorkflowDefinitionError):
            start('doesnotexist', 100)

        # Object id that does not exist.
        with pytest.raises(WorkflowsMissingObject):
            start('errortest', object_id=-1)

        created = WorkflowObject.create({"id": 0})
        db.session.commit()
        created_id = created.id

        with pytest.raises(ZeroDivisionError):
            start('errortest', object_id=created_id)

        failed = WorkflowObject.get(created_id)
        assert failed.known_statuses.ERROR == failed.status
        assert failed.data == {"id": 0, "foo": "bar"}
def test_merge_with_disabled_merge_on_update_feature_flag(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
):
    """With FEATURE_FLAG_ENABLE_MERGER off, the update completes and no arxiv root is stored."""
    merger_disabled = {'FEATURE_FLAG_ENABLE_MERGER': False}

    with patch.dict(workflow_app.config, merger_disabled):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]

        assert update_wf.status == ObjectStatus.COMPLETED

        assert update_wf.extra_data.get('callback_url') is None
        assert update_wf.extra_data.get('conflicts') is None
        assert update_wf.extra_data.get('merged') is True
        assert update_wf.extra_data.get('merger_root') is None
        assert update_wf.extra_data.get('is-update') is True

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_workflow_loads_from_source_data_fails_on_no_source_data(
    load_from_source_data_workflow,
    workflow_app,
    record_from_db,
):
    """Starting ``load_source_data`` with empty extra data raises a ValueError about source_data."""
    wf = workflow_object_class.create(
        data_type='hep',
        data=record_from_db,
        extra_data={},
    )
    workflow_id = wf.id

    with pytest.raises(ValueError) as raised:
        start('load_source_data', object_id=workflow_id)

    assert raised.match(r'source_data.*missing')
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """An update lacking an acquisition source merges cleanly and stores no root."""
    filters_target = 'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters'

    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(
            RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS
        ).id
        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]

        conflicts = update_wf.extra_data.get('conflicts')

        assert update_wf.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert update_wf.extra_data.get('callback_url') is None
        assert update_wf.extra_data.get('is-update') is True
        assert update_wf.extra_data['merger_head_revision'] == 0
        assert update_wf.extra_data['merger_original_root'] == {}

        # source is unknown, so no new root is saved.
        stored_roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not stored_roots
def test_harvesting_arxiv_workflow_already_on_legacy(
    mocked_refextract_extract_refs,
    mocked_api_request_beard_block,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    small_app,
    already_harvested_on_legacy_record,
):
    """A record already harvested on legacy completes and is marked as already ingested."""
    service_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    with small_app.app_context():
        # Only the workflow run itself needs the patched service URLs.
        with mock.patch.dict(small_app.config, service_urls):
            workflow_uuid = start('article', [
                already_harvested_on_legacy_record])

        processed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert processed.status == ObjectStatus.COMPLETED
        assert 'already-ingested' in processed.extra_data
        assert processed.extra_data['already-ingested']
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """With a stored arxiv root, the update merges without conflicts and replaces the root."""
    filters_target = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        head_uuid = factory.record_metadata.id

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # Store a root for the head record before the update workflow runs.
        insert_wf_record_source(json=ARXIV_ROOT, record_uuid=head_uuid, source='arxiv')

        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]

        conflicts = update_wf.extra_data.get('conflicts')

        assert update_wf.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert update_wf.extra_data.get('callback_url') is None
        assert update_wf.extra_data.get('is-update') is True
        assert update_wf.extra_data['merger_head_revision'] == 0
        assert update_wf.extra_data['merger_original_root'] == ARXIV_ROOT

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITH_CONFLICTS
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """``stop_matched_holdingpen_wfs`` completes the matched workflow and records who stopped it."""
    # need to run a wf in order to assign to it the wf definition and a uuid
    # for it
    seed = workflow_object_class.create(
        data_type='hep',
        **simple_record
    )
    workflow_uuid = start('article', object_id=seed.id)

    halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
    halted.status = ObjectStatus.HALTED
    halted.save()
    halted_id = halted.id
    es.indices.refresh('holdingpen-hep')

    newcomer = WorkflowObject.create(data_type='hep', **simple_record)
    newcomer_id = newcomer.id

    match_non_completed_wf_in_holdingpen(newcomer, None)
    assert newcomer.extra_data['holdingpen_matches'] == [halted_id]

    stop_matched_holdingpen_wfs(newcomer, None)

    stopped_wf = workflow_object_class.get(halted_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == newcomer_id
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A record matching a rejected workflow is not auto-approved, yet still completes."""
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    rejected = workflow_object_class.create(
        data=record, status=ObjectStatus.COMPLETED, data_type="hep"
    )
    rejected.extra_data["approved"] = False  # reject it
    rejected.save()
    es.indices.refresh("holdingpen-hep")

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_id = build_workflow(record).id
            engine = WorkflowEngine.from_uuid(
                start("article", object_id=workflow_id)
            )
            resubmitted = engine.processed_objects[0]

            assert not resubmitted.extra_data["auto-approved"]
            assert len(resubmitted.extra_data["previously_rejected_matches"]) > 0
            assert resubmitted.status == ObjectStatus.COMPLETED
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """The validation callback answers 400 / MALFORMED when id and extra data have wrong types."""
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    workflow_id = build_workflow(invalid_record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf = engine.objects[0]

    # Strings are passed where the callback expects the workflow id and the
    # extra data.
    bogus_id = "Alias Investigations"
    bogus_extra_data = "Jessica Jones"
    response = do_validation_callback(
        workflow_app,
        bogus_id,
        wf.data,
        bogus_extra_data,
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == body["error_code"]
    assert "The workflow request is malformed." == body["message"]
    assert "errors" in body
def test_harvesting_arxiv_workflow_accepted(
        mocked, db_only_app, record_oai_arxiv_plots):
    """Test a full harvesting workflow."""
    from invenio_workflows import (
        start,
        WorkflowEngine,
        ObjectStatus,
        workflow_object_class
    )
    from dojson.contrib.marc21.utils import create_record
    from invenio_db import db
    from inspirehep.dojson.hep import hep
    from inspirehep.modules.converter.xslt import convert

    # Convert to MARCXML, then dict, then HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    record_json = hep.do(create_record(marcxml))

    with db_only_app.app_context():
        workflow_uuid = start('article', [record_json])

        halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert halted.files["1407.7587.pdf"]
        assert halted.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        pub_info = halted.data.get('publication_info')
        assert pub_info
        first_pub_info = pub_info[0]
        assert first_pub_info
        assert first_pub_info.get('year') == "2014"
        assert first_pub_info.get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in halted.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with db_only_app.app_context():
        resumed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        resumed_id = resumed.id
        resumed.continue_workflow()

        accepted = workflow_object_class.get(resumed_id)
        # It was accepted
        assert accepted.status == ObjectStatus.COMPLETED
def test_merge_with_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A conflicting update halts the workflow and exposes the conflict and a callback URL."""
    filters_target = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(filters_target, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # By default the root is {}.
        engine = WorkflowEngine.from_uuid(
            start('article', object_id=update_workflow_id)
        )
        update_wf = engine.objects[0]

        conflicts = update_wf.extra_data.get('conflicts')

        assert update_wf.status == ObjectStatus.HALTED
        assert len(conflicts) == 1

        assert update_wf.extra_data.get('callback_url') is not None
        assert update_wf.extra_data.get('is-update') is True
        assert update_wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS
        assert update_wf.extra_data['merger_head_revision'] == 0
        assert update_wf.extra_data['merger_original_root'] == {}
def start_edit_article_workflow(recid):
    """Start an ``edit_article`` workflow for a literature record and redirect to the editor.

    Args:
        recid: id used to look up the 'lit' record to edit.

    Returns:
        A 302 redirect to ``WORKFLOWS_EDITOR_API_URL`` followed by the id of
        the newly created workflow object.

    Raises:
        CallbackRecordNotFoundError: if the record cannot be retrieved.

    The request is aborted with 403 when the 'update' permission on the
    record is not granted.
    """
    try:
        record = get_db_record('lit', recid)
    except RecordGetterError:
        raise CallbackRecordNotFoundError(recid)

    record_permission = RecordPermission.create(action='update', record=record)
    if not record_permission.can():
        abort(403, record_permission)

    # has to be done before start() since, it is deattaching this session
    user_id = current_user.get_id()

    eng_uuid = start('edit_article', data=record)
    workflow_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id
    workflow = workflow_object_class.get(workflow_id)
    workflow.id_user = user_id

    if request.referrer:
        # Escape the whole base URL so every regex metacharacter in it (not
        # only '?') is matched literally; the raw string avoids the invalid
        # '\d' escape sequence in a normal string literal.
        ticket_pattern = re.escape(get_rt_link_for_ticket('')) + r'(?P<ticket_id>\d+)'
        ticket_match = re.match(ticket_pattern, request.referrer)
        if ticket_match:
            ticket_id = int(ticket_match.group('ticket_id'))
            workflow.extra_data['curation_ticket_id'] = ticket_id

    workflow.save()
    db.session.commit()

    url = "{}{}".format(current_app.config['WORKFLOWS_EDITOR_API_URL'], workflow_id)
    return redirect(location=url, code=302)
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A core record is approved automatically and flagged as core."""
    record, categories = core_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id

        with mock.patch.dict(workflow_app.config, extra_config):
            engine = WorkflowEngine.from_uuid(
                start("article", object_id=workflow_id)
            )
            processed = engine.processed_objects[0]

            assert processed.extra_data["approved"] is True
            assert processed.extra_data["auto-approved"] is True
            assert processed.data["core"] is True
def edit_workflow(workflow_app):
    """Log a user in, start an ``edit_article`` workflow on a stored record and return its object."""
    app_client = workflow_app.test_client()
    user = User.query.filter_by(email='*****@*****.**').one()
    login_user_via_session(app_client, user=user)

    hep_schema_url = 'http://localhost:5000/schemas/records/hep.json'
    record = {
        '$schema': hep_schema_url,
        'arxiv_eprints': [
            {
                'categories': ['nucl-th'],
                'value': '1802.03287',
            },
        ],
        'control_number': 123,
        'document_type': ['article'],
        'titles': [{'title': 'Resource Pooling in Large-Scale Content Delivery Systems'}],
        'self': {'$ref': hep_schema_url},
        '_collections': ['Literature'],
    }
    factory = TestRecordMetadata.create_from_kwargs(json=record)

    engine_uuid = start('edit_article', data=factory.record_metadata.json)
    wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    assert wf.status == ObjectStatus.WAITING
    assert wf.extra_data['callback_url']

    return wf
def test_merge_callback_url_with_malformed_workflow(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A malformed payload sent to the merge callback gets a 400 and leaves the workflow halted."""
    filters_target = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'

    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)

        halted = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        conflicts = halted.extra_data.get('conflicts')

        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert halted.status == ObjectStatus.HALTED
        assert expected_url == halted.extra_data.get('callback_url')
        assert len(conflicts) == 1

        assert halted.extra_data.get('is-update') is True
        assert halted.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Plain strings stand in for the metadata and the extra data.
        malformed_payload = {
            'id': halted.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle'
        }

        with workflow_app.test_client() as client:
            response = client.put(
                halted.extra_data.get('callback_url'),
                data=json.dumps(malformed_payload),
                content_type='application/json',
            )

        body = json.loads(response.get_data())

        assert response.status_code == 400
        assert 'The workflow request is malformed.' == body['message']

        # Reload the workflow object: it must still be waiting for resolution.
        still_halted = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        assert still_halted.status == ObjectStatus.HALTED
        assert still_halted.extra_data.get('callback_url') is not None
        assert still_halted.extra_data.get('conflicts') is not None
        assert still_halted.extra_data['merger_root'] is not None

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_equality(app, halt_workflow):
    """Test WorkflowObject comparison functions."""
    with app.app_context():
        first = WorkflowObject.create({"x": 22})
        second = WorkflowObject.create({"x": 22})

        start("halttest", [first, second])

        first_id = first.id
        second_id = second.id

        # Objects with the same data that went through the same workflow
        # are expected to compare equal once reloaded.
        first = WorkflowObject.get(first_id)
        second = WorkflowObject.get(second_id)
        assert first == second

        # Objects holding different data are expected to compare unequal.
        same_payload = WorkflowObject.create({"x": 22})
        other_payload = WorkflowObject.create({"x": 2})
        assert other_payload != same_payload
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """An update matching a pending holdingpen workflow stops the old one and halts itself."""
    record = generate_record()

    first_uuid = start("article", object_id=build_workflow(record).id)
    es.indices.refresh("holdingpen-hep")

    old_wf = WorkflowEngine.from_uuid(first_uuid).objects[0]
    old_wf_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # The update reuses the very same record dict with a modified title.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    update_wf_id = build_workflow(record).id
    start("article", object_id=update_wf_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(update_wf_id)

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    old_wf = workflow_object_class.get(old_wf_id)
    assert old_wf.extra_data['already-in-holding-pen'] is False
    assert old_wf.extra_data['previously_rejected'] is False
    assert old_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert old_wf.extra_data.get('approved') is None
    assert old_wf.extra_data['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid author record makes ``start`` raise and leaves the object in ERROR."""
    invalid_record = {
        'name': {
            'preferred_name': 'John Smith',
            'value': 'Smith, John',
        },
    }

    workflow_id = build_workflow(invalid_record, data_type='authors').id

    with pytest.raises(ValidationError):
        start('author', object_id=workflow_id)

    # Re-read the object to inspect what the failed run stored on it.
    failed = workflow_object_class.get(workflow_id)

    assert failed.status == ObjectStatus.ERROR
    assert '_error_msg' in failed.extra_data
    assert 'required' in failed.extra_data['_error_msg']
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services
):
    """Test refextract from PDF and reference matching for default Configuration
    by going through the entire workflow."""
    cited_categories = [
        'quant-ph',
        'cond-mat.mes-hall',
        'cond-mat.str-el',
        'math-ph',
        'math.MP',
    ]
    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [
            {
                'categories': cited_categories,
                'value': '1308.0815',
            },
        ],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [
            {
                'source': 'arXiv',
                'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation',
            },
        ],
    }

    TestRecordMetadata.create_from_kwargs(
        json=cited_record_json, index='records-hep', pid_type='lit')
    citing_record, categories = insert_citing_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    # Check the citing record's acquisition source against the hep schema
    # before running the workflow.
    acquisition_source_schema = load_schema('hep')['properties']['acquisition_source']
    assert validate(citing_record['acquisition_source'], acquisition_source_schema) is None

    with mock.patch.dict(workflow_app.config, extra_config):
        workflow_id = build_workflow(citing_record).id
        citing_doc_workflow_uuid = start('article', object_id=workflow_id)

        citing_doc_eng = WorkflowEngine.from_uuid(citing_doc_workflow_uuid)
        references = citing_doc_eng.processed_objects[0].data['references']

        assert references[7]['record']['$ref'] == 'http://localhost:5000/api/literature/1000'
        assert references[0]['raw_refs'][0]['source'] == 'arXiv'
def test_harvesting_arxiv_workflow_accepted(mocked, small_app, record_oai_arxiv_plots):
    """Test a full harvesting workflow."""
    # Convert to MARCXML, then dict, then HEP JSON
    marcxml = convert(record_oai_arxiv_plots, "oaiarXiv2marcxml.xsl")
    record_json = hep.do(create_record(marcxml))

    with small_app.app_context():
        workflow_uuid = start('article', [record_json])

        halted = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert halted.status == ObjectStatus.HALTED
        assert halted.data_type == "hep"

        # Files should have been attached (tarball + pdf)
        assert halted.files["1407.7587.pdf"]
        assert halted.files["1407.7587.tar.gz"]

        # A publication note should have been extracted
        pub_info = halted.data.get('publication_info')
        assert pub_info
        first_pub_info = pub_info[0]
        assert first_pub_info
        assert first_pub_info.get('year') == 2014
        assert first_pub_info.get('journal_title') == "J. Math. Phys."

        # This record should not have been touched yet
        assert "approved" not in halted.extra_data

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        halted.remove_action()
        halted.extra_data["approved"] = True
        halted.extra_data["core"] = True
        halted.save()

        db.session.commit()

    with small_app.app_context():
        resumed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
        resumed_id = resumed.id
        resumed.continue_workflow()

        accepted = workflow_object_class.get(resumed_id)
        # It was accepted
        assert accepted.status == ObjectStatus.COMPLETED
def test_merge_without_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A conflict-free update completes, and a later call to the merge callback gets a 400."""
    import copy

    with patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source']
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        # Work on a private copy: updating RECORD_WITHOUT_CONFLICTS in place
        # would leak the injected arxiv_eprints into every other test that
        # uses the same module-level record.
        record_update = copy.deepcopy(RECORD_WITHOUT_CONFLICTS)
        record_update.update({
            'arxiv_eprints': factory.record_metadata.json.get('arxiv_eprints')
        })

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert obj.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert obj.extra_data.get('is-update') is True

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == record_update

        payload = {
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        }

        with workflow_app.test_client() as client:
            response = client.put(
                url,
                data=json.dumps(payload),
                content_type='application/json',
            )

        assert response.status_code == 400
def test_workflow_checks_affiliations_if_record_is_rejected_by_curator(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A rejected record is sent with hidden collections taken from its affiliations."""
    record = generate_record()
    first_author, second_author = record['authors'][0], record['authors'][1]
    first_author['raw_affiliations'] = [
        {"value": "IN2P3."},
        {"value": "Some words with CErN, inside."},
    ]
    second_author['raw_affiliations'] = [{"value": "Fermilab?"}]

    workflow_id = build_workflow(record).id

    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000"
    }
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=workflow_id)

        # Reject the record and let the workflow run on synchronously.
        wf = workflow_object_class.get(workflow_id)
        wf.extra_data['approved'] = False
        wf.save()
        wf.continue_workflow(delayed=False)

        first_request = mocked_external_services.request_history[0]
        collections_in_record = first_request.json()['_collections']

        assert "CDS Hidden" in collections_in_record
        assert "HAL Hidden" in collections_in_record
        assert "Fermilab" in collections_in_record
        assert "Literature" not in collections_in_record
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback answers 400 / VALIDATION_ERROR for a still-invalid record."""
    invalid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
        'preprint_date': 'Jessica Jones',
    }

    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start('article', object_id=workflow_id)

    failed = workflow_object_class.get(workflow_id)
    assert failed.status == ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, failed.id, failed.data, failed.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'VALIDATION_ERROR' == body['error_code']
    assert 'Validation error.' == body['message']

    returned_extra_data = body['workflow']['_extra_data']
    assert returned_extra_data['callback_url']
    assert len(returned_extra_data['validation_errors']) == 1
def test_workflow_with_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """Starting the workflow on this record must raise ``ValidationError``.

    Afterwards ``fake_validation`` is expected to have been called twice and
    the workflow object to be in ``ERROR`` status.
    """
    arxiv_eprint = {
        "categories": ["WRONG_CATEGORY", "hep-th"],
        "value": "1703.04802",
    }
    record_with_validation_error = {
        "$schema": "https://labs.inspirehep.net/schemas/records/hep.json",
        "titles": [{"title": "Update without conflicts title."}],
        "arxiv_eprints": [arxiv_eprint],
        "document_type": ["article"],
        "_collections": ["Literature"],
        "acquisition_source": {"source": "arXiv"},
    }
    workflow = build_workflow(record_with_validation_error)

    with pytest.raises(ValidationError):
        start("article", object_id=workflow.id)

    assert fake_validation.call_count == 2
    assert workflow.status == ObjectStatus.ERROR
def test_update_record_goes_through_api_version_of_store_record_connection_timeout(
    mocked_request_in_upload,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """A connection error while storing the record puts the workflow in ERROR.

    ``INSPIREHEP_URL`` is pointed at an unusable address, ``start`` must
    raise ``requests.exceptions.ConnectionError`` and the stored error
    message must end with ``ConnectTimeout``.
    """
    workflow_id = build_workflow(record_from_db).id

    unreachable_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://go_to_wrong_address.bad__:98765",
    }
    with mock.patch.dict(workflow_app.config, unreachable_config):
        with pytest.raises(requests.exceptions.ConnectionError):
            start("article", object_id=workflow_id)

        failed_wf = workflow_object_class.get(workflow_id)
        assert failed_wf.status == ObjectStatus.ERROR

        error_message = failed_wf.extra_data['_error_msg']
        assert error_message.endswith("\nConnectTimeout\n") is True
def test_merge_callback_url_with_malformed_workflow(workflow_app, enable_merge_on_update, disable_file_upload):
    """A malformed payload sent to the merge callback is rejected with 400.

    The workflow halts on merge conflicts; a PUT with non-dict ``metadata``
    and ``_extra_data`` must return the "malformed" message and leave the
    workflow halted with its callback URL and conflicts untouched.
    """
    import copy

    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')

    # Work on a private copy: updating the module-level RECORD_WITH_CONFLICTS
    # in place would leak this test's ``dois`` into every other test using it.
    record_update = copy.deepcopy(RECORD_WITH_CONFLICTS)
    record_update.update({
        'dois': factory.record_metadata.json.get('dois'),
    })

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert obj.status == ObjectStatus.HALTED
    assert expected_url == obj.extra_data.get('callback_url')
    assert len(conflicts) == 1
    assert obj.extra_data.get('is-update') is True

    # ``metadata`` and ``_extra_data`` are plain strings here, not objects.
    payload = {
        'id': obj.id,
        'metadata': 'Jessica Jones',
        '_extra_data': 'Frank Castle'
    }

    with workflow_app.test_client() as client:
        response = client.put(
            obj.extra_data.get('callback_url'),
            data=json.dumps(payload),
            content_type='application/json',
        )

    data = json.loads(response.get_data())
    expected_message = 'The workflow request is malformed.'

    assert response.status_code == 400
    assert expected_message == data['message']

    # Reload the object to check that the failed callback changed nothing.
    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data.get('callback_url') is not None
    assert obj.extra_data.get('conflicts') is not None
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """The author workflow raises ``ValidationError`` and records the error.

    After the failure the workflow object must be in ``ERROR`` status with
    an ``_error_msg`` that mentions ``required``.
    """
    invalid_record = {
        'name': {
            'preferred_name': 'John Smith',
            'value': 'Smith, John',
        },
    }
    created = workflow_object_class.create(
        data=invalid_record,
        data_type='authors',
        id_user=1,
    )
    obj_id = created.id

    with pytest.raises(ValidationError):
        start('author', invalid_record, obj_id)

    failed_wf = workflow_object_class.get(obj_id)
    assert failed_wf.status == ObjectStatus.ERROR

    extra_data = failed_wf.extra_data
    assert '_error_msg' in extra_data
    assert 'required' in extra_data['_error_msg']
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A second workflow is flagged when it matches a rejected one.

    The first workflow is rejected and completed; a second one for the same
    record with a changed title must then report ``previously_rejected``
    and list the first workflow in ``previously_rejected_matches``.
    """
    record = generate_record()

    first_workflow_id = build_workflow(record).id
    first_engine = WorkflowEngine.from_uuid(
        start("article", object_id=first_workflow_id))
    rejected_id = first_engine.objects[0].id

    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False  # reject record
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get("approved") is False

    current_search.flush_and_refresh("holdingpen-hep")

    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    second_workflow_id = build_workflow(record).id
    second_engine = WorkflowEngine.from_uuid(
        start("article", object_id=second_workflow_id))
    second_wf = second_engine.objects[0]

    assert second_wf.extra_data["previously_rejected"] is True
    assert second_wf.extra_data["previously_rejected_matches"] == [rejected_id]
def test_restart(app, restart_workflow):
    """Exercise restart and resume on the ``restarttest`` workflow.

    Covers: the initial halt, restarting the engine, resuming the halted
    object to completion, and restarting again from the object.
    """
    registered_workflows = app.extensions['invenio-workflows'].workflows
    assert 'restarttest' in registered_workflows

    with app.app_context():
        data = {}

        eng_uuid = start('restarttest', data)
        engine = WorkflowEngine.from_uuid(eng_uuid)
        wf_obj = engine.processed_objects[0]

        assert wf_obj.known_statuses.HALTED == wf_obj.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_obj.data == {"title": "foo"}
        assert wf_obj.get_action() == "foo"
        assert wf_obj.get_action_message() == "Test"

        # Restart shall have no effect (still halted)
        restarted_uuid = restart(eng_uuid)
        assert restarted_uuid == eng_uuid

        engine = WorkflowEngine.from_uuid(eng_uuid)
        wf_obj = engine.processed_objects[0]

        assert wf_obj.known_statuses.HALTED == wf_obj.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_obj.data == {"title": {"value": "bar"}}
        assert wf_obj.get_action() == "foo"

        wf_obj.remove_action()
        assert wf_obj.get_action() is None

        obj_id = wf_obj.id

        # Now it should resume the next task
        resume(obj_id)

        wf_obj = WorkflowObject.get(obj_id)
        assert wf_obj.known_statuses.COMPLETED == wf_obj.status
        assert wf_obj.extra_data.get('test') == 'test'
        assert wf_obj.data.get('title').get('source') == 'TEST'

        # We restart the object again
        restart(wf_obj.workflow.uuid, data=wf_obj)
        assert wf_obj.known_statuses.HALTED == wf_obj.status
        assert WorkflowStatus.HALTED == engine.status
        assert wf_obj.data == {"title": {"value": "bar"}}
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """A valid record does not end in ``ERROR`` nor store ``_error_msg``."""
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }
    workflow_id = build_workflow(valid_record).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]

    assert wf_obj.status != ObjectStatus.ERROR
    assert "_error_msg" not in wf_obj.extra_data
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """An update with a stored root merges without conflicts.

    A root for source ``arxiv`` is inserted before the workflow starts; the
    workflow must complete without conflicts or callback URL, and the stored
    root must afterwards equal the update.
    """
    import copy

    with patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source']
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        arxiv_eprints = factory.record_metadata.json.get('arxiv_eprints')

        # Work on private copies: updating the module-level constants in
        # place would leak this test's ``arxiv_eprints`` into other tests.
        record_update = copy.deepcopy(RECORD_WITH_CONFLICTS)
        record_update.update({
            'arxiv_eprints': arxiv_eprints
        })

        arxiv_root = copy.deepcopy(ARXIV_ROOT)
        arxiv_root.update({
            'arxiv_eprints': arxiv_eprints
        })

        insert_wf_record_source(
            json=arxiv_root,
            record_uuid=factory.record_metadata.id,
            source='arxiv',
        )

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True

        # The root stored for this source must now be the update itself.
        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == record_update
def test_merge_with_disabled_merge_on_update_feature_flag(
        workflow_app, disable_file_upload):
    """Check the merge outcome when ``enable_merge_on_update`` is not requested.

    The workflow must expose neither a callback URL nor conflicts, and must
    be flagged as ``merged``.
    """
    import copy

    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')
    existing_record = factory.record_metadata.json

    # Work on a private copy: updating the module-level
    # RECORD_WITHOUT_CONFLICTS in place would leak state into other tests.
    record_update = copy.deepcopy(RECORD_WITHOUT_CONFLICTS)
    record_update.update({
        '$schema': existing_record.get('$schema'),
        'dois': existing_record.get('dois'),
    })

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    assert obj.extra_data.get('callback_url') is None
    assert obj.extra_data.get('conflicts') is None
    assert obj.extra_data.get('merged') is True
def test_merge_with_conflicts(workflow_app, enable_merge_on_update, disable_file_upload):
    """An update that conflicts with the existing record halts the workflow.

    Exactly one conflict is expected, together with a callback URL and the
    ``is-update`` flag.
    """
    import copy

    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')

    # Work on a private copy: updating the module-level RECORD_WITH_CONFLICTS
    # in place would leak this test's ``dois`` into every other test using it.
    record_update = copy.deepcopy(RECORD_WITH_CONFLICTS)
    record_update.update({'dois': factory.record_metadata.json.get('dois')})

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    assert obj.status == ObjectStatus.HALTED
    assert len(conflicts) == 1

    assert obj.extra_data.get('callback_url') is not None
    assert obj.extra_data.get('is-update') is True
def test_update_exact_matched_goes_trough_the_workflow(
        mocked_is_pdf_link,
        mocked_download_arxiv,
        mocked_api_request_beard,
        mocked_api_request_magpie,
        workflow_app,
        mocked_external_services,
        record_from_db):
    """An exact match against ``record_from_db`` runs as an approved update.

    The workflow must complete with no holdingpen matches and with the
    record's control number as the exact and approved match.
    """
    record = record_from_db
    control_number = record.get('control_number')

    engine = WorkflowEngine.from_uuid(start('article', [record]))
    wf_obj = workflow_object_class.get(engine.objects[0].id)
    extra_data = wf_obj.extra_data

    # Nothing pending or rejected in the holdingpen.
    assert extra_data['already-in-holding-pen'] is False
    assert extra_data['holdingpen_matches'] == []
    assert extra_data['previously_rejected'] is False
    assert not extra_data.get('stopped-matched-holdingpen-wf')

    # Treated as an update of the exactly matched record.
    assert extra_data['is-update']
    assert extra_data['exact-matched']
    assert extra_data['matches']['exact'] == [control_number]
    assert extra_data['matches']['approved'] == control_number
    assert extra_data['approved']

    assert wf_obj.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_already_on_legacy(
        mocked_refextract_extract_refs,
        mocked_api_request_magpie,
        mocked_api_request_beard,
        mocked_download,
        small_app):
    """The workflow completes and flags the record as ``already-ingested``."""
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    with small_app.app_context():
        with mock.patch.dict(small_app.config, extra_config):
            harvested = already_harvested_on_legacy_record()
            workflow_uuid = start('article', [harvested])

            engine = WorkflowEngine.from_uuid(workflow_uuid)
            wf_obj = engine.processed_objects[0]

            assert wf_obj.status == ObjectStatus.COMPLETED
            assert 'already-ingested' in wf_obj.extra_data
            assert wf_obj.extra_data['already-ingested']
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """A valid record passed to ``start`` does not end in ``ERROR``."""
    valid_record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    engine = WorkflowEngine.from_uuid(start('article', [valid_record]))
    wf_obj = engine.objects[0]

    assert wf_obj.status != ObjectStatus.ERROR
    assert '_error_msg' not in wf_obj.extra_data
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """The validation callback answers 404 for an unknown workflow id."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }
    workflow_id = build_workflow(record).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]

    # 1111 is used as the id instead of the id of the workflow just run.
    response = do_validation_callback(
        workflow_app, 1111, wf_obj.data, wf_obj.extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 404
    assert "WORKFLOW_NOT_FOUND" == body["error_code"]
    assert 'The workflow with id "1111" was not found.' == body["message"]
def test_harvesting_arxiv_workflow_already_on_legacy(
        mocked_download,
        mocked_is_pdf,
        mocked_refextract_extract_refs,
        mocked_api_request_magpie,
        mocked_api_request_beard,
        workflow_app,
        mocked_external_services):
    """The workflow completes and flags the record as ``already-ingested``.

    The categories returned alongside the record are configured as
    ``ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY``.
    """
    record, categories = already_harvested_on_legacy_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES_ALREADY_HARVESTED_ON_LEGACY': categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_uuid = start('article', [record])

            engine = WorkflowEngine.from_uuid(workflow_uuid)
            wf_obj = engine.processed_objects[0]

            assert wf_obj.status == ObjectStatus.COMPLETED
            assert 'already-ingested' in wf_obj.extra_data
            assert wf_obj.extra_data['already-ingested']
def test_validation_error_callback_with_a_valid(workflow_app):
    """The validation callback answers 400 when the workflow is not in ERROR."""
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }
    workflow_id = build_workflow(valid_record).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]
    assert wf_obj.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, wf_obj.id, wf_obj.data, wf_obj.extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == body["error_code"]
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
        mocked_download,
        mocked_is_pdf,
        mocked_refextract_extract_refs,
        mocked_api_request_magpie,
        mocked_api_request_beard,
        workflow_app,
        mocked_external_services):
    """A core record is approved, auto-approved and marked as ``core``.

    The categories returned alongside the record are configured as
    ``ARXIV_CATEGORIES``.
    """
    record, categories = core_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_uuid = start('article', [record])

            engine = WorkflowEngine.from_uuid(workflow_uuid)
            wf_obj = engine.processed_objects[0]

            assert wf_obj.extra_data['approved'] is True
            assert wf_obj.extra_data['auto-approved'] is True
            assert wf_obj.data['core'] is True
def test_merge_without_conflicts_callback_url(workflow_app, enable_merge_on_update, disable_file_upload):
    """The merge callback rejects a workflow that has no conflicts.

    The update merges without conflicts; a PUT to the conflict-resolution
    URL for that workflow must then answer 400.
    """
    import copy

    factory = TestRecordMetadata.create_from_file(__name__, 'record_for_merging.json')
    existing_record = factory.record_metadata.json

    # Work on a private copy: updating the module-level
    # RECORD_WITHOUT_CONFLICTS in place would leak state into other tests.
    record_update = copy.deepcopy(RECORD_WITHOUT_CONFLICTS)
    record_update.update({
        '$schema': existing_record.get('$schema'),
        'dois': existing_record.get('dois'),
    })

    eng_uuid = start('article', [record_update])

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    conflicts = obj.extra_data.get('conflicts')

    # The URL is hard-coded here rather than read from ``extra_data``.
    url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    assert conflicts is None
    assert obj.extra_data.get('is-update') is True

    payload = {
        'id': obj.id,
        'metadata': obj.data,
        '_extra_data': obj.extra_data
    }

    with workflow_app.test_client() as client:
        response = client.put(
            url,
            data=json.dumps(payload),
            content_type='application/json',
        )

    assert response.status_code == 400
def test_stop_matched_holdingpen_wfs(app, simple_record):
    """A halted workflow matched in the holdingpen gets stopped.

    After ``stop_matched_holdingpen_wfs`` the matched workflow must be
    ``COMPLETED`` and record the id of the workflow that stopped it.
    """
    # need to run a wf in order to assign to it the wf definition and a uuid
    # for it
    engine = WorkflowEngine.from_uuid(start('article', [simple_record]))
    halted_wf = engine.processed_objects[0]
    halted_wf.status = ObjectStatus.HALTED
    halted_wf.save()
    halted_id = halted_wf.id
    es.indices.refresh('holdingpen-hep')

    incoming_wf = WorkflowObject.create(data=simple_record, data_type='hep')
    incoming_id = incoming_wf.id

    match_non_completed_wf_in_holdingpen(incoming_wf, None)
    assert incoming_wf.extra_data['holdingpen_matches'] == [halted_id]

    stop_matched_holdingpen_wfs(incoming_wf, None)

    stopped_wf = workflow_object_class.get(halted_id)
    assert stopped_wf.status == ObjectStatus.COMPLETED
    assert stopped_wf.extra_data['stopped-by-wf'] == incoming_id
def test_validation_error_callback_with_malformed_with_invalid_types(
        workflow_app):
    """The validation callback answers 400 ``MALFORMED`` for wrong types.

    Strings are passed as the workflow id and as the extra data.
    """
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    workflow_id = build_workflow(record).id

    engine = WorkflowEngine.from_uuid(start('article', object_id=workflow_id))
    wf_obj = engine.objects[0]

    malformed_id = 'Alias Investigations'
    malformed_extra_data = 'Jessica Jones'
    response = do_validation_callback(
        workflow_app, malformed_id, wf_obj.data, malformed_extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'MALFORMED' == body['error_code']
    assert 'The workflow request is malformed.' == body['message']
    assert 'errors' in body
def test_merge_with_conflicts(workflow_app, enable_merge_on_update, record_to_merge):
    """An update that conflicts with the existing record halts the workflow.

    Exactly one conflict is expected, together with a callback URL and the
    ``is-update`` flag.
    """
    titles = [{'title': name} for name in ('Jessica Jones', 'Luke Cage', 'Frank Castle')]
    authors = [
        {'full_name': full_name}
        for full_name in ('Maldacena, J.', 'Strominger, A.')
    ]
    record_update = {
        '$schema': 'http://schemas.stark-industries.com/schemas/records/avengers.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': titles,
        'authors': authors,
        'abstracts': [{'source': 'arxiv', 'value': 'A basic abstract.'}],
        'report_numbers': [{'value': 'DESY-17-036'}],
        'dois': [{'value': '10.1007/978-3-319-15001-7'}],
    }

    engine = WorkflowEngine.from_uuid(start('article', [record_update]))
    wf_obj = engine.objects[0]
    conflicts = wf_obj.extra_data.get('conflicts')

    assert wf_obj.status == ObjectStatus.HALTED
    assert len(conflicts) == 1

    assert wf_obj.extra_data.get('callback_url') is not None
    assert wf_obj.extra_data.get('is-update') is True
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """An update without acquisition source merges and stores no root.

    The workflow must complete without conflicts or callback URL, and no
    workflow record source may exist for the record afterwards.
    """
    import copy

    with patch(
        'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters',
        ['acquisition_source.source']
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        # Work on a private copy: updating the module-level constant in
        # place would leak this test's ``arxiv_eprints`` into other tests.
        record_update = copy.deepcopy(
            RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS)
        record_update.update({
            'arxiv_eprints': factory.record_metadata.json.get('arxiv_eprints')
        })

        eng_uuid = start('article', [record_update])

        eng = WorkflowEngine.from_uuid(eng_uuid)
        obj = eng.objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True

        # source is unknown, so no new root is saved.
        roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not roots
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A record matching a rejected workflow is not auto-approved.

    A completed, rejected workflow object is created for the record first;
    the new workflow must complete without ``auto-approved`` and report at
    least one ``previously_rejected_matches`` entry.
    """
    record, categories = core_record()
    record['arxiv_eprints'][0]['categories'] = ['q-bio.GN']

    rejected_wf = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type='hep',
    )
    rejected_wf.extra_data['approved'] = False  # reject it
    rejected_wf.save()
    es.indices.refresh('holdingpen-hep')

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_id = build_workflow(record).id
            engine = WorkflowEngine.from_uuid(
                start('article', object_id=workflow_id))
            new_wf = engine.processed_objects[0]

            assert not new_wf.extra_data['auto-approved']
            assert len(new_wf.extra_data['previously_rejected_matches']) > 0
            assert new_wf.status == ObjectStatus.COMPLETED
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An exact match against ``record_from_db`` runs as an approved update.

    The workflow must complete with no holdingpen matches and with the
    record's control number as the exact and approved match.
    """
    record = record_from_db
    control_number = record.get("control_number")
    workflow_id = build_workflow(record).id

    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = workflow_object_class.get(engine.objects[0].id)
    extra_data = wf_obj.extra_data

    # Nothing pending or rejected in the holdingpen.
    assert extra_data["holdingpen_matches"] == []
    assert extra_data["previously_rejected"] is False
    assert not extra_data.get("stopped-matched-holdingpen-wf")

    # Treated as an update of the exactly matched record.
    assert extra_data["is-update"]
    assert extra_data["exact-matched"]
    assert extra_data["matches"]["exact"] == [control_number]
    assert extra_data["matches"]["approved"] == control_number
    assert extra_data["approved"]

    assert wf_obj.status == ObjectStatus.COMPLETED
def test_regression_non_relevant_update_is_not_rejected_and_gets_merged(
    mocked_api_request_magpie,
    mocked_beard_api,
    mock_is_record_relevant,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """An update is approved and merged without the relevance check.

    ``mock_is_record_relevant`` must never be called, and the workflow must
    end flagged as update, approved, auto-approved and merged.
    """
    factory = TestRecordMetadata.create_from_file(
        __name__, 'merge_record_arxiv.json', index_name='records-hep'
    )
    existing_record = factory.record_metadata.json
    update_workflow_id = build_workflow(existing_record).id

    engine = WorkflowEngine.from_uuid(
        start('article', object_id=update_workflow_id))
    wf_obj = engine.objects[0]

    mock_is_record_relevant.assert_not_called()

    assert wf_obj.extra_data.get('is-update') is True
    for flag in ('approved', 'auto-approved', 'merged'):
        assert wf_obj.extra_data[flag] is True
def test_merge_with_conflicts_callback_url_and_resolve(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Resolving merge conflicts through the callback completes the workflow.

    The workflow first halts with one conflict; after a PUT to the callback
    URL it must be completed, approved and merged, and the root stored for
    source ``arxiv`` must equal ``RECORD_WITH_CONFLICTS``.
    """
    with patch(
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters',
        ['acquisition_source.source']
    ):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        existing_record = factory.record_metadata.json

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)

        halted_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        halted_extra_data = halted_wf.extra_data

        expected_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert halted_wf.status == ObjectStatus.HALTED
        assert expected_url == halted_extra_data.get('callback_url')
        assert len(halted_extra_data.get('conflicts')) == 1
        assert halted_extra_data.get('is-update') is True
        assert halted_wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # resolve conflicts
        halted_wf.data['number_of_pages'] = existing_record.get('number_of_pages')
        del halted_wf.extra_data['conflicts']

        payload = {
            'id': halted_wf.id,
            'metadata': halted_wf.data,
            '_extra_data': halted_wf.extra_data,
        }

        with workflow_app.test_client() as client:
            response = client.put(
                halted_wf.extra_data.get('callback_url'),
                data=json.dumps(payload),
                content_type='application/json',
            )
        assert response.status_code == 200

        # Reload the object to observe the state after the callback.
        resolved_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        resolved_extra_data = resolved_wf.extra_data

        assert resolved_wf.status == ObjectStatus.COMPLETED
        assert resolved_extra_data.get('conflicts') is None

        assert resolved_extra_data.get('approved') is True
        assert resolved_extra_data.get('is-update') is True
        assert resolved_extra_data.get('merged') is True

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITH_CONFLICTS