def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a holdingpen match from a different source is only recorded.

    The second workflow must list the pending workflow among its holdingpen
    matches without carrying the ``stopped-matched-holdingpen-wf`` marker.
    """
    record = generate_record()

    # First submission: it is expected to halt, i.e. stay pending in the
    # holdingpen.
    first_wf_id = build_workflow(record).id
    first_engine_uuid = start('article', object_id=first_wf_id)
    es.indices.refresh('holdingpen-hep')

    pending_wf_id = WorkflowEngine.from_uuid(first_engine_uuid).objects[0].id
    pending_wf = workflow_object_class.get(pending_wf_id)
    assert pending_wf.status == ObjectStatus.HALTED

    # Second submission: same record with another title and, crucially,
    # another acquisition source.
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    record['acquisition_source']['source'] = 'but not the source'

    second_wf_id = build_workflow(record).id
    second_engine_uuid = start('article', object_id=second_wf_id)
    second_wf = WorkflowEngine.from_uuid(second_engine_uuid).objects[0]

    extra_data = second_wf.extra_data
    assert extra_data['already-in-holding-pen'] is True
    assert extra_data['holdingpen_matches'] == [pending_wf_id]
    assert extra_data['previously_rejected'] is False
    assert not extra_data.get('stopped-matched-holdingpen-wf')
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a holdingpen match from a different source is only recorded.

    The second workflow must list the pending workflow among its holdingpen
    matches without carrying the ``stopped-matched-holdingpen-wf`` marker.
    """
    record = generate_record()

    # Initial workflow: expected to halt and therefore wait in the holdingpen.
    initial_wf_id = build_workflow(record).id
    initial_engine_uuid = start('article', object_id=initial_wf_id)
    es.indices.refresh('holdingpen-hep')

    initial_engine = WorkflowEngine.from_uuid(initial_engine_uuid)
    matched_id = initial_engine.objects[0].id
    matched_wf = workflow_object_class.get(matched_id)
    assert matched_wf.status == ObjectStatus.HALTED

    # Update of the same record, but announced by another source.
    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title
    record['acquisition_source']['source'] = 'but not the source'

    update_wf_id = build_workflow(record).id
    update_engine_uuid = start('article', object_id=update_wf_id)
    update_wf = WorkflowEngine.from_uuid(update_engine_uuid).objects[0]

    update_extra = update_wf.extra_data
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra['holdingpen_matches'] == [matched_id]
    assert update_extra['previously_rejected'] is False
    assert not update_extra.get('stopped-matched-holdingpen-wf')
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that an update matching a rejected workflow is flagged as such.

    The first workflow is rejected and completed; the second one must report
    it under ``previously_rejected_matches``.
    """
    record = generate_record()

    first_wf_id = build_workflow(record).id
    first_engine_uuid = start("article", object_id=first_wf_id)
    rejected_id = WorkflowEngine.from_uuid(first_engine_uuid).objects[0].id

    # Reject the record and let the workflow run to its end.
    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False
    rejected_wf.continue_workflow()

    # Fetch it again to look at the state after continuing.
    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    # The update matches the rejected workflow in the holdingpen.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    second_wf_id = build_workflow(record).id
    second_engine_uuid = start("article", object_id=second_wf_id)
    second_wf = WorkflowEngine.from_uuid(second_engine_uuid).objects[0]

    assert second_wf.extra_data["previously_rejected"] is True
    assert second_wf.extra_data["previously_rejected_matches"] == [rejected_id]
def test_cli_restart_by_error_restarts_one_wf_from_current_step(app_cli_runner):
    """Check that ``restart_by_error`` only reports the matching workflow.

    Two workflows are put in ERROR state with different error messages; the
    command is invoked with ``RobotUpload`` and must report exactly one
    workflow to restart from the current step.
    """
    obj_1 = build_workflow({}, data_type='hep')
    obj_1.status = ObjectStatus.ERROR
    obj_1.extra_data["_error_msg"] = "Error in SendRobotUpload"
    obj_1.save()

    obj_2 = build_workflow({}, data_type='hep')
    obj_2.status = ObjectStatus.ERROR
    obj_2.extra_data["_error_msg"] = "Error in WebColl"
    # Save obj_2 itself (not obj_1 a second time) so that the non-matching
    # workflow is stored with its error state as well.
    obj_2.save()

    result = app_cli_runner.invoke(workflows, ['restart_by_error', 'RobotUpload'])

    assert "Found 1 workflows to restart from current step" in result.output_bytes
def test_responses_with_etag(workflow_app):
    """Check conditional GET handling of the holdingpen API.

    A matching ``If-Modified-Since`` or ``If-None-Match`` header must yield a
    304, while a non-matching ETag must yield a full 200 response.
    """
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )
    wf = workflow_object_class.get(
        build_workflow(factory.record_metadata.json).id
    )
    wf.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(wf.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')

        # Plain request: collect the validators returned by the server.
        plain = client.get(workflow_url)
        assert plain.status_code == 200
        etag = plain.headers['ETag']
        last_modified = plain.headers['Last-Modified']

        since = client.get(
            workflow_url, headers={'If-Modified-Since': last_modified}
        )
        assert since.status_code == 304

        same_etag = client.get(workflow_url, headers={'If-None-Match': etag})
        assert same_etag.status_code == 304

        other_etag = client.get(
            workflow_url, headers={'If-None-Match': 'Jessica Jones'}
        )
        assert other_etag.status_code == 200
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Check that the validation callback answers 404 for an unknown id."""
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    engine_uuid = start('article', object_id=build_workflow(record).id)
    wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    # 1111 is used as the id of a workflow that is expected not to exist.
    response = do_validation_callback(workflow_app, 1111, wf.data, wf.extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 404
    assert 'WORKFLOW_NOT_FOUND' == body['error_code']
    assert 'The workflow with id "1111" was not found.' == body['message']
def test_validation_error_callback_with_a_valid(workflow_app):
    """Check that the callback refuses a workflow that is not in ERROR state."""
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    engine_uuid = start('article', object_id=build_workflow(record).id)
    wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]
    assert wf.status != ObjectStatus.ERROR

    response = do_validation_callback(workflow_app, wf.id, wf.data, wf.extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'WORKFLOW_NOT_IN_ERROR_STATE' == body['error_code']
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Check the error bookkeeping of a workflow started with an invalid record.

    Starting the workflow must raise ``ValidationError`` and leave the object
    in ERROR state with an error message, a callback URL and a list of
    validation errors in its extra data.
    """
    # The record has no ``_collections`` key.
    record = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    wf_id = build_workflow(record).id

    with pytest.raises(ValidationError):
        start('article', object_id=wf_id)

    wf = workflow_object_class.get(wf_id)
    extra_data = wf.extra_data

    assert wf.status == ObjectStatus.ERROR
    assert '_error_msg' in extra_data
    assert 'required' in extra_data['_error_msg']

    callback_url = 'http://localhost:5000/callback/workflows/resolve_validation_errors'
    assert callback_url == extra_data['callback_url']

    assert extra_data['validation_errors']
    first_error = extra_data['validation_errors'][0]
    assert 'message' in first_error
    assert 'path' in first_error
def test_conflict_creates_ticket(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the ticket requests issued around a merge conflict.

    The first recorded external request must start with the expected ticket
    payload, and after continuing the workflow the second one must target the
    ticket edit URL with a "resolved" status.
    """
    filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        start('article', object_id=wf_id)
        wf = workflow_object_class.get(wf_id)

        ticket_template = u'content=Queue%3A+HEP_conflicts%0AText%3A+Merge+conflict+needs+to+be+resolved.%0A++%0A++https%3A%2F%2Flocalhost%3A5000%2Feditor%2Fholdingpen%2F{wf_id}%0ASubject%3A+arXiv%3A1703.04802+%28%23None%29%0Aid%3A+ticket%2Fnew%0ACF'
        expected_ticket = ticket_template.format(wf_id=wf.id)

        creation_request = mocked_external_services.request_history[0]
        assert creation_request.text.startswith(expected_ticket)
        assert wf.extra_data['conflict-ticket-id']

        expected_close_url = 'http://rt.inspire/ticket/{ticket_id}/edit'.format(
            ticket_id=wf.extra_data['conflict-ticket-id']
        )

        wf.continue_workflow()

        # Read the history again only after continuing the workflow.
        close_request = mocked_external_services.request_history[1]
        assert close_request.url == expected_close_url
        assert close_request.text == u'content=Status%3A+resolved'
def test_cli_restart_by_error_restarts_one_wf_from_beginning(app_cli_runner):
    """Check that ``restart_by_error --from-beginning`` reports both workflows.

    Two workflows are put in ERROR state with messages that both contain
    ``WebColl``; the command must report two workflows to restart from the
    first step.
    """
    obj_1 = build_workflow({}, data_type='hep')
    obj_1.status = ObjectStatus.ERROR
    obj_1.extra_data["_error_msg"] = "Error in WebColl number 1"
    obj_1.save()

    obj_2 = build_workflow({}, data_type='hep')
    obj_2.status = ObjectStatus.ERROR
    obj_2.extra_data["_error_msg"] = "Error in WebColl number 2"
    # Save obj_2 itself (not obj_1 a second time) so that both workflows are
    # stored with their error state.
    obj_2.save()

    result = app_cli_runner.invoke(
        workflows, ['restart_by_error', 'WebColl', '--from-beginning'])
    output = result.output_bytes

    assert 'Found 2 workflows to restart from first step\n' in output
def test_merge_with_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the merge-conflict callback when the payload still has conflicts.

    The workflow halts with one conflict; sending its own data back through
    the callback URL must answer 200, keep the workflow halted and leave no
    stored root for the ``arxiv`` source.
    """
    filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        conflicts = wf.extra_data.get('conflicts')
        callback_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert wf.status == ObjectStatus.HALTED
        assert callback_url == wf.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert wf.extra_data.get('is-update') is True
        assert wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Send the workflow back untouched, i.e. with its conflicts.
        payload = {
            'id': wf.id,
            'metadata': wf.data,
            '_extra_data': wf.extra_data,
        }
        with workflow_app.test_client() as client:
            response = client.put(
                wf.extra_data.get('callback_url'),
                data=json.dumps(payload),
                content_type='application/json',
            )

        body = json.loads(response.get_data())
        saved_message = 'Workflow {} has been saved with conflicts.'.format(wf.id)

        assert response.status_code == 200
        assert saved_message == body['message']

        # Load the workflow again to look at its state after the callback.
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]
        assert wf.status == ObjectStatus.HALTED

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a record matching a rejected workflow is not auto-approved.

    A completed, rejected workflow object is created for the record first; the
    new workflow must then finish without ``auto-approved`` and with at least
    one previously rejected match.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Completed workflow for the same record, marked as rejected.
    rejected_wf = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    rejected_wf.extra_data["approved"] = False
    rejected_wf.save()
    es.indices.refresh("holdingpen-hep")

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context(), \
            mock.patch.dict(workflow_app.config, config_overrides):
        new_wf_id = build_workflow(record).id
        engine_uuid = start("article", object_id=new_wf_id)
        new_wf = WorkflowEngine.from_uuid(engine_uuid).processed_objects[0]

        assert not new_wf.extra_data["auto-approved"]
        assert len(new_wf.extra_data["previously_rejected_matches"]) > 0
        assert new_wf.status == ObjectStatus.COMPLETED
def test_workflow_checks_affiliations_if_record_is_not_important(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the collections sent out for a record with given affiliations.

    The first recorded external request must carry the "CDS Hidden",
    "HAL Hidden" and "Fermilab" collections and must not carry "Literature".
    """
    record = generate_record()
    first_author, second_author = record['authors'][0], record['authors'][1]
    first_author['raw_affiliations'] = [
        {"value": name} for name in ("IN2P3", "Cern")
    ]
    second_author['raw_affiliations'] = [{"value": "Fermilab"}]

    wf_id = build_workflow(record).id

    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=wf_id)

        first_request = mocked_external_services.request_history[0]
        collections = first_request.json()['_collections']

        assert "CDS Hidden" in collections
        assert "HAL Hidden" in collections
        assert "Fermilab" in collections
        assert "Literature" not in collections
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a record matching a rejected workflow is not auto-approved.

    A completed, rejected workflow object is created for the record first; the
    new workflow must then finish without ``auto-approved`` and with at least
    one previously rejected match.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Earlier workflow for the same record, already completed and rejected.
    earlier_wf = workflow_object_class.create(
        data=record, status=ObjectStatus.COMPLETED, data_type="hep"
    )
    earlier_wf.extra_data["approved"] = False
    earlier_wf.save()
    es.indices.refresh("holdingpen-hep")

    overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context(), \
            mock.patch.dict(workflow_app.config, overrides):
        wf_id = build_workflow(record).id
        engine = WorkflowEngine.from_uuid(start("article", object_id=wf_id))
        processed = engine.processed_objects[0]

        assert not processed.extra_data["auto-approved"]
        assert len(processed.extra_data["previously_rejected_matches"]) > 0
        assert processed.status == ObjectStatus.COMPLETED
def test_workflow_without_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """Check the validation call count and final status for a plain record.

    After starting the workflow, ``fake_validation`` must have been called
    twice and the workflow object must be in WAITING status.
    """
    record = {
        "$schema": "https://labs.inspirehep.net/schemas/records/hep.json",
        "titles": [{"title": "Update without conflicts title."}],
        "arxiv_eprints": [
            {"categories": ["hep-lat", "hep-th"], "value": "1703.04802"},
        ],
        "document_type": ["article"],
        "_collections": ["Literature"],
        "acquisition_source": {"source": "arXiv"},
    }

    wf = build_workflow(record)
    start("article", object_id=wf.id)

    assert fake_validation.call_count == 2
    assert wf.status == ObjectStatus.WAITING
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """Check that an update matching two records exactly halts the workflow.

    Two records are created (one from the arXiv fixture, one from the
    publisher fixture); the update must report two distinct exact matches and
    halt with the ``resolve_multiple_exact_matches`` action.
    """
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep")

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep")

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json")
    # Use a context manager so the fixture file is closed after reading.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2
    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an approved, unmodified record is sent as plain Literature.

    After approving and continuing the workflow, the first recorded external
    request must carry exactly ``["Literature"]`` as its collections.
    """
    wf_id = build_workflow(generate_record()).id

    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=wf_id)

        # Approve the record and resume the workflow synchronously.
        approved_wf = workflow_object_class.get(wf_id)
        approved_wf.extra_data['approved'] = True
        approved_wf.save()
        approved_wf.continue_workflow(delayed=False)

        first_request = mocked_external_services.request_history[0]
        collections = first_request.json()['_collections']

        assert "CDS Hidden" not in collections
        assert "HAL Hidden" not in collections
        assert "Fermilab" not in collections
        assert ["Literature"] == collections
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Check the error bookkeeping of a workflow started with an invalid record.

    Starting the workflow must raise ``ValidationError`` and leave the object
    in ERROR state with an error message, a callback URL and a list of
    validation errors in its extra data.
    """
    # No ``_collections`` key in this record.
    broken_record = {
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    broken_wf_id = build_workflow(broken_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=broken_wf_id)

    broken_wf = workflow_object_class.get(broken_wf_id)
    details = broken_wf.extra_data

    assert broken_wf.status == ObjectStatus.ERROR
    assert "_error_msg" in details
    assert "required" in details["_error_msg"]

    resolve_url = "http://localhost:5000/callback/workflows/resolve_validation_errors"
    assert resolve_url == details["callback_url"]

    assert details["validation_errors"]
    leading_error = details["validation_errors"][0]
    assert "message" in leading_error
    assert "path" in leading_error
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check a conflict-free update that carries no acquisition source.

    The workflow must complete as an update with an empty original root, and
    no root may be stored for the record afterwards.
    """
    filters_path = 'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(
            RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS
        ).id
        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        conflicts = wf.extra_data.get('conflicts')

        assert wf.status == ObjectStatus.COMPLETED
        assert not conflicts
        assert wf.extra_data.get('callback_url') is None
        assert wf.extra_data.get('is-update') is True
        assert wf.extra_data['merger_head_revision'] == 0
        assert wf.extra_data['merger_original_root'] == {}

        # The source is unknown, so no new root is saved.
        stored_roots = read_all_wf_record_sources(factory.record_metadata.id)
        assert not stored_roots
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """Check that the callback rejects an id and extra data of the wrong type."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    engine_uuid = start("article", object_id=build_workflow(record).id)
    wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    # Strings are passed where the workflow id and the extra data belong.
    bogus_id = "Alias Investigations"
    bogus_extra_data = "Jessica Jones"
    response = do_validation_callback(
        workflow_app, bogus_id, wf.data, bogus_extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == body["error_code"]
    assert "The workflow request is malformed." == body["message"]
    assert "errors" in body
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that the record from ``core_record`` ends up approved and core.

    The processed workflow object must be approved, flagged as auto-approved
    and have ``core`` set to True in its data.
    """
    record, categories = core_record()
    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        with mock.patch.dict(workflow_app.config, config_overrides):
            engine_uuid = start("article", object_id=wf_id)
            engine = WorkflowEngine.from_uuid(engine_uuid)
            processed = engine.processed_objects[0]

            assert processed.extra_data["approved"] is True
            assert processed.extra_data["auto-approved"] is True
            assert processed.data["core"] is True
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that the restart counters read 0 after the first run and 1 after a restart."""
    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        start('article', object_id=wf_id)
        wf = workflow_object_class.get(wf_id)

        def persisted_restart_count():
            # Always read through ``wf.extra_data`` so that the value is
            # looked up anew on every call.
            marks = wf.extra_data['source_data']['persistent_data']['marks']
            return marks['restart-count']

        assert persisted_restart_count() == 0
        assert wf.extra_data['restart-count'] == 0

        # Move the workflow back to callback position [0] and run it again.
        wf.callback_pos = [0]
        wf.save()
        db.session.commit()
        start('article', object_id=wf_id)

        assert persisted_restart_count() == 1
        assert wf.extra_data['restart-count'] == 1
def test_validation_error_callback_with_a_valid(workflow_app):
    """Check that the callback refuses a workflow that is not in ERROR state."""
    good_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    good_wf_id = build_workflow(good_record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=good_wf_id))
    good_wf = engine.objects[0]
    assert good_wf.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, good_wf.id, good_wf.data, good_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == payload["error_code"]
def test_merge_with_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the extra data of an update that produces one merge conflict.

    The workflow must halt with a callback URL, keep the incoming record as
    ``merger_root`` and report an empty original root.
    """
    filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # No root is inserted beforehand: by default the root is {}.
        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        conflicts = wf.extra_data.get('conflicts')

        assert wf.status == ObjectStatus.HALTED
        assert len(conflicts) == 1
        assert wf.extra_data.get('callback_url') is not None
        assert wf.extra_data.get('is-update') is True
        assert wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS
        assert wf.extra_data['merger_head_revision'] == 0
        assert wf.extra_data['merger_original_root'] == {}
def test_validation_error_callback_with_validation_error(workflow_app):
    """Check the callback response when the submitted data is still invalid.

    The response must be a 400 ``VALIDATION_ERROR`` that returns the workflow
    with a callback URL and exactly one validation error.
    """
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }
    wf_id = build_workflow(record).id

    with pytest.raises(ValidationError):
        start("article", object_id=wf_id)

    wf = workflow_object_class.get(wf_id)
    assert wf.status == ObjectStatus.ERROR

    # Send the very same data back through the callback.
    response = do_validation_callback(workflow_app, wf.id, wf.data, wf.extra_data)
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == body["error_code"]
    assert "Validation error." == body["message"]

    returned_extra_data = body["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
def test_merge_without_conflicts_rootful(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check an update merged against a previously stored arXiv root.

    With ``ARXIV_ROOT`` inserted beforehand the workflow must complete without
    conflicts, report that root as the original one and store the incoming
    record as the new ``arxiv`` root.
    """
    filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )
        record_uuid = factory.record_metadata.id

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # Store a root for the record before the workflow runs.
        insert_wf_record_source(
            json=ARXIV_ROOT,
            record_uuid=record_uuid,
            source='arxiv',
        )

        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        conflicts = wf.extra_data.get('conflicts')

        assert wf.status == ObjectStatus.COMPLETED
        assert not conflicts
        assert wf.extra_data.get('callback_url') is None
        assert wf.extra_data.get('is-update') is True
        assert wf.extra_data['merger_head_revision'] == 0
        assert wf.extra_data['merger_original_root'] == ARXIV_ROOT

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root.json == RECORD_WITH_CONFLICTS
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Check that the validation callback answers 404 for an unknown id."""
    submitted = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    submitted_wf_id = build_workflow(submitted).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=submitted_wf_id))
    existing_wf = engine.objects[0]

    # The callback is given id 1111 instead of the id of the real workflow.
    response = do_validation_callback(
        workflow_app, 1111, existing_wf.data, existing_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 404
    assert "WORKFLOW_NOT_FOUND" == payload["error_code"]
    assert 'The workflow with id "1111" was not found.' == payload["message"]
def test_validation_error_callback_with_malformed_with_invalid_types(
    workflow_app,
):
    """Check that the callback rejects an id and extra data of the wrong type."""
    submitted = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    submitted_wf_id = build_workflow(submitted).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=submitted_wf_id))
    existing_wf = engine.objects[0]

    # Both the id and the extra data are plain strings here.
    malformed_arguments = {
        "id": "Alias Investigations",
        "extra_data": "Jessica Jones",
    }
    response = do_validation_callback(
        workflow_app,
        malformed_arguments["id"],
        existing_wf.data,
        malformed_arguments["extra_data"],
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == payload["error_code"]
    assert "The workflow request is malformed." == payload["message"]
    assert "errors" in payload
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that the restart counters read 0 after the first run and 1 after a restart."""
    record = generate_record()

    with workflow_app.app_context():
        restarted_id = build_workflow(record).id
        start('article', object_id=restarted_id)
        restarted = workflow_object_class.get(restarted_id)

        def counters():
            # Read both counters through ``restarted.extra_data`` on every
            # call instead of keeping a reference to the dictionary.
            persisted = restarted.extra_data['source_data']['persistent_data']
            in_marks = persisted['marks']['restart-count']
            return in_marks, restarted.extra_data['restart-count']

        in_marks, top_level = counters()
        assert in_marks == 0
        assert top_level == 0

        # Reset the callback position to [0] and start the workflow again.
        restarted.callback_pos = [0]
        restarted.save()
        db.session.commit()
        start('article', object_id=restarted_id)

        in_marks, top_level = counters()
        assert in_marks == 1
        assert top_level == 1
def test_validation_error_callback_with_validation_error(workflow_app):
    """Check the callback response when the submitted data is still invalid.

    The response must be a 400 ``VALIDATION_ERROR`` that returns the workflow
    with a callback URL and exactly one validation error.
    """
    broken_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }
    broken_wf_id = build_workflow(broken_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=broken_wf_id)

    broken_wf = workflow_object_class.get(broken_wf_id)
    assert broken_wf.status == ObjectStatus.ERROR

    # The unchanged data goes back through the callback.
    response = do_validation_callback(
        workflow_app, broken_wf.id, broken_wf.data, broken_wf.extra_data
    )
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == payload["error_code"]
    assert "Validation error." == payload["message"]

    echoed = payload["workflow"]["_extra_data"]
    assert echoed["callback_url"]
    assert len(echoed["validation_errors"]) == 1
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check that a workflow built from an existing record completes as an update.

    The workflow must match the record exactly by control number, be approved
    and finish in COMPLETED status with no holdingpen matches.
    """
    record = record_from_db
    wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=wf_id)

    processed_id = WorkflowEngine.from_uuid(engine_uuid).objects[0].id
    processed = workflow_object_class.get(processed_id)
    extra_data = processed.extra_data

    assert extra_data["holdingpen_matches"] == []
    assert extra_data["previously_rejected"] is False
    assert not extra_data.get("stopped-matched-holdingpen-wf")
    assert extra_data["is-update"]
    assert extra_data["exact-matched"]

    control_number = record.get("control_number")
    assert extra_data["matches"]["exact"] == [control_number]
    assert extra_data["matches"]["approved"] == control_number

    assert extra_data["approved"]
    assert processed.status == ObjectStatus.COMPLETED
def test_merge_with_disabled_merge_on_update_feature_flag(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
):
    """Check an update run with ``FEATURE_FLAG_ENABLE_MERGER`` set to False.

    The workflow must complete as an update marked ``merged`` while leaving no
    callback URL, conflicts or merger root in its extra data, and no stored
    ``arxiv`` root for the record.
    """
    merger_off = {'FEATURE_FLAG_ENABLE_MERGER': False}
    with patch.dict(workflow_app.config, merger_off):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        assert wf.status == ObjectStatus.COMPLETED

        extra_data = wf.extra_data
        assert extra_data.get('callback_url') is None
        assert extra_data.get('conflicts') is None
        assert extra_data.get('merged') is True
        assert extra_data.get('merger_root') is None
        assert extra_data.get('is-update') is True

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """Check that an update matching two records exactly halts the workflow.

    Two records are created (one from the arXiv fixture, one from the
    publisher fixture); the update must report two distinct exact matches and
    halt with the ``resolve_multiple_exact_matches`` action.
    """
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Use a context manager so the fixture file is closed after reading.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2
    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
def test_merge_callback_url_with_malformed_workflow(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Check the merge-conflict callback when it receives a malformed payload.

    The callback must answer 400 and the workflow must stay halted with its
    callback URL, conflicts and merger root still in place.
    """
    filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep'
        )

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        conflicts = wf.extra_data.get('conflicts')
        callback_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert wf.status == ObjectStatus.HALTED
        assert callback_url == wf.extra_data.get('callback_url')
        assert len(conflicts) == 1
        assert wf.extra_data.get('is-update') is True
        assert wf.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Strings are sent where the metadata and the extra data belong.
        malformed_payload = {
            'id': wf.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle',
        }
        with workflow_app.test_client() as client:
            response = client.put(
                wf.extra_data.get('callback_url'),
                data=json.dumps(malformed_payload),
                content_type='application/json',
            )

        body = json.loads(response.get_data())

        assert response.status_code == 400
        assert 'The workflow request is malformed.' == body['message']

        # Load the workflow again to look at its state after the callback.
        wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        assert wf.status == ObjectStatus.HALTED
        assert wf.extra_data.get('callback_url') is not None
        assert wf.extra_data.get('conflicts') is not None
        assert wf.extra_data['merger_root'] is not None

        stored_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert stored_root is None
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check an update with ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` on.

    A PUT to ``<INSPIREHEP_URL>/literature/<control number>`` is mocked; the
    test checks that a request to that host and path was recorded and that
    the workflow completes as an exact-matched, approved update.
    """
    record = record_from_db
    wf_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)

    rest_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000",
    }
    with mock.patch.dict(workflow_app.config, rest_config):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            literature_url = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=expected_control_number,
            )
            mocked_response_body = {
                'metadata': {
                    'control_number': expected_control_number,
                },
                'id_': expected_head_uuid,
            }
            requests_mocker.register_uri(
                'PUT',
                literature_url,
                headers={'content-type': 'application/json'},
                status_code=200,
                json=mocked_response_body,
            )

            engine_uuid = start("article", object_id=wf_id)

            recorded = requests_mocker.request_history
            url_paths = [request.path for request in recorded]
            url_hostnames = [request.hostname for request in recorded]

            assert 'web' in url_hostnames
            expected_path = "/literature/{cn}".format(cn=expected_control_number)
            assert expected_path in url_paths

            processed_id = WorkflowEngine.from_uuid(engine_uuid).objects[0].id
            processed = workflow_object_class.get(processed_id)

            assert processed.data['control_number'] == expected_control_number

            extra_data = processed.extra_data
            assert extra_data["holdingpen_matches"] == []
            assert extra_data["previously_rejected"] is False
            assert not extra_data.get("stopped-matched-holdingpen-wf")
            assert extra_data["is-update"]
            assert extra_data["exact-matched"]

            control_number = record.get("control_number")
            assert extra_data["matches"]["exact"] == [control_number]
            assert extra_data["matches"]["approved"] == control_number

            assert extra_data["approved"]
            assert processed.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A second workflow for the same record stops the one already halted.

    The first workflow halts; the same record (title changed) is then
    submitted again. The new workflow halts flagged as matching the
    holdingpen, and the old one ends up completed with ``stopped-by-wf``
    pointing at the new workflow.
    """
    record = generate_record()
    first_workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=first_workflow_id)
    es.indices.refresh("holdingpen-hep")

    pending_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # The very same record dict is resubmitted with only its title changed.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    second_workflow_id = build_workflow(record).id
    start("article", object_id=second_workflow_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(second_workflow_id)
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_extra["previously_rejected"] is False
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra["stopped-matched-holdingpen-wf"] is True
    assert update_extra["is-update"] is False

    stopped_wf = workflow_object_class.get(pending_wf_id)
    stopped_extra = stopped_wf.extra_data

    assert stopped_extra['already-in-holding-pen'] is False
    assert stopped_extra['previously_rejected'] is False
    assert stopped_extra['stopped-by-wf'] == update_wf.id
    assert stopped_extra.get('approved') is None
    assert stopped_extra['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A second workflow for the same record stops the one already halted.

    The first workflow halts; the same record (title changed) is then
    submitted again. The new workflow halts flagged as matching the
    holdingpen, and the old one ends up completed with ``stopped-by-wf``
    pointing at the new workflow. The ``holdingpen-hep`` index is flushed
    and refreshed after each start.
    """
    record = generate_record()
    first_workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=first_workflow_id)
    current_search.flush_and_refresh("holdingpen-hep")

    pending_wf = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # The very same record dict is resubmitted with only its title changed.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    second_workflow_id = build_workflow(record).id
    start("article", object_id=second_workflow_id)
    current_search.flush_and_refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(second_workflow_id)
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_extra["previously_rejected"] is False
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra["stopped-matched-holdingpen-wf"] is True
    assert update_extra["is-update"] is False

    stopped_wf = workflow_object_class.get(pending_wf_id)
    stopped_extra = stopped_wf.extra_data

    assert stopped_extra['already-in-holding-pen'] is False
    assert stopped_extra['previously_rejected'] is False
    assert stopped_extra['stopped-by-wf'] == update_wf.id
    assert stopped_extra.get('approved') is None
    assert stopped_extra['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run the whole article workflow and check the extracted references.

    A cited record with control number 1000 is created first; after the
    citing record goes through the workflow, its eighth reference must
    link to that record and its first raw reference must come from arXiv.
    """
    cited_eprint = {
        'categories': [
            'quant-ph',
            'cond-mat.mes-hall',
            'cond-mat.str-el',
            'math-ph',
            'math.MP',
        ],
        'value': '1308.0815',
    }
    cited_title = {
        'source': 'arXiv',
        'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation',
    }
    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [cited_eprint],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [cited_title],
    }
    TestRecordMetadata.create_from_kwargs(
        json=cited_record_json,
        index='records-hep',
        pid_type='lit',
    )

    citing_record, categories = insert_citing_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    # The citing record's acquisition source must validate against the hep schema.
    subschema = load_schema('hep')['properties']['acquisition_source']
    assert validate(citing_record['acquisition_source'], subschema) is None

    with mock.patch.dict(workflow_app.config, extra_config):
        workflow_id = build_workflow(citing_record).id
        citing_doc_workflow_uuid = start('article', object_id=workflow_id)

    citing_doc_eng = WorkflowEngine.from_uuid(citing_doc_workflow_uuid)
    citing_doc_obj = citing_doc_eng.processed_objects[0]

    expected_ref = 'http://localhost:5000/api/literature/1000'
    assert citing_doc_obj.data['references'][7]['record']['$ref'] == expected_ref
    assert citing_doc_obj.data['references'][0]['raw_refs'][0]['source'] == 'arXiv'
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run the whole article workflow and check the extracted references.

    A cited record with control number 1000 is created first; after the
    citing record goes through the workflow, its eighth reference must
    link to that record and its first raw reference must come from arXiv.
    """
    cited_categories = [
        'quant-ph',
        'cond-mat.mes-hall',
        'cond-mat.str-el',
        'math-ph',
        'math.MP',
    ]
    cited_record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [
            {'categories': cited_categories, 'value': '1308.0815'},
        ],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [
            {
                'source': 'arXiv',
                'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation',
            },
        ],
    }
    TestRecordMetadata.create_from_kwargs(
        json=cited_record_json,
        index='records-hep',
        pid_type='lit',
    )

    citing_record, categories = insert_citing_record()

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    # The citing record's acquisition source must validate against the hep schema.
    hep_schema = load_schema('hep')
    acquisition_source_schema = hep_schema['properties']['acquisition_source']
    assert validate(citing_record['acquisition_source'], acquisition_source_schema) is None

    with mock.patch.dict(workflow_app.config, extra_config):
        workflow_id = build_workflow(citing_record).id
        citing_doc_workflow_uuid = start('article', object_id=workflow_id)

    citing_doc_eng = WorkflowEngine.from_uuid(citing_doc_workflow_uuid)
    citing_doc_obj = citing_doc_eng.processed_objects[0]

    assert citing_doc_obj.data['references'][7]['record']['$ref'] == 'http://localhost:5000/api/literature/1000'
    assert citing_doc_obj.data['references'][0]['raw_refs'][0]['source'] == 'arXiv'
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Starting a workflow whose ``source_data`` was removed raises ``ValueError``."""
    workflow_obj = build_workflow(generate_record())

    # Strip the source data and persist the change before starting.
    del workflow_obj.extra_data["source_data"]
    workflow_obj.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=workflow_obj.id)
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """An update workflow issues the mocked literature ``PUT`` and completes.

    ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` is switched on and the
    ``PUT`` on ``<INSPIREHEP_URL>/literature/<control_number>`` is mocked.
    The test checks that the request was made and that the workflow ends
    as a completed, approved, exact-matched update.
    """
    record = record_from_db
    workflow_id = build_workflow(record).id
    expected_control_number = record['control_number']
    expected_head_uuid = str(record.id)

    config_overrides = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000",
    }

    with mock.patch.dict(workflow_app.config, config_overrides):
        with requests_mock.Mocker(real_http=True) as requests_mocker:
            requests_mocker.register_uri(
                'PUT',
                '{url}/literature/{cn}'.format(
                    url=workflow_app.config.get("INSPIREHEP_URL"),
                    cn=expected_control_number,
                ),
                headers={'content-type': 'application/json'},
                status_code=200,
                json={
                    'metadata': {'control_number': expected_control_number},
                    'id_': expected_head_uuid,
                },
            )

            eng_uuid = start("article", object_id=workflow_id)

            # The mocked endpoint must show up in the recorded requests.
            url_paths = [sent.path for sent in requests_mocker.request_history]
            url_hostnames = [sent.hostname for sent in requests_mocker.request_history]
            assert 'web' in url_hostnames
            assert "/literature/{cn}".format(cn=expected_control_number) in url_paths

            engine = WorkflowEngine.from_uuid(eng_uuid)
            obj = workflow_object_class.get(engine.objects[0].id)
            matches = obj.extra_data["matches"]

            assert obj.data['control_number'] == expected_control_number
            assert obj.extra_data["holdingpen_matches"] == []
            assert obj.extra_data["previously_rejected"] is False
            assert not obj.extra_data.get("stopped-matched-holdingpen-wf")
            assert obj.extra_data["is-update"]
            assert obj.extra_data["exact-matched"]
            assert matches["exact"] == [record.get("control_number")]
            assert matches["approved"] == record.get("control_number")
            assert obj.extra_data["approved"]
            assert obj.status == ObjectStatus.COMPLETED
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Starting a workflow for a record already held in INITIAL state raises.

    A workflow object for the record is created directly, forced into
    ``ObjectStatus.INITIAL`` and saved, and the ``holdingpen-hep`` index is
    refreshed. Starting a new ``article`` workflow for the same record is
    then expected to raise ``WorkflowsError``.
    """
    record = generate_record()

    existing_obj = workflow_object_class.create(data=record, data_type="hep")
    existing_obj.status = ObjectStatus.INITIAL
    existing_obj.save()
    es.indices.refresh("holdingpen-hep")

    # Build the workflow outside the ``raises`` block: only ``start`` is
    # expected to fail, and an error raised while building must not be able
    # to satisfy the assertion by accident.
    workflow_id = build_workflow(record).id

    with pytest.raises(WorkflowsError):
        start("article", object_id=workflow_id)
def get_halted_workflow(mocked_is_pdf_link, app, record, extra_config=None):
    """Start an ``article`` workflow for ``record`` and check it halted.

    ``mocked_is_pdf_link`` is made to return ``True`` and ``extra_config``
    (if any) is patched into ``app.config`` while the workflow is built and
    started. The first processed object is then checked for its attached
    files, publication info and prediction data.

    Returns a ``(workflow_uuid, eng, obj)`` tuple.
    """
    mocked_is_pdf_link.return_value = True

    config_overrides = extra_config or {}
    with mock.patch.dict(app.config, config_overrides):
        workflow_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=workflow_id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]

    assert obj.status == ObjectStatus.HALTED
    assert obj.data_type == "hep"

    # Files should have been attached (tarball + pdf, and plots)
    assert obj.files["1407.7587.pdf"]
    assert obj.files["1407.7587.tar.gz"]
    assert len(obj.files) > 2

    # A publication note should have been extracted
    pub_info = obj.data.get("publication_info")
    assert pub_info
    assert pub_info[0]
    first_pub_info = pub_info[0]
    assert first_pub_info.get("year") == 2014
    assert first_pub_info.get("journal_title") == "J. Math. Phys."

    # A prediction should have been made
    prediction = obj.extra_data.get("relevance_prediction")
    assert prediction
    assert prediction["decision"] == "Non-CORE"
    assert prediction["scores"]["Non-CORE"] == 0.8358207729691823

    expected_experiment_prediction = {
        "experiments": [
            {"label": "CMS", "score": 0.75495152473449707},
        ]
    }
    experiments_prediction = obj.extra_data.get("experiments_prediction")
    assert experiments_prediction == expected_experiment_prediction

    keywords_prediction = obj.extra_data.get("keywords_prediction")
    assert keywords_prediction
    expected_keyword = {
        "label": "galaxy",
        "score": 0.29424679279327393,
        "accept": True,
    }
    assert expected_keyword in keywords_prediction["keywords"]

    # This record should not have been touched yet
    assert obj.extra_data["approved"] is None

    return workflow_uuid, eng, obj
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A workflow matching a rejected one is flagged as previously rejected.

    The first workflow is rejected (``approved`` set to ``False``) and
    continued to completion. A second workflow for the same record, with
    a different title, must then list the first one in
    ``previously_rejected_matches``.
    """
    record = generate_record()
    first_workflow_id = build_workflow(record).id
    eng_uuid = start("article", object_id=first_workflow_id)
    rejected_id = WorkflowEngine.from_uuid(eng_uuid).objects[0].id

    rejected_obj = workflow_object_class.get(rejected_id)
    rejected_obj.extra_data["approved"] = False  # reject record
    rejected_obj.continue_workflow()

    rejected_obj = workflow_object_class.get(rejected_id)
    assert rejected_obj.status == ObjectStatus.COMPLETED
    assert rejected_obj.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    second_workflow_id = build_workflow(record).id
    second_eng_uuid = start("article", object_id=second_workflow_id)
    matching_obj = WorkflowEngine.from_uuid(second_eng_uuid).objects[0]

    assert matching_obj.extra_data["previously_rejected"] is True
    assert matching_obj.extra_data["previously_rejected_matches"] == [rejected_id]
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """A valid article record does not leave the workflow in error."""
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }

    eng_uuid = start("article", object_id=build_workflow(valid_record).id)
    workflow_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    assert workflow_obj.status != ObjectStatus.ERROR
    assert "_error_msg" not in workflow_obj.extra_data
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """An author record that fails validation puts the workflow in error.

    ``start`` is expected to raise ``ValidationError``; afterwards the
    stored object has an ``_error_msg`` mentioning ``required``.
    """
    invalid_record = {
        'name': {'preferred_name': 'John Smith', 'value': 'Smith, John'},
    }
    obj_id = build_workflow(invalid_record, data_type='authors').id

    with pytest.raises(ValidationError):
        start('author', object_id=obj_id)

    failed_obj = workflow_object_class.get(obj_id)
    error_data = failed_obj.extra_data

    assert failed_obj.status == ObjectStatus.ERROR
    assert '_error_msg' in error_data
    assert 'required' in error_data['_error_msg']
def test_merge_without_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """The merge callback answers 400 for a workflow that merged cleanly.

    The update workflow completes without conflicts and stores the update
    as the ``arxiv`` root; a later PUT to the conflict-resolution callback
    for that workflow is expected to return status 400.
    """
    conflict_filters_path = 'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    with patch(conflict_filters_path, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__,
            'merge_record_arxiv.json',
            index_name='records-hep',
        )

        update_workflow_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)

        merged_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        conflicts = merged_obj.extra_data.get('conflicts')
        url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

        assert merged_obj.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert merged_obj.extra_data.get('is-update') is True

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITHOUT_CONFLICTS

        callback_payload = {
            'id': merged_obj.id,
            'metadata': merged_obj.data,
            '_extra_data': merged_obj.extra_data,
        }

        with workflow_app.test_client() as client:
            response = client.put(
                url,
                data=json.dumps(callback_payload),
                content_type='application/json',
            )

        assert response.status_code == 400
def test_authors_workflow_continues_when_record_is_valid(workflow_app, mocked_external_services):
    """A valid author record halts the workflow without any error message."""
    valid_record = {
        '_collections': ['Authors'],
        'name': {'preferred_name': 'John Smith', 'value': 'Smith, John'},
    }

    built_workflow = build_workflow(valid_record, data_type='authors', id_user=1)
    stored_obj = workflow_object_class.get(built_workflow.id)

    start('author', object_id=stored_obj.id)

    # Reload to see the state left behind by the workflow run.
    halted_obj = workflow_object_class.get(stored_obj.id)

    assert halted_obj.status == ObjectStatus.HALTED
    assert '_error_msg' not in halted_obj.extra_data
def test_workflow_restarts_twice_if_working_with_stale_data(
    mocked__is_stale_data,
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
):
    """The workflow completes as an update with a restart count of two."""
    factory = TestRecordMetadata.create_from_file(
        __name__,
        'merge_record_arxiv.json',
        index_name='records-hep',
    )

    obj_id = build_workflow(factory.record_metadata.json).id
    start('article', object_id=obj_id)

    finished_obj = workflow_object_class.get(obj_id)
    extra_data = finished_obj.extra_data
    marks = extra_data['source_data']['persistent_data']['marks']

    assert extra_data['head_version_id'] == 1
    assert extra_data['is-update']
    assert marks['restart-count'] == 2
    assert finished_obj.status == ObjectStatus.COMPLETED
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An article record that fails validation puts the workflow in error.

    ``start`` is expected to raise ``ValidationError``; the stored object
    must then carry the error message, the validation-error callback URL
    and a non-empty ``validation_errors`` list.
    """
    invalid_record = {
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }
    workflow_id = build_workflow(invalid_record).id

    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_obj = workflow_object_class.get(workflow_id)
    error_data = failed_obj.extra_data

    assert failed_obj.status == ObjectStatus.ERROR
    assert "_error_msg" in error_data
    assert "required" in error_data["_error_msg"]

    expected_url = "http://localhost:5000/callback/workflows/resolve_validation_errors"
    assert expected_url == error_data["callback_url"]

    assert error_data["validation_errors"]
    first_error = error_data["validation_errors"][0]
    assert "message" in first_error
    assert "path" in first_error
def test_workflow_restarts_goes_in_error_after_three_restarts(
    mocked__is_stale_data,
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
):
    """The workflow raises and ends in error once the restart count is three."""
    factory = TestRecordMetadata.create_from_file(
        __name__,
        'merge_record_arxiv.json',
        index_name='records-hep',
    )
    obj_id = build_workflow(factory.record_metadata.json).id

    with pytest.raises(WorkflowsError):
        start('article', object_id=obj_id)

    failed_obj = workflow_object_class.get(obj_id)
    extra_data = failed_obj.extra_data
    marks = extra_data['source_data']['persistent_data']['marks']

    assert marks['restart-count'] == 3
    assert 'Workflow restarted too many times' in extra_data['_error_msg']
    assert failed_obj.status == ObjectStatus.ERROR
def test_workflow_without_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """The mocked validation is called twice and the workflow ends up WAITING."""
    record_without_validation_error = {
        "$schema": "https://labs.inspirehep.net/schemas/records/hep.json",
        "titles": [
            {"title": "Update without conflicts title."},
        ],
        "arxiv_eprints": [
            {"categories": ["hep-lat", "hep-th"], "value": "1703.04802"},
        ],
        "document_type": ["article"],
        "_collections": ["Literature"],
        "acquisition_source": {"source": "arXiv"},
    }

    workflow = build_workflow(record_without_validation_error)
    start("article", object_id=workflow.id)

    assert fake_validation.call_count == 2
    assert workflow.status == ObjectStatus.WAITING
def test_validation_error_callback_with_a_valid(workflow_app):
    """The validation callback rejects a workflow that is not in error.

    A valid record is run through the workflow, then the validation
    callback is invoked for it; the response must be a 400 carrying the
    ``WORKFLOW_NOT_IN_ERROR_STATE`` error code.
    """
    valid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }

    eng_uuid = start("article", object_id=build_workflow(valid_record).id)
    workflow_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    assert workflow_obj.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app,
        workflow_obj.id,
        workflow_obj.data,
        workflow_obj.extra_data,
    )

    expected_error_code = "WORKFLOW_NOT_IN_ERROR_STATE"
    response_body = json.loads(response.get_data())

    assert response.status_code == 400
    assert expected_error_code == response_body["error_code"]
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """The validation callback answers 404 for an unknown workflow id.

    A workflow is run only to obtain data to send; the callback is then
    invoked with id ``1111`` and must report ``WORKFLOW_NOT_FOUND``.
    """
    submitted_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [
            {"title": "A title"},
        ],
    }

    eng_uuid = start("article", object_id=build_workflow(submitted_record).id)
    workflow_obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    # 1111 is not the id of the workflow created above.
    response = do_validation_callback(
        workflow_app,
        1111,
        workflow_obj.data,
        workflow_obj.extra_data,
    )
    response_body = json.loads(response.get_data())

    expected_message = 'The workflow with id "1111" was not found.'
    expected_error_code = "WORKFLOW_NOT_FOUND"

    assert response.status_code == 404
    assert expected_error_code == response_body["error_code"]
    assert expected_message == response_body["message"]