def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a record matching a rejected workflow is flagged as such.

    A first workflow is run and rejected.  A second workflow built from the
    same record (with a changed title) must then have ``previously_rejected``
    set and list the first workflow id in ``previously_rejected_matches``.
    """
    record = generate_record()

    # First run: start the workflow, reject the record and let it finish.
    first_wf_id = build_workflow(record).id
    first_engine = WorkflowEngine.from_uuid(
        start("article", object_id=first_wf_id)
    )
    rejected_id = first_engine.objects[0].id
    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    # Second run: this workflow matches in the holdingpen and stops because
    # the matched one was rejected.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    second_wf_id = build_workflow(record).id
    second_engine = WorkflowEngine.from_uuid(
        start("article", object_id=second_wf_id)
    )
    matched_extra = second_engine.objects[0].extra_data

    assert matched_extra["previously_rejected"] is True
    assert matched_extra["previously_rejected_matches"] == [rejected_id]
def test_workflow_checks_affiliations_if_record_is_not_important(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the collections sent for a record with selected affiliations.

    The authors are given IN2P3, Cern and Fermilab raw affiliations.  The
    first request recorded by ``mocked_external_services`` must then carry
    the "CDS Hidden", "HAL Hidden" and "Fermilab" collections and must not
    carry "Literature".
    """
    record = generate_record()
    authors = record['authors']
    authors[0]['raw_affiliations'] = [{"value": "IN2P3"}, {"value": "Cern"}]
    authors[1]['raw_affiliations'] = [{"value": "Fermilab"}]

    workflow_id = build_workflow(record).id
    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    # The workflow runs with REST record management switched on.
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=workflow_id)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    for expected_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert expected_collection in collections_in_record
    assert "Literature" not in collections_in_record
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
):
    """Check that a record matching a rejected workflow is flagged as such.

    The record is started once and rejected, then started again with a
    changed title.  The second workflow must not be marked as already in the
    holding pen, but must be marked as previously rejected and reference the
    first workflow id.
    """
    record = generate_record()

    # First run: reject the record and let the workflow finish.
    first_engine = WorkflowEngine.from_uuid(start('article', [record]))
    rejected_id = first_engine.objects[0].id
    rejected_wf = workflow_object_class.get(rejected_id)
    rejected_wf.extra_data["approved"] = False
    rejected_wf.continue_workflow()

    rejected_wf = workflow_object_class.get(rejected_id)
    assert rejected_wf.status == ObjectStatus.COMPLETED
    assert rejected_wf.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    # Second run: this workflow matches in the holdingpen and stops because
    # the matched one was rejected.
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    second_engine = WorkflowEngine.from_uuid(start('article', [record]))
    matched_extra = second_engine.objects[0].extra_data

    assert matched_extra['already-in-holding-pen'] is False
    assert matched_extra['previously_rejected'] is True
    assert matched_extra['previously_rejected_matches'] == [rejected_id]
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is accepted manually.

    The workflow returned by ``get_halted_workflow`` is accepted through
    ``do_accept_core``.  It is expected to be WAITING until the robotupload
    callback is delivered, and COMPLETED with ``approved`` set to True
    afterwards.
    """
    # The docstring now comes first: placed after this assignment it was
    # only a discarded string expression, not the function's docstring.
    record = generate_record()
    workflow_uuid, _, obj = get_halted_workflow(
        app=workflow_app,
        record=record,
    )

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(
        app=workflow_app,
        workflow_id=obj.id,
        recids=[12345],
    )

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that the restart counters start at 0 and become 1 on restart.

    Both the ``restart-count`` mark in the persistent source data and the
    top-level ``restart-count`` entry of ``extra_data`` are checked after
    the first start and again after the workflow is rewound and restarted.
    """
    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        start('article', object_id=wf_id)

        wf = workflow_object_class.get(wf_id)

        def restart_counts():
            # Read both counters afresh from the workflow's extra data.
            extra = wf.extra_data
            marks = extra['source_data']['persistent_data']['marks']
            return marks['restart-count'], extra['restart-count']

        assert restart_counts() == (0, 0)

        # Rewind the workflow to its first step and run it again.
        wf.callback_pos = [0]
        wf.save()
        db.session.commit()

        start('article', object_id=wf_id)

        assert restart_counts() == (1, 1)
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Test a harvesting workflow whose record is rejected by hand.

    ``approved`` is set to False on the workflow returned by
    ``get_halted_workflow``; after continuing, the workflow must be
    COMPLETED and still carry ``approved`` as False.
    """
    record = generate_record()

    workflow_uuid, _engine, halted_wf = get_halted_workflow(
        app=workflow_app,
        extra_config={
            "BEARD_API_URL": "http://example.com/beard",
            "MAGPIE_API_URL": "http://example.com/magpie",
        },
        record=record,
    )

    # Record the rejection and persist it before resuming.
    halted_wf.extra_data["approved"] = False
    halted_wf.save()
    db.session.commit()

    resumed_wf = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]
    resumed_id = resumed_wf.id
    resumed_wf.continue_workflow()

    # It was rejected
    final_wf = workflow_object_class.get(resumed_id)
    assert final_wf.status == ObjectStatus.COMPLETED
    assert final_wf.extra_data["approved"] is False
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is accepted manually.

    The workflow returned by ``get_halted_workflow`` is accepted through
    ``do_accept_core``.  It is expected to stay WAITING after the
    robotupload callback, and to be COMPLETED with ``approved`` set to True
    once the webcoll callback has been delivered.
    """
    # The docstring now comes first: placed after this assignment it was
    # only a discarded string expression, not the function's docstring.
    record = generate_record()
    workflow_uuid, _, obj = get_halted_workflow(
        app=workflow_app,
        record=record,
    )

    do_accept_core(app=workflow_app, workflow_id=obj.id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    do_robotupload_callback(
        app=workflow_app,
        workflow_id=obj.id,
        recids=[12345],
    )

    # Still waiting: the webcoll callback has not arrived yet.
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    do_webcoll_callback(app=workflow_app, recids=[12345])

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
    assert obj.extra_data["approved"] is True
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Verify the ``restart-count`` bookkeeping across two workflow starts.

    The counter is read from two places in ``extra_data``: the persistent
    marks of the source data and the top-level entry.  Both must be 0 after
    the first start and 1 after the workflow is rewound and started again.
    """
    record = generate_record()

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        start('article', object_id=workflow_id)

        workflow = workflow_object_class.get(workflow_id)

        def persistent_restart_count():
            source_data = workflow.extra_data['source_data']
            return source_data['persistent_data']['marks']['restart-count']

        assert persistent_restart_count() == 0
        assert workflow.extra_data['restart-count'] == 0

        # Send the workflow back to its first callback before restarting.
        workflow.callback_pos = [0]
        workflow.save()
        db.session.commit()

        start('article', object_id=workflow_id)

        assert persistent_restart_count() == 1
        assert workflow.extra_data['restart-count'] == 1
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check a holdingpen match coming from a different acquisition source.

    A first workflow is left HALTED.  A second one, built from the same
    record with a different title and source, must report the first as a
    holdingpen match without being flagged as stopped or previously
    rejected.
    """
    record = generate_record()

    pending_wf_id = build_workflow(record).id
    pending_uuid = start('article', object_id=pending_wf_id)
    es.indices.refresh('holdingpen-hep')

    wf_to_match = WorkflowEngine.from_uuid(pending_uuid).objects[0].id
    # generated wf pending in holdingpen
    pending_wf = workflow_object_class.get(wf_to_match)
    assert pending_wf.status == ObjectStatus.HALTED

    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    record['acquisition_source']['source'] = 'but not the source'

    # this workflow matches in the holdingpen but continues because has a
    # different source
    update_wf_id = build_workflow(record).id
    update_uuid = start('article', object_id=update_wf_id)
    update_extra = WorkflowEngine.from_uuid(update_uuid).objects[0].extra_data

    assert update_extra['already-in-holding-pen'] is True
    assert update_extra['holdingpen_matches'] == [wf_to_match]
    assert update_extra['previously_rejected'] is False
    assert not update_extra.get('stopped-matched-holdingpen-wf')
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Ensure a match with another source is recorded but does not stop the wf.

    The first workflow must end up HALTED.  The second workflow, started for
    the same record after changing its title and acquisition source, must
    list the first workflow in ``holdingpen_matches`` and must not have
    ``stopped-matched-holdingpen-wf`` set.
    """
    record = generate_record()

    first_id = build_workflow(record).id
    first_engine_uuid = start('article', object_id=first_id)
    es.indices.refresh('holdingpen-hep')

    first_engine = WorkflowEngine.from_uuid(first_engine_uuid)
    wf_to_match = first_engine.objects[0].id
    halted_wf = workflow_object_class.get(wf_to_match)
    # generated wf pending in holdingpen
    assert halted_wf.status == ObjectStatus.HALTED

    updated_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = updated_title
    record['acquisition_source']['source'] = 'but not the source'

    # this workflow matches in the holdingpen but continues because has a
    # different source
    second_id = build_workflow(record).id
    second_engine = WorkflowEngine.from_uuid(
        start('article', object_id=second_id)
    )
    second_wf = second_engine.objects[0]

    assert second_wf.extra_data['already-in-holding-pen'] is True
    assert second_wf.extra_data['holdingpen_matches'] == [wf_to_match]
    assert second_wf.extra_data['previously_rejected'] is False
    assert not second_wf.extra_data.get('stopped-matched-holdingpen-wf')
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run a harvesting workflow and reject its record manually.

    After ``approved`` is stored as False and the workflow is continued, the
    reloaded workflow object must be COMPLETED with ``approved`` unchanged.
    """
    record = generate_record()
    api_urls = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
    }

    workflow_uuid, _, wf_obj = get_halted_workflow(
        app=workflow_app,
        extra_config=api_urls,
        record=record,
    )

    # Persist the rejection decision.
    wf_obj.extra_data["approved"] = False
    wf_obj.save()
    db.session.commit()

    # Resume the workflow from the engine's processed object.
    engine = WorkflowEngine.from_uuid(workflow_uuid)
    wf_obj = engine.processed_objects[0]
    wf_obj_id = wf_obj.id
    wf_obj.continue_workflow()

    wf_obj = workflow_object_class.get(wf_obj_id)
    # It was rejected
    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data["approved"] is False
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Ensure a resubmitted record is linked to its earlier rejected workflow.

    The record is started, rejected and completed; it is then started again
    with a new title.  The new workflow must have ``previously_rejected``
    set, reference the rejected workflow id, and not be marked as already in
    the holding pen.
    """
    record = generate_record()

    initial_uuid = start('article', [record])
    initial_engine = WorkflowEngine.from_uuid(initial_uuid)
    rejected_id = initial_engine.objects[0].id

    # Reject the record and resume the workflow.
    to_reject = workflow_object_class.get(rejected_id)
    to_reject.extra_data["approved"] = False
    to_reject.continue_workflow()

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get('approved') is False

    es.indices.refresh('holdingpen-hep')

    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    update_uuid = start('article', [record])
    update_engine = WorkflowEngine.from_uuid(update_uuid)
    update_wf = update_engine.objects[0]

    assert update_wf.extra_data['already-in-holding-pen'] is False
    assert update_wf.extra_data['previously_rejected'] is True
    assert update_wf.extra_data['previously_rejected_matches'] == [rejected_id]
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that an approved, unmodified record is sent as plain Literature.

    The generated record is used as is, approved and continued.  The first
    request recorded by ``mocked_external_services`` must carry exactly the
    ``["Literature"]`` collections.
    """
    record = generate_record()
    workflow_id = build_workflow(record).id

    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=workflow_id)

        # Approve the record and resume synchronously.
        approved_wf = workflow_object_class.get(workflow_id)
        approved_wf.extra_data['approved'] = True
        approved_wf.save()
        approved_wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    for unexpected_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert unexpected_collection not in collections_in_record
    assert ["Literature"] == collections_in_record
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_search,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
):
    """Test a full harvesting workflow that is accepted manually.

    HTTP traffic is intercepted with ``requests_mock``: URLs matching
    ``indexer`` or ``localhost`` go to the real network, while POSTs to
    ``localhost:1234`` get a canned ``[INFO]`` reply.  The workflow is
    accepted through ``do_accept_core`` and must be WAITING until both the
    robotupload and the webcoll callbacks have answered with 200, after
    which it must be COMPLETED.
    """
    # The docstring now comes first: placed after this assignment it was
    # only a discarded string expression, not the function's docstring.
    record = generate_record()

    with requests_mock.Mocker() as requests_mocker:
        requests_mocker.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        requests_mocker.register_uri(
            'POST',
            re.compile('https?://localhost:1234.*'),
            text=u'[INFO]',
            status_code=200,
        )

        workflow_uuid, _, obj = get_halted_workflow(
            app=workflow_app,
            extra_config={'PRODUCTION_MODE': False},
            record=record,
        )

        do_accept_core(
            app=workflow_app,
            workflow_id=obj.id,
        )

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        assert obj.status == ObjectStatus.WAITING

        response = do_robotupload_callback(
            app=workflow_app,
            workflow_id=obj.id,
            recids=[12345],
        )
        assert response.status_code == 200

        # Still waiting: the webcoll callback has not arrived yet.
        obj = workflow_object_class.get(obj.id)
        assert obj.status == ObjectStatus.WAITING

        response = do_webcoll_callback(app=workflow_app, recids=[12345])
        assert response.status_code == 200

        eng = WorkflowEngine.from_uuid(workflow_uuid)
        obj = eng.processed_objects[0]
        # It was accepted
        assert obj.status == ObjectStatus.COMPLETED
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Starting a workflow whose ``source_data`` was removed raises ValueError."""
    wf_obj = build_workflow(generate_record())

    # Strip the source data and persist the change.
    del wf_obj.extra_data["source_data"]
    wf_obj.save()
    db.session.commit()

    wf_id = wf_obj.id
    with pytest.raises(ValueError):
        start("article", object_id=wf_id)
def test_start_wf_with_no_source_data_fails(workflow_app):
    """Check that ``start`` raises ValueError without ``source_data``.

    The ``source_data`` entry is deleted from the workflow's ``extra_data``
    and committed before the workflow is started.
    """
    record = generate_record()
    workflow = build_workflow(record)

    extra_data = workflow.extra_data
    del extra_data["source_data"]
    workflow.save()
    db.session.commit()

    workflow_id = workflow.id
    with pytest.raises(ValueError):
        start("article", object_id=workflow_id)
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Starting a record that has an INITIAL workflow object raises an error.

    A workflow object holding the record is saved in INITIAL status and the
    index refreshed; ``start`` for the same record must then raise
    ``WorkflowsError``.
    """
    record = generate_record()

    existing_wf = workflow_object_class.create(data=record, data_type='hep')
    existing_wf.status = ObjectStatus.INITIAL
    existing_wf.save()
    es.indices.refresh('holdingpen-hep')

    with pytest.raises(WorkflowsError):
        start('article', record)
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Starting a record that has an INITIAL workflow object raises an error.

    A workflow object holding the record is saved in INITIAL status and the
    index refreshed; starting a new workflow built from the same record must
    then raise ``WorkflowsError``.
    """
    record = generate_record()

    obj = workflow_object_class.create(data=record, data_type="hep")
    obj.status = ObjectStatus.INITIAL
    obj.save()
    es.indices.refresh("holdingpen-hep")

    # Build the workflow outside the ``raises`` block so that only ``start``
    # can satisfy the expected exception; an error raised during setup must
    # fail the test instead of passing it.
    workflow_id = build_workflow(record).id
    with pytest.raises(WorkflowsError):
        start("article", object_id=workflow_id)
def test_match_wf_in_error_goes_in_error_state(workflow_app):
    """Starting a record that has an ERROR workflow object raises an error.

    A workflow object holding the record is saved in ERROR status and the
    index refreshed; starting a new workflow built from the same record must
    then raise ``WorkflowsError``.
    """
    record = generate_record()

    obj = workflow_object_class.create(data=record, data_type='hep')
    obj.status = ObjectStatus.ERROR
    obj.save()
    es.indices.refresh('holdingpen-hep')

    # Build the workflow outside the ``raises`` block so that only ``start``
    # can satisfy the expected exception; an error raised during setup must
    # fail the test instead of passing it.
    workflow_id = build_workflow(record).id
    with pytest.raises(WorkflowsError):
        start('article', object_id=workflow_id)
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Check that ``start`` fails when the record has an INITIAL workflow.

    The record is first stored in a workflow object whose status is set to
    INITIAL.  After the index refresh, starting another workflow for the
    same record is expected to raise ``WorkflowsError``.
    """
    record = generate_record()

    obj = workflow_object_class.create(data=record, data_type="hep")
    obj.status = ObjectStatus.INITIAL
    obj.save()
    es.indices.refresh("holdingpen-hep")

    # Only ``start`` belongs inside the ``raises`` block: with
    # ``build_workflow`` inside it, a setup failure of the same exception
    # type would make the test pass for the wrong reason.
    workflow_id = build_workflow(record).id
    with pytest.raises(WorkflowsError):
        start("article", object_id=workflow_id)
def test_match_wf_in_error_goes_in_error_state(workflow_app):
    """Check that ``start`` fails when the record has an ERROR workflow.

    The record is first stored in a workflow object whose status is set to
    ERROR.  After the index is flushed and refreshed, starting another
    workflow for the same record is expected to raise ``WorkflowsError``.
    """
    record = generate_record()

    obj = workflow_object_class.create(data=record, data_type="hep")
    obj.status = ObjectStatus.ERROR
    obj.save()
    current_search.flush_and_refresh("holdingpen-hep")

    # Only ``start`` belongs inside the ``raises`` block: with
    # ``build_workflow`` inside it, a setup failure of the same exception
    # type would make the test pass for the wrong reason.
    workflow_id = build_workflow(record).id
    with pytest.raises(WorkflowsError):
        start("article", object_id=workflow_id)
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a new matching workflow stops the one pending in holdingpen.

    The first workflow is HALTED.  After a second workflow is started for
    the same record with a new title, the second one must be HALTED and
    flagged as having stopped a match, while the first must be COMPLETED
    with ``stopped-by-wf`` pointing at the second.
    """
    record = generate_record()

    pending_id = build_workflow(record).id
    pending_uuid = start("article", object_id=pending_id)
    es.indices.refresh("holdingpen-hep")

    old_wf = WorkflowEngine.from_uuid(pending_uuid).objects[0]
    old_wf_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data["previously_rejected"] is False

    # The update is the very same record object with a different title.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    update_id = build_workflow(record).id
    start("article", object_id=update_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(update_id)
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_extra["previously_rejected"] is False
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra["stopped-matched-holdingpen-wf"] is True
    assert update_extra["is-update"] is False

    old_wf = workflow_object_class.get(old_wf_id)
    old_extra = old_wf.extra_data

    assert old_extra['already-in-holding-pen'] is False
    assert old_extra['previously_rejected'] is False
    assert old_extra['stopped-by-wf'] == update_wf.id
    assert old_extra.get('approved') is None
    assert old_extra['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Ensure an update workflow takes over from the pending matched one.

    Two workflows are started for the same record, the second with a new
    title.  The second must be HALTED with ``stopped-matched-holdingpen-wf``
    set, and the first must be COMPLETED, not approved, and record the
    second workflow's id under ``stopped-by-wf``.
    """
    record = generate_record()

    first_wf_id = build_workflow(record).id
    first_engine_uuid = start("article", object_id=first_wf_id)
    current_search.flush_and_refresh("holdingpen-hep")

    first_engine = WorkflowEngine.from_uuid(first_engine_uuid)
    pending_wf = first_engine.objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # Same record object, only the title differs for the second run.
    new_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = new_title

    second_wf_id = build_workflow(record).id
    start("article", object_id=second_wf_id)
    current_search.flush_and_refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(second_wf_id)

    assert update_wf.status == ObjectStatus.HALTED
    # As workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    stopped_wf = workflow_object_class.get(pending_wf_id)

    assert stopped_wf.extra_data['already-in-holding-pen'] is False
    assert stopped_wf.extra_data['previously_rejected'] is False
    assert stopped_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_wf.extra_data.get('approved') is None
    assert stopped_wf.extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_manual_accepted(
    mocked_refextract_extract_refs,
    mocked_matching_match,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download_utils,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Test a full harvesting workflow that is accepted manually.

    The workflow returned by ``get_halted_workflow`` is accepted through
    ``do_accept_core``.  It must be WAITING until both the robotupload and
    the webcoll callbacks have answered with 200, after which it must be
    COMPLETED.
    """
    # The docstring now comes first: placed after this assignment it was
    # only a discarded string expression, not the function's docstring.
    record = generate_record()

    workflow_uuid, _, obj = get_halted_workflow(
        app=workflow_app,
        record=record,
    )

    do_accept_core(
        app=workflow_app,
        workflow_id=obj.id,
    )

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    assert obj.status == ObjectStatus.WAITING

    response = do_robotupload_callback(
        app=workflow_app,
        workflow_id=obj.id,
        recids=[12345],
    )
    assert response.status_code == 200

    # Still waiting: the webcoll callback has not arrived yet.
    obj = workflow_object_class.get(obj.id)
    assert obj.status == ObjectStatus.WAITING

    response = do_webcoll_callback(app=workflow_app, recids=[12345])
    assert response.status_code == 200

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    obj = eng.processed_objects[0]
    # It was accepted
    assert obj.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Check that a new matching workflow stops the one pending in holdingpen.

    The first workflow is HALTED.  After a second workflow is started for
    the same record with a new title, the second one must be HALTED and
    flagged as having stopped a match, while the first must be COMPLETED
    with ``stopped-by-wf`` pointing at the second.
    """
    record = generate_record()

    pending_id = build_workflow(record).id
    pending_uuid = start('article', object_id=pending_id)
    es.indices.refresh('holdingpen-hep')

    old_wf = WorkflowEngine.from_uuid(pending_uuid).objects[0]
    old_wf_id = old_wf.id

    assert old_wf.status == ObjectStatus.HALTED
    assert old_wf.extra_data['previously_rejected'] is False

    # The update is the very same record object with a different title.
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    update_id = build_workflow(record).id
    update_uuid = start('article', object_id=update_id)
    es.indices.refresh('holdingpen-hep')

    update_wf = WorkflowEngine.from_uuid(update_uuid).objects[0]
    update_extra = update_wf.extra_data

    assert update_wf.status == ObjectStatus.HALTED
    assert update_extra['already-in-holding-pen'] is True
    assert update_extra['previously_rejected'] is False
    assert update_extra['stopped-matched-holdingpen-wf'] is True
    assert update_extra['is-update'] is False

    old_wf = workflow_object_class.get(old_wf_id)
    old_extra = old_wf.extra_data

    assert old_extra['already-in-holding-pen'] is False
    assert old_extra['previously_rejected'] is False
    assert old_extra['stopped-by-wf'] == update_wf.id
    assert old_extra.get('approved') is None
    assert old_extra['is-update'] is False
    assert old_wf.status == ObjectStatus.COMPLETED
def test_match_in_holdingpen_stops_pending_wf(
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    mocked_package_download,
    workflow_app,
    mocked_external_services,
):
    """Ensure an update workflow takes over from the pending matched one.

    The record is started twice, the second time with a new title.  The
    second workflow must be HALTED with ``stopped-matched-holdingpen-wf``
    set, and the first must be COMPLETED, not approved, and record the
    second workflow's id under ``stopped-by-wf``.
    """
    record = generate_record()

    first_engine_uuid = start('article', [record])
    es.indices.refresh('holdingpen-hep')

    first_engine = WorkflowEngine.from_uuid(first_engine_uuid)
    pending_wf = first_engine.objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data['previously_rejected'] is False

    # Same record object, only the title differs for the second run.
    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title

    second_engine_uuid = start('article', [record])
    es.indices.refresh('holdingpen-hep')

    second_engine = WorkflowEngine.from_uuid(second_engine_uuid)
    update_wf = second_engine.objects[0]

    assert update_wf.status == ObjectStatus.HALTED
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data['previously_rejected'] is False
    assert update_wf.extra_data['stopped-matched-holdingpen-wf'] is True
    assert update_wf.extra_data['is-update'] is False

    stopped_wf = workflow_object_class.get(pending_wf_id)

    assert stopped_wf.extra_data['already-in-holding-pen'] is False
    assert stopped_wf.extra_data['previously_rejected'] is False
    assert stopped_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_wf.extra_data.get('approved') is None
    assert stopped_wf.extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_manual_rejected(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_download,
    small_app,
):
    """Test a harvesting workflow whose record is rejected by hand.

    Inside the application context the pending action is removed from the
    workflow returned by ``get_halted_workflow``, ``approved`` is stored as
    False and the workflow is continued; it must end up COMPLETED.
    """
    record = generate_record()

    with small_app.app_context():
        workflow_uuid, _engine, halted_wf = get_halted_workflow(
            app=small_app,
            extra_config={
                "BEARD_API_URL": "http://example.com/beard",
                "MAGPIE_API_URL": "http://example.com/magpie",
            },
            record=record,
        )

        # Now let's resolve it as accepted and continue
        # FIXME Should be accept, but record validation prevents us.
        halted_wf.remove_action()
        halted_wf.extra_data["approved"] = False
        # obj.extra_data["core"] = True
        halted_wf.save()

        db.session.commit()

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        resumed_wf = engine.processed_objects[0]
        resumed_id = resumed_wf.id
        resumed_wf.continue_workflow()

        # It was rejected
        final_wf = workflow_object_class.get(resumed_id)
        assert final_wf.status == ObjectStatus.COMPLETED
def test_workflow_checks_affiliations_if_record_is_rejected_by_curator(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the collections sent for a rejected record with noisy affiliations.

    The raw affiliations embed IN2P3, CErN and Fermilab inside punctuation
    or surrounding words.  After ``approved`` is set to False and the
    workflow continued, the first request recorded by
    ``mocked_external_services`` must carry the "CDS Hidden", "HAL Hidden"
    and "Fermilab" collections and must not carry "Literature".
    """
    record = generate_record()
    authors = record['authors']
    authors[0]['raw_affiliations'] = [
        {"value": "IN2P3."},
        {"value": "Some words with CErN, inside."},
    ]
    authors[1]['raw_affiliations'] = [{"value": "Fermilab?"}]

    workflow_id = build_workflow(record).id
    rest_config = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000",
    }
    with patch.dict(workflow_app.config, rest_config):
        start("article", object_id=workflow_id)

        # Reject the record and resume synchronously.
        rejected_wf = workflow_object_class.get(workflow_id)
        rejected_wf.extra_data['approved'] = False
        rejected_wf.save()
        rejected_wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    for expected_collection in ("CDS Hidden", "HAL Hidden", "Fermilab"):
        assert expected_collection in collections_in_record
    assert "Literature" not in collections_in_record
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Ensure a resubmitted record is linked to its earlier rejected workflow.

    A workflow is built, started, rejected and completed.  A second workflow
    built from the same record with a new title must have
    ``previously_rejected`` set and reference the rejected workflow id.
    """
    record = generate_record()

    initial_wf_id = build_workflow(record).id
    initial_uuid = start("article", object_id=initial_wf_id)
    initial_engine = WorkflowEngine.from_uuid(initial_uuid)
    rejected_id = initial_engine.objects[0].id

    # Reject the record and resume the workflow.
    to_reject = workflow_object_class.get(rejected_id)
    to_reject.extra_data["approved"] = False
    to_reject.continue_workflow()

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    new_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = new_title

    # this workflow matches in the holdingpen and stops because the
    # matched one was rejected
    update_wf_id = build_workflow(record).id
    update_uuid = start("article", object_id=update_wf_id)
    update_engine = WorkflowEngine.from_uuid(update_uuid)
    update_wf = update_engine.objects[0]

    assert update_wf.extra_data["previously_rejected"] is True
    assert update_wf.extra_data["previously_rejected_matches"] == [rejected_id]
def test_do_not_repeat(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the persistent data left by steps wrapped in ``do_not_repeat``.

    The workflow callbacks are replaced twice with custom steps.  After the
    second run the persistent data must still hold the values recorded for
    ``one`` and ``two`` in the first run, plus the new value for ``three``.
    """

    def make_step(step_value):
        # Build a step that stores ``step_value`` on the object and returns it.
        def _return_value(obj, eng):
            obj.extra_data["id"] = step_value
            obj.save()
            return {"id": step_value}

        return _return_value

    first_run_steps = [
        load_from_source_data,
        do_not_repeat("one")(make_step(1)),
        do_not_repeat("two")(make_step(2)),
    ]
    second_run_steps = [
        load_from_source_data,
        do_not_repeat("one")(make_step(41)),
        do_not_repeat("two")(make_step(42)),
        do_not_repeat("three")(make_step(43)),
    ]

    expected_after_first_run = {
        'one': {'id': 1},
        'two': {'id': 2},
    }.viewitems()
    expected_after_second_run = {
        'one': {'id': 1},
        'two': {'id': 2},
        'three': {'id': 43},
    }.viewitems()

    def persistent_items(wf_obj):
        return wf_obj.extra_data['source_data']['persistent_data'].viewitems()

    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=wf_id)

        wf_obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        # First run with the two-step callback list.
        engine = WorkflowEngine.from_uuid(wf_obj.id_workflow)
        engine.callbacks.replace(first_run_steps)
        engine.process([wf_obj])

        wf_obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert expected_after_first_run <= persistent_items(wf_obj)
        assert wf_obj.extra_data['id'] == 2
        assert wf_obj.status == ObjectStatus.COMPLETED

        # Second run from the first callback with the three-step list.
        engine = WorkflowEngine.from_uuid(wf_obj.id_workflow)
        engine.callbacks.replace(second_run_steps)
        wf_obj.callback_pos = [0]
        wf_obj.save()
        db.session.commit()
        engine.process([wf_obj])

        wf_obj = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert expected_after_second_run <= persistent_items(wf_obj)
        assert wf_obj.extra_data['id'] == 43
def test_do_not_repeat(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Verify what ``do_not_repeat`` steps leave in the persistent data.

    Two custom callback lists are processed in turn.  The persistent data
    must contain ids 1 and 2 for ``one`` and ``two`` after the first list,
    and the same two entries plus id 43 for ``three`` after the second.
    """

    def step_returning(value):
        def _return_value(obj, eng):
            # Record the value on the object and hand it back as the result.
            obj.extra_data["id"] = value
            obj.save()
            return {"id": value}

        return _return_value

    initial_steps = [load_from_source_data]
    initial_steps.append(do_not_repeat("one")(step_returning(1)))
    initial_steps.append(do_not_repeat("two")(step_returning(2)))

    repeated_steps = [load_from_source_data]
    repeated_steps.append(do_not_repeat("one")(step_returning(41)))
    repeated_steps.append(do_not_repeat("two")(step_returning(42)))
    repeated_steps.append(do_not_repeat("three")(step_returning(43)))

    first_run_expectation = {'one': {'id': 1}, 'two': {'id': 2}}
    second_run_expectation = {
        'one': {'id': 1},
        'two': {'id': 2},
        'three': {'id': 43},
    }

    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=wf_id)

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]

        engine = WorkflowEngine.from_uuid(processed.id_workflow)
        engine.callbacks.replace(initial_steps)
        engine.process([processed])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]
        stored = processed.extra_data['source_data']['persistent_data']

        assert first_run_expectation.viewitems() <= stored.viewitems()
        assert processed.extra_data['id'] == 2
        assert processed.status == ObjectStatus.COMPLETED

        # Rewind to the first callback and process the longer step list.
        engine = WorkflowEngine.from_uuid(processed.id_workflow)
        engine.callbacks.replace(repeated_steps)
        processed.callback_pos = [0]
        processed.save()
        db.session.commit()
        engine.process([processed])

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        processed = engine.processed_objects[0]
        stored = processed.extra_data['source_data']['persistent_data']

        assert second_run_expectation.viewitems() <= stored.viewitems()
        assert processed.extra_data['id'] == 43