def test_0101_run_reruns_once_record_has_been_modified(self):
    """Run a task twice, modifying only the first record between the runs.

    After the second run the first record's ``field1`` is expected to equal
    ``3.4 * 2`` while the second record's ``field1`` is expected to still
    equal ``3.4``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given a copy of the shared task data with forced re-runs switched off
    task_data = dict(TestApi.task_data, force_run_on_unmodified_records=False)
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_with_class), \
            patch('invenio_checker.models.Query', query_cls):
        # First run
        first_id = run_task(task_data['name'])
        first_run = CheckerRuleExecution.query.filter(
            CheckerRuleExecution.uuid == first_id).one()
        assert first_run.status == StatusMaster.completed

        # Touch the first record only, using a key unrelated to `field1`
        touched = get_record(small_rng[0])
        touched['unrelated_thing'] = 'unrelated_key'
        touched.commit()

        # Second run
        second_id = run_task(task_data['name'])

    second_run = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == second_id).one()
    assert second_run.status == StatusMaster.completed
    assert get_record(small_rng[0])['field1'] == 3.4 * 2
    assert get_record(small_rng[1])['field1'] == 3.4
def test_074_run_modifies_records_appropriately(self):
    """Run the same task twice and check ``field1`` of every record each time.

    The first run is expected to leave ``field1 == 3.4`` on all records, the
    second run ``field1 == 3.4 * 2``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given the shared task data, unmodified
    # (presumably configured to force re-running on unmodified records)
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    def run_and_fetch_execution():
        # Run the task under the patched check file and query, then look up
        # the stored execution once the patches have been released.
        with patch('invenio_checker.models.CheckerRule.filepath',
                   filepath_with_class), \
                patch('invenio_checker.models.Query', query_cls):
            task_id = run_task(task_data['name'])
        return CheckerRuleExecution.query.filter(
            CheckerRuleExecution.uuid == task_id).one()

    # The first run modifies the records as coded
    first_run = run_and_fetch_execution()
    assert first_run.status == StatusMaster.completed
    for recid in small_rng:
        assert get_record(recid)['field1'] == 3.4

    # The next run picks up the changes made by the previous one
    second_run = run_and_fetch_execution()
    assert second_run.status == StatusMaster.completed
    for recid in small_rng:
        assert get_record(recid)['field1'] == 3.4 * 2
def task_run():
    """Run every task named in the request's ``task_names[]`` values.

    Returns an empty JSON object after ``run_task`` has been called for each
    matching ``CheckerRule``, or a JSON error with status 400 when the number
    of rules found does not match the number of distinct names requested.
    """
    requested_task_names = request.values.getlist('task_names[]')
    tasks_in_db = CheckerRule.query.filter(
        CheckerRule.name.in_(requested_task_names)).all()
    # Compare against the *distinct* requested names: the IN query returns
    # each matching rule once, so a request that repeats a name must not be
    # reported as having missing tasks.
    # NOTE(review): assumes rule names are unique in the database -- confirm.
    if len(tasks_in_db) != len(set(requested_task_names)):
        return jsonify({'error': 'Missing tasks requested'}), 400
    for task in tasks_in_db:
        run_task(task.name)
    return jsonify({})
def test_036_run_task_with_missing_name_raises(self):
    """``run_task`` raises ``NoResultFound`` for a task name not in the database."""
    from invenio_checker.api import run_task

    # A prefixed name for which no task has been created in this test
    absent_name = TestApi.test_entry_prefix + "does not exist"

    # The lookup failure is expected to surface directly from run_task
    with pytest.raises(NoResultFound):
        run_task(absent_name)
def test_0103_worker_that_spun_returns_result(self):
    """Run a task whose conflict check reports conflicts twice, then none.

    The run is expected to complete, with the conflict check having been
    consulted exactly three times.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task

    # Given a task
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..whose conflict check yields two non-empty answers before an empty one
    def conflict_yielder():
        # The counter lives at module level so the test body can read it.
        global conflict_check_count
        answers = (
            {MagicMock(uuid=1), MagicMock(uuid=2)},
            {MagicMock(uuid=1)},
            {},
        )
        conflict_check_count = 0
        for conflict_check_count, answer in enumerate(answers, 1):
            yield answer

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_without_class), \
            patch('invenio_checker.models.Query', query_cls), \
            patch('invenio_checker.conftest.conftest_checker.'
                  '_worker_conflicts_with_currently_running',
                  side_effect=conflict_yielder()):
        task_id = run_task(task_data['name'])

    # Ensure it finishes successfully after consuming every answer
    lookup = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = lookup.one()
    assert execution.status == StatusMaster.completed
    assert conflict_check_count == 3
def test_0104_worker_does_not_conflict_with_others_when_not_using_record_related_fixtures(self):
    """Run a check file without record-fetching fixtures and inspect the worker.

    The patched conflict check asserts that the worker it receives has
    ``allowed_paths`` and ``allowed_recids`` both set to ``None``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task

    # Given a task
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..whose worker must carry no path/recid restrictions and never conflicts
    def expect_unrestricted_worker(worker):
        assert worker.allowed_paths is None
        assert worker.allowed_recids is None
        return {}

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_without_any_record_fetching_fixture), \
            patch('invenio_checker.models.Query', query_cls), \
            patch('invenio_checker.conftest.conftest_checker.'
                  '_worker_conflicts_with_currently_running',
                  side_effect=expect_unrestricted_worker):
        task_id = run_task(task_data['name'])

    # Ensure it finishes successfully
    lookup = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = lookup.one()
    assert execution.status == StatusMaster.completed
def test_0100_run_does_not_run_twice_on_records_that_did_not_change_since_last_run(self):
    """Run a non-forcing task twice in a row without touching the records.

    After the second run every record's ``field1`` is expected to still
    equal ``3.4``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given a copy of the shared task data with forced re-runs switched off
    task_data = dict(TestApi.task_data, force_run_on_unmodified_records=False)
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_with_class), \
            patch('invenio_checker.models.Query', query_cls):
        # Only the second run's execution is inspected below
        run_task(task_data['name'])
        second_id = run_task(task_data['name'])

    second_run = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == second_id).one()
    assert second_run.status == StatusMaster.completed
    for recid in small_rng:
        assert get_record(recid)['field1'] == 3.4
def test_077_run_on_non_record_centric_calls_check_function(self):
    """Run the non-record-centric check file twice on two fixed records.

    Afterwards record 6000000 is expected to hold ``a_field == 'a_value_2'``
    and record 6000001 ``a_field == 'another_value'``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given a copy of the shared task data with forced re-runs switched off
    task_data = dict(TestApi.task_data, force_run_on_unmodified_records=False)
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records([6000000, 6000001])

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_non_record_centric), \
            patch('invenio_checker.models.Query', query_cls):
        # Only the second run's execution is inspected below
        run_task(task_data['name'])
        second_id = run_task(task_data['name'])

    second_run = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == second_id).one()
    assert second_run.status == StatusMaster.completed
    assert get_record(6000000)['a_field'] == 'a_value_2'
    assert get_record(6000001)['a_field'] == 'another_value'
def test_043_run_task_creates_execution_object_in_database(self):
    """Running a task leaves a ``CheckerRuleExecution`` row with its uuid."""
    from invenio_checker.api import run_task, create_task

    # Given a task
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_with_class), \
            patch('invenio_checker.models.Query', query_cls):
        task_id = run_task(task_data['name'])

    # The execution must be retrievable from the database by the returned id
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.first()
    assert execution
def test_055_run_reports_successful_completion_without_class(self):
    """A run using the ``filepath_without_class`` check file completes."""
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import run_task, create_task

    # Given a task whose check file is the one without a class
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_without_class), \
            patch('invenio_checker.models.Query', query_cls):
        task_id = run_task(task_data['name'])

    # Make sure it reports successful completion
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.completed
def test_0110_run_with_multiple_functions_in_check_file_fails(self):
    """A run using the ``filepath_two_check_functions`` check file fails."""
    from invenio_checker.api import create_task, run_task
    from invenio_checker.clients.master import StatusMaster

    # Given a task..
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..whose check file defines too many check functions
    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_two_check_functions), \
            patch('invenio_checker.models.Query', query_cls):
        task_id = run_task(task_data['name'])

    # The stored execution must be marked as failed
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.failed
def test_0102_run_without_record_fixture_runs_once(self):
    """Run the ``filepath_without_record_fixture`` check file once.

    Afterwards only the first record of ``small_rng`` is expected to carry
    the ``this_record_has_id_1`` key. ``Query`` is not patched in this test.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given a task
    task_data = TestApi.task_data
    create_task(task_data)
    self.create_records(small_rng)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_without_record_fixture):
        task_id = run_task(task_data['name'])

    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.completed

    first_recid, second_recid = small_rng[0], small_rng[1]
    assert get_record(first_recid)['this_record_has_id_1'] == True
    assert 'this_record_has_id_1' not in get_record(second_recid)
def test_077_run_with_dry_run_does_not_call_reporters_for_exceptions(self):
    """A failing dry run must leave ``reported_exceptions`` at zero."""
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given the shared task data, unmodified
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..run in dry-run mode with the `filepath_raises_exception_every_time`
    # check file
    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_raises_exception_every_time), \
            patch('invenio_checker.models.Query', query_cls):
        task_id = run_task(task_data['name'], dry_run=True)

    # The execution fails, yet no exception has been reported
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.failed
    assert reported_exceptions == 0
def test_0105_run_initialized_reporters_only_when_not_spinning(self):
    """Check the call order of the conflict check and reporter creation.

    The conflict check is mocked to report one conflict and then none; the
    recorded calls must contain two ``conflict_resolver`` calls followed by
    ``get_reporter``.
    """
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task, create_reporter

    # Given a task with a reporter
    task_data = TestApi.task_data
    create_task(task_data)
    create_reporter(TestApi.reporter_data)
    query_cls = get_Query(task_data)
    self.create_records(small_rng)

    # ..while tracking the reporter's initialization
    reporterA = reimport_module('tests.demo_package.checkerext.reporters.reporterA')
    get_reporter_mock = MagicMock()
    reporterA.get_reporter = get_reporter_mock

    # ..as well as calls to the task conflict resolver
    conflict_resolver = Mock(side_effect=({MagicMock(uuid=1)}, {}))

    # A shared parent mock records the calls of both children in one list
    mock_manager = Mock()
    mock_manager.attach_mock(get_reporter_mock, 'get_reporter')
    mock_manager.attach_mock(conflict_resolver, 'conflict_resolver')

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_without_class), \
            patch('invenio_checker.models.Query', query_cls), \
            patch('invenio_checker.models.CheckerReporter.module',
                  reporterA), \
            patch('invenio_checker.conftest.conftest_checker.'
                  '_worker_conflicts_with_currently_running',
                  conflict_resolver):
        task_id = run_task(task_data['name'])

    # (Better safe than (very) sorry)
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.completed

    # Ensure that the reporter was not initialized while conflicts remained
    from ..conftest import contains_sublist
    called_names = [recorded[0] for recorded in mock_manager.mock_calls]
    expected_order = [
        'conflict_resolver',
        'conflict_resolver',
        'get_reporter',
    ]
    assert contains_sublist(called_names, expected_order)
def test_0106_run_does_not_start_when_file_is_missing(self):
    """``run_task`` raises when the rule's check file path is ``None``.

    ..note::
        Records should not be required for this failure to happen, but
        currently they are. FIXME
    """
    from invenio_checker.api import create_task, run_task
    from invenio_checker.clients.master import StatusMaster

    # Given a task..
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..whose check file is absent
    with patch('invenio_checker.models.CheckerRule.filepath', None), \
            patch('invenio_checker.models.Query', query_cls):
        with pytest.raises(Exception):
            run_task(task_data['name'])
def test_075_run_with_dry_run_does_not_modify_records(self):
    """A completed dry run must not leave a ``field1`` key on any record."""
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import create_task, run_task
    from invenio_records.api import get_record

    # Given the shared task data, unmodified
    task_data = TestApi.task_data
    query_cls = get_Query(task_data)
    create_task(task_data)
    self.create_records(small_rng)

    # ..run in dry-run mode
    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_with_class), \
            patch('invenio_checker.models.Query', query_cls):
        task_id = run_task(task_data['name'], dry_run=True)

    # The run completes, but the records are left without `field1`
    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.completed
    for recid in small_rng:
        assert 'field1' not in get_record(recid)
def test_0110_run_calls_reporters_when_check_wants_to_log(self):
    """A completed run must leave ``reported_reports`` equal to the record count."""
    from invenio_checker.clients.master import StatusMaster
    from invenio_checker.api import run_task, create_task, create_reporter

    # Given a task..
    task_data = TestApi.task_data
    create_task(task_data)
    # ..with a reporter attached
    create_reporter(TestApi.reporter_data)
    # ..and some records in the database
    self.create_records(small_rng)

    reporterA = reimport_module('tests.demo_package.checkerext.reporters.reporterA')
    query_cls = get_Query(task_data)

    with patch('invenio_checker.models.CheckerRule.filepath',
               filepath_with_class), \
            patch('invenio_checker.models.Query', query_cls), \
            patch('invenio_checker.models.CheckerReporter.module',
                  reporterA):
        task_id = run_task(task_data['name'])

    matching = CheckerRuleExecution.query.filter(
        CheckerRuleExecution.uuid == task_id)
    execution = matching.one()
    assert execution.status == StatusMaster.completed
    assert reported_reports == len(small_rng)
def submit_task():
    """Create a new task or update an existing one, along with its reporters.

    The submitted forms are validated first; on validation errors a 400
    response with ``failure_type`` ``'validation'`` is returned. All database
    changes are made uncommitted and committed together; any exception rolls
    the session back and yields a 400 response with ``failure_type``
    ``'general'`` carrying the formatted traceback. When the requested action
    starts with ``'submit_run'`` the task is also run after the commit.
    """
    from invenio_checker.clients.supervisor import run_task

    def failure(type_, errors):
        assert type_ in ('general', 'validation')
        return jsonify({'failure_type': type_, 'errors': errors}), 400

    def success():
        return jsonify({})

    def reporter_fields(form_reporter, rule_name):
        # Values stored for a reporter, whether it is created or edited.
        return {
            'plugin': form_reporter.plugin_name,
            'rule_name': rule_name,
            'arguments': form_reporter.data_for_db,
        }

    # Rebuild the forms previously served to the user so they can be
    # validated against the submitted values.
    form_origin = get_NewTaskForm(request.form)
    form_plugin = get_ArgForm(request.form['plugin'], request.form)

    # Both validators always run (no short-circuit) so that the errors of
    # both forms are available together.
    origin_valid = form_origin.validate()
    plugin_valid = form_plugin.validate()
    if not (origin_valid & plugin_valid):
        form_errors = defaultdict(list)
        for form in (form_origin, form_plugin):
            for field, errors in form.errors.items():
                form_errors[field].extend(errors)
        return failure('validation', form_errors)

    # Start from the form data, then strip the metadata that is not meant to
    # be passed on to the database object.
    form_for_db = dict(form_origin.data)
    modify = form_for_db.pop('modify')
    original_name = form_for_db.pop('original_name')
    requested_action = form_for_db.pop('requested_action')
    reporter_names = set(form_for_db.pop('reporters'))
    form_for_db['arguments'] = form_plugin.data_for_db

    try:
        # Create or edit the task itself
        if modify:
            task = edit_task(original_name, form_for_db, commit=False)
        else:
            task = create_task(form_for_db, commit=False)

        # Create or edit every selected reporter and attach it to the task
        for reporter_name in reporter_names:
            form_reporter = get_ArgForm(reporter_name, request.form)
            try:
                reporter = get_reporter_db(form_reporter.plugin_name,
                                           original_name)
            except NoResultFound:
                create_reporter(reporter_fields(form_reporter, task.name),
                                commit=False)
            else:
                edit_reporter(reporter,
                              reporter_fields(form_reporter, task.name),
                              commit=False)

        if modify:
            # Delete reporters that are no longer selected
            attached_plugins = {attached.plugin for attached in task.reporters}
            for stale_plugin in attached_plugins - reporter_names:
                stale_reporter = get_reporter_db(stale_plugin, original_name)
                remove_reporter(stale_reporter, commit=False)

        db.session.commit()
    except Exception:
        db.session.rollback()
        return failure('general', format_exc())

    if requested_action.startswith('submit_run'):
        try:
            run_task(task.name)
        except Exception:
            return failure('general', format_exc())

    return success()
def submit_task():
    """Create a new task or update an existing one, along with its reporters.

    The submitted forms are validated first; on validation errors a 400
    response with ``failure_type`` ``'validation'`` is returned. All database
    changes are made uncommitted and committed together; any exception rolls
    the session back and yields a 400 response with ``failure_type``
    ``'general'`` carrying the formatted traceback. When the requested action
    starts with ``'submit_run'`` the task is also run after the commit.
    """
    from invenio_checker.clients.supervisor import run_task

    def failure(type_, errors):
        assert type_ in ('general', 'validation')
        return jsonify({'failure_type': type_, 'errors': errors}), 400

    def success():
        return jsonify({})

    def reporter_fields(form_reporter, rule_name):
        # Values stored for a reporter, whether it is created or edited.
        return {
            'plugin': form_reporter.plugin_name,
            'rule_name': rule_name,
            'arguments': form_reporter.data_for_db,
        }

    # Rebuild the forms previously served to the user so they can be
    # validated against the submitted values.
    form_origin = get_NewTaskForm(request.form)
    form_plugin = get_ArgForm(request.form['plugin'], request.form)

    # Both validators always run (no short-circuit) so that the errors of
    # both forms are available together.
    origin_valid = form_origin.validate()
    plugin_valid = form_plugin.validate()
    if not (origin_valid & plugin_valid):
        form_errors = defaultdict(list)
        for form in (form_origin, form_plugin):
            for field, errors in form.errors.items():
                form_errors[field].extend(errors)
        return failure('validation', form_errors)

    # Start from the form data, then strip the metadata that is not meant to
    # be passed on to the database object.
    form_for_db = dict(form_origin.data)
    modify = form_for_db.pop('modify')
    original_name = form_for_db.pop('original_name')
    requested_action = form_for_db.pop('requested_action')
    reporter_names = set(form_for_db.pop('reporters'))
    form_for_db['arguments'] = form_plugin.data_for_db

    try:
        # Create or edit the task itself
        if modify:
            task = edit_task(original_name, form_for_db, commit=False)
        else:
            task = create_task(form_for_db, commit=False)

        # Create or edit every selected reporter and attach it to the task
        for reporter_name in reporter_names:
            form_reporter = get_ArgForm(reporter_name, request.form)
            try:
                reporter = get_reporter_db(form_reporter.plugin_name,
                                           original_name)
            except NoResultFound:
                create_reporter(reporter_fields(form_reporter, task.name),
                                commit=False)
            else:
                edit_reporter(reporter,
                              reporter_fields(form_reporter, task.name),
                              commit=False)

        if modify:
            # Delete reporters that are no longer selected
            attached_plugins = {attached.plugin for attached in task.reporters}
            for stale_plugin in attached_plugins - reporter_names:
                stale_reporter = get_reporter_db(stale_plugin, original_name)
                remove_reporter(stale_reporter, commit=False)

        db.session.commit()
    except Exception:
        db.session.rollback()
        return failure('general', format_exc())

    if requested_action.startswith('submit_run'):
        try:
            run_task(task.name)
        except Exception:
            return failure('general', format_exc())

    return success()