def test_process_step_in_batch_does_not_call_docs_save(self):
    # Running a step in batch must never call save() on any document.
    passthrough = mock.Mock(wraps=lambda x: x)
    documents = [mock.Mock() for _ in range(5)]

    pipeline = PreProcessPipeline([passthrough], documents)
    pipeline.process_step_in_batch(passthrough)

    # A single truthy flag means some document's save() was invoked.
    save_was_called = any(document.save.called for document in documents)
    self.assertFalse(save_was_called)
def test_process_step_in_batch_does_not_call_docs_save(self):
    # Batch processing of a step is expected to leave every document's
    # save() method untouched.
    doc_count = 5
    fake_docs = [mock.Mock() for _ in range(doc_count)]
    step = mock.Mock(wraps=lambda x: x)

    batch_pipeline = PreProcessPipeline([step], fake_docs)
    batch_pipeline.process_step_in_batch(step)

    called_flags = [fake_doc.save.called for fake_doc in fake_docs]
    self.assertEqual(called_flags, [False] * doc_count)
def test_process_step_in_batch_does_nothing_with_previous_steps_runner(self):
    # Processing the second step in batch must not invoke the runner of
    # the step that precedes it in the pipeline.
    def identity(value):
        return value

    earlier_runner = mock.Mock(wraps=identity)
    later_runner = mock.Mock(wraps=identity)
    documents = [object() for _ in range(5)]

    pipeline = PreProcessPipeline([earlier_runner, later_runner], documents)
    pipeline.process_step_in_batch(later_runner)

    self.assertEqual(earlier_runner.call_count, 0)
def test_process_step_in_batch_does_nothing_with_previous_steps_runner(
        self):
    # Only the requested step's runner may run; the previous one stays idle.
    first, second = (mock.Mock(wraps=lambda x: x) for _ in range(2))
    items = [object() for _ in range(5)]
    steps = [first, second]

    PreProcessPipeline(steps, items).process_step_in_batch(second)

    untouched = not first.called
    self.assertTrue(untouched)
def test_process_step_in_batch_applies_runner_to_all_documents(self):
    # The mock wraps a plain function on purpose, so that the runner does
    # not expose a "step" attribute.
    identity = lambda x: x
    plain_runner = mock.Mock(wraps=identity)
    documents = [object() for _ in range(5)]

    pipeline = PreProcessPipeline([plain_runner], documents)
    pipeline.process_step_in_batch(plain_runner)

    # One call per document, in document order.
    expected_calls = [mock.call(document) for document in documents]
    self.assertEqual(plain_runner.call_count, len(documents))
    self.assertEqual(plain_runner.call_args_list, expected_calls)
def test_process_step_in_batch_applies_runner_to_all_documents(self):
    # A runner lacking the "step" attribute has to be applied to each
    # document exactly once.
    def _echo(doc):
        # Plain function: wrapping it keeps "step" off the mock.
        return doc

    items = [object() for _ in range(5)]
    echo_runner = mock.Mock(wraps=_echo)

    PreProcessPipeline([echo_runner], items).process_step_in_batch(echo_runner)

    total_calls = echo_runner.call_count
    self.assertEqual(total_calls, len(items))
    self.assertEqual(echo_runner.call_args_list, list(map(mock.call, items)))
def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
    # A runner carrying a "step" attribute is applied only to the documents
    # reported as lacking that preprocess step.
    step_runner = mock.MagicMock(
        step=PreProcessSteps.tokenization,
        override=False,
        increment=False,
    )
    all_docs = [object() for _ in range(5)]

    # Iterating the manager yields all 5 docs, whereas
    # get_documents_lacking_preprocess hands back just the first 2.
    self.patch_object(DocumentManager, '__iter__', return_value=all_docs)
    lacking_lookup = self.patch_object(
        DocumentManager,
        'get_documents_lacking_preprocess',
        return_value=all_docs[:2],
    )

    pipeline = PreProcessPipeline([step_runner], DocumentManager())
    pipeline.process_step_in_batch(step_runner)

    lacking_lookup.assert_called_once_with(step_runner.step)
    self.assertNotEqual(step_runner.call_count, 5)
    self.assertEqual(step_runner.call_count, 2)
    expected_calls = [mock.call(doc) for doc in all_docs[:2]]
    self.assertEqual(step_runner.call_args_list, expected_calls)
def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
    # With a runner that declares a "step", the pipeline must ask the docs
    # manager which documents lack it and process only those.
    all_docs = [object() for _ in range(5)]
    step_runner = mock.MagicMock(
        step=PreProcessSteps.tokenization,
        override=False,
        increment=False,
    )

    # The fake manager holds 5 docs but reports only 2 of them as lacking
    # the preprocess step.
    manager = mock.MagicMock()
    manager.__iter__.return_value = all_docs
    lacking_lookup = manager.get_documents_lacking_preprocess
    lacking_lookup.side_effect = lambda _step: all_docs[:2]

    PreProcessPipeline([step_runner], manager).process_step_in_batch(
        step_runner)

    lacking_lookup.assert_called_once_with(step_runner.step)
    self.assertNotEqual(step_runner.call_count, 5)
    self.assertEqual(step_runner.call_count, 2)
    self.assertEqual(
        step_runner.call_args_list,
        [mock.call(doc) for doc in all_docs[:2]],
    )
def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
    # Checks that documents are filtered through
    # get_documents_lacking_preprocess when the runner has a "step".
    runner_attrs = {
        'step': PreProcessSteps.tokenization,
        'override': False,
        'increment': False,
    }
    step_runner = mock.MagicMock(**runner_attrs)
    all_docs = [object() for _ in range(5)]

    # DocumentManager is patched to iterate over every doc while reporting
    # only the first two as still lacking the step.
    self.patch_object(DocumentManager, '__iter__', return_value=all_docs)
    dm_get_docs = self.patch_object(
        DocumentManager, 'get_documents_lacking_preprocess',
        return_value=all_docs[:2])

    docs_manager = DocumentManager()
    PreProcessPipeline([step_runner], docs_manager).process_step_in_batch(
        step_runner)

    dm_get_docs.assert_called_once_with(step_runner.step)
    observed_count = step_runner.call_count
    self.assertNotEqual(observed_count, 5)
    self.assertEqual(observed_count, 2)
    self.assertEqual(step_runner.call_args_list,
                     list(map(mock.call, all_docs[:2])))
def test_process_step_in_batch_filter_docs_to_apply_if_has_attr_step(self):
    # The runner exposes "step", so only the documents returned by
    # get_documents_lacking_preprocess should be handed to it.
    step_runner = mock.MagicMock(
        step=PreProcessSteps.tokenization, override=False)
    all_docs = [object() for _ in range(5)]

    def only_first_two(_step):
        # Five docs exist in the manager; just two are reported as lacking.
        return all_docs[:2]

    docs_manager = mock.MagicMock()
    docs_manager.__iter__.return_value = all_docs
    docs_manager.get_documents_lacking_preprocess.side_effect = only_first_two

    pipeline = PreProcessPipeline([step_runner], docs_manager)
    pipeline.process_step_in_batch(step_runner)

    docs_manager.get_documents_lacking_preprocess.assert_called_once_with(
        step_runner.step)
    self.assertNotEqual(step_runner.call_count, 5)
    self.assertEqual(step_runner.call_count, 2)
    processed = [mock.call(doc) for doc in all_docs[:2]]
    self.assertEqual(step_runner.call_args_list, processed)