def test_get_processor(self):
    # get(index) must hand back processors in the order they were added
    # and raise IndexError for an index past the last one.
    first = MagicMock()
    second = MagicMock()
    registry = Processors()
    registry.add(first)
    registry.add(second)

    self.assertEqual(registry.get(0), first)
    self.assertEqual(registry.get(1), second)
    with self.assertRaises(IndexError):
        registry.get(2)
def test_add_multiple_processors(self):
    # add_multiple() must register every processor of the list, in order.
    batch = [MagicMock(), MagicMock(), MagicMock()]
    registry = Processors()

    registry.add_multiple(list(batch))

    self.assertEqual(registry.count, 3)
    self.assertEqual(registry._processors, batch)
def test_job_failed_because_a_job_execution_exception_was_raised(
        self, retrieve_and_process_items_mock):
    # NOTE(review): retrieve_and_process_items_mock is presumably injected
    # by a patch decorator that is not visible in this chunk -- confirm.
    # When the patched step raises JobExecutionError the run must be
    # reported as failed and no item may reach the processor.
    retrieve_and_process_items_mock.side_effect = JobExecutionError

    processor = MagicMock()
    registry = Processors()
    registry.add(processor)
    source = MagicMock()

    outcome = Job(source, registry).run()

    self.assertTrue(outcome.failed)
    processor.process_item.assert_not_called()
def test_job_failed_because_the_source_raised_an_exception(self):
    # A ValueError raised by source.items() must mark the run as failed
    # without any item being handed to the processor.
    processor = MagicMock()
    registry = Processors()
    registry.add(processor)

    source = MagicMock()
    source.items.side_effect = ValueError

    outcome = Job(source, registry).run()

    self.assertTrue(outcome.failed)
    processor.process_item.assert_not_called()
    source.items.assert_called_with()
def test_processor_failed_to_process_item(self):
    # An ItemProcessingError on every item must not fail the job, and
    # each of the three items must still be offered to the processor.
    source = MagicMock()
    source.items.return_value = [1, 2, 3]

    processor = MagicMock()
    processor.process_item.side_effect = ItemProcessingError

    registry = Processors()
    registry.add(processor)

    outcome = Job(source, registry).run()

    self.assertFalse(outcome.failed)
    expected_calls = [call(source, item) for item in (1, 2, 3)]
    processor.process_item.assert_has_calls(expected_calls)
    source.items.assert_called_with()
def test_processor_raised_an_exception_while_processing_item(self):
    # A ValueError raised by process_item must not mark the job as failed.
    source_factory = MagicMock()
    source = source_factory.return_value
    source.items.return_value = [1, 2, 3]
    source.job_started = MagicMock()
    source.job_finished = MagicMock()

    processor_factory = MagicMock()
    processor = processor_factory.return_value
    processor.process_item = MagicMock()
    processor.process_item.side_effect = ValueError

    registry = Processors()
    registry.add(processor)

    outcome = Job(source, registry).run()

    self.assertFalse(outcome.failed)
def test_job_finished(self):
    # job_finished() on the collection must be forwarded, with the same
    # job object, to the registered processor exactly once.
    member = MagicMock()
    finished_job = MagicMock()
    registry = Processors()
    registry.add(member)

    registry.job_finished(finished_job)

    member.job_finished.assert_called_once_with(finished_job)
def test_configure_processors(self):
    # configure_all() must call configure() on the registered processor
    # exactly once with the configuration it was given.
    member = MagicMock()
    registry = Processors()
    registry.add(member)
    settings = {}

    registry.configure_all(settings)

    member.configure.assert_called_once_with(settings)
def test_process_item(self):
    # process_item() on the collection must be forwarded once to the
    # registered processor with the same source and item.
    member = MagicMock()
    registry = Processors()
    registry.add(member)
    origin = MagicMock()
    payload = MagicMock()

    registry.process_item(origin, payload)

    member.process_item.assert_called_once_with(origin, payload)
def test_run_job(self):
    # Happy path: the run is not failed, source and processor are both
    # notified of start and finish, and each item is processed.
    source_factory = MagicMock()
    source = source_factory.return_value
    source.items.return_value = [1, 2, 3]
    source.job_started = MagicMock()
    source.job_finished = MagicMock()

    processor_factory = MagicMock()
    processor = processor_factory.return_value
    processor.process_item = MagicMock()
    processor.process_item.return_value = True
    processor.job_started = MagicMock()
    processor.job_finished = MagicMock()

    registry = Processors()
    registry.add(processor)

    job = Job(source, registry)
    outcome = job.run()

    self.assertFalse(outcome.failed)

    # Start notifications and item retrieval.
    source.job_started.assert_called_with(job)
    processor.job_started.assert_called_with(job)
    source.items.assert_called_with()

    # Every item must have been passed along with its source.
    expected_calls = [call(source, item) for item in (1, 2, 3)]
    processor.process_item.assert_has_calls(expected_calls)

    # Finish notifications.
    source.job_finished.assert_called_with(job)
    processor.job_finished.assert_called_with(job)
def test_remove_processor(self):
    # remove(0) must drop the first processor and keep the second one.
    dropped = MagicMock()
    kept = MagicMock()
    registry = Processors()
    registry.add(dropped)
    registry.add(kept)

    registry.remove(0)

    self.assertEqual(registry._processors, [kept])
def create_hackernews_api_crawler_job(config, session):
    """Build the job that will crawl hackernews.

    A ``Processors`` collection is filled first with a
    ``SQLAlchemyStorage`` bound to ``session`` and then with one object
    per entry of ``config["PROCESSORS"]``: each entry is passed to
    ``import_string`` and the result is called with no arguments. The
    whole collection is configured with ``config`` before the job is
    created.

    :param dict config: the job configuration; must contain the
        ``"PROCESSORS"`` key
    :param Session session: the sqlalchemy Session object to use
    :rtype: HackernewsCrawlJob
    :return: the job object
    """
    logger.info("Initializing data processors")

    processors = Processors()
    # The storage processor is registered ahead of the configured ones.
    processors.add(SQLAlchemyStorage(session))

    extra_processors = []
    for dotted_path in config["PROCESSORS"]:
        processor_factory = import_string(dotted_path)
        extra_processors.append(processor_factory())
    processors.add_multiple(extra_processors)

    processors.configure_all(config)
    logger.info("Data processors initialized")

    job = HackernewsCrawlJob(config, processors)
    logger.info("Created job with id %s", job.id)
    return job