def test_mapreduce_over_entities(self):
    """
        Start a `map_reduce_entities` pipeline over the `TestModel` table,
        process the task queues, and check that the refetched pipeline
        record reports `has_finalized`.
    """
    table = TestModel._meta.db_table
    namespace = connection.settings_dict["NAMESPACE"]

    started = map_reduce_entities(
        table,
        namespace,
        yield_letters,
        reduce_count,
        output_writers.GoogleCloudStorageKeyValueOutputWriter,
        _output_writer_kwargs={'bucket_name': 'test-bucket'},
    )
    self.process_task_queues()

    # Refetch the pipeline record by id before checking its state
    refetched = get_pipeline_by_id(started.pipeline_id)
    self.assertTrue(refetched.has_finalized)
def test_mapreduce_over_entities(self):
    """
        A pipeline started by `map_reduce_entities` for the `TestModel`
        table should have finalized after the task queues are processed.
    """
    launched = map_reduce_entities(
        TestModel._meta.db_table,
        connection.settings_dict["NAMESPACE"],
        yield_letters,
        reduce_count,
        output_writers.GoogleCloudStorageKeyValueOutputWriter,
        _output_writer_kwargs={'bucket_name': 'test-bucket'},
    )
    pipeline_id = launched.pipeline_id

    self.process_task_queues()

    # The assertion is made against a freshly fetched pipeline record,
    # not against the object returned when the pipeline was started.
    self.assertTrue(get_pipeline_by_id(pipeline_id).has_finalized)
def test_filters(self):
    """
        Passing the `_filters` kwarg to `map_reduce_entities` should allow
        only some entities to be processed.
    """
    counter = Counter.objects.create()
    table = TestModel._meta.db_table
    namespace = connection.settings_dict["NAMESPACE"]

    started = map_reduce_entities(
        table,
        namespace,
        count_entity_to_default_counter,
        # This is a no-op because count_entity doesn't return anything
        reduce_count,
        output_writers.GoogleCloudStorageKeyValueOutputWriter,
        _output_writer_kwargs={'bucket_name': 'test-bucket'},
        _filters=[("text", "=", "abcc-3")],
    )
    self.process_task_queues()

    # Refetch the pipeline record by id before checking its state
    refetched = get_pipeline_by_id(started.pipeline_id)
    self.assertTrue(refetched.has_finalized)

    # We expect only the one entity to have been counted
    counter.refresh_from_db()
    self.assertEqual(counter.count, 1)
def test_filters(self):
    """
        When `map_reduce_entities` is given `_filters`, only some of the
        entities should be processed; here the counter is expected to end
        up at exactly 1.
    """
    counter = Counter.objects.create()

    writer_kwargs = {'bucket_name': 'test-bucket'}
    entity_filters = [("text", "=", "abcc-3")]

    launched = map_reduce_entities(
        TestModel._meta.db_table,
        connection.settings_dict["NAMESPACE"],
        count_entity_to_default_counter,
        reduce_count,  # This is a no-op because count_entity doesn't return anything
        output_writers.GoogleCloudStorageKeyValueOutputWriter,
        _output_writer_kwargs=writer_kwargs,
        _filters=entity_filters,
    )
    pipeline_id = launched.pipeline_id

    self.process_task_queues()

    # The finalized check is made against a freshly fetched pipeline record
    self.assertTrue(get_pipeline_by_id(pipeline_id).has_finalized)

    # We expect only the one entity to have been counted
    counter.refresh_from_db()
    self.assertEqual(counter.count, 1)