def run(self, job_name, mapper_spec, reducer_spec, input_reader_spec,
        output_writer_spec=None, mapper_params=None, reducer_params=None,
        shards=None, combiner_spec=None):
    """Drive a full MapReduce: map, shuffle, reduce, then clean up.

    Child pipelines are yielded in dependency order; the reducer's
    output becomes this pipeline's default outputs via Return.
    Intermediate files from the map and shuffle stages are deleted
    only after the reduce stage has finished with them.
    """
    mapped = yield MapPipeline(job_name,
                               mapper_spec,
                               input_reader_spec,
                               params=mapper_params,
                               shards=shards)
    shuffled = yield ShufflePipeline(job_name, mapped)
    reduced = yield ReducePipeline(job_name,
                                   reducer_spec,
                                   output_writer_spec,
                                   reducer_params,
                                   shuffled,
                                   combiner_spec=combiner_spec)
    # Barrier: cleanup must not run until the reducer has consumed
    # the intermediate map/shuffle files.
    with pipeline.After(reduced):
        intermediates = yield pipeline_common.Extend(mapped, shuffled)
        yield mapper_pipeline._CleanupPipeline(intermediates)
    yield pipeline_common.Return(reduced)
def run(self, job_name, mapper_spec, reducer_spec, input_reader_spec,
        output_writer_spec=None, mapper_params=None, reducer_params=None,
        shards=None, combiner_spec=None):
    """Run map, shuffle, and reduce stages in sequence.

    Returns the reducer pipeline's result as the default output and
    schedules cleanup of the temporary map/shuffle files behind an
    After() barrier so deletion waits for the reducer to complete.
    """
    map_stage = yield MapPipeline(job_name, mapper_spec, input_reader_spec,
                                  params=mapper_params, shards=shards)
    shuffle_stage = yield ShufflePipeline(job_name, map_stage)
    reduce_stage = yield ReducePipeline(job_name, reducer_spec,
                                        output_writer_spec, reducer_params,
                                        shuffle_stage,
                                        combiner_spec=combiner_spec)
    with pipeline.After(reduce_stage):
        # Collect every temp file produced upstream into one list,
        # then hand the lot to the shared cleanup pipeline.
        scratch = yield pipeline_common.Extend(map_stage, shuffle_stage)
        yield mapper_pipeline._CleanupPipeline(scratch)
    yield pipeline_common.Return(reduce_stage)
def run(self, job_name, filenames):
    """Shuffle *filenames*: hash into buckets, sort each chunk, merge.

    The merged files are the default output; the hashed and sorted
    intermediates are deleted once merging has completed.
    """
    buckets = yield _HashPipeline(job_name, filenames)
    chunks = yield _SortChunksPipeline(job_name, buckets)
    merged = yield _MergePipeline(job_name, chunks)
    # Only delete intermediates after the merge has read them.
    with pipeline.After(merged):
        scratch = yield pipeline_common.Extend(buckets, chunks)
        yield mapper_pipeline._CleanupPipeline(scratch)
    yield pipeline_common.Return(merged)
def run(self, job_name, filenames, shards=None):
    """Shuffle *filenames*, preferring the built-in shuffler service.

    When the files-API shuffler service is available, delegate to it
    (its child pipeline supplies the output). Otherwise fall back to
    the hash / sort / merge pipelines, returning the merged files and
    cleaning up intermediates after the merge finishes.
    """
    if files.shuffler.available():
        # Service path: the child pipeline produces the shuffled output.
        yield _ShuffleServicePipeline(job_name, filenames)
    else:
        buckets = yield _HashPipeline(job_name, filenames, shards=shards)
        chunks = yield _SortChunksPipeline(job_name, buckets)
        scratch = [buckets, chunks]
        merged = yield _MergePipeline(job_name, chunks)
        # Intermediates may only be removed once merging has consumed them.
        with pipeline.After(merged):
            cleanup_targets = yield pipeline_common.Extend(*scratch)
            yield mapper_pipeline._CleanupPipeline(cleanup_targets)
        yield pipeline_common.Return(merged)
def run(self, job_name, mapper_params, filenames, shards=None):
    """Shuffle GCS *filenames* via hash / sort / merge pipelines.

    Reads the GCS bucket from mapper_params["bucket_name"], strips the
    bucket prefix from the incoming filenames, then hashes, sorts, and
    merges. The merged files are returned; hashed and sorted
    intermediates are deleted once merging has completed.
    """
    bucket_name = mapper_params["bucket_name"]
    # Downstream pipelines expect bucket-relative paths.
    stripped = _strip_bucket_name(bucket_name, filenames)
    buckets = yield _HashPipeline(job_name, bucket_name, stripped,
                                  shards=shards)
    chunks = yield _SortChunksPipeline(job_name, buckets)
    scratch = [buckets, chunks]
    merged = yield _MergePipeline(job_name, chunks)
    # Hold cleanup behind the merge so intermediates outlive their readers.
    with pipeline.After(merged):
        cleanup_targets = yield pipeline_common.Extend(*scratch)
        yield mapper_pipeline._CleanupPipeline(cleanup_targets)
    yield pipeline_common.Return(merged)