class Quant_Func_Reducer:
    """Reducer that folds all values of a key together with ``+=``."""

    def __call__(self, key, values):
        """Yield a single ``(key, total)`` pair.

        The first value is used as the accumulator and every following
        value is added to it with ``+=``. Nothing is yielded when
        ``values`` is empty.
        """
        values = iter(values)
        try:
            # The builtin next() works on Python 2.6+ and Python 3, whereas
            # the ``.next()`` method only exists on Python 2 iterators.
            F = next(values)
        except StopIteration:
            # Explicit return: a StopIteration escaping a generator body is
            # converted to RuntimeError on modern Python (PEP 479).
            return
        for v in values:
            F += v
        yield key, F


class Sum_Mapper:
    """Mapper that sums the second field of every record it receives."""

    def __call__(self, records):
        """Yield a single ``('F', total)`` pair.

        Each record is indexed with ``[1]``; the first such item is the
        accumulator and the rest are added with ``+=``. Nothing is yielded
        when ``records`` is empty.
        """
        records = iter(records)
        try:
            F = next(records)[1]
        except StopIteration:
            # Empty input: emit nothing instead of raising.
            return
        for row in records:
            F += row[1]
        yield 'F', F


class Sum_Reducer:
    """Reducer that sums the values of a key and emits the total as a list."""

    def __call__(self, key, values):
        """Yield a single ``(key, total.tolist())`` pair.

        The accumulated total must provide a ``tolist()`` method
        (presumably a numpy array -- confirm against the upstream mapper).
        Nothing is yielded when ``values`` is empty.
        """
        values = iter(values)
        try:
            F = next(values)
        except StopIteration:
            # Empty input: emit nothing instead of raising.
            return
        for v in values:
            F += v
        yield key, F.tolist()


if __name__ == '__main__':
    import dumbo

    # Two chained iterations: the quantity-function stage, then the sum stage.
    job = dumbo.Job()
    job.additer(Quant_Func_Mapper, Quant_Func_Reducer)
    job.additer(Sum_Mapper, Sum_Reducer)
    job.run()
def filterMain():
    """Build and run a two-iteration dumbo job.

    The first iteration uses ``Mapper`` and the second ``FilterMapper``;
    both use ``reducer`` as the reducer and as the combiner.
    """
    pipeline = dumbo.Job()
    # Stages are registered in order: Mapper first, then FilterMapper.
    for stage_mapper in (Mapper, FilterMapper):
        pipeline.additer(stage_mapper, reducer, combiner=reducer)
    pipeline.run()