def run(self):
    """Feed every input's path into a JoinMapper/PivotReducer mapreduce call.

    Collects the ``path`` attribute of each item returned by
    ``self.input()``, wraps the resulting list with
    ``Collection.from_sharded_list`` and hands it to ``mapreduce``.
    The output prefix is ``self.output().path`` and ``num_shards`` is 10.
    """
    shard_paths = []
    for upstream in self.input():
        shard_paths.append(upstream.path)

    mapreduce(
        Collection.from_sharded_list(shard_paths),
        mapper=JoinMapper(),
        reducer=PivotReducer(),
        output_prefix=self.output().path,
        num_shards=10,
    )
def run(self):
    """Run mapreduce over all inputs using JoinMapper and PivotReducer.

    The paths of the objects returned by ``self.input()`` are gathered
    into a list and turned into a collection via
    ``Collection.from_sharded_list``. ``mapreduce`` is then called with
    that collection, ``self.output().path`` as the output prefix and
    ``num_shards=10``.
    """
    input_paths = [target.path for target in self.input()]
    source = Collection.from_sharded_list(input_paths)

    # The mapper and reducer are built before the output path is read,
    # matching the argument evaluation order of a single inline call.
    join_mapper = JoinMapper()
    pivot_reducer = PivotReducer()
    destination = self.output().path

    mapreduce(
        source,
        mapper=join_mapper,
        reducer=pivot_reducer,
        output_prefix=destination,
        num_shards=10,
    )
def run(self):
    """Run mapreduce over the single input with Harmonized2OpenFDAMapper.

    The collection is built from ``self.input().path`` with
    ``Collection.from_sharded``. Records go through
    ``Harmonized2OpenFDAMapper`` and ``parallel.IdentityReducer``, with
    ``self.output().path`` as the output prefix and ``num_shards=1``.
    """
    source = Collection.from_sharded(self.input().path)

    mapreduce(
        source,
        mapper=Harmonized2OpenFDAMapper(),
        reducer=parallel.IdentityReducer(),
        output_prefix=self.output().path,
        num_shards=1,
    )