Пример #1
0
class MRJob(object):
    """Base class describing a map-reduce job.

    Subclasses supply ``steps()`` and ``settings()``; both are called once
    while the instance is constructed and their results are kept on the
    instance. ``run()`` hands the job class to ``mapreduce``.

    NOTE(review): ``@abstractmethod`` is only enforced when the class uses an
    ``ABCMeta`` metaclass. None is declared here, so a subclass that forgets
    to override ``steps``/``settings`` still instantiates -- confirm whether
    that is intended.
    """

    # Protocol classes, by name for job input, data passed between steps, and
    # job output. They are not used inside this class; presumably the runner
    # reads them -- verify against ``mapreduce``.
    INPUT_PROTOCOL = protocol.JSONValueProtocol
    INTERNAL_PROTOCOL = protocol.JSONProtocol
    OUTPUT_PROTOCOL = protocol.JSONValueProtocol

    def __init__(self, counters=None):
        """Cache the job's settings and steps and set up its counters.

        counters -- optional counter object to record into; it must provide
            ``incr(group, counter, amount)``. A new ``MRCounter`` is created
            when it is omitted.
        """
        self._settings = self.settings()
        self._steps = self.steps()
        # The argument used to be accepted but silently dropped in favour of
        # a fresh MRCounter; honour it when the caller supplies one.
        if counters is None:
            counters = MRCounter()
        self._counters = counters

    @classmethod
    def run(cls):
        """run the job by passing the job class to ``mapreduce``"""
        mapreduce(cls)

    @abstractmethod
    def steps(self):
        """define steps necessary to run the job"""

    @abstractmethod
    def settings(self):
        """define settings"""

    def increment_counter(self, group, counter, amount=1):
        """add ``amount`` (default 1) to ``counter`` within ``group``"""
        self._counters.incr(group, counter, amount)

    def get_step(self, step_idx):
        """return the step at position ``step_idx`` of ``steps()``"""
        # Calls steps() again rather than reading the cached self._steps, so
        # subclasses whose __init__ skips the base constructor keep working.
        return self.steps()[step_idx]
Пример #2
0
def combine_counters(work_dir, n_map_shards, n_reduce_shards):
    """Merge the per-shard counter files found in ``work_dir``.

    The candidate files are ``map-<i>.counters`` and ``combine-<i>.counters``
    for every ``i`` in ``range(n_map_shards)``, followed by
    ``reduce-<i>.counters`` for every ``i`` in ``range(n_reduce_shards)``.
    Candidates that do not exist on disk are skipped.

    Returns the result of ``MRCounter.sum`` over the deserialized contents of
    the files that exist.
    """
    # (filename pattern, shard count) per phase, in the order files are read.
    # Combine output is produced per map shard, hence n_map_shards twice.
    phases = (
        ('map-%d.counters', n_map_shards),
        ('combine-%d.counters', n_map_shards),
        ('reduce-%d.counters', n_reduce_shards),
    )
    # Comprehensions replace the tuple-parameter lambdas and ``list += map()``
    # so the function parses and behaves the same on Python 2 and Python 3.
    filenames = [os.path.join(work_dir, pattern % shard)
                 for pattern, n_shards in phases
                 for shard in range(n_shards)]
    # Keep this a real list: read_files received a list under Python 2.
    existing = [name for name in filenames if os.path.exists(name)]
    # Lazy, like the original imap: items are deserialized as sum() pulls them.
    return MRCounter.sum(
        MRCounter.deserialize(item) for item in read_files(existing))
Пример #3
0
 def __init__(self, counters=None):
     """Cache the job's settings and steps and set up its counters.

     counters -- optional counter object to store on the instance; a new
         ``MRCounter`` is created when it is omitted.
     """
     self._settings = self.settings()
     self._steps = self.steps()
     # The argument used to be accepted but silently dropped in favour of a
     # fresh MRCounter; honour it when the caller supplies one.
     if counters is None:
         counters = MRCounter()
     self._counters = counters