def run_maptask(mapper, key_value_iterator, out_shards, markerfn, task_id):
    """Run one map task, writing mapper output into the output shards.

    Pairs produced by ``mapper.start_map(task_id)``, by
    ``mapper.process(key, value)`` for every input record, and by
    ``mapper.flush(task_id)`` are stored in the shard selected by
    ``base.shard_for_key``.  Afterwards every shard that is not None is
    closed and the text 'DONE' is written to ``markerfn``.

    Module-level flags consulted:
      flags.grep_debug_key: when truthy, only records whose key contains it
        are handled; skipped records are neither counted nor mapped.
      flags.log_every: when truthy, ``count_line()`` is printed each time the
        number of records handled so far is a multiple of it.
      flags.record_limit: stop once this many records have been mapped.
        Values that never equal a positive record count (0, None, negatives)
        impose no limit.

    Args:
      mapper: object exposing start_map(task_id), process(key, value) and
        flush(task_id).  start_map/flush yield (key, value) pairs; process
        yields (key, value) pairs or falsy items, which are ignored.
      key_value_iterator: iterable of (key, value) input records.
      out_shards: sequence of shard objects supporting ``shard[k] = v`` and
        ``close()``.
      markerfn: path of the marker file written after the shards are closed.
      task_id: identifier forwarded to mapper.start_map and mapper.flush.
    """
    out_shard_num = len(out_shards)

    def emit(k, v, counter_name):
        # Store the pair in the shard chosen for its key, then bump the
        # counter that tracks which phase produced it.
        out_shards[base.shard_for_key(k, out_shard_num)][k] = v
        count(counter_name)

    for k, v in mapper.start_map(task_id):
        emit(k, v, 'start-out')

    num_recs = 0
    for key, value in key_value_iterator:
        if flags.grep_debug_key:
            if flags.grep_debug_key not in key:
                continue
            # Single-argument parenthesised print behaves the same whether or
            # not print is a statement.
            print("Processing: %s" % key)
        if flags.log_every and num_recs % flags.log_every == 0:
            print(count_line())
        count('map-in')
        num_recs += 1
        for kv in mapper.process(key, value):
            if kv:
                k, v = kv
                emit(k, v, 'map-out')
        # Check the limit only after the record has been mapped, so that a
        # limit of N maps exactly N records.  Checking before processing
        # dropped the N-th record even though it was counted as 'map-in'.
        if flags.record_limit == num_recs:
            break

    for k, v in mapper.flush(task_id):
        emit(k, v, 'flush-out')

    for out_shard in out_shards:
        if out_shard is not None:
            out_shard.close()

    # Close the marker file deterministically instead of relying on garbage
    # collection of an anonymous file object.
    with open(markerfn, 'w') as marker:
        marker.write('DONE')
def __contains__(self, key):
    """Return whether `key` is in the shard base.shard_for_key picks for it."""
    shards = self._shards
    owner = shards[base.shard_for_key(key, len(shards))]
    return key in owner
def get(self, key, default=None):
    """Look up `key` in the shard selected for it.

    The call is delegated to that shard's own ``get``, passing `default`
    through unchanged.
    """
    shards = self._shards
    position = base.shard_for_key(key, len(shards))
    owner = shards[position]
    return owner.get(key, default)
def __getitem__(self, key):
    """Return the value stored under `key` in the shard selected for it.

    Any exception raised by the shard's own item lookup propagates unchanged.
    """
    shards = self._shards
    owner = shards[base.shard_for_key(key, len(shards))]
    return owner[key]
def __setitem__(self, key, value):
    """Store `value` under `key` in the shard selected for that key."""
    shards = self._shards
    position = base.shard_for_key(key, len(shards))
    owner = shards[position]
    owner[key] = value