def unpack(cls, jobpack, globals={}):
    """Unpack the previously packed :class:`JobDict`."""
    jobdict = cls.defaults.copy()
    jobdict.update(**decode_netstring_fd(jobpack))
    for key in cls.defaults:
        if key == 'input':
            # each space-separated input may itself be a whitespace-joined
            # list of replica urls
            jobdict['input'] = [i.split() for i in jobdict['input'].split(' ')]
        elif key == 'nr_reduces':
            jobdict[key] = int(jobdict[key])
        elif key == 'scheduler':
            # fold flat 'sched_*' entries back into the scheduler dict
            for sched_key in cls.scheduler_keys:
                if 'sched_%s' % sched_key in jobdict:
                    jobdict['scheduler'][sched_key] = jobdict.pop('sched_%s' % sched_key)
        elif key == 'prefix':
            pass
        elif jobdict[key] is None:
            pass
        elif key in cls.stacks:
            jobdict[key] = util.unpack_stack(jobdict[key], globals=globals)
        else:
            jobdict[key] = util.unpack(jobdict[key], globals=globals)
    # map readers and writers to streams
    for oldio, stream, wrapper in cls.io_mappings:
        if jobdict[oldio]:
            jobdict[stream].append(wrapper(jobdict[oldio]))
    return cls(**jobdict)
def unpack(cls, jobpack, globals={}):
    """Unpack the previously packed :class:`JobDict`."""
    jobdict = cls.defaults.copy()
    jobdict.update(**decode_netstring_fd(jobpack))
    for key in cls.defaults:
        if key == 'input':
            jobdict['input'] = [i.split() for i in jobdict['input'].split(' ')]
        elif key == 'username':
            pass
        elif key == 'nr_reduces':
            jobdict[key] = int(jobdict[key])
        elif key == 'scheduler':
            # fold flat 'sched_*' entries back into the scheduler dict
            for sched_key in cls.scheduler_keys:
                if 'sched_%s' % sched_key in jobdict:
                    jobdict['scheduler'][sched_key] = jobdict.pop('sched_%s' % sched_key)
        elif key == 'prefix':
            pass
        elif jobdict[key] is None:
            pass
        elif key in cls.stacks:
            jobdict[key] = util.unpack_stack(jobdict[key], globals=globals)
        else:
            jobdict[key] = util.unpack(jobdict[key], globals=globals)
    return cls(**jobdict)
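# A minimal, self-contained sketch of the two transformations unpack() applies
# above. The replica separator inside each space-separated input and the
# scheduler key name are assumptions for illustration only.
def _demo_unpack_transforms():
    jobdict = {'input': 'http://a/f1\nhttp://b/f1 http://c/f2',
               'sched_max_cores': '8',
               'scheduler': {}}
    # 'input' becomes a list of replica lists:
    # [['http://a/f1', 'http://b/f1'], ['http://c/f2']]
    jobdict['input'] = [i.split() for i in jobdict['input'].split(' ')]
    # flat 'sched_*' entries are folded back under 'scheduler'
    for sched_key in ('max_cores',):
        if 'sched_%s' % sched_key in jobdict:
            jobdict['scheduler'][sched_key] = jobdict.pop('sched_%s' % sched_key)
    return jobdict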
def load_stack(job, mode, inout):
    key = "%s_%s_stream" % (mode, inout)
    # default to the builtin stream function for this mode/direction
    stack = [("disco.func.%s" % key, getattr(disco.func, key))]
    if key in job:
        stack = [(k, util.unpack(v))
                 for k, v in decode_netstring_str(job[key])]
    for k, fn in stack:
        # give each unpacked function access to the worker's globals
        fn.func_globals.update(globals())
    return stack
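# A sketch of how such a stack might be applied: each entry wraps the stream
# produced by the previous one. The (stream, size, url, params) calling
# convention is an assumption modeled on disco.func's stream functions.
def apply_stack(stack, stream, size, url, params):
    for name, fn in stack:
        stream, size, url = fn(stream, size, url, params)
    return stream, size, url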
def op_reduce(job):
    msg("Received a new reduce job!")
    do_sort = int(job['sort'])
    mem_sort_limit = int(job['mem_sort_limit'])
    global fun_init
    if 'reduce_init' in job:
        fun_init = util.unpack(job['reduce_init'], globals=globals())
    global fun_reader, fun_writer
    fun_reader = util.unpack(job['reduce_reader'], globals=globals())
    fun_writer = util.unpack(job['reduce_writer'], globals=globals())
    global fun_reduce
    if 'ext_reduce' in job:
        # external (non-Python) reduce: hand off to the external protocol
        if 'ext_params' in job:
            red_params = job['ext_params']
        else:
            red_params = "0\n"
        path = Task.path("EXT_MAP")
        external.prepare(job['ext_reduce'], red_params, path)
        fun_reduce = external.ext_reduce
    else:
        fun_reduce = util.unpack(job['reduce'], globals=globals())
        red_params = util.unpack(job['params'], globals=globals())
    init_common(job)
    red_in = ReduceReader(Task.inputs, do_sort,
                          mem_sort_limit, red_params).iter()
    red_out = ReduceOutput(red_params)
    msg("Starting reduce")
    fun_init(red_in, red_params)
    fun_reduce(red_in, red_out, red_params)
    msg("Reduce done")
    red_out.close()
    external.close_ext()
    index, index_url = Task.reduce_index
    safe_update(index, {"%d %s" % (Task.id, red_out.url()): True})
    OutputURL(index_url)
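# For reference, the unpacked fun_reduce is invoked as
# fun_reduce(red_in, red_out, red_params). A classic word-count reduce in
# that shape might look like this (the out.add(k, v) interface is assumed
# from disco's output conventions):
def word_count_reduce(iter, out, params):
    totals = {}
    for word, count in iter:
        totals[word] = totals.get(word, 0) + int(count)
    for word, total in totals.iteritems():
        out.add(word, total)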
def op_map(job):
    msg("Received a new map job!")
    if len(Task.inputs) != 1:
        err("Map can only handle one input. Got: %s" % " ".join(Task.inputs))
    global fun_reader, fun_writer, fun_partition
    fun_reader = util.unpack(job['map_reader'], globals=globals())
    fun_writer = util.unpack(job['map_writer'], globals=globals())
    fun_partition = util.unpack(job['partition'], globals=globals())
    global fun_init
    if 'map_init' in job:
        fun_init = util.unpack(job['map_init'], globals=globals())
    global fun_map
    if 'ext_map' in job:
        # external (non-Python) map: hand off to the external protocol
        if 'ext_params' in job:
            map_params = job['ext_params']
        else:
            map_params = "0\n"
        path = Task.path("EXT_MAP")
        external.prepare(job['ext_map'], map_params, path)
        fun_map = external.ext_map
    else:
        map_params = util.unpack(job['params'], globals=globals())
        fun_map = util.unpack(job['map'], globals=globals())
    global fun_combiner
    if 'combiner' in job:
        fun_combiner = util.unpack(job['combiner'], globals=globals())
    init_common(job)
    nr_part = max(1, Task.num_partitions)
    if 'combiner' in job:
        partitions = [MapOutput(i, map_params, fun_combiner)
                      for i in range(nr_part)]
    else:
        partitions = [MapOutput(i, map_params) for i in range(nr_part)]
    run_map(Task.inputs[0], partitions, map_params)
    external.close_ext()
    urls = {}
    for i, p in enumerate(partitions):
        p.close()
        urls["%d %s" % (i, p.url())] = True
    index, index_url = Task.map_index
    safe_update(index, urls)
    OutputURL(index_url)
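# Correspondingly, a user map function unpacked into fun_map is called once
# per input entry and returns an iterable of (key, value) pairs. The classic
# word-count map has this shape:
def word_count_map(e, params):
    return [(word, 1) for word in e.split()]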
def init_common(job):
    global status_interval, input_stream_stack, output_stream_stack
    if 'required_files' in job:
        # unpack auxiliary files and make them importable
        path = Task.path("REQ_FILES")
        write_files(util.unpack(job['required_files'], globals=globals()), path)
        sys.path.insert(0, path)
    Task.num_partitions = int(job['nr_reduces'])
    status_interval = int(job['status_interval'])
    input_stream_stack = load_stack(job, Task.mode, "input")
    output_stream_stack = load_stack(job, Task.mode, "output")
    req_mod = job['required_modules'].split()
    import_modules(req_mod)
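# required_files unpacks to a mapping of file name to file contents, which
# write_files materializes under REQ_FILES before the directory is prepended
# to sys.path. A minimal write_files compatible with that usage might look
# like this (a sketch only; the real helper may differ):
import os

def write_files(files, path):
    if not os.path.exists(path):
        os.makedirs(path)
    for name, data in files.iteritems():
        open(os.path.join(path, name), 'w').write(data)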
def prepare(ext_job, params, path):
    write_files(util.unpack(ext_job), path)
    open_ext(path + "/op", params)
def __setstate__(self, state):
    # unpack each packed attribute back into the instance dict
    for k, v in state.iteritems():
        self.__dict__[k] = util.unpack(v)
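# The matching __getstate__ would pack each attribute the same way, so that
# instances pickle cleanly even when attributes hold functions (a sketch,
# assuming util.pack is the inverse of util.unpack):
def __getstate__(self):
    return dict((k, util.pack(v)) for k, v in self.__dict__.iteritems())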
def test_pack(self):
    now = datetime.now()
    self.assertEquals(now, unpack(pack(now)))
    self.assertEquals(666, unpack(pack(666)))
    self.assertEquals(function.func_code, unpack(pack(function)).func_code)
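# The last assertion compares code objects because a function's globals and
# closure do not survive serialization. A minimal stdlib sketch of such a
# round trip (not necessarily how util.pack/unpack are implemented):
import marshal, types

def pack_fn(fn):
    return marshal.dumps(fn.func_code)

def unpack_fn(packed, globals={}):
    return types.FunctionType(marshal.loads(packed), globals)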