def pack_apply_message(f, args, kwargs, buffer_threshold=MAX_BYTES, item_threshold=MAX_ITEMS):
    """pack up a function, args, and kwargs to be sent over the wire

    Each element of args/kwargs is canned for special treatment, but
    inspection does not go any deeper than that.

    Any object whose data is larger than `threshold` will not have its data
    copied (only numpy arrays and bytes/buffers support zero-copy).

    Message will be a list of bytes/buffers of the format:

    [ cf, pinfo, <arg_bufs>, <kwarg_bufs> ]

    with length at least two + len(args) + len(kwargs).
    """
    serialized_args = [serialize_object(a, buffer_threshold, item_threshold) for a in args]
    arg_bufs = flatten(serialized_args)

    kw_keys = sorted(kwargs.keys())
    serialized_kwargs = [serialize_object(kwargs[k], buffer_threshold, item_threshold) for k in kw_keys]
    kwarg_bufs = flatten(serialized_kwargs)

    # metadata needed to reassemble the call on the receiving side
    info = dict(nargs=len(args), narg_bufs=len(arg_bufs), kw_keys=kw_keys)

    msg = [
        pickle.dumps(can(f), PICKLE_PROTOCOL),
        pickle.dumps(info, PICKLE_PROTOCOL),
    ]
    msg.extend(arg_bufs)
    msg.extend(kwarg_bufs)
    return msg
def pack_apply_message(f, args, kwargs, buffer_threshold=MAX_BYTES, item_threshold=MAX_ITEMS):
    """pack up a function, args, and kwargs to be sent over the wire

    Each element of args/kwargs will be canned for special treatment,
    but inspection will not go any deeper than that.

    Any object whose data is larger than `threshold` will not have their
    data copied (only numpy arrays and bytes/buffers support zero-copy).

    Message will be a list of bytes/buffers of the format:

    [ cf, pinfo, <arg_bufs>, <kwarg_bufs> ]

    With length at least two + len(args) + len(kwargs)
    """
    kw_keys = sorted(kwargs.keys())

    arg_bufs = flatten(serialize_object(arg, buffer_threshold, item_threshold) for arg in args)
    kwarg_bufs = flatten(serialize_object(kwargs[key], buffer_threshold, item_threshold) for key in kw_keys)

    # enough metadata to split the flat buffer list back apart on arrival
    info = dict(nargs=len(args), narg_bufs=len(arg_bufs), kw_keys=kw_keys)

    msg = [pickle.dumps(can(f), -1), pickle.dumps(info, -1)]
    msg += arg_bufs
    msg += kwarg_bufs
    return msg
def _strip_serialized_data(s, threshold, databuffers):
    """Move the raw data of `s` into `databuffers` when it is a buffer/ndarray
    or its data exceeds `threshold`, so that data is not double-pickled."""
    if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
        databuffers.append(s.getData())
        s.data = None


def serialize_object(obj, threshold=64e-6):
    """Serialize an object into a list of sendable buffers.

    Parameters
    ----------
    obj : object
        The object to be serialized
    threshold : float
        The threshold for not double-pickling the content.
        NOTE(review): 64e-6 as a *byte* threshold means any non-empty data
        exceeds it, so buffers are always extracted — looks like it was
        meant to be 64e6; confirm intent before changing the default.

    Returns
    -------
    ('pmd', [bufs]) : where pmd is the pickled metadata wrapper,
        bufs is a list of data buffers
    """
    databuffers = []
    if isinstance(obj, (list, tuple)):
        clist = canSequence(obj)
        # build a real list (not a lazy map object) so the result remains
        # picklable on Python 3 as well; identical to map() on Python 2
        slist = [serialize(c) for c in clist]
        for s in slist:
            _strip_serialized_data(s, threshold, databuffers)
        return pickle.dumps(slist, -1), databuffers
    elif isinstance(obj, dict):
        sobj = {}
        # sorted(obj) iterates keys on both Python 2 and 3 (iterkeys is 2-only)
        for k in sorted(obj):
            s = serialize(can(obj[k]))
            _strip_serialized_data(s, threshold, databuffers)
            sobj[k] = s
        return pickle.dumps(sobj, -1), databuffers
    else:
        s = serialize(can(obj))
        _strip_serialized_data(s, threshold, databuffers)
        return pickle.dumps(s, -1), databuffers
def test_uncan_function_globals(self):
    """test that uncanning a module function restores it into its module"""
    from re import search
    canned = can(search)
    # first without an explicit globals dict, then with one
    for glbls in (None, dict(a=5)):
        restored = uncan(canned) if glbls is None else uncan(canned, glbls)
        self.assertEqual(restored.__module__, search.__module__)
        self.assertNotEqual(restored('asd', 'asdf'), None)
def serialize_object(obj, threshold=64e-6):
    """Serialize an object into a list of sendable buffers.

    Parameters
    ----------
    obj : object
        The object to be serialized
    threshold : float
        The threshold for not double-pickling the content.

    Returns
    -------
    ('pmd', [bufs]) : where pmd is the pickled metadata wrapper,
        bufs is a list of data buffers
    """
    bufs = []

    def _strip(srl):
        # pull the raw data out of `srl` when it is a buffer/ndarray or its
        # data is over the threshold, so it is not double-pickled
        if srl.typeDescriptor in ('buffer', 'ndarray') or srl.getDataSize() > threshold:
            bufs.append(srl.getData())
            srl.data = None

    if isinstance(obj, (list, tuple)):
        slist = map(serialize, canSequence(obj))
        for srl in slist:
            _strip(srl)
        return pickle.dumps(slist, -1), bufs
    elif isinstance(obj, dict):
        sobj = {}
        for key in sorted(obj.iterkeys()):
            srl = serialize(can(obj[key]))
            _strip(srl)
            sobj[key] = srl
        return pickle.dumps(sobj, -1), bufs
    else:
        srl = serialize(can(obj))
        _strip(srl)
        return pickle.dumps(srl, -1), bufs
def test_uncan_function_globals(self):
    """test that uncanning a module function restores it into its module"""
    from re import search
    cf = can(search)
    # uncan with no globals dict
    restored = uncan(cf)
    self.assertEqual(restored.__module__, search.__module__)
    self.assertNotEqual(restored("asd", "asdf"), None)
    # uncan with an explicit globals dict
    restored = uncan(cf, dict(a=5))
    self.assertEqual(restored.__module__, search.__module__)
    self.assertNotEqual(restored("asd", "asdf"), None)
def serialize_object(obj, buffer_threshold=MAX_BYTES, item_threshold=MAX_ITEMS):
    """Serialize an object into a list of sendable buffers.

    Parameters
    ----------
    obj : object
        The object to serialize.
    buffer_threshold : int
        Data buffers bigger than this (in bytes) are pulled out and sent
        separately instead of being pickled.
    item_threshold : int
        The maximum number of items over which canning will iterate.
        Containers (lists, dicts) with at least this many items are
        pickled whole, without per-item introspection.

    Returns
    -------
    [bufs] : list of buffers representing the serialized object.
    """
    extracted = []
    if istype(obj, sequence_types) and len(obj) < item_threshold:
        canned = can_sequence(obj)
        for item in canned:
            extracted.extend(_extract_buffers(item, buffer_threshold))
    elif istype(obj, dict) and len(obj) < item_threshold:
        canned = {}
        for key in sorted(obj):
            canned[key] = can(obj[key])
            extracted.extend(_extract_buffers(canned[key], buffer_threshold))
    else:
        canned = can(obj)
        extracted.extend(_extract_buffers(canned, buffer_threshold))
    # the pickled canned object always travels first
    return [pickle.dumps(canned, PICKLE_PROTOCOL)] + extracted
def pack_apply_message(f, args, kwargs, threshold=64e-6):
    """pack up a function, args, and kwargs to be sent over the wire
    as a series of buffers. Any object whose data is larger than `threshold`
    will not have their data copied (currently only numpy arrays support zero-copy)"""
    # the canned function travels first, pickled with the highest protocol
    msg = [pickle.dumps(can(f), -1)]
    extra_bufs = []  # large data buffers, appended after the pickled parts
    for part in (args, kwargs):
        spart, part_bufs = serialize_object(part, threshold)
        msg.append(spart)
        extra_bufs.extend(part_bufs)
    msg.extend(extra_bufs)
    return msg
def pack_apply_message(f, args, kwargs, threshold=64e-6):
    """pack up a function, args, and kwargs to be sent over the wire
    as a series of buffers. Any object whose data is larger than `threshold`
    will not have their data copied (currently only numpy arrays support zero-copy)"""
    msg = [pickle.dumps(can(f), -1)]
    # serialize args and kwargs, collecting their large data buffers
    sargs, arg_bufs = serialize_object(args, threshold)
    skwargs, kwarg_bufs = serialize_object(kwargs, threshold)
    msg.append(sargs)
    msg.append(skwargs)
    # data buffers trail the pickled metadata
    return msg + arg_bufs + kwarg_bufs
def require(*objects, **mapping):
    """Simple decorator for requiring local objects and modules to be available
    when the decorated function is called on the engine.

    Modules specified by name or passed directly will be imported
    prior to calling the decorated function.

    Objects other than modules will be pushed as a part of the task.
    Functions can be passed positionally,
    and will be pushed to the engine with their __name__.
    Other objects can be passed by keyword arg.

    Examples
    --------

    In [1]: @require('numpy')
       ...: def norm(a):
       ...:     return numpy.linalg.norm(a,2)

    In [2]: foo = lambda x: x*x
    In [3]: @require(foo)
       ...: def bar(a):
       ...:     return foo(1-a)
    """
    names = []
    for item in objects:
        # modules are required by name on the engine
        name = item.__name__ if isinstance(item, ModuleType) else item
        if isinstance(name, basestring):
            names.append(name)
            continue
        if hasattr(item, '__name__'):
            # functions etc. are pushed under their own __name__
            mapping[item.__name__] = item
        else:
            raise TypeError("Objects other than modules and functions "
                "must be passed by kwarg, but got: %s" % type(item))
    # can everything that will be pushed as part of the task
    for name in list(mapping):
        mapping[name] = can(mapping[name])
    return depend(_require, *names, **mapping)
def require(*objects, **mapping):
    """Simple decorator for requiring local objects and modules to be available
    when the decorated function is called on the engine.

    Modules specified by name or passed directly will be imported
    prior to calling the decorated function.

    Objects other than modules will be pushed as a part of the task.
    Functions can be passed positionally,
    and will be pushed to the engine with their __name__.
    Other objects can be passed by keyword arg.

    Examples
    --------

    In [1]: @require('numpy')
       ...: def norm(a):
       ...:     return numpy.linalg.norm(a,2)

    In [2]: foo = lambda x: x*x
    In [3]: @require(foo)
       ...: def bar(a):
       ...:     return foo(1-a)
    """
    names = []
    for obj in objects:
        if isinstance(obj, ModuleType):
            # a module object: require it by name
            names.append(obj.__name__)
        elif isinstance(obj, basestring):
            names.append(obj)
        elif hasattr(obj, '__name__'):
            mapping[obj.__name__] = obj
        else:
            raise TypeError("Objects other than modules and functions "
                            "must be passed by kwarg, but got: %s" % type(obj))
    # can all keyword objects so they can be pushed with the task
    mapping = dict((name, can(obj)) for name, obj in mapping.items())
    return depend(_require, *names, **mapping)
def dumps(obj):
    """Can `obj` and pickle it.

    Uses the highest pickle protocol (-1), matching every other
    pickle.dumps call in this module; the default protocol (0 on
    Python 2) is larger and slower, and pickle.loads auto-detects
    the protocol, so this is load-compatible.
    """
    return pickle.dumps(can(obj), -1)
def cancan(self, f):
    """decorator to pass through canning into self.user_ns"""
    canned = can(f)
    return uncan(canned, self.user_ns)
def run_roundtrip(self, obj):
    """push obj through a full can/uncan cycle and check it survives"""
    result = uncan(can(obj))
    assert result == obj, "failed assertion: %r == %r" % (result, obj)
def test_canning(self):
    """canning a plain dict should still yield a dict"""
    original = dict(a=5, b=6)
    canned = can(original)
    self.assertTrue(isinstance(canned, dict))
def test_canned_function(self):
    """canning a lambda should produce a CannedFunction"""
    canned = can(lambda: 7)
    self.assertTrue(isinstance(canned, CannedFunction))
def run_roundtrip(self, obj):
    # round-trip obj through can/uncan; it must come back equal
    restored = uncan(can(obj))
    assert restored == obj, "failed assertion: %r == %r" % (restored, obj)
def test_canning(self):
    # a canned dict is still a dict
    cd = can({'a': 5, 'b': 6})
    self.assertTrue(isinstance(cd, dict))
def test_canned_function(self):
    # lambdas are wrapped in CannedFunction when canned
    func = lambda: 7
    self.assertTrue(isinstance(can(func), CannedFunction))