def send_arrays(socket, arrays, stop=False):
    """Send NumPy arrays using the buffer interface and some metadata.

    Parameters
    ----------
    socket : :class:`zmq.Socket`
        The socket to send data over.
    arrays : list
        A list of :class:`numpy.ndarray` to transfer.
    stop : bool, optional
        Instead of sending a series of NumPy arrays, send a JSON object
        with a single `stop` key. The :func:`recv_arrays` will raise
        ``StopIteration`` when it receives this.

    Notes
    -----
    The protocol is very simple: A single JSON object describing the array
    format (using the same specification as ``.npy`` files) is sent first.
    Subsequently the arrays are sent as bytestreams (through NumPy's
    support of the buffering protocol).

    """
    if arrays:
        # The buffer protocol only works on contiguous arrays
        arrays = [numpy.ascontiguousarray(array) for array in arrays]
    if stop:
        headers = {'stop': True}
        socket.send_json(headers)
    elif not arrays:
        # Guard against IndexError on ``arrays[-1]``: with no arrays there
        # are no bytestream frames, so send the (empty) header list alone
        # without the SNDMORE flag.
        socket.send_json([])
    else:
        headers = [header_data_from_array_1_0(array) for array in arrays]
        socket.send_json(headers, zmq.SNDMORE)
        for array in arrays[:-1]:
            socket.send(array, zmq.SNDMORE)
        socket.send(arrays[-1])
def encode(obj):
    """
    Encode numpy arrays and slices.

    Also converts numpy scalars and dtypes to pure Python objects.

    Args:
        obj: object to serialize

    Returns:
        dictionary or Python scalar
    """
    if isinstance(obj, np.ndarray):
        arr = header_data_from_array_1_0(obj)
        # tobytes() replaces the deprecated tostring() (removed in numpy
        # 2.0); the produced byte string is identical.
        arr['arraydata'] = obj.tobytes()
        arr['__ndarray__'] = True
        return arr
    elif isinstance(obj, slice):
        return {
            '__slice__': (obj.start, obj.stop, obj.step)
        }
    elif isclass(obj) and issubclass(obj, np.number):
        # make sure numpy type classes such as np.float64 (used, e.g., as dtype
        # arguments) are serialized to strings
        return obj().dtype.name
    elif isinstance(obj, np.dtype):
        return obj.name
    elif isinstance(obj, np.number):
        # .item() replaces np.asscalar (removed in numpy 1.23); converts a
        # numpy scalar to the equivalent pure Python scalar
        return obj.item()
    return obj
def _init_from_array(self, array):
    """Initialize the object from an array.

    Writes a .npy header whose declared shape is deliberately oversized
    (``MAX_SHAPE_LEN`` along axis 0) so that rows can later be appended
    without having to rewrite the whole file, then immediately overwrites
    the length fields to describe the current zero-row state so the file
    stays valid.

    Parameters
    ----------
    array : numpy.ndarray
        Template array; only its dtype and trailing shape are used.
        The stored shape starts at 0 along the first axis.
    """
    # Record the empty (zero rows so far) shape and element layout.
    self.shape = (0, ) + array.shape[1:]
    self.dtype = array.dtype
    self.itemsize = array.itemsize

    # Build header metadata from the template array, then enlarge the
    # declared first dimension so the header text is long enough for any
    # future size.  The 1_0 header dict layout is the same for 2_0.
    d = npformat.header_data_from_array_1_0(array)
    d['shape'] = (self.MAX_SHAPE_LEN, ) + d['shape'][1:]
    # Appending rows only works with C-ordered data on disk.
    d['fortran_order'] = False

    # Serialize the oversized header into a scratch buffer just to learn
    # its byte length; version 2_0 allows headers longer than 64 KiB.
    h_bytes = io.BytesIO()
    npformat.write_array_header_2_0(h_bytes, d)
    self.header_length = h_bytes.tell()

    # Copy only the fixed-size header prefix (magic + version + length
    # fields) to the start of the backing file.
    # NOTE(review): assumes self.fs is an open, writable, seekable binary
    # file object — confirm against the class's constructor.
    self.fs.seek(0)
    h_bytes.seek(0)
    self.fs.write(h_bytes.read(self.HEADER_DATA_OFFSET))

    # Now write header data describing the real (zero-length) shape so the
    # file is a valid, readable .npy from the start.
    self._prepare_header_data()
    self._write_header_data()
def _npy_size(ary): assert not ary.dtype.hasobject magic_len = npy.MAGIC_LEN # TODO: could calculate this directly with closing(StringIO()) as sio: npy.write_array_header_1_0(sio, npy.header_data_from_array_1_0(ary)) header_len = sio.tell() data_len = ary.dtype.itemsize * ary.size return magic_len + header_len + data_len
def encode(obj):
    """
    Encode numpy arrays, slices, scalars, dtypes and HDF5 nodes

    :param obj: object to serialize
    :return: dictionary with encoded object, or a pure Python scalar
    """
    if isinstance(obj, np.ndarray):
        arr = header_data_from_array_1_0(obj)
        # tobytes() replaces the deprecated tostring() (removed in numpy
        # 2.0); the produced byte string is identical.
        arr['arraydata'] = obj.tobytes()
        arr['__ndarray__'] = True
        return arr
    elif isinstance(obj, slice):
        return {'__slice__': (obj.start, obj.stop, obj.step)}
    elif isclass(obj) and issubclass(obj, np.number):
        # make sure numpy type classes such as np.float64 (used, e.g., as dtype
        # arguments) are serialized to strings
        return obj().dtype.name
    elif isinstance(obj, np.dtype):
        return obj.name
    elif isinstance(obj, np.number):
        # .item() replaces np.asscalar (removed in numpy 1.23); converts a
        # numpy scalar to the equivalent pure Python scalar
        return obj.item()
    elif isinstance(obj, File):
        data = {
            RESPONSE_H5FILE: obj.file,
            RESPONSE_NODE_TYPE: NODE_TYPE_FILE,
            RESPONSE_NODE_PATH: obj.path,
        }
        return data
    elif isinstance(obj, Group):
        # TODO include attrs?
        data = {
            RESPONSE_H5FILE: obj.file,
            RESPONSE_NODE_TYPE: NODE_TYPE_GROUP,
            RESPONSE_NODE_PATH: obj.path,
        }
        return data
    elif isinstance(obj, Dataset):
        # TODO include attrs?
        data = {
            RESPONSE_H5FILE: obj.file,
            RESPONSE_NODE_TYPE: NODE_TYPE_DATASET,
            RESPONSE_NODE_PATH: obj.path,
            RESPONSE_NODE_SHAPE: obj.shape,
            RESPONSE_NODE_DTYPE: obj.dtype,
        }
        return data
    return obj
def encode_np_array(obj):
    """
    Encode numpy arrays and slices

    :param obj: object to serialize
    :return: dictionary with encoded array or slice, or *obj* unchanged
    """
    if isinstance(obj, np.ndarray):
        arr = header_data_from_array_1_0(obj)
        # tobytes() replaces the deprecated tostring() (removed in numpy
        # 2.0); the produced byte string is identical.
        arr['data'] = obj.tobytes()
        arr['__ndarray__'] = True
        return arr
    elif isinstance(obj, slice):
        return {'__slice__': (obj.start, obj.stop, obj.step)}
    return obj
def save_large_array(fp, array, axis=0, desc=None):
    """Stream a large, possibly memmapped array into a NPY file.

    The array is written chunk by chunk (roughly 100 chunks) so it never
    has to be loaded into memory in one piece.  Only ``axis == 0`` is
    currently supported; *desc* labels the progress bar.
    """
    assert axis == 0  # TODO: support other axes
    version = None
    _check_version(version)
    _write_array_header(fp, header_data_from_array_1_0(array), version)
    n_rows = array.shape[axis]
    if n_rows == 0:
        return
    # Split the first axis into ~100 pieces.
    chunk_size = int(ceil(float(n_rows) / 100))
    assert chunk_size >= 1
    for start in tqdm(range(0, n_rows, chunk_size), desc=desc):
        piece = array[start:start + chunk_size, ...]
        fp.write(piece.tobytes())
def serialize_numpy(obj):
    """Serializes NumPy arrays and scalars.

    Scalars (``numpy.generic`` instances and zero-dimensional arrays) are
    converted to native Python values, so on deserialization they won't be
    the same type — e.g. ``numpy.float32`` comes back as a plain ``float``.
    Arrays become the ``.npy`` 1.0 header dict with the base64-encoded raw
    data stored under the ``'__ndarray__'`` key.

    Raises ``TypeError`` for anything that is not a NumPy array or scalar.
    """
    scalar_like = isinstance(obj, numpy.generic) or (
        isinstance(obj, numpy.ndarray) and obj.ndim == 0)
    if scalar_like:
        return obj.item()
    if not isinstance(obj, numpy.ndarray):
        raise TypeError
    # base64 needs a contiguous buffer; copy only when necessary.
    contiguous = obj if obj.flags.c_contiguous else numpy.ascontiguousarray(obj)
    payload = header_data_from_array_1_0(contiguous)
    payload['__ndarray__'] = base64.b64encode(contiguous.data)
    return payload