def _dump(self, file, protocol=2): if hasattr(file, "write"): ctx = contextlib_nullcontext(file) else: ctx = open(os_fspath(file), "wb") with ctx as f: pickle.dump(self, f, protocol=protocol)
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, names=None, titles=None, aligned=False, byteorder=None): """Create an array from binary file data If file is a string or a path-like object then that file is opened, else it is assumed to be a file object. The file object must support random access (i.e. it must have tell and seek methods). >>> from tempfile import TemporaryFile >>> a = np.empty(10,dtype='f8,i4,a5') >>> a[5] = (0.5,10,'abcde') >>> >>> fd=TemporaryFile() >>> a = a.newbyteorder('<') >>> a.tofile(fd) >>> >>> fd.seek(0) >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10, ... byteorder='<') >>> print(r[5]) (0.5, 10, 'abcde') >>> r.shape (10,) """ if dtype is None and formats is None: raise TypeError("fromfile() needs a 'dtype' or 'formats' argument") if (shape is None or shape == 0): shape = (-1,) elif isinstance(shape, (int, long)): shape = (shape,) if isfileobj(fd): # file already opened name = 0 else: # open file fd = open(os_fspath(fd), 'rb') name = 1 if (offset > 0): fd.seek(offset, 1) size = get_remaining_size(fd) if dtype is not None: descr = sb.dtype(dtype) else: descr = format_parser(formats, names, titles, aligned, byteorder)._descr itemsize = descr.itemsize shapeprod = sb.array(shape).prod(dtype=nt.intp) shapesize = shapeprod * itemsize if shapesize < 0: shape = list(shape) shape[shape.index(-1)] = size // -shapesize shape = tuple(shape) shapeprod = sb.array(shape).prod(dtype=nt.intp) nbytes = shapeprod * itemsize if nbytes > size: raise ValueError( "Not enough bytes left in file for specified shape and type") # create the array _array = recarray(shape, descr) nbytesread = fd.readinto(_array.data) if nbytesread != nbytes: raise IOError("Didn't read as many bytes as expected") if name: fd.close() return _array
def open_memmap(filename, mode='r+', dtype=None, shape=None, fortran_order=False, version=None): """ Open a .npy file as a memory-mapped array. This may be used to read an existing file or create a new one. Parameters ---------- filename : str or path-like The name of the file on disk. This may *not* be a file-like object. mode : str, optional The mode in which to open the file; the default is 'r+'. In addition to the standard file modes, 'c' is also accepted to mean "copy on write." See `memmap` for the available mode strings. dtype : data-type, optional The data type of the array if we are creating a new file in "write" mode, if not, `dtype` is ignored. The default value is None, which results in a data-type of `float64`. shape : tuple of int The shape of the array if we are creating a new file in "write" mode, in which case this parameter is required. Otherwise, this parameter is ignored and is thus optional. fortran_order : bool, optional Whether the array should be Fortran-contiguous (True) or C-contiguous (False, the default) if we are creating a new file in "write" mode. version : tuple of int (major, minor) or None If the mode is a "write" mode, then this is the version of the file format used to create the file. None means use the oldest supported version that is able to store the data. Default: None Returns ------- marray : memmap The memory-mapped array. Raises ------ ValueError If the data or the mode is invalid. IOError If the file is not found or cannot be opened correctly. See Also -------- memmap """ if isfileobj(filename): raise ValueError("Filename must be a string or a path-like object." " Memmap cannot use existing file handles.") if 'w' in mode: # We are creating the file, not reading it. # Check if we ought to create the file. _check_version(version) # Ensure that the given dtype is an authentic dtype object rather # than just something that can be interpreted as a dtype object. dtype = numpy.dtype(dtype) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) d = dict( descr=dtype_to_descr(dtype), fortran_order=fortran_order, shape=shape, ) # If we got here, then it should be safe to create the file. with open(os_fspath(filename), mode + 'b') as fp: _write_array_header(fp, d, version) offset = fp.tell() else: # Read the header of the file first. with open(os_fspath(filename), 'rb') as fp: version = read_magic(fp) _check_version(version) shape, fortran_order, dtype = _read_array_header(fp, version) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) offset = fp.tell() if fortran_order: order = 'F' else: order = 'C' # We need to change a write-only mode to a read-write mode since we've # already written data to the file. if mode == 'w+': mode = 'r+' marray = numpy.memmap(filename, dtype=dtype, shape=shape, order=order, mode=mode, offset=offset) return marray
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None, progress_callback=None): """numpy.savetxt modified to output progress """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) class WriteWrap: """Convert to bytes on bytestream inputs. """ def __init__(self, ifh, iencoding): self.fh = ifh self.encoding = iencoding self.do_write = self.first_write self.write = None def close(self): self.fh.close() def write(self, v): self.do_write(v) def write_bytes(self, v): if isinstance(v, bytes): self.fh.write(v) else: self.fh.write(v.encode(self.encoding)) def write_normal(self, v): self.fh.write(asunicode(v)) def first_write(self, v): try: self.write_normal(v) self.write = self.write_normal except TypeError: # input is probably a bytestream self.write_bytes(v) self.write = self.write_bytes own_fh = False if isinstance(fname, os_PathLike): fname = os_fspath(fname) if isinstance(fname, str): # datasource doesn't vis_support creating a new file ... open(fname, 'wt').close() fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) own_fh = True elif hasattr(fname, 'write'): # wrap to handle byte output streams fh = WriteWrap(fname, encoding or 'latin1') else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 0 or X.ndim > 2: raise ValueError("Expected 1D or 2D array, got %dD array instead" % X.ndim) elif X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.names) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) iformat = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [ ' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [ fmt, ] * ncol iformat = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif (not iscomplex_X) and n_fmt_chars != ncol: raise error else: iformat = fmt else: raise ValueError('invalid fmt: %r' % (fmt, )) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) s = iformat % tuple(row2) + newline fh.write(s.replace('+-', '-')) else: length = len(X) for ind, row in enumerate(X): try: v = iformat % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), iformat)) fh.write(v) progress_callback.emit((ind + 1) * 100 / length) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(comments + footer + newline) finally: if own_fh: fh.close()
def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0, shape=None, order='C'): # Import here to minimize 'import numpy' overhead import mmap import os.path try: mode = mode_equivalents[mode] except KeyError as e: if mode not in valid_filemodes: raise ValueError( "mode must be one of {!r} (got {!r})" .format(valid_filemodes + list(mode_equivalents.keys()), mode) ) from None if mode == 'w+' and shape is None: raise ValueError("shape must be given") if hasattr(filename, 'read'): f_ctx = nullcontext(filename) else: f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b') with f_ctx as fid: fid.seek(0, 2) flen = fid.tell() descr = dtypedescr(dtype) _dbytes = descr.itemsize if shape is None: bytes = flen - offset if bytes % _dbytes: raise ValueError("Size of available data is not a " "multiple of the data-type size.") size = bytes // _dbytes shape = (size,) else: if not isinstance(shape, tuple): shape = (shape,) size = np.intp(1) # avoid default choice of np.int_, which might overflow for k in shape: size *= k bytes = int(offset + size*_dbytes) if mode in ('w+', 'r+') and flen < bytes: fid.seek(bytes - 1, 0) fid.write(b'\0') fid.flush() if mode == 'c': acc = mmap.ACCESS_COPY elif mode == 'r': acc = mmap.ACCESS_READ else: acc = mmap.ACCESS_WRITE start = offset - offset % mmap.ALLOCATIONGRANULARITY bytes -= start array_offset = offset - start mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start) self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm, offset=array_offset, order=order) self._mmap = mm self.offset = offset self.mode = mode if is_pathlib_path(filename): # special case - if we were constructed with a pathlib.path, # then filename is a path object, not a string self.filename = filename.resolve() elif hasattr(fid, "name") and isinstance(fid.name, str): # py3 returns int for TemporaryFile().name self.filename = os.path.abspath(fid.name) # same as memmap copies (e.g. memmap + 1) else: self.filename = None return self
def open_memmap(filename, mode='r+', dtype=None, shape=None, fortran_order=False, version=None): """ Open a .npy file as a memory-mapped array. This may be used to read an existing file or create a new one. Parameters ---------- filename : str or path-like The name of the file on disk. This may *not* be a file-like object. mode : str, optional The mode in which to open the file; the default is 'r+'. In addition to the standard file modes, 'c' is also accepted to mean "copy on write." See `memmap` for the available mode strings. dtype : data-type, optional The data type of the array if we are creating a new file in "write" mode, if not, `dtype` is ignored. The default value is None, which results in a data-type of `float64`. shape : tuple of int The shape of the array if we are creating a new file in "write" mode, in which case this parameter is required. Otherwise, this parameter is ignored and is thus optional. fortran_order : bool, optional Whether the array should be Fortran-contiguous (True) or C-contiguous (False, the default) if we are creating a new file in "write" mode. version : tuple of int (major, minor) or None If the mode is a "write" mode, then this is the version of the file format used to create the file. None means use the oldest supported version that is able to store the data. Default: None Returns ------- marray : memmap The memory-mapped array. Raises ------ ValueError If the data or the mode is invalid. IOError If the file is not found or cannot be opened correctly. See Also -------- memmap """ if isfileobj(filename): raise ValueError("Filename must be a string or a path-like object." " Memmap cannot use existing file handles.") if 'w' in mode: # We are creating the file, not reading it. # Check if we ought to create the file. _check_version(version) # Ensure that the given dtype is an authentic dtype object rather # than just something that can be interpreted as a dtype object. dtype = numpy.dtype(dtype) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) d = dict( descr=dtype_to_descr(dtype), fortran_order=fortran_order, shape=shape, ) # If we got here, then it should be safe to create the file. fp = open(os_fspath(filename), mode+'b') try: used_ver = _write_array_header(fp, d, version) # this warning can be removed when 1.9 has aged enough if version != (2, 0) and used_ver == (2, 0): warnings.warn("Stored array in format 2.0. It can only be" "read by NumPy >= 1.9", UserWarning, stacklevel=2) offset = fp.tell() finally: fp.close() else: # Read the header of the file first. fp = open(os_fspath(filename), 'rb') try: version = read_magic(fp) _check_version(version) shape, fortran_order, dtype = _read_array_header(fp, version) if dtype.hasobject: msg = "Array can't be memory-mapped: Python objects in dtype." raise ValueError(msg) offset = fp.tell() finally: fp.close() if fortran_order: order = 'F' else: order = 'C' # We need to change a write-only mode to a read-write mode since we've # already written data to the file. if mode == 'w+': mode = 'r+' marray = numpy.memmap(filename, dtype=dtype, shape=shape, order=order, mode=mode, offset=offset) return marray
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, names=None, titles=None, aligned=False, byteorder=None): """Create an array from binary file data Parameters ---------- fd : str or file type If file is a string or a path-like object then that file is opened, else it is assumed to be a file object. The file object must support random access (i.e. it must have tell and seek methods). dtype : data-type, optional valid dtype for all arrays shape : int or tuple of ints, optional shape of each array. offset : int, optional Position in the file to start reading from. formats, names, titles, aligned, byteorder : If `dtype` is ``None``, these arguments are passed to `numpy.format_parser` to construct a dtype. See that function for detailed documentation Returns ------- np.recarray record array consisting of data enclosed in file. Examples -------- >>> from tempfile import TemporaryFile >>> a = np.empty(10,dtype='f8,i4,a5') >>> a[5] = (0.5,10,'abcde') >>> >>> fd=TemporaryFile() >>> a = a.newbyteorder('<') >>> a.tofile(fd) >>> >>> _ = fd.seek(0) >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10, ... byteorder='<') >>> print(r[5]) (0.5, 10, 'abcde') >>> r.shape (10,) """ if dtype is None and formats is None: raise TypeError("fromfile() needs a 'dtype' or 'formats' argument") # NumPy 1.19.0, 2020-01-01 shape = _deprecate_shape_0_as_None(shape) if shape is None: shape = (-1, ) elif isinstance(shape, int): shape = (shape, ) if hasattr(fd, 'readinto'): # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface. # Example of fd: gzip, BytesIO, BufferedReader # file already opened ctx = nullcontext(fd) else: # open file ctx = open(os_fspath(fd), 'rb') with ctx as fd: if offset > 0: fd.seek(offset, 1) size = get_remaining_size(fd) if dtype is not None: descr = sb.dtype(dtype) else: descr = format_parser(formats, names, titles, aligned, byteorder).dtype itemsize = descr.itemsize shapeprod = sb.array(shape).prod(dtype=nt.intp) shapesize = shapeprod * itemsize if shapesize < 0: shape = list(shape) shape[shape.index(-1)] = size // -shapesize shape = tuple(shape) shapeprod = sb.array(shape).prod(dtype=nt.intp) nbytes = shapeprod * itemsize if nbytes > size: raise ValueError( "Not enough bytes left in file for specified shape and type") # create the array _array = recarray(shape, descr) nbytesread = fd.readinto(_array.data) if nbytesread != nbytes: raise IOError("Didn't read as many bytes as expected") return _array
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None, names=None, titles=None, aligned=False, byteorder=None): """Create an array from binary file data If file is a string or a path-like object then that file is opened, else it is assumed to be a file object. The file object must support random access (i.e. it must have tell and seek methods). >>> from tempfile import TemporaryFile >>> a = np.empty(10,dtype='f8,i4,a5') >>> a[5] = (0.5,10,'abcde') >>> >>> fd=TemporaryFile() >>> a = a.newbyteorder('<') >>> a.tofile(fd) >>> >>> _ = fd.seek(0) >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10, ... byteorder='<') >>> print(r[5]) (0.5, 10, 'abcde') >>> r.shape (10,) """ if dtype is None and formats is None: raise TypeError("fromfile() needs a 'dtype' or 'formats' argument") if (shape is None or shape == 0): shape = (-1,) elif isinstance(shape, (int, long)): shape = (shape,) if isfileobj(fd): # file already opened name = 0 else: # open file fd = open(os_fspath(fd), 'rb') name = 1 if (offset > 0): fd.seek(offset, 1) size = get_remaining_size(fd) if dtype is not None: descr = sb.dtype(dtype) else: descr = format_parser(formats, names, titles, aligned, byteorder)._descr itemsize = descr.itemsize shapeprod = sb.array(shape).prod(dtype=nt.intp) shapesize = shapeprod * itemsize if shapesize < 0: shape = list(shape) shape[shape.index(-1)] = size // -shapesize shape = tuple(shape) shapeprod = sb.array(shape).prod(dtype=nt.intp) nbytes = shapeprod * itemsize if nbytes > size: raise ValueError( "Not enough bytes left in file for specified shape and type") # create the array _array = recarray(shape, descr) nbytesread = fd.readinto(_array.data) if nbytesread != nbytes: raise IOError("Didn't read as many bytes as expected") if name: fd.close() return _array
def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0, shape=None, order='C'): # Import here to minimize 'import numpy' overhead import mmap import os.path try: mode = mode_equivalents[mode] except KeyError: if mode not in valid_filemodes: raise ValueError("mode must be one of %s" % (valid_filemodes + list(mode_equivalents.keys()))) if mode == 'w+' and shape is None: raise ValueError("shape must be given") if hasattr(filename, 'read'): f_ctx = contextlib_nullcontext(filename) else: f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b') with f_ctx as fid: fid.seek(0, 2) flen = fid.tell() descr = dtypedescr(dtype) _dbytes = descr.itemsize if shape is None: bytes = flen - offset if bytes % _dbytes: raise ValueError("Size of available data is not a " "multiple of the data-type size.") size = bytes // _dbytes shape = (size,) else: if not isinstance(shape, tuple): shape = (shape,) size = np.intp(1) # avoid default choice of np.int_, which might overflow for k in shape: size *= k bytes = long(offset + size*_dbytes) if mode == 'w+' or (mode == 'r+' and flen < bytes): fid.seek(bytes - 1, 0) fid.write(b'\0') fid.flush() if mode == 'c': acc = mmap.ACCESS_COPY elif mode == 'r': acc = mmap.ACCESS_READ else: acc = mmap.ACCESS_WRITE start = offset - offset % mmap.ALLOCATIONGRANULARITY bytes -= start array_offset = offset - start mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start) self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm, offset=array_offset, order=order) self._mmap = mm self.offset = offset self.mode = mode if is_pathlib_path(filename): # special case - if we were constructed with a pathlib.path, # then filename is a path object, not a string self.filename = filename.resolve() elif hasattr(fid, "name") and isinstance(fid.name, basestring): # py3 returns int for TemporaryFile().name self.filename = os.path.abspath(fid.name) # same as memmap copies (e.g. memmap + 1) else: self.filename = None return self
def test_os_fspath_strings(): for string_path in (b'/a/b/c.d', u'/a/b/c.d'): assert_(os_fspath(string_path) == string_path)
def test_os_fspath_strings(): for string_path in (b"/a/b/c.d", u"/a/b/c.d"): assert_(os_fspath(string_path) == string_path)