Example #1
def read_npy(fp, prn=False):
    """ Read an npy file quickly.

    fp : file path, e.g. "c:/temp/a01.npy"
    prn : print the header information if True
    """
    import numpy as np
    from textwrap import dedent
    from numpy.lib import format as format_
    frmt = """
    Magic {}
    Shape {},  C-contig {}, dtype {}
    """
    with open(fp, 'rb') as f:
        # Read the magic string and the 1.0 header (shape, order, dtype).
        major, minor = format_.read_magic(f)
        mag = format_.magic(major, minor)
        shp, is_fortran, dt = format_.read_array_header_1_0(f)
        count = np.multiply.reduce(shp, dtype=np.int64)

        # Read the data payload in ~256 KiB chunks into a flat array.
        BUFFER_SIZE = 2**18
        max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
        array = np.ndarray(count, dtype=dt)
        for i in range(0, count, max_read_count):
            read_count = min(max_read_count, count - i)
            read_size = int(read_count * dt.itemsize)
            data = format_._read_bytes(f, read_size, "array data")
            array[i:i + read_count] = np.frombuffer(data, dtype=dt,
                                                    count=read_count)
        array.shape = shp
    if prn:
        print(dedent(frmt).format(mag, shp, (not is_fortran), dt))
    return array
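A minimal usage sketch for the reader above (the path is only illustrative; any location writable by np.save works):

import numpy as np

# Save a small array, read it back with read_npy, and cross-check against np.load.
np.save("c:/temp/a01.npy", np.arange(12, dtype=np.float64).reshape(3, 4))
a = read_npy("c:/temp/a01.npy", prn=True)
assert np.array_equal(a, np.load("c:/temp/a01.npy"))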
Example #2
def load_shape(n):
    """Return the shape stored in an npy header without loading the data."""
    from numpy.lib.format import read_magic, read_array_header_1_0
    with open(n, 'rb') as f:
        major, minor = read_magic(f)
        shape, fortran, dtype = read_array_header_1_0(f)
    if len(shape) != 4:
        raise TypeError('Expected a 4-D array, got a single image: %s' % n)
    return shape
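Because only the magic bytes and the header are parsed, this works on arbitrarily large files. A quick sketch, assuming a hypothetical batch.npy written with np.save:

import numpy as np

np.save("batch.npy", np.zeros((2, 3, 32, 32), dtype=np.uint8))
print(load_shape("batch.npy"))   # (2, 3, 32, 32), without reading the payload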
Example #3
    def _parse_npy(bio):
        """Memory-map the npy payload when the underlying buffer is an mmap;
        otherwise fall back to np.load on an in-memory copy.
        """
        mmapfile = bio.raw.fileobj
        if isinstance(mmapfile, mmap.mmap):
            version = read_magic(bio)
            _check_version(version)

            shape, fortran_order, dtype = _read_array_header(bio, version)
            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)
            order = "F" if fortran_order else "C"
            offset = bio.tell()
            # Add the offset from the Wrapper file
            offset += bio.raw.offset
            data = np.ndarray.__new__(
                np.memmap,
                shape,
                dtype=dtype,
                buffer=mmapfile,
                offset=offset,
                order=order,
            )
            data._mmap = mmapfile
            data.offset = offset
            data.mode = "r+"
        else:
            b = BytesIO(bio.read())
            data = np.load(b)

        return data
Example #4
    def _read_header(self):
        with open(self.path, "rb") as fp:
            version = format.read_magic(fp)
            try:
                format._check_version(version)
            except ValueError:
                raise ValueError("Invalid file format.")
            header_data = format._read_array_header(fp, version)
            self.shape, self.fortran_order, self.dtype = header_data
Example #5
def read_header_data(fname):
    import numpy.lib.format as npfor
    # The file must be opened in binary mode for the header parser.
    with open(fname, 'rb') as fp:
        version = npfor.read_magic(fp)
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        shape, fortran_order, dtype = npfor.read_array_header_1_0(fp)
        header_length = fp.tell()
    return shape, fortran_order, dtype, header_length
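The returned header_length is the byte offset where the data block starts, so it can be passed straight to np.memmap; a sketch, assuming an existing data.npy (name illustrative):

import numpy as np

shape, fortran_order, dtype, offset = read_header_data("data.npy")
mm = np.memmap("data.npy", dtype=dtype, mode="r", shape=shape,
               order='F' if fortran_order else 'C', offset=offset)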
Example #6
    def _get_info(self):
        from numpy.lib import format
        with self.f as fp:
            version = format.read_magic(fp)
            format._check_version(version)

            shape, fortran_order, dtype = format._read_array_header(fp, version)
            self.shape = shape
            self.dtype = dtype
            self.order = 'F' if fortran_order else 'C'
            self.offset = fp.tell()
Example #7
def test_read_magic():
    import numpy as np
    from io import BytesIO
    from numpy.lib import format
    from numpy.testing import assert_

    s1 = BytesIO()
    s2 = BytesIO()

    arr = np.ones((3, 6), dtype=float)

    format.write_array(s1, arr, version=(1, 0))
    format.write_array(s2, arr, version=(2, 0))

    s1.seek(0)
    s2.seek(0)

    version1 = format.read_magic(s1)
    version2 = format.read_magic(s2)

    assert_(version1 == (1, 0))
    assert_(version2 == (2, 0))

    assert_(s1.tell() == format.MAGIC_LEN)
    assert_(s2.tell() == format.MAGIC_LEN)
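For reference, the magic prefix measured by this test is 8 bytes: b'\x93NUMPY' followed by one byte each for the major and minor version. A quick check:

from numpy.lib import format

buf = format.magic(1, 0)
assert buf == b'\x93NUMPY\x01\x00'
assert len(buf) == format.MAGIC_LEN   # MAGIC_LEN == 8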
Example #8
File: io.py  Project: vibhatha/dislib
def load_npy_file(path, block_size):
    """ Loads a file in npy format (must be 2-dimensional).

    Parameters
    ----------
    path : str
        Path to the npy file.
    block_size : tuple (int, int)
        Block size of the resulting ds-array.

    Returns
    -------
    x : ds-array
    """
    # Open before the try block so that fid is defined in the finally clause.
    fid = open(path, "rb")
    try:
        version = format.read_magic(fid)
        format._check_version(version)
        shape, fortran_order, dtype = format._read_array_header(fid, version)

        if fortran_order:
            raise ValueError("Fortran order not supported for npy files")

        if len(shape) != 2:
            raise ValueError("Array is not 2-dimensional")

        if block_size[0] > shape[0] or block_size[1] > shape[1]:
            raise ValueError("Block size is larger than the array")

        blocks = []
        n_blocks = int(ceil(shape[1] / block_size[1]))

        for i in range(0, shape[0], block_size[0]):
            read_count = min(block_size[0], shape[0] - i)
            read_size = int(read_count * shape[1] * dtype.itemsize)
            data = fid.read(read_size)
            out_blocks = [object() for _ in range(n_blocks)]
            _read_from_buffer(data, dtype, shape[1], block_size[1], out_blocks)
            blocks.append(out_blocks)

        return Array(blocks=blocks,
                     top_left_shape=block_size,
                     reg_shape=block_size,
                     shape=shape,
                     sparse=False)
    finally:
        fid.close()
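A usage sketch, assuming a dislib/PyCOMPSs runtime and a hypothetical 2-D samples.npy:

# x = load_npy_file("samples.npy", block_size=(1000, 100))
# x is a ds-array whose regular blocks are 1000 x 100.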
Example #9
File: stream.py  Project: nvagus/tforce
    def __init__(self, filename):
        self._filename = filename
        self._data = {}
        npz = np.load(filename)
        file = npz.zip.fp
        for key in npz.files:
            filename = '{}.npy'.format(key)
            npz.zip.open(filename)
            version = nlf.read_magic(file)
            shape, fortran_order, dtype = nlf.read_array_header_1_0(file) if version == (1, 0) \
                else nlf.read_array_header_2_0(file)
            self._data[key] = np.memmap(file,
                                        dtype=dtype,
                                        mode='r',
                                        shape=shape,
                                        order='F' if fortran_order else 'C',
                                        offset=file.tell())
Example #10
def read_npy(fp, prn=False):
    """ Read an npy file quickly.

    fp : string
        The file path, e.g. "c:/temp/a01.npy"
    prn : boolean
        Print the full header information if True.

    Requires
    --------
    import numpy as np
    from textwrap import dedent
    from numpy.lib import format

    Notes
    -----
    Shortcut: np.load("c:/temp/a01.npy")
    """
    frmt = """
    ---- npy reader ---------------------------------------------------------
    File  {}
    Shape {},  C-contig {},  dtype {}
    Magic {}
    -------------------------------------------------------------------------
    """
    with open(fp, 'rb') as f:
        major, minor = format.read_magic(f)
        mag = format.magic(major, minor)
        shp, is_fortran, dt = format.read_array_header_1_0(f)
        count = np.multiply.reduce(shp, dtype=np.int64)
        BUFFER_SIZE = 2**18
        max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dt.itemsize)
        array = np.ndarray(count, dtype=dt)
        for i in range(0, count, max_read_count):
            cnt = min(max_read_count, count - i)
            read_size = int(cnt * dt.itemsize)
            data = format._read_bytes(f, read_size, "array data")
            array[i:i + cnt] = np.frombuffer(data, dtype=dt, count=cnt)
        array.shape = shp
    if prn:
        print(dedent(frmt).format(fp, shp, (not is_fortran), dt, mag))
    return array
Example #11
File: mpi.py  Project: idiap/cbi_toolbox
def load(file_name, axis, mpi_comm=MPI.COMM_WORLD):
    """
    Load a numpy array across parallel jobs in the MPI communicator.
    The array is sliced along the chosen dimension, with minimal bandwidth.

    Parameters
    ----------
    file_name : str
        The numpy array file to load.
    axis : int
        The axis on which to distribute the array.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.

    Returns
    -------
    (numpy.ndarray, tuple(int))
        The distributed array, and the size of the full array.

    Raises
    ------
    ValueError
        If the numpy version used to save the file is not supported.
    NotImplementedError
        If the array is saved in Fortran order.
    """

    header = None
    if is_root_process(mpi_comm):
        with open(file_name, 'rb') as fp:
            version, _ = npformat.read_magic(fp)

            if version == 1:
                header = npformat.read_array_header_1_0(fp)
            elif version == 2:
                header = npformat.read_array_header_2_0(fp)
            else:
                raise ValueError(
                    "Invalid numpy format version: {}".format(version))

            header = *header, fp.tell()

    header = mpi_comm.bcast(header, root=0)
    full_shape, fortran, dtype, header_offset = header

    if fortran:
        raise NotImplementedError(
            "Fortran-ordered (column-major) arrays are not supported")

    ndims = len(full_shape)
    axis = utils.positive_index(axis, ndims)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    l_shape = list(full_shape)
    l_shape[axis] = bin_size

    l_array = np.empty(l_shape, dtype=dtype)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=dtype)
    slice_type.Commit()

    single_slice_extent = slice_type.extent
    if bin_size != 0:
        single_slice_extent /= bin_size

    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(l_array.dtype)

    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
    fh.Set_view(displacement, filetype=slice_type)

    fh.Read_all([l_array, l_array.size, base_type])
    fh.Close()
    slice_type.Free()

    return l_array, full_shape
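A usage sketch, assuming the script is launched under MPI (e.g. mpiexec -n 4) and a hypothetical volume.npy exists; each rank receives its own slab along the chosen axis:

local, full_shape = load("volume.npy", axis=0)
print(local.shape, full_shape)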
Example #12
    def _init(self):
        # TODO: Can speed this up with PAG's regex header parser
        self._file = self._path.open('rb')
        version = npformat.read_magic(self._file)
        _, _, dtype = npformat._read_array_header(self._file, version)
        self._dtype = dtype
Example #13
def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1,0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.
    
    :param str filename: The name of the file on disk. This may not be a 
        file-like object.
    :param str mode: The mode to open the file with. In addition to the 
        standard file modes, 'c' is also accepted to mean "copy on write". 
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a 
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new 
        file in "write" mode. Shape of (contiguous) slice if opening an 
        existing file.
    :param bool fortran_order: Whether the array should be Fortran-contiguous 
        (True) or C-contiguous (False) if we are creating a new file in 
        "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the 
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first dimension.
    :return numpy.memmap: The memory-mapped array.

    Raises:
    
    * :exc:`ValueError` if the data or the mode is invalid
    * :exc:`IOError` if the file is not found or cannot be opened correctly.
    
    .. seealso:: :func:`numpy.memmap`
    """
    # Note: a Python 2 original would use basestring here.
    if not isinstance(filename, str):
        raise ValueError("Filename must be a string.  Memmap cannot use"
                         " existing file handles.")

    if 'w' in mode:
        assert offset == 0, "Cannot specify offset when creating memmap"
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather than
        # just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # If we got here, then it should be safe to create the file.
        fp = open(filename, mode+'b')
        try:
            fp.write(magic(*version))
            write_array_header_1_0(fp, d)
            offset = fp.tell()
        finally:
            fp.close()
    else:
        # Read the header of the file first.
        fp = open(filename, 'rb')
        try:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            
            if shape:
                length = np.atleast_1d(shape)
                msg = "Specify shape along first dimension only"
                assert length.ndim == 1 and length.size == 1, msg
                # Coerce to a plain int so the rebuilt shape tuple is valid.
                length = int(length[0])
            else:
                length = fullshape[0] - offset
            shape = (length,) + fullshape[1:]
            
            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)
            
            offset_items = offset * np.prod(fullshape[1:], dtype=int)
            offset_bytes = fp.tell() + offset_items * dtype.itemsize
        finally:
            fp.close()
    
    if fortran_order:
        order = 'F'
    else:
        order = 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
        mode=mode, offset=offset_bytes)

    return marray
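A read-path sketch, assuming an existing big.npy in C order (name illustrative); the offset argument skips that many rows without reading them:

tail = open_memmap("big.npy", mode="r", offset=1000)   # maps rows 1000..end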
Example #14
def load_hstack_npy_files(path, cols_per_block=None):
    """ Loads the .npy files in a directory into a ds-array, stacking them
    horizontally, like (A|B|C). The order of concatenation is alphanumeric.

    At least 1 valid .npy file must exist in the directory, and every .npy file
    must contain a valid array. Every array must have the same dtype, order,
    and number of rows.

    The blocks of the returned ds-array will have the same number of rows as
    the input arrays, and cols_per_block columns, which defaults to the number
    of columns of the first array.

    Parameters
    ----------
    path : string
        Folder path.
    cols_per_block : tuple (int, int)
        Number of columns of the blocks for the output ds-array. If None, the
        number of columns of the first array is used.

    Returns
    -------
    x : ds-array
        A distributed representation (ds-array) of the stacked arrays.
    """
    dirlist = os.listdir(path)
    folder_paths = [os.path.join(path, name) for name in sorted(dirlist)]
    # Full path of .npy files in the folder
    files = [
        pth for pth in folder_paths
        if os.path.isfile(pth) and pth[-4:] == '.npy'
    ]
    # Read the header of the first file to get shape, order, and dtype
    with open(files[0], "rb") as fid:
        version = format.read_magic(fid)
        format._check_version(version)
        shape0, order0, dtype0 = format._read_array_header(fid, version)
    rows = shape0[0]
    if cols_per_block is None:
        cols_per_block = shape0[1]
    # Check that all files have the same number of rows, order and datatype,
    # and store the number of columns for each file.
    files_cols = [shape0[1]]
    for filename in files[1:]:
        with open(filename, "rb") as fid:
            version = format.read_magic(fid)
            format._check_version(version)
            shape, order, dtype = format._read_array_header(fid, version)
            if shape[0] != shape0[0] or order0 != order or dtype0 != dtype:
                raise AssertionError("All .npy files must have the same "
                                     "number of rows, order and dtype")
            files_cols.append(shape[1])

    # Compute the parameters block_files, start_col and end_col for each block,
    # and call the task _load_hstack_npy_block() to generate each block.
    blocks = []
    file_idx = 0
    start_col = 0
    while file_idx < len(files):
        block_files = [files[file_idx]]
        cols = files_cols[file_idx] - start_col
        while cols < cols_per_block:  # while block not completed
            if file_idx + 1 == len(files):  # last file
                break
            file_idx += 1
            block_files.append(files[file_idx])
            cols += files_cols[file_idx]
        # Compute end_col of last file in block (last block may be smaller)
        end_col = files_cols[file_idx] - max(0, (cols - cols_per_block))
        blocks.append(_load_hstack_npy_block(block_files, start_col, end_col))
        if end_col == files_cols[file_idx]:  # file completed
            file_idx += 1
            start_col = 0
        else:  # file uncompleted
            start_col = end_col

    return Array(blocks=[blocks],
                 top_left_shape=(rows, cols_per_block),
                 reg_shape=(rows, cols_per_block),
                 shape=(rows, sum(files_cols)),
                 sparse=False)
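A usage sketch, assuming a dislib environment and a hypothetical folder of .npy files with equal row counts:

# x = load_hstack_npy_files("/data/features/", cols_per_block=500)
# x has the same rows as the inputs; its blocks are (rows, 500), except
# possibly a narrower final block.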