Пример #1
0
def test_read_array_header_2_0():
    """Check that a version 2.0 header written by ``write_array`` reads back.

    A (3, 6) float array is serialized into an in-memory buffer; the header
    is then parsed with ``read_array_header_2_0`` and compared against the
    expected shape, ordering flag and dtype.
    """
    buf = BytesIO()
    format.write_array(buf, np.ones((3, 6), dtype=float), version=(2, 0))

    # Reposition the stream at MAGIC_LEN, which is where the header reader
    # is started from.
    buf.seek(format.MAGIC_LEN)
    header_shape, is_fortran, header_dtype = format.read_array_header_2_0(buf)

    observed = (header_shape, is_fortran, header_dtype)
    expected = ((3, 6), False, float)
    assert_(observed == expected)
Пример #2
0
    def _init_from_file_header(self):
        """Initialize the object from an existing file.

        Seeks ``self.fs`` to ``HEADER_DATA_SIZE_OFFSET``, parses a version 2.0
        npy header from that position and stores ``shape``, ``dtype``,
        ``header_length`` and ``itemsize`` on the instance.

        Raises
        ------
        ValueError
            If the file stores its data in Fortran (column-major) order.

        """
        self.fs.seek(self.HEADER_DATA_SIZE_OFFSET)
        self.shape, fortran_order, self.dtype = npformat.read_array_header_2_0(
            self.fs)
        # The stream position reached after parsing the header is recorded as
        # the header length.
        self.header_length = self.fs.tell()

        if fortran_order:
            # Adjacent literals are concatenated verbatim, so the trailing
            # space after "Please" is required.
            raise ValueError(
                'Column major (Fortran-style) files are not supported. Please '
                'translate it first to row major (C-style).')

        # Determine itemsize. A zero-length leading axis keeps the temporary
        # array empty while still carrying the dtype.
        shape = (0, ) + self.shape[1:]
        self.itemsize = np.empty(shape=shape, dtype=self.dtype).itemsize
Пример #3
0
 def __init__(self, filename):
     """Memory-map every array stored in an uncompressed ``.npz`` archive.

     For each key in the archive, the member ``<key>.npy`` is located inside
     the zip container, its npy header is parsed, and a read-only
     ``np.memmap`` over the array payload is stored in ``self._data[key]``.

     Parameters
     ----------
     filename
         Archive to open; passed straight to ``np.load``.

     Raises
     ------
     ValueError
         If a member is stored compressed (its bytes cannot be mapped).
     zipfile.BadZipFile
         If a member's local file header does not carry the zip signature.
     """
     import struct
     import zipfile

     self._filename = filename
     self._data = {}
     with np.load(filename) as npz:
         archive = npz.zip
         raw = archive.fp
         for key in npz.files:
             member = '{}.npy'.format(key)
             info = archive.getinfo(member)
             if info.compress_type != zipfile.ZIP_STORED:
                 raise ValueError(
                     'Cannot memory-map compressed member {!r}'.format(member))

             # Locate the payload explicitly instead of relying on
             # ZipFile.open() moving the shared file pointer. The local file
             # header is 30 fixed bytes, with the name and extra-field
             # lengths at offsets 26 and 28, followed by those two fields.
             raw.seek(info.header_offset)
             local_header = raw.read(30)
             if len(local_header) != 30 or local_header[:4] != b'PK\x03\x04':
                 raise zipfile.BadZipFile(
                     'Bad local file header for {!r}'.format(member))
             name_len, extra_len = struct.unpack('<HH', local_header[26:30])
             raw.seek(info.header_offset + 30 + name_len + extra_len)

             version = nlf.read_magic(raw)
             if version == (1, 0):
                 header = nlf.read_array_header_1_0(raw)
             else:
                 header = nlf.read_array_header_2_0(raw)
             shape, fortran_order, dtype = header

             # After the header has been read, the stream position is the
             # start of the raw array bytes.
             self._data[key] = np.memmap(raw,
                                         dtype=dtype,
                                         mode='r',
                                         shape=shape,
                                         order='F' if fortran_order else 'C',
                                         offset=raw.tell())
Пример #4
0
    def _init_from_file_header(self):
        """Initialize the object from an existing file.

        Seeks ``self.fs`` to ``HEADER_DATA_SIZE_OFFSET``, parses a version 2.0
        npy header from that position and stores ``shape``, ``dtype``,
        ``header_length`` and ``itemsize`` on the instance.

        Raises
        ------
        ValueError
            If the header cannot be parsed as version 2.0, or if the file
            stores its data in Fortran (column-major) order.

        """
        self.fs.seek(self.HEADER_DATA_SIZE_OFFSET)
        try:
            self.shape, fortran_order, self.dtype = \
                npformat.read_array_header_2_0(self.fs)
        except ValueError:
            # Replace the parser's error with one that names the file and
            # tells the user how to convert it.
            raise ValueError(
                'Npy file {} header is not 2.0 format. You can make the '
                'conversion using elfi.store.NpyFile by passing the '
                'preloaded array as an argument.'.format(self.filename))
        # The stream position reached after parsing the header is recorded as
        # the header length.
        self.header_length = self.fs.tell()

        if fortran_order:
            # Adjacent literals are concatenated verbatim, so the trailing
            # space after "Please" is required.
            raise ValueError(
                'Column major (Fortran-style) files are not supported. Please '
                'translate it first to row major (C-style).')

        # Determine itemsize. A zero-length leading axis keeps the temporary
        # array empty while still carrying the dtype.
        shape = (0, ) + self.shape[1:]
        self.itemsize = np.empty(shape=shape, dtype=self.dtype).itemsize
Пример #5
0
def load(file_name, axis, mpi_comm=MPI.COMM_WORLD):
    """
    Load a numpy array across parallel jobs in the MPI communicator.
    The array is sliced along the chosen dimension, with minimal bandwidth.

    The root process parses the npy header and broadcasts it; every process
    then reads its own slice of the payload through a collective MPI file
    read.

    Parameters
    ----------
    file_name : str
        The numpy array file to load.
    axis : int
        The axis on which to distribute the array.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.

    Returns
    -------
    (numpy.ndarray, tuple(int))
        The distributed array, and the size of the full array.

    Raises
    ------
    ValueError
        If the numpy version used to save the file is not supported.
    NotImplementedError
        If the array is saved in Fortran order.
    """

    header = None
    if is_root_process(mpi_comm):
        # NOTE(review): an exception raised in this branch leaves the root
        # process before the broadcast below while the other ranks still
        # enter it -- confirm how callers handle that.
        with open(file_name, 'rb') as fp:
            # Only the major format version selects the header parser.
            version, _ = npformat.read_magic(fp)

            if version == 1:
                header = npformat.read_array_header_1_0(fp)
            elif version == 2:
                header = npformat.read_array_header_2_0(fp)
            else:
                raise ValueError(
                    "Invalid numpy format version: {}".format(version))

            # Append the stream position after the header: this is where the
            # array payload starts in the file.
            header = *header, fp.tell()

    header = mpi_comm.bcast(header, root=0)
    full_shape, fortran, dtype, header_offset = header

    if fortran:
        raise NotImplementedError(
            "Fortran-ordered (column-major) arrays are not supported")

    ndims = len(full_shape)
    axis = utils.positive_index(axis, ndims)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    # Local buffer: same shape as the full array except along `axis`.
    l_shape = list(full_shape)
    l_shape[axis] = bin_size

    l_array = np.empty(l_shape, dtype=dtype)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=dtype)
    slice_type.Commit()

    single_slice_extent = slice_type.extent
    if bin_size != 0:
        # Floor division keeps the value an int; it feeds the byte
        # displacement handed to Set_view, which true division would turn
        # into a float.
        single_slice_extent //= bin_size

    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(l_array.dtype)

    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_RDONLY)
    fh.Set_view(displacement, filetype=slice_type)

    fh.Read_all([l_array, l_array.size, base_type])
    fh.Close()
    slice_type.Free()

    return l_array, full_shape