Example #1
import numpy as np
from mpi4py import MPI


def save_mpiio(comm, fn, g_kl):
    """
    Write a global two-dimensional array to a single file in the npy format
    using MPI I/O: https://docs.scipy.org/doc/numpy/neps/npy-format.html

    Arrays written with this function can be read with numpy.load.

    Parameters
    ----------
    comm : mpi4py.MPI.Cartcomm
        Two-dimensional Cartesian MPI communicator.
    fn : str
        File name.
    g_kl : array
        Portion of the array held by this MPI process.
    """
    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)

    local_nx, local_ny = g_kl.shape
    nx = np.empty_like(local_nx)
    ny = np.empty_like(local_ny)

    commx = comm.Sub((True, False))
    commy = comm.Sub((False, True))
    commx.Allreduce(np.asarray(local_nx), nx)
    commy.Allreduce(np.asarray(local_ny), ny)

    arr_dict_str = str({
        'descr': dtype_to_descr(g_kl.dtype),
        'fortran_order': False,
        'shape': (int(nx), int(ny))  # np.asscalar is removed in recent NumPy
    })
    # Pad the header with spaces and a final newline so that the total header
    # size (magic string + 2-byte length field + dict) is a multiple of 16,
    # as in the npy format spec.
    while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
        arr_dict_str += ' '
    arr_dict_str += '\n'
    header_len = len(arr_dict_str) + len(magic_str) + 2

    # Exclusive prefix sums give each rank's element offset into the global
    # array: whole rows of length ny along x, columns along y.
    offsetx = np.zeros_like(local_nx)
    commx.Exscan(np.asarray(ny * local_nx), offsetx)
    offsety = np.zeros_like(local_ny)
    commy.Exscan(np.asarray(local_ny), offsety)

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    if MPI.COMM_WORLD.Get_rank() == 0:
        file.Write(magic_str)
        file.Write(np.int16(len(arr_dict_str)))
        file.Write(arr_dict_str.encode('latin-1'))
    mpitype = MPI._typedict[g_kl.dtype.char]
    # File layout of the local block: local_nx blocks of local_ny contiguous
    # elements, separated by a stride of ny elements (the global row length).
    filetype = mpitype.Create_vector(g_kl.shape[0], g_kl.shape[1], ny)
    filetype.Commit()
    file.Set_view(header_len + (offsety + offsetx) * mpitype.Get_size(),
                  filetype=filetype)
    file.Write_all(g_kl.copy())
    filetype.Free()
    file.Close()
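
A minimal usage sketch of the function above (illustrative only: it assumes an
mpirun launch, and the (64, 32) tile size and file name are made up). `comm`
must be a two-dimensional Cartesian communicator because of the `comm.Sub`
calls:

import numpy as np
from mpi4py import MPI

world = MPI.COMM_WORLD
# Build a 2-D Cartesian communicator over all ranks.
cart = world.Create_cart(MPI.Compute_dims(world.Get_size(), 2))
# Every rank owns an equally sized local tile of the global array.
local = np.full((64, 32), cart.Get_rank(), dtype=np.float64)
save_mpiio(cart, 'field.npy', local)
# Afterwards, a serial np.load('field.npy') returns the assembled array.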
Example #2
    def save_mpiio(self, file_name: str, vec: np.ndarray) -> None:
        """
        Write a global two-dimensional array to a single file in the npy format
        using MPI I/O.

        Arrays written with this function can be read with numpy.load.

        Args:
            file_name (str): File name.
            vec (np.ndarray):
                Portion of the array held by this MPI process. This needs to
                be a two-dimensional array.
        """

        magic_str = magic(1, 0)
        x_size, y_size = vec.shape

        vx, vy = np.empty_like(x_size), np.empty_like(y_size)

        commx = self.rank_grid.Sub((True, False))
        commy = self.rank_grid.Sub((False, True))
        commx.Allreduce(np.asarray(x_size), vx)
        commy.Allreduce(np.asarray(y_size), vy)

        arr_dict_str = str({
            'descr': dtype_to_descr(vec.dtype),
            'fortran_order': False,
            'shape': (int(vx), int(vy))  # np.asscalar is removed in recent NumPy
        })

        while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
            arr_dict_str += ' '
        arr_dict_str += '\n'
        header_len = len(arr_dict_str) + len(magic_str) + 2

        x_offset = np.zeros_like(x_size)
        commx.Exscan(np.asarray(vy * x_size), x_offset)
        y_offset = np.zeros_like(y_size)
        commy.Exscan(np.asarray(y_size), y_offset)

        file = MPI.File.Open(self.rank_grid, file_name,
                             MPI.MODE_CREATE | MPI.MODE_WRONLY)
        if self.rank == 0:
            file.Write(magic_str)
            file.Write(np.int16(len(arr_dict_str)))
            file.Write(arr_dict_str.encode('latin-1'))
        mpitype = MPI._typedict[vec.dtype.char]
        filetype = mpitype.Create_vector(x_size, y_size, vy)
        filetype.Commit()
        file.Set_view(header_len + (y_offset + x_offset) * mpitype.Get_size(),
                      filetype=filetype)
        file.Write_all(vec.copy())
        filetype.Free()
        file.Close()
Example #3
import numpy as np
from numpy.lib import format as fmt


def get_header(files, axis):
    """get header for concatenated file

    Parameters
    ----------
    files : iterable of path-like
        npy files to concatenate
    axis : int
        axis to concatenate along

    Returns
    -------
    dict
        datatype descr, fortran order and shape, for concatenated file
    """
    dtypes, shapes, forders = [], [], []
    for file in files:
        f = np.load(file, "r")
        dtypes.append(f.dtype)
        shapes.append(f.shape)
        forders.append(f.flags["F_CONTIGUOUS"])

    if all(dtype == dtypes[0] for dtype in dtypes):
        dtype = dtypes[0]
    else:
        raise ValueError("All files must have the same dtype")

    if all(forder == forders[0] for forder in forders):
        forder = forders[0]
    else:
        raise ValueError("All files must have the same fortran order")

    if all(len(shape) == len(shapes[0]) for shape in shapes):
        ndims = len(shapes[0])
    else:
        raise ValueError("All files must have the same number of dimensions")

    if all(
            all(shape[axis_] == shapes[0][axis_] for shape in shapes)
            for axis_ in range(ndims) if axis_ != axis):
        shape = list(shapes[0])
        shape[axis] = sum(shape_[axis] for shape_ in shapes)
        shape = tuple(shape)
    else:
        raise ValueError(
            "All files must have the same shape along the concatenation axis")

    header = {
        "descr": fmt.dtype_to_descr(dtype),
        "fortran_order": forder,
        "shape": shape,
    }
    return header
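
For illustration (the part files below are hypothetical), the returned dict can
be passed straight to numpy's header writer when assembling the concatenated
file:

from numpy.lib import format as fmt

parts = ["part0.npy", "part1.npy"]            # hypothetical inputs
header = get_header(parts, axis=0)
with open("combined.npy", "wb") as out:
    fmt.write_array_header_2_0(out, header)   # writes magic string + header
    # ...the raw bytes of each part would be appended here...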
Example #4
from numpy.lib.format import dtype_to_descr


def _npy_header(shape, dtype, order='C'):  # pragma: no cover
    d = {'shape': shape}
    if order == 'C':
        d['fortran_order'] = False
    elif order == 'F':
        d['fortran_order'] = True
    else:
        # Totally non-contiguous data. We will have to make it C-contiguous
        # before writing. Note that we need to test for C_CONTIGUOUS first
        # because a 1-D array is both C_CONTIGUOUS and F_CONTIGUOUS.
        d['fortran_order'] = False
    d['descr'] = dtype_to_descr(dtype)
    return d
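
A sketch of how such a header dict is typically consumed (the array `a` is
illustrative):

import numpy as np
from numpy.lib.format import write_array_header_1_0

a = np.arange(12.0).reshape(3, 4)
with open('a.npy', 'wb') as fp:
    write_array_header_1_0(fp, _npy_header(a.shape, a.dtype, order='C'))
    fp.write(np.ascontiguousarray(a).tobytes())
# np.load('a.npy') now reproduces `a`.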
Example #5
import numpy as np
from numpy.lib import format as npfor


def pack_header_data(shape, fortran_order, dtype):

    # Do very strict type checking, which is normally not done in Python.
    # We need repr() to work perfectly.
    msg = "`shape` must be a tuple of integers."
    if type(shape) != type(()):
        raise TypeError(msg)
    for s in shape:
        if type(s) != type(1):
            raise TypeError(msg)
    if type(fortran_order) != type(True):
        msg = "`fortran_order` must be boolian."
        raise TypeError(msg)

    header_data = {}
    header_data['shape'] = shape
    header_data['fortran_order'] = fortran_order
    header_data['descr'] = npfor.dtype_to_descr(np.dtype(dtype))
    return header_data
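
For example (the descr shown assumes a little-endian machine):

pack_header_data((3, 4), False, np.float64)
# -> {'shape': (3, 4), 'fortran_order': False, 'descr': '<f8'}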
Example #6
    def _prepare_header_data(self):
        # Make header data
        d = {
            'shape': self.shape,
            'fortran_order': self.fortran_order,
            'descr': npformat.dtype_to_descr(self.dtype)
        }

        h_bytes = io.BytesIO()
        npformat.write_array_header_2_0(h_bytes, d)

        # Pad the end of the header
        fill_len = self.header_length - h_bytes.tell()
        if fill_len < 0:
            raise OverflowError(
                "File {} cannot be appended. The reserved header space is "
                "too short.".format(self.filename))
        elif fill_len > 0:
            h_bytes.write(b'\x20' * fill_len)

        h_bytes.seek(0)
        self._header_bytes_to_write = h_bytes.read()
Example #7
File: mpi.py  Project: idiap/cbi_toolbox
def save(file_name, array, axis, full_shape=None, mpi_comm=MPI.COMM_WORLD):
    """
    Save a numpy array distributed across the jobs of an MPI communicator
    to a single file. The pieces are assembled along the chosen dimension.

    Parameters
    ----------
    file_name : str
        The file to write the numpy array to.
    array : numpy.ndarray
        The distributed array.
    axis : int
        The axis along which the array is distributed.
    full_shape : tuple(int), optional
        The size of the full array, by default None.
    mpi_comm : mpi4py.MPI.Comm, optional
        The MPI communicator used to distribute, by default MPI.COMM_WORLD.
    """

    if full_shape is None:
        full_shape = gather_full_shape(array, axis, mpi_comm)

    axis = utils.positive_index(axis, len(full_shape))

    header_offset = None
    if is_root_process(mpi_comm):
        header_dict = {
            'shape': full_shape,
            'fortran_order': False,
            'descr': npformat.dtype_to_descr(array.dtype)
        }

        with open(file_name, 'wb') as fp:
            try:
                npformat.write_array_header_1_0(fp, header_dict)
            except ValueError:
                npformat.write_array_header_2_0(fp, header_dict)

            header_offset = fp.tell()
    header_offset = mpi_comm.bcast(header_offset, root=0)

    i_start, bin_size = distribute_mpi(full_shape[axis], mpi_comm)

    slice_type = create_slice_view(axis,
                                   bin_size,
                                   shape=full_shape,
                                   dtype=array.dtype)
    slice_type.Commit()

    single_slice_extent = slice_type.extent
    if bin_size != 0:
        single_slice_extent /= bin_size

    displacement = header_offset + i_start * single_slice_extent
    base_type = to_mpi_datatype(array.dtype)

    fh = MPI.File.Open(mpi_comm, file_name, MPI.MODE_WRONLY | MPI.MODE_APPEND)
    fh.Set_view(displacement, filetype=slice_type)

    fh.Write_all([array, array.size, base_type])
    fh.Close()
    slice_type.Free()
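
A hypothetical driver for the function above (run under mpirun; the shapes and
file name are illustrative, the cbi_toolbox helpers it relies on must be
importable, and each rank is assumed to hold the slab that the library's own
distribution assigns to it):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
local_block = np.random.rand(8, 64, 64)   # this rank's slab along axis 0
save('stack.npy', local_block, axis=0, mpi_comm=comm)
# A serial np.load('stack.npy') should then yield the full stacked array.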
Example #8
import numpy as np
from numpy.lib.format import (dtype_to_descr, magic, read_array_header_1_0,
                              read_magic, write_array_header_1_0)


def open_memmap(filename, mode='r+', dtype=None, shape=None,
                fortran_order=False, version=(1, 0), offset=0):
    """
    Open a .npy file as a memory-mapped array, with offset argument.

    This may be used to read an existing file or create a new one.
    
    :param str filename: The name of the file on disk. This may not be a 
        file-like object.
    :param str mode: The mode to open the file with. In addition to the 
        standard file modes, 'c' is also accepted to mean "copy on write". 
        See `numpy.memmap` for the available mode strings.
    :param dtype dtype: The data type of the array if we are creating a 
        new file in "write" mode.
    :param tuple shape: The shape of the array if we are creating a new 
        file in "write" mode. Shape of (contiguous) slice if opening an 
        existing file.
    :param bool fortran_order: Whether the array should be Fortran-contiguous 
        (True) or C-contiguous (False) if we are creating a new file in 
        "write" mode.
    :param tuple version: If the mode is a "write" mode, then this is the 
        version (major, minor) of the file format used to create the file.
    :param int offset: Number of elements to skip along the first dimension.
    :return numpy.memmap: The memory-mapped array.

    Raises:
    
    * :exc:`ValueError` if the data or the mode is invalid
    * :exc:`IOError` if the file is not found or cannot be opened correctly.
    
    .. seealso:: :func:`numpy.memmap`
    """
    if not isinstance(filename, str):
        raise ValueError("Filename must be a string.  Memmap cannot use"
                         " existing file handles.")

    if 'w' in mode:
        assert offset == 0, "Cannot specify offset when creating memmap"
        # We are creating the file, not reading it.
        # Check if we ought to create the file.
        if version != (1, 0):
            msg = "only support version (1,0) of file format, not %r"
            raise ValueError(msg % (version,))
        # Ensure that the given dtype is an authentic dtype object rather than
        # just something that can be interpreted as a dtype object.
        dtype = np.dtype(dtype)
        if dtype.hasobject:
            msg = "Array can't be memory-mapped: Python objects in dtype."
            raise ValueError(msg)
        d = dict(
            descr=dtype_to_descr(dtype),
            fortran_order=fortran_order,
            shape=shape,
        )
        # If we got here, then it should be safe to create the file.
        fp = open(filename, mode+'b')
        try:
            fp.write(magic(*version))
            write_array_header_1_0(fp, d)
            offset = fp.tell()
        finally:
            fp.close()
    else:
        # Read the header of the file first.
        fp = open(filename, 'rb')
        try:
            version = read_magic(fp)
            if version != (1, 0):
                msg = "only support version (1,0) of file format, not %r"
                raise ValueError(msg % (version,))
            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
            
            if shape:
                length = np.atleast_1d(shape)
                msg = "Specify shape along first dimension only"
                assert length.ndim == 1, msg
            else:
                length = fullshape[0] - offset
            shape = (length,) + fullshape[1:]
            
            if dtype.hasobject:
                msg = "Array can't be memory-mapped: Python objects in dtype."
                raise ValueError(msg)
            
            offset_items = offset * np.prod(fullshape[1:], dtype=int)
            offset_bytes = fp.tell() + offset_items * dtype.itemsize
        finally:
            fp.close()
    
    if fortran_order:
        order = 'F'
    else:
        order = 'C'

    # We need to change a write-only mode to a read-write mode since we've
    # already written data to the file.
    if mode == 'w+':
        mode = 'r+'

    marray = np.memmap(filename, dtype=dtype, shape=shape, order=order,
        mode=mode, offset=offset_bytes)

    return marray
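
A sketch of the offset feature (file name illustrative; assumes a C-ordered
version 1.0 file): map only the rows from a given index onward without loading
the whole file.

tail = open_memmap('big.npy', mode='r', offset=1000)
# tail[0] corresponds to row 1000 of the array stored in 'big.npy'.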
Example #9
import numpy as np
from mpi4py import MPI


def save_npy(fn,
             data,
             subdomain_locations=None,
             nb_grid_pts=None,
             comm=MPI.COMM_WORLD):
    """

    Parameters
    ----------
    data : numpy array : data owned by the processor
    location : index of the first element of data within the global data
    nb_grid_pts : nb_grid_pts of the global data
    comm : MPI communicator

    Returns
    -------

    """
    data = np.asarray(data)
    ndims = len(data.shape)

    if ndims == 1:
        data = data.reshape((-1, 1))

    if subdomain_locations is None:
        subdomain_locations = (0, 0)
    elif ndims == 1:
        subdomain_locations = (subdomain_locations, 0)
    nb_subdomain_grid_pts = data.shape

    if nb_grid_pts is None:
        nb_grid_pts = nb_subdomain_grid_pts
    elif ndims == 1:
        nb_grid_pts = (nb_grid_pts, 1)

    fortran_order = np.isfortran(data)

    from numpy.lib.format import dtype_to_descr, magic
    magic_str = magic(1, 0)
    arr_dict_str = str({
        'descr': dtype_to_descr(data.dtype),
        'fortran_order': fortran_order,
        'shape': (nb_grid_pts[0], ) if ndims == 1 else nb_grid_pts
    })

    while (len(arr_dict_str) + len(magic_str) + 2) % 16 != 15:
        arr_dict_str += ' '
    arr_dict_str += '\n'
    header_len = len(arr_dict_str) + len(magic_str) + 2

    file = MPI.File.Open(comm, fn, MPI.MODE_CREATE | MPI.MODE_WRONLY)
    if comm.Get_rank() == 0:
        file.Write(magic_str)
        file.Write(np.int16(len(arr_dict_str)))
        file.Write(arr_dict_str.encode('latin-1'))

    if fortran_order:
        # The header declares Fortran order. The transposed array (written
        # below) is C-contiguous with swapped shape, so the bytes land on
        # disk in Fortran order with respect to the global shape.
        ix = 1
        iy = 0
    else:
        ix = 0
        iy = 1

    mpitype = MPI._typedict[data.dtype.char]
    # See MPI_TYPE_VECTOR: each local row is a contiguous block in the file,
    # and consecutive rows start nb_grid_pts[iy] elements apart (the global
    # row length).
    filetype = mpitype.Create_vector(
        nb_subdomain_grid_pts[ix],   # number of blocks (rows of the local data)
        nb_subdomain_grid_pts[iy],   # block length (contiguous direction)
        nb_grid_pts[iy])             # stride between the starts of two blocks
    filetype.Commit()
    file.Set_view(
        header_len +
        (subdomain_locations[ix] * nb_grid_pts[iy] + subdomain_locations[iy]) *
        mpitype.Get_size(),
        filetype=filetype)
    if fortran_order:
        data = data.transpose()
    file.Write_all(data.copy())  # TODO: is the copy needed ?
    filetype.Free()

    file.Close()
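
A minimal parallel usage sketch (run under mpirun; the 1-D row decomposition
and file name are illustrative): each rank writes its contiguous block of rows
of a global (4 * size, 6) array.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
local = np.full((4, 6), rank, dtype=np.float64)   # rows owned by this rank
save_npy('grid.npy', local,
         subdomain_locations=(4 * rank, 0),
         nb_grid_pts=(4 * size, 6),
         comm=comm)
# np.load('grid.npy') afterwards returns the assembled (4 * size, 6) array.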