def func(n, m):
    # materialise the slices so that len() and repeated iteration work
    slices = list(split(n, m))
    assert_eq(len(slices), m)
    x = np.zeros(n, int)
    for s in slices:
        x[s] += 1
    assert_same(x, 1, broadcasting=True)
    assert_eq([split(n, m, i) for i in range(m)], slices)
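# For reference, a minimal sketch of a split() helper consistent with the
# tests in this collection (an assumption for illustration, not necessarily
# the library's actual implementation; 'split_sketch' is a hypothetical
# name): split_sketch(n, m) yields m contiguous slices covering range(n),
# each element exactly once, and split_sketch(n, m, rank) returns the slice
# assigned to the given rank, the first n % m ranks getting one extra
# element.
def split_sketch(n, m, rank=None):
    if rank is None:
        return (split_sketch(n, m, r) for r in range(m))
    nlocal = n // m + (1 if rank < n % m else 0)
    start = rank * (n // m) + min(rank, n % m)
    return slice(start, start + nlocal)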
def test_distribute():
    class MyComm(object):
        def __init__(self, rank, size):
            self.rank = rank
            self.size = size
    if size > 1:
        return

    def func(a, r, shape, shapes):
        assert_equal(a[r], shape[0])
        assert_equal(shapes[r], shape)

    for n in range(10):
        for sz in range(1, 7):
            work = np.zeros(n, int)
            for i in range(n):
                work[i] = i % sz
            a = np.zeros(sz, int)
            for r in range(sz):
                a[r] = sum(work == r)
            stop = tuple(np.cumsum(a))
            start = (0,) + stop[:-1]
            comm = MyComm(0, sz)
            for s in [(), (1,), (3, 4)]:
                shapes = distribute_shapes((n,) + s, comm=comm)
                for r in range(sz):
                    shape = distribute_shape((n,) + s, rank=r, size=sz)
                    yield func, a, r, shape, shapes
                    if len(s) > 0:
                        continue
                    sl = slice(start[r], stop[r])
                    yield assert_eq, sl, split(n, sz, r)
def func(shape, dtype):
    d = MPIDistributionGlobalOperator(shape)
    x_global = np.ones(shape, dtype)
    s = split(shape[0], size, rank)
    x_local = d(x_global)
    assert_eq(x_local, x_global[s])
    assert_eq(d.T(x_local), x_global)
def scatter(self, comm=None):
    """
    MPI-scatter of the table.

    Parameters
    ----------
    comm : MPI.Comm
        The MPI communicator of the group of processes in which the table
        will be scattered.

    """
    if self.comm.size > 1:
        raise ValueError('The table is already distributed.')
    if comm is None:
        comm = MPI.COMM_WORLD
    if comm.size == 1:
        return self

    selection = split(len(self), comm.size, comm.rank)
    out = self[selection]
    for k in out._special_attributes:
        if k in out._reserved_attributes:
            continue
        try:
            v = out.__dict__[k]
        except KeyError:
            continue
        if isinstance(v, np.ndarray) and v.ndim > 0:
            setattr(out, k, v.copy())
    object.__setattr__(out, 'comm', comm)
    return out
def parallel_for_chunk(func, args, nprocs=8):
    pool = Pool(nprocs)
    slices = tuple(split(len(args), nprocs))

    def wrapper(islice):
        return func(args[slices[islice]])

    out = pool.amap(wrapper, xrange(len(slices))).get(TIMEOUT)
    return list(itertools.chain(*out))
def test_scatter():
    np.random.seed(0)
    n = 4
    x = np.random.random(n)
    layout = PackedTable(n, x=x)
    s = split(n, size, rank)
    scattered = layout.scatter()
    assert_same(scattered.x, x[s])
    assert_same(scattered.all.x, x)
def _get_synthbeam(scene, position, area, nu, bandwidth, horn,
                   primary_beam, secondary_beam,
                   synthbeam_dtype=np.float32, theta_max=45):
    """
    Return the monochromatic synthetic beam for a specified location
    on the focal plane, multiplied by a given area and bandwidth.

    Parameters
    ----------
    scene : QubicScene
        The scene.
    position : array-like of shape (..., 3)
        The 3D coordinates where the response is computed, in meters.
    area : array-like
        The integration area, in m^2.
    nu : float
        The frequency for which the response is computed [Hz].
    bandwidth : float
        The filter bandwidth [Hz].
    horn : PackedArray
        The horn layout.
    primary_beam : Beam
        The primary beam.
    secondary_beam : Beam
        The secondary beam.
    synthbeam_dtype : dtype, optional
        The data type for the synthetic beams (default: float32).
        It is the dtype used to store the values of the pointing matrix.
    theta_max : float, optional
        The maximum zenithal angle above which the synthetic beam is
        assumed to be zero, in degrees.

    """
    MAX_MEMORY_B = 1e9
    theta, phi = hp.pix2ang(scene.nside, scene.index)
    index = np.where(theta <= np.radians(theta_max))[0]
    nhorn = int(np.sum(horn.open))
    npix = len(index)
    nbytes_B = npix * nhorn * 24
    ngroup = int(np.ceil(nbytes_B / MAX_MEMORY_B))
    out = np.zeros(position.shape[:-1] + (len(scene),),
                   dtype=synthbeam_dtype)
    for s in split(npix, ngroup):
        index_ = index[s]
        sb = QubicInstrument._get_response(
            theta[index_], phi[index_], bandwidth, position, area, nu,
            horn, primary_beam, secondary_beam)
        out[..., index_] = abs2(sb, dtype=synthbeam_dtype)
    return out
def _get_synthbeam_(scene, position, area, nu, bandwidth, horn,
                    primary_beam, secondary_beam, spectral_irradiance=1,
                    synthbeam_dtype=np.float32, theta_max=30):
    """
    Return the monochromatic synthetic beam for a specified location
    on the focal plane, multiplied by a given area and bandwidth.

    Parameters
    ----------
    scene : QubicScene
        The scene.
    position : array-like of shape (..., 3)
        The 3D coordinates where the response is computed, in meters.
    area : array-like
        The integration area, in m^2.
    nu : float
        The frequency for which the response is computed [Hz].
    bandwidth : float
        The filter bandwidth [Hz].
    horn : PackedArray
        The horn layout.
    primary_beam : Beam
        The primary beam.
    secondary_beam : Beam
        The secondary beam.
    synthbeam_dtype : dtype, optional
        The data type for the synthetic beams (default: float32).
        It is the dtype used to store the values of the pointing matrix.
    theta_max : float, optional
        The maximum zenithal angle above which the synthetic beam is
        assumed to be zero, in degrees.

    """
    MAX_MEMORY_B = 1e9
    theta, phi = hp.pix2ang(scene.nside, scene.index)
    index = np.where(theta <= np.radians(theta_max))[0]
    nhorn = int(np.sum(horn.open))
    npix = len(index)
    nbytes_B = npix * nhorn * 24
    ngroup = int(np.ceil(nbytes_B / MAX_MEMORY_B))
    out = np.zeros(position.shape[:-1] + (len(scene),),
                   dtype=synthbeam_dtype)
    for s in split(npix, ngroup):
        index_ = index[s]
        sb = MultiQubicInstrument._get_response(
            theta[index_], phi[index_], spectral_irradiance, position,
            area, nu, horn, primary_beam, secondary_beam)
        out[..., index_] = abs2(sb, dtype=synthbeam_dtype)
    return out * bandwidth * deriv_and_const(nu, scene.nside)
def read_fits(filename, extname, comm):
    """
    Read and distribute a FITS file into local arrays.

    Parameters
    ----------
    filename : str
        The FITS file name.
    extname : str
        The FITS extension name. Use None to read the first HDU with data.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays.

    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')

    # get primary hdu or extension
    fits = pyfits.open(filename)
    if extname is not None:
        hdu = fits[extname]
    else:
        ihdu = 0
        while True:
            try:
                hdu = fits[ihdu]
            except IndexError:
                raise IOError('The FITS file has no data.')
            if hdu.header['NAXIS'] == 0:
                ihdu += 1
                continue
            if hdu.data is not None:
                break
    header = hdu.header
    n = header['NAXIS' + str(header['NAXIS'])]
    s = split(n, comm.size, comm.rank)
    output = pyfits.Section(hdu)[s]
    if not output.dtype.isnative:
        output = output.byteswap().newbyteorder('=')

    # update the header
    header['NAXIS' + str(header['NAXIS'])] = s.stop - s.start
    try:
        if header['CTYPE1'] == 'RA---TAN' and header['CTYPE2'] == 'DEC--TAN':
            header['CRPIX2'] -= s.start
    except KeyError:
        pass
    comm.Barrier()
    return output, header
def split(self, n):
    """
    Split the instrument in partitioning groups.

    Example
    -------
    >>> instr = Instrument('instr', Layout((4, 4)))
    >>> [len(_) for _ in instr.split(2)]
    [8, 8]

    """
    return tuple(self[_] for _ in split(len(self), n))
def split(self, n):
    """
    Split the table in partitioning groups.

    Example
    -------
    >>> table = PackedTable((4, 4), selection=[0, 1, 4, 5])
    >>> print(table.split(2))
    (PackedTable((4, 4), index=slice(0, 2, 1)), PackedTable((4, 4), index=slice(4, 6, 1)))

    """
    return tuple(self[_] for _ in split(len(self), n))
def scatter_fitsheader(header, comm=MPI.COMM_WORLD):
    """
    Return the header of local arrays given that of the global array.

    """
    if 'NAXIS' not in header:
        raise KeyError("The FITS header does not contain the 'NAXIS' keyword.")
    if 'NAXIS1' not in header:
        raise KeyError('Scalar FITS headers cannot be split.')
    axis = str(header['NAXIS'])
    nglobal = header['NAXIS' + axis]
    s = split(nglobal, comm.size, comm.rank)
    header = header.copy()
    header['NAXIS' + axis] = s.stop - s.start
    if 'CRPIX' + axis in header:
        header['CRPIX' + axis] -= s.start
    return header
def gather_fitsheader(header, comm=MPI.COMM_WORLD):
    """
    Combine headers of local arrays into a global one.

    """
    if 'NAXIS' not in header:
        raise KeyError("The FITS header does not contain the 'NAXIS' keyword.")
    if 'NAXIS1' not in header:
        raise KeyError('Scalar FITS headers cannot be gathered.')
    naxis = str(header['NAXIS'])
    nlocal = header['NAXIS' + naxis]
    nglobal = combine(nlocal, comm=comm)
    s = split(nglobal, comm.size, comm.rank)
    header = header.copy()
    header['NAXIS' + naxis] = nglobal
    if 'CRPIX' + naxis in header:
        header['CRPIX' + naxis] += s.start
    return header
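# Hypothetical usage sketch for the two header helpers above, assuming a
# single-process MPI.COMM_WORLD and that astropy.io.fits is available as
# pyfits (keyword values below are made up for illustration): scattering a
# global header and gathering the local one back should restore the global
# length of the split axis and its CRPIX value.
header_global = pyfits.Header()
header_global['NAXIS'] = 2
header_global['NAXIS1'] = 32
header_global['NAXIS2'] = 100
header_global['CRPIX2'] = 50.5
header_local = scatter_fitsheader(header_global)
header_restored = gather_fitsheader(header_local)
assert header_restored['NAXIS2'] == header_global['NAXIS2']
assert header_restored['CRPIX2'] == header_global['CRPIX2']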
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The data FITS header. None can be set, in which case a minimal
        FITS header will be inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if the
        data are not meant to be combined into a global array. Make sure
        that the MPI processes are not executing this routine with the same
        file name.

    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError("The arrays have an incompatible number of "
                         "dimensions: '{0}'.".format(
                             ', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'."
                         .format(strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader_for(data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header['extname'] = extname

    # we remove the file first to avoid an annoying pyfits informative
    # message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, clobber=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    s = split(nglobal, comm.size, comm.rank)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0
                else ' Shapes are: {}.'.format(shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    group = comm.Get_group()
    # Incl returns a new group, it does not modify the group in place
    group = group.Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # mpi4py 1.2.2: pb with viewing data as big endian KeyError '>d'
        if sys.byteorder == 'little' and data.dtype.byteorder == '=' or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
            data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename,
                          amode=MPI.MODE_APPEND | MPI.MODE_WRONLY |
                                MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        padding = BLOCK_SIZE - (datasize % BLOCK_SIZE)
        with open(filename, 'a') as f:
            if f.tell() - data_loc != datasize:
                raise RuntimeError('Unexpected file size.')
            f.write(padding * '\0')

    comm.Barrier()
def gather(self, *args):
    """
    MPI-gather the (already scattered) table or a given array.

    table_global = table_local.gather()
    array_global = table_local.gather(array_local)

    Parameters
    ----------
    array_local : array-like, optional
        If provided, gather the scattered input array instead of the
        whole table.

    Returns
    -------
    table_global : PackedTable
        The global packed table, all of whose special attributes have been
        MPI-gathered.
    array_global : array
        The MPI-gathered input array.

    """
    def func(x):
        x = np.asarray(x)
        out = np.empty((ntot,) + x.shape[1:], x.dtype)
        nbytes = product(x.shape[1:]) * x.itemsize
        self.comm.Allgatherv(
            x.view(np.byte),
            [out.view(np.byte),
             ([_ * nbytes for _ in counts], [_ * nbytes for _ in offsets])])
        return out

    ntot = np.array(len(self))
    self.comm.Allreduce(MPI.IN_PLACE, ntot, op=MPI.SUM)
    counts = []
    offsets = [0]
    for s in split(ntot, self.comm.size):
        n = s.stop - s.start
        counts.append(n)
        offsets.append(offsets[-1] + n)
    offsets.pop()

    if len(args) == 1:
        return func(args[0])
    elif len(args) > 1:
        raise TypeError(
            'gather takes at most 1 argument ({} given)'.format(len(args)))

    out = copy.copy(self)
    out._index = self._normalize_int_selection(
        func(self.index), product(self.shape[:self.ndim]))
    for k in out._special_attributes:
        if k in out._reserved_attributes:
            continue
        try:
            v = self.__dict__[k]
        except KeyError:
            continue
        if isinstance(v, np.ndarray) and v.ndim > 0:
            setattr(out, k, func(v))
    return out
def _get_synthbeam(scene, position, area, nu, bandwidth, horn,
                   primary_beam, secondary_beam,
                   synthbeam_dtype=np.float32, theta_max=45,
                   external_A=None):
    """
    Return the monochromatic synthetic beam for a specified location
    on the focal plane, multiplied by a given area and bandwidth.

    Parameters
    ----------
    scene : QubicScene
        The scene.
    position : array-like of shape (..., 3)
        The 3D coordinates where the response is computed, in meters.
    area : array-like
        The integration area, in m^2.
    nu : float
        The frequency for which the response is computed [Hz].
    bandwidth : float
        The filter bandwidth [Hz].
    horn : PackedArray
        The horn layout.
    primary_beam : Beam
        The primary beam.
    secondary_beam : Beam
        The secondary beam.
    synthbeam_dtype : dtype, optional
        The data type for the synthetic beams (default: float32).
        It is the dtype used to store the values of the pointing matrix.
    theta_max : float, optional
        The maximum zenithal angle above which the synthetic beam is
        assumed to be zero, in degrees.
    external_A : list of tables, optional
        Phase and amplitude at each point of the focal plane for each of
        the horns:
        [0] : array of nn with x values in meters
        [1] : array of nn with y values in meters
        [2] : array of [nhorns, nn, nn] with amplitude
        [3] : array of [nhorns, nn, nn] with phase in degrees

    """
    MAX_MEMORY_B = 1e9
    theta, phi = hp.pix2ang(scene.nside, scene.index)
    index = np.where(theta <= np.radians(theta_max))[0]
    nhorn = int(np.sum(horn.open))
    npix = len(index)
    nbytes_B = npix * nhorn * 24
    ngroup = int(np.ceil(nbytes_B / MAX_MEMORY_B))
    out = np.zeros(position.shape[:-1] + (len(scene),),
                   dtype=synthbeam_dtype)
    for s in split(npix, ngroup):
        index_ = index[s]
        sb = QubicInstrument._get_response(
            theta[index_], phi[index_], bandwidth, position, area, nu,
            horn, primary_beam, secondary_beam, external_A=external_A)
        out[..., index_] = abs2(sb, dtype=synthbeam_dtype)
    return out
def __init__(self, instrument, sampling, scene, block=None,
             max_nbytes=None, nprocs_instrument=None,
             nprocs_sampling=None, comm=None):
    """
    Parameters
    ----------
    instrument : Instrument
        The Instrument instance.
    sampling : Sampling
        The sampling information (pointings, etc.)
    scene : Scene
        Discretization of the observed scene.
    block : tuple of slices, optional
        Partition of the samplings.
    max_nbytes : int or None, optional
        Maximum number of bytes to be allocated for the acquisition's
        operator.
    nprocs_instrument : int
        For a given sampling slice, number of procs dedicated to
        the instrument.
    nprocs_sampling : int
        For a given detector slice, number of procs dedicated to
        the sampling.
    comm : mpi4py.MPI.Comm
        The acquisition's MPI communicator. Note that it is transformed
        into a 2d cartesian communicator before being stored as the
        'comm' attribute. The following relationship must hold:
        comm.size = nprocs_instrument * nprocs_sampling

    """
    if not isinstance(instrument, Instrument):
        raise TypeError(
            "The instrument input has an invalid type '{}'.".format(
                type(instrument).__name__))
    if not isinstance(sampling, Sampling):
        raise TypeError(
            "The sampling input has an invalid type '{}'.".format(
                type(sampling).__name__))
    if not isinstance(scene, Scene):
        raise TypeError(
            "The scene input has an invalid type '{}'.".format(
                type(scene).__name__))
    if comm is None:
        comm = MPI.COMM_WORLD
    if nprocs_instrument is None and nprocs_sampling is None:
        nprocs_sampling = comm.size
    if nprocs_instrument is None:
        if nprocs_sampling < 1 or nprocs_sampling > comm.size:
            raise ValueError("Invalid value for nprocs_sampling '{0}'."
                             .format(nprocs_sampling))
        nprocs_instrument = comm.size // nprocs_sampling
    elif nprocs_sampling is None:
        if nprocs_instrument < 1 or nprocs_instrument > comm.size:
            raise ValueError("Invalid value for nprocs_instrument '{0}'."
                             .format(nprocs_instrument))
        nprocs_sampling = comm.size // nprocs_instrument
    if nprocs_instrument * nprocs_sampling != comm.size:
        raise ValueError('Invalid MPI distribution of the acquisition.')

    commgrid = comm.Create_cart([nprocs_sampling, nprocs_instrument],
                                reorder=True)
    comm_instrument = commgrid.Sub([False, True])
    comm_sampling = commgrid.Sub([True, False])
    self.scene = scene
    self.instrument = instrument.scatter(comm_instrument)
    self.sampling = sampling.scatter(comm_sampling)
    self.comm = commgrid
    self.block = block
    if block is None:
        self.block = slice(0, len(self.sampling)),
        if max_nbytes is not None:
            nbytes = self.get_operator_nbytes()
            if nbytes > max_nbytes:
                nblocks = int(np.ceil(nbytes / max_nbytes))
                self.block = tuple(split(len(self.sampling), nblocks))
    elif not isinstance(block, (list, tuple)) or \
            any(not isinstance(b, slice) for b in block):
        raise TypeError("Invalid block argument '{}'.".format(block))
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The data FITS header. None can be set, in which case a minimal
        FITS header will be inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if the
        data are not meant to be combined into a global array. Make sure
        that the MPI processes are not executing this routine with the same
        file name.

    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError("The arrays have an incompatible number of "
                         "dimensions: '{0}'.".format(
                             ', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'."
                         .format(strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader_for(data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header['extname'] = extname

    # we remove the file first to avoid an annoying pyfits informative
    # message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, overwrite=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    s = split(nglobal, comm.size, comm.rank)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0
                else ' Shapes are: {}.'.format(shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    group = comm.Get_group()
    # Incl returns a new group, it does not modify the group in place
    group = group.Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # mpi4py 1.2.2: pb with viewing data as big endian KeyError '>d'
        if sys.byteorder == 'little' and data.dtype.byteorder == '=' or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
            data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename,
                          amode=MPI.MODE_APPEND | MPI.MODE_WRONLY |
                                MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        padding = BLOCK_SIZE - (datasize % BLOCK_SIZE)
        with open(filename, 'a') as f:
            if f.tell() - data_loc != datasize:
                raise RuntimeError('Unexpected file size.')
            f.write(padding * '\0')

    comm.Barrier()