def read_fits(filename, extname, comm):
    """
    Read and distribute a FITS file into local arrays.

    The data are split along the last FITS axis (NAXISn, with n = NAXIS)
    according to `distribute_slice`, and the local section is read through
    `pyfits.Section`.

    Parameters
    ----------
    filename : str
        The FITS file name.
    extname : str
        The FITS extension name. Use None to read the first HDU with data.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays.

    Returns
    -------
    output : ndarray
        The local section of the HDU data, in native byte order.
    header : pyfits.Header
        The header of the selected HDU, modified in place so that the size
        of its last axis is the local one.

    Raises
    ------
    ValueError
        If the file name and extension name are not the same on all the
        MPI processes.
    IOError
        If `extname` is None and no HDU with data is found.
    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')

    # get primary hdu or extension
    # NOTE(review): the HDU list is never closed; the returned array may
    # still reference the file, so confirm before adding an explicit close.
    fits = pyfits.open(filename)
    if extname is not None:
        hdu = fits[extname]
    else:
        ihdu = 0
        while True:
            try:
                hdu = fits[ihdu]
            except IndexError:
                raise IOError('The FITS file has no data.')
            if hdu.header['NAXIS'] != 0 and hdu.data is not None:
                break
            # always advance, otherwise an HDU with NAXIS != 0 but without
            # data would be examined forever
            ihdu += 1

    header = hdu.header
    last_axis = 'NAXIS' + str(header['NAXIS'])
    n = header[last_axis]
    s = distribute_slice(n, comm=comm)
    output = pyfits.Section(hdu)[s]

    if not output.dtype.isnative:
        output = output.byteswap().newbyteorder('=')

    # update the header
    header[last_axis] = s.stop - s.start
    # NOTE(review): only CRPIX2 is shifted, which matches the split axis
    # for 2-dimensional images only -- confirm for NAXIS > 2.
    try:
        if header['CTYPE1'] == 'RA---TAN' and header['CTYPE2'] == 'DEC--TAN':
            header['CRPIX2'] -= s.start
    except KeyError:
        pass

    comm.Barrier()

    return output, header
def read_fits(filename, extname, comm):
    """
    Read and distribute a FITS file into local arrays.

    The data are split along the last FITS axis (NAXISn, with n = NAXIS)
    according to `distribute_slice`, and the local section is read through
    `pyfits.Section`.

    Parameters
    ----------
    filename : str
        The FITS file name.
    extname : str
        The FITS extension name. Use None to read the first HDU with data.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays.

    Returns
    -------
    output : ndarray
        The local section of the HDU data, in native byte order.
    header : pyfits.Header
        The header of the selected HDU, modified in place so that the size
        of its last axis is the local one.

    Raises
    ------
    ValueError
        If the file name and extension name are not the same on all the
        MPI processes.
    IOError
        If `extname` is None and no HDU with data is found.
    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')

    # get primary hdu or extension
    # NOTE(review): the HDU list is never closed; the returned array may
    # still reference the file, so confirm before adding an explicit close.
    fits = pyfits.open(filename)
    if extname is not None:
        hdu = fits[extname]
    else:
        ihdu = 0
        while True:
            try:
                hdu = fits[ihdu]
            except IndexError:
                raise IOError('The FITS file has no data.')
            if hdu.header['NAXIS'] != 0 and hdu.data is not None:
                break
            # always advance, otherwise an HDU with NAXIS != 0 but without
            # data would be examined forever
            ihdu += 1

    header = hdu.header
    last_axis = 'NAXIS' + str(header['NAXIS'])
    n = header[last_axis]
    s = distribute_slice(n, comm=comm)
    output = pyfits.Section(hdu)[s]

    if not output.dtype.isnative:
        output = output.byteswap().newbyteorder('=')

    # update the header
    header[last_axis] = s.stop - s.start
    # NOTE(review): only CRPIX2 is shifted, which matches the split axis
    # for 2-dimensional images only -- confirm for NAXIS > 2.
    try:
        if header['CTYPE1'] == 'RA---TAN' and header['CTYPE2'] == 'DEC--TAN':
            header['CRPIX2'] -= s.start
    except KeyError:
        pass

    comm.Barrier()

    return output, header
def scatter_fitsheader(header, comm=MPI.COMM_WORLD):
    """
    Return the header of local arrays given that of the global array.

    Parameters
    ----------
    header : pyfits.Header
        The FITS header of the global array. It is not modified.
    comm : mpi4py.Comm
        The MPI communicator over which the last FITS axis is distributed.

    Returns
    -------
    header : pyfits.Header
        A copy of the input header in which the size of the last axis is
        the local one and, if present, the CRPIX keyword of that axis is
        shifted by the start of the local slice.

    Raises
    ------
    KeyError
        If the header has no 'NAXIS' or no 'NAXIS1' keyword.
    """
    required = (
        ('NAXIS', "The FITS header does not contain the 'NAXIS' keyword."),
        ('NAXIS1', 'Scalar FITS headers cannot be split.'),
    )
    for keyword, message in required:
        if keyword not in header:
            raise KeyError(message)

    # the array is distributed along the last FITS axis
    last = str(header['NAXIS'])
    naxis_key = 'NAXIS' + last
    crpix_key = 'CRPIX' + last

    section = distribute_slice(header[naxis_key], comm=comm)

    local = header.copy()
    local[naxis_key] = section.stop - section.start
    if crpix_key in local:
        local[crpix_key] -= section.start
    return local
def gather_fitsheader(header, comm=MPI.COMM_WORLD):
    """
    Combine headers of local arrays into a global one.

    Parameters
    ----------
    header : pyfits.Header
        The FITS header of the local array. It is not modified.
    comm : mpi4py.Comm
        The MPI communicator over which the last FITS axis is distributed.

    Returns
    -------
    header : pyfits.Header
        A copy of the input header in which the size of the last axis is
        the global one, as returned by `combine`, and, if present, the
        CRPIX keyword of that axis is shifted back by the start of the
        local slice.

    Raises
    ------
    KeyError
        If the header has no 'NAXIS' or no 'NAXIS1' keyword.
    """
    required = (
        ('NAXIS', "The FITS header does not contain the 'NAXIS' keyword."),
        ('NAXIS1', 'Scalar FITS headers cannot be gathered.'),
    )
    for keyword, message in required:
        if keyword not in header:
            raise KeyError(message)

    # the array is distributed along the last FITS axis
    last = str(header['NAXIS'])
    naxis_key = 'NAXIS' + last
    crpix_key = 'CRPIX' + last

    total = combine(header[naxis_key], comm=comm)
    section = distribute_slice(total, comm=comm)

    merged = header.copy()
    merged[naxis_key] = total
    if crpix_key in merged:
        merged[crpix_key] += section.start
    return merged
def test3():
    """
    Yield checks on maps projected with the MPI.COMM_WORLD communicator.

    For each (observation, tod) pair, a naive map is built and checked with
    `check_map_local`. When running on more than one process, the local map
    is then filled with `rank + 1`, passed through `proj.operands[1]` and
    unpacked with the mask of that operand. For every rank, the slice given
    by `distribute_slice` must only contain 0 or `irank + 1`.
    """
    comm_map = MPI.COMM_WORLD
    for obs, tod in ((obs1, tod1), (obs2, tod2)):
        proj = obs.get_projection_operator(
            downsampling=True, npixels_per_sample=6,
            header=map_ref_global.header, commin=comm_map)
        proj.apply_mask(tod.mask)
        m = mapper_naive(tod, proj)
        yield check_map_local, m
        if size == 1:
            continue

        # NOTE(review): this overwrites in place the map yielded above; a
        # runner that collects all the checks before running them would
        # hand the overwritten map to check_map_local -- confirm.
        mlocal = m
        mlocal[:] = rank + 1
        mlocalpacked = proj.operands[1](mlocal)
        mglobal = UnpackOperator(proj.operands[1].mask)(mlocalpacked)

        # bind the current mglobal as a default argument, so that the check
        # does not pick up the array of a later iteration if it is run
        # after the generator has advanced
        def func(s, irank, mglobal=mglobal):
            assert np.all((mglobal[s] == 0) | (mglobal[s] == irank + 1))

        for irank in range(size):
            s = distribute_slice(map_ref_global.shape[0], rank=irank)
            yield func, s, irank
def test3():
    """
    Yield checks on maps projected with the MPI.COMM_WORLD communicator.

    For each (observation, tod) pair, a naive map is built and checked with
    `check_map_local`. When running on more than one process, the local map
    is then filled with `rank + 1`, passed through `proj.operands[1]` and
    unpacked with the mask of that operand. For every rank, the slice given
    by `distribute_slice` must only contain 0 or `irank + 1`.
    """
    comm_map = MPI.COMM_WORLD
    for obs, tod in ((obs1, tod1), (obs2, tod2)):
        proj = obs.get_projection_operator(
            downsampling=True, npixels_per_sample=6,
            header=map_ref_global.header, commin=comm_map)
        proj.apply_mask(tod.mask)
        m = mapper_naive(tod, proj)
        yield check_map_local, m
        if size == 1:
            continue

        # NOTE(review): this overwrites in place the map yielded above; a
        # runner that collects all the checks before running them would
        # hand the overwritten map to check_map_local -- confirm.
        mlocal = m
        mlocal[:] = rank + 1
        mlocalpacked = proj.operands[1](mlocal)
        mglobal = UnpackOperator(proj.operands[1].mask)(mlocalpacked)

        # bind the current mglobal as a default argument, so that the check
        # does not pick up the array of a later iteration if it is run
        # after the generator has advanced
        def func(s, irank, mglobal=mglobal):
            assert np.all((mglobal[s] == 0) | (mglobal[s] == irank + 1))

        for irank in range(size):
            s = distribute_slice(map_ref_global.shape[0], rank=irank)
            yield func, s, irank
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The data FITS header. None can be set, in which case a minimal
        FITS header will be inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if
        the data are not meant to be combined into a global array. Make
        sure that the MPI processes are not executing this routine with
        the same file name.

    Raises
    ------
    ValueError
        If the file name and extension name, the number of dimensions or
        the trailing dimensions differ between the MPI processes, or if
        the first dimension of the local array is not the one expected
        from `distribute_slice`.
    RuntimeError
        If, on rank 0, the size of the file after the collective write is
        not the expected one.
    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError(
            "The arrays have an incompatible number of dimensions:"
            " '{0}'.".format(', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'.".format(
            strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader(fromdata=data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header.update('extname', extname)

    # we remove the file first to avoid an annoying pyfits informative message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, clobber=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    # the slice must be computed for the communicator of the local arrays,
    # not for the default one
    s = distribute_slice(nglobal, comm=comm)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0 else
                ' Shapes are: {}.'.format(shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    # Incl returns a new group: it does not modify the group it is called on
    group = comm.Get_group().Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # mpi4py 1.2.2: pb with viewing data as big endian KeyError '>d'
        if sys.byteorder == 'little' and data.dtype.byteorder == '=' or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
        data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename, amode=MPI.MODE_APPEND |
                          MPI.MODE_WRONLY | MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
        # the processes without work are not part of the group and have
        # no communicator to free
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        # no padding is required if the data already fill the last block
        padding = (-datasize) % BLOCK_SIZE
        if os.path.getsize(filename) - data_loc != datasize:
            raise RuntimeError('Unexpected file size.')
        with open(filename, 'ab') as f:
            f.write(b'\0' * padding)

    comm.Barrier()
def write_fits(filename, data, header, extension, extname, comm):
    """
    Collectively write local arrays into a single FITS file.

    Parameters
    ----------
    filename : str
        The FITS file name.
    data : ndarray
        The array to be written.
    header : pyfits.Header
        The data FITS header. None can be set, in which case a minimal
        FITS header will be inferred from the data.
    extension : boolean
        If True, the data will be written as an extension to an already
        existing FITS file.
    extname : str
        The FITS extension name. Use None to write the primary HDU.
    comm : mpi4py.Comm
        The MPI communicator of the local arrays. Use MPI.COMM_SELF if
        the data are not meant to be combined into a global array. Make
        sure that the MPI processes are not executing this routine with
        the same file name.

    Raises
    ------
    ValueError
        If the file name and extension name, the number of dimensions or
        the trailing dimensions differ between the MPI processes, or if
        the first dimension of the local array is not the one expected
        from `distribute_slice`.
    RuntimeError
        If, on rank 0, the size of the file after the collective write is
        not the expected one.
    """
    # check if the file name is the same for all MPI jobs
    files = comm.allgather(filename + str(extname))
    all_equal = all(f == files[0] for f in files)
    if comm.size > 1 and not all_equal:
        raise ValueError('The file name is not the same for all MPI jobs.')
    ndims = comm.allgather(data.ndim)
    if any(n != ndims[0] for n in ndims):
        raise ValueError(
            "The arrays have an incompatible number of dimensions:"
            " '{0}'.".format(', '.join(str(n) for n in ndims)))
    ndim = ndims[0]
    shapes = comm.allgather(data.shape)
    if any(s[1:] != shapes[0][1:] for s in shapes):
        raise ValueError("The arrays have incompatible shapes: '{0}'.".format(
            strshape(shapes)))

    # get header
    if header is None:
        header = create_fitsheader(fromdata=data, extname=extname)
    else:
        header = header.copy()
    if extname is not None:
        header.update('extname', extname)

    # we remove the file first to avoid an annoying pyfits informative message
    if not extension:
        if comm.rank == 0:
            try:
                os.remove(filename)
            except OSError:
                pass

    # case without MPI communication
    if comm.size == 1:
        if not extension:
            hdu = pyfits.PrimaryHDU(data, header)
            hdu.writeto(filename, clobber=True)
        else:
            pyfits.append(filename, data, header)
        return

    # get global/local parameters
    nglobal = sum(s[0] for s in shapes)
    # the slice must be computed for the communicator of the local arrays,
    # not for the default one
    s = distribute_slice(nglobal, comm=comm)
    nlocal = s.stop - s.start
    if data.shape[0] != nlocal:
        raise ValueError(
            "On rank {}, the local array shape '{}' is invalid. The first "
            "dimension does not match the expected local number '{}' given "
            "the global number '{}'.{}".format(
                comm.rank, data.shape, nlocal, nglobal,
                '' if comm.rank > 0 else
                ' Shapes are: {}.'.format(shapes)))

    # write FITS header
    if comm.rank == 0:
        header['NAXIS' + str(ndim)] = nglobal
        shdu = pyfits.StreamingHDU(filename, header)
        data_loc = shdu._datLoc
        shdu.close()
    else:
        data_loc = None
    data_loc = comm.bcast(data_loc)

    # get a communicator excluding the processes which have no work to do
    # (Create_subarray does not allow 0-sized subarrays)
    chunk = product(data.shape[1:])
    rank_nowork = min(comm.size, nglobal)
    # Incl returns a new group: it does not modify the group it is called on
    group = comm.Get_group().Incl(list(range(rank_nowork)))
    newcomm = comm.Create(group)

    # collectively write data
    if comm.rank < rank_nowork:
        # mpi4py 1.2.2: pb with viewing data as big endian KeyError '>d'
        if sys.byteorder == 'little' and data.dtype.byteorder == '=' or \
           data.dtype.byteorder == '<':
            data = data.byteswap()
        data = data.newbyteorder('=')
        mtype = DTYPE_MAP[data.dtype]
        ftype = mtype.Create_subarray([nglobal * chunk], [nlocal * chunk],
                                      [s.start * chunk])
        ftype.Commit()
        f = MPI.File.Open(newcomm, filename, amode=MPI.MODE_APPEND |
                          MPI.MODE_WRONLY | MPI.MODE_CREATE)
        f.Set_view(data_loc, mtype, ftype, 'native', MPI.INFO_NULL)
        f.Write_all(data)
        f.Close()
        ftype.Free()
        # the processes without work are not part of the group and have
        # no communicator to free
        newcomm.Free()

    # pad FITS file with zeros
    if comm.rank == 0:
        datasize = nglobal * chunk * data.dtype.itemsize
        BLOCK_SIZE = 2880
        # no padding is required if the data already fill the last block
        padding = (-datasize) % BLOCK_SIZE
        if os.path.getsize(filename) - data_loc != datasize:
            raise RuntimeError('Unexpected file size.')
        with open(filename, 'ab') as f:
            f.write(b'\0' * padding)

    comm.Barrier()