def generate_mmodes(self):
    """Calculate the m-modes corresponding to the Timestream.

    Each process loads its local block of frequencies, Fourier transforms
    them along the time axis and packs the positive and negative m's
    together. An MPI transpose then redistributes the result so that each
    process holds every frequency for its local block of m's, and one HDF5
    file is written per m.

    If the ``COMPLETED_M`` marker already exists in the ``mmodes`` output
    directory the routine returns immediately without doing any work.
    """
    completed_marker = self.output_directory + "/mmodes/COMPLETED_M"

    if os.path.exists(completed_marker):
        if mpiutil.rank0:
            print("******* m-files already generated ********")
        return

    tel = self.telescope
    mmax = tel.mmax
    nfreq = tel.nfreq

    # split_local is used here as (local count, local start, local end)
    lfreq, sfreq, efreq = mpiutil.split_local(nfreq)
    _, sm, em = mpiutil.split_local(mmax + 1)

    # Load in the local frequencies of the time stream
    tstream = np.zeros((lfreq, tel.npairs, self.ntime), dtype=np.complex128)
    for lfi, fi in enumerate(range(sfreq, efreq)):
        tstream[lfi] = self.timestream_f(fi)

    # FFT to calculate the m-modes for the timestream
    row_mmodes = np.fft.fft(tstream, axis=-1) / self.ntime

    # Combine positive and negative m parts: index 0 of the second axis holds
    # +m, index 1 holds the conjugate of -m (left as zero for m = 0).
    row_mpairs = np.zeros((lfreq, 2, tel.npairs, mmax + 1), dtype=np.complex128)

    row_mpairs[:, 0, ..., 0] = row_mmodes[..., 0]
    for mi in range(1, mmax + 1):
        row_mpairs[:, 0, ..., mi] = row_mmodes[..., mi]
        row_mpairs[:, 1, ..., mi] = row_mmodes[..., -mi].conj()

    # Transpose to get the entirety of an m-mode on each process (i.e. all
    # frequencies)
    col_mmodes = mpiutil.transpose_blocks(row_mpairs, (nfreq, 2, tel.npairs, mmax + 1))

    # Transpose the local section to make the m's first
    col_mmodes = np.transpose(col_mmodes, (3, 0, 1, 2))

    for lmi, mi in enumerate(range(sm, em)):

        # Make directory for each m-mode
        if not os.path.exists(self._mdir(mi)):
            os.makedirs(self._mdir(mi))

        # Create the m-file and save the result.
        with h5py.File(self._mfile(mi), 'w') as f:
            f.create_dataset('/mmode', data=col_mmodes[lmi])
            f.attrs['m'] = mi

    # Wait until every process has finished writing its m-files, so that the
    # completion marker is never created while files are still missing.
    mpiutil.barrier()

    if mpiutil.rank0:
        # Make file marker that the m's have been correctly generated:
        open(completed_marker, 'a').close()

    # Hold all processes until the marker has been written.
    mpiutil.barrier()
def load(self, filename, freq_split=True):
    """Read this process's share of a dataset stored in a single file.

    Parameters
    ----------
    filename : string
        File to load.
    freq_split : boolean
        If True (default) the file is divided between processes along the
        frequency axis; otherwise it is divided along the time axis.

    Returns
    -------
    mpi_dset : MPIDataset
    """
    dset = MPIDataset()

    with h5py.File(filename, 'r') as fh:

        dset.global_timestamp = fh['timestamp'][:]
        dset.global_nfreq = fh['vis'].shape[0]

        if not freq_split:
            # Time split: flag the dataset accordingly, keep every frequency
            # and take only the local range of time samples.
            dset.freq_split = False
            dset.time_split = True

            f_lo, f_hi = 0, dset.global_nfreq
            _, t_lo, t_hi = mpiutil.split_local(dset.global_timestamp.shape[0])
        else:
            # Frequency split: keep every time sample and take only the
            # local range of frequencies.
            t_lo, t_hi = 0, dset.global_timestamp.shape[0]
            _, f_lo, f_hi = mpiutil.split_local(dset.global_nfreq)

        # Frequency is the first axis of the stored arrays and time the last.
        dset.data = fh['vis'][f_lo:f_hi, ..., t_lo:t_hi][:]

        # Use the stored mask when there is one, otherwise an all-ones mask.
        if 'mask' in fh:
            dset.mask = fh['mask'][f_lo:f_hi, ..., t_lo:t_hi][:]
        else:
            dset.mask = np.ones_like(dset.data, dtype=np.int8)

    # Record which part of the global axes this process holds.
    dset.time_start = t_lo
    dset.time_end = t_hi

    dset.freq_start = f_lo
    dset.freq_end = f_hi

    return dset
def make_clzz_array(self):
    """Fill `self.clarray` with the result of `make_clzz` for every band.

    Each process evaluates `self.make_clzz` only for its local range of
    bands; an in-place ``Allgatherv`` then shares the results so that every
    process ends up with the complete array.
    """
    tel = self.telescope
    nell = tel.lmax + 1
    nfreq = tel.nfreq

    # Presumably the per-process counts and start offsets of the band
    # partition (array-like, as they are scaled element-wise below).
    band_counts, band_starts, _ = mpiutil.split_all(self.nbands)

    # This process's own range of bands.
    _, first_band, last_band = mpiutil.split_local(self.nbands)

    self.clarray = np.zeros((self.nbands, nell, nfreq, nfreq), dtype=np.float64)

    for band in range(first_band, last_band):
        self.clarray[band] = self.make_clzz(self.band_pk[band])

    # Number of array elements belonging to a single band; used to convert
    # the band counts/offsets into element counts/offsets.
    elems_per_band = nell * nfreq * nfreq
    recv_counts = band_counts * elems_per_band
    recv_offsets = band_starts * elems_per_band

    MPI.COMM_WORLD.Allgatherv(MPI.IN_PLACE, [self.clarray, recv_counts, recv_offsets, MPI.DOUBLE])
def to_time_split(self):
    """Redistribute a frequency split dataset along the time axis.

    Returns
    -------
    mpi_dset : MPIDataset
        A dataset distributed along the time axis. If this dataset is
        already time split, it is returned unchanged.

    Raises
    ------
    Exception
        If the dataset is neither time split nor frequency split.
    """
    # Nothing to do if we are already split in time.
    if self.time_split:
        return self

    if not self.freq_split:
        raise Exception("Can't transform from mixed splitting.")

    # Local range of time samples for this process.
    _, t_start, t_end = mpiutil.split_local(self.global_timestamp.size)

    # MPI transpose of the data, then of the mask.
    new_data = mpiutil.transpose_blocks(self.data, self.global_shape)
    new_mask = mpiutil.transpose_blocks(self.mask, self.global_shape)

    out = MPIDataset()

    # Global properties carry over unchanged.
    out.global_timestamp = self.global_timestamp
    out.global_nfreq = self.global_nfreq

    # Every frequency is now local ...
    out.freq_start = 0
    out.freq_end = self.global_nfreq

    # ... but only a section of the time axis.
    out.time_start = t_start
    out.time_end = t_end

    out.data = new_data
    out.mask = new_mask

    out.freq_split = False
    out.time_split = True

    return out
def to_freq_split(self):
    """Transform from a time split dataset to a frequency split one.

    Returns
    -------
    mpi_dset : MPIDataset
        A dataset which is now distributed along the freq axis. If this
        dataset is already frequency split, it is returned unchanged.

    Raises
    ------
    Exception
        If the dataset is neither frequency split nor time split.
    """
    if self.freq_split:
        return self

    if not self.time_split:
        raise Exception("Can't transform from mixed splitting.")

    fsplit_dset = MPIDataset()

    # Set global properties
    fsplit_dset.global_timestamp = self.global_timestamp
    fsplit_dset.global_nfreq = self.global_nfreq

    # Set local time properties (the whole time axis is now local)
    fsplit_dset.time_start = 0
    fsplit_dset.time_end = self.global_timestamp.size

    # Determine local frequency properties
    lf, sf, ef = mpiutil.split_local(self.global_nfreq)
    fsplit_dset.freq_start = sf
    fsplit_dset.freq_end = ef

    # MPI transpose the data. The arrays are reversed (.T) before and after,
    # with the global shape reversed to match, so the transposition runs in
    # the opposite direction to the one used in `to_time_split`.
    fs_data = mpiutil.transpose_blocks(self.data.T, self.global_shape[::-1])
    fs_mask = mpiutil.transpose_blocks(self.mask.T, self.global_shape[::-1])

    fsplit_dset.data = fs_data.T
    fsplit_dset.mask = fs_mask.T

    # Record the splitting explicitly rather than relying on the defaults of
    # a newly constructed MPIDataset (mirrors `to_time_split`).
    fsplit_dset.freq_split = True
    fsplit_dset.time_split = False

    return fsplit_dset
def simulate(m, outdir, maps=None, ndays=None, resolution=0, seed=None, **kwargs):
    """Create a simulated timestream and save it to disk.

    Parameters
    ----------
    m : ProductManager object
        Products of telescope to simulate.
    outdir : directoryname
        Directory that we will save the timestream into.
    maps : list, optional
        List of map filenames. The sum of these form the simulated sky. If
        not given (or empty) no sky signal is projected.
    ndays : int, optional
        Number of days of observation. Setting `ndays = None` (default) uses
        the default stored in the telescope object; `ndays = 0`, assumes the
        observation time is infinite so that the noise is zero.
    resolution : scalar, optional
        Approximate time resolution in seconds. Setting `resolution = 0`
        (default) calculates the value from the mmax.
    seed : integer, optional
        Seed for the noise realisation. The MPI rank is added to it so that
        each process draws different noise. If `None` (default) the random
        number generator is not reseeded.
    **kwargs
        Accepted but not used.

    Returns
    -------
    timestream : Timestream
    """
    # Avoid a shared mutable default for the list of maps.
    if maps is None:
        maps = []

    ## Read in telescope system
    bt = m.beamtransfer
    tel = bt.telescope

    lmax = tel.lmax
    mmax = tel.mmax
    nfreq = tel.nfreq
    npol = tel.num_pol_sky

    projmaps = (len(maps) > 0)

    lfreq, sfreq, efreq = mpiutil.split_local(nfreq)
    local_freq = range(sfreq, efreq)

    lm, sm, em = mpiutil.split_local(mmax + 1)

    # If ndays is not set use the default value.
    if ndays is None:
        ndays = tel.ndays

    # Calculate the number of timesamples from the resolution
    if resolution == 0:
        # Set the minimum resolution required for the sky.
        ntime = 2*mmax+1
    else:
        # Number of samples of the requested length in 24 * 3600 seconds.
        # NOTE(review): if this gives ntime < 2*mmax+1 the +m / -m unwrapping
        # below will overlap or index out of range - confirm callers never
        # request such a coarse resolution when projecting maps.
        ntime = int(np.round(24 * 3600.0 / resolution))

    col_vis = np.zeros((tel.npairs, lfreq, ntime), dtype=np.complex128)

    ## If we want to add maps use the m-mode formalism to project a skymap
    ## into visibility space.
    if projmaps:

        # Load file to find out the map shapes.
        with h5py.File(maps[0], 'r') as f:
            mapshape = f['map'].shape

        if lfreq > 0:

            # Allocate array to store the local frequencies
            row_map = np.zeros((lfreq,) + mapshape[1:], dtype=np.float64)

            # Read in and sum up the local frequencies of the supplied maps.
            for mapfile in maps:
                with h5py.File(mapfile, 'r') as f:
                    row_map += f['map'][sfreq:efreq]

            # Calculate the alm's for the local sections
            row_alm = hputil.sphtrans_sky(row_map, lmax=lmax).reshape((lfreq, npol * (lmax+1), lmax+1))

        else:
            # No local frequencies: use an empty array so that this process
            # can still take part in the transposes below.
            row_alm = np.zeros((lfreq, npol * (lmax+1), lmax+1), dtype=np.complex128)

        # Perform the transposition to distribute different m's across processes. Neat
        # tip, putting a shorter value for the number of columns, trims the array at
        # the same time
        col_alm = mpiutil.transpose_blocks(row_alm, (nfreq, npol * (lmax+1), mmax+1))

        # Transpose and reshape to shift m index first.
        col_alm = np.transpose(col_alm, (2, 0, 1)).reshape(lm, nfreq, npol, lmax+1)

        # Create storage for visibility data
        vis_data = np.zeros((lm, nfreq, bt.ntel), dtype=np.complex128)

        # Iterate over m's local to this process and generate the corresponding
        # visibilities
        for mp, mi in enumerate(range(sm, em)):
            vis_data[mp] = bt.project_vector_sky_to_telescope(mi, col_alm[mp])

        # Rearrange axes such that frequency is last (as we want to divide
        # frequencies across processors)
        row_vis = vis_data.transpose((0, 2, 1))

        # Parallel transpose to get all m's back onto the same processor
        col_vis_tmp = mpiutil.transpose_blocks(row_vis, ((mmax+1), bt.ntel, nfreq))

        # Split the telescope axis into (+m / -m, pair); this reshape requires
        # bt.ntel == 2 * tel.npairs.
        col_vis_tmp = col_vis_tmp.reshape(mmax + 1, 2, tel.npairs, lfreq)

        # Transpose the local section to make the m's the last axis and unwrap the
        # positive and negative m at the same time.
        col_vis[..., 0] = col_vis_tmp[0, 0]
        for mi in range(1, mmax+1):
            col_vis[..., mi] = col_vis_tmp[mi, 0]
            col_vis[..., -mi] = col_vis_tmp[mi, 1].conj()  # Conjugate only (not (-1)**m - see paper)

        del col_vis_tmp

    ## If we're simulating noise, create a realisation and add it to col_vis
    if ndays > 0:

        # Fetch the noise powerspectrum
        noise_ps = tel.noisepower(np.arange(tel.npairs)[:, np.newaxis],
                                  np.array(local_freq)[np.newaxis, :],
                                  ndays=ndays).reshape(tel.npairs, lfreq)[:, :, np.newaxis]

        # Seed random number generator to give consistent noise
        if seed is not None:
            # Must include rank such that we don't have massive power deficit from correlated noise
            np.random.seed(seed + mpiutil.rank)

        # Create and weight complex noise coefficients
        noise_vis = (np.array([1.0, 1.0J]) * np.random.standard_normal(col_vis.shape + (2,))).sum(axis=-1)
        noise_vis *= (noise_ps / 2.0)**0.5

        # Reset RNG
        if seed is not None:
            np.random.seed()

        # Add into main noise sims
        col_vis += noise_vis

        del noise_vis

    # Fourier transform m-modes back to get timestream.
    vis_stream = np.fft.ifft(col_vis, axis=-1) * ntime
    vis_stream = vis_stream.reshape(tel.npairs, lfreq, ntime)

    # The time samples the visibility is calculated at
    tphi = np.linspace(0, 2*np.pi, ntime, endpoint=False)

    # Create timestream object
    tstream = Timestream(outdir, m)

    ## Iterate over the local frequencies and write them to disk.
    for lfi, fi in enumerate(local_freq):

        # Make directory if required
        if not os.path.exists(tstream._fdir(fi)):
            os.makedirs(tstream._fdir(fi))

        # Write file contents
        with h5py.File(tstream._ffile(fi), 'w') as f:

            # Timestream data
            f.create_dataset('/timestream', data=vis_stream[:, lfi])
            f.create_dataset('/phi', data=tphi)

            # Telescope layout data
            f.create_dataset('/feedmap', data=tel.feedmap)
            f.create_dataset('/feedconj', data=tel.feedconj)
            f.create_dataset('/feedmask', data=tel.feedmask)
            f.create_dataset('/uniquepairs', data=tel.uniquepairs)
            f.create_dataset('/baselines', data=tel.baselines)

            # Write metadata
            f.attrs['beamtransfer_path'] = os.path.abspath(bt.directory)
            f.attrs['ntime'] = ntime

    tstream.save()

    # Wait for all processes to finish writing before returning.
    mpiutil.barrier()

    return tstream
def from_files(cls, filelist, acq=True):
    """Load from a set of CHIME data files.

    The files are divided between the processes, each process reads its own
    files, and an MPI transpose then redistributes the data so that the
    returned dataset is split by frequency with the whole time axis local.

    Parameters
    ----------
    filelist : list
        List of filenames to load. They are processed in sorted order; the
        list passed in is not modified.
    acq : boolean, optional
        Are the files analysis files or acquisition files (default)?

    Returns
    -------
    mpi_dset : MPIDataset
        Distributed dataset type.

    Raises
    ------
    ValueError
        If `filelist` is empty.
    Exception
        If the 'vis' dataset of a file does not have the same shape as that
        of the first file.
    """
    mpi_dset = cls()

    # Work on a sorted copy so that the caller's list is left untouched.
    filelist = sorted(filelist)

    # Global number of files
    ngfiles = len(filelist)

    # Fail on every rank at once; otherwise only rank 0 would fail when
    # opening the first file, leaving the other ranks waiting in the
    # broadcast below.
    if ngfiles == 0:
        raise ValueError("No files given to load.")

    # Split into local set of files.
    lf, sf, ef = mpiutil.split_local(ngfiles)
    local_files = filelist[sf:ef]

    fshape = None

    # Set file loading routine depending on whether files are acq or
    # analysis.
    _load_file = andata.AnData.from_acq_h5 if acq else andata.AnData.from_file

    # Rank 0 should open file and check the shape.
    if mpiutil.rank0:
        d0 = _load_file(local_files[0])
        fshape = d0.datasets['vis'].shape

    # Broadcast the shape to all other ranks
    fshape = mpiutil.world.bcast(fshape, root=0)

    # Unpack to get the individual lengths
    nfreq, nprod, ntime = fshape

    # This will be the local shape, file ordered.
    lshape = (lf, ntime, nprod, nfreq)
    local_array = np.zeros(lshape, dtype=np.complex128)

    # Timestamps, stored as (global file index, timestamps of that file)
    timestamps = []

    for li, lfile in enumerate(local_files):

        print("Rank %i reading %s" % (mpiutil.rank, lfile))

        # Load file
        df = _load_file(lfile)

        # Copy data into local dataset
        dset = df.datasets['vis']

        if dset.shape != fshape:
            raise Exception("Data from %s is not the right shape" % lfile)

        # Reverse the axes to (time, prod, freq) to match `lshape`.
        local_array[li] = dset.T

        # Get timestamps
        timestamps.append((li + sf, df.timestamp))

    ## Merge timestamps
    tslist = mpiutil.world.allgather(timestamps)
    tsflat = [ts for proclist in tslist for ts in proclist]  # Flatten list

    # Add timestamps into array
    timestamp_array = np.zeros((ngfiles, ntime), dtype=np.float64)
    for ind, tstamps in tsflat:
        timestamp_array[ind] = tstamps

    timestamp_array = timestamp_array.reshape(ngfiles * ntime)

    if mpiutil.rank0:
        # The trailing comma is deliberate: under Python 2 it suppresses the
        # newline so that " done." is printed on the same line.
        print("Starting transpose..."),

    data_by_freq = mpiutil.transpose_blocks(local_array, (ngfiles, ntime, nprod, nfreq))

    if mpiutil.rank0:
        print(" done.")

    # Get local frequencies
    lfreq, sfreq, efreq = mpiutil.split_local(nfreq)

    # Merge the file and time axes, then reverse to (freq, prod, time).
    data_by_freq = data_by_freq.reshape((ngfiles * ntime, nprod, lfreq)).T

    # Set dataset
    mpi_dset.data = data_by_freq
    mpi_dset.mask = np.ones_like(mpi_dset.data, dtype=np.int8)

    # Set time properties. The end of the local range is taken directly from
    # the global timestamp array rather than from `mpi_dset.timestamp`, which
    # would be read before the local time bounds have been assigned.
    mpi_dset.global_timestamp = timestamp_array
    mpi_dset.time_start = 0
    mpi_dset.time_end = timestamp_array.shape[0]

    # Set frequency properties
    mpi_dset.freq_start = sfreq
    mpi_dset.freq_end = efreq
    mpi_dset.global_nfreq = nfreq

    return mpi_dset