def main(args):
    """Correlate a TBN capture and fit a visibility model, parallelized with MPI.

    The last MPI rank acts as the supervisor: it reads integrations from the
    TBN file and farms them out to worker ranks on demand.  Each worker
    correlates its integration with LSL's FX correlator, fits a model to the
    visibilities in the bin nearest the transmitter frequency, and writes the
    results into a shared (parallel-h5py) HDF5 file.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (station, known_transmitters, opt_method, residual_function, l_init,
    m_init, the various helper functions) -- confirm before reusing in
    isolation.
    """
    # this first part of the code is run by all processes

    # set up MPI environment
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # need at least one worker plus the supervisor
    if size < 2:
        raise RuntimeError(
            f"This program requires at least two MPI processes to function. Please rerun with more resources"
        )

    # designate the last process as the supervisor/file reader
    supervisor = size - 1

    # open the TBN file for reading
    tbnf = LWASVDataFile(args.tbn_filename, ignore_timetag_errors=True)

    # figure out the details of the run we want to do
    tx_coords = known_transmitters.parse_args(args)
    antennas = station.antennas
    valid_ants, n_baselines = select_antennas(antennas, args.use_pol)
    n_ants = len(valid_ants)
    sample_rate = tbnf.get_info('sample_rate')
    # some of our TBNs claim to have frame size 1024 but they are lying
    frame_size = 512
    tbn_center_freq = tbnf.get_info('freq1')

    total_integrations, _ = compute_integration_numbers(
        tbnf, args.integration_length)

    # open the output HDF5 file and create datasets
    # because of the way parallelism in h5py works all processes (even ones
    # that don't write to the file) must do this
    h5f = build_output_file(args.hdf5_file, tbnf, valid_ants, n_baselines,
                            args.integration_length,
                            tx_freq=args.tx_freq,
                            fft_len=args.fft_len,
                            use_pfb=args.use_pfb,
                            use_pol=args.use_pol,
                            opt_method=opt_method,
                            vis_model='gaussian',
                            transmitter_coords=tx_coords,
                            mpi_comm=comm)

    if rank == supervisor:
        # the supervisor process runs this code
        print("supervisor: started")

        # state info
        reached_end = False                              # no more data to hand out
        workers_alive = [True for _ in range(size - 1)]  # indexed by worker rank
        int_no = 0                                       # next integration number

        while True:
            if not reached_end:
                # grab data for the next available worker
                try:
                    duration, start_time, data = tbnf.read(
                        args.integration_length)
                    # only use data from valid antennas
                    data = data[[a.digitizer - 1 for a in valid_ants], :]
                except EOFError:
                    reached_end = True
                    print(f"supervisor: reached EOF")
                if int_no >= total_integrations:
                    print(f"supervisor: this is the last integration")
                    reached_end = True

            # get the next "ready" message from the workers
            st = MPI.Status()
            msg = comm.recv(status=st)
            if msg == "ready":
                print(
                    f"supervisor: received 'ready' message from worker {st.source}"
                )

                # if we're done, send an exit message and mark that we've killed this worker
                # an empty array indicates that the worker should exit
                if reached_end:
                    print(
                        f"supervisor: sending exit message to worker {st.source}"
                    )
                    comm.Send(np.array([]), dest=st.source, tag=int_no)
                    workers_alive[st.source] = False

                    if not any(workers_alive):
                        print(f"supervisor: all workers told to exit, goodbye")
                        break
                # otherwise, send the data to the worker for processing
                else:
                    print(
                        f"supervisor: sending data for integration {int_no}/{total_integrations} to worker {st.source}"
                    )
                    # Send with a capital S is optimized to send numpy arrays
                    # the tag carries the integration number to the worker
                    comm.Send(data, dest=st.source, tag=int_no)
                    int_no += 1
            else:
                raise ValueError(
                    f"Supervisor received unrecognized message '{msg}' from worker {st.source}"
                )

        tbnf.close()

    else:
        # the worker processes run this code
        print(f"worker {rank} started")

        # workers don't need access to the TBN file
        tbnf.close()

        # figure out the size of the incoming data buffer
        # (whole frames only, hence the round-to-frame_size arithmetic)
        samples_per_integration = int(
            round(args.integration_length * sample_rate /
                  frame_size)) * frame_size
        buffer_shape = (n_ants, samples_per_integration)

        while True:
            # send with a lowercase s can send any pickle-able python object
            # this is a synchronous send - it will block until the message is read by the supervisor
            # the other sends (e.g. comm.Send) only block until the message is safely taken by MPI,
            # which might happen before the receiver actually reads it
            comm.ssend("ready", dest=supervisor)

            # build a buffer to be filled with data
            data = np.empty(buffer_shape, np.complex64)

            # receive the data from the supervisor
            st = MPI.Status()
            comm.Recv(data, source=supervisor, status=st)
            # the supervisor encodes the integration number in the message tag
            int_no = st.tag

            # if the buffer is empty, we're done
            if st.count == 0:
                print(f"worker {rank}: received exit message, exiting")
                break

            # otherwise process the data we've recieved
            print(
                f"worker {rank}: received data for integration {int_no}, starting processing"
            )

            # run the correlator
            bl, freqs, vis = fxc.FXMaster(
                data,
                valid_ants,
                LFFT=args.fft_len,
                pfb=args.use_pfb,
                sample_rate=sample_rate,
                central_freq=tbn_center_freq,
                Pol='xx' if args.use_pol == 0 else 'yy',
                return_baselines=True,
                gain_correct=True)

            # extract the frequency bin we want (nearest to the transmitter freq)
            target_bin = np.argmin([abs(args.tx_freq - f) for f in freqs])
            vis_tbin = vis[:, target_bin]

            # baselines in wavelengths
            uvw = uvw_from_antenna_pairs(bl, wavelength=3e8 / args.tx_freq)

            # model fitting
            # NOTE(review): residual_function, l_init and m_init are not
            # defined in this function -- presumably module-level; verify.
            l_out, m_out, opt_result = fit_model_to_vis(uvw,
                                                        vis_tbin,
                                                        residual_function,
                                                        l_init,
                                                        m_init,
                                                        verbose=False)

            # convert direction cosines to sky coords
            src_elev, src_az = lm_to_ea(l_out, m_out)

            # write data to h5 file
            h5f['l_start'][int_no] = l_init
            h5f['m_start'][int_no] = m_init
            h5f['l_est'][int_no] = l_out
            h5f['m_est'][int_no] = m_out
            h5f['elevation'][int_no] = src_elev
            h5f['azimuth'][int_no] = src_az
            h5f['cost'][int_no] = opt_result['cost']
            h5f['nfev'][int_no] = opt_result['nfev']

            # compute the bin power and save it to the file
            # arbitrarily picking the tenth antenna in this list
            power_calc_data = data[10, :]
            h5f['snr_est'][int_no] = estimate_snr(power_calc_data,
                                                  args.fft_len, args.tx_freq,
                                                  sample_rate,
                                                  tbn_center_freq)

            print(f"worker {rank}: done processing integration {int_no}")

    # back to common code for both supervisor and workers
    h5f.attrs['total_integrations'] = int_no
    h5f.close()
def main(args):
    """Correlate a TBN capture and localize the source by imaging, via MPI.

    The last MPI rank acts as the supervisor: it reads integrations from the
    TBN file and farms them out to worker ranks on demand.  Each worker
    correlates its integration, grids the visibilities into an all-sky image,
    finds the source position by peak and/or center-of-mass, and writes the
    results into a shared (parallel-h5py) HDF5 file.

    NOTE(review): relies on module-level names defined elsewhere in this file
    (station, known_transmitters, grid_visibilities, get_gimg_max,
    get_gimg_center_of_mass, plt, ...) -- confirm before reusing in isolation.
    """
    # this first part of the code is run by all processes

    # set up MPI environment
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # need at least one worker plus the supervisor
    if size < 2:
        raise RuntimeError(
            f"This program requires at least two MPI processes to function. Please rerun with more resources"
        )

    # designate the last process as the supervisor/file reader
    supervisor = size - 1

    # open the TBN file for reading
    tbnf = LWASVDataFile(args.tbn_filename, ignore_timetag_errors=True)

    # figure out the details of the run we want to do
    tx_coords = known_transmitters.parse_args(args)
    antennas = station.antennas
    valid_ants, n_baselines = select_antennas(antennas, args.use_pol)
    n_ants = len(valid_ants)
    total_integrations, _ = compute_integration_numbers(
        tbnf, args.integration_length)

    sample_rate = tbnf.get_info('sample_rate')
    # some of our TBNs claim to have frame size 1024 but they are lying
    frame_size = 512
    tbn_center_freq = tbnf.get_info('freq1')

    # open the output HDF5 file and create datasets
    # because of the way parallelism in h5py works all processes (even ones
    # that don't write to the file) must do this
    h5f = build_output_file(args.hdf5_file, tbnf, valid_ants, n_baselines,
                            args.integration_length,
                            tx_freq=args.tx_freq,
                            fft_len=args.fft_len,
                            use_pfb=args.use_pfb,
                            use_pol=args.use_pol,
                            transmitter_coords=tx_coords,
                            mpi_comm=comm)

    # BUGFIX: validate the algorithm choice up front.  Previously the
    # `else: raise` clause was attached to the CoM `if`, so a perfectly valid
    # --point_finding_alg of 'peak' raised NotImplementedError.
    if args.point_finding_alg not in ('all', 'peak', 'CoM'):
        raise NotImplementedError(
            f"Unrecognized point finding algorithm: {args.point_finding_alg}")

    # replace the generic l/m/el/az datasets with per-algorithm ones
    if args.point_finding_alg in ('all', 'peak'):
        h5f.create_dataset_like('l_peak', h5f['l_est'])
        h5f.create_dataset_like('m_peak', h5f['m_est'])
        h5f.create_dataset_like('elevation_peak', h5f['elevation'])
        h5f.create_dataset_like('azimuth_peak', h5f['azimuth'])
    if args.point_finding_alg in ('all', 'CoM'):
        h5f.create_dataset_like('l_CoM', h5f['l_est'])
        h5f.create_dataset_like('m_CoM', h5f['m_est'])
        h5f.create_dataset_like('elevation_CoM', h5f['elevation'])
        h5f.create_dataset_like('azimuth_CoM', h5f['azimuth'])
    del h5f['l_est']
    del h5f['m_est']
    del h5f['elevation']
    del h5f['azimuth']

    if rank == supervisor:
        # the supervisor process runs this code
        print("supervisor: started")

        # state info
        reached_end = False                              # no more data to hand out
        workers_alive = [True for _ in range(size - 1)]  # indexed by worker rank
        int_no = 0                                       # next integration number

        while True:
            if not reached_end:
                # grab data for the next available worker
                try:
                    duration, start_time, data = tbnf.read(
                        args.integration_length)
                    # only use data from valid antennas
                    data = data[[a.digitizer - 1 for a in valid_ants], :]
                except EOFError:
                    reached_end = True
                    print(f"supervisor: reached EOF")
                if int_no >= total_integrations:
                    print(f"supervisor: this is the last integration")
                    reached_end = True

            # get the next "ready" message from the workers
            st = MPI.Status()
            msg = comm.recv(status=st)
            if msg == "ready":
                print(
                    f"supervisor: received 'ready' message from worker {st.source}"
                )

                # if we're done, send an exit message and mark that we've killed this worker
                # an empty array indicates that the worker should exit
                if reached_end:
                    print(
                        f"supervisor: sending exit message to worker {st.source}"
                    )
                    comm.Send(np.array([]), dest=st.source, tag=int_no)
                    workers_alive[st.source] = False

                    if not any(workers_alive):
                        print(f"supervisor: all workers told to exit, goodbye")
                        break
                # otherwise, send the data to the worker for processing
                else:
                    print(
                        f"supervisor: sending data for integration {int_no}/{total_integrations} to worker {st.source}"
                    )
                    # Send with a capital S is optimized to send numpy arrays
                    # the tag carries the integration number to the worker
                    comm.Send(data, dest=st.source, tag=int_no)
                    int_no += 1
            else:
                raise ValueError(
                    f"Supervisor received unrecognized message '{msg}' from worker {st.source}"
                )

        tbnf.close()

    else:
        # the worker processes run this code
        print(f"worker {rank} started")

        # workers don't need access to the TBN file
        tbnf.close()

        # figure out the size of the incoming data buffer
        # (whole frames only, hence the round-to-frame_size arithmetic)
        samples_per_integration = int(
            round(args.integration_length * sample_rate /
                  frame_size)) * frame_size
        buffer_shape = (n_ants, samples_per_integration)

        while True:
            # send with a lowercase s can send any pickle-able python object
            # this is a synchronous send - it will block until the message is read by the supervisor
            # the other sends (e.g. comm.Send) only block until the message is safely taken by MPI,
            # which might happen before the receiver actually reads it
            comm.ssend("ready", dest=supervisor)

            # build a buffer to be filled with data
            data = np.empty(buffer_shape, np.complex64)

            # receive the data from the supervisor
            st = MPI.Status()
            comm.Recv(data, source=supervisor, status=st)
            # the supervisor encodes the integration number in the message tag
            int_no = st.tag

            # if the buffer is empty, we're done
            if st.count == 0:
                print(f"worker {rank}: received exit message, exiting")
                break

            # otherwise process the data we've recieved
            print(
                f"worker {rank}: received data for integration {int_no}, starting processing"
            )

            # run the correlator
            bl, freqs, vis = fxc.FXMaster(
                data,
                valid_ants,
                LFFT=args.fft_len,
                pfb=args.use_pfb,
                sample_rate=sample_rate,
                central_freq=tbn_center_freq,
                Pol='xx' if args.use_pol == 0 else 'yy',
                return_baselines=True,
                gain_correct=True)

            gridded_image = grid_visibilities(bl, freqs, vis, args.tx_freq,
                                              station)

            # should this integration's all-sky image be saved to disk?
            save_all_sky = (args.all_sky and int_no in args.all_sky) or (
                args.all_sky_every and int_no % args.all_sky_every == 0)

            # BUGFIX: the original condition was `== 'all' or 'peak'`, which
            # is always truthy, so the peak branch ran even for CoM-only runs.
            if args.point_finding_alg in ('all', 'peak'):
                result = get_gimg_max(gridded_image, return_img=save_all_sky)
                l = result[0]
                m = result[1]
                src_elev, src_az = lm_to_ea(l, m)
                h5f['l_peak'][int_no] = l
                h5f['m_peak'][int_no] = m
                h5f['elevation_peak'][int_no] = src_elev
                h5f['azimuth_peak'][int_no] = src_az

            if args.point_finding_alg in ('all', 'CoM'):
                result = get_gimg_center_of_mass(gridded_image,
                                                 return_img=save_all_sky)
                l = result[0]
                m = result[1]
                src_elev, src_az = lm_to_ea(l, m)
                h5f['l_CoM'][int_no] = l
                h5f['m_CoM'][int_no] = m
                h5f['elevation_CoM'][int_no] = src_elev
                h5f['azimuth_CoM'][int_no] = src_az

            # save the all-sky image from whichever point-finding call ran
            # last (CoM when alg is 'all' or 'CoM', otherwise peak)
            if save_all_sky:
                img = result[2]
                extent = result[3]
                fig, ax = plt.subplots()
                ax.imshow(img,
                          extent=extent,
                          origin='lower',
                          interpolation='nearest')
                plt.savefig('allsky_int_{}.png'.format(int_no))

            # compute the bin power and save it to the file
            # arbitrarily picking the tenth antenna in this list
            power_calc_data = data[10, :]
            h5f['snr_est'][int_no] = estimate_snr(power_calc_data,
                                                  args.fft_len, args.tx_freq,
                                                  sample_rate,
                                                  tbn_center_freq)

            print(f"worker {rank}: done processing integration {int_no}")

    # back to common code for both supervisor and workers
    h5f.attrs['total_integrations'] = int_no
    h5f.close()