def tar_job_wrapper(hsm_work_dir, file_list, remove=True):
    print(file_list)
    for fn, fits_dir in enumerate(file_list):
        if fits_dir.endswith("\n"):
            fits_dir = fits_dir[:-1]
        dir_name = fits_dir.split("/")[-1]
        # only lets 5 jobs run at a time so as not to flood the transfer (could probably handle more)
        if os.path.exists(fits_dir):
            print(dir_name)
            your_slurm_queue_check(max_queue=5, grep='tar', queue='workq')
            commands = []
            commands.append('cd {}'.format(fits_dir))
            commands.append("srun -n 1 -c 1 tar zcvvf - {0} | ssh hpc-hsm.pawsey.org.au 'cat - > {1}/temp_{2}.tar.gz'".format(fits_dir, hsm_work_dir, fn))
            commands.append('errorcode=$?')
            commands.append('if [ $errorcode == "0" ]; then')
            commands.append('    echo mv temp_{0}.tar.gz {1}.tar.gz'.format(fn, dir_name))
            commands.append('    ssh hpc-hsm.pawsey.org.au "mv {0}/temp_{1}.tar.gz {0}/{2}.tar.gz"'.format(hsm_work_dir, fn, dir_name))
            if remove:
                commands.append('    cd ..')
                commands.append('    rm -r {}'.format(dir_name))
            commands.append('fi')
            # TODO: add a move command
            submit_slurm('tar_{0}_{1}'.format(dir_name, fn), commands,
                         batch_dir="./",
                         slurm_kwargs={"time": "5:00:00"},
                         queue='copyq', submit=True, export='ALL')
        else:
            print('{} does not exist'.format(dir_name))
def write_picket_fence_scripts(self):
    """Function to write RTS submission scripts in the case of a picket-fence
    observation. A significant amount of extra information needs to be
    determined and placed in the RTS configuration files. There will be:

        1 RTS configuration file per set of adjacent single channels (subband)

    The only exception to that rule is where the 129 boundary is crossed,
    in which case that subband will be split in two.

    Returns
    -------
    jobids : list of ints
        A list of all the job IDs submitted to the system compute queue.
    """
    logger.info("Sorting picket-fence channels and determining subband info...")
    self.sort_obs_channels()
    hichan_groups, lochan_groups = self.construct_subbands()

    # write out the RTS config files and keep track of the number of nodes required for each
    count = 0
    lodict, count = self.get_subband_config(lochan_groups, self.rts_out_dir, "low", count)
    hidict, count = self.get_subband_config(hichan_groups, self.rts_out_dir, "high", count)
    chan_file_dict = lodict.copy()
    chan_file_dict.update(hidict)

    # Now submit the RTS jobs
    logger.info("Writing individual subband RTS configuration scripts")

    hostname = socket.gethostname()
    if hostname.startswith("galaxy"):
        mem = 1024
    else:
        mem = 10240

    jobids = []
    for k, v in chan_file_dict.items():
        nnodes = v + 1
        chans = k.split('_')[-1].split(".")[0]
        rts_batch = "RTS_{0}_{1}".format(self.cal_obsid, chans)
        slurm_kwargs = {"chdir": "{0}".format(self.rts_out_dir),
                        "time": "2:00:00",
                        "nodes": "{0}".format(nnodes),
                        "cpus-per-gpu": "1"}
        module_list = ["RTS/master"]
        commands = list(self.script_body)  # make a copy of body to then extend
        commands.append("export UCX_MEMTYPE_CACHE=n")
        commands.append("srun --export=all -N {0} -n {0} rts_gpu {1}".format(nnodes, k))
        jobid = submit_slurm(rts_batch, commands,
                             slurm_kwargs=slurm_kwargs,
                             module_list=module_list,
                             batch_dir=self.batch_dir,
                             submit=self.submit,
                             queue='gpuq', export="NONE", mem=mem,
                             load_vcstools=False)
        jobids.append(jobid)

    return jobids
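# Illustrative example of the grouping rule described in the docstring above
# (hypothetical channel selection): coarse channels [62, 63, 64, 126, 127, 128, 129, 130]
# would be grouped into the adjacent-channel subbands [62-64], [126-128] and [129-130];
# the last two remain separate because a subband is split wherever it would cross
# the 129 boundary.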
def download_cal(obs_id, cal_obs_id, data_dir, product_dir, vcstools_version="master", nice=0):
    # Load computer-dependent config file
    comp_config = load_config_file()

    batch_dir = os.path.join(product_dir, 'batch')
    product_dir = os.path.join(product_dir, 'cal', str(cal_obs_id))
    vis_dir = os.path.join(data_dir, 'vis')
    mdir(vis_dir, 'Calibrator vis', gid=comp_config['gid'])
    mdir(product_dir, 'Calibrator product', gid=comp_config['gid'])
    mdir(batch_dir, 'Batch', gid=comp_config['gid'])

    # Downloads the visibilities to /astro/mwavcs/vcs/[cal_obs_id]/vis
    # but creates a link to them here: /astro/mwavcs/vcs/[obs_id]/cal/[cal_obs_id]
    csvfile = os.path.join(batch_dir, "{0}_dl.csv".format(cal_obs_id))
    create_link(data_dir, 'vis', product_dir, 'vis')

    obsdownload_batch = "caldownload_{0}".format(cal_obs_id)
    secs_to_run = "03:00:00"  # sometimes the staging can take a while...
    module_list = ["manta-ray-client/python3"]
    commands = []
    commands.append("csvfile={0}".format(csvfile))
    commands.append('cd {0}'.format(vis_dir))
    commands.append('if [[ -z ${MWA_ASVO_API_KEY} ]]')
    commands.append('then')
    commands.append('    echo "Error, MWA_ASVO_API_KEY not set"')
    commands.append('    echo "Cannot use client"')
    commands.append('    echo "Please read the MWA ASVO documentation '
                    'about setting this (https://wiki.mwatelescope.org/'
                    'display/MP/MWA+ASVO%3A+Release+Notes)"')
    commands.append('    exit 1')
    commands.append('fi')
    commands.append('echo "obs_id={0}, job_type=d, download_type=vis" > {1}'.format(cal_obs_id, csvfile))
    commands.append('mwa_client --csv={0} --dir={1}'.format(csvfile, vis_dir))
    #commands.append("ln -sfn {0} {1}/{2}".format(data_dir, product_dir, 'vis'))
    commands.append('unzip *.zip')
    submit_slurm(obsdownload_batch, commands,
                 batch_dir=batch_dir,
                 module_list=module_list,
                 slurm_kwargs={"time": secs_to_run, "nice": nice},
                 vcstools_version=vcstools_version,
                 queue="copyq", export="NONE", mem=4096,
                 # Manually handing it the module dir as it should only run
                 module_dir='/group/mwa/software/modulefiles')
def ppp_archive_creation(cfg, depends_on=None, depend_type="afterany"):
    """Makes commands for converting to an archive file, then converting back to fits"""
    fits_dir = join(cfg["files"]["psr_dir"], cfg["source"]["my_pointing"])
    bins = cfg["source"]["my_bins"] if not cfg["run_ops"]["vdif"] else 1024
    dm = cfg["source"]["my_DM"]
    period = cfg["source"]["my_P"]
    total = cfg["source"]["total"]
    seek = cfg["source"]["seek"]
    # Change to working directory
    commands = [f"cd {cfg['files']['psr_dir']}"]
    # Check for vdif
    vdif_hdrs = join(cfg["source"]["my_pointing"], "*.hdr")
    cfg["run_ops"]["vdif"] = bool(glob(vdif_hdrs))
    # Add folds to commands
    psrchive_container = comp_config['prschive_container']
    archive_base = cfg["files"]["archive"].split(".ar")[0]  # Archive name without the .ar extension
    commands.append(fits_to_archive(fits_dir, archive_base, bins, dm, period, cfg["files"]["psr_dir"],
                                    total=total, seek=seek, vdif=cfg["run_ops"]["vdif"],
                                    container=psrchive_container))
    # Add ar -> fits conversion to commands
    commands.append(archive_to_fits(cfg["files"]["archive"], container=psrchive_container))

    # Submit the job
    name = f"to_archive_{cfg['source']['name']}_{cfg['obs']['id']}"
    slurm_kwargs = {"time": "08:00:00"}  # dspsr folding can take some time
    modules = ["singularity"]
    mem = 32768
    jid = submit_slurm(name, commands,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], depend=depends_on,
                       depend_type=depend_type, vcstools_version=cfg["run_ops"]["vcstools"],
                       submit=True)
    logger.info(f"Submitted archive/fits creation job: {name}")
    logger.info(f"Job ID: {jid}")
    if depends_on:
        logger.info(f"Job depends on job id(s): {depends_on}")
    return jid, name
def write_cont_scripts(self):
    """Function to write the RTS submission script in the case of a "standard"
    contiguous bandwidth observation. This is relatively simple and just
    requires us to use the standard format RTS configuration file.

    Returns
    -------
    jobids : list of ints
        A list of all the job IDs submitted to the system compute queue.
    """
    logger.info("Writing RTS configuration script for contiguous bandwidth observation")
    fname = "{0}/rts_{1}.in".format(self.rts_out_dir, self.cal_obsid)  # output file name to write
    with open(fname, 'w') as f:
        f.write(self.base_str)

    jobids = []
    nnodes = 25  # number of required GPU nodes - 1 per coarse channel + 1 master node
    rts_batch = "RTS_{0}".format(self.cal_obsid)
    slurm_kwargs = {"chdir": "{0}".format(self.rts_out_dir),
                    "time": "2:00:00",
                    "nodes": "{0}".format(nnodes),
                    "cpus-per-gpu": "1"}
    module_list = ["RTS/master"]
    commands = list(self.script_body)  # make a copy of body to then extend
    commands.append("export UCX_MEMTYPE_CACHE=n")
    commands.append("srun --export=all -N {0} -n {0} rts_gpu {1}".format(nnodes, fname))
    hostname = socket.gethostname()
    if hostname.startswith("galaxy"):
        mem = 1024
    else:
        mem = 10240
    jobid = submit_slurm(rts_batch, commands,
                         slurm_kwargs=slurm_kwargs,
                         module_list=module_list,
                         batch_dir=self.batch_dir,
                         submit=self.submit,
                         queue='gpuq', export="NONE", mem=mem,
                         load_vcstools=self.offline,  # load if offline
                         vcstools_version=self.vcstools_version)
    jobids.append(jobid)

    return jobids
def ppp_baseline_removal(cfg, depends_on=None, depend_type="afterany"):
    """Submits a job that removes baseline RFI"""
    # Change to working directory
    commands = [f"cd {cfg['files']['psr_dir']}"]
    # Fit the profile with a Gaussian
    try:
        archive_fit(cfg, cfg["files"]["archive"], cliptype="verbose")
    except (ProfileLengthError, NoFitError) as e:
        # Raise different messages for different errors
        ex_type, _, _ = sys.exc_info()
        if ex_type == NoFitError:
            raise NoFitError("A Gaussian fit to this profile could not be made. This profile is likely too noisy")
        elif ex_type == ProfileLengthError:
            raise ProfileLengthError("No VDIF files available and profile is not long enough to fit a Gaussian")
    # Add the baseline removal commands
    commands.append(remove_baseline(cfg))

    # Submit the job
    name = f"debase_{cfg['source']['name']}_{cfg['obs']['id']}"
    slurm_kwargs = {"time": "01:00:00"}
    modules = ["singularity", "psrsalsa"]
    mem = 32768
    jid = submit_slurm(name, commands,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], depend=depends_on,
                       depend_type=depend_type, vcstools_version=cfg["run_ops"]["vcstools"],
                       submit=True)
    logger.info(f"Submitted baseline removal job: {name}")
    logger.info(f"Job ID: {jid}")
    if depends_on:
        logger.info(f"Job depends on job id(s): {depends_on}")
    cfg["completed"]["debase"] = True
    return jid, name
def submit_prepfold(cfg, nbins, pointing, psr_dir, depends_on=None, depend_type="afterany"):
    """Creates the commands for a prepfold job and submits it to the queue"""
    # Make the commands for the job
    prep_kwargs = common_kwargs(cfg, int(nbins), pointing)
    cmds = [f"cd {psr_dir}"]
    cmds += add_prepfold_to_commands(prep_kwargs, cfg["source"]["name"], pointing,
                                     eph=cfg["source"]["edited_eph"],
                                     eph_name=cfg["source"]["edited_eph_name"],
                                     presto_container="/pawsey/mwa/singularity/presto/presto.sif",
                                     binary=cfg["source"]["binary"])
    # TODO: get rid of the container hard-code ^^

    # Work out some things for job submission
    name = generate_prep_name(cfg, nbins, pointing)
    time = prepfold_time_alloc(cfg, prep_kwargs)
    slurm_kwargs = {"time": time}
    modules = ["singularity"]
    mem = 8192

    # Submit Job
    jid = submit_slurm(name, cmds,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], depend=depends_on,
                       depend_type=depend_type, vcstools_version=cfg["run_ops"]["vcstools"],
                       submit=True)
    return jid, name
def submit_classify(cfg):
    """Launches a classify job"""
    # Make the commands for the job
    cmds = add_classify_to_commands(cfg)

    # Work out some things for the job
    name = f"{cfg['files']['file_precursor']}_classify"
    slurm_kwargs = {"time": "00:30:00"}
    modules = ["singularity"]
    mem = 8192

    # Submit Job
    jid = submit_slurm(name, cmds,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], load_vcstools=False,
                       submit=True)
    logger.info(f"Submitted classify job: {name}")
    logger.info(f"Job ID: {jid}")
    return jid, name
def relaunch_ppp(cfg, depends_on=None, depend_type="afterany", fresh_run=False, reset_logs=False, time="00:30:00"):
    """Relaunches the pulsar processing pipeline using the supplied cfg file"""
    # Dump the new cfg
    dump_to_yaml(cfg)
    label = launch_label(cfg)
    name = f"ppp_{label}_{cfg['files']['file_precursor']}"
    slurm_kwargs = {"time": time}
    mem = 8192
    ppp_launch = "pulsar_processing_pipeline.py"
    ppp_launch += f" --cfg {cfg['files']['my_name']}"
    if fresh_run:
        ppp_launch += " --force_rerun"
    if reset_logs:
        ppp_launch += " --reset_logs"
    cmds = [f"cd {cfg['files']['psr_dir']}"]
    cmds.append(ppp_launch)
    modules = [f"mwa_search/{cfg['run_ops']['mwa_search']}", "singularity"]
    jid = submit_slurm(name, cmds,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], depend=depends_on,
                       depend_type=depend_type, vcstools_version=cfg["run_ops"]["vcstools"],
                       submit=True)
    logger.info(f"Submitted relaunch of ppp: {name}")
    logger.info(f"Job ID: {jid}")
    if depends_on:
        logger.info(f"Job depends on job id(s): {depends_on}")
def submit_prepfold_products_db(cfg, dep_id=None, dep_type="afterany"):
    """Submits the best fold profile to the pulsar database. Will also submit .ppps"""
    my_pointing = cfg["source"]["my_pointing"]
    # We will upload the init fold and the best post fold
    bin_list = list(cfg["folds"][my_pointing]["init"].keys())
    bin_list.append(cfg["source"]["my_bins"])
    jids = []
    for bin_count in bin_list:
        commands = []
        commands.append(f"cd {cfg['files']['psr_dir']}")
        # Get the files to upload
        try:
            ppps = glob_pfds(cfg, my_pointing, bin_count, pfd_type=".ps")[0]
        except IndexError as e:
            raise IndexError(f"No ppps files found in dir: {cfg['files']['psr_dir']} for pointing {my_pointing} and bin count {bin_count}")
        try:
            bestprof = glob_pfds(cfg, my_pointing, bin_count, pfd_type=".bestprof")[0]
        except IndexError as e:
            raise IndexError(f"No bestprof files found in dir: {cfg['files']['psr_dir']} for pointing {my_pointing} and bin count {bin_count}")
        commands.append(f"echo 'Submitting profile to database with {bin_count} bins'")
        commands.append(f"submit_to_database.py -o {cfg['obs']['id']} --cal_id {cfg['obs']['cal']} -p {cfg['source']['name']} --bestprof {bestprof} --ppps {ppps}")

        # Submit this job
        name = f"Submit_db_{cfg['files']['file_precursor']}_{bin_count}"
        batch_dir = join(comp_config['base_data_dir'], cfg['obs']['id'], "batch")
        this_id = submit_slurm(name, commands,
                               batch_dir=batch_dir,
                               slurm_kwargs={"time": "00:30:00"},
                               depend=dep_id,
                               module_list=[f"mwa_search/{cfg['run_ops']['mwa_search']}"],
                               vcstools_version=cfg["run_ops"]["vcstools"],
                               submit=True, depend_type=dep_type)
        jids.append(this_id)
        logger.info(f"Submission script on queue for profile: {bestprof}")
        logger.info(f"Job Name: {name}")
        logger.info(f"Job ID: {this_id}")
    cfg["completed"]["upload"] = True
    return jids
def launch_pabeam_sim(obsid, pointing, begin, duration,
                      source_name="noname", metafits_file=None,
                      flagged_tiles=None, delays=None,
                      efficiency=1, vcstools_version='master',
                      args=None, common_metadata=None, output_dir=None):
    """Submit a job to run the pabeam code to estimate the system equivalent
    flux density and a dependent job to resume the submit_to_database.py code
    if `args` is given.

    Parameters
    ----------
    obsid : `int`
        The MWA observation ID.
    pointing : `str`
        The pointing of the simulation in the format HH:MM:SS.SS_DD:MM:SS.SS.
    begin : `int`
        The beginning of the simulation in GPS time.
    duration : `int`
        The duration of the simulation in seconds (used to calculate the end of the simulation).
    source_name : `str`, optional
        The name of the source to be used to label output files. |br| Default: "noname".
    metafits_file : `str`, optional
        The location of the metafits file. If none given will assume the default location.
    flagged_tiles : `list`, optional
        A list of the flagged tiles. If none given will assume no tiles were flagged.
    delays : `list`, optional
        The tile delays to use in the simulation. If none given, they are taken from the observation metadata.
    efficiency : `float`, optional
        Frequency and pointing dependent array efficiency. |br| Default: 1.
    vcstools_version : `str`, optional
        VCSTools version to load in the job.
    args : `dict`, optional
        The argument parse dictionary from submit_to_database.py. If supplied will launch a
        dependent job with submit_to_database.py to complete the script.
    common_metadata : `list`, optional
        The list of common metadata generated from :py:meth:`vcstools.metadb_utils.get_common_obs_metadata`.
    output_dir : `str`
        The output directory of the simulation results. By default will put it in the VCS
        directory under <obsid>/sefd_simulations.

    Examples
    --------
    A simple example:

    >>> launch_pabeam_sim(1206977296, "12:49:12_+27:12:00", 1206977300, 600, source_name="SEFD_test", output_dir=".")
    """
    # Load computer-dependent config file
    comp_config = load_config_file()

    # Ensure metafits file is there
    data_dir = "{}{}".format(comp_config['base_data_dir'], obsid)
    ensure_metafits(data_dir, obsid, "{0}_metafits_ppds.fits".format(obsid))

    # Perform metadata calls
    if common_metadata is None:
        common_metadata = get_common_obs_metadata(obsid)
    # Get frequencies
    centre_freq = common_metadata[5] * 10e5
    low_freq = common_metadata[6][0] * 1.28 * 10e5
    high_freq = common_metadata[6][-1] * 1.28 * 10e5
    sim_freqs = [str(low_freq), str(centre_freq), str(high_freq)]

    # Calculate required pixel res and cores/mem
    array_phase = get_obs_array_phase(obsid)
    fwhm = calc_ta_fwhm(high_freq / 10e5, array_phase=array_phase)  # degrees
    phi_res = theta_res = fwhm / 3
    if phi_res < 0.015:
        # Going any smaller causes memory errors
        phi_res = theta_res = 0.015
    npixels = 360. // phi_res + 90. // theta_res
    cores_required = npixels * len(sim_freqs) // 600
    nodes_required = cores_required // 24 + 1

    # Make directories
    batch_dir = "{}/batch".format(data_dir)
    if output_dir is None:
        sefd_dir = "{}/sefd_simulations".format(data_dir)
    else:
        sefd_dir = output_dir
    if not os.path.exists(batch_dir):
        mdir(batch_dir, "Batch", gid=comp_config['gid'])
    if not os.path.exists(sefd_dir):
        mdir(sefd_dir, "SEFD", gid=comp_config['gid'])

    # Parse defaults
    if metafits_file is None:
        metafits_file = "{0}{1}/{1}_metafits_ppds.fits".format(comp_config['base_data_dir'], obsid)

    # Get delays if none given
    if delays is None:
        delays = get_common_obs_metadata(obsid)[4][0]
    print(delays)
    print(' '.join(np.array(delays, dtype=str)))

    # Set up pabeam command
    command = 'srun --export=all -u -n {} pabeam.py'.format(int(nodes_required * 24))
    command += ' -o {}'.format(obsid)
    command += ' -b {}'.format(begin)
    command += ' -d {}'.format(int(duration))
    command += ' -s {}'.format(int(duration // 4 - 1))  # force 4 time steps to get reasonable std
    command += ' -e {}'.format(efficiency)
    command += ' --metafits {}'.format(metafits_file)
    command += ' -p {}'.format(pointing)
    command += ' --grid_res {:.3f} {:.3f}'.format(theta_res, phi_res)
    command += ' --delays {}'.format(' '.join(np.array(delays, dtype=str)))
    command += ' --out_dir {}'.format(sefd_dir)
    command += ' --out_name {}'.format(source_name)
    command += ' --freq {}'.format(" ".join(sim_freqs))
    if flagged_tiles is not None:
        logger.debug("flagged_tiles: {}".format(flagged_tiles))
        command += ' --flagged_tiles {}'.format(' '.join(flagged_tiles))

    # Set up and launch job
    batch_file_name = 'pabeam_{}_{}_{}'.format(obsid, source_name, pointing)
    job_id = submit_slurm(batch_file_name, [command],
                          batch_dir=batch_dir,
                          slurm_kwargs={"time": datetime.timedelta(seconds=10 * 60 * 60),
                                        "nodes": int(nodes_required)},
                          module_list=['hyperbeam-python'],
                          queue='cpuq', cpu_threads=24, mem=12288,
                          vcstools_version=vcstools_version)

    if args:
        # Set up dependent submit_to_database.py job
        submit_args = vars(args)
        # Add sefd_file argument
        submit_args['sefd_file'] = "{}/{}*stats".format(sefd_dir, source_name)
        command_str = "submit_to_database.py"
        for key, val in submit_args.items():
            if val:
                if val == True:
                    command_str += " --{}".format(key)
                else:
                    command_str += " --{} {}".format(key, val)

        batch_file_name = 'submit_to_database_{}_{}_{}'.format(obsid, source_name, pointing)
        job_id_dependent = submit_slurm(batch_file_name, [command_str],
                                        batch_dir=batch_dir,
                                        slurm_kwargs={"time": datetime.timedelta(seconds=1 * 60 * 60)},
                                        queue='cpuq',
                                        vcstools_version=vcstools_version,
                                        depend=[job_id])
        return job_id, job_id_dependent
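# Worked example of the sizing arithmetic in launch_pabeam_sim (hypothetical
# resolution): at the minimum grid resolution of 0.015 deg,
# npixels = 360. // 0.015 + 90. // 0.015 = 24000 + 6000 = 30000; with the three
# simulated frequencies, cores_required = 30000 * 3 // 600 = 150 and
# nodes_required = 150 // 24 + 1 = 7, so the srun line requests 7 * 24 = 168 processes.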
def vcs_download(obsid, start_time, stop_time, increment, data_dir, product_dir, parallel,
                 ics=False, n_untar=2, keep="", vcstools_version="master", nice=0):
    # Load computer-dependent config file
    comp_config = load_config_file()

    logger.info("Downloading files from archive")
    voltdownload = "voltdownload.py"
    obsinfo = meta.getmeta(service='obs', params={'obs_id': str(obsid)})
    comb_del_check = meta.combined_deleted_check(obsid, begin=start_time, end=stop_time)
    data_format = obsinfo['dataquality']
    if data_format == 1 or (comb_del_check and data_format == 6):
        # either only the raw data is available (data_format == 1)
        # or there were combined files but they were deleted (comb_del_check and data_format == 6)
        target_dir = link = '/raw'
        if ics:
            logger.error("Data have not been recombined in the archive yet. Exiting")
            sys.exit(0)
        data_type = 11
        dl_dir = "{0}/{1}".format(data_dir, target_dir)
        dir_description = "Raw"
    elif data_format == 6:
        target_dir = link = '/combined'
        if ics:
            data_type = 15
        else:
            data_type = 16
        dl_dir = "{0}/{1}".format(data_dir, target_dir)
        dir_description = "Combined"
    else:
        logger.error("Unable to determine data format from archive. Exiting")
        sys.exit(0)
    mdir(dl_dir, dir_description, gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)
    batch_dir = product_dir + "/batch/"

    for time_to_get in range(start_time, stop_time, increment):
        if time_to_get + increment > stop_time:
            increment = stop_time - time_to_get + 1
        # need to subtract 1 from increment since voltdownload wants how many
        # seconds PAST the first one

        voltdownload_batch = "volt_{0}".format(time_to_get)
        check_batch = "check_volt_{0}".format(time_to_get)
        volt_secs_to_run = datetime.timedelta(seconds=500 * increment)
        check_secs_to_run = "15:00"
        if data_type == 16:
            check_secs_to_run = "10:15:00"

        checks = "checks.py"
        # Write out the checks batch file but don't submit it
        commands = []
        commands.append("newcount=0")
        commands.append("let oldcount=$newcount-1")
        commands.append("sed -i -e \"s/oldcount=${{oldcount}}/oldcount=${{newcount}}/\" {0}".\
                        format(batch_dir + voltdownload_batch + ".batch"))
        commands.append("oldcount=$newcount; let newcount=$newcount+1")
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".\
                        format(batch_dir + voltdownload_batch + ".batch"))
        checks_command = "-m download -o {0} -w {1} -b {2} -i {3} --data_type {4}".format(
                             obsid, dl_dir, time_to_get, increment, data_type)
        commands.append('{0} {1}'.format(checks, checks_command))
        commands.append("if [ $? -eq 1 ];then")
        commands.append("sbatch {0}".format(batch_dir + voltdownload_batch + ".batch"))
        # if we have tarballs we send the untar jobs to the workq
        if data_type == 16:
            commands.append("else")
            untar = 'untar.sh'
            untar_command = "-w {0} -o {1} -b {2} -e {3} -j {4} {5}".format(
                                dl_dir, obsid, time_to_get, time_to_get + increment - 1, n_untar, keep)
            commands.append('{0} {1}'.format(untar, untar_command))
            #commands.append("sbatch {0}.batch".format(batch_dir+tar_batch))
        commands.append("fi")

        # Download and checks should be done on Zeus's cpuq. This will only work
        # on Galaxy as the Ozstar workflow is different
        submit_slurm(check_batch, commands,
                     batch_dir=batch_dir,
                     slurm_kwargs={"time": check_secs_to_run, "nice": nice},
                     vcstools_version=vcstools_version, submit=False,
                     outfile=batch_dir + check_batch + "_0.out",
                     queue="zcpuq", export="NONE", mem=10240,
                     # Manually handing it the module dir as it should only run
                     module_dir='/group/mwa/software/modulefiles')

        # Write out the tar batch file if in mode 15
        #if format == 16:
        #    body = []
        #    for t in range(time_to_get, time_to_get+increment):
        #        body.append("aprun tar -xf {0}/1149620392_{1}_combined.tar".format(dl_dir,t))
        #    submit_slurm(tar_batch, body, batch_dir=working_dir+"/batch/",
        #                 slurm_kwargs={"time": "1:00:00", "partition": "gpuq"})

        #module_list=["mwa-voltage/master"]
        # removed the master version load because by default we load the python 3 version
        module_list = []
        body = []
        body.append("oldcount=0")
        body.append("let newcount=$oldcount+1")
        body.append("if [ ${newcount} -gt 10 ]; then")
        body.append("echo \"Tried ten times, this is silly. Aborting here.\";exit")
        body.append("fi")
        body.append("sed -i -e \"s/newcount=${{oldcount}}/newcount=${{newcount}}/\" {0}\n".\
                    format(batch_dir + check_batch + ".batch"))
        body.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".\
                    format(batch_dir + check_batch + ".batch"))
        body.append("sbatch -d afterany:${{SLURM_JOB_ID}} {0}".\
                    format(batch_dir + check_batch + ".batch"))
        voltdownload_command = "--obs={0} --type={1} --from={2} --duration={3} --parallel={4}"\
                               " --dir={5}".format(obsid, data_type, time_to_get, increment - 1,
                                                   parallel, dl_dir)
        body.append("{0} {1}".format(voltdownload, voltdownload_command))
        submit_slurm(voltdownload_batch, body,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": str(volt_secs_to_run), "nice": nice},
                     vcstools_version=vcstools_version,
                     outfile=batch_dir + voltdownload_batch + "_1.out",
                     queue="copyq", export="NONE", mem=5120,
                     # Manually handing it the module dir as it should only run
                     module_dir='/group/mwa/software/modulefiles')
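# Note on the retry mechanism in vcs_download: each volt_* download batch ends by
# queueing its check_volt_* batch with an "afterany" dependency; the check batch
# runs checks.py and, if the check fails, uses sed to bump the attempt counters
# embedded in both batch files before resubmitting the download batch. The
# counter test aborts the loop after ten attempts.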
def coherent_beam(obs_id, start, stop, data_dir, product_dir, batch_dir,
                  metafits_file, nfine_chan, pointing_list,
                  rts_flag_file=None, bf_formats=None, DI_dir=None,
                  execpath=None, calibration_type='rts', ipfb_filter="LSQ12",
                  vcstools_version="master", nice=0, channels_to_beamform=None,
                  beam_version="FEE2016"):
    """
    This function runs the new version of the beamformer. It is modelled after
    the old function above and will likely be able to be streamlined after a
    working implementation (SET)

    Streamlining underway, as well as full replacement of the old function (SET March 28, 2018)
    """
    # Load computer-dependent config file
    comp_config = load_config_file()

    # If execpath is given, change the make_beam executable command
    # otherwise, it should be on the PATH if vcstools has been installed
    if execpath:
        make_beam_cmd = "{0}/make_beam".format(execpath)
        make_beam_version_cmd = "{0}/make_beam -V".format(execpath)
    else:
        make_beam_cmd = "make_beam"
        make_beam_version_cmd = "make_beam -V"
    make_beam_version = subprocess.Popen(make_beam_version_cmd,
                                         stdout=subprocess.PIPE, shell=True).communicate()[0]
    logger.info("Current version of make_beam = {0}".format(make_beam_version.strip()))

    metafile = "{0}/{1}.meta".format(product_dir, obs_id)
    channels = None
    # No channels given so first check for a metafile
    if os.path.isfile(metafile):
        logger.info("Found observation metafile: {0}".format(metafile))
        with open(metafile, 'r') as m:
            for line in m.readlines():
                if line.startswith("channels"):
                    channels = line.split(",")[1:]
                    channels = np.array(channels, dtype=np.int)
    else:
        logger.debug("No metafile in {0}".format(metafile))
    logger.debug("Channels before meta.get_channels: {0}".format(channels))
    # If channels is still None get_channels will get it from the metadata
    channels = meta.get_channels(obs_id, channels=channels)

    # Make a metafile containing the channels so no future metadata calls are required
    if not os.path.isfile(metafile):
        with open(metafile, "w") as m:
            m.write("#Metadata for obs ID {0} required to determine if: normal or "
                    "picket-fence\n".format(obs_id))
            m.write("channels,{0}".format(",".join([str(c) for c in channels])))
    channels = np.array(channels, dtype=np.int)
    hichans = [c for c in channels if c > 128]
    lochans = [c for c in channels if c <= 128]
    lochans.extend(list(reversed(hichans)))
    ordered_channels = lochans

    if channels_to_beamform is None:
        # If no channels_to_beamform given fold on everything
        channels_to_beamform = ordered_channels

    # Run for each coarse channel. Calculates delays and makes beam
    if not DI_dir:
        logger.error("You need to specify the path to the calibrator files, "
                     "either where the DIJs are or where the Offringa "
                     "calibration_solutions.bin file is. Aborting here")
        sys.exit(0)
    DI_dir = os.path.abspath(DI_dir)

    # make_beam_small requires the start time in UTC, get it from the start
    utctime = gps_to_utc(start)

    P_dir = os.path.join(product_dir, "pointings")
    mdir(P_dir, "Pointings", gid=comp_config['gid'])
    mdir(os.path.join(product_dir, "incoh"), "Incoh", gid=comp_config['gid'])
    # startjobs = True

    # Set up supercomputer-dependent parameters
    import socket
    hostname = socket.gethostname()
    if hostname.startswith('john') or hostname.startswith('farnarkle'):
        max_pointing = 120
    else:
        max_pointing = 15
    if comp_config['ssd_dir'] is None:
        temp_mem = None
    else:
        # Work out required SSD size
        obs_length = stop - start + 1.
        temp_mem = int(0.0012 * obs_length * max_pointing + 1)
        temp_mem_single = int(0.0024 * obs_length + 2)
        if "-s" not in bf_formats:
            temp_mem = temp_mem * 4
            temp_mem_single = temp_mem_single * 4

    # set up SLURM requirements
    if len(pointing_list) > max_pointing:
        seconds_to_run = 8 * (stop - start + 1) * max_pointing
    else:
        seconds_to_run = 8 * (stop - start + 1) * len(pointing_list)
    if seconds_to_run > 86399.:
        secs_to_run = datetime.timedelta(seconds=86399)
    else:
        secs_to_run = datetime.timedelta(seconds=seconds_to_run)

    # Get the project id (eg G0057) from the metafits file
    with pyfits.open(metafits_file) as hdul:
        project_id = hdul[0].header['project']

    # splits the pointing list into lists of length max_pointing
    pointing_list_list = list(chunks(pointing_list, max_pointing))
    time_now = str(datetime.datetime.now()).replace(" ", "_")

    logger.info("Running make_beam")
    job_id_list_list = []
    for pl, pointing_list in enumerate(pointing_list_list):
        pointing_str = ",".join(pointing_list)
        # Run one coarse channel per node
        job_id_list = []
        for gpubox, coarse_chan in enumerate(ordered_channels, 1):
            if coarse_chan not in channels_to_beamform:
                continue
            if calibration_type == 'rts':
                #chan_list = get_frequencies(metafits_file, resort=True)
                DI_file = "{0}/DI_JonesMatrices_node{1:0>3}.dat".format(DI_dir, gpubox)
                jones_option = "-J {0}".format(DI_file)
            elif calibration_type == 'offringa':
                #chan_list = get_frequencies(metafits_file, resort=False)
                DI_file = "{0}/calibration_solution.bin".format(DI_dir)
                jones_option = "-O {0} -C {1}".format(DI_file, int(gpubox) - 1)
            else:
                logger.info("Please use an accepted calibration type. Aborting here.")
                sys.exit(0)

            # Making pointing directories
            for pointing in pointing_list:
                mdir("{0}/{1}".format(P_dir, pointing), "Pointing {0}".format(pointing),
                     gid=comp_config['gid'])

            n_omp_threads = 1
            if "v" in bf_formats:
                for pointing in pointing_list:
                    make_beam_small_batch = "mb_{0}_ch{1}".format(pointing, coarse_chan)
                    module_list = [comp_config['container_module']]
                    commands = []
                    commands.append("cd {0}/{1}".format(P_dir, pointing))

                    runline = "srun --export=all -n 1"
                    runline += " -c {}".format(n_omp_threads)
                    if comp_config['container_command'] != '':
                        runline += " {} '".format(comp_config['container_command'])
                    runline += " {}".format(make_beam_cmd)
                    runline += " -o {}".format(obs_id)
                    runline += " -b {}".format(start)
                    runline += " -e {}".format(stop)
                    runline += " -a 128"
                    runline += " -n 128"
                    runline += " -f {}".format(coarse_chan)
                    runline += " {}".format(jones_option)
                    runline += " -d {}/combined".format(data_dir)
                    runline += " -P {}".format(pointing)
                    runline += " -r 10000"
                    runline += " -m {}".format(metafits_file)
                    runline += " -z {}".format(utctime)
                    runline += " {}".format(bf_formats)
                    runline += " -F {}".format(rts_flag_file)
                    runline += " -S {}".format(ipfb_filter)
                    if beam_version == "ANALYTIC":
                        runline += " -H"
                    if comp_config['container_command'] != '':
                        runline += "'"
                    commands.append(runline)

                    job_id = submit_slurm(make_beam_small_batch, commands,
                                          batch_dir=batch_dir,
                                          module_list=module_list,
                                          slurm_kwargs={"time": secs_to_run, "nice": nice},
                                          queue='gpuq',
                                          vcstools_version=vcstools_version,  # forces old version with vdif
                                          submit=True, export="NONE", gpu_res=1,
                                          cpu_threads=n_omp_threads,
                                          mem=comp_config['gpu_beamform_mem'],
                                          temp_mem=temp_mem_single)
                    job_id_list.append(job_id)
            else:
                make_beam_small_batch = "mb_{0}_{1}_ch{2}".format(pl, time_now, coarse_chan)
                module_list = [comp_config['container_module']]
                commands = []
                if comp_config['ssd_dir'] is None:
                    # Write outputs to SSDs if on Ozstar
                    commands.append("cd {0}".format(P_dir))
                else:
                    commands.append("cd {0}".format(comp_config['ssd_dir']))

                runline = "srun --export=all -n 1"
                runline += " -c {}".format(n_omp_threads)
                if comp_config['container_command'] != '':
                    runline += " {} '".format(comp_config['container_command'])
                runline += " {}".format(make_beam_cmd)
                runline += " -o {}".format(obs_id)
                runline += " -b {}".format(start)
                runline += " -e {}".format(stop)
                runline += " -a 128"
                runline += " -n 128"
                runline += " -f {}".format(coarse_chan)
                runline += " {}".format(jones_option)
                runline += " -d {}/combined".format(data_dir)
                runline += " -P {}".format(pointing_str)
                runline += " -r 10000"
                runline += " -m {}".format(metafits_file)
                runline += " -z {}".format(utctime)
                runline += " {}".format(bf_formats)
                runline += " -F {}".format(rts_flag_file)
                if beam_version == "ANALYTIC":
                    runline += " -H"
                if comp_config['container_command'] != '':
                    runline += "'"
                commands.append(runline)
                commands.append("")

                if comp_config['ssd_dir'] is not None:
                    for pointing in pointing_list:
                        commands.append("cp {0}/{1}/{2}_{3}_{1}_ch{4}_00*.fits "
                                        "{5}/{1}/".format(comp_config['ssd_dir'], pointing,
                                                          project_id, obs_id, coarse_chan, P_dir))
                    if 'i' in bf_formats:
                        commands.append("cp {0}/{1}/{2}_{3}_{1}_ch{4}_00*.fits "
                                        "{5}/{1}/".format(comp_config['ssd_dir'], "incoh",
                                                          project_id, obs_id, coarse_chan, product_dir))
                commands.append("")

                job_id = submit_slurm(make_beam_small_batch, commands,
                                      batch_dir=batch_dir,
                                      module_list=module_list,
                                      slurm_kwargs={"time": secs_to_run, "nice": nice},
                                      queue='gpuq',
                                      vcstools_version=vcstools_version,
                                      submit=True, export="NONE", gpu_res=1,
                                      cpu_threads=n_omp_threads,
                                      mem=comp_config['gpu_beamform_mem'],
                                      temp_mem=temp_mem)
                job_id_list.append(job_id)
        job_id_list_list.append(job_id_list)

    return job_id_list_list, make_beam_small_batch.split('ch')[0]
def vcs_correlate(obsid, start, stop, increment, data_dir, product_dir, ft_res, metafits,
                  vcstools_version="master", nice=0):
    # Load computer-dependent config file
    comp_config = load_config_file()

    logger.info("Correlating files at {0} kHz and {1} milliseconds".\
                format(ft_res[0], ft_res[1]))

    batch_dir = product_dir + "/batch/"
    target_dir = link = 'vis'

    if data_dir == product_dir:
        corr_dir = "{0}/cal/{1}/{2}".format(product_dir, obsid, target_dir)
    else:
        corr_dir = "{0}/{1}".format(data_dir, target_dir)
        product_dir = "{0}/cal/{1}/".format(product_dir, obsid)
        mdir(product_dir, "Correlator", gid=comp_config['gid'])
    mdir(corr_dir, "Correlator Product", gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)

    chan_list = get_frequencies(metafits, resort=True)
    #gpu_int = 0.01  # Code was compiled with a hard-coded 100 sample minimum integration. For 'normal' data this means 0.01 seconds
    gpu_int = 10  # Code was compiled with a hard-coded 100 sample minimum integration. For 'normal' data this means 10 milliseconds.
    integrations = int(ft_res[1] / gpu_int)
    #num_frames = int(1.0 / ft_res[1])
    num_frames = int(1000 / ft_res[1])

    logger.info("Input chan list is {0}".format(chan_list))

    for time_to_get in range(start, stop, increment):
        inc_start = time_to_get
        inc_stop = time_to_get + increment
        for index, channel in enumerate(chan_list):
            gpubox_label = (index + 1)
            f = []
            for time_to_corr in range(inc_start, inc_stop, 1):
                file_to_process = "{0}/combined/{1}_{2}_ch{3:0>2}.dat".\
                                  format(data_dir, obsid, time_to_corr, channel)
                # check the file exists
                if (os.path.isfile(file_to_process) == True):
                    f.append(file_to_process)

            # now have a full list of files for this increment and this channel
            if (len(f) > 0):
                corr_batch = "correlator_{0}_gpubox{1:0>2}".format(inc_start, gpubox_label)
                body = []
                to_corr = 0
                for file in f:
                    (current_time, _) = os.path.splitext(os.path.basename(file))
                    (obsid, gpstime, _) = current_time.split('_')
                    t = Time(int(gpstime), format='gps', scale='utc')
                    unix_time = int(t.unix)

                    offline_correlator_command = "-o {0}/{1} -s {2} -r {3} -i {4} -n {5} "\
                                                 "-c {6:0>2} -d {7}".format(corr_dir, obsid, unix_time,
                                                                            num_frames, integrations,
                                                                            int(ft_res[0] / 10),
                                                                            gpubox_label, file)
                    body.append("{0} {1}".format("offline_correlator", offline_correlator_command))
                    to_corr += 1

                #module_list = ["module switch PrgEnv-cray PrgEnv-gnu"]
                module_list = ["offline_correlator/v1.0.0"]
                secs_to_run = str(datetime.timedelta(seconds=2 * 12 * num_frames * to_corr))
                # added factor two on 10 April 2017 as galaxy seemed really slow...
                submit_slurm(corr_batch, body,
                             module_list=module_list,
                             slurm_kwargs={"time": secs_to_run, "nice": nice},
                             queue='gpuq', vcstools_version=vcstools_version,
                             batch_dir=batch_dir, export="NONE")
            else:
                logger.error("Couldn't find any recombine files. Aborting here.")
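# Illustrative example for vcs_correlate (hypothetical ft_res): with
# ft_res = (10, 100), i.e. 10 kHz frequency resolution and 100 ms time resolution,
# integrations = int(100 / 10) = 10 of the hard-coded 10 ms minimum integrations
# per dump, and num_frames = int(1000 / 100) = 10 dumps per second of data.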
def write_batch_files(obsid, begin, end, ra, dec, freq, flaggedtiles,
                      step=500, thetares=0.05, phires=0.05, nnodes=1, eff=1,
                      beam_model='hyperbeam', maploc="$PWD", odir=None,
                      delays=[0] * 16, write=True, write_showspec=False,
                      vcstools_version='master', metafits_loc=None):
    comp_config = load_config_file()
    times = np.arange(begin, end, step=step)
    times = np.append(times, end)
    #nprocesses = 32 * nnodes
    nprocesses = 1 * nnodes
    flags = " ".join(flaggedtiles)

    if odir is None:
        # Make default directories
        product_dir = os.path.join(comp_config['base_data_dir'], obsid,
                                   'pabeam', '{}_{}'.format(ra, dec))
        batch_dir = os.path.join(comp_config['base_data_dir'], obsid, 'batch')
    else:
        product_dir = batch_dir = odir
    mdir(product_dir, 'Product Dir', gid=comp_config['gid'])
    mdir(batch_dir, 'Batch Dir', gid=comp_config['gid'])

    # Loop over all times
    for i in range(len(times)):
        fname = "make_pabeam_{0}_{1}_{2}_{3:.2f}MHz".format(ra, dec, times[i], freq / 1e6)
        onamebase = "{0}_{1}_{2:.2f}MHz_tres{3}_pres{4}_{5}_{6}".format(
            obsid, float(times[i]), freq / 1e6, thetares, phires, ra, dec)

        commands = []
        # Write out params
        commands.append("nprocesses={}".format(nprocesses))
        commands.append("obsid={}".format(obsid))
        commands.append("""ra='"{}"'""".format(ra))
        commands.append("""dec='"{}"'""".format(dec))
        commands.append("freq={}".format(freq))
        commands.append("eff={}".format(eff))
        commands.append('flags="{}"'.format(flags))
        commands.append('delays="{}"'.format(delays))
        commands.append("tres={}".format(thetares))
        commands.append("pres={}".format(phires))
        commands.append("obstime={}".format(times[i]))
        commands.append("odir={}".format(product_dir))
        commands.append("metafits_loc={}".format(metafits_loc))
        commands.append('beam="{}"'.format(beam_model))

        # TODO remove this once hyperbeam is installed with python
        commands.append("export PYTHONPATH=$PYTHONPATH:/pawsey/mwa/software/python3/hyperbeam/v0.3.0/lib/python3.8/site-packages")

        # Main command
        pabeam_command = "srun --export=all -u -n ${nprocesses} pabeam.py " +\
                         "-o ${obsid} -f ${freq} -t ${obstime} -e ${eff} -p ${ra} ${dec} --metafits ${metafits_loc} " +\
                         "--flagged_tiles ${flags} --grid_res ${tres} ${pres} --out_dir ${odir} --beam_model ${beam} --delays ${delays}"
        if write:
            pabeam_command = pabeam_command + " --write"
        commands.append('cd {}'.format(product_dir))
        commands.append('echo "{}"'.format(pabeam_command))
        commands.append(pabeam_command)

        # Combine the output files into one
        commands.append(pabeam_concat_cmd.format(onamebase, onamebase + ".dat"))

        # Remove the partial beam pattern files written by the processes
        commands.append("rm {0}\n".format(onamebase + ".*.dat"))

        module_list = ['mpi4py', 'hyperbeam/v0.3.0']
        submit_slurm(fname, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "12:00:00",
                                   "nodes": nnodes,
                                   "ntasks-per-node": nprocesses},
                     vcstools_version=vcstools_version,
                     queue='cpuq', mem=10240)

        if write_showspec:
            # Now write the showspec batch for this time
            write_showspec_batch(times[i], obsid, ra, dec, freq,
                                 (90 / thetares) + 1, 360 / phires,
                                 onamebase + ".dat", maploc)
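# For reference, each batch body generated by write_batch_files first defines
# shell variables and then invokes pabeam.py through srun, roughly like this
# (hypothetical values, abbreviated):
#   nprocesses=1
#   obsid=1234567890
#   ra='"12:49:12.00"'
#   ...
#   srun --export=all -u -n ${nprocesses} pabeam.py -o ${obsid} -f ${freq} -t ${obstime} ...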
def vcs_recombine(obsid, start_time, stop_time, increment, data_dir, product_dir,
                  vcstools_version="master", nice=0):
    # Load computer-dependent config file
    comp_config = load_config_file()

    logger.info("Running recombine on files")
    jobs_per_node = 8
    target_dir = link = 'combined'
    mdir(data_dir + '/' + target_dir, 'Combined', gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)
    batch_dir = product_dir + "/batch/"
    recombine = "recombine.py"
    checks = "checks.py"
    recombine_binary = "recombine"
    for time_to_get in range(start_time, stop_time, increment):
        process_nsecs = increment if (time_to_get + increment <= stop_time) \
                        else (stop_time - time_to_get + 1)
        if (jobs_per_node > process_nsecs):
            jobs_per_node = process_nsecs
        nodes = (increment + (-increment % jobs_per_node)) // jobs_per_node + 1  # Integer division with ceiling result plus 1 for master node
        recombine_batch = "recombine_{0}".format(time_to_get)
        check_batch = "check_recombine_{0}".format(time_to_get)
        #module_list = ["module switch PrgEnv-cray PrgEnv-gnu", "python/3.6.3", "numpy/1.13.3", "mwa-voltage/master"]
        module_list = ["mwa-voltage/master"]
        commands = []
        commands.append("newcount=0")
        commands.append("let oldcount=$newcount-1")
        commands.append("sed -i -e \"s/oldcount=${{oldcount}}/oldcount=${{newcount}}/\" {0}".\
                        format(batch_dir + recombine_batch + ".batch"))
        commands.append("oldcount=$newcount; let newcount=$newcount+1")
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".\
                        format(batch_dir + recombine_batch + ".batch"))
        checks_command = "-m recombine -o {0} -w {1}/combined/ -b {2} -i {3}".format(
                             obsid, data_dir, time_to_get, process_nsecs)
        commands.append("{0} {1}".format(checks, checks_command))
        commands.append("if [ $? -eq 1 ];then")
        commands.append("sbatch {0}".format(batch_dir + recombine_batch + ".batch"))
        commands.append("fi")
        submit_slurm(check_batch, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "15:00", "nice": nice},
                     vcstools_version=vcstools_version, submit=False,
                     outfile=batch_dir + check_batch + "_0.out",
                     queue='gpuq', export="NONE")

        #module_list = ["module switch PrgEnv-cray PrgEnv-gnu", "python/3.6.3",
        #               "numpy/1.13.3", "mwa-voltage/master", "mpi4py", "cfitsio"]
        module_list = ["mwa-voltage/master", "mpi4py"]
        commands = []
        commands.append("oldcount=0")
        commands.append("let newcount=$oldcount+1")
        commands.append("if [ ${newcount} -gt 10 ]; then")
        commands.append("echo \"Tried ten times, this is silly. Aborting here.\";exit")
        commands.append("fi")
        commands.append("sed -i -e \"s/newcount=${{oldcount}}/newcount=${{newcount}}/\" {0}".\
                        format(batch_dir + check_batch + ".batch"))
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".\
                        format(batch_dir + check_batch + ".batch"))
        commands.append("sbatch -d afterany:${{SLURM_JOB_ID}} {0}".\
                        format(batch_dir + check_batch + ".batch"))
        recombine_command = "-o {0} -s {1} -w {2} -e {3}".format(obsid, time_to_get,
                                                                 data_dir, recombine_binary)
        commands.append("srun --export=all {0} {1}".format(recombine, recombine_command))
        submit_slurm(recombine_batch, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "06:00:00",
                                   "nodes": str(nodes),
                                   "ntasks-per-node": jobs_per_node,
                                   "nice": nice},
                     vcstools_version=vcstools_version,
                     outfile=batch_dir + recombine_batch + "_1.out",
                     queue='gpuq', export="NONE")
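# Illustrative node-count arithmetic for vcs_recombine (hypothetical increment):
# with increment = 60 and jobs_per_node = 8, nodes = (60 + (-60 % 8)) // 8 + 1
# = (60 + 4) // 8 + 1 = 9, i.e. ceil(60 / 8) worker nodes plus one master node.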
def RVM_fit(cfg, depends_on=None, depend_type="afterany"):
    """Calculates parameters for a PSRSALSA ppolFit job and submits it"""
    alpha = cfg["pol"]["alpha"]
    beta = cfg["pol"]["beta"]
    if not cfg["completed"]["RVM_initial"] and not cfg["completed"]["RVM_final"]:
        # Initial
        trials = 200
        alpha_range = np.array((0, 180))
        beta_range = np.array((-30, 30))
        # Decide the longitude range to fit
        my_comp = cfg["source"]["my_component"]
        component_min = cfg["source"]["gfit"]["comp_idx"][my_comp][0] * 360 / len(cfg["source"]["gfit"]["profile"])
        component_max = cfg["source"]["gfit"]["comp_idx"][my_comp][-1] * 360 / len(cfg["source"]["gfit"]["profile"])
        l_cmd = f" -l '{component_min} 1'"
        maxdl_cmd = f" -maxdl {component_max - component_min}"
        # Can't have names greater than 100 characters - use basenames instead
        chigrid_file = basename(cfg['files']['chigrid_initial_ps'])
        paswing_file = basename(cfg['files']['paswing_initial_ps'])
        outfile = cfg['files']['RVM_fit_initial']
        name = f"RVM_fit_initial_{cfg['files']['file_precursor']}"
        cfg["completed"]["RVM_initial"] = True
    else:
        # Final
        trials = 400
        alpha_range = np.array((alpha - 20, alpha + 20))
        beta_range = np.array((beta - 10, beta + 10))
        # forcing the ranges to reasonable values (clip returns a new array)
        alpha_range = alpha_range.clip(0, 180)
        beta_range = beta_range.clip(-30, 30)
        l_cmd = f" -l '{cfg['pol']['l0'] - 10} 1'"
        maxdl_cmd = " -maxdl 20"
        chigrid_file = basename(cfg['files']['chigrid_final_ps'])
        paswing_file = basename(cfg['files']['paswing_final_ps'])
        outfile = cfg['files']['RVM_fit_final']
        name = f"RVM_fit_final_{cfg['files']['file_precursor']}"
        cfg["completed"]["RVM_final"] = True

    # Create the job commands
    commands = [f"cd {cfg['files']['psr_dir']}"]
    ppol_cmd = "ppolFit -showwedge"
    ppol_cmd += f" -g '{trials} {trials}'"
    ppol_cmd += f" -A '{alpha_range[0]} {alpha_range[1]}'"  # Alpha range
    ppol_cmd += f" -B '{beta_range[0]} {beta_range[1]}'"  # Beta range
    ppol_cmd += l_cmd  # longitude start and step size
    ppol_cmd += maxdl_cmd  # longitude search range
    ppol_cmd += " -best"  # return the best fit values
    ppol_cmd += f" -device1 {chigrid_file}/cps"
    ppol_cmd += f" -device2 {paswing_file}/cps"
    ppol_cmd += " -device1res '900 900'"
    ppol_cmd += " *.paswing"
    ppol_cmd += f" > {outfile}"  # redirect stdout to the output file
    commands.append(ppol_cmd)

    # Submit job
    slurm_kwargs = {"time": "02:00:00"}
    mem = 32768
    modules = ["psrsalsa"]
    jid = submit_slurm(name, commands,
                       slurm_kwargs=slurm_kwargs, module_list=modules, mem=mem,
                       batch_dir=cfg["files"]["batch_dir"], depend=depends_on,
                       depend_type=depend_type, vcstools_version=cfg["run_ops"]["vcstools"],
                       submit=True)
    logger.info(f"Submitted RVM fitting job: {name}")
    logger.info(f"Job ID: {jid}")
    if depends_on:
        logger.info(f"Job depends on job id(s): {depends_on}")
    return jid
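# For reference, the initial-pass ppolFit command assembled in RVM_fit looks
# roughly like this (longitude values and file names are hypothetical):
#   ppolFit -showwedge -g '200 200' -A '0 180' -B '-30 30' -l '120.0 1' -maxdl 36.0 \
#       -best -device1 chigrid_initial.ps/cps -device2 paswing_initial.ps/cps \
#       -device1res '900 900' *.paswing > rvm_fit_initial.txt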