def go(self): self.logger.info("Starting thumbnail_combine run") super(thumbnail_combine, self).go() hosts = self.inputs['target_hosts'] command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host in hosts: jobs.append( ComputeJob(host, command, arguments=[ self.inputs['executable'], self.inputs['file_pattern'], self.inputs['input_dir'], self.inputs['output_file'], self.inputs['clobber'] ])) self._schedule_jobs(jobs) if self.error.isSet(): self.logger.warn("Failed compute job process detected") return 1 else: return 0
def go(self): self.logger.info("Starting thumbnail_combine run") super(thumbnail_combine, self).go() # Hosts on which to execute hosts = ['lce019'] # Path to node script command = "python %s" % (self.__file__.replace('master', 'nodes')) # Build a list of jobs jobs = [] for host in hosts: jobs.append( ComputeJob( host, command, arguments=[ "/usr/bin/montage", # executable "*.th.png", # file_pattern "/path/to/png/files", # input_dir "/path/to/output.png", # output_dir True # clobber ])) # And run them self._schedule_jobs(jobs) # The error flag is set if a job failed if self.error.isSet(): self.logger.warn("Failed compute job process detected") return 1 else: return 0
def append_job(self, host, arguments):
    """
    append_job adds a job to the current job list. It expects the host
    and a list of arguments.
    """
    compute_job = ComputeJob(host, self._command, arguments)
    self._jobs.append(compute_job)
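# Hypothetical usage of append_job, inside the master recipe's go() method:
# queue one job per host with its own argument list, then hand the
# accumulated list to the scheduler. The host names and paths are
# illustrative only.
for host, ms in [("lce019", "/data/L12345_SB000.MS"),
                 ("lce020", "/data/L12345_SB001.MS")]:
    self.append_job(host, arguments=[ms])
self._schedule_jobs(self._jobs)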
def go(self): self.logger.info("Starting demixing run") super(demixing, self).go() job_dir = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = load_data_map(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = load_data_map(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed") return 1 else: # This is a bit of a kludge. The input MS-filenames are supposed to # contain the string "_uv". The demixing node script will produce # output MS-files, whose names have the string "_uv" replaced by # "_" + self.inputs['ms_target'] + "_sub". outdata = [(host, os.path.join( job_dir, os.path.basename(infile).replace( '_uv', '_' + self.inputs['ms_target'] + '_sub'))) for host, infile in indata] command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, infile in indata: jobs.append( ComputeJob( host, command, arguments=[ infile, job_dir, self.inputs['initscript'], self.inputs['demix_sources'], self.inputs['ms_target'], self.config.get('cluster', 'clusterdesc'), self.inputs['timestep'], self.inputs['freqstep'], self.inputs['half_window'], self.inputs['threshold'], self.inputs['demix_parset_dir'], self.inputs['skymodel'], self.inputs['db_host'] ])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): return 1 else: self.logger.debug("Writing mapfile %s" % self.inputs['mapfile']) store_data_map(self.inputs['mapfile'], outdata) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def _run_create_dbs_node(self, input_map, slice_paths_map, assoc_theta,
                         source_list_map):
    """
    Decompose the input mapfiles into tasks for specific nodes and
    distribute these to the node recipes. Wait for the jobs to finish and
    return the list of created jobs.
    """
    # Compile the command to be executed on the remote machine
    node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    # create jobs
    jobs = []
    output_map = copy.deepcopy(input_map)

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    align_data_maps(input_map, output_map, slice_paths_map, source_list_map)

    source_list_map.iterator = slice_paths_map.iterator = \
        input_map.iterator = DataMap.SkipIterator
    for idx, (input_item, slice_item, source_list_item) in enumerate(zip(
            input_map, slice_paths_map, source_list_map)):
        host_ms, concat_ms = input_item.host, input_item.file
        host_slice, slice_paths = slice_item.host, slice_item.file

        # Create the parameters depending on the input_map
        sourcedb_target_path = os.path.join(
            concat_ms + self.inputs["sourcedb_suffix"])

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(self.inputs['working_directory'],
                                   "imager_create_dbs_{0}".format(idx))

        # The actual call for the node script
        arguments = [concat_ms, sourcedb_target_path,
                     self.inputs["monetdb_hostname"],
                     self.inputs["monetdb_port"],
                     self.inputs["monetdb_name"],
                     self.inputs["monetdb_user"],
                     self.inputs["monetdb_password"],
                     assoc_theta,
                     self.inputs["parmdb_executable"],
                     slice_paths,
                     self.inputs["parmdb_suffix"],
                     self.environment,
                     working_dir,
                     self.inputs["makesourcedb_path"],
                     source_list_item.file,
                     self.inputs["major_cycle"]]

        jobs.append(ComputeJob(host_ms, node_command, arguments))

    # Wait for the nodes to finish
    if len(jobs) > 0:
        self._schedule_jobs(jobs)

    return jobs, output_map
def go(self): self.logger.info("Starting rficonsole run") super(rficonsole, self).go() # Load file <-> compute node mapping from disk # ---------------------------------------------------------------------- self.logger.debug("Loading map from %s" % self.inputs['args']) data = load_data_map(self.inputs['args'][0]) # Jobs being dispatched to each host are arranged in a dict. Each # entry in the dict is a list of list of filnames to process. # ---------------------------------------------------------------------- hostlist = defaultdict(lambda: list([[]])) for host, filename in data: if ( 'nmeasurementsets' in self.inputs and len(hostlist[host][-1]) >= self.inputs['nmeasurementsets'] ): hostlist[host].append([filename]) else: hostlist[host][-1].append(filename) if 'strategy' in self.inputs: strategy = self.inputs['strategy'] else: strategy = None command = "python3 %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, file_lists in hostlist.items(): for file_list in file_lists: jobs.append( ComputeJob( host, command, arguments=[ self.inputs['executable'], self.inputs['nthreads'], strategy, self.inputs['indirect_read'], self.inputs['skip_flagged'], self.inputs['working_dir'] ] + file_list, resources={ "cores": self.inputs['nthreads'] } ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): self.logger.warn("Failed rficonsole process detected") return 1 else: return 0
def go(self):
    super(example_parallel, self).go()
    node_command = "python %s" % (self.__file__.replace("master", "nodes"))
    job = ComputeJob("localhost", node_command,
                     arguments=["example_argument"])
    self._schedule_jobs([job])
    if self.error.isSet():
        return 1
    else:
        return 0
def _run_jobs(self):
    """
    Create and schedule the compute jobs
    """
    command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
    self.data_map.iterator = DataMap.SkipIterator
    self.inst_map.iterator = DataMap.SkipIterator
    self.sky_map.iterator = DataMap.SkipIterator

    for data, inst, sky in zip(self.data_map, self.inst_map, self.sky_map):
        self.jobs.append(
            ComputeJob(
                data.host, command,
                arguments=[(data.file, inst.file, sky.file),
                           self.inputs['executable'],
                           self.inputs['parset'],
                           self.environment],
                resources={"cores": self.inputs['nthreads']}))
    self._schedule_jobs(self.jobs)
def _run_create_dbs_node(self, input_map, slice_paths_map, assoc_theta):
    """
    Decompose the input mapfiles into tasks for specific nodes and
    distribute these to the node recipes. Wait for the jobs to finish and
    return the list of created jobs.
    """
    # Compile the command to be executed on the remote machine
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))
    # create jobs
    jobs = []
    output_map = copy.deepcopy(input_map)

    # Update the skip fields of the three maps. If 'skip' is True in any
    # of these maps, then 'skip' must be set to True in all maps.
    for w, x, y in zip(input_map, output_map, slice_paths_map):
        w.skip = x.skip = y.skip = (w.skip or x.skip or y.skip)

    slice_paths_map.iterator = input_map.iterator = DataMap.SkipIterator
    for (input_item, slice_item) in zip(input_map, slice_paths_map):
        host_ms, concat_ms = input_item.host, input_item.file
        host_slice, slice_paths = slice_item.host, slice_item.file

        # Create the parameters depending on the input_map
        sourcedb_target_path = os.path.join(
            concat_ms + self.inputs["sourcedb_suffix"])

        # The actual call for the node script
        arguments = [concat_ms, sourcedb_target_path,
                     self.inputs["monetdb_hostname"],
                     self.inputs["monetdb_port"],
                     self.inputs["monetdb_name"],
                     self.inputs["monetdb_user"],
                     self.inputs["monetdb_password"],
                     assoc_theta,
                     self.inputs["parmdb_executable"],
                     slice_paths,
                     self.inputs["parmdb_suffix"],
                     self.environment,
                     self.inputs["working_directory"],
                     self.inputs["makesourcedb_path"],
                     self.inputs["source_list_path"]]

        jobs.append(ComputeJob(host_ms, node_command, arguments))

    # Wait for the nodes to finish
    if len(jobs) > 0:
        self._schedule_jobs(jobs)

    return jobs, output_map
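# The manual skip-alignment loop above matches what the align_data_maps()
# helper does in the newer variant of this recipe. A minimal sketch of the
# assumed semantics, for illustration only:
def align_data_maps(*maps):
    # Make 'skip' consistent across all maps: if any item at a given
    # position is skipped, skip that position everywhere.
    for items in zip(*maps):
        skip = any(item.skip for item in items)
        for item in items:
            item.skip = skip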
def go(self): self.logger.info("Starting flag_baseline run") super(flag_baseline, self).go() # Serialise list of baselines to disk for compute nodes to pick up # ---------------------------------------------------------------------- fd, baseline_filename = mkstemp( dir=self.config.get("layout", "job_directory") ) baseline_file = os.fdopen(fd, "w") dump(self.inputs["baselines"], baseline_file) baseline_file.close() # try block ensures baseline_filename is always unlinked # ---------------------------------------------------------------------- try: # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob( host, command, arguments=[ ms, baseline_filename ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) finally: os.unlink(baseline_filename) if self.error.isSet(): return 1 else: self.outputs['mapfile'] = self.inputs['args'][0] return 0
def go(self): self.logger.info("Starting count_timesteps run") super(count_timesteps, self).go() self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append(ComputeJob(host, command, arguments=[ms])) jobs = self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) self.outputs['start_time'] = min(job.results['start_time'] for job in jobs.values()) self.outputs['end_time'] = max(job.results['end_time'] for job in jobs.values()) if self.error.isSet(): return 1 else: return 0
def go(self): self.logger.info("Starting make_flaggable run") super(make_flaggable, self).go() # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ self.logger.debug("Loading map from %s" % self.inputs['args'][0]) data = load_data_map(self.inputs['args'][0]) command = "python %s" % (self.__file__.replace('master', 'nodes')) jobs = [] for host, ms in data: jobs.append( ComputeJob(host, command, arguments=[ms, self.inputs['makeflagwritable']])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) if self.error.isSet(): return 1 else: self.outputs['mapfile'] = self.inputs['args'][0] return 0
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")

    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))

    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)
    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing"
                          " on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups, subbands_per_image,
                idx_sb_group, input_map)

        # Save the mapfile
        job_directory = self.config.get("layout", "job_directory")
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}".format(idx_sb_group))
        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        arguments = [self.environment,
                     self.inputs['parset'],
                     self.inputs['working_directory'],
                     self.inputs['processed_ms_dir'],
                     self.inputs['ndppp_exec'],
                     item.file,
                     slices_per_image,
                     subbands_per_image,
                     inputs_for_image_mapfile_path,
                     self.inputs['asciistat_executable'],
                     self.inputs['statplot_executable'],
                     self.inputs['msselect_executable'],
                     self.inputs['rficonsole_executable'],
                     self.inputs['add_beam_tables']]

        jobs.append(ComputeJob(item.host, node_command, arguments))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    for (item, job) in zip(concat_ms, jobs):
        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(
                tuple([item.host, job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, ["/Failed"], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished:"
            " The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with actual input mss.
    self._store_data_map(self.inputs["raw_ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (raw) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['raw_ms_per_image_mapfile'] = \
        self.inputs["raw_ms_per_image_mapfile"]
    return 0
def go(self):
    """
    """
    super(imager_source_finding, self).go()
    self.logger.info("Starting imager_source_finding run")

    # ********************************************************************
    # 1. load mapfiles with input images and collect some parameters from
    # the input ingredients
    input_map = DataMap.load(self.inputs['args'][0])
    catalog_output_path = self.inputs["catalog_output_path"]

    # ********************************************************************
    # 2. Start the node script
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    input_map.iterator = DataMap.SkipIterator
    for idx, item in enumerate(input_map):
        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(self.inputs['working_directory'],
                                   "imager_source_finding_{0}".format(idx))

        arguments = [item.file,
                     self.inputs["bdsm_parset_file_run1"],
                     self.inputs["bdsm_parset_file_run2x"],
                     "%s-%s" % (catalog_output_path, idx),
                     os.path.join(self.inputs["working_directory"],
                                  "bdsm_output-%s.img" % (idx,)),
                     "%s-%s" % (self.inputs['sourcedb_target_path'], idx),
                     self.environment,
                     working_dir,
                     self.inputs['makesourcedb_path']]

        jobs.append(ComputeJob(item.host, node_command, arguments))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # ********************************************************************
    # 3. Test for errors and return output
    if self.error.isSet():
        self.logger.warn("Failed imager_source_finding run detected")

    # Collect the nodes that succeeded
    source_dbs_from_nodes = copy.deepcopy(input_map)
    catalog_output_path_from_nodes = copy.deepcopy(input_map)
    source_dbs_from_nodes.iterator = \
        catalog_output_path_from_nodes.iterator = DataMap.SkipIterator

    # initialised here so the success check below cannot hit an
    # unassigned variable when every job fails
    successful_job = False
    for job, sourcedb_item, catalog_item in zip(
            jobs, source_dbs_from_nodes, catalog_output_path_from_nodes):

        if "source_db" in job.results:
            successful_job = True
            sourcedb_item.file = job.results["source_db"]
            catalog_item.file = job.results["catalog_output_path"]
        else:
            sourcedb_item.file = "failed"
            sourcedb_item.skip = True
            catalog_item.file = "failed"
            catalog_item.skip = True

    # We now also have the catalog path
    # Abort if none of the recipes succeeded
    if not successful_job:
        self.logger.error("None of the source finding recipes succeeded")
        self.logger.error("Exiting with a failure status")
        return 1

    self._store_data_map(self.inputs['mapfile'],
                         catalog_output_path_from_nodes,
                         "datamap with created sourcelists")
    self._store_data_map(self.inputs['sourcedb_map_path'],
                         source_dbs_from_nodes,
                         "datamap with created sourcedbs")

    self.outputs["mapfile"] = self.inputs['mapfile']
    self.outputs["sourcedb_map_path"] = self.inputs['sourcedb_map_path']
    return 0
def go(self): self.logger.info("Starting BBS run") super(bbs, self).go() # Generate source and parameter databases for all input data # ---------------------------------------------------------------------- inputs = LOFARinput(self.inputs) inputs['args'] = self.inputs['args'] inputs['executable'] = self.inputs['parmdbm'] inputs['working_directory'] = self.config.get( "DEFAULT", "default_working_directory") inputs['mapfile'] = self.task_definitions.get('parmdb', 'mapfile') inputs['suffix'] = ".instrument" outputs = LOFARoutput(self.inputs) if self.cook_recipe('parmdb', inputs, outputs): self.logger.warn("parmdb reports failure") return 1 inputs['args'] = self.inputs['args'] inputs['executable'] = self.inputs['makesourcedb'] inputs['skymodel'] = self.inputs['skymodel'] inputs['mapfile'] = self.task_definitions.get('sourcedb', 'mapfile') inputs['suffix'] = ".sky" outputs = LOFARoutput(self.inputs) if self.cook_recipe('sourcedb', inputs, outputs): self.logger.warn("sourcedb reports failure") return 1 # Build a GVDS file describing all the data to be processed # ---------------------------------------------------------------------- self.logger.debug("Building VDS file describing all data for BBS") vds_file = os.path.join(self.config.get("layout", "job_directory"), "vds", "bbs.gvds") inputs = LOFARinput(self.inputs) inputs['args'] = self.inputs['args'] inputs['gvds'] = vds_file inputs['unlink'] = False inputs['makevds'] = self.inputs['makevds'] inputs['combinevds'] = self.inputs['combinevds'] inputs['nproc'] = self.inputs['nproc'] inputs['directory'] = os.path.dirname(vds_file) outputs = LOFARoutput(self.inputs) if self.cook_recipe('vdsmaker', inputs, outputs): self.logger.warn("vdsmaker reports failure") return 1 self.logger.debug("BBS GVDS is %s" % (vds_file, )) # Iterate over groups of subbands divided up for convenient cluster # procesing -- ie, no more than nproc subbands per compute node # ---------------------------------------------------------------------- for to_process in gvds_iterator(vds_file, int(self.inputs["nproc"])): # to_process is a list of (host, filename, vds) tuples # ------------------------------------------------------------------ hosts, ms_names, vds_files = map(list, zip(*to_process)) # The BBS session database should be cleared for our key # ------------------------------------------------------------------ self.logger.debug("Cleaning BBS database for key %s" % (self.inputs["key"])) with closing( psycopg2.connect( host=self.inputs["db_host"], user=self.inputs["db_user"], database=self.inputs["db_name"])) as db_connection: db_connection.set_isolation_level( psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) with closing(db_connection.cursor()) as db_cursor: db_cursor.execute( "DELETE FROM blackboard.session WHERE key=%s", (self.inputs["key"], )) # BBS GlobalControl requires a GVDS file describing all the data # to be processed. We assemble that from the separate parts # already available on disk. 
# ------------------------------------------------------------------ self.logger.debug("Building VDS file describing data for BBS run") vds_dir = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__), )) vds_file = os.path.join(vds_dir, "bbs.gvds") combineproc = utilities.spawn_process([ self.inputs['combinevds'], vds_file, ] + vds_files, self.logger) sout, serr = combineproc.communicate() log_process_output(self.inputs['combinevds'], sout, serr, self.logger) if combineproc.returncode != 0: raise subprocess.CalledProcessError(combineproc.returncode, command) # Construct a parset for BBS GlobalControl by patching the GVDS # file and database information into the supplied template # ------------------------------------------------------------------ self.logger.debug("Building parset for BBS control") bbs_parset = utilities.patch_parset( self.inputs['parset'], { 'Observation': vds_file, 'BBDB.Key': self.inputs['key'], 'BBDB.Name': self.inputs['db_name'], 'BBDB.User': self.inputs['db_user'], 'BBDB.Host': self.inputs['db_host'], # 'BBDB.Port': self.inputs['db_name'], }) self.logger.debug("BBS control parset is %s" % (bbs_parset, )) try: # When one of our processes fails, we set the killswitch. # Everything else will then come crashing down, rather than # hanging about forever. # -------------------------------------------------------------- self.killswitch = threading.Event() self.killswitch.clear() signal.signal(signal.SIGTERM, self.killswitch.set) # GlobalControl runs in its own thread # -------------------------------------------------------------- run_flag = threading.Event() run_flag.clear() bbs_control = threading.Thread(target=self._run_bbs_control, args=(bbs_parset, run_flag)) bbs_control.start() run_flag.wait() # Wait for control to start before proceeding # We run BBS KernelControl on each compute node by directly # invoking the node script using SSH # Note that we use a job_server to send out job details and # collect logging information, so we define a bunch of # ComputeJobs. However, we need more control than the generic # ComputeJob.dispatch method supplies, so we'll control them # with our own threads. 
# -------------------------------------------------------------- command = "python %s" % (self.__file__.replace( 'master', 'nodes')) env = { "LOFARROOT": utilities.read_initscript( self.logger, self.inputs['initscript'])["LOFARROOT"], "PYTHONPATH": self.config.get('deploy', 'engine_ppath'), "LD_LIBRARY_PATH": self.config.get('deploy', 'engine_lpath') } jobpool = {} bbs_kernels = [] with job_server(self.logger, jobpool, self.error) as (jobhost, jobport): self.logger.debug("Job server at %s:%d" % (jobhost, jobport)) for job_id, details in enumerate(to_process): host, file, vds = details jobpool[job_id] = ComputeJob( host, command, arguments=[ self.inputs['kernel_exec'], self.inputs['initscript'], file, self.inputs['key'], self.inputs['db_name'], self.inputs['db_user'], self.inputs['db_host'] ]) bbs_kernels.append( threading.Thread(target=self._run_bbs_kernel, args=(host, command, env, job_id, jobhost, str(jobport)))) self.logger.info("Starting %d threads" % len(bbs_kernels)) [thread.start() for thread in bbs_kernels] self.logger.debug("Waiting for all kernels to complete") [thread.join() for thread in bbs_kernels] # When GlobalControl finishes, our work here is done # ---------------------------------------------------------- self.logger.info("Waiting for GlobalControl thread") bbs_control.join() finally: os.unlink(bbs_parset) shutil.rmtree(vds_dir) if self.killswitch.isSet(): # If killswitch is set, then one of our processes failed so # the whole run is invalid # ---------------------------------------------------------- return 1 return 0
def go(self): self.logger.info("Starting setupsourcedb run") super(setupsourcedb, self).go() # ********************************************************************* # 1. Load input and output mapfiles. Validate args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = DataMap.load(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = DataMap.load(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed" ) return 1 else: outdata = copy.deepcopy(indata) for item in outdata: item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], os.path.basename(item.file) + self.inputs['suffix'] ) # ********************************************************************* # 2. Check if input skymodel file exists. If not, make filename empty. try: skymodel = self.inputs['skymodel'] except KeyError: skymodel = "" self.logger.info("No skymodel specified. Using an empty one") # ******************************************************************** # 3. Call node side of script command = "python %s" % (self.__file__.replace('master', 'nodes')) outdata.iterator = DataMap.SkipIterator jobs = [] for outp in outdata: jobs.append( ComputeJob( outp.host, command, arguments=[ self.inputs['executable'], skymodel, outp.file, self.inputs['type'] ] ) ) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) for job, outp in zip(jobs, outdata): if job.results['returncode'] != 0: outp.skip = True # ********************************************************************* # 4. Check job results, and create output data map file if self.error.isSet(): # Abort if all jobs failed if all(job.results['returncode'] != 0 for job in jobs): self.logger.error("All jobs failed. Bailing out!") return 1 else: self.logger.warn( "Some jobs failed, continuing with succeeded runs" ) self.logger.debug("Writing sky map file: %s" % self.inputs['mapfile']) outdata.save(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def go(self):
    if 'executable' in self.inputs:
        executable = self.inputs['executable']

    if self.inputs['nthreads']:
        self.environment["OMP_NUM_THREADS"] = str(self.inputs['nthreads'])

    if 'environment' in self.inputs:
        self.environment.update(self.inputs['environment'])

    self.logger.info("Starting %s run" % executable)
    super(executable_args, self).go()

    # args format stuff
    args_format = {
        'args_format': self.inputs['args_format'],
        'args_format_argument': self.inputs['args_format_argument'],
        'args_format_option': self.inputs['args_format_option'],
        'args_formatlongoption': self.inputs['args_format_longoption'],
        'args_format_option_argument':
            self.inputs['args_format_option_argument']
    }
    mapfile_dir = os.path.join(self.config.get("layout", "job_directory"),
                               "mapfiles")
    work_dir = os.path.join(self.inputs['working_directory'],
                            self.inputs['job_name'])
    # *********************************************************************
    # try loading input/output data files; validate output vs the input
    # location if output locations are provided
    try:
        inputmapfiles = []
        inlist = []
        if self.inputs['mapfile_in']:
            inlist.append(self.inputs['mapfile_in'])

        if self.inputs['mapfiles_in']:
            for item in self.inputs['mapfiles_in']:
                inlist.append(item)
            self.inputs['mapfile_in'] = self.inputs['mapfiles_in'][0]

        for item in inlist:
            inputmapfiles.append(DataMap.load(item))

    except Exception:
        self.logger.error('Could not load input Mapfile %s' % inlist)
        return 1

    outputmapfiles = []
    if self.inputs['mapfile_out']:
        try:
            outdata = DataMap.load(self.inputs['mapfile_out'])
            outputmapfiles.append(outdata)
        except Exception:
            self.logger.error('Could not load output Mapfile %s'
                              % self.inputs['mapfile_out'])
            return 1
        # sync skip fields in the mapfiles
        align_data_maps(inputmapfiles[0], outputmapfiles[0])

    elif self.inputs['mapfiles_out']:
        for item in self.inputs['mapfiles_out']:
            outputmapfiles.append(DataMap.load(item))
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    else:
        # output will be directed to the working directory if no output
        # mapfile is specified
        outdata = copy.deepcopy(inputmapfiles[0])
        if not self.inputs['inplace']:
            for item in outdata:
                item.file = os.path.join(
                    self.inputs['working_directory'],
                    self.inputs['job_name'],
                    #os.path.basename(item.file) + '.' + os.path.split(str(executable))[1]
                    os.path.splitext(os.path.basename(item.file))[0]
                    + '.' + self.inputs['stepname']
                )
            self.inputs['mapfile_out'] = os.path.join(
                mapfile_dir, self.inputs['stepname'] + '.' + 'mapfile')
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        else:
            self.inputs['mapfile_out'] = self.inputs['mapfile_in']
            self.inputs['mapfiles_out'].append(self.inputs['mapfile_out'])
        outputmapfiles.append(outdata)

    if not validate_data_maps(inputmapfiles[0], outputmapfiles[0]):
        self.logger.error("Validation of data mapfiles failed!")
        return 1

    if self.inputs['outputsuffixes']:
        # Handle multiple output files
        for name in self.inputs['outputsuffixes']:
            outputmapfiles.append(copy.deepcopy(inputmapfiles[0]))
            self.inputs['mapfiles_out'].append(os.path.join(
                mapfile_dir,
                self.inputs['stepname'] + name + '.' + 'mapfile'))
            for item in outputmapfiles[-1]:
                item.file = os.path.join(
                    work_dir,
                    os.path.splitext(os.path.basename(item.file))[0]
                    + '.' + self.inputs['stepname'] + name
                )
        self.inputs['mapfile_out'] = self.inputs['mapfiles_out'][0]

    # prepare arguments
    arglist = self.inputs['arguments']
    parsetdict = {}
    if 'parset' in self.inputs:
        parset = Parset()
        parset.adoptFile(self.inputs['parset'])
        for k in parset.keys:
            parsetdict[k] = str(parset[k])

    # construct multiple input data
    if self.inputs['inputkey'] and \
       not self.inputs['inputkey'] in self.inputs['inputkeys']:
        self.inputs['inputkeys'].insert(0, self.inputs['inputkey'])

    if not self.inputs['outputkeys'] and self.inputs['outputkey']:
        self.inputs['outputkeys'].append(self.inputs['outputkey'])

    if not self.inputs['skip_infile'] and \
       len(self.inputs['inputkeys']) != len(inputmapfiles):
        self.logger.error(
            "Number of input mapfiles %d and input keys %d have to match."
            % (len(inputmapfiles), len(self.inputs['inputkeys'])))
        return 1

    filedict = {}
    if self.inputs['inputkeys'] and not self.inputs['skip_infile']:
        for key, filemap, mapname in zip(self.inputs['inputkeys'],
                                         inputmapfiles, inlist):
            if not mapname in self.inputs['mapfiles_as_string']:
                filedict[key] = []
                for inp in filemap:
                    filedict[key].append(inp.file)
            else:
                if key != mapname:
                    filedict[key] = []
                    for inp in filemap:
                        filedict[key].append(mapname)

    if self.inputs['outputkey']:
        filedict[self.inputs['outputkey']] = []
        for item in outputmapfiles[0]:
            filedict[self.inputs['outputkey']].append(item.file)

    # ********************************************************************
    # Call the node side of the recipe
    # Create and schedule the compute jobs
    #command = "python3 %s" % (self.__file__.replace('master', 'nodes')).replace('executable_args', self.inputs['nodescript'])
    recipe_dir_str = str(self.config.get('DEFAULT', 'recipe_directories'))
    recipe_directories = recipe_dir_str.rstrip(']').lstrip('[').split(',')
    pylist = os.getenv('PYTHONPATH').split(':')
    command = None
    for pl in pylist:
        if os.path.isfile(os.path.join(
                pl, 'lofarpipe/recipes/nodes/'
                + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'lofarpipe/recipes/nodes/'
                + self.inputs['nodescript'] + '.py')
    for pl in recipe_directories:
        if os.path.isfile(os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')):
            command = "python3 %s" % os.path.join(
                pl, 'nodes/' + self.inputs['nodescript'] + '.py')

    inputmapfiles[0].iterator = outputmapfiles[0].iterator = \
        DataMap.SkipIterator
    jobs = []
    for i, (outp, inp,) in enumerate(zip(outputmapfiles[0],
                                         inputmapfiles[0])):
        arglist_copy = copy.deepcopy(arglist)
        parsetdict_copy = copy.deepcopy(parsetdict)

        if filedict:
            for name, value in filedict.items():
                replaced = False
                if arglist_copy:
                    for arg in arglist:
                        if name == arg:
                            ind = arglist_copy.index(arg)
                            arglist_copy[ind] = arglist_copy[ind].replace(
                                name, value[i])
                            replaced = True
                if parsetdict_copy:
                    if name in list(parsetdict_copy.values()):
                        for k, v in parsetdict_copy.items():
                            if v == name:
                                parsetdict_copy[k] = value[i]
                    else:
                        if not replaced:
                            parsetdict_copy[name] = value[i]

        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    executable,
                    arglist_copy,
                    parsetdict_copy,
                    work_dir,
                    self.inputs['parsetasfile'],
                    args_format,
                    self.environment
                ],
                resources={
                    "cores": self.inputs['nthreads']
                }
            )
        )
    max_per_node = self.inputs['max_per_node']
    self._schedule_jobs(jobs, max_per_node)
    jobresultdict = {}
    resultmap = {}
    for job, outp in zip(jobs, outputmapfiles[0]):
        if job.results['returncode'] != 0:
            outp.skip = True
            if not self.inputs['error_tolerance']:
                self.logger.error(
                    "A job has failed with returncode %d and "
                    "error_tolerance is not set. Bailing out!"
                    % job.results['returncode'])
                return 1
        for k, v in list(job.results.items()):
            if not k in jobresultdict:
                jobresultdict[k] = []
            jobresultdict[k].append(
                DataProduct(job.host, job.results[k], outp.skip))
            if k == 'break':
                self.outputs.update({'break': v})

    # temp solution: write all output dict entries to a mapfile
    #mapfile_dir = os.path.join(self.config.get("layout", "job_directory"), "mapfiles")
    # check directory for stand-alone mode
    if not os.path.isdir(mapfile_dir):
        try:
            os.mkdir(mapfile_dir)
        except OSError as exc:    # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(mapfile_dir):
                pass
            else:
                raise
    for k, v in list(jobresultdict.items()):
        dmap = DataMap(v)
        dmap.save(os.path.join(
            mapfile_dir,
            self.inputs['stepname'] + '.' + k + '.mapfile'))
        resultmap[k + '.mapfile'] = os.path.join(
            mapfile_dir,
            self.inputs['stepname'] + '.' + k + '.mapfile')
    self.outputs.update(resultmap)

    # *********************************************************************
    # Check job results, and create output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")
    mapdict = {}
    for item, name in zip(outputmapfiles, self.inputs['mapfiles_out']):
        self.logger.debug("Writing data map file: %s" % name)
        item.save(name)
        mapdict[os.path.basename(name)] = name

    self.outputs['mapfile'] = self.inputs['mapfile_out']
    if self.inputs['outputsuffixes']:
        self.outputs.update(mapdict)

    return 0
def go(self): self.logger.info("Starting setupparmdb run") super(setupparmdb, self).go() # ********************************************************************* # 1. Create a temporary template parmdb at the master side of the recipe self.logger.info("Generating template parmdb") # generate a temp dir pdbdir = tempfile.mkdtemp( dir=self.config.get("layout", "job_directory"), suffix=".%s" % (os.path.basename(__file__), )) pdbfile = os.path.join(pdbdir, self.inputs['suffix']) # Create a template use tempdir for location try: parmdbm_process = subprocess.Popen([self.inputs['executable']], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = communicate_returning_strings( parmdbm_process, input=(template % pdbfile).encode()) log_process_output("parmdbm", sout, serr, self.logger) except OSError as err: self.logger.error("Failed to spawn parmdbm: %s" % str(err)) return 1 # ********************************************************************* # 2. Call node side of recipe with template and possible targets # If output location are provided as input these are validated. try: # Load file <-> compute node mapping from disk # ------------------------------------------------------------------ args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = DataMap.load(args[0]) if len(args) > 1: # If output location provide validate the input and outputmap self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = DataMap.load(args[1]) if not validate_data_maps(indata, outdata): self.logger.error( "Validation of input/output data mapfiles failed") return 1 # else output location is inputlocation+suffix else: outdata = copy.deepcopy(indata) for item in outdata: item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], os.path.basename(item.file) + self.inputs['suffix']) # Call the node side command = "python3 %s" % (self.__file__.replace('master', 'nodes')) outdata.iterator = DataMap.SkipIterator jobs = [] for outp in outdata: jobs.append( ComputeJob(outp.host, command, arguments=[pdbfile, outp.file])) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) for job, outp in zip(jobs, outdata): # If the returncode is 123456, failing ssh if job.results['returncode'] == 123456: self.logger.warning( "ssh connection with {0} failed." "Skipping further work on this task".format(outp.host)) self.logger.warning("Error code 123456.") outp.skip = True elif job.results['returncode'] != 0: outp.skip = True # ********************************************************************* # 3. validate performance, cleanup of temp files, construct output finally: self.logger.debug("Removing template parmdb") shutil.rmtree(pdbdir, ignore_errors=True) if self.error.isSet(): # Abort if all jobs failed if all(job.results['returncode'] != 0 for job in jobs): self.logger.error("All jobs failed. Bailing out!") return 1 else: self.logger.warn( "Some jobs failed, continuing with succeeded runs") self.logger.debug("Writing parmdb map file: %s" % self.inputs['mapfile']) outdata.save(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def go(self):
    super(gainoutliercorrection, self).go()
    self.logger.info("Starting gainoutliercorrection run")
    # ********************************************************************
    # 1. Validate input
    # If sigma is None, use the default behaviour and use the executable:
    # test if it exists
    executable = self.inputs['executable']
    if executable == "":
        pass
    elif not os.access(executable, os.X_OK):
        self.logger.warn(
            "No parmexportcal executable found at the supplied"
            " path: {0}".format(self.inputs['executable']))
        self.logger.warn("Defaulting to edit_parmdb behaviour")

    # ********************************************************************
    # 2. load mapfiles, validate if a target output location is provided
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    indata = DataMap.load(args[0])
    if len(args) > 1:
        self.logger.debug("Loading output-data mapfile: %s" % args[1])
        outdata = DataMap.load(args[1])
        if not validate_data_maps(indata, outdata):
            self.logger.error(
                "Validation of input/output data mapfiles failed")
            return 1
    else:
        outdata = copy.deepcopy(indata)
        for item in outdata:
            item.file = os.path.join(
                self.inputs['working_directory'],
                self.inputs['job_name'],
                (os.path.splitext(os.path.basename(item.file))[0] +
                 self.inputs['suffix']))

    # Update the skip fields of the two maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    for x, y in zip(indata, outdata):
        x.skip = y.skip = (x.skip or y.skip)

    # ********************************************************************
    # 3. Call node side of the recipe
    command = "python3 %s" % (self.__file__.replace('master', 'nodes'))
    indata.iterator = outdata.iterator = DataMap.SkipIterator
    jobs = []
    for inp, outp in zip(indata, outdata):
        jobs.append(
            ComputeJob(
                outp.host, command,
                arguments=[
                    inp.file,
                    outp.file,
                    self.inputs['executable'],
                    self.environment,
                    self.inputs['sigma'],
                    self.inputs['export_instrument_model']
                ]))
    self._schedule_jobs(jobs)
    for job, outp in zip(jobs, outdata):
        if job.results['returncode'] != 0:
            outp.skip = True

    # ********************************************************************
    # 4. validate performance, return corrected files
    if self.error.isSet():
        self.logger.warn("Detected failed gainoutliercorrection job")
        return 1
    else:
        self.logger.debug("Writing instrument map file: %s"
                          % self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def go(self):
    """
    Entry point for recipe: Called by the pipeline framework
    """
    super(imager_prepare, self).go()
    self.logger.info("Starting imager_prepare run")
    job_directory = self.config.get("layout", "job_directory")
    # *********************************************************************
    # input data
    input_map = DataMap.load(self.inputs['args'][0])
    output_map = DataMap.load(self.inputs['target_mapfile'])
    slices_per_image = self.inputs['slices_per_image']
    subbands_per_image = self.inputs['subbands_per_image']
    # Validate input
    if not self._validate_input_map(input_map, output_map,
                                    slices_per_image, subbands_per_image):
        return 1

    # outputs
    output_ms_mapfile_path = self.inputs['mapfile']

    # *********************************************************************
    # schedule the actual work
    # TODO: Refactor this function into: load data, perform work,
    # create output
    node_command = " python %s" % (self.__file__.replace("master", "nodes"))

    jobs = []
    paths_to_image_mapfiles = []
    n_subband_groups = len(output_map)   # needed for subsets in sb list

    globalfs = self.config.has_option("remote", "globalfs") and \
               self.config.getboolean("remote", "globalfs")

    for idx_sb_group, item in enumerate(output_map):
        # create the input files for this node
        self.logger.debug("Creating input data subset for processing"
                          " on: {0}".format(item.host))
        inputs_for_image_map = \
            self._create_input_map_for_sbgroup(
                slices_per_image, n_subband_groups, subbands_per_image,
                idx_sb_group, input_map)

        # Save the mapfile
        inputs_for_image_mapfile_path = os.path.join(
            job_directory, "mapfiles",
            "ms_per_image_{0}.map".format(idx_sb_group))
        self._store_data_map(inputs_for_image_mapfile_path,
                             inputs_for_image_map, "inputmap for location")

        # skip the current step if skip is set; we cannot use the skip
        # iterator because of the enumerate: there is a dependency on the
        # index in the map
        if item.skip == True:
            # assure that the mapfile is correct
            paths_to_image_mapfiles.append(tuple([item.host, [], True]))
            continue

        # save the (input) ms, as a list of mapfiles
        paths_to_image_mapfiles.append(
            tuple([item.host, inputs_for_image_mapfile_path, False]))

        # use unique working directories per job, to prevent interference
        # between jobs on a global fs
        working_dir = os.path.join(
            self.inputs['working_directory'],
            "imager_prepare_{0}".format(idx_sb_group))

        arguments = [self.environment,
                     self.inputs['parset'],
                     working_dir,
                     self.inputs['processed_ms_dir'],
                     self.inputs['ndppp_exec'],
                     item.file,
                     slices_per_image,
                     subbands_per_image,
                     inputs_for_image_mapfile_path,
                     self.inputs['asciistat_executable'],
                     self.inputs['statplot_executable'],
                     self.inputs['msselect_executable'],
                     self.inputs['rficonsole_executable'],
                     self.inputs['do_rficonsole'],
                     self.inputs['add_beam_tables'],
                     globalfs]

        jobs.append(
            ComputeJob(item.host, node_command, arguments,
                       resources={"cores": self.inputs['nthreads']}))

    # Hand over the job(s) to the pipeline scheduler
    self._schedule_jobs(jobs)

    # *********************************************************************
    # validate the output, cleanup, return output
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed prepare_imager run detected: Generating "
                         "new output_ms_mapfile_path without failed runs:"
                         " {0}".format(output_ms_mapfile_path))

    concat_ms = copy.deepcopy(output_map)
    slices = []
    finished_runs = 0
    # scan the return dict for the completed key
    # loop over the potential jobs including the skipped
    # If we have a skipped item, add the item to the slices with skip set
    jobs_idx = 0
    for item in concat_ms:
        # If this is an item that is skipped via the skip parameter in
        # the parset, append a skipped entry
        if item.skip:
            slices.append(tuple([item.host, [], True]))
            continue

        # we cannot use the skip iterator so we need to manually get the
        # current job from the list
        job = jobs[jobs_idx]

        # only save the slices if the node has completed successfully
        if job.results["returncode"] == 0:
            finished_runs += 1
            slices.append(
                tuple([item.host, job.results["time_slices"], False]))
        else:
            # Set the dataproduct to skipped!!
            item.skip = True
            slices.append(tuple([item.host, [], True]))
            msg = "Failed run on {0}. NOT Created: {1} ".format(
                item.host, item.file)
            self.logger.warn(msg)

        # we have a non-skipped work item, increase the job idx
        jobs_idx += 1

    if finished_runs == 0:
        self.logger.error(
            "None of the started compute nodes finished:"
            " The current recipe produced no output, aborting")
        return 1

    # Write the output mapfiles:
    # concat.ms paths:
    self._store_data_map(output_ms_mapfile_path, concat_ms,
                         "mapfile with concat.ms")

    # timeslices
    MultiDataMap(slices).save(self.inputs['slices_mapfile'])
    self.logger.info(
        "Wrote MultiMapfile with produced timeslices: {0}".format(
            self.inputs['slices_mapfile']))

    # map with actual input mss.
    self._store_data_map(self.inputs["ms_per_image_mapfile"],
                         DataMap(paths_to_image_mapfiles),
                         "mapfile containing (used) input ms per image:")

    # Set the return values
    self.outputs['mapfile'] = output_ms_mapfile_path
    self.outputs['slices_mapfile'] = self.inputs['slices_mapfile']
    self.outputs['ms_per_image_mapfile'] = \
        self.inputs["ms_per_image_mapfile"]
    return 0
def go(self): self.logger.info("Starting DPPP run") super(dppp, self).go() # # Keep track of "Total flagged" messages in the DPPP logs # # ---------------------------------------------------------------------- # self.logger.searchpatterns["fullyflagged"] = "Fully flagged baselines" # ********************************************************************* # 1. load input data file, validate output vs the input location if # output locations are provided args = self.inputs['args'] self.logger.debug("Loading input-data mapfile: %s" % args[0]) indata = DataMap.load(args[0]) if len(args) > 1: self.logger.debug("Loading output-data mapfile: %s" % args[1]) outdata = DataMap.load(args[1]) else: outdata = copy.deepcopy(indata) for item in outdata: item.file = os.path.join( self.inputs['working_directory'], self.inputs['job_name'], os.path.basename(item.file) + self.inputs['suffix']) # ******************************************************************** # 2. Load parmdb and sourcedb # Load parmdb-mapfile, if one was given. if self.inputs.has_key('parmdb_mapfile'): self.logger.debug("Loading parmdb mapfile: %s" % self.inputs['parmdb_mapfile']) parmdbdata = DataMap.load(self.inputs['parmdb_mapfile']) else: parmdbdata = copy.deepcopy(indata) for item in parmdbdata: item.file = '' # Load sourcedb-mapfile, if one was given. if self.inputs.has_key('sourcedb_mapfile'): self.logger.debug("Loading sourcedb mapfile: %s" % self.inputs['sourcedb_mapfile']) sourcedbdata = DataMap.load(self.inputs['sourcedb_mapfile']) else: sourcedbdata = copy.deepcopy(indata) for item in sourcedbdata: item.file = '' # Validate all the data maps. if not validate_data_maps(indata, outdata, parmdbdata, sourcedbdata): self.logger.error("Validation of data mapfiles failed!") return 1 # Update the skip fields of the four maps. If 'skip' is True in any of # these maps, then 'skip' must be set to True in all maps. for w, x, y, z in zip(indata, outdata, parmdbdata, sourcedbdata): w.skip = x.skip = y.skip = z.skip = (w.skip or x.skip or y.skip or z.skip) # ******************************************************************** # 3. Call the node side of the recipe # Create and schedule the compute jobs command = "python %s" % (self.__file__.replace('master', 'nodes')) indata.iterator = outdata.iterator = DataMap.SkipIterator parmdbdata.iterator = sourcedbdata.iterator = DataMap.SkipIterator jobs = [] for inp, outp, pdb, sdb in zip(indata, outdata, parmdbdata, sourcedbdata): jobs.append( ComputeJob(inp.host, command, arguments=[ inp.file, outp.file, pdb.file, sdb.file, self.inputs['parset'], self.inputs['executable'], self.environment, self.inputs['demix_always'], self.inputs['demix_if_needed'], self.inputs['data_start_time'], self.inputs['data_end_time'], self.inputs['nthreads'], self.inputs['clobber'] ], resources={"cores": self.inputs['nthreads']})) self._schedule_jobs(jobs, max_per_node=self.inputs['nproc']) for job, outp in zip(jobs, outdata): if job.results['returncode'] != 0: outp.skip = True # # ********************************************************************* # # 4. 
parse logfile for fully flagged baselines # matches = self.logger.searchpatterns["fullyflagged"].results # self.logger.searchpatterns.clear() # finished searching # stripchars = "".join(set("Fully flagged baselines: ")) # baselinecounter = defaultdict(lambda: 0) # for match in matches: # for pair in ( # pair.strip(stripchars) for pair in match.getMessage().split(";") # ): # baselinecounter[pair] += 1 # self.outputs['fullyflagged'] = baselinecounter.keys() # ********************************************************************* # 4. Check job results, and create output data map file if self.error.isSet(): # Abort if all jobs failed if all(job.results['returncode'] != 0 for job in jobs): self.logger.error("All jobs failed. Bailing out!") return 1 else: self.logger.warn( "Some jobs failed, continuing with succeeded runs") self.logger.debug("Writing data map file: %s" % self.inputs['mapfile']) outdata.save(self.inputs['mapfile']) self.outputs['mapfile'] = self.inputs['mapfile'] return 0
def go(self):
    """
    Steps:
    1. Load and validate the input datamaps
    2. Run the node parts of the recipe
    3. Validate node output and format the recipe output
    """
    super(selfcal_finalize, self).go()
    # *********************************************************************
    # 1. Load the datamaps
    awimager_output_map = DataMap.load(
        self.inputs["awimager_output_map"])
    ms_per_image_map = DataMap.load(self.inputs["ms_per_image_map"])
    sourcelist_map = DataMap.load(self.inputs["sourcelist_map"])
    sourcedb_map = DataMap.load(self.inputs["sourcedb_map"])
    target_mapfile = DataMap.load(self.inputs["target_mapfile"])
    output_image_mapfile = DataMap.load(
        self.inputs["output_image_mapfile"])
    concat_ms_mapfile = DataMap.load(self.inputs["concat_ms_map_path"])
    output_correlated_map = DataMap.load(
        self.inputs["output_correlated_mapfile"])
    processed_ms_dir = self.inputs["processed_ms_dir"]
    fillrootimagegroup_exec = self.inputs["fillrootimagegroup_exec"]

    # Align the skip fields
    align_data_maps(awimager_output_map, ms_per_image_map,
                    sourcelist_map, target_mapfile, output_image_mapfile,
                    sourcedb_map, concat_ms_mapfile, output_correlated_map)

    # Set the correct iterator
    sourcelist_map.iterator = awimager_output_map.iterator = \
        ms_per_image_map.iterator = target_mapfile.iterator = \
        output_image_mapfile.iterator = sourcedb_map.iterator = \
        concat_ms_mapfile.iterator = output_correlated_map.iterator = \
        DataMap.SkipIterator

    # *********************************************************************
    # 2. Run the node side of the recipe
    command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    jobs = []
    for (awimager_output_item, ms_per_image_item, sourcelist_item,
         target_item, output_image_item, sourcedb_item,
         concat_ms_item, correlated_item) in zip(
             awimager_output_map, ms_per_image_map, sourcelist_map,
             target_mapfile, output_image_mapfile, sourcedb_map,
             concat_ms_mapfile, output_correlated_map):
        # collect the files as argument
        arguments = [awimager_output_item.file,
                     ms_per_image_item.file,
                     sourcelist_item.file,
                     target_item.file,
                     output_image_item.file,
                     self.inputs["minbaseline"],
                     self.inputs["maxbaseline"],
                     processed_ms_dir,
                     fillrootimagegroup_exec,
                     self.environment,
                     sourcedb_item.file,
                     concat_ms_item.file,
                     correlated_item.file,
                     self.inputs["msselect_executable"]]

        self.logger.info(
            "Starting finalize with the following args: {0}".format(
                arguments))
        jobs.append(ComputeJob(target_item.host, command, arguments))

    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Validate the performance of the node script and assign output
    successful_run = False
    for (job, output_image_item, output_correlated_item) in zip(
            jobs, output_image_mapfile, output_correlated_map):
        if not "hdf5" in job.results:
            # If the output failed, set the skip to True
            output_image_item.skip = True
            output_correlated_item.skip = True
        else:
            # signal that we have at least a single run finished ok.
            # No need to set skip in this case
            successful_run = True

    if not successful_run:
        self.logger.warn("Not a single finalizer succeeded")
        return 1

    # Save the location of the output images
    output_image_mapfile.save(self.inputs['placed_image_mapfile'])
    self.logger.debug(
        "Wrote mapfile containing placed hdf5 images: {0}".format(
            self.inputs['placed_image_mapfile']))

    # save the location of measurement sets
    output_correlated_map.save(self.inputs['placed_correlated_mapfile'])
    self.logger.debug("Wrote mapfile containing placed mss: {0}".format(
        self.inputs['placed_correlated_mapfile']))

    self.outputs["placed_image_mapfile"] = self.inputs[
        'placed_image_mapfile']
    self.outputs["placed_correlated_mapfile"] = self.inputs[
        'placed_correlated_mapfile']

    return 0
def go(self):
    """
    imager_bbs functionality. Called by framework performing all the work
    """
    super(imager_bbs, self).go()
    self.logger.info("Starting imager_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])

    # TODO: DataMap extension
    # # Check if the input has equal length and is on the same nodes
    # if not validate_data_maps(ms_map, parmdb_map):
    #     self.logger.error("The combination of mapfiles failed validation:")
    #     self.logger.error("ms_map: \n{0}".format(ms_map))
    #     self.logger.error("parmdb_map: \n{0}".format(parmdb_map))
    #     return 1

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python3 %s" % (self.__file__.replace("master", "nodes"))
    map_dir = os.path.join(
        self.config.get("layout", "job_directory"), "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the three maps. If 'skip' is True in any
    # of these maps, then 'skip' must be set to True in all maps.
    for w, x, y in zip(ms_map, parmdb_map, sourcedb_map):
        w.skip = x.skip = y.skip = (
            w.skip or x.skip or y.skip
        )

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        DataMap.SkipIterator
    for (idx, (ms, parmdb, sourcedb)) in enumerate(zip(
            ms_map, parmdb_map, sourcedb_map)):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(
            map_dir, "%s-%s_map_%s.map" % (host, idx, run_id))
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, "%s-%s_parmdb_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(
            map_dir, "%s-%s_sky_%s.map" % (host, idx, run_id))
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        arguments = [self.inputs['bbs_executable'],
                     self.inputs['parset'],
                     ms_list_path, parmdb_list_path, sourcedb_list_path]
        jobs.append(ComputeJob(host, node_command, arguments,
                               resources={
                                   "cores": self.inputs['nthreads']
                               }))

    # start and wait till all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. validate the node output and construct the output mapfile.
    if self.error.isSet():   # if one of the nodes failed
        self.logger.error("One of the nodes failed while performing "
                          "a BBS run. Aborting: concat.ms corruption")
        return 1

    # return the output: the measurement sets that are calibrated:
    # calibrated data is placed in the ms sets
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
def go(self):
    """
    Contains functionality of the vdsmaker
    """
    super(vdsmaker, self).go()
    # **********************************************************************
    # 1. Load data from disk, create output files
    args = self.inputs['args']
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    data = DataMap.load(args[0])

    # Skip items in `data` that have 'skip' set to True
    data.iterator = DataMap.SkipIterator

    # Create output vds names
    vdsnames = [
        os.path.join(self.inputs['directory'],
                     os.path.basename(item.file) + '.vds')
        for item in data
    ]

    # *********************************************************************
    # 2. Call vdsmaker
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    jobs = []
    for inp, vdsfile in zip(data, vdsnames):
        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[
                    inp.file,
                    self.config.get('cluster', 'clusterdesc'),
                    vdsfile,
                    self.inputs['makevds']
                ]))
    self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
    vdsnames = [
        vds for vds, job in zip(vdsnames, jobs)
        if job.results['returncode'] == 0
    ]
    if not vdsnames:
        self.logger.error("All makevds processes failed. Bailing out!")
        return 1

    # *********************************************************************
    # 3. Combine VDS files to produce GDS
    failure = False
    self.logger.info("Combining VDS files")
    executable = self.inputs['combinevds']
    gvds_out = self.inputs['gvds']
    # Create the gvds directory for output files, needed for combine
    create_directory(os.path.dirname(gvds_out))

    try:
        command = [executable, gvds_out] + vdsnames
        combineproc = subprocess.Popen(
            command,
            close_fds=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        sout, serr = combineproc.communicate()
        log_process_output(executable, sout, serr, self.logger)
        if combineproc.returncode != 0:
            raise subprocess.CalledProcessError(
                combineproc.returncode, command)
        self.outputs['gvds'] = gvds_out
        self.logger.info("Wrote combined VDS file: %s" % gvds_out)
    except subprocess.CalledProcessError as cpe:
        self.logger.exception("combinevds failed with status %d: %s"
                              % (cpe.returncode, serr))
        failure = True
def go(self):
    """
    selfcal_bbs functionality. Called by the framework, performing all the work.
    """
    super(selfcal_bbs, self).go()
    self.logger.info("Starting selfcal_bbs run")

    # ********************************************************************
    # 1. Load and validate the data
    ms_map = MultiDataMap.load(self.inputs['args'][0])
    parmdb_map = MultiDataMap.load(self.inputs['instrument_mapfile'])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_mapfile'])
    concat_ms_map = DataMap.load(self.inputs['concat_ms_map_path'])

    # *********************************************************************
    # 2. Start the node scripts
    jobs = []
    node_command = " python %s" % (self.__file__.replace(
        "master", "nodes"))
    map_dir = os.path.join(self.config.get("layout", "job_directory"),
                           "mapfiles")
    run_id = str(self.inputs.get("id"))

    # Update the skip fields of the four maps. If 'skip' is True in any of
    # these maps, then 'skip' must be set to True in all maps.
    align_data_maps(ms_map, parmdb_map, sourcedb_map, concat_ms_map)

    ms_map.iterator = parmdb_map.iterator = sourcedb_map.iterator = \
        concat_ms_map.iterator = DataMap.SkipIterator

    # *********************************************************************
    for (ms, parmdb, sourcedb, concat_ms) in zip(
            ms_map, parmdb_map, sourcedb_map, concat_ms_map):
        # host is the same for each entry (validate_data_maps)
        host, ms_list = ms.host, ms.file

        # Write data maps to MultiDataMaps
        ms_list_path = os.path.join(map_dir,
                                    host + "_ms_" + run_id + ".map")
        MultiDataMap([tuple([host, ms_list, False])]).save(ms_list_path)

        parmdb_list_path = os.path.join(
            map_dir, host + "_parmdb_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, parmdb.file, False])]).save(parmdb_list_path)

        sourcedb_list_path = os.path.join(map_dir,
                                          host + "_sky_" + run_id + ".map")
        MultiDataMap(
            [tuple([host, [sourcedb.file], False])]).save(sourcedb_list_path)

        # The concat ms does not have to be written: it already is a
        # singular item (it is the output of the reduce step)
        # redmine issue #6021
        arguments = [
            self.inputs['bbs_executable'],
            self.inputs['parset'],
            ms_list_path, parmdb_list_path, sourcedb_list_path,
            concat_ms.file,
            self.inputs['major_cycle']
        ]
        jobs.append(ComputeJob(host, node_command, arguments))

    # start the jobs and wait until all are finished
    self._schedule_jobs(jobs)

    # **********************************************************************
    # 3. Validate the node output and construct the output mapfile.
    if self.error.isSet():   # if one of the nodes failed
        self.logger.warn("Failed bbs node run detected, skipping work "
                         "on this work item for further computations")

    # find failed jobs and set the skip field
    for (ms_item, concat_item, job) in zip(ms_map, concat_ms_map, jobs):
        if job.results["returncode"] == 0:
            continue
        else:
            ms_item.skip = True
            concat_item.skip = True
            self.logger.warn("bbs failed on item: {0}".format(
                ms_item.file))

    # Return the output: the measurement sets that are calibrated.
    # The calibrated data is placed in the ms sets.
    MultiDataMap(ms_map).save(self.inputs['mapfile'])
    # also save the concat_ms map with possible skips
    DataMap(concat_ms_map).save(self.inputs['concat_ms_map_path'])
    self.logger.info("Wrote file with calibrated data")

    self.outputs['mapfile'] = self.inputs['mapfile']
    return 0
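# After scheduling, failures are folded back into the data maps rather than
# aborting the whole run. A standalone sketch of that bookkeeping, with
# hypothetical `Entry`/`FakeJob` stand-ins for the map entries and ComputeJob:

class Entry(object):
    def __init__(self, file):
        self.file = file
        self.skip = False

class FakeJob(object):
    """Hypothetical stand-in for a ComputeJob after execution."""
    def __init__(self, returncode):
        self.results = {"returncode": returncode}

entries = [Entry("a.MS"), Entry("b.MS")]
jobs = [FakeJob(0), FakeJob(1)]

# Mark entries whose job failed so later pipeline steps skip them
for entry, job in zip(entries, jobs):
    if job.results["returncode"] != 0:
        entry.skip = True

print([(e.file, e.skip) for e in entries])   # [('a.MS', False), ('b.MS', True)]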
def go(self):
    super(get_metadata, self).go()

    # ********************************************************************
    # 1. Parse and validate inputs
    args = self.inputs['args']
    product_type = self.inputs['product_type']
    global_prefix = self.inputs['parset_prefix']
    # Add a trailing dot (.) if not present in the prefix.
    if global_prefix and not global_prefix.endswith('.'):
        global_prefix += '.'

    if product_type not in self.valid_product_types:
        self.logger.error(
            "Unknown product type: %s\n\tValid product types are: %s" %
            (product_type, ', '.join(self.valid_product_types)))

    # ********************************************************************
    # 2. Load mapfiles
    self.logger.debug("Loading input-data mapfile: %s" % args[0])
    data = DataMap.load(args[0])

    # ********************************************************************
    # 3. Call the node side of the recipe
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    data.iterator = DataMap.SkipIterator
    jobs = []
    for inp in data:
        jobs.append(
            ComputeJob(
                inp.host, command,
                arguments=[inp.file, self.inputs['product_type']]))
    self._schedule_jobs(jobs)
    for job, inp in zip(jobs, data):
        if job.results['returncode'] != 0:
            inp.skip = True

    # ********************************************************************
    # 4. Check job results, and create the output data map file
    if self.error.isSet():
        # Abort if all jobs failed
        if all(job.results['returncode'] != 0 for job in jobs):
            self.logger.error("All jobs failed. Bailing out!")
            return 1
        else:
            self.logger.warn(
                "Some jobs failed, continuing with succeeded runs")
    self.logger.debug("Updating data map file: %s" % args[0])
    data.save(args[0])

    # ********************************************************************
    # 5. Create the parset-file and write it to disk.
    parset = parameterset()
    prefix = "Output_%s_" % product_type
    parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs)))
    prefix = global_prefix + prefix
    for idx, job in enumerate(jobs):
        self.logger.debug("job[%d].results = %s" % (idx, job.results))
        parset.adoptCollection(metadata.to_parset(job.results),
                               '%s[%d].' % (prefix, idx))
    try:
        create_directory(os.path.dirname(self.inputs['parset_file']))
        parset.writeFile(self.inputs['parset_file'])
        self.logger.info("Wrote meta data to: " +
                         self.inputs['parset_file'])
    except RuntimeError as err:
        self.logger.error("Failed to write meta-data: %s" % str(err))
        return 1

    return 0
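# Step 5 produces keys of the form <prefix>Output_<type>_[<idx>].<field>.
# A hypothetical sketch of that key layout for two correlated data products,
# built with a plain dict instead of the real parameterset class; the prefix
# and field names are illustrative, not the real metadata schema:

global_prefix = "ObsSW.Observation.DataProducts."
prefix = "Output_Correlated_"

results = [{"filename": "L1_SB000.MS"}, {"filename": "L1_SB001.MS"}]

parset = {global_prefix + "nrOf" + prefix: str(len(results))}
for idx, job_results in enumerate(results):
    for field, value in job_results.items():
        key = "%s%s[%d].%s" % (global_prefix, prefix, idx, field)
        parset[key] = value

for key in sorted(parset):
    print(key, "=", parset[key])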
def go(self):
    self.logger.info("Starting cimager run")
    super(cimager, self).go()
    self.outputs['images'] = []

    # Build a GVDS file describing all the data to be processed
    # ----------------------------------------------------------------------
    self.logger.debug("Building VDS file describing all data for cimager")
    gvds_file = os.path.join(self.config.get("layout", "job_directory"),
                             "vds", "cimager.gvds")
    inputs = LOFARinput(self.inputs)
    inputs['args'] = self.inputs['args']
    inputs['gvds'] = gvds_file
    inputs['unlink'] = False
    inputs['makevds'] = self.inputs['makevds']
    inputs['combinevds'] = self.inputs['combinevds']
    inputs['nproc'] = self.inputs['nproc']
    inputs['directory'] = os.path.dirname(gvds_file)
    outputs = LOFARoutput(self.inputs)
    if self.cook_recipe('vdsmaker', inputs, outputs):
        self.logger.warn("vdsmaker reports failure")
        return 1
    self.logger.debug("cimager GVDS is %s" % (gvds_file,))

    # Read data for processing from the GVDS file
    # ----------------------------------------------------------------------
    parset = Parset(gvds_file)

    data = []
    for part in range(parset.getInt('NParts')):
        host = parset.getString("Part%d.FileSys" % part).split(":")[0]
        vds = parset.getString("Part%d.Name" % part)
        data.append((host, vds))

    # Divide data into timesteps for imaging
    # timesteps is a list of (start, end, results directory) tuples
    # ----------------------------------------------------------------------
    timesteps = []
    results_dir = self.inputs['results_dir']
    if self.inputs['timestep'] == 0:
        self.logger.info("No timestep specified; imaging all data")
        timesteps = [(None, None, results_dir)]
    else:
        self.logger.info("Using timestep of %s s" % self.inputs['timestep'])
        gvds = get_parset(gvds_file)
        start_time = quantity(gvds['StartTime'].get()).get('s').get_value()
        end_time = quantity(gvds['EndTime'].get()).get('s').get_value()
        step = float(self.inputs['timestep'])
        while start_time < end_time:
            timesteps.append((start_time, start_time + step,
                              os.path.join(results_dir, str(start_time))))
            start_time += step

    # Run each cimager process in a separate thread
    # ----------------------------------------------------------------------
    command = "python %s" % (self.__file__.replace('master', 'nodes'))
    for label, timestep in enumerate(timesteps):
        self.logger.info("Processing timestep %d" % label)
        jobs = []
        parsets = []
        start_time, end_time, resultsdir = timestep
        for host, vds in data:
            vds_data = Parset(vds)
            frequency_range = [
                vds_data.getDoubleVector("StartFreqs")[0],
                vds_data.getDoubleVector("EndFreqs")[-1]
            ]
            parsets.append(
                self.__get_parset(
                    os.path.basename(
                        vds_data.getString('FileName')).split('.')[0],
                    vds_data.getString("FileName"),
                    str(frequency_range),
                    vds_data.getStringVector(
                        "Extra.FieldDirectionType")[0],
                    vds_data.getStringVector("Extra.FieldDirectionRa")[0],
                    vds_data.getStringVector("Extra.FieldDirectionDec")[0],
                    'True',    # cimager bug: non-restored image unusable
                ))
            jobs.append(
                ComputeJob(host, command,
                           arguments=[
                               self.inputs['imager_exec'],
                               vds,
                               parsets[-1],
                               resultsdir,
                               start_time,
                               end_time
                           ]))
        self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
        # Collect the image names from the generated parsets, then
        # remove the temporary parset files
        for parset_path in parsets:
            parset = Parset(parset_path)
            image_names = parset.getStringVector("Cimager.Images.Names")
            self.outputs['images'].extend(image_names)
            os.unlink(parset_path)

    # Check if we recorded a failing process before returning
    # ----------------------------------------------------------------------
    if self.error.isSet():
        self.logger.warn("Failed imager process detected")
        return 1
    else:
        return 0
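# The timestep loop above is a plain interval split. A minimal standalone
# sketch of the same logic, using made-up start/end times in seconds:

import os

def split_timesteps(start_time, end_time, step, results_dir):
    """Split [start_time, end_time) into fixed-size imaging intervals."""
    timesteps = []
    while start_time < end_time:
        timesteps.append((start_time, start_time + step,
                          os.path.join(results_dir, str(start_time))))
        start_time += step
    return timesteps

# Made-up observation of 1 hour, imaged in 25-minute chunks; note that the
# final interval may extend past end_time, exactly as in the recipe.
for start, end, path in split_timesteps(0.0, 3600.0, 1500.0, "/data/results"):
    print(start, end, path)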
def go(self):
    self.logger.info("Starting BBS run")
    super(new_bbs, self).go()

    # Check for relevant input parameters in the parset-file
    # ---------------------------------------------------------------------
    self.logger.debug("Reading parset from %s" % self.inputs['parset'])
    self.parset = parameterset(self.inputs['parset'])

    self._set_input('db_host', 'BBDB.Host')
    self._set_input('db_user', 'BBDB.User')
    self._set_input('db_name', 'BBDB.Name')
    self._set_input('db_key', 'BBDB.Key')
    #self.logger.debug("self.inputs = %s" % self.inputs)

    # Clean the blackboard database
    # ---------------------------------------------------------------------
    self.logger.info(
        "Cleaning BBS database for key '%s'" % (self.inputs['db_key'])
    )
    command = ["psql",
               "-h", self.inputs['db_host'],
               "-U", self.inputs['db_user'],
               "-d", self.inputs['db_name'],
               "-c", "DELETE FROM blackboard.session WHERE key='%s';" %
                     self.inputs['db_key']
               ]
    self.logger.debug(command)
    if subprocess.call(command) != 0:
        self.logger.warning(
            "Failed to clean BBS database for key '%s'" %
            self.inputs['db_key']
        )

    # Create a bbs_map describing the file mapping on disk
    # ---------------------------------------------------------------------
    if not self._make_bbs_map():
        return 1

    # Produce a GVDS file, describing the data that must be processed.
    gvds_file = self.run_task(
        "vdsmaker",
        self.inputs['data_mapfile'],
        gvds=self.inputs['gvds']
    )['gvds']

    # Construct a parset for BBS GlobalControl by patching the GVDS
    # file and database information into the supplied template
    # ------------------------------------------------------------------
    self.logger.debug("Building parset for BBS control")
    # Create a location for parsets
    job_directory = self.config.get(
        "layout", "job_directory")
    parset_directory = os.path.join(job_directory, "parsets")
    create_directory(parset_directory)

    # Patch the parset, copy the result to the target location, and
    # remove the temporary file
    try:
        bbs_parset = utilities.patch_parset(
            self.parset, {
                'Observation': gvds_file,
                'BBDB.Key': self.inputs['db_key'],
                'BBDB.Name': self.inputs['db_name'],
                'BBDB.User': self.inputs['db_user'],
                'BBDB.Host': self.inputs['db_host'],
                #'BBDB.Port': self.inputs['db_name'],
            }
        )
        bbs_parset_path = os.path.join(parset_directory,
                                       "bbs_control.parset")
        shutil.copyfile(bbs_parset, bbs_parset_path)
        self.logger.debug("BBS control parset is %s" % (bbs_parset_path,))
    finally:
        # Always remove the file in the tempdir
        os.remove(bbs_parset)

    try:
        # When one of our processes fails, we set the killswitch.
        # Everything else will then come crashing down, rather than
        # hanging about forever.
        # --------------------------------------------------------------
        self.killswitch = threading.Event()
        self.killswitch.clear()
        signal.signal(signal.SIGTERM, self.killswitch.set)

        # GlobalControl runs in its own thread
        # --------------------------------------------------------------
        run_flag = threading.Event()
        run_flag.clear()
        bbs_control = threading.Thread(
            target=self._run_bbs_control,
            args=(bbs_parset_path, run_flag)
        )
        bbs_control.start()
        run_flag.wait()    # Wait for control to start before proceeding

        # We run BBS KernelControl on each compute node by directly
        # invoking the node script using SSH.
        # Note that we use a job_server to send out job details and
        # collect logging information, so we define a bunch of
        # ComputeJobs. However, we need more control than the generic
        # ComputeJob.dispatch method supplies, so we'll control them
        # with our own threads.
        # --------------------------------------------------------------
        command = "python %s" % (self.__file__.replace('master', 'nodes'))
        jobpool = {}
        bbs_kernels = []
        with job_server(self.logger, jobpool, self.error) as (jobhost,
                                                              jobport):
            self.logger.debug("Job server at %s:%d" % (jobhost, jobport))
            for job_id, details in enumerate(self.bbs_map):
                host, files = details
                jobpool[job_id] = ComputeJob(
                    host, command,
                    arguments=[
                        self.inputs['kernel_exec'],
                        files,
                        self.inputs['db_key'],
                        self.inputs['db_name'],
                        self.inputs['db_user'],
                        self.inputs['db_host']
                    ]
                )
                bbs_kernels.append(
                    threading.Thread(
                        target=self._run_bbs_kernel,
                        args=(host, command, job_id, jobhost,
                              str(jobport))
                    )
                )
            self.logger.info("Starting %d threads" % len(bbs_kernels))
            for thread in bbs_kernels:
                thread.start()
            self.logger.debug("Waiting for all kernels to complete")
            for thread in bbs_kernels:
                thread.join()

        # When GlobalControl finishes, our work here is done
        # ----------------------------------------------------------
        self.logger.info("Waiting for GlobalControl thread")
        bbs_control.join()
    finally:
        os.unlink(bbs_parset_path)

    if self.killswitch.isSet():
        # If the killswitch is set, then one of our processes failed, so
        # the whole run is invalid
        # ----------------------------------------------------------
        return 1

    self.outputs['mapfile'] = self.inputs['data_mapfile']
    return 0
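# The killswitch pattern above -- one Event shared by all workers, set on
# failure so everything winds down instead of hanging -- can be shown in
# isolation. A minimal sketch with dummy workers standing in for the BBS
# kernels:

import threading
import time

killswitch = threading.Event()

def worker(name, fail=False):
    """Dummy stand-in for a BBS kernel: poll the killswitch while working."""
    for _ in range(10):
        if killswitch.is_set():
            print("%s: killswitch set, aborting" % name)
            return
        time.sleep(0.01)
    if fail:
        print("%s: failed, setting killswitch" % name)
        killswitch.set()

threads = [threading.Thread(target=worker, args=("kernel-%d" % i, i == 2))
           for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print("run invalid" if killswitch.is_set() else "run ok")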
class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
    """
    Create a distributed parameter database (ParmDB) for a distributed
    Measurement Set (MS).

    1. Create a parmdb template at the master side of the recipe
    2. Call the node side of the recipe with the template and possible targets
    3. Validate performance, clean up temp files, construct output

    **Command line arguments**

    1. A mapfile describing the data to be processed.
    2. A mapfile with the output locations (if provided, input and output
       are validated)
    """
    inputs = {
        'executable': ingredient.ExecField(
            '--executable',
            help="Full path to parmdbm executable",
        ),
        'nproc': ingredient.IntField(
            '--nproc',
            help="Maximum number of simultaneous processes per compute node",
            default=8
        ),
        'suffix': ingredient.StringField(
            '--suffix',
            help="Suffix of the table name of the empty parameter database",
            default=".parmdb"
        ),
        'working_directory': ingredient.StringField(
            '-w', '--working-directory',
            help="Working directory used on output nodes. "
                 "Results will be written here."
        ),
        'mapfile': ingredient.StringField(
            '--mapfile',
            help="Full path of mapfile to produce; it will contain "
                 "a list of the generated empty parameter database files"
        )
    }

    outputs = {
        'mapfile': ingredient.FileField()
    }

    def go(self):
        self.logger.info("Starting setupparmdb run")
        super(setupparmdb, self).go()

        # *********************************************************************
        # 1. Create a temporary template parmdb at the master side of the recipe
        self.logger.info("Generating template parmdb")

        # generate a temp dir
        pdbdir = tempfile.mkdtemp(
            dir=self.config.get("layout", "job_directory"))
        pdbfile = os.path.join(pdbdir, self.inputs['suffix'])

        # Create a template, using the tempdir as its location
        try:
            parmdbm_process = subprocess.Popen(
                [self.inputs['executable']],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            sout, serr = parmdbm_process.communicate(template % pdbfile)
            log_process_output("parmdbm", sout, serr, self.logger)
        except OSError as err:
            self.logger.error("Failed to spawn parmdbm: %s" % str(err))
            return 1

        # *********************************************************************
        # 2. Call the node side of the recipe with the template and possible
        #    targets. If output locations are provided as input, they are
        #    validated.
        try:
            # Load file <-> compute node mapping from disk
            # ------------------------------------------------------------------
            args = self.inputs['args']
            self.logger.debug("Loading input-data mapfile: %s" % args[0])
            indata = DataMap.load(args[0])
            if len(args) > 1:
                # If output locations are provided, validate the input and
                # output maps
                self.logger.debug("Loading output-data mapfile: %s" % args[1])
                outdata = DataMap.load(args[1])
                if not validate_data_maps(indata, outdata):
                    self.logger.error(
                        "Validation of input/output data mapfiles failed")
                    return 1
            # else the output location is the input location plus a suffix
            else:
                outdata = copy.deepcopy(indata)
                for item in outdata:
                    item.file = os.path.join(
                        self.inputs['working_directory'],
                        self.inputs['job_name'],
                        os.path.basename(item.file) + self.inputs['suffix']
                    )

            # Call the node side
            command = "python %s" % (self.__file__.replace('master', 'nodes'))
            outdata.iterator = DataMap.SkipIterator
            jobs = []
            for outp in outdata:
                jobs.append(
                    ComputeJob(
                        outp.host,
                        command,
                        arguments=[pdbfile, outp.file]
                    )
                )
            self._schedule_jobs(jobs, max_per_node=self.inputs['nproc'])
            for job, outp in zip(jobs, outdata):
                # A returncode of 123456 indicates a failed ssh connection
                if job.results['returncode'] == 123456:
                    self.logger.warning(
                        "ssh connection with {0} failed. "
                        "Skipping further work on this task".format(outp.host))
                    self.logger.warning("Error code 123456.")
                    outp.skip = True
                elif job.results['returncode'] != 0:
                    outp.skip = True

        # *********************************************************************
        # 3. Validate performance, clean up temp files, construct output
        finally:
            self.logger.debug("Removing template parmdb")
            shutil.rmtree(pdbdir, ignore_errors=True)

        if self.error.isSet():
            # Abort if all jobs failed
            if all(job.results['returncode'] != 0 for job in jobs):
                self.logger.error("All jobs failed. Bailing out!")
                return 1
            else:
                self.logger.warn(
                    "Some jobs failed, continuing with succeeded runs")

        self.logger.debug("Writing parmdb map file: %s" %
                          self.inputs['mapfile'])
        outdata.save(self.inputs['mapfile'])
        self.outputs['mapfile'] = self.inputs['mapfile']
        return 0
def go(self):
    """
    This member contains all the functionality of the selfcal_awimager.
    The functionality is all located at the node side of the script.
    """
    super(selfcal_awimager, self).go()
    self.logger.info("Starting selfcal_awimager run")

    # *********************************************************************
    # 1. collect the inputs and validate
    input_map = DataMap.load(self.inputs['args'][0])
    sourcedb_map = DataMap.load(self.inputs['sourcedb_path'])

    if not validate_data_maps(input_map, sourcedb_map):
        self.logger.error(
            "The supplied input_ms mapfile and sourcedb mapfile "
            "are incorrect. Aborting")
        self.logger.error(repr(input_map))
        self.logger.error(repr(sourcedb_map))
        return 1

    # *********************************************************************
    # 2. Start the node side of the awimager recipe
    # Compile the command to be executed on the remote machine
    node_command = "python3 %s" % (self.__file__.replace("master", "nodes"))
    jobs = []

    output_map = copy.deepcopy(input_map)
    align_data_maps(input_map, output_map, sourcedb_map)

    sourcedb_map.iterator = input_map.iterator = output_map.iterator = \
        DataMap.SkipIterator

    for measurement_item, source_item in zip(input_map, sourcedb_map):
        if measurement_item.skip or source_item.skip:
            jobs.append(None)
            continue
        # both the sourcedb and the measurement are in a map: unpack both
        host, measurement_path = measurement_item.host, measurement_item.file
        sourcedb_path = source_item.file

        # construct and save the output name
        arguments = [self.inputs['executable'],
                     self.environment,
                     self.inputs['parset'],
                     self.inputs['working_directory'],
                     self.inputs['output_image'],
                     measurement_path,
                     sourcedb_path,
                     self.inputs['mask_patch_size'],
                     self.inputs['autogenerate_parameters'],
                     self.inputs['specify_fov'],
                     self.inputs['fov'],
                     self.inputs['major_cycle'],
                     self.inputs['nr_cycles'],
                     self.inputs['perform_self_cal']
                     ]

        jobs.append(ComputeJob(host, node_command, arguments))
    self._schedule_jobs(jobs)

    # *********************************************************************
    # 3. Check the output of the node scripts
    for job, output_item in zip(jobs, output_map):
        # job is None for items that were skipped
        if job is None or "image" not in job.results:
            output_item.file = "failed"
            output_item.skip = True
        else:
            output_item.file = job.results["image"]
            output_item.skip = False

    # Check if there are finished runs
    successful_runs = any(not item.skip for item in output_map)

    if not successful_runs:
        self.logger.error(
            "None of the started awimager runs finished correctly")
        self.logger.error(
            "No work left to be done: exiting with error status")
        return 1

    # If there was partial success
    if self.error.isSet():
        self.logger.error("Failed awimager node run detected, continuing "
                          "with successful tasks.")

    self._store_data_map(self.inputs['mapfile'], output_map,
                         "mapfile containing produced awimages")

    self.outputs["mapfile"] = self.inputs['mapfile']
    return 0
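# The output map starts as a deep copy of the input map, so per-item
# bookkeeping never mutates the input. A minimal sketch of that pattern,
# with a hypothetical `Entry` class standing in for a DataMap entry:

import copy

class Entry(object):
    def __init__(self, host, file, skip=False):
        self.host, self.file, self.skip = host, file, skip

input_map = [Entry("node01", "obs_SB000.MS"), Entry("node02", "obs_SB001.MS")]
output_map = copy.deepcopy(input_map)

# Record per-item results on the copy; the input map is untouched
output_map[0].file = "obs_SB000.img"
output_map[1].skip = True

print([(e.file, e.skip) for e in input_map])
print([(e.file, e.skip) for e in output_map])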