def __init__(self, parsetDict):
    # The constructor gets the subset of the NDPPP parset containing
    # all key-value pairs for this step.
    # Note: the superclass constructor MUST be called.
    DPStep.__init__(self, parsetDict)
    parset = parameterset(parsetDict)
    self.itsIncr = parset.getDouble('incr', 1)
def __init__(self, id, predecessors='', inputs='', outputs='', parset=''):
    self.id = id
    self.predecessors_as_str = predecessors
    self.inputs = inputs
    self.output = outputs
    self.parset_as_str = str(parset)
    self.parset = parameterset()
def _prepare_steps(self, **kwargs):
    """
    Prepare for running the NDPPP program. This means, for one thing,
    patching the parsetfile with the correct input/output MS names,
    start/end times if available, etc. If a demixing step must be
    performed, some extra work needs to be done.

    Returns: patch dictionary that must be applied to the parset.
    """
    self.logger.debug("Time interval: %s %s" %
                      (kwargs['start_time'], kwargs['end_time']))

    # Create output directory for output MS.
    create_directory(os.path.dirname(kwargs['tmpfile']))

    patch_dictionary = {
        'msin': kwargs['infile'],
        'msout': kwargs['tmpfile'],
        'uselogger': 'True'
    }
    if kwargs['start_time']:
        patch_dictionary['msin.starttime'] = kwargs['start_time']
    if kwargs['end_time']:
        patch_dictionary['msin.endtime'] = kwargs['end_time']

    # If we need to do a demixing step, we have to do some extra work.
    # We have to read the parsetfile to check this.
    parset = parameterset(kwargs['parsetfile'])
    for step in parset.getStringVector('steps'):
        if parset.getString(step + '.type', '').startswith('demix'):
            patch_dictionary.update(
                self._prepare_demix_step(step, **kwargs))

    # Return the patch dictionary that must be applied to the parset.
    return patch_dictionary
def _send_filtered_event_message(self, otdb_id: int, modificationTime: datetime, state: str):
    try:
        with OTDBRPC.create(exchange=self.exchange, broker=self.broker, timeout=2) as otdbrpc:
            parset = parameterset(
                otdbrpc.taskGetSpecification(otdb_id=otdb_id).get("specification", ''))
            task_type = parset.get("ObsSW.Observation.processType")
            priority = 6 if task_type == "Observation" else 2
    except Exception as e:
        logger.warning('Could not determine task type for otdb_id=%s, using default priority=4: %s',
                       otdb_id, e)
        priority = 4

    try:
        content = {"treeID": otdb_id, "state": state, "time_of_change": modificationTime}
        msg = EventMessage(subject=DEFAULT_FILTERED_OTDB_NOTIFICATION_SUBJECT,
                           content=content,
                           priority=priority)
        logger.info('sending filtered event message subject:\'%s\' content: %s',
                    msg.subject, content)
        self.send(msg)
    except Exception as e:
        logger.error('Could not send event message: %s', e)
def __init__(self):
    super(preprocessing_pipeline, self).__init__()
    self.parset = parameterset()
    self.input_data = []
    self.output_data = []
    self.io_data_mask = []
    self.parset_feedback_file = None
def to_parset(data, prefix=''):
    """
    Convert the data in the variable `data` to a LOFAR parameterset. Values
    may contain vectors (python lists) or records (python dicts) of scalars.
    Deeper nested structures must be unraveled in separate key/value pairs,
    where the name of the nested value is moved into the key. Keys for
    vector values will get an index attached to their name.

    For example, the dictionary entry
        'vec_rec' : [{1:'a', 2:'b'}, {3:'c'}]
    will be converted to the following parameterset key/value pairs
        vec_rec[0]={1: 'a', 2: 'b'}
        vec_rec[1]={3: 'c'}
    And, the dictionary entry
        'rec_vec' : {'a':[1, 2], 'b':[3]}
    will be converted to
        rec_vec.a=[1, 2]
        rec_vec.b=[3]
    """
    result = parameterset()
    if isinstance(data, dict):
        for key, value in data.iteritems():
            fullkey = prefix + '.' + key if prefix else key
            if isinstance(value, dict):
                if any(isinstance(v, dict) or isinstance(v, list)
                       for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(isinstance(v, dict) or isinstance(v, list)
                       for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    elif isinstance(data, list):
        for index, value in enumerate(data):
            fullkey = prefix + '[%d]' % index
            if isinstance(value, dict):
                if any(isinstance(v, dict) or isinstance(v, list)
                       for v in value.values()):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            elif isinstance(value, list):
                if any(isinstance(v, dict) or isinstance(v, list)
                       for v in value):
                    result.adoptCollection(to_parset(value, fullkey))
                else:
                    result.replace(fullkey, str(value))
            else:
                result.replace(fullkey, str(value))
    return result
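# Hedged usage sketch for to_parset() above (not part of the original
# module): it assumes the same lofar.parameterset bindings and a Python 2
# runtime, since the function relies on dict.iteritems(); the input values
# are illustrative only.
example = {
    'vec_rec': [{1: 'a', 2: 'b'}, {3: 'c'}],
    'rec_vec': {'a': [1, 2], 'b': [3]},
    'scalar': 42,
}
ps = to_parset(example)
# Resulting keys/values, following the docstring:
#   vec_rec[0]={1: 'a', 2: 'b'}
#   vec_rec[1]={3: 'c'}
#   rec_vec.a=[1, 2]
#   rec_vec.b=[3]
#   scalar=42
sorted(ps.keys())  # -> ['rec_vec.a', 'rec_vec.b', 'scalar', 'vec_rec[0]', 'vec_rec[1]']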
def _get_meta_data(self, number_of_major_cycles, placed_data_image_map, placed_correlated_map, full_parset, max_cycles_reached): """ Function combining all the meta data collection steps of the processing """ parset_prefix = full_parset.getString('prefix') + \ full_parset.fullModuleName('DataProducts') toplevel_meta_data = parameterset({'feedback_version': feedback_version}) toplevel_meta_data.replace( parset_prefix + ".numberOfMajorCycles", str(number_of_major_cycles)) toplevel_meta_data_path = os.path.join( self.parset_dir, "toplevel_meta_data.parset") toplevel_meta_data.replace(parset_prefix + ".max_cycles_reached", str(max_cycles_reached)) try: toplevel_meta_data.writeFile(toplevel_meta_data_path) self.logger.info("Wrote meta data to: " + toplevel_meta_data_path) except RuntimeError as err: self.logger.error( "Failed to write toplevel meta information parset: %s" % str( toplevel_meta_data_path)) return 1 skyimage_metadata = "%s_feedback_SkyImage" % (self.parset_file,) correlated_metadata = "%s_feedback_Correlated" % (self.parset_file,) # Create a parset-file containing the metadata for MAC/SAS at nodes self.run_task("get_metadata", placed_data_image_map, parset_prefix = parset_prefix, product_type = "SkyImage", metadata_file = skyimage_metadata) self.run_task("get_metadata", placed_correlated_map, parset_prefix = parset_prefix, product_type = "Correlated", metadata_file = correlated_metadata) self.send_feedback_processing(toplevel_meta_data) self.send_feedback_dataproducts(parameterset(skyimage_metadata)) self.send_feedback_dataproducts(parameterset(correlated_metadata))
def load_parameters(filename):
    """
    Load parameters from file and return them as hash.
    """
    if LOFAR_PARAMETERSET:
        data = parameterset(filename).dict()
    else:
        data = ConfigObj(filename, raise_errors=True, file_error=True)
    return data
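# Hedged usage sketch for load_parameters() above (not part of the original
# module); the file name is hypothetical. Whichever branch is taken, the
# returned object behaves as a plain mapping of key to (string) value.
params = load_parameters('/path/to/job_settings.parset')  # hypothetical path
value = params.get('some.key')  # keys depend entirely on the file contents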
def do_qa(self, otdb_id):
    '''
    try to do all qa (quality assurance) steps for the given otdb_id
    resulting in an h5 MS-extract file and inspection plots
    :param int otdb_id: observation/pipeline otdb id for which the conversion needs to be done.
    :return: None
    '''
    hdf5_file_path = None

    with OTDBRPC.create(exchange=self.exchange, broker=self.broker, timeout=5) as otdbrpc:
        parset = parameterset(
            otdbrpc.taskGetSpecification(otdb_id=otdb_id).get("specification", ''))

        if not parset:
            logger.warning("could not find a parset for otdb_id %s.", otdb_id)
            return

        if parset.getBool('ObsSW.Observation.DataProducts.Output_Correlated.enabled'):
            hdf5_file_path = self._convert_ms2hdf5(otdb_id)
        elif parset.getBool('ObsSW.Observation.DataProducts.Output_CoherentStokes.enabled'):
            hdf5_file_path = self._convert_bf2hdf5(otdb_id)
        else:
            logger.info("No uv or cs dataproducts available to convert for otdb_id %s", otdb_id)
            return

    if hdf5_file_path:
        # keep a note of where the h5 file was stored for this unfinished otdb_id
        self._unfinished_otdb_id_map[otdb_id] = hdf5_file_path

        # cluster it
        self._cluster_h5_file(hdf5_file_path, otdb_id)

        self._copy_hdf5_to_nfs_dir(hdf5_file_path)

        plot_dir_path = self._create_plots_for_h5_file(hdf5_file_path, otdb_id)
        plot_dir_path = self._move_plots_to_nfs_dir(plot_dir_path)

        # and notify that we're finished
        self._send_event_message('Finished', {'otdb_id': otdb_id,
                                              'hdf5_file_path': hdf5_file_path,
                                              'plot_dir_path': plot_dir_path or ''})
def verify_and_estimate(self, parset, input_files={}):
    """ Create estimates for a single process based on its parset and input files"""
    if self._checkParsetForRequiredKeys(parset):
        estimates = self._calculate(parameterset(parset), input_files)
    else:
        raise ValueError('The parset is incomplete')

    result = {}
    result[self.name] = {}
    result[self.name]['storage'] = estimates['storage']
    result[self.name]['bandwidth'] = estimates['bandwidth']
    return result
def _read_files(self):
    """Read data file locations from parset-file"""
    self.logger.debug("Reading data file locations from parset-file: %s" %
                      self.inputs['parset'])
    parset = parameterset(self.inputs['parset'])
    dps = parset.makeSubset(parset.fullModuleName('DataProducts') + '.')
    return [
        tuple(os.path.join(location, filename).split(':'))
        for location, filename in zip(
            dps.getStringVector('Input_Correlated.locations'),
            dps.getStringVector('Input_Correlated.filenames'))
    ]
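# Illustrative sketch (not in the original recipe) of the mapping convention
# _read_files() relies on: each Input_Correlated.locations entry is
# "host:directory" and is zipped with the matching Input_Correlated.filenames
# entry, so splitting on ':' yields (host, absolute path) tuples. Host names
# and paths below are made up; os is assumed to be imported as in the recipe.
locations = ['locus001:/data/L12345', 'locus002:/data/L12345']
filenames = ['L12345_SB000_uv.MS', 'L12345_SB001_uv.MS']
pairs = [tuple(os.path.join(loc, fname).split(':'))
         for loc, fname in zip(locations, filenames)]
# pairs == [('locus001', '/data/L12345/L12345_SB000_uv.MS'),
#           ('locus002', '/data/L12345/L12345_SB001_uv.MS')]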
def __init__(self, filename):
    parset = parameterset(filename)
    self.filename = filename
    self.allocated = False
    self.positions = []
    self.subbands = []
    try:
        for beam in range(parset.getInt("Observation.nrBeams")):
            ra = parset.getFloat("Observation.Beam[%d].angle1" % beam)
            dec = parset.getFloat("Observation.Beam[%d].angle2" % beam)
            self.positions.append((ra, dec))
            try:
                self.subbands.append(
                    parset.get('Observation.Beam[%d].subbandList' % beam).expand().getIntVector())
            except RuntimeError:
                self.subbands.append([])
    except RuntimeError:
        pass
    try:
        self.time = [
            parset.getString('Observation.startTime'),
            parset.getString('Observation.stopTime'),
        ]
    except RuntimeError:
        self.time = []
    try:
        self.stations = parset.get('Observation.VirtualInstrument.stationList').expand().getStringVector()
    except RuntimeError:
        self.stations = []
    try:
        self.clock = int(parset.getString("Observation.clockMode")[-3:])
    except RuntimeError:
        self.clock = None
    try:
        self.antennaset = parset.getString('Observation.antennaSet')
    except RuntimeError:
        self.antennaset = None
    try:
        self.filter = parset.getString("Observation.bandFilter")
    except RuntimeError:
        self.filter = None
    self.campaign = {}
    if "Observation.Campaign.name" in parset.keys():
        self.campaign['name'] = parset.getString("Observation.Campaign.name")
    else:
        self.campaign['name'] = None
    if "Observation.Campaign.title" in parset.keys():
        self.campaign['title'] = parset.getString("Observation.Campaign.title")
    else:
        self.campaign['title'] = None
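# Hedged sketch (not part of the original module) of the observation parset
# keys the constructor above reads; the class name "Observation" and all
# values are assumptions for illustration only.
#
#   Observation.nrBeams=1
#   Observation.Beam[0].angle1=5.2337
#   Observation.Beam[0].angle2=0.7109
#   Observation.Beam[0].subbandList=[100..103]
#   Observation.startTime=2013-02-01 12:00:00
#   Observation.stopTime=2013-02-01 13:00:00
#   Observation.VirtualInstrument.stationList=[CS001,CS002]
#   Observation.clockMode=<<Clock200
#   Observation.antennaSet=HBA_DUAL
#   Observation.bandFilter=HBA_110_190
#
# obs = Observation('L12345.parset')   # assumed class name
# obs.positions -> [(5.2337, 0.7109)]
# obs.subbands  -> [[100, 101, 102, 103]]
# obs.clock     -> 200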
def __init__(self): """ Initialize member variables and call superclass init function """ control.__init__(self) self.parset = parameterset() self.input_data = DataMap() self.target_data = DataMap() self.output_data = DataMap() self.scratch_directory = None self.parset_feedback_file = None self.parset_dir = None self.mapfile_dir = None
def verify_and_estimate(self, parset, predecessor_estimates=[]):
    """ Create estimates for an observation or pipeline step based on its
        parset and, in case of a pipeline step, all estimates of its direct
        predecessor(s).
    """
    self.verify(parset, predecessor_estimates)

    result = self._calculate(parameterset(parset), predecessor_estimates)

    logger.info('Estimates for %s:' % self.name)
    logger.info(pprint.pformat(result))

    return result
def patch_parset(parset, data, output_dir=None):
    """
    Generate a parset file by adding the contents of the data dictionary to
    the specified parset object. Write it to file, and return the filename.

    `parset` may either be the filename of a parset-file or an instance of
    `lofar.parameterset.parameterset`.
    """
    if isinstance(parset, str):
        temp_parset = parameterset(parset)
    else:
        temp_parset = parset.makeSubset('')  # a sneaky way to copy the parset
    for key, value in data.items():
        temp_parset.replace(key, str(value))
    fd, output = mkstemp(dir=output_dir)
    temp_parset.writeFile(output)
    os.close(fd)
    return output
def patch_parset(parset, data, output_dir=None):
    """
    Generate a parset file by adding the contents of the data dictionary to
    the specified parset object. Write it to file, and return the filename.

    `parset` may either be the filename of a parset-file or an instance of
    `lofar.parameterset.parameterset`.
    """
    if isinstance(parset, str):
        temp_parset = parameterset(parset)
    else:
        temp_parset = parset.makeSubset('')  # a sneaky way to copy the parset
    for key, value in data.iteritems():
        temp_parset.replace(key, str(value))
    fd, output = mkstemp(dir=output_dir)
    temp_parset.writeFile(output)
    os.close(fd)
    return output
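# Hedged usage sketch for patch_parset() above (not part of the original
# module); paths and keys are illustrative. The caller owns the returned
# temporary file and should remove it when done, as the BBS control recipe
# does in its finally-clause.
patched_path = patch_parset('/path/to/bbs_template.parset',
                            {'Observation': '/path/to/obs.gvds',
                             'BBDB.Key': 'my_pipeline_key'})
try:
    pass  # hand patched_path to the tool that consumes it
finally:
    os.remove(patched_path)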
def __init__(self, parsetDict):
    # The constructor gets the subset of the NDPPP parset containing
    # all key-value pairs for this step.
    # Note: the superclass constructor MUST be called.
    DPStep.__init__(self, parsetDict)
    parset = parameterset(parsetDict)
    self.itsTimeFill = 0.
    self.itsTimeFlag = 0.
    self.itsTimeReorder = 0.
    self.itsTimeSolve = 0.
    self.itsSols = []
    self.itsTimeSlot = 0
    self.itsMinBlPerAnt = parset.getInt('minBlPerAnt', 4)
    self.itsSolInt = parset.getInt('solint', 1)
    if self.itsSolInt > 1:
        raise NotImplementedError("SolInt>1 is not yet supported")
    self.itsNChan = parset.getInt('nchan', 0)
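# Small illustrative sketch (not from the original step) of the
# parset-with-default pattern used in the constructor above: keys missing
# from the step's sub-parset fall back to the supplied default. Values are
# made up.
step_parset = parameterset({'solint': '1', 'nchan': '8'})
step_parset.getInt('solint', 1)       # -> 1 (key present)
step_parset.getInt('minBlPerAnt', 4)  # -> 4 (key absent, default returned)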
def processMessages(receiver, matchPrefix, execPath, msgSaveDir): while True: msg = None try: msg = receiver.get() # blocking if msg is None: continue content = msg.content() # payload type can be unicode, but parameterset only converts str to std::string message = str(content.payload) ps = lofParset.parameterset() ps.adoptBuffer(message) hosts = getOutputHosts(ps, matchPrefix) if hosts: logger.info('Received message is applicable to us, so act on it') obsId = content.sasid messageFilename = msgSaveDir + 'L' + obsId + '.parset.xml' try: saveData(messageFilename, message) hosts = uniq(hosts) hosts.sort() runProcess(execPath, messageFilename, hosts) except IOError as exc: # saveData() logger.error('Skipped running executable: failed to save message to %s: %s', exc.filename, exc.strerror) except OSError as exc: # runProcess() logger.error('Failed to run executable: %s', exc.strerror) logger.info('Done with message') except lofMess.message.MessageException as exc: # XMLDoc(), _get_data() logger.error('Failed to parse or retrieve node from XML message: %s', exc.message) finally: if msg is not None: receiver.ack(msg) # optional for topics, needed for queues
def go(self): self.logger.info("Starting vdsreader run") super(vdsreader, self).go() # ********************************************************************* # 1. Open the gvds file as a parameterset try: gvds = parameterset(self.inputs['gvds']) except: self.logger.error("Unable to read G(V)DS file") raise self.logger.info("Building list of measurementsets") # ********************************************************************** # 2. convert al partx.FileName values to ms ms_names = [ gvds.getString("Part%d.FileName" % (part_no,)) for part_no in xrange(gvds.getInt("NParts")) ] self.logger.debug(ms_names) self.outputs['data'] = ms_names # **********************************************************************\ # 3. parse start and end time and pointing information try: self.outputs['start_time'] = gvds.getString('StartTime') self.outputs['end_time'] = gvds.getString('EndTime') except: self.logger.warn("Failed to read start/end time from GVDS file") try: self.outputs['pointing'] = { 'type': gvds.getStringVector('Extra.FieldDirectionType')[0], 'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0], 'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0] } except: self.logger.warn("Failed to read pointing information from GVDS file") return 0
def go(self): self.logger.info("Starting vdsreader run") super(vdsreader, self).go() # ********************************************************************* # 1. Open the gvds file as a parameterset try: gvds = parameterset(self.inputs['gvds']) except: self.logger.error("Unable to read G(V)DS file") raise self.logger.info("Building list of measurementsets") # ********************************************************************** # 2. convert al partx.FileName values to ms ms_names = [ gvds.getString("Part%d.FileName" % (part_no,)) for part_no in range(gvds.getInt("NParts")) ] self.logger.debug(ms_names) self.outputs['data'] = ms_names # **********************************************************************\ # 3. parse start and end time and pointing information try: self.outputs['start_time'] = gvds.getString('StartTime') self.outputs['end_time'] = gvds.getString('EndTime') except: self.logger.warn("Failed to read start/end time from GVDS file") try: self.outputs['pointing'] = { 'type': gvds.getStringVector('Extra.FieldDirectionType')[0], 'dec': gvds.getStringVector('Extra.FieldDirectionDec')[0], 'ra': gvds.getStringVector('Extra.FieldDirectionRa')[0] } except: self.logger.warn("Failed to read pointing information from GVDS file") return 0
def go(self): self.logger.info("Starting BBS run") super(new_bbs, self).go() # Check for relevant input parameters in the parset-file # --------------------------------------------------------------------- self.logger.debug("Reading parset from %s" % self.inputs['parset']) self.parset = parameterset(self.inputs['parset']) self._set_input('db_host', 'BBDB.Host') self._set_input('db_user', 'BBDB.User') self._set_input('db_name', 'BBDB.Name') self._set_input('db_key', 'BBDB.Key') #self.logger.debug("self.inputs = %s" % self.inputs) # Clean the blackboard database # --------------------------------------------------------------------- self.logger.info( "Cleaning BBS database for key '%s'" % (self.inputs['db_key']) ) command = ["psql", "-h", self.inputs['db_host'], "-U", self.inputs['db_user'], "-d", self.inputs['db_name'], "-c", "DELETE FROM blackboard.session WHERE key='%s';" % self.inputs['db_key'] ] self.logger.debug(command) if subprocess.call(command) != 0: self.logger.warning( "Failed to clean BBS database for key '%s'" % self.inputs['db_key'] ) # Create a bbs_map describing the file mapping on disk # --------------------------------------------------------------------- if not self._make_bbs_map(): return 1 # Produce a GVDS file, describing the data that must be processed. gvds_file = self.run_task( "vdsmaker", self.inputs['data_mapfile'], gvds=self.inputs['gvds'] )['gvds'] # Construct a parset for BBS GlobalControl by patching the GVDS # file and database information into the supplied template # ------------------------------------------------------------------ self.logger.debug("Building parset for BBS control") # Create a location for parsets job_directory = self.config.get( "layout", "job_directory") parset_directory = os.path.join(job_directory, "parsets") create_directory(parset_directory) # patch the parset and copy result to target location remove tempfile try: bbs_parset = utilities.patch_parset( self.parset, { 'Observation': gvds_file, 'BBDB.Key': self.inputs['db_key'], 'BBDB.Name': self.inputs['db_name'], 'BBDB.User': self.inputs['db_user'], 'BBDB.Host': self.inputs['db_host'], #'BBDB.Port': self.inputs['db_name'], } ) bbs_parset_path = os.path.join(parset_directory, "bbs_control.parset") shutil.copyfile(bbs_parset, bbs_parset_path) self.logger.debug("BBS control parset is %s" % (bbs_parset_path,)) finally: # Always remove the file in the tempdir os.remove(bbs_parset) try: # When one of our processes fails, we set the killswitch. # Everything else will then come crashing down, rather than # hanging about forever. # -------------------------------------------------------------- self.killswitch = threading.Event() self.killswitch.clear() signal.signal(signal.SIGTERM, self.killswitch.set) # GlobalControl runs in its own thread # -------------------------------------------------------------- run_flag = threading.Event() run_flag.clear() bbs_control = threading.Thread( target=self._run_bbs_control, args=(bbs_parset, run_flag) ) bbs_control.start() run_flag.wait() # Wait for control to start before proceeding # We run BBS KernelControl on each compute node by directly # invoking the node script using SSH # Note that we use a job_server to send out job details and # collect logging information, so we define a bunch of # ComputeJobs. However, we need more control than the generic # ComputeJob.dispatch method supplies, so we'll control them # with our own threads. 
# -------------------------------------------------------------- command = "python %s" % (self.__file__.replace('master', 'nodes')) jobpool = {} bbs_kernels = [] with job_server(self.logger, jobpool, self.error) as(jobhost, jobport): self.logger.debug("Job server at %s:%d" % (jobhost, jobport)) for job_id, details in enumerate(self.bbs_map): host, files = details jobpool[job_id] = ComputeJob( host, command, arguments=[ self.inputs['kernel_exec'], files, self.inputs['db_key'], self.inputs['db_name'], self.inputs['db_user'], self.inputs['db_host'] ] ) bbs_kernels.append( threading.Thread( target=self._run_bbs_kernel, args=(host, command, job_id, jobhost, str(jobport)) ) ) self.logger.info("Starting %d threads" % len(bbs_kernels)) for thread in bbs_kernels: thread.start() self.logger.debug("Waiting for all kernels to complete") for thread in bbs_kernels: thread.join() # When GlobalControl finishes, our work here is done # ---------------------------------------------------------- self.logger.info("Waiting for GlobalControl thread") bbs_control.join() finally: os.unlink(bbs_parset) if self.killswitch.isSet(): # If killswitch is set, then one of our processes failed so # the whole run is invalid # ---------------------------------------------------------- return 1 self.outputs['mapfile'] = self.inputs['data_mapfile'] return 0
def __init__(self):
    super(new_bbs, self).__init__()
    self.bbs_map = list()
    self.parset = parameterset()
    self.killswitch = threading.Event()
def __init__(self):
    super(control, self).__init__()
    self.parset = parameterset()
    self.momID = 0
    self.sasID = 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Get input from parset, validate and cast to pipeline 'data types' # Only perform work on existing files # Created needed directories # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join( mapfile_dir, "input_correlated.mapfile" ) output_correlated_mapfile = os.path.join( mapfile_dir, "output_correlated.mapfile" ) output_instrument_mapfile = os.path.join( mapfile_dir, "output_instrument.mapfile" ) self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create database needed for performing work: # Vds, descibing data on the nodes # sourcedb, For skymodel (A-team) # parmdb for outputtting solutions # Produce a GVDS file describing the data on the compute nodes. with duration(self, "vdsmaker"): gvds_file = self.run_task( "vdsmaker", input_correlated_mapfile )['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Run NDPPP to demix the A-Team sources # TODOW: Do flagging? # Create a parameter-subset for DPPP and write it to file. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 4. Run BBS with a model of the calibrator # Create a parmdb for calibration solutions # Create sourcedb with known calibration solutions # Run bbs with both # Create an empty parmdb for BBS with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", dppp_mapfile, mapfile=os.path.join(mapfile_dir, 'bbs.parmdb.mapfile'), suffix='.bbs.parmdb' )['mapfile'] # Create a sourcedb based on sourcedb's input argument "skymodel" with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, skymodel=os.path.join( self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', py_parset.getString('Calibration.SkyModel') + '.skymodel'), mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'), suffix='.bbs.sourcedb')['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the calibrator source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 5. Perform gain outlier correction on the found calibration solutions # Swapping outliers in the gains with the median # Export the calibration solutions using gainoutliercorrection and store # the results in the files specified in the instrument mapfile. export_instrument_model = py_parset.getBool( 'Calibration.exportCalibrationParameters', False) with duration(self, "gainoutliercorrection"): self.run_task("gainoutliercorrection", (parmdb_mapfile, output_instrument_mapfile), sigma=1.0, export_instrument_model=export_instrument_model) # TODO: Parset parameter # ********************************************************************* # 6. Copy corrected MS's to their final output destination. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile ) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two files and write the final feedback file correlated_metadata = os.path.join(parset_dir, "correlated.metadata") instrument_metadata = os.path.join(parset_dir, "instrument.metadata") with duration(self, "get_metadata"): self.run_task("get_metadata", output_correlated_mapfile, parset_file=correlated_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") with duration(self, "get_metadata"): self.run_task("get_metadata", output_instrument_mapfile, parset_file=instrument_metadata, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel") parset = parameterset(correlated_metadata) parset.adoptFile(instrument_metadata) parset.writeFile(self.parset_feedback_file) return 0
def __init__(self):
    control.__init__(self)
    self.parset = parameterset()
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( 'ObsSW.Observation.ObservationControl.PythonControl.') # Get input/output-data products specifications. self._get_io_product_specs() # Create some needed directories job_dir = self.config.get("layout", "job_directory") mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(mapfile_dir) parset_dir = os.path.join(job_dir, "parsets") create_directory(parset_dir) # ********************************************************************* # 2. Copy the instrument files to the correct node # The instrument files are currently located on the wrong nodes # Copy to correct nodes and assign the instrument table the now # correct data # Copy the instrument files to the corrent nodes: failures might happen # update both intrument and datamap to contain only successes! self._copy_instrument_files(mapfile_dir) # Write input- and output data map-files. data_mapfile = os.path.join(mapfile_dir, "data.mapfile") self.input_data['data'].save(data_mapfile) copied_instrument_mapfile = os.path.join(mapfile_dir, "copied_instrument.mapfile") self.input_data['instrument'].save(copied_instrument_mapfile) self.logger.debug( "Wrote input data mapfile: %s" % data_mapfile ) # Save copied files to a new mapfile corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile") self.output_data['data'].save(corrected_mapfile) self.logger.debug( "Wrote output corrected data mapfile: %s" % corrected_mapfile ) # Validate number of copied files, abort on zero files copied if len(self.input_data['data']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['data']) ) # ********************************************************************* # 3. Create database needed for performing work: # - GVDS, describing data on the compute nodes # - SourceDB, for skymodel (A-team) # - ParmDB for outputtting solutions with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds'] # Read metadata (e.g., start- and end-time) from the GVDS file. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", data_mapfile)['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. 
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task("ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile') )['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", data_mapfile )['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task("bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile, allow_move=True ) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", corrected_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts') ), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def go(self): super(get_metadata, self).go() # ******************************************************************** # 1. Parse and validate inputs args = self.inputs['args'] product_type = self.inputs['product_type'] global_prefix = self.inputs['parset_prefix'] # Add a trailing dot (.) if not present in the prefix. if global_prefix and not global_prefix.endswith('.'): global_prefix += '.' if not product_type in self.valid_product_types: self.logger.warn( "Unknown product type: %s\n\tValid product types are: %s" % (product_type, ', '.join(self.valid_product_types)) ) # ******************************************************************** # 2. Load mapfiles self.logger.debug("Loading input-data mapfile: %s" % args[0]) data = DataMap.load(args[0]) # ******************************************************************** # 3. call node side of the recipe command = "python %s" % (self.__file__.replace('master', 'nodes')) data.iterator = DataMap.SkipIterator jobs = [] for inp in data: jobs.append( ComputeJob( inp.host, command, arguments=[ inp.file, self.inputs['product_type'] ] ) ) self._schedule_jobs(jobs) for job, inp in zip(jobs, data): if job.results['returncode'] != 0: inp.skip = True # ******************************************************************** # 4. validate performance # 4. Check job results, and create output data map file if self.error.isSet(): # Abort if all jobs failed if all(job.results['returncode'] != 0 for job in jobs): self.logger.error("All jobs failed. Bailing out!") return 1 else: self.logger.warn( "Some jobs failed, continuing with succeeded runs" ) self.logger.debug("Updating data map file: %s" % args[0]) data.save(args[0]) # ******************************************************************** # 5. Create the parset-file and return it to the caller parset = parameterset() prefix = "Output_%s_" % product_type #Underscore is needed because # Mom / LTA cannot differentiate input and output parset.replace('%snrOf%s' % (global_prefix, prefix), str(len(jobs))) prefix = global_prefix + prefix for idx, job in enumerate(jobs): self.logger.debug("job[%d].results = %s" % (idx, job.results)) # the Master/node communication adds a monitor_stats entry, # this must be remove manually here meta_data_parset = metadata.to_parset(job.results) try: meta_data_parset.remove("monitor_stats") except: pass parset.adoptCollection(meta_data_parset, '%s[%d].' % (prefix, idx)) # Return result to caller parset.writeFile(self.inputs["metadata_file"]) return 0
def doAssignment(self, specification_tree):
    logger.info('doAssignment: specification_tree=%s' % (specification_tree))

    otdb_id = specification_tree['otdb_id']
    taskType = specification_tree.get('task_type', '').lower()
    status = specification_tree.get('state', '').lower()

    if status not in ['approved', 'prescheduled']:  # cep2 accepts both, cep4 only prescheduled, see below
        logger.info('skipping specification for otdb_id=%s because status=%s', otdb_id, status)
        return

    # parse main parset...
    mainParset = parameterset(specification_tree['specification'])
    momId = mainParset.getInt('Observation.momID', -1)
    try:
        startTime = datetime.strptime(mainParset.getString('Observation.startTime'), '%Y-%m-%d %H:%M:%S')
        endTime = datetime.strptime(mainParset.getString('Observation.stopTime'), '%Y-%m-%d %H:%M:%S')
    except ValueError:
        logger.warning('cannot parse start/end time from specification for otdb_id=%s', otdb_id)

    # insert new task and specification in the radb
    # any existing specification and task with same otdb_id will be deleted automatically
    logger.info('doAssignment: insertSpecification momId=%s, otdb_id=%s, status=%s, taskType=%s, startTime=%s, endTime=%s' %
                (momId, otdb_id, status, taskType, startTime, endTime))
    result = self.radbrpc.insertSpecificationAndTask(momId, otdb_id, status, taskType, startTime, endTime, str(mainParset))

    if not result['inserted']:
        logger.error('could not insert specification and task')
        return

    specificationId = result['specification_id']
    taskId = result['task_id']
    logger.info('doAssignment: inserted specification (id=%s) and task (id=%s)' % (specificationId, taskId))

    # do not assign resources to task for other clusters than cep4
    if not self.checkClusterIsCEP4(mainParset):
        return

    if status != 'prescheduled':
        logger.info('skipping resource assignment for CEP4 task otdb_id=%s because status=%s' % (otdb_id, status))
        return

    needed = self.getNeededResouces(specification_tree)
    logger.info('doAssignment: getNeededResouces=%s' % (needed,))

    if str(otdb_id) not in needed:
        logger.error("no otdb_id %s found in estimator results %s" % (otdb_id, needed))
        return

    if taskType not in needed[str(otdb_id)]:
        logger.error("no task type %s found in estimator results %s" % (taskType, needed[str(otdb_id)]))
        return

    # make sure the availability in the radb is up to date
    # TODO: this should be updated regularly
    try:
        self.updateAvailableResources('cep4')
    except Exception as e:
        logger.warning("Exception while updating available resources: %s" % str(e))

    # claim the resources for this task
    # during the claim inserts the claims are automatically validated
    # and if not enough resources are available, then they are put to conflict status
    # also, if any claim is in conflict state, then the task is put to conflict status as well
    main_needed = needed[str(otdb_id)]
    task = self.radbrpc.getTask(taskId)
    claimed, claim_ids = self.claimResources(main_needed, task)
    if claimed:
        conflictingClaims = self.radbrpc.getResourceClaims(task_ids=taskId, status='conflict')
        if conflictingClaims:
            logger.warning('doAssignment: %s conflicting claims detected. Task cannot be scheduled. %s' %
                           (len(conflictingClaims), conflictingClaims))
        else:
            logger.info('doAssignment: all claims for task %s were successfully claimed. Setting task status to scheduled' % (taskId,))
            self.radbrpc.updateTaskAndResourceClaims(taskId, task_status='scheduled', claim_status='allocated')

    self.processPredecessors(specification_tree)
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. Note: return 0 on success, 1 on failure. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. # # Note that PULP will read many of these fields directly. That makes # the following fields, and possibly others, part of the API towards # PULP: # # self.config # self.logger # self.input_data # self.output_data # self.parset_feedback_file # self.job_dir # Get input/output-data products specifications. self._get_io_product_specs() self.job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(self.job_dir, "parsets") mapfile_dir = os.path.join(self.job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files # Coherent Stokes self.input_CS_mapfile = os.path.join(mapfile_dir, "input_CS_data.mapfile") self.input_data['coherent'].save(self.input_CS_mapfile) # Incoherent Stokes self.input_IS_mapfile = os.path.join(mapfile_dir, "input_IS_data.mapfile") self.input_data['incoherent'].save(self.input_IS_mapfile) # Output data self.output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data['data'].save(self.output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.pulsar_parms = self.parset.makeSubset(self.parset.fullModuleName('Pulsar') + '.') pulsar_parset = os.path.join(parset_dir, "Pulsar.parset") self.pulsar_parms.writeFile(pulsar_parset) self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # Rebuilding sys.argv without the options given automatically by framework # --auto = automatic run from framework # -q = quiet mode, no user interaction sys.argv = ['pulp.py', '--auto', '-q'] if (not self.coherentStokesEnabled): sys.argv.extend(["--noCS", "--noCV", "--noFE"]) if (not self.incoherentStokesEnabled): sys.argv.append("--noIS") # Tell PULP where to write the feedback to self.parset_feedback_file = "%s_feedback" % (self.parset_file,) # Run the pulsar pipeline self.logger.debug("Starting pulp with: " + join(sys.argv)) p = pulp.pulp(self) # TODO: MUCK self to capture the API # NOTE: PULP returns 0 on SUCCESS!! if p.go(): self.logger.error("PULP did not succeed. Bailing out!") return 1 # Read and forward the feedback try: metadata = parameterset(self.parset_feedback_file) except IOError, e: self.logger.error("Could not read feedback from %s: %s" % (metadata_file,e)) return 1
def run(self, input_image, bdsm_parameter_run1_path, bdsm_parameter_run2x_path, catalog_output_path, image_output_path, sourcedb_target_path, environment, working_directory, create_sourcdb_exec): """ :param input_image: image to look for sources in :param bdsm_parameter_run1_path: parset with bdsm parameters for the first run :param bdsm_parameter_run2x_path: second ron bdsm parameters :param catalog_output_path: Path to full list of sources found :param image_output_path: Path to fits image with all sources substracted :param sourcedb_target_path: Path to store the sourcedb created from containing all the found sources :param environment: environment for runwithlog4cplus :param working_directory: Working dir :param create_sourcdb_exec: Path to create sourcedb executable :rtype: self.outputs['source_db'] sourcedb_target_path """ #****************************************************************** # 0. Create the directories used in this recipe create_directory(working_directory) import lofar.bdsm as bdsm#@UnresolvedImport self.logger.info("Starting imager_source_finding") self.environment.update(environment) # default frequency is None (read from image), save for later cycles. # output of pybdsm forgets freq of source image frequency = None # Output of the for loop: n iterations and any source found n_itter_sourcefind = None sources_found = False max_sourcefind_itter = 5 # TODO: maximum itter is a magic value for idx in range(max_sourcefind_itter): # ****************************************************************** # 1. Select correct input image # The first iteration uses the input image, second and later use the # output of the previous iteration. The 1+ iteration have a # seperate parameter set. if idx == 0: input_image_local = input_image # input_image_cropped image_output_path_local = image_output_path + "_0" bdsm_parameter_local = parameterset(bdsm_parameter_run1_path) else: input_image_local = image_output_path + "_{0}".format( str(idx - 1)) image_output_path_local = image_output_path + "_{0}".format( str(idx)) bdsm_parameter_local = parameterset(bdsm_parameter_run2x_path) # ***************************************************************** # 2. parse the parameters and convert to python if possible # this is needed for pybdsm bdsm_parameters = {} for key in bdsm_parameter_local.keys(): parameter_value = bdsm_parameter_local.getStringVector(key)[0] try: parameter_value = eval(parameter_value) except: pass #do nothing bdsm_parameters[key] = parameter_value # pybdsm needs its filename here, to derive the log location bdsm_parameters["filename"] = input_image_local # ***************************************************************** # 3. Start pybdsm self.logger.debug( "Starting sourcefinder bdsm on {0} using parameters:".format( input_image_local)) self.logger.debug(repr(bdsm_parameters)) img = bdsm.process_image(bdsm_parameters, frequency = frequency) # Always export the catalog img.write_catalog( outfile = catalog_output_path + "_{0}".format(str(idx)), catalog_type = 'gaul', clobber = True, format = "bbs", force_output = True) # If no more matching of sources with gausians is possible (nsrc==0) # break the loop if img.nsrc == 0: n_itter_sourcefind = idx break # We have at least found a single source! self.logger.debug("Number of source found: {0}".format( img.nsrc)) # ***************************************************************** # 4. 
export the image self.logger.debug("Wrote list of sources to file at: {0})".format( catalog_output_path)) img.export_image(outfile = image_output_path_local, img_type = 'gaus_resid', clobber = True, img_format = "fits") self.logger.debug("Wrote fits image with substracted sources" " at: {0})".format(image_output_path_local)) # Save the frequency from image header of the original input file, # This information is not written by pybdsm to the exported image frequency = img.frequency # if not set the maximum number of itteration us performed if n_itter_sourcefind == None: n_itter_sourcefind = max_sourcefind_itter # ******************************************************************** # 5. The produced catalogs now need to be combined into a single list # Call with the number of loops and the path to the files, only combine # if we found sources self.logger.debug( "Writing source list to file: {0}".format(catalog_output_path)) self._combine_source_lists(n_itter_sourcefind, catalog_output_path) # ********************************************************************* # 6. Convert sourcelist to sourcedb self._create_source_db(catalog_output_path, sourcedb_target_path, working_directory, create_sourcdb_exec, False) # Assign the outputs self.outputs["catalog_output_path"] = catalog_output_path self.outputs["source_db"] = sourcedb_target_path return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Get input from parset, validate and cast to pipeline 'data types' # Only perform work on existing files # Created needed directories # Create a parameter-subset containing only python-control stuff. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join(mapfile_dir, "input_correlated.mapfile") output_correlated_mapfile = os.path.join(mapfile_dir, "output_correlated.mapfile") output_instrument_mapfile = os.path.join(mapfile_dir, "output_instrument.mapfile") self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug( "Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create database needed for performing work: # Vds, descibing data on the nodes # sourcedb, For skymodel (A-team) # parmdb for outputtting solutions # Produce a GVDS file describing the data on the compute nodes. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_correlated_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create an empty parmdb for DPPP with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", input_correlated_mapfile, mapfile=os.path.join( mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb')['mapfile'] # Create a sourcedb to be used by the demixing phase of DPPP # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel') if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", input_correlated_mapfile, mapfile=os.path.join( mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob')['mapfile'] # ********************************************************************* # 3. Run NDPPP to demix the A-Team sources # TODOW: Do flagging? # Create a parameter-subset for DPPP and write it to file. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always=py_parset.getStringVector( 'PreProcessing.demix_always'), demix_if_needed=py_parset.getStringVector( 'PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile)['mapfile'] # ********************************************************************* # 4. Run BBS with a model of the calibrator # Create a parmdb for calibration solutions # Create sourcedb with known calibration solutions # Run bbs with both # Create an empty parmdb for BBS with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task("setupparmdb", dppp_mapfile, mapfile=os.path.join( mapfile_dir, 'bbs.parmdb.mapfile'), suffix='.bbs.parmdb')['mapfile'] # Create a sourcedb based on sourcedb's input argument "skymodel" with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, skymodel=os.path.join( self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', py_parset.getString('Calibration.SkyModel') + '.skymodel'), mapfile=os.path.join(mapfile_dir, 'bbs.sourcedb.mapfile'), suffix='.bbs.sourcedb')['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the calibrator source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile)['data_mapfile'] # ********************************************************************* # 5. Perform gain outlier correction on the found calibration solutions # Swapping outliers in the gains with the median # Export the calibration solutions using gainoutliercorrection and store # the results in the files specified in the instrument mapfile. export_instrument_model = py_parset.getBool( 'Calibration.exportCalibrationParameters', False) with duration(self, "gainoutliercorrection"): self.run_task("gainoutliercorrection", (parmdb_mapfile, output_instrument_mapfile), sigma=1.0, export_instrument_model=export_instrument_model ) # TODO: Parset parameter # ********************************************************************* # 6. Copy corrected MS's to their final output destination. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile) # ********************************************************************* # 7. Create feedback file for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two files and write the final feedback file correlated_metadata = os.path.join(parset_dir, "correlated.metadata") instrument_metadata = os.path.join(parset_dir, "instrument.metadata") with duration(self, "get_metadata"): self.run_task( "get_metadata", output_correlated_mapfile, parset_file=correlated_metadata, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated") with duration(self, "get_metadata"): self.run_task( "get_metadata", output_instrument_mapfile, parset_file=instrument_metadata, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel") parset = parameterset(correlated_metadata) parset.adoptFile(instrument_metadata) parset.writeFile(self.parset_feedback_file) return 0
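# A minimal, self-contained sketch (hypothetical keys and file names, not the
# pipeline's real metadata) of the parameterset merge performed above:
# adoptFile() folds the keys of a second parset file into an existing
# parameterset, and writeFile() serialises the combined result as the single
# feedback file handed to the LOFAR framework.
from lofar.parameterset import parameterset

correlated = parameterset()
correlated.add("Correlated[0].filename", "SB000.MS")

instrument = parameterset()
instrument.add("InstrumentModel[0].filename", "SB000.instrument")
instrument.writeFile("/tmp/instrument.metadata")

correlated.adoptFile("/tmp/instrument.metadata")  # merge the second parset
correlated.writeFile("/tmp/feedback.parset")      # combined feedback file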
#!/usr/bin/env python from lofar.parameterset import parameterset # Test task.feedback.dataproducts from lofar.messagebus.protocols import TaskFeedbackDataproducts parset = parameterset() parset.add("foo", "bar") msg = TaskFeedbackDataproducts("from", "forUser", "summary", 1, 2, parset) # Test task.feedback.processing from lofar.messagebus.protocols import TaskFeedbackProcessing parset = parameterset() parset.add("foo", "bar") msg = TaskFeedbackProcessing("from", "forUser", "summary", 1, 2, parset) # Test task.feedback.state from lofar.messagebus.protocols import TaskFeedbackState msg = TaskFeedbackState("from", "forUser", "summary", 1, 2, True)
"/opt/pipeline/dependencies/lib/python2.5/site-packages:/opt/pipeline/framework/lib/python2.5/site-packages:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/lofar/lib/python2.5/site-packages:/opt/pythonlibs/lib/python/site-packages", "LD_LIBRARY_PATH": "/opt/pipeline/dependencies/lib:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/casacore/lib:/opt/LofIm/daily/lofar/lib:/opt/wcslib/lib/:/opt/hdf5/lib:/opt/LofIm/daily/casarest/lib:/data/sys/opt/lofar/external/log4cplus/lib", "PATH": "/opt/pipeline/dependencies/bin:/home/swinbank/sw/bin:/opt/pipeline/dependencies/bin:/usr/local/bin:/usr/bin:/usr/X11R6/bin:/bin:/usr/games:/opt/LofIm/daily/casarest/bin:/opt/LofIm/daily/casarest/bin", } # ----------------------------------------------------------------------------- # To ensure consistency in the configuration between this wrapper and the # pipeline, we will set the start time here. start_time = datetime.datetime.utcnow().replace(microsecond=0).isoformat() # We should always be called with standard command line arguments: # tree ID, parset, ... others? input_parset = parameterset(sys.argv[1]) tree_id = sys.argv[2] # check this! # Extract runtime, working, results directories from input parset runtime_directory = input_parset.getString( "ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory") working_directory = input_parset.getString( "ObsSW.Observation.ObservationControl.PythonControl.workingDirectory") results_directory = input_parset.getString( "ObsSW.Observation.ObservationControl.PythonControl.resultDirectory") # Set up configuration for later processing stages config = ConfigParser({ "job_name": tree_id, "cwd": os.getcwd(), "start_time": start_time,
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting imager pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") self.output_data.save(output_image_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") # We start with an empty source_list map. It should contain n_output # entries all set to empty strings source_list_map_path = os.path.join(self.mapfile_dir, "initial_sourcelist.mapfile") source_list_map = DataMap.load(target_mapfile) # copy the output map for item in source_list_map: item.file = "" # set all to empty string source_list_map.save(source_list_map_path) for idx_loop in range(number_of_major_cycles): # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, source_list_map_path=source_list_map_path, skip_create_dbs=False) # ***************************************************************** # (3) bbs_imager recipe. bbs_output = self._bbs(timeslice_map_path, parmdbs_path, sourcedb_map_path, skip=False) # TODO: Extra recipe: concat timeslices using pyrap.concatms # (see prepare) # ***************************************************************** # (4) Get parameters awimager from the prepare_parset and inputs aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, idx_loop, sourcedb_map_path, skip=False) # ***************************************************************** # (5) Source finding sourcelist_map, found_sourcedb_path = self._source_finding( aw_image_mapfile, idx_loop, skip=False) # should the output be a sourcedb? 
instead of a sourcelist # TODO: minbaseline should be a parset value as is maxbaseline.. minbaseline = 0 # ********************************************************************* # (6) Finalize: placed_data_image_map = self._finalize( aw_image_mapfile, processed_ms_dir, ms_per_image_map_path, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path) # ********************************************************************* # (7) Get metadata # Create a parset containing the metadata for MAC/SAS metadata_file = "%s_feedback_SkyImage" % (self.parset_file, ) self.run_task( "get_metadata", placed_data_image_map, parset_prefix=(full_parset.getString('prefix') + full_parset.fullModuleName('DataProducts')), product_type="SkyImage", metadata_file=metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting longbaseline pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join(self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) ## *************************************************************** #output_mapfile_path = os.path.join(self.mapfile_dir, "output.mapfile") #self.output_mapfile.save(output_mapfile_path) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug("Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_ms_mapfile = os.path.join(self.mapfile_dir, "output.mapfile") self.output_data.save(output_ms_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_ms_mapfile)) # TODO: This is a backdoor option to manually add beamtables when these # are missing on the provided ms. There is NO use case for users of the # pipeline add_beam_tables = self.parset.getBool("Imaging.addBeamTables", False) # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._long_baseline(input_mapfile, target_mapfile, add_beam_tables, output_ms_mapfile) # ********************************************************************* # (7) Get metadata # create a parset with information that is available on the toplevel toplevel_meta_data = parameterset( {'feedback_version': feedback_version}) # get some parameters from the imaging pipeline parset: subbandgroups_per_ms = self.parset.getInt( "LongBaseline.subbandgroups_per_ms") subbands_per_subbandgroup = self.parset.getInt( "LongBaseline.subbands_per_subbandgroup") toplevel_meta_data.replace("subbandsPerSubbandGroup", str(subbands_per_subbandgroup)) toplevel_meta_data.replace("subbandGroupsPerMS", str(subbandgroups_per_ms)) # Create a parset-file containing the metadata for MAC/SAS at nodes metadata_file = "%s_feedback_Correlated" % (self.parset_file, ) self.run_task( "get_metadata", output_ms_mapfile, parset_prefix=(full_parset.getString('prefix') + full_parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(toplevel_meta_data) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
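# A minimal sketch (hypothetical values; the version string is assumed) of the
# toplevel feedback parset built above: a parameterset can be constructed from
# a plain dict, keys are overwritten with replace(), and dict() exposes the
# resulting flat key/value pairs (all values are stored as strings).
from lofar.parameterset import parameterset

toplevel_meta_data = parameterset({'feedback_version': '03.01.00'})
toplevel_meta_data.replace("subbandsPerSubbandGroup", str(8))
toplevel_meta_data.replace("subbandGroupsPerMS", str(2))
print(toplevel_meta_data.dict())
# e.g. {'feedback_version': '03.01.00', 'subbandsPerSubbandGroup': '8',
#       'subbandGroupsPerMS': '2'}  (key order may vary)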
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    Note: return 0 on success, 1 on failure.
    """
    # *********************************************************************
    # 1. Prepare phase, collect data from parset and input mapfiles.
    #
    # Note that PULP will read many of these fields directly. That makes
    # the following fields, and possibly others, part of the API towards
    # PULP:
    #
    # self.config
    # self.logger
    # self.input_data
    # self.output_data
    # self.parset_feedback_file
    # self.job_dir

    # Get input/output-data products specifications.
    self._get_io_product_specs()

    self.job_dir = self.config.get("layout", "job_directory")
    self.globalfs = self.config.has_option("remote", "globalfs") and \
        self.config.getboolean("remote", "globalfs")
    parset_dir = os.path.join(self.job_dir, "parsets")
    mapfile_dir = os.path.join(self.job_dir, "mapfiles")

    # Create directories for temporary parset- and map files
    create_directory(parset_dir)
    create_directory(mapfile_dir)

    # Write input- and output data map-files
    # Coherent Stokes
    self.input_CS_mapfile = os.path.join(mapfile_dir,
                                         "input_CS_data.mapfile")
    self.input_data['coherent'].save(self.input_CS_mapfile)
    # Incoherent Stokes
    self.input_IS_mapfile = os.path.join(mapfile_dir,
                                         "input_IS_data.mapfile")
    self.input_data['incoherent'].save(self.input_IS_mapfile)
    # Output data
    self.output_data_mapfile = os.path.join(mapfile_dir,
                                            "output_data.mapfile")
    self.output_data['data'].save(self.output_data_mapfile)

    if len(self.input_data) == 0:
        self.logger.warn("No input data files to process. Bailing out!")
        return 0

    self.pulsar_parms = self.parset.makeSubset(
        self.parset.fullModuleName('Pulsar') + '.')
    pulsar_parset = os.path.join(parset_dir, "Pulsar.parset")

    if self.globalfs:
        # patch for Pulp in case of DOCKER
        for k in [x for x in self.pulsar_parms.keys()
                  if x.endswith("_extra_opts")]:
            self.pulsar_parms.replace(
                k, self.pulsar_parms[k].getString().replace(" ", "\\\\ "))

    self.pulsar_parms.writeFile(pulsar_parset)

    self.logger.debug("Processing: %s" %
                      ', '.join(str(f) for f in self.input_data))

    # Rebuild sys.argv without the options given automatically by the framework
    # --auto = automatic run from framework
    # -q     = quiet mode, no user interaction
    sys.argv = ['pulp.py', '--auto', '-q']

    if self.globalfs:
        project = self.parset.getString(
            self.parset.fullModuleName('Campaign') + '.name')
        sys.argv.extend(['--slurm', '--globalfs', '--docker',
                         '--docker-container=lofar-pulp:%s' %
                         os.environ.get("LOFAR_TAG"),
                         '--raw=/data/projects/%s' % project])
    else:
        sys.argv.append("--auto")

    if (not self.coherentStokesEnabled):
        sys.argv.extend(["--noCS", "--noCV", "--noFE"])

    if (not self.incoherentStokesEnabled):
        sys.argv.append("--noIS")

    # Tell PULP where to write the feedback to
    self.parset_feedback_file = "%s_feedback" % (self.parset_file,)

    # Run the pulsar pipeline
    self.logger.debug("Starting pulp with: " + " ".join(sys.argv))
    self.logger.debug("Calling pulp.pulp(self) with self = %s",
                      pprint.pformat(vars(self)))
    p = pulp.pulp(self)  # TODO: MUCK self to capture the API

    # NOTE: PULP returns 0 on SUCCESS!!
    if p.go():
        self.logger.error("PULP did not succeed. Bailing out!")
        return 1

    # Read and forward the feedback
    try:
        metadata = parameterset(self.parset_feedback_file)
    except IOError as e:
        self.logger.error("Could not read feedback from %s: %s" %
                          (self.parset_feedback_file, e))
        return 1
def get_tasks(self): """ Convert the pipeline into DPU tasks. We assume that the pipeline can be parallelized by creating independent tasks for all its input files. Furthermore, we do take into account that there might be dependencies between different pipelines. In that case, task number i for input file i of the next pipeline will start when task number i for input file i of the previous pipeline has finished. As an example, the following shows how a calibration pipeline followed by a target pipeline (which should wait for the calibration pipeline to finish) are parallelized: Tasks 0 1 ... N Pipeline 0: SB000 SB001 SB00N (all executed independently) (calibration) Pipeline 1: SB000 SB001 SB00N (horizontally independent, but vertically depending on the previous task) (target) The dependencies between the pipelines will be handled at a later stage. """ # First, interpret the parset and get all the information about the # input and output files as was defined in the XML. self.read_parset() inputs_filenames_keys = map(lambda input: str( input['filenames']), self.inputs.values()) inputs_locations_keys = map(lambda input: str( input['locations']), self.inputs.values()) inputs_skip_keys = map(lambda input: str( input['skip']), self.inputs.values()) outputs_filenames_keys = map(lambda output: str(output['filenames']), self.outputs.values()) outputs_locations_keys = map(lambda output: str(output['locations']), self.outputs.values()) outputs_skip_keys = map(lambda output: str(output['skip']), self.outputs.values()) input_map_list = [] output_map_list = [] # Combine the information about each input and output into tuples. # Note that the order of these keys are used when creating the individual jobs: # filenames, locations, skip values input_map_keys = zip(inputs_filenames_keys, inputs_locations_keys, inputs_skip_keys ) output_map_keys = zip(outputs_filenames_keys, outputs_locations_keys, outputs_skip_keys ) # Create a DataMap for each input and each output. for filename, location, skip in input_map_keys: input_map_list.append( DataMap([ tuple(os.path.join(location, filename).split(':')) + (skip,) for filename, location, skip in zip( self.parset.getStringVector(filename), self.parset.getStringVector(location), self.parset.getBoolVector(skip)) ]) ) for filename, location, skip in output_map_keys: output_map_list.append( DataMap([ tuple(os.path.join(location, filename).split(':')) + (skip,) for filename, location, skip in zip( self.parset.getStringVector(filename), self.parset.getStringVector(location), self.parset.getBoolVector(skip)) ]) ) # Align the data maps in order to validate them and set the skip values # in the same way for each input and output. align_data_maps(*(input_map_list+output_map_list)) # Finally, convert everything into individual tasks. pipeline_jobs = [] job_data_product_keys = input_map_keys + output_map_keys for idx, job_data_products in enumerate(zip(*(input_map_list+ output_map_list))): job = cep_pipeline_job() # Clone the parset by creating another instance. job_parset = parameterset() job_parset.adoptArgv(str(self.parset_as_str).split('\n')) job_should_be_skipped = False # Now replace all input and output information by the (single) data # element that should be processed by this task. for [job_data_product, job_data_product_key] in zip(job_data_products, job_data_product_keys): job_should_be_skipped = job_data_product.skip job.host = job_data_product.host # We assume that the job will be launched on the node where the # data is stored. 
host = 'localhost' filename = os.path.basename(job_data_product.file) file_location = os.path.dirname(job_data_product.file) skip = job_data_product.skip # Remember that the key order is determined in a previous zip. job_parset.replace(job_data_product_key[0], str([filename])) job_parset.replace(job_data_product_key[1], str([host + ":" + file_location])) job_parset.replace(job_data_product_key[2], str([skip])) if job_should_be_skipped : # If skip was True for either one of the input/output elements, # we should skip this job but increase the job index. continue job.parset_as_dict = job_parset.dict() job.command = self.get_command() job.name = self.id + "_" + str(idx) pipeline_jobs.append(job) return pipeline_jobs
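# A minimal sketch (hypothetical parset keys and hosts; the DataMap import
# path is assumed to be lofarpipe.support.data_map) of the conversion that
# get_tasks() performs: parallel vectors of file names, "host:directory"
# locations and skip flags are zipped into (host, path, skip) tuples for a
# DataMap, after which every job gets a parset referring to a single element.
import os

from lofar.parameterset import parameterset
from lofarpipe.support.data_map import DataMap

parset = parameterset()
parset.replace("Input.filenames", "[SB000.MS, SB001.MS]")
parset.replace("Input.locations", "[node01:/data/run, node02:/data/run]")
parset.replace("Input.skip", "[false, false]")

input_map = DataMap([
    tuple(os.path.join(location, filename).split(':')) + (skip,)
    for filename, location, skip in zip(
        parset.getStringVector("Input.filenames"),
        parset.getStringVector("Input.locations"),
        parset.getBoolVector("Input.skip"))
])

for item in input_map:
    # One single-element parset per task, mirroring the replace() calls above.
    job_parset = parameterset()
    job_parset.replace("Input.filenames", str([os.path.basename(item.file)]))
    job_parset.replace("Input.locations",
                       str([item.host + ":" + os.path.dirname(item.file)]))
    job_parset.replace("Input.skip", str([item.skip]))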
def __init__(self): control.__init__(self) self.parset = parameterset() self.input_data = {} self.output_data = {} self.parset_feedback_file = None
# Set up environment for pipeline run pipeline_environment = { "PYTHONPATH": "/opt/pipeline/dependencies/lib/python2.5/site-packages:/opt/pipeline/framework/lib/python2.5/site-packages:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/lofar/lib/python2.5/site-packages:/opt/pythonlibs/lib/python/site-packages", "LD_LIBRARY_PATH": "/opt/pipeline/dependencies/lib:/opt/LofIm/daily/pyrap/lib:/opt/LofIm/daily/casacore/lib:/opt/LofIm/daily/lofar/lib:/opt/wcslib/lib/:/opt/hdf5/lib:/opt/LofIm/daily/casarest/lib:/data/sys/opt/lofar/external/log4cplus/lib", "PATH": "/opt/pipeline/dependencies/bin:/home/swinbank/sw/bin:/opt/pipeline/dependencies/bin:/usr/local/bin:/usr/bin:/usr/X11R6/bin:/bin:/usr/games:/opt/LofIm/daily/casarest/bin:/opt/LofIm/daily/casarest/bin", } # ----------------------------------------------------------------------------- # To ensure consistency in the configuration between this wrapper and the # pipeline, we will set the start time here. start_time = datetime.datetime.utcnow().replace(microsecond=0).isoformat() # We should always be called with standard command line arguments: # tree ID, parset, ... others? input_parset = parameterset(sys.argv[1]) tree_id = sys.argv[2] # check this! # Extract runtime, working, results directories from input parset runtime_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.runtimeDirectory") working_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.workingDirectory") results_directory = input_parset.getString("ObsSW.Observation.ObservationControl.PythonControl.resultDirectory") # Set up configuration for later processing stages config = ConfigParser({ "job_name": tree_id, "cwd": os.getcwd(), "start_time": start_time, }) config.read(config_file) config.set('DEFAULT', 'runtime_directory', runtime_directory)
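# A minimal sketch (hypothetical pipeline.cfg contents) of why job_name, cwd
# and start_time are passed in as ConfigParser defaults above: options in the
# DEFAULT section can be interpolated into other options with %(name)s, so
# the configuration file can refer to the current tree ID, start time, etc.
# without knowing their values in advance.
from ConfigParser import ConfigParser  # Python 2, as used above
from StringIO import StringIO

config = ConfigParser({"job_name": "12345", "runtime_directory": "/data/run"})
config.readfp(StringIO("""
[layout]
job_directory = %(runtime_directory)s/jobs/%(job_name)s
"""))
print(config.get("layout", "job_directory"))  # -> /data/run/jobs/12345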
def pipeline_logic(self):
    """
    Define the individual tasks that comprise the current pipeline.
    This method will be invoked by the base-class's `go()` method.
    """
    # *********************************************************************
    # 1. Prepare phase, collect data from parset and input mapfiles
    # Create a parameter-subset containing only python-control stuff.
    py_parset = self.parset.makeSubset(
        'ObsSW.Observation.ObservationControl.PythonControl.')

    # Get input/output-data products specifications.
    self._get_io_product_specs()

    # Create some needed directories
    job_dir = self.config.get("layout", "job_directory")
    mapfile_dir = os.path.join(job_dir, "mapfiles")
    create_directory(mapfile_dir)
    parset_dir = os.path.join(job_dir, "parsets")
    create_directory(parset_dir)

    # *********************************************************************
    # 2. Copy the instrument files to the correct nodes
    #    The instrument files are currently located on the wrong nodes.
    #    Copy them to the correct nodes and update the instrument map so
    #    that it points to the copied data.
    # Copy the instrument files to the correct nodes: failures might happen;
    # update both instrument- and data-map to contain only the successes!
    self._copy_instrument_files(mapfile_dir)

    # Write input- and output data map-files.
    data_mapfile = os.path.join(mapfile_dir, "data.mapfile")
    self.input_data['data'].save(data_mapfile)
    copied_instrument_mapfile = os.path.join(mapfile_dir,
                                             "copied_instrument.mapfile")
    self.input_data['instrument'].save(copied_instrument_mapfile)
    self.logger.debug("Wrote input data mapfile: %s" % data_mapfile)

    # Save copied files to a new mapfile
    corrected_mapfile = os.path.join(mapfile_dir, "corrected_data.mapfile")
    self.output_data['data'].save(corrected_mapfile)
    self.logger.debug("Wrote output corrected data mapfile: %s" %
                      corrected_mapfile)

    # Validate number of copied files, abort on zero files copied
    if len(self.input_data['data']) == 0:
        self.logger.warn("No input data files to process. Bailing out!")
        return 0

    self.logger.debug("Processing: %s" %
                      ', '.join(str(f) for f in self.input_data['data']))

    # *********************************************************************
    # 3. Create databases needed for performing work:
    #    - GVDS, describing data on the compute nodes
    #    - SourceDB, for the skymodel (A-team)
    #    - ParmDB for outputting solutions
    with duration(self, "vdsmaker"):
        gvds_file = self.run_task("vdsmaker", data_mapfile)['gvds']

    # Read metadata (e.g., start- and end-time) from the GVDS file.
    with duration(self, "vdsreader"):
        vdsinfo = self.run_task("vdsreader", gvds=gvds_file)

    # Create an empty parmdb for DPPP
    with duration(self, "setupparmdb"):
        parmdb_mapfile = self.run_task("setupparmdb",
                                       data_mapfile)['mapfile']

    # Create a sourcedb to be used by the demixing phase of DPPP
    # The user-supplied sky model can either be a name, in which case the
    # pipeline will search for a file <name>.skymodel in the default search
    # path $LOFARROOT/share/pipeline/skymodels; or a full path.
    # It is an error if the file does not exist.
skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel') if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile, skymodel=skymodel, suffix='.dppp.sourcedb', type='blob')['mapfile'] # ********************************************************************* # 4. Run NDPPP to demix the A-Team sources # Create a parameter-subset for DPPP and write it to file. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", data_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always=py_parset.getStringVector( 'PreProcessing.demix_always'), demix_if_needed=py_parset.getStringVector( 'PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.mapfile'))['mapfile'] # ******************************************************************** # 5. Run bss using the instrument file from the target observation # Create an empty sourcedb for BBS with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task("setupsourcedb", data_mapfile)['mapfile'] # Create a parameter-subset for BBS and write it to file. bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) # Run BBS to calibrate the target source(s). with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=copied_instrument_mapfile, sky_mapfile=sourcedb_mapfile)['data_mapfile'] # ********************************************************************* # 6. Copy the MS's to their final output destination. # When the copier recipe has run, the map-file named in # corrected_mapfile will contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=corrected_mapfile, mapfiles_dir=mapfile_dir, mapfile=corrected_mapfile) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework metadata_file = "%s_feedback_Correlated" % (self.parset_file, ) with duration(self, "get_metadata"): self.run_task( "get_metadata", corrected_mapfile, parset_prefix=(self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_data_mapfile = os.path.join(mapfile_dir, "input_data.mapfile") self.input_data.save(input_data_mapfile) output_data_mapfile = os.path.join(mapfile_dir, "output_data.mapfile") self.output_data.save(output_data_mapfile) if len(self.input_data) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data)) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task("vdsmaker", input_data_mapfile)['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_data_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): output_data_mapfile = self.run_task("ndppp", (input_data_mapfile, output_data_mapfile), data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 6. 
Create feedback file for further processing by the LOFAR framework # Create a parset containing the metadata metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_data_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=metadata_file) self.send_feedback_processing(parameterset({'feedback_version': feedback_version})) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def run(self, executable, infiles, db_key, db_name, db_user, db_host): """ Depricated functionality """ # executable : path to KernelControl executable # infiles : tuple of MS, instrument- and sky-model files # db_* : database connection parameters # ---------------------------------------------------------------------- self.logger.debug("executable = %s" % executable) self.logger.debug("infiles = %s" % str(infiles)) self.logger.debug("db_key = %s" % db_key) self.logger.debug("db_name = %s" % db_name) self.logger.debug("db_user = %s" % db_user) self.logger.debug("db_host = %s" % db_host) (ms, parmdb_instrument, parmdb_sky) = infiles with log_time(self.logger): if os.path.exists(ms): self.logger.info("Processing %s" % (ms)) else: self.logger.error("Dataset %s does not exist" % (ms)) return 1 # Build a configuration parset specifying database parameters # for the kernel # ------------------------------------------------------------------ self.logger.debug("Setting up BBSKernel parset") # Getting the filesystem must be done differently, using the # DataProduct keys in the parset provided by the scheduler. filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms)) fd, parset_file = mkstemp() kernel_parset = parameterset() for key, value in { "ObservationPart.Filesystem": filesystem, "ObservationPart.Path": ms, "BBDB.Key": db_key, "BBDB.Name": db_name, "BBDB.User": db_user, "BBDB.Host": db_host, "ParmDB.Sky": parmdb_sky, "ParmDB.Instrument": parmdb_instrument }.items(): kernel_parset.add(key, value) kernel_parset.writeFile(parset_file) os.close(fd) self.logger.debug("BBSKernel parset written to %s" % parset_file) # Run the kernel # Catch & log output from the kernel logger and stdout # ------------------------------------------------------------------ working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__), )) try: self.logger.info("******** {0}".format( open(parset_file).read())) cmd = [executable, parset_file, "0"] self.logger.debug("Executing BBS kernel") with CatchLog4CPlus( working_dir, self.logger.name + "." + os.path.basename(ms), os.path.basename(executable), ): bbs_kernel_process = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir) sout, serr = bbs_kernel_process.communicate() log_process_output("BBS kernel", sout, serr, self.logger) if bbs_kernel_process.returncode != 0: raise CalledProcessError(bbs_kernel_process.returncode, executable) except CalledProcessError as e: self.logger.error(str(e)) return 1 finally: os.unlink(parset_file) shutil.rmtree(working_dir) return 0
def run(self, executable, infiles, db_key, db_name, db_user, db_host):
    """
    Deprecated functionality.
    """
    # executable : path to KernelControl executable
    # infiles    : tuple of MS, instrument- and sky-model files
    # db_*       : database connection parameters
    # ----------------------------------------------------------------------
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("infiles = %s" % str(infiles))
    self.logger.debug("db_key = %s" % db_key)
    self.logger.debug("db_name = %s" % db_name)
    self.logger.debug("db_user = %s" % db_user)
    self.logger.debug("db_host = %s" % db_host)

    (ms, parmdb_instrument, parmdb_sky) = infiles

    with log_time(self.logger):
        if os.path.exists(ms):
            self.logger.info("Processing %s" % (ms))
        else:
            self.logger.error("Dataset %s does not exist" % (ms))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up BBSKernel parset")
        # Getting the filesystem must be done differently, using the
        # DataProduct keys in the parset provided by the scheduler.
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(ms))
        fd, parset_file = mkstemp()
        kernel_parset = parameterset()
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": ms,
            "BBDB.Key": db_key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmDB.Sky": parmdb_sky,
            "ParmDB.Instrument": parmdb_instrument
        }.iteritems():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_file)
        os.close(fd)
        self.logger.debug("BBSKernel parset written to %s" % parset_file)

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        try:
            self.logger.info("******** {0}".format(open(parset_file).read()))
            cmd = [executable, parset_file, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(ms),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                )
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable
                )
        except CalledProcessError as e:
            self.logger.error(str(e))
            return 1
        finally:
            os.unlink(parset_file)
            shutil.rmtree(working_dir)

    return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ self.logger.info("Starting imager pipeline") # Define scratch directory to be used by the compute nodes. self.scratch_directory = os.path.join( self.inputs['working_directory'], self.inputs['job_name']) # Get input/output-data products specifications. self._get_io_product_specs() # remove prepending parset identifiers, leave only pipelinecontrol full_parset = self.parset self.parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # remove this # Create directories to store communication and data files job_dir = self.config.get("layout", "job_directory") self.parset_dir = os.path.join(job_dir, "parsets") create_directory(self.parset_dir) self.mapfile_dir = os.path.join(job_dir, "mapfiles") create_directory(self.mapfile_dir) # ********************************************************************* # (INPUT) Get the input from external sources and create pipeline types # Input measure ment sets input_mapfile = os.path.join(self.mapfile_dir, "uvdata.mapfile") self.input_data.save(input_mapfile) # storedata_map(input_mapfile, self.input_data) self.logger.debug( "Wrote input UV-data mapfile: {0}".format(input_mapfile)) # Provides location for the scratch directory and concat.ms location target_mapfile = os.path.join(self.mapfile_dir, "target.mapfile") self.target_data.save(target_mapfile) self.logger.debug( "Wrote target mapfile: {0}".format(target_mapfile)) # images datafiles output_image_mapfile = os.path.join(self.mapfile_dir, "images.mapfile") self.output_data.save(output_image_mapfile) self.logger.debug( "Wrote output sky-image mapfile: {0}".format(output_image_mapfile)) # TODO: This is a backdoor option to manually add beamtables when these # are missing on the provided ms. There is NO use case for users of the # pipeline add_beam_tables = self.parset.getBool( "Imaging.addBeamTables", False) # ****************************************************************** # (1) prepare phase: copy and collect the ms concat_ms_map_path, timeslice_map_path, ms_per_image_map_path, \ processed_ms_dir = self._prepare_phase(input_mapfile, target_mapfile, add_beam_tables) number_of_major_cycles = self.parset.getInt( "Imaging.number_of_major_cycles") # We start with an empty source_list map. It should contain n_output # entries all set to empty strings source_list_map_path = os.path.join(self.mapfile_dir, "initial_sourcelist.mapfile") source_list_map = DataMap.load(target_mapfile) # copy the output map for item in source_list_map: item.file = "" # set all to empty string source_list_map.save(source_list_map_path) for idx_loop in range(number_of_major_cycles): # ***************************************************************** # (2) Create dbs and sky model parmdbs_path, sourcedb_map_path = self._create_dbs( concat_ms_map_path, timeslice_map_path, source_list_map_path = source_list_map_path, skip_create_dbs = False) # ***************************************************************** # (3) bbs_imager recipe. 
bbs_output = self._bbs(timeslice_map_path, parmdbs_path, sourcedb_map_path, skip = False) # TODO: Extra recipe: concat timeslices using pyrap.concatms # (see prepare) # ***************************************************************** # (4) Get parameters awimager from the prepare_parset and inputs aw_image_mapfile, maxbaseline = self._aw_imager(concat_ms_map_path, idx_loop, sourcedb_map_path, skip = False) # ***************************************************************** # (5) Source finding sourcelist_map, found_sourcedb_path = self._source_finding( aw_image_mapfile, idx_loop, skip = False) # should the output be a sourcedb? instead of a sourcelist # TODO: minbaseline should be a parset value as is maxbaseline.. minbaseline = 0 # ********************************************************************* # (6) Finalize: placed_data_image_map = self._finalize(aw_image_mapfile, processed_ms_dir, ms_per_image_map_path, sourcelist_map, minbaseline, maxbaseline, target_mapfile, output_image_mapfile, found_sourcedb_path) # ********************************************************************* # (7) Get metadata # create a parset with information that is available on the toplevel toplevel_meta_data = parameterset() toplevel_meta_data.replace("numberOfMajorCycles", str(number_of_major_cycles)) # Create a parset containing the metadata for MAC/SAS at nodes metadata_file = "%s_feedback_SkyImage" % (self.parset_file,) self.run_task("get_metadata", placed_data_image_map, parset_prefix = ( full_parset.getString('prefix') + full_parset.fullModuleName('DataProducts') ), product_type = "SkyImage", metadata_file = metadata_file) self.send_feedback_processing(toplevel_meta_data) self.send_feedback_dataproducts(parameterset(metadata_file)) return 0
def pipeline_logic(self): """ Define the individual tasks that comprise the current pipeline. This method will be invoked by the base-class's `go()` method. """ # ********************************************************************* # 1. Prepare phase, collect data from parset and input mapfiles. py_parset = self.parset.makeSubset( self.parset.fullModuleName('PythonControl') + '.') # Get input/output-data products specifications. self._get_io_product_specs() job_dir = self.config.get("layout", "job_directory") parset_dir = os.path.join(job_dir, "parsets") mapfile_dir = os.path.join(job_dir, "mapfiles") # Create directories for temporary parset- and map files create_directory(parset_dir) create_directory(mapfile_dir) # Write input- and output data map-files input_correlated_mapfile = os.path.join( mapfile_dir, "input_correlated.mapfile" ) output_correlated_mapfile = os.path.join( mapfile_dir, "output_correlated.mapfile" ) output_instrument_mapfile = os.path.join( mapfile_dir, "output_instrument.mapfile" ) self.input_data['correlated'].save(input_correlated_mapfile) self.output_data['correlated'].save(output_correlated_mapfile) self.output_data['instrument'].save(output_instrument_mapfile) if len(self.input_data['correlated']) == 0: self.logger.warn("No input data files to process. Bailing out!") return 0 self.logger.debug("Processing: %s" % ', '.join(str(f) for f in self.input_data['correlated'])) # ********************************************************************* # 2. Create VDS-file and databases. The latter are needed when doing # demixing within DPPP. with duration(self, "vdsmaker"): gvds_file = self.run_task( "vdsmaker", input_correlated_mapfile )['gvds'] # Read metadata (start, end times, pointing direction) from GVDS. with duration(self, "vdsreader"): vdsinfo = self.run_task("vdsreader", gvds=gvds_file) # Create a parameter database that will be used by the NDPPP demixing with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.parmdb.mapfile'), suffix='.dppp.parmdb' )['mapfile'] # Create a source database from a user-supplied sky model # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. skymodel = py_parset.getString('PreProcessing.SkyModel') if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", input_correlated_mapfile, mapfile=os.path.join(mapfile_dir, 'dppp.sourcedb.mapfile'), skymodel=skymodel, suffix='.dppp.sourcedb', type='blob' )['mapfile'] # ********************************************************************* # 3. Average and flag data, using NDPPP. 
ndppp_parset = os.path.join(parset_dir, "NDPPP.parset") py_parset.makeSubset('DPPP.').writeFile(ndppp_parset) # Run the Default Pre-Processing Pipeline (DPPP); with duration(self, "ndppp"): dppp_mapfile = self.run_task( "ndppp", input_correlated_mapfile, data_start_time=vdsinfo['start_time'], data_end_time=vdsinfo['end_time'], demix_always= py_parset.getStringVector('PreProcessing.demix_always'), demix_if_needed= py_parset.getStringVector('PreProcessing.demix_if_needed'), parset=ndppp_parset, parmdb_mapfile=parmdb_mapfile, sourcedb_mapfile=sourcedb_mapfile )['mapfile'] # ********************************************************************* # 4. Create a sourcedb from the user-supplied sky model, # and an empty parmdb. skymodel = py_parset.getString('Calibration.SkyModel') # The user-supplied sky model can either be a name, in which case the # pipeline will search for a file <name>.skymodel in the default search # path $LOFARROOT/share/pipeline/skymodels; or a full path. # It is an error if the file does not exist. if not os.path.isabs(skymodel): skymodel = os.path.join( # This should really become os.environ['LOFARROOT'] self.config.get('DEFAULT', 'lofarroot'), 'share', 'pipeline', 'skymodels', skymodel + '.skymodel' ) if not os.path.isfile(skymodel): raise PipelineException("Skymodel %s does not exist" % skymodel) with duration(self, "setupsourcedb"): sourcedb_mapfile = self.run_task( "setupsourcedb", dppp_mapfile, skymodel=skymodel, suffix='.bbs.sourcedb' )['mapfile'] with duration(self, "setupparmdb"): parmdb_mapfile = self.run_task( "setupparmdb", dppp_mapfile, suffix='.bbs.parmdb' )['mapfile'] # ********************************************************************* # 5. Run BBS to calibrate the data. # Create a parameter subset for BBS bbs_parset = os.path.join(parset_dir, "BBS.parset") py_parset.makeSubset('BBS.').writeFile(bbs_parset) with duration(self, "bbs_reducer"): bbs_mapfile = self.run_task( "bbs_reducer", dppp_mapfile, parset=bbs_parset, instrument_mapfile=parmdb_mapfile, sky_mapfile=sourcedb_mapfile )['data_mapfile'] # ********************************************************************* # 6. Copy output products to their final destination. # a. copy the measurement sets # b. copy the calculated instrument models # When the copier recipe has run, the map-files named in # output_correlated_mapfile and output_instrument_mapfile will # contain an updated map of output files. with duration(self, "copier"): self.run_task("copier", mapfile_source=bbs_mapfile, mapfile_target=output_correlated_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_correlated_mapfile ) with duration(self, "copier"): self.run_task("copier", mapfile_source=parmdb_mapfile, mapfile_target=output_instrument_mapfile, mapfiles_dir=mapfile_dir, mapfile=output_instrument_mapfile ) # ********************************************************************* # 7. Create feedback for further processing by the LOFAR framework # a. get metadata of the measurement sets # b. get metadata of the instrument models # c. 
join the two and write the final feedback correlated_metadata_file = "%s_feedback_Correlated" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_correlated_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="Correlated", metadata_file=correlated_metadata_file) instrument_metadata_file = "%s_feedback_InstrumentModel" % (self.parset_file,) with duration(self, "get_metadata"): self.run_task("get_metadata", output_instrument_mapfile, parset_prefix=( self.parset.getString('prefix') + self.parset.fullModuleName('DataProducts')), product_type="InstrumentModel", metadata_file=instrument_metadata_file) self.send_feedback_processing(parameterset()) self.send_feedback_dataproducts(parameterset(correlated_metadata_file)) self.send_feedback_dataproducts(parameterset(instrument_metadata_file)) return 0