def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Load the Python module at ``executable`` and call its ``main()``.

    Parameters
    ----------
    infile : str
        Input dataset path; used for logging and to name the generated
        parset file.
    executable : str
        Path to a Python source file that defines a ``main`` callable.
    args : list
        Positional arguments passed to ``main``. When ``parsetasfile`` is
        True the generated parset filename is prepended (mutates ``args``).
    kwargs : dict
        Keyword arguments passed to ``main``; also dumped into the parset
        file when ``parsetasfile`` is True.
    work_dir : str
        Directory to run in; created if missing.
    parsetasfile : bool
        When True, write ``kwargs`` to ``<work_dir>/<basename(infile)>.parset``
        and prepend that path to ``args``.
    args_format : str
        Unused in this variant (kept for interface compatibility with the
        sibling node scripts).
    environment : dict or str
        Extra environment entries merged into ``self.environment``.

    Returns
    -------
    int or None
        1 on any failure, ``None`` on success (framework convention).
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Script %s not found" % executable)
            return 1

        # Create the working directory, tolerating the race when several
        # processes on one filesystem create it concurrently (EEXIST can
        # occur after the isdir() check).
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            # Dump kwargs into a parset file and hand its path to the plugin.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        # ********************************************************************
        # Run the plugin from inside work_dir. BUGFIX: the original restored
        # the working directory only on success; any exception from
        # plugin.main() left the process stranded in work_dir. The chdir back
        # now happens in a finally clause.
        pipedir = os.getcwd()
        try:
            os.chdir(work_dir)
            plugin = imp.load_source('main', executable)
            # Result is currently discarded; kept in a local for debugging.
            outdict = plugin.main(*args, **kwargs)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1
        finally:
            os.chdir(pipedir)
class GenericPipeline(control):
    """
    Generic LOFAR pipeline driver.

    Reads a parset describing an ordered list of steps and executes them.
    Each step has a ``kind``: 'recipe' (default), 'plugin', 'loop' or
    'pipeline' (a sub-pipeline whose parset is merged into the master
    parset).  Step results are collected in a dict-of-dicts so later steps
    can reference earlier outputs via ``<step>.output.<key>`` placeholders.
    """

    # Framework ingredient declaration; parsed from the command line.
    inputs = {
        'loglevel': ingredient.StringField(
            '--loglevel',
            help="loglevel",
            default='INFO',
            optional=True
        )
    }

    def __init__(self):
        control.__init__(self)
        self.parset = Parset()            # master parset, filled by pipeline_logic()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None  # becomes "<parset>_feedback" once adopted
        #self.logger = None#logging.RootLogger('DEBUG')
        self.name = ''                    # job name, assigned in go()
        # NOTE(review): a long block of commented-out bootstrap code
        # (task-file reading, start-time initialisation, eager self.go())
        # was removed here for readability; see version control for history.

    def usage(self):
        """
        Display usage on stderr.  The parset-structure description is NYI.
        """
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"

    def go(self):
        """
        Read the parset-file that was given as input argument, and set the
        jobname before calling the base-class's `go()` method.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            # No parset argument given: print usage.  NOTE(review): execution
            # falls through with parset_file unbound, so the access below will
            # raise; presumably intended as a hard stop -- confirm.
            #return self.usage()
            self.usage()
        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if not 'job_name' in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0])
        self.name = self.inputs['job_name']
        try:
            self.logger
        except:
            # No logger yet: create one named after the job.
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

    def pipeline_logic(self):
        """
        Parse the pipeline parset and execute its steps in order.

        Step names come from ``pipeline.steps`` and may be expanded via
        ``pipeline.steps.<subset>`` vectors.  The main loop pops one step at
        a time; 'pipeline' and 'loop' steps push further steps onto the
        front of the queue.  Every step's result dict is stored in
        ``resultdicts[stepname]`` for later substitution.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # On this Parset variant ``keys`` is a list attribute (see
            # _keys()); an empty list means the parset was not adopted yet.
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        # Set up directories for temporary parset- and map files.
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the
        # pipeline.steps parameter, e.g.
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')
        pipeline_steps = self.parset.makeSubset(
            self.parset.fullModuleName('steps') + '.')

        # *********************************************************************
        # forward declaration of things. just for better overview and
        # understanding whats in here. some of this might be removed or added
        # in upcoming iterations.
        step_name_list = pipeline_args.getStringVector('steps')
        # construct the step name list if there were pipeline.steps.<subset>
        # entries: splice each subset's own step vector in place of its name.
        for item in pipeline_steps.keys():
            if item in step_name_list:
                loc = step_name_list.index(item)
                step_name_list[loc:loc] = pipeline_steps.getStringVector(item)
                step_name_list.remove(item)

        step_control_dict = {}   # stepname -> control subparset
        step_parset_files = {}   # stepname -> path of written argument parset
        step_parset_obj = {}     # stepname -> argument subparset object
        activeloop = ['']        # stack of currently running loop steps

        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir)

        # initial parameters to be saved in resultsdict so that recipes have
        # access to this step0.
        # double init values: 'input' should be considered deprecated;
        # self.name would be consistent to use in subpipelines.
        input_dictionary = {
            'parset': parset_file,
            'parsetobj': self.parset,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}

        resultdicts = {}
        for section in self.config.sections():
            tmp_dict = {}
            for entry in self.config.items(section):
                input_dictionary[entry[0]] = entry[1]
                tmp_dict[entry[0]] = entry[1]
            resultdicts.update({section: copy.deepcopy(tmp_dict)})

        resultdicts.update({'input': input_dictionary})
        resultdicts.update({self.name: input_dictionary})

        if 'pipeline.mapfile' in self.parset.keywords():
            resultdicts['input']['mapfile'] = str(self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with
        # master recipes.
        while step_name_list:
            stepname = step_name_list.pop(0)
            self.logger.info("Beginning step %s" % (stepname,))
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            adds = None
            if stepname in step_parset_obj:
                adds = self._construct_step_parset(inputdict,
                                                   step_parset_obj[stepname],
                                                   resultdicts,
                                                   step_parset_files[stepname],
                                                   stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval, 'recipe') == 'executable_args':
                    inputdict['stepname'] = stepname
                    if adds:
                        inputdict.update(adds)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)

            # hack, popping 'type' is necessary, why?
            # because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented.
            # Construct your own dict from input: python builtin functions
            # cant handle the string returned from the parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector('pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keywords():
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(os.path.basename(typeval))[0]: {
                    'parset': typeval,
                    'mapfile': submapfile,
                }})
                #todo: take care of pluginpathes and everything other then
                # individual steps. make a pipeline parse method that returns
                # everything needed, maybe as dicts to combine them to one.
                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keywords():
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in self._keys(subpipeline_parset):
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *************************************************************
                # master parset did not handle formatting and comments in the
                # parset. proper format only after use of parset.makeSubset --
                # then it is a different object from a different super class :(.
                # this also explains use of parset.keys and parset.keys().
                # UPDATE: do not use .keys on the master parset. use
                # .keywords(), then comments are filtered.
                # *************************************************************
                # replace names of steps with the subpipeline stepname to
                # create a unique identifier. replacement values starting with
                # ! will be taken from the master parset and overwrite the ones
                # in the subpipeline. only works if the ! value is already in
                # the subpipeline.
                for k in self._keys(subpipeline_parset):
                    val = subpipeline_parset[k]
                    if not str(k).startswith('!') and not str(k).startswith('pipeline.replace.'):
                        # Rewrite <substep>.output references to the prefixed name.
                        for item in checklist:
                            if item+".output" in str(val):
                                val = str(val).replace(item, stepname + '-' + item)
                        self.parset.add(stepname + '-' + k, str(val))
                    else:
                        # remove replacements strings to prevent loading the
                        # same key twice
                        if k in self._keys(self.parset):
                            self.parset.remove(k)
                        self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self._keys(self.parset):
                        # NOTE(review): mixed and/or precedence below relies on
                        # Python's 'and' binding tighter than 'or'.
                        if str(k).startswith('!') and item == str(k).strip("! ") or str(k).startswith('pipeline.replace.') and item == str(k)[17:].strip():
                            self.parset.remove(k)
                            self.parset.add('! ' + item, str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                # Prepend the (renamed) subpipeline steps to the work queue.
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional
                # step.  NOTE(review): 'is not' compares identity, not
                # equality; works here only because the same string object is
                # reused -- confirm.
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')
                # break at max iteration or when another step sets the break
                # variable.  NOTE(review): 'is' on ints relies on CPython
                # small-int caching; '==' would be the safe comparison.
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict, step_parset_files, step_parset_obj, parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop
                    # (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(
                        typeval,
                        inputargs,
                        **inputdict
                    )

            # plugins
            if kind_of_step == 'plugin':
                bla = str(self.config.get('DEFAULT', 'recipe_directories'))
                pluginpath = bla.rstrip(']').lstrip('[').split(',')
                for i, item in enumerate(pluginpath):
                    pluginpath[i] = os.path.join(item, 'plugins')
                if 'pluginpath' in pipeline_args.keys():
                    pluginpath.append(pipeline_args.getString('pluginpath'))
                with duration(self, stepname):
                    resultdict = loader.call_plugin(typeval, pluginpath,
                                                    inputargs,
                                                    **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep:
            # if the step has the keyword for loopbreaks assign the value
            if activeloop[0] in resultdicts and resultdict is not None and 'break' in resultdict:
                resultdicts[activeloop[0]]['break'] = resultdict['break']
    def _construct_input(self, inoutdict, controlparset, resdicts):
        """
        Fill ``inoutdict`` with the step's input arguments, substituting any
        ``<step>.output.<key>`` references from ``resdicts``.
        """
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('opts') + '.')
        # hack: a parset without 'loopcount' is used as-is; otherwise an
        # (empty) 'imaginary' subset is taken so loop bookkeeping keys are
        # not passed to the recipe.
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(controlparset.fullModuleName('imaginary') + '.')
        # \hack
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        """
        Append the step's ``cmdline.*`` values (after output-keyword
        substitution) to ``inoutargs``, then strip all cmdline keys from
        ``controlparset`` so they are not treated as regular inputs.
        """
        inoutdict = {}
        argsparset = controlparset.makeSubset(controlparset.fullModuleName('cmdline') + '.')
        self._replace_output_keyword(inoutdict, argsparset, argsparset.keys(), resdicts)
        for k in inoutdict.keys():
            inoutargs.append(inoutdict[k])
        for k in controlparset.keys():
            if 'cmdline' in k:
                controlparset.remove(k)

    def _construct_steps(self, step_name_list, step_control_dict, step_parset_files, step_parset_obj, parset_dir):
        """
        For each step name, extract its ``control`` and ``argument`` subsets
        from the master parset, de-duplicate repeated names by appending a
        count, and write the argument subset to ``<parset_dir>/<step>.parset``.

        Mutates ``step_name_list`` in place (renamed duplicates) and fills
        the three dictionaries keyed by the (possibly renamed) step name.
        """
        step_list_copy = (copy.deepcopy(step_name_list))
        counter = 0
        while step_list_copy:
            # Walk from the back; 'counter' is the negative index into the
            # original list for writing the possibly-renamed step name back.
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(fullparset.fullModuleName('control') + '.')
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                # Duplicate step name: suffix with its occurrence count.
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            if fullparset.fullModuleName('argument'):
                stepparset = fullparset.makeSubset(fullparset.fullModuleName('argument') + '.')
                # *************************************************************
                # save parsets: either a filename is given in the main parset
                # or files will be created from subsets with stepname.parset
                # as filenames.
                # Merge keys from an argument-level 'parset' file (existing
                # keys win); best-effort, ignore if absent/unreadable.
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(self.task_definitions.get(str(subparset['type']), 'parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keywords():
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, keyorder, resdicts):
        """
        Substitute ``<step>.output.<key>`` placeholders in the values of
        ``argsparset`` with the corresponding entries from ``resdicts`` and
        store the resolved strings in ``inoutdict``.

        Returns a dict with bookkeeping lists ('inputkeys', 'mapfiles_in',
        'arguments') plus 'outputkey' when a value contains that marker.
        """
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        regobj = re.compile('([\w\+_-]+)\.output\.([\w\+._-]+)')
        for k in keyorder:
            keystring = argsparset.getString(k)
            hitlist = regobj.findall(keystring)
            if hitlist:
                for hit in hitlist:
                    # Replace one occurrence per hit, left to right.
                    keystring = regobj.sub(str(resdicts[hit[0]][hit[1]]), keystring, 1)
                    if 'mapfile' in hit[1] and not 'mapfile' in k:
                        # Mapfile referenced from a non-mapfile key: record it
                        # so the recipe can treat it as an input mapfile.
                        addvals['inputkeys'].append(resdicts[hit[0]][hit[1]])
                        addvals['mapfiles_in'].append(resdicts[hit[0]][hit[1]])
                inoutdict[k] = keystring
            else:
                inoutdict[k] = argsparset.getString(k)
            if k == 'flags':
                addvals['arguments'] = keystring
            if 'outputkey' in keystring:
                addvals['outputkey'] = 'outputkey'
        return addvals

    def _construct_step_parset(self, inoutdict, argsparset, resdicts, filename, stepname):
        """
        Resolve output references in a step's argument parset, preserving the
        key order of the master parset, and rewrite the parset to ``filename``.

        Returns the bookkeeping dict from ``_replace_output_keyword``.
        ``inoutdict`` is currently unused here (substitution happens into a
        local dict that is written back into ``argsparset``).
        """
        tmp_keys = argsparset.keys()
        ordered_keys = []
        parsetdict = {}
        # Recover the original key order from the master parset.
        for orig in self._keys(self.parset):
            for item in tmp_keys:
                if (stepname + '.') in orig and ('argument.'+item in orig and not 'argument.'+item+'.' in orig):
                    ordered_keys.append(item)
                    continue
        # add keys from parset files that were not in the original list
        for item in argsparset.keys():
            if not item in ordered_keys:
                ordered_keys.append(item)
        additional = self._replace_output_keyword(parsetdict, argsparset, ordered_keys, resdicts)
        for k in argsparset.keys():
            argsparset.replace(k, parsetdict[k])
            if k == 'flags':
                argsparset.remove(k)
        argsparset.writeFile(filename)
        return additional
        #inoutdict.update(additional)

    def _keys(self, inparset):
        """
        Return the parset keys in original file order, with comment entries
        filtered out (intersection of the ``keys`` attribute, which keeps
        order, and ``keywords()``, which excludes comments).
        """
        outlist = []
        for k in inparset.keys:
            for l in inparset.keywords():
                if k == l:
                    outlist.append(l)
        return outlist

    def _get_parset_dicts(self):
        """Placeholder; returns an empty dict."""
        return {}

    def show_tasks(self):
        """Print the names of all defined tasks."""
        tasklist = []
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        """Print the possible arguments (key = value) of a named task."""
        task_parset = Parset()
        if self.task_definitions.has_option(task,'parset'):
            task_parset.adoptFile(self.task_definitions.get(task,'parset'))
            print 'possible arguments: key = value'
            for k in task_parset.keywords():
                print ' ',k,' ','=',' ',task_parset[k]

    def _add_step(self):
        """Placeholder; not implemented."""
        steplist = []

    def _replace_values(self):
        """
        Apply replacement variables to the master parset.

        Keys starting with '!' or 'pipeline.replace.' define replacements;
        after environment-variable expansion, every occurrence of
        ``{{ name }}`` in any parset value is substituted.
        """
        replacedict = OrderedDict()
        for check in self._keys(self.parset):
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(self.parset[check])
            if str(check).startswith('pipeline.replace.'):
                replacedict[str(check).replace('pipeline.replace.', '').lstrip(' ')] = str(self.parset[check])
        #expand environment variables
        for k, v in replacedict.items():
            replacedict[k] = os.path.expandvars(v)
        for check in self._keys(self.parset):
            # reversed() so later definitions take precedence.
            for k, v in reversed(replacedict.items()):
                if '{{ '+k+' }}' in str(self.parset[check]):
                    replacestring = str(self.parset[check]).replace('{{ '+k+' }}',v)
                    self.parset.replace(check,replacestring)
def run(self, executable, initscript, infile, key, db_name, db_user, db_host):
    """
    Run one BBS KernelControl process on a MeasurementSet.

    executable -- path to KernelControl executable
    initscript -- path to lofarinit.sh
    infile     -- MeasurementSet for processing
    key, db_name, db_user, db_host -- database connection parameters

    Returns 1 on failure; kernel output is captured and logged.
    """
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up kernel parset")
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
        fd, parset_filename = mkstemp()
        kernel_parset = Parset()
        # NOTE(review): the loop variable 'key' shadows the 'key' parameter;
        # harmless only because the dict literal (which reads the parameter
        # for "BBDB.Key") is fully evaluated before the loop rebinds it.
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": infile,
            "BBDB.Key": key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmLog": "",
            "ParmLoglevel": "",
            "ParmDB.Sky": infile + ".sky",
            "ParmDB.Instrument": infile + ".instrument"
        }.iteritems():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_filename)
        os.close(fd)
        self.logger.debug("Parset written to %s" % (parset_filename, ))

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp()
        # NOTE(review): 'env' is not used in the visible code -- presumably
        # consumed in the truncated part below; confirm.
        env = read_initscript(self.logger, initscript)
        try:
            cmd = [executable, parset_filename, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir)
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(bbs_kernel_process.returncode, executable)
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the body of this finally clause is missing from
            # this chunk (truncated during extraction) -- presumably cleanup
            # of parset_filename / working_dir; recover it from the original
            # file before use.
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
    """
    Run an executable (e.g. DP3/NDPPP) on ``infile``, writing its output MS
    to a node-local scratch directory.

    Pops 'msout' and 'local_scratch_dir' from ``kwargs``; the executable is
    pointed at a scratch copy of the output path ('msout=<scratch>' appended
    to ``args``).  Several instance attributes (work_dir, infile, executable,
    msout_original, msout_destination_dir, scratch_dir, msout_scratch) are
    set for later use -- presumably by cleanup()/finalisation code outside
    this view; confirm.

    Returns 1 on failure (after self.cleanup()).
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Final output path requested by the caller; the executable writes to
    # scratch instead, so record where the result must eventually go.
    self.msout_original = kwargs['msout'].rstrip('/')
    kwargs.pop('msout')
    self.msout_destination_dir = os.path.dirname(self.msout_original)

    # Set up scratch paths
    scratch_dir = kwargs['local_scratch_dir']
    kwargs.pop('local_scratch_dir')
    try:
        os.mkdir(scratch_dir)
    except OSError:
        # Best-effort: directory may already exist.
        pass
    self.scratch_dir = tempfile.mkdtemp(dir=scratch_dir)
    self.logger.info('Using {} as scratch directory'.format(self.scratch_dir))
    self.msout_scratch = os.path.join(self.scratch_dir, os.path.basename(self.msout_original))
    args.append('msout=' + self.msout_scratch)

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)

        # Check if executable is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem: mkdir may fail with EEXIST after the isdir check.
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        # How kwargs are turned into command-line arguments.
        argsformat = args_format['args_format']
        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    multargs = v.split(' ')
                    # NOTE(review): str.split(' ') never returns an empty
                    # list, so the else branch below is dead code.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-'+ k)
        else:
            # Default path: dump kwargs into a parset and prepend its name.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(
                    cmd, work_dir, self.environment, logger
                )
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            self.cleanup()
            return 1
        except Exception, err:
            self.logger.error(str(err))
            self.cleanup()
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
    """
    Run an executable on ``infile`` with node-local scratch output.

    NOTE(review): this is a near-duplicate of the preceding run() variant;
    the only visible differences are cosmetic (no pre-created parent scratch
    dir here -- mkdtemp is called directly on kwargs['local_scratch_dir']).
    Consider consolidating.

    Pops 'msout' and 'local_scratch_dir' from ``kwargs`` and appends
    'msout=<scratch path>' to ``args``.  Returns 1 on failure (after
    self.cleanup()).
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Where the caller wants the output MS; the executable writes to scratch.
    self.msout_original = kwargs['msout'].rstrip('/')
    kwargs.pop('msout')
    self.msout_destination_dir = os.path.dirname(self.msout_original)

    # NOTE(review): unlike the sibling variant, the parent scratch directory
    # is assumed to exist here -- mkdtemp raises if it does not; confirm.
    self.scratch_dir = tempfile.mkdtemp(dir=kwargs['local_scratch_dir'])
    kwargs.pop('local_scratch_dir')
    self.logger.info('Using {} as scratch directory'.format(
        self.scratch_dir))

    # Set up scratch paths
    self.msout_scratch = os.path.join(
        self.scratch_dir, os.path.basename(self.msout_original))
    args.append('msout=' + self.msout_scratch)

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)

        # Check if executable is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on
        # one filesystem: mkdir may fail with EEXIST after the isdir check.
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        # How kwargs are turned into command-line arguments.
        argsformat = args_format['args_format']
        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    multargs = v.split(' ')
                    # NOTE(review): str.split(' ') never returns an empty
                    # list, so the else branch below is dead code.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            # Default path: dump kwargs into a parset and prepend its name.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            self.cleanup()
            return 1
        except Exception, err:
            self.logger.error(str(err))
            self.cleanup()
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Run an arbitrary executable on ``infile``.

    Depending on ``args_format['args_format']`` ('gnu', 'lofar', 'argparse',
    'wsclean'), ``kwargs`` are rendered as command-line options; otherwise
    (``parsetasfile``) they are written to a parset whose path is prepended
    to ``args`` (appended for 'losoto').  Bracketed list values are expanded
    for wsclean.  Returns 1 on failure.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)
        # else:
        #     self.logger.error("Dataset %s does not exist" % infile)
        #     return 1

        # Check if executable is present (and executable by us).
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # race condition when running with more than one process on one
        # filesystem: mkdir may fail with EEXIST after the isdir check.
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']
        # deal with multiple input files for wsclean: expand any
        # '[a,b,c]' positional argument into separate arguments in place.
        if argsformat == 'wsclean':
            for i in reversed(xrange(len(args))):
                if str(args[i]).startswith('[') and str(
                        args[i]).endswith(']'):
                    tmplist = args.pop(i).lstrip('[').rstrip(']').split(
                        ',')
                    for val in reversed(tmplist):
                        args.insert(i, val.strip(' \'\"'))

        if not parsetasfile:
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            if argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            if argsformat == 'argparse':
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            if argsformat == 'wsclean':
                for k, v in kwargs.items():
                    # Bracketed values are comma-separated lists; otherwise
                    # split on spaces.
                    if str(v).startswith('[') and str(v).endswith(']'):
                        v = v.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = v.split(',')
                    else:
                        multargs = v.split(' ')
                    # NOTE(review): both split() calls always return a
                    # non-empty list, so the else branch below is dead code.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            # losoto expects its parset last; everything else takes it first.
            if argsformat == 'losoto':
                args.append(parsetname)
            else:
                args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception, err:
            self.logger.error(str(err))
            return 1
def run(
    self, executable, initscript, infile, key, db_name, db_user, db_host
):
    """
    Run one BBS KernelControl process on a MeasurementSet.

    NOTE(review): near-duplicate of the earlier BBS kernel run() variant;
    differs only in mkdtemp suffix and formatting.

    executable -- path to KernelControl executable
    initscript -- path to lofarinit.sh
    infile     -- MeasurementSet for processing
    key, db_name, db_user, db_host -- database connection parameters

    Returns 1 on failure; kernel output is captured and logged.
    """
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % (infile))
        else:
            self.logger.error("Dataset %s does not exist" % (infile))
            return 1

        # Build a configuration parset specifying database parameters
        # for the kernel
        # ------------------------------------------------------------------
        self.logger.debug("Setting up kernel parset")
        filesystem = "%s:%s" % (os.uname()[1], get_mountpoint(infile))
        fd, parset_filename = mkstemp()
        kernel_parset = Parset()
        # NOTE(review): the loop variable 'key' shadows the 'key' parameter;
        # harmless only because the dict literal (which reads the parameter
        # for "BBDB.Key") is fully evaluated before the loop rebinds it.
        for key, value in {
            "ObservationPart.Filesystem": filesystem,
            "ObservationPart.Path": infile,
            "BBDB.Key": key,
            "BBDB.Name": db_name,
            "BBDB.User": db_user,
            "BBDB.Host": db_host,
            "ParmLog": "",
            "ParmLoglevel": "",
            "ParmDB.Sky": infile + ".sky",
            "ParmDB.Instrument": infile + ".instrument"
        }.iteritems():
            kernel_parset.add(key, value)
        kernel_parset.writeFile(parset_filename)
        os.close(fd)
        self.logger.debug("Parset written to %s" % (parset_filename,))

        # Run the kernel
        # Catch & log output from the kernel logger and stdout
        # ------------------------------------------------------------------
        working_dir = mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
        # NOTE(review): 'env' is not used in the visible code -- presumably
        # consumed in the truncated part below; confirm.
        env = read_initscript(self.logger, initscript)
        try:
            cmd = [executable, parset_filename, "0"]
            self.logger.debug("Executing BBS kernel")
            with CatchLog4CPlus(
                working_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ):
                bbs_kernel_process = Popen(
                    cmd, stdout=PIPE, stderr=PIPE, cwd=working_dir
                )
                sout, serr = bbs_kernel_process.communicate()
            log_process_output("BBS kernel", sout, serr, self.logger)
            if bbs_kernel_process.returncode != 0:
                raise CalledProcessError(
                    bbs_kernel_process.returncode, executable
                )
        except CalledProcessError, e:
            self.logger.error(str(e))
            return 1
        finally:
            # NOTE(review): the body of this finally clause is missing from
            # this chunk (truncated during extraction) -- presumably cleanup
            # of parset_filename / working_dir; recover it from the original
            # file before use.
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Run a CASA task on this node.

    The ``kwargs`` parset keys of the form ``task.param`` are grouped by
    their ``task`` prefix and turned into a one-line CASA call
    ``task(param=..., ...)`` written to a ``*.casacommand.py`` file, which
    is then appended to ``args`` and executed through a generated bash
    wrapper (see the notes below on why a wrapper is needed).

    :param infile: MeasurementSet path, or a stringified list
        ``'[ms1, ms2, ...]'`` — the first entry is used as reference for
        naming the generated files.
    :param executable: path to the casa(py) launcher; must be executable.
    :param args: extra command-line arguments (mutated: generated files
        are appended).
    :param kwargs: parset-style ``task.param`` keys.
    :param work_dir: directory for generated files; created if missing.
    :param parsetasfile: must be ``True`` for this node script.
    :param args_format: unused.
    :param environment: merged into ``self.environment`` for the child.
    :return: 1 on error; falls through (``None``) on success.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        # Accept a plain MS name or a stringified list; the first entry
        # becomes the reference dataset for naming and existence checks.
        if infile[0] == '[':
            infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
            reffile = infiles[0]
        else:
            reffile = infile

        if os.path.exists(reffile):
            self.logger.info("Processing %s" % reffile)
        else:
            self.logger.error("Dataset %s does not exist" % reffile)
            return 1

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # race condition when running with more than one process on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            self.logger.error("Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!")
            return 1

        # Group parset keys by the prefix before the first dot:
        # 'clean.niter' belongs to the CASA task 'clean'.
        nodeparset = Parset()
        sublist = []
        for k, v in kwargs.items():
            nodeparset.add(k, v)
            # BUGFIX: original used "if str(k).find('.'):", which is truthy
            # for keys WITHOUT a dot (find() returns -1) and silently added
            # whole dotless keys as task names.
            if '.' in str(k):
                if not str(k).split('.')[0] in sublist:
                    sublist.append(str(k).split('.')[0])

        # Build the CASA call for each task prefix.
        # NOTE: as in the original, only the LAST task's call survives in
        # casastring (it is reassigned, not accumulated, per sub).
        casastring = ''
        for sub in sublist:
            subpar = nodeparset.makeSubset(
                nodeparset.fullModuleName(sub) + '.')
            casastring = sub + '('
            for k in subpar.keys():
                if str(subpar[k]).find('/') == 0:
                    # Absolute path -> quote as a Python string.
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k]) + "'" + ','
                elif str(subpar[k]).find('casastr/') == 0:
                    # 'casastr/' marker forces string quoting.
                    # BUGFIX: use real prefix removal; str.strip('casastr/')
                    # strips any leading/trailing characters from the SET
                    # {c,a,s,t,r,/} and corrupted values such as
                    # 'casastr/data.cat' -> 'data.'.
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k])[len('casastr/'):] + "'" + ','
                elif str(subpar[k]).lower() == 'false' or str(
                        subpar[k]).lower() == 'true':
                    # Booleans are emitted unquoted.
                    casastring += str(k) + '=' + str(subpar[k]) + ','
                else:
                    # Test if int/float or list of int/float
                    try:
                        self.logger.info('value: {}'.format(subpar[k]))
                        float(str(subpar[k]))
                        is_int_float = True
                    except (TypeError, ValueError):
                        is_int_float = False
                    if is_int_float:
                        casastring += str(k) + '=' + str(
                            subpar[k]) + ','
                    else:
                        if '[' in str(subpar[k]) or '(' in str(
                                subpar[k]):
                            # Check if list of int/float or strings
                            list_vals = [
                                f.strip() for f in str(
                                    subpar[k]).strip('[]()').split(',')
                            ]
                            is_int_float = True
                            for list_val in list_vals:
                                try:
                                    float(list_val)
                                except (TypeError, ValueError):
                                    is_int_float = False
                                    break
                            if is_int_float:
                                casastring += str(k) + '=' + str(
                                    subpar[k]) + ','
                            else:
                                # List of strings: quote every element.
                                casastring += str(
                                    k) + '=' + '[{}]'.format(','.join([
                                        "'" + list_val + "'"
                                        for list_val in list_vals
                                    ])) + ','
                        else:
                            # Simple string
                            casastring += str(k) + '=' + "'" + str(
                                subpar[k]) + "'" + ','
            casastring = casastring.rstrip(',')
            casastring += ')\n'

        # 1) return code of a casapy is not properly recognized by the pipeline
        #    wrapping in shellscript works for succesful runs.
        #    failed runs seem to hang the pipeline...
        # 2) casapy can not have two instances running from the same directory.
        # create tmp dirs
        casapydir = tempfile.mkdtemp(dir=work_dir)
        if casastring != '':
            casafilename = os.path.join(
                work_dir,
                os.path.basename(reffile) + '.casacommand.py')
            with open(casafilename, 'w') as casacommandfile:
                casacommandfile.write(casastring)
            args.append(casafilename)

        # Build the bash wrapper (see note 1 above).
        somename = os.path.join(
            work_dir, os.path.basename(reffile) + '.casashell.sh')
        commandstring = executable
        for item in args:
            # Quote arguments containing spaces or list syntax.
            if str(item).find(' ') > -1 or str(item).find('[') > -1:
                commandstring += ' "' + item + '"'
            else:
                commandstring += ' ' + item
        with open(somename, 'w') as wrapper:
            wrapper.write('#!/bin/bash \n')
            wrapper.write('echo "Trying CASAPY command" \n')
            wrapper.write(commandstring + ' >& casa.log\n')

        # Make the wrapper executable for user, group and others.
        st = os.stat(somename)
        os.chmod(
            somename,
            st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            cmd = [somename]
            with CatchLog4CPlus(
                    casapydir,
                    self.logger.name + "." + os.path.basename(reffile),
                    os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality

    Pops the bookkeeping keys (``replace-sourcedb``, ``replace-parmdb``,
    ``dry-run``, ``sourcedb``, ``parmdb``, ``sourcedb-name``,
    ``parmdb-name``, ``force``, ``numthreads``, ``observation``,
    ``catalog``, ``no-columns``) out of ``kwargs`` into instance
    attributes, builds the sourcedb/parmdb for the observation, writes
    the remaining kwargs to a parset file and runs ``executable`` on it.

    Returns 0 on success (also sets ``self.outputs['ok']``), 1 on error.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Pull control keys out of kwargs so that only real task parameters
    # end up in the node parset written below. Order matters: the *-name
    # keys also force the corresponding replace flag.
    if 'replace-sourcedb' in kwargs:
        self.replace_sourcedb = kwargs['replace-sourcedb']
        kwargs.pop('replace-sourcedb')
    if 'replace-parmdb' in kwargs:
        self.replace_parmdb = kwargs['replace-parmdb']
        kwargs.pop('replace-parmdb')
    if 'dry-run' in kwargs:
        self.dry_run = kwargs['dry-run']
        kwargs.pop('dry-run')
    if 'sourcedb' in kwargs:
        self.sourcedb = kwargs['sourcedb']
        kwargs.pop('sourcedb')
    if 'parmdb' in kwargs:
        self.parmdb = kwargs['parmdb']
        kwargs.pop('parmdb')
    if 'sourcedb-name' in kwargs:
        self.sourcedb_basename = kwargs['sourcedb-name']
        self.replace_sourcedb = True
        kwargs.pop('sourcedb-name')
    if 'parmdb-name' in kwargs:
        self.parmdb_basename = kwargs['parmdb-name']
        self.replace_parmdb = True
        kwargs.pop('parmdb-name')
    if 'force' in kwargs:
        # 'force' rebuilds both databases.
        self.replace_parmdb = True
        self.replace_sourcedb = True
        kwargs.pop('force')
    numthreads = 1
    if 'numthreads' in kwargs:
        numthreads = kwargs['numthreads']
        kwargs.pop('numthreads')
    args.append('--numthreads=' + str(numthreads))
    if 'observation' in kwargs:
        self.observation = kwargs.pop('observation')
    if 'catalog' in kwargs:
        self.catalog = kwargs.pop('catalog')

    # Build the databases (helpers defined on this class, outside this
    # chunk) before assembling the final command line.
    self.createsourcedb()
    self.createparmdb()
    # Default behaviour is to add the output columns; 'no-columns'
    # suppresses that.
    if not 'no-columns' in kwargs:
        #if not kwargs['no-columns']:
        self.addcolumns()
    else:
        kwargs.pop('no-columns')

    args.append('--sourcedb=' + self.sourcedb_path)
    args.append('--parmdb=' + self.parmdb_path)

    args.append(self.observation)
    #catalog = None

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                # Directory appeared between the isdir check and mkdir:
                # benign, ignore.
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            # Write the remaining kwargs to '<infile>.parset' and hand
            # that file to the executable as its last argument.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in list(kwargs.items()):
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            #args.insert(0, parsetname)
            args.append(parsetname)

        #if catalog is not None:
        #    args.append(catalog)

        try:
            # ****************************************************************
            #Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, work_dir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

    # We need some signal to the master script that the script ran ok.
    self.outputs['ok'] = True
    return 0
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=True, args_format='', environment=''):
    """
    This method contains all the needed functionality

    Python-2 variant of the sourcedb/parmdb runner: pops the bookkeeping
    keys out of ``kwargs`` into instance attributes, builds the
    sourcedb/parmdb, writes the remaining kwargs to a parset file and
    runs ``executable`` on it.

    Returns 1 on error.

    NOTE(review): unlike the sibling variant, this version never sets
    ``self.outputs['ok']`` / returns 0 on success — confirm whether the
    success tail was lost, or the framework tolerates a None return here.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)
    self.work_dir = work_dir
    self.infile = infile
    self.executable = executable

    # Pull control keys out of kwargs so only real task parameters reach
    # the node parset. The *-name keys also force the replace flag.
    if 'replace-sourcedb' in kwargs:
        self.replace_sourcedb = kwargs['replace-sourcedb']
        kwargs.pop('replace-sourcedb')
    if 'replace-parmdb' in kwargs:
        self.replace_parmdb = kwargs['replace-parmdb']
        kwargs.pop('replace-parmdb')
    if 'dry-run' in kwargs:
        self.dry_run = kwargs['dry-run']
        kwargs.pop('dry-run')
    if 'sourcedb' in kwargs:
        self.sourcedb = kwargs['sourcedb']
        kwargs.pop('sourcedb')
    if 'parmdb' in kwargs:
        self.parmdb = kwargs['parmdb']
        kwargs.pop('parmdb')
    if 'sourcedb-name' in kwargs:
        self.sourcedb_basename = kwargs['sourcedb-name']
        self.replace_sourcedb = True
        kwargs.pop('sourcedb-name')
    if 'parmdb-name' in kwargs:
        self.parmdb_basename = kwargs['parmdb-name']
        self.replace_parmdb = True
        kwargs.pop('parmdb-name')
    if 'force' in kwargs:
        # 'force' rebuilds both databases.
        self.replace_parmdb = True
        self.replace_sourcedb = True
        kwargs.pop('force')
    numthreads = 1
    if 'numthreads' in kwargs:
        numthreads = kwargs['numthreads']
        kwargs.pop('numthreads')
    args.append('--numthreads='+str(numthreads))
    if 'observation' in kwargs:
        self.observation = kwargs.pop('observation')
    if 'catalog' in kwargs:
        self.catalog = kwargs.pop('catalog')

    # Build the databases (helpers defined on this class, outside this
    # chunk) before assembling the final command line.
    self.createsourcedb()
    self.createparmdb()
    # Default behaviour is to add the output columns; 'no-columns'
    # suppresses that.
    if not 'no-columns' in kwargs:
        #if not kwargs['no-columns']:
        self.addcolumns()
    else:
        kwargs.pop('no-columns')

    args.append('--sourcedb=' + self.sourcedb_path)
    args.append('--parmdb=' + self.parmdb_path)

    args.append(self.observation)
    #catalog = None

    # Time execution of this job
    with log_time(self.logger):
        #if os.path.exists(infile):
        self.logger.info("Processing %s" % infile)

        # Check if script is present
        if not os.path.isfile(executable):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir, )
            except OSError as exc:  # Python >2.5
                # Directory appeared between the isdir check and mkdir:
                # benign, ignore.
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if parsetasfile:
            # Write the remaining kwargs to '<infile>.parset' and hand
            # that file to the executable as its last argument.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            #args.insert(0, parsetname)
            args.append(parsetname)

        #if catalog is not None:
        #    args.append(catalog)

        try:
            # ****************************************************************
            #Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(
                    cmd, work_dir, self.environment, logger
                )
        except CalledProcessError, err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception, err:
            self.logger.error(str(err))
            return 1
class GenericPipeline(control):
    """
    Parset-driven master pipeline.

    Reads a pipeline parset, expands its ``pipeline.steps`` into a work
    list, and executes each step as a recipe, plugin, loop or nested
    sub-pipeline, threading results between steps via ``<step>.output.*``
    references.
    """

    # Command-line ingredient(s) accepted in addition to the base class's.
    inputs = {
        'loglevel': ingredient.StringField('--loglevel',
                                           help="loglevel",
                                           default='INFO',
                                           optional=True)
    }

    def __init__(self):
        control.__init__(self)
        self.parset = Parset()          # master parset, filled in pipeline_logic()
        self.input_data = {}
        self.output_data = {}
        self.parset_feedback_file = None
        #self.logger = None#logging.RootLogger('DEBUG')
        self.name = ''                  # job name, set in go()

        #if not overwrite:
        #    self.inputs['job_name'] = 'generic-pipeline'
        #    if not self.inputs.has_key("start_time"):
        #        import datetime
        #        self.inputs["start_time"] = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        #    if not hasattr(self, "config"):
        #        self.config = self._read_config()
        #    #self._read_config()
        #    # ...and task files, if applicable
        #    if not self.inputs.has_key("task_files"):
        #        try:
        #            self.inputs["task_files"] = utilities.string_to_list(
        #                self.config.get('DEFAULT', "task_files")
        #            )
        #        except NoOptionError:
        #            self.inputs["task_files"] = []
        #    self.task_definitions = ConfigParser(self.config.defaults())
        #    print >> sys.stderr, "Reading task definition file(s): %s" % \
        #        ",".join(self.inputs["task_files"])
        #    self.task_definitions.read(self.inputs["task_files"])
        #self.go()

    def usage(self):
        """
        Display usage on stderr.
        """
        print >> sys.stderr, "Usage: %s [options] <parset-file>" % sys.argv[0]
        print >> sys.stderr, "Parset structure should look like:\n" \
                             "NYI"
        #return 1

    def go(self):
        #"""
        #Read the parset-file that was given as input argument, and set the
        #jobname before calling the base-class's `go()` method.
        #"""
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            #return self.usage()
            # FIXME(review): usage() is called but its result is not
            # returned, so execution continues and parset_file is unbound
            # below -> NameError. Probably should be 'return self.usage()'.
            self.usage()

        # Set job-name to basename of parset-file w/o extension, if it's not
        # set on the command-line with '-j' or '--job-name'
        if not 'job_name' in self.inputs:
            self.inputs['job_name'] = (
                os.path.splitext(os.path.basename(parset_file))[0]
            )
        self.name = self.inputs['job_name']
        try:
            self.logger
        except:
            # No logger yet: create one named after the job.
            self.logger = getSearchingLogger(self.name)
            self.logger.setLevel(self.inputs['loglevel'])
        # Call the base-class's `go()` method.
        return super(GenericPipeline, self).go()

    # def pipeline_logic(self):
    #     print 'Dummy because of stupid wrapping inside the framework'
    #     if overwrite:
    #         self.execute_pipeline()

    #def execute_pipeline(self):

    def pipeline_logic(self):
        """
        Parse the pipeline parset, build the step list and execute every
        step (recipe / plugin / loop / nested pipeline) in order.
        """
        try:
            parset_file = os.path.abspath(self.inputs['args'][0])
        except IndexError:
            return self.usage()
        try:
            # NOTE(review): 'self.parset.keys' here is the attribute/member
            # of the master Parset class (see the comment block further
            # down on keys vs keys()) — confirm this comparison against
            # [] does what is intended.
            if self.parset.keys == []:
                self.parset.adoptFile(parset_file)
                self.parset_feedback_file = parset_file + "_feedback"
        except RuntimeError:
            print >> sys.stderr, "Error: Parset file not found!"
            return self.usage()
        self._replace_values()
        # just a reminder that this has to be implemented
        validator = GenericPipelineParsetValidation(self.parset)
        if not validator.validate_pipeline():
            self.usage()
            exit(1)
        if not validator.validate_steps():
            self.usage()
            exit(1)

        #set up directories
        job_dir = self.config.get("layout", "job_directory")
        parset_dir = os.path.join(job_dir, "parsets")
        mapfile_dir = os.path.join(job_dir, "mapfiles")
        # Create directories for temporary parset- and map files
        create_directory(parset_dir)
        create_directory(mapfile_dir)

        # *********************************************************************
        # maybe we dont need a subset but just a steplist
        # at the moment only a list with stepnames is given for the pipeline.steps parameter
        # pipeline.steps=[vdsmaker,vdsreader,setupparmdb1,setupsourcedb1,ndppp1,....]
        # the names will be the prefix for parset subsets
        pipeline_args = self.parset.makeSubset(
            self.parset.fullModuleName('pipeline') + '.')

        # *********************************************************************
        # forward declaration of things. just for better overview and understanding whats in here.
        # some of this might be removed in upcoming iterations, or stuff gets added.
        step_name_list = pipeline_args.getStringVector('steps')
        step_control_dict = {}      # stepname -> 'control' subparset
        step_parset_files = {}      # stepname -> written step parset path
        step_parset_obj = {}        # stepname -> step parset object
        activeloop = ['']           # stack of currently running loop steps
        # construct the list of step names and controls
        self._construct_steps(step_name_list, step_control_dict,
                              step_parset_files, step_parset_obj, parset_dir)
        # initial parameters to be saved in resultsdict so that recipes have access to this step0
        # double init values. 'input' should be considered deprecated
        # self.name would be consistent to use in subpipelines
        resultdicts = {'input': {
            'parset': parset_file,
            'parsetobj': self.parset,
            'job_dir': job_dir,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}}
        resultdicts.update({self.name: {
            'parset': parset_file,
            'parsetobj': self.parset,
            'job_dir': job_dir,
            'parset_dir': parset_dir,
            'mapfile_dir': mapfile_dir}})
        if 'pipeline.mapfile' in self.parset.keys:
            resultdicts['input']['mapfile'] = str(
                self.parset['pipeline.mapfile'])
            resultdicts[self.name]['mapfile'] = str(
                self.parset['pipeline.mapfile'])

        # *********************************************************************
        # main loop
        # there is a distinction between recipes and plugins for user scripts.
        # plugins are not used at the moment and might better be replaced with master recipes
        while step_name_list:
            stepname = step_name_list.pop(0)
            step = step_control_dict[stepname]
            #step_parset = step_parset_obj[stepname]
            inputdict = {}
            inputargs = []
            resultdict = {}
            # default kind_of_step to recipe.
            try:
                kind_of_step = step.getString('kind')
            except:
                kind_of_step = 'recipe'
            try:
                typeval = step.getString('type')
            except:
                typeval = ''
            #self._construct_cmdline(inputargs, step, resultdicts)
            additional_input = {}
            if stepname in step_parset_obj:
                additional_input = self._construct_step_parset(
                    step_parset_obj[stepname],
                    resultdicts,
                    step_parset_files[stepname],
                    stepname)
            # stepname not a valid input for old recipes
            if kind_of_step == 'recipe':
                if self.task_definitions.get(typeval,
                                             'recipe') == 'executable_args':
                    inputdict = {'stepname': stepname}
                    inputdict.update(additional_input)

            self._construct_cmdline(inputargs, step, resultdicts)

            if stepname in step_parset_files:
                inputdict['parset'] = step_parset_files[stepname]

            self._construct_input(inputdict, step, resultdicts)
            # hack, popping 'type' is necessary, why? because you deleted kind already in parsets
            try:
                inputdict.pop('type')
            except:
                pass
            try:
                inputdict.pop('kind')
            except:
                pass
            # \hack
            # more hacks. Frameworks DictField not properly implemented. Construct your own dict from input.
            # python buildin functions cant handle the string returned from parset class.
            if 'environment' in inputdict.keys():
                val = inputdict['environment'].rstrip('}').lstrip('{').replace(
                    ' ', '')
                splitval = str(val).split(',')
                valdict = {}
                for item in splitval:
                    valdict[item.split(':')[0]] = item.split(':')[1]
                inputdict['environment'] = valdict

            # subpipeline. goal is to specify a pipeline within a pipeline.
            # load other existing pipeline parset and add them to your own.
            if kind_of_step == 'pipeline':
                subpipeline_parset = Parset()
                subpipeline_parset.adoptFile(typeval)
                submapfile = ''
                subpipeline_steplist = subpipeline_parset.getStringVector(
                    'pipeline.steps')

                if 'pipeline.mapfile' in subpipeline_parset.keys:
                    submapfile = subpipeline_parset['pipeline.mapfile']
                    subpipeline_parset.remove('pipeline.mapfile')
                if 'mapfile_in' in inputdict.keys():
                    submapfile = inputdict.pop('mapfile_in')
                resultdicts.update({os.path.splitext(
                    os.path.basename(typeval))[0]: {
                        'parset': typeval,
                        'mapfile': submapfile,
                    }})
                #todo: take care of pluginpathes and everything other then individual steps
                # make a pipeline parse methods that returns everything needed.
                # maybe as dicts to combine them to one
                subpipeline_parset.remove('pipeline.steps')
                if 'pipeline.pluginpath' in subpipeline_parset.keys:
                    subpipeline_parset.remove('pipeline.pluginpath')
                checklist = copy.deepcopy(subpipeline_steplist)
                for k in subpipeline_parset.keys:
                    if 'loopsteps' in k:
                        for item in subpipeline_parset.getStringVector(k):
                            checklist.append(item)
                # *********************************************************************
                # master parset did not handle formatting and comments in the parset.
                # proper format only after use of parset.makesubset. then it is a different object
                # from a different super class :(. this also explains use of parset.keys and parset.keys()
                # take the parset from subpipeline and add it to the master parset.
                # *********************************************************************
                # replace names of steps with the subpipeline stepname to create a unique identifier.
                # replacement values starting with ! will be taken from the master parset and overwrite
                # the ones in the subpipeline. only works if the ! value is already in the subpipeline
                for k in subpipeline_parset.keys:
                    if not str(k).startswith('#'):
                        val = subpipeline_parset[k]
                        if not str(k).startswith('!'):
                            for item in checklist:
                                if item in str(val):
                                    val = str(val).replace(
                                        item, stepname + '-' + item)
                            self.parset.add(stepname + '-' + k, str(val))
                        else:
                            self.parset.add(k, str(val))
                for i, item in enumerate(subpipeline_steplist):
                    subpipeline_steplist[i] = stepname + '-' + item
                for item in step_parset_obj[stepname].keys():
                    for k in self.parset.keys:
                        if str(k).startswith('!') and item in k:
                            self.parset.remove(k)
                            self.parset.add(
                                '! ' + item,
                                str(step_parset_obj[stepname][item]))
                self._replace_values()

                self._construct_steps(subpipeline_steplist,
                                      step_control_dict, step_parset_files,
                                      step_parset_obj, parset_dir)
                # Prepend the (renamed) sub-steps to the work list.
                for j in reversed(subpipeline_steplist):
                    name = j
                    step_control_dict[name] = step_control_dict[j]
                    step_name_list.insert(0, name)

                # remove replacements strings to prevent loading the same key twice
                for k in copy.deepcopy(self.parset.keys):
                    if str(k).startswith('!'):
                        self.parset.remove(k)

            # loop
            if kind_of_step == 'loop':
                # remember what loop is running to stop it from a conditional step
                # NOTE(review): identity test ('is not') on strings relies
                # on interning — confirm '!=' is not what was intended.
                if activeloop[0] is not stepname:
                    activeloop.insert(0, stepname)
                # prepare
                counter = 0
                breakloop = False
                if stepname in resultdicts:
                    counter = int(resultdicts[stepname]['counter']) + 1
                    breakloop = resultdicts[stepname]['break']
                loopsteps = step.getStringVector('loopsteps')
                # break at max iteration or when other step sets break variable
                # NOTE(review): 'is' on ints only works for CPython's small
                # int cache — confirm '==' is not what was intended.
                if counter is step.getInt('loopcount'):
                    breakloop = True
                if not breakloop:
                    # add loop steps to the pipeline including the loop itself
                    step_name_list.insert(0, stepname)
                    self._construct_steps(loopsteps, step_control_dict,
                                          step_parset_files, step_parset_obj,
                                          parset_dir)
                    for j in reversed(loopsteps):
                        name = j
                        step_control_dict[name] = step_control_dict[j]
                        step_name_list.insert(0, name)
                    # results for other steps to check and write states
                    resultdict = {'counter': counter, 'break': breakloop}
                else:
                    # reset values for second use of the loop (but why would you do that?)
                    resultdict = {'counter': -1, 'break': False}
                    activeloop.pop(0)

            # recipes
            if kind_of_step == 'recipe':
                with duration(self, stepname):
                    resultdict = self.run_task(typeval, inputargs,
                                               **inputdict)

            # plugins
            if kind_of_step == 'plugin':
                with duration(self, stepname):
                    resultdict = loader.call_plugin(
                        typeval, pipeline_args.getString('pluginpath'),
                        inputargs, **inputdict)
            resultdicts[stepname] = resultdict

            # breaking the loopstep
            # if the step has the keyword for loopbreaks assign the value
            if resultdict is not None and 'break' in resultdict:
                if resultdict['break']:
                    resultdicts[activeloop[0]]['break'] = resultdict['break']

    # *********************************************************************
    # build the inputs for the master recipes.
    def _construct_input(self, inoutdict, controlparset, resdicts):
        """Fill ``inoutdict`` with the step's arguments, resolving any
        ``<step>.output.<var>`` references against ``resdicts``."""
        # intermediate backward compatibility for opts subparset
        if controlparset.fullModuleName('opts'):
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('opts') + '.')
        # hack
        elif 'loopcount' not in controlparset.keys():
            argsparset = controlparset
        else:
            argsparset = controlparset.makeSubset(
                controlparset.fullModuleName('imaginary') + '.')
        # \hack
        self._replace_output_keyword(inoutdict, argsparset, resdicts)

    def _construct_cmdline(self, inoutargs, controlparset, resdicts):
        """Append the step's 'cmdline.*' values to ``inoutargs``, resolving
        ``<step>.output.<var>`` references against ``resdicts``."""
        argsparset = controlparset.makeSubset(
            controlparset.fullModuleName('cmdline') + '.')
        for k in argsparset.keys():
            if argsparset.getString(k).__contains__('.output.'):
                step, outvar = argsparset.getString(k).split('.output.')
                inoutargs.append(resdicts[step][outvar])
            else:
                inoutargs.append(argsparset.getString(k))
        try:
            controlparset.remove('cmdline.inmap')
        except:
            pass

    def _construct_steps(self, step_name_list, step_control_dict,
                         step_parset_files, step_parset_obj, parset_dir):
        """Resolve every step name into its control subparset and write
        the step's argument parset to ``parset_dir``.

        Duplicate step names get a numeric suffix so each occurrence is
        unique; ``step_name_list`` is updated in place (by negative index,
        walking the list from the back).
        """
        step_list_copy = (copy.deepcopy(step_name_list))
        counter = 0
        while step_list_copy:
            counter -= 1
            stepname = step_list_copy.pop(-1)
            fullparset = self.parset.makeSubset(
                self.parset.fullModuleName(str(stepname)) + '.')
            subparset = fullparset.makeSubset(
                fullparset.fullModuleName('control') + '.')
            number = 0
            for item in step_list_copy:
                if item == stepname:
                    number += 1
            if number != 0:
                stepname += str(number)
            step_name_list[counter] = stepname
            step_control_dict[stepname] = subparset
            # double implementation for intermediate backward compatibility
            if fullparset.fullModuleName(
                    'parsetarg') or fullparset.fullModuleName('argument'):
                if fullparset.fullModuleName('parsetarg'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('parsetarg') + '.')
                if fullparset.fullModuleName('argument'):
                    stepparset = fullparset.makeSubset(
                        fullparset.fullModuleName('argument') + '.')

                # *********************************************************************
                # save parsets
                # either a filename is given in the main parset
                # or files will be created from subsets with stepnames.parset as filenames
                # for name, parset in step_parset_dict.iteritems():
                try:
                    file_parset = Parset(stepparset.getString('parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    stepparset.remove('parset')
                except:
                    pass
                # parset from task.cfg
                try:
                    file_parset = Parset(
                        self.task_definitions.get(str(subparset['type']),
                                                  'parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                except:
                    pass
                # for parset in control section
                try:
                    file_parset = Parset(subparset.getString('parset'))
                    for k in file_parset.keys:
                        if not k in stepparset.keys():
                            stepparset.add(k, str(file_parset[k]))
                    subparset.remove('parset')
                except:
                    pass
                step_parset = os.path.join(parset_dir, stepname + '.parset')
                stepparset.writeFile(step_parset)
                step_parset_files[stepname] = step_parset
                step_parset_obj[stepname] = stepparset

    def _replace_output_keyword(self, inoutdict, argsparset, resdicts):
        """Copy ``argsparset`` into ``inoutdict``, replacing every
        ``<step>.output.<var>`` reference (scalar or bracketed list) by the
        value stored in ``resdicts``; a '+' in the variable name appends a
        literal suffix to the resolved value."""
        for k in argsparset.keys():
            keystring = argsparset.getString(k)
            if keystring.__contains__('.output.'):
                if keystring.__contains__(','):
                    # Bracketed list: resolve each element separately.
                    keystring = keystring.rstrip(']')
                    keystring = keystring.lstrip('[')
                    vec = []
                    for item in keystring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                        else:
                            vec.append(item)
                    inoutdict[k] = vec
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        inoutdict[k] = resdicts[step][tmplist[0]] + tmplist[1]
                    else:
                        inoutdict[k] = resdicts[step][outvar]
            else:
                inoutdict[k] = argsparset.getString(k)

    def _construct_step_parset(self, argsparset, resdicts, filename,
                               stepname):
        """Resolve output references inside a step's argument parset,
        write it to ``filename`` and return the bookkeeping dict
        (inputkeys / mapfiles_in / arguments / optional outputkey)."""
        addvals = {'inputkeys': [], 'mapfiles_in': [], 'arguments': []}
        # hack for original order of args
        tmp_keys = argsparset.keys()
        ordered_keys = []
        for orig in self.parset.keys:
            for item in tmp_keys:
                if (stepname + '.') in orig and (
                        'argument.' + item in orig
                        and not 'argument.' + item + '.' in orig):
                    ordered_keys.append(item)
                    # NOTE(review): 'continue' at the end of the inner loop
                    # is a no-op; 'break' may have been intended.
                    continue
        # \hack
        for k in ordered_keys:
            valuestring = argsparset.getString(k)
            if valuestring.__contains__('.output.'):
                if valuestring.__contains__(','):
                    # Bracketed list: resolve each element; mapfile outputs
                    # are additionally recorded as input/mapfile keys.
                    valuestring = valuestring.rstrip(']')
                    valuestring = valuestring.lstrip('[')
                    vec = []
                    for item in valuestring.split(','):
                        if item.__contains__('.output.'):
                            step, outvar = item.split('.output.')
                            vec.append(resdicts[step][outvar])
                            if 'mapfile' in str(outvar):
                                addvals['inputkeys'].append(
                                    resdicts[step][outvar])
                                addvals['mapfiles_in'].append(
                                    resdicts[step][outvar])
                        else:
                            vec.append(item)
                    argsparset.replace(k, str(vec))
                    if k == 'flags':
                        addvals['arguments'] = vec
                        argsparset.remove(k)
                else:
                    step, outvar = argsparset.getString(k).split('.output.')
                    #more ugly hacks... really needs clearly structured replacement method...
                    if '+' in outvar:
                        tmplist = str(outvar).split('+')
                        argsparset.replace(
                            k, str(resdicts[step][tmplist[0]]) + tmplist[1])
                    else:
                        argsparset.replace(k, str(resdicts[step][outvar]))
                        #if isinstance(resdicts[step][outvar], str):
                        if 'mapfile' in str(outvar):
                            addvals['inputkeys'].append(
                                resdicts[step][outvar])
                            addvals['mapfiles_in'].append(
                                resdicts[step][outvar])
                    if k == 'flags':
                        addvals['arguments'] = str(argsparset[k])
                        argsparset.remove(k)
            else:
                if k == 'flags':
                    addvals['arguments'] = str(argsparset[k])
                    argsparset.remove(k)
            #direct usage of outputkey
            if valuestring.__contains__('outputkey'):
                addvals['outputkey'] = 'outputkey'

        argsparset.writeFile(filename)
        return addvals

    def _get_parset_dicts(self):
        """Placeholder; returns an empty dict."""
        return {}

    def show_tasks(self):
        """Print the names of all known task definitions."""
        tasklist = []
        tasklist = self.task_definitions.sections()
        for item in tasklist:
            print item
        #return tasklist

    def show_task(self, task):
        """Print the possible arguments (key = value) of ``task``, taken
        from its parset as registered in the task definitions."""
        task_parset = Parset()
        if self.task_definitions.has_option(task, 'parset'):
            task_parset.adoptFile(self.task_definitions.get(task, 'parset'))
            print 'possible arguments: key = value'
            for k in task_parset.keys:
                print '    ', k, '    ', '=', '    ', task_parset[k]

    def _add_step(self):
        # Placeholder; not implemented.
        steplist = []

    def _replace_values(self):
        """Apply '{{ name }}' template substitution over the master parset.

        Replacement values come from an optional 'prepare' plugin (loaded
        via imp, best-effort) and from parset keys starting with '!'.
        """
        replacedict = {}
        try:
            import imp
            plugin = imp.load_source('main', str(self.parset['prepare']))
            replacedict = plugin.main()
        except:
            # No 'prepare' key or plugin failed: start with an empty dict.
            pass
        for check in self.parset.keys:
            if str(check).startswith('!'):
                replacedict[str(check).lstrip('!').lstrip(' ')] = str(
                    self.parset[check])
        #print 'REPLACEDICT: ',replacedict
        for check in self.parset.keys:
            if not str(check).startswith('#'):
                for k, v in replacedict.iteritems():
                    if '{{ ' + k + ' }}' in str(self.parset[check]):
                        replacestring = str(self.parset[check]).replace(
                            '{{ ' + k + ' }}', v)
                        self.parset.replace(check, replacestring)
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Run a CASA task on this node (legacy variant).

    When ``parsetasfile`` is False the kwargs are passed as plain
    ``--key=value`` arguments; otherwise they are grouped by their
    ``task.`` prefix into a one-line CASA call that is written to a
    ``*.casacommand.py`` file (wrapped in try/except so CASA failures
    exit non-zero) and executed through a generated bash wrapper.

    :param infile: MeasurementSet to process; must exist.
    :param executable: path to the casa(py) launcher; must be executable.
    :param args: extra command-line arguments (mutated: generated files
        are appended).
    :param kwargs: parset-style ``task.param`` keys, or plain options.
    :param work_dir: directory for generated files; created if missing.
    :param parsetasfile: select kwargs handling as described above.
    :param args_format: unused.
    :param environment: merged into ``self.environment`` for the child.
    :return: 1 on error; falls through (``None``) on success.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # hack the planet
    #executable = 'casa'

    # Time execution of this job
    with log_time(self.logger):
        if os.path.exists(infile):
            self.logger.info("Processing %s" % infile)
        else:
            self.logger.error("Dataset %s does not exist" % infile)
            return 1

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # hurray! race condition when running with more than one process on one filesystem
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        #print 'KWARGS: ', kwargs
        if not parsetasfile:
            # Plain command-line style: every kwarg becomes --key=value.
            for k, v in kwargs.items():
                args.append('--' + k + '=' + v)
        else:
            # Group parset keys by the prefix before the first dot:
            # 'clean.niter' belongs to the CASA task 'clean'.
            nodeparset = Parset()
            sublist = []
            for k, v in kwargs.items():
                nodeparset.add(k, v)
                # BUGFIX: original used "if str(k).find('.'):", which is
                # truthy for keys WITHOUT a dot (find() returns -1) and
                # silently added whole dotless keys as task names.
                if '.' in str(k):
                    if not str(k).split('.')[0] in sublist:
                        sublist.append(str(k).split('.')[0])
            #print 'SUBPARSETLIST: ', sublist

        # Build the CASA call for each task prefix.
        # NOTE: as in the original, only the LAST task's call survives in
        # casastring (it is reassigned, not accumulated, per sub).
        casastring = ''
        for sub in sublist:
            subpar = nodeparset.makeSubset(
                nodeparset.fullModuleName(sub) + '.')
            casastring = sub + '('
            for k in subpar.keys():
                # BUGFIX: the '/casastr/' test must come BEFORE the plain
                # '/' test — previously any value starting with '/casastr/'
                # matched find('/') == 0 first, making the marker branch
                # dead code. Also use real prefix removal instead of
                # str.strip('/casastr/'), which strips any leading/trailing
                # characters from the SET {/,c,a,s,t,r}.
                if str(subpar[k]).find('/casastr/') == 0:
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k])[len('/casastr/'):] + "'" + ','
                elif str(subpar[k]).find('/') == 0:
                    # Absolute path -> quote as a Python string.
                    casastring += str(k) + '=' + "'" + str(
                        subpar[k]) + "'" + ','
                else:
                    # Everything else is emitted verbatim (numbers, bools,
                    # lists) — CASA evaluates the generated line.
                    casastring += str(k) + '=' + str(subpar[k]) + ','
            casastring = casastring.rstrip(',')
            casastring += ')\n'
        #print 'CASASTRING:'
        #print casastring

        # 1) return code of a casapy is not properly recognized by the pipeline
        #    wrapping in shellscript works for succesful runs.
        #    failed runs seem to hang the pipeline...
        # 2) casapy can not have two instances running from the same directory.
        # create tmp dirs
        casapydir = tempfile.mkdtemp(dir=work_dir)
        if casastring != '':
            casafilename = os.path.join(
                work_dir, os.path.basename(infile) + '.casacommand.py')
            # Wrap the call so that CASA errors terminate with a non-zero
            # exit status instead of hanging the pipeline.
            with open(casafilename, 'w') as casacommandfile:
                casacommandfile.write('try:\n')
                casacommandfile.write('    ' + casastring)
                casacommandfile.write('except SystemExit:\n')
                casacommandfile.write('    pass\n')
                casacommandfile.write('except:\n')
                casacommandfile.write('    import os\n')
                casacommandfile.write('    os._exit(1)\n')
            args.append(casafilename)

        # Build the bash wrapper (see note 1 above).
        somename = os.path.join(
            work_dir, os.path.basename(infile) + '.casashell.sh')
        commandstring = executable
        for item in args:
            commandstring += ' ' + item
        #print 'COMMANDSTRING: ', commandstring
        with open(somename, 'w') as wrapper:
            wrapper.write('#!/bin/bash \n')
            wrapper.write('echo "Trying CASAPY command" \n')
            #wrapper.write('/home/zam/sfroehli/casapy-42.1.29047-001-1-64b/bin/casa' + ' --nologger'+' -c ' + casafilename)
            wrapper.write(commandstring)
            # wrapper.write('\nexit 0')

        # Make the wrapper executable for user, group and others.
        import stat
        st = os.stat(somename)
        os.chmod(
            somename,
            st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            #cmd = [executable] + args
            cmd = [somename]
            with CatchLog4CPlus(
                    casapydir,
                    self.logger.name + "." + os.path.basename(infile),
                    os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(cmd, casapydir, self.environment, logger)
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Run a CASA task on the node.

    Builds a small python command file containing one CASA task call per
    task described in ``kwargs`` (keys of the form ``task.parameter``),
    wraps it in a bash script that invokes ``executable`` and runs that
    script in a private temporary directory.

    Parameters:
      infile       - dataset name, or a '[ms1,ms2,...]' list string; the
                     first entry is used for existence checks and naming
      executable   - path of the casa launcher; must be executable
      args         - extra command-line arguments for the casa call
      kwargs       - parset-style dict describing the CASA task call(s)
      work_dir     - directory where command/shell files are written
      parsetasfile - must be True for this node script
      args_format  - unused here
      environment  - extra environment variables for the child process

    Returns 1 on error; falls through (None) on success.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        # An input of the form '[ms1, ms2, ...]' is a list of datasets;
        # use the first one as the reference for checks and file naming.
        if infile[0] == '[':
            infiles = [ms.strip(" []\'\"") for ms in infile.split(',')]
            reffile = infiles[0]
        else:
            reffile = infile

        if os.path.exists(reffile):
            self.logger.info("Processing %s" % reffile)
        else:
            self.logger.error("Dataset %s does not exist" % reffile)
            return 1

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # race condition when running with more than one process on one
        # filesystem: another process may create work_dir between the
        # isdir() test and mkdir(); treat that as success.
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        if not parsetasfile:
            self.logger.error("Nodescript \"executable_casa.py\" requires \"parsetasfile\" to be True!")
            return 1

        nodeparset = Parset()
        sublist = []
        for k, v in kwargs.items():
            nodeparset.add(k, v)
            # Collect the distinct task names: the prefix before the
            # first '.' of keys shaped like 'task.parameter'.
            # (Fixed: str.find() returns -1 -- which is truthy -- for
            # keys without a dot, so the old test added every key.)
            if '.' in str(k):
                if not str(k).split('.')[0] in sublist:
                    sublist.append(str(k).split('.')[0])

        # Build one casa call per task.
        # (Fixed: the command string used to be reset for every task, so
        # only the last task's call survived into the command file.)
        casacommands = []
        for sub in sublist:
            subpar = nodeparset.makeSubset(nodeparset.fullModuleName(sub) + '.')
            argtexts = []
            for k in subpar.keys():
                argtexts.append(str(k) + '=' + self._casa_format_value(subpar[k]))
            casacommands.append(sub + '(' + ','.join(argtexts) + ')\n')
        casastring = ''.join(casacommands)

        # 1) the return code of casapy is not properly recognized by the
        #    pipeline; wrapping in a shellscript works for succesful runs
        #    (failed runs seem to hang the pipeline...).
        # 2) casapy can not have two instances running from the same
        #    directory, so run from a private temporary directory.
        casapydir = tempfile.mkdtemp(dir=work_dir)

        if casastring == '':
            # Previously this fell through and died on an undefined
            # script name; report the problem explicitly instead.
            self.logger.error("No CASA command could be built from the parset")
            return 1

        casafilename = os.path.join(work_dir, os.path.basename(reffile) + '.casacommand.py')
        with open(casafilename, 'w') as casacommandfile:
            casacommandfile.write(casastring)
        args.append(casafilename)

        # Wrap the casa invocation in a bash script; quote arguments
        # containing spaces or brackets.
        somename = os.path.join(work_dir, os.path.basename(reffile) + '.casashell.sh')
        commandstring = executable
        for item in args:
            if str(item).find(' ') > -1 or str(item).find('[') > -1:
                commandstring += ' "' + item + '"'
            else:
                commandstring += ' ' + item
        with open(somename, 'w') as shellfile:
            shellfile.write('#!/bin/bash \n')
            shellfile.write('echo "Trying CASAPY command" \n')
            shellfile.write(commandstring + ' >& casa.log\n')

        # file permissions: make the wrapper script executable
        st = os.stat(somename)
        os.chmod(somename, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        try:
            # ****************************************************************
            # Run
            cmd = [somename]
            with CatchLog4CPlus(
                casapydir,
                self.logger.name + "." + os.path.basename(reffile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(
                    cmd, casapydir, self.environment, logger
                )
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1

def _casa_format_value(self, val):
    """
    Render a single parset value as a CASA task-argument literal.

    Paths (leading '/') and 'casastr/'-prefixed values become quoted
    strings, booleans and numbers (or lists of numbers) pass through
    unquoted, lists of non-numbers become lists of quoted strings, and
    anything else is quoted as a plain string.
    """
    sval = str(val)
    if sval.find('/') == 0:
        return "'" + sval + "'"
    if sval.find('casastr/') == 0:
        # Fixed: strip('casastr/') removed any of those *characters* from
        # both ends of the value; only the 'casastr/' prefix is meant to
        # be removed.
        return "'" + sval[len('casastr/'):] + "'"
    if sval.lower() == 'false' or sval.lower() == 'true':
        return sval
    # Test if int/float or list of int/float
    self.logger.info('value: {}'.format(val))
    try:
        float(sval)
        return sval
    except ValueError:
        pass
    if '[' in sval or '(' in sval:
        # Check if list of int/float or strings
        list_vals = [f.strip() for f in sval.strip('[]()').split(',')]
        try:
            for list_val in list_vals:
                float(list_val)
            return sval
        except ValueError:
            return '[{}]'.format(','.join("'" + list_val + "'" for list_val in list_vals))
    # Simple string
    return "'" + sval + "'"
def run(self, infile, executable, args, kwargs, work_dir='/tmp', parsetasfile=False, args_format='', environment=''):
    """
    Execute a generic external program on the node.

    Turns ``kwargs`` into command-line options according to the selected
    argument style, or writes them to a parset file, then runs
    ``executable`` with the assembled argument list.

    Parameters:
      infile       - name of the dataset being processed (logging/naming)
      executable   - path of the program to run; must be executable
      args         - positional command-line arguments
      kwargs       - keyword options, rendered according to args_format
      work_dir     - working directory for the child process
      parsetasfile - when True, write kwargs to a parset file instead of
                     converting them to command-line options
      args_format  - dict; its 'args_format' entry selects the option
                     style: 'gnu', 'lofar', 'argparse', 'wsclean', 'losoto'
      environment  - extra environment variables for the child process

    Returns 1 on error; falls through (None) on success.
    """
    # Debugging info
    self.logger.debug("infile = %s" % infile)
    self.logger.debug("executable = %s" % executable)
    self.logger.debug("working directory = %s" % work_dir)
    self.logger.debug("arguments = %s" % args)
    self.logger.debug("arg dictionary = %s" % kwargs)
    self.logger.debug("environment = %s" % environment)

    self.environment.update(environment)

    # Time execution of this job
    with log_time(self.logger):
        self.logger.info("Processing %s" % infile)

        # Check if executable is present
        if not os.access(executable, os.X_OK):
            self.logger.error("Executable %s not found" % executable)
            return 1

        # race condition when running with more than one process on one
        # filesystem: another process may create work_dir between the
        # isdir() test and mkdir(); treat that as success.
        if not os.path.isdir(work_dir):
            try:
                os.mkdir(work_dir)
            except OSError as exc:  # Python >2.5
                if exc.errno == errno.EEXIST and os.path.isdir(work_dir):
                    pass
                else:
                    raise

        argsformat = args_format['args_format']

        # deal with multiple input files for wsclean: expand any
        # '[a,b,...]' positional argument into separate arguments in place
        if argsformat == 'wsclean':
            # range instead of xrange: equivalent here and works on
            # both Python 2 and 3
            for i in reversed(range(len(args))):
                if str(args[i]).startswith('[') and str(args[i]).endswith(']'):
                    tmplist = args.pop(i).lstrip('[').rstrip(']').split(',')
                    for val in reversed(tmplist):
                        args.insert(i, val.strip(' \'\"'))

        if not parsetasfile:
            # The format values are mutually exclusive, so an elif chain
            # replaces the original run of independent ifs.
            if argsformat == 'gnu':
                for k, v in kwargs.items():
                    args.append('--' + k + '=' + v)
            elif argsformat == 'lofar':
                for k, v in kwargs.items():
                    args.append(k + '=' + v)
            elif argsformat == 'argparse':
                # NOTE(review): flag and value end up in a single argv
                # entry ('--k v'); presumably the callee tolerates this --
                # confirm before changing.
                for k, v in kwargs.items():
                    args.append('--' + k + ' ' + v)
            elif argsformat == 'wsclean':
                for k, v in kwargs.items():
                    if str(v).startswith('[') and str(v).endswith(']'):
                        v = v.lstrip('[').rstrip(']').replace(' ', '')
                        multargs = v.split(',')
                    else:
                        multargs = v.split(' ')
                    # str.split always returns a non-empty list, so the
                    # else branch below is effectively dead; kept as a
                    # safety net.
                    if multargs:
                        multargs.reverse()
                        for item in multargs:
                            args.insert(0, item)
                    else:
                        args.insert(0, v)
                    args.insert(0, '-' + k)
        else:
            # Write all keyword options to a parset file and hand that to
            # the executable instead.
            nodeparset = Parset()
            parsetname = os.path.join(work_dir, os.path.basename(infile) + '.parset')
            for k, v in kwargs.items():
                nodeparset.add(k, v)
            nodeparset.writeFile(parsetname)
            if argsformat == 'losoto':
                args.append(parsetname)
            else:
                args.insert(0, parsetname)

        try:
            # ****************************************************************
            # Run
            cmd = [executable] + args
            with CatchLog4CPlus(
                work_dir,
                self.logger.name + "." + os.path.basename(infile),
                os.path.basename(executable),
            ) as logger:
                # Catch segfaults and retry
                catch_segfaults(
                    cmd, work_dir, self.environment, logger
                )
        except CalledProcessError as err:
            # CalledProcessError isn't properly propagated by IPython
            self.logger.error(str(err))
            return 1
        except Exception as err:
            self.logger.error(str(err))
            return 1