def __mt_job_prepare__(self, rjobs, subjobconfigs, masterjobconfig):
    """Prepare subjobs concurrently.

    Builds the shared (master) input sandbox, prestages any sandbox file
    that exceeds the WMS sandbox size limit, then renders one JDL file per
    subjob using a pool of 10 worker threads.

    Parameters:
        rjobs           -- list of subjob objects to prepare
        subjobconfigs   -- per-subjob configuration objects (parallel to rjobs)
        masterjobconfig -- configuration shared by all subjobs

    Returns:
        dict mapping subjob id -> JDL file path on full success,
        or None if any preparation step failed.
    """
    logger.warning('preparing %d subjobs ... it may take a while' % len(rjobs))

    # prepare the master job (i.e. create shared inputsandbox, etc.)
    master_input_sandbox = IBackend.master_prepare(self, masterjobconfig)

    # upload any master sandbox file that is over the WMS sandbox limitation
    for f in master_input_sandbox:
        master_input_idx = self.__check_and_prestage_inputfile__(f)

        if not master_input_idx:
            # NOTE: typo fixed in the error message ("perparation")
            logger.error('master input sandbox preparation failed: %s' % f)
            return None

    # the algorithm for preparing a single bulk job
    class MyAlgorithm(Algorithm):

        def __init__(self):
            Algorithm.__init__(self)

        def process(self, sj_info):
            # sj_info is a [subjobconfig, subjob] pair (see mt_data below)
            my_sc = sj_info[0]
            my_sj = sj_info[1]

            try:
                logger.debug("preparing job %s" % my_sj.getFQID('.'))
                jdlpath = my_sj.backend.preparejob(my_sc, master_input_sandbox)

                if (not jdlpath) or (not os.path.exists(jdlpath)):
                    raise GangaException('job %s not properly prepared' % my_sj.getFQID('.'))

                self.__appendResult__(my_sj.id, jdlpath)
                return True
            except Exception:
                # the exception variable was unused; log_user_exception()
                # reports the active exception itself. A failed subjob is
                # detected via the done-list check below.
                log_user_exception()
                return False

    mt_data = [[sc, sj] for sc, sj in zip(subjobconfigs, rjobs)]

    myAlg = MyAlgorithm()
    myData = Data(collection=mt_data)

    runner = MTRunner(name='lcg_jprepare', algorithm=myAlg, data=myData, numThread=10)
    runner.start()
    runner.join(-1)

    if len(runner.getDoneList()) < len(mt_data):
        # at least one subjob failed to prepare
        return None
    else:
        # return a JDL file dictionary with subjob ids as keys, JDL file
        # paths as values
        return runner.getResults()
def __mt_bulk_submit__(self, node_jdls):
    """Submit prepared JDL files concurrently via ARC.

    Parameters:
        node_jdls -- dict mapping subjob id -> JDL file path

    Returns:
        dict mapping subjob id -> ARC job id on full success; None if any
        submission failed (in which case the jobs already submitted are
        cancelled on the WMS first).
    """
    job = self.getJobObject()

    logger.warning('submitting %d subjobs ... it may take a while' % len(node_jdls))

    # the algorithm for submitting a single bulk job
    class MyAlgorithm(Algorithm):

        def __init__(self, cred_req, masterInputWorkspace, ce, arcverbose):
            Algorithm.__init__(self)
            self.inpw = masterInputWorkspace
            self.cred_req = cred_req
            self.ce = ce
            self.arcverbose = arcverbose

        def process(self, jdl_info):
            # jdl_info is a (subjob id, JDL path) pair (see mt_data below)
            my_sj_id = jdl_info[0]
            my_sj_jdl = jdl_info[1]

            my_sj_jid = Grid.arc_submit(my_sj_jdl, self.ce, self.arcverbose, self.cred_req)

            if not my_sj_jid:
                return False
            self.__appendResult__(my_sj_id, my_sj_jid)
            return True

    # comprehension with 'sj_id' instead of the original loop, which
    # shadowed the builtin 'id'
    mt_data = [(sj_id, jdl) for sj_id, jdl in node_jdls.items()]

    myAlg = MyAlgorithm(cred_req=self.credential_requirements,
                        masterInputWorkspace=job.getInputWorkspace(),
                        ce=self.CE,
                        arcverbose=self.verbose)
    myData = Data(collection=mt_data)

    runner = MTRunner(name='arc_jsubmit', algorithm=myAlg,
                      data=myData, numThread=config['SubmissionThread'])
    runner.start()
    runner.join(timeout=-1)

    if len(runner.getDoneList()) < len(mt_data):
        # not all bulk jobs were successfully submitted: cancel the
        # already-submitted jobs on the WMS immediately
        logger.error(
            'some bulk jobs not successfully (re)submitted, canceling submitted jobs on WMS'
        )
        Grid.arc_cancel_multiple(runner.getResults().values(), self.credential_requirements)
        return None
    else:
        return runner.getResults()
def __mt_bulk_submit__(self, node_jdls):
    """Submit prepared JDL files concurrently via ARC, using the GLITE
    grid object.

    Parameters:
        node_jdls -- dict mapping subjob id -> JDL file path

    Returns:
        dict mapping subjob id -> ARC job id on full success; None if any
        submission failed (in which case the jobs already submitted are
        cancelled on the WMS first).
    """
    job = self.getJobObject()

    logger.warning('submitting %d subjobs ... it may take a while' % len(node_jdls))

    # the algorithm for submitting a single bulk job
    class MyAlgorithm(Algorithm):

        def __init__(self, gridObj, masterInputWorkspace, ce, arcverbose):
            Algorithm.__init__(self)
            self.inpw = masterInputWorkspace
            self.gridObj = gridObj
            self.ce = ce
            self.arcverbose = arcverbose

        def process(self, jdl_info):
            # jdl_info is a (subjob id, JDL path) pair (see mt_data below)
            my_sj_id = jdl_info[0]
            my_sj_jdl = jdl_info[1]

            my_sj_jid = self.gridObj.arc_submit(my_sj_jdl, self.ce, self.arcverbose)

            if not my_sj_jid:
                return False
            self.__appendResult__(my_sj_id, my_sj_jid)
            return True

    # comprehension with 'sj_id' instead of the original loop, which
    # shadowed the builtin 'id'
    mt_data = [(sj_id, jdl) for sj_id, jdl in node_jdls.items()]

    myAlg = MyAlgorithm(gridObj=grids['GLITE'],
                        masterInputWorkspace=job.getInputWorkspace(),
                        ce=self.CE,
                        arcverbose=self.verbose)
    myData = Data(collection=mt_data)

    runner = MTRunner(name='arc_jsubmit', algorithm=myAlg,
                      data=myData, numThread=config['SubmissionThread'])
    runner.start()
    runner.join(timeout=-1)

    if len(runner.getDoneList()) < len(mt_data):
        # not all bulk jobs were successfully submitted: cancel the
        # already-submitted jobs on the WMS immediately
        logger.error(
            'some bulk jobs not successfully (re)submitted, canceling submitted jobs on WMS')
        grids['GLITE'].arc_cancelMultiple(runner.getResults().values())
        return None
    else:
        return runner.getResults()