def _resumeRecursiveCmd(self, dumpFileName, lastIdx=None, test=False):
    """Resume a recursive command chain and submit its next pending command.

    The cached command list is loaded from ``dumpFileName``. Each entry is
    indexed as ``entry[0]`` (the parameter sequence forwarded to
    ``_getSubmitParamListFromCmd``) and ``entry[1]`` (its "done" flag).

    Args:
        dumpFileName: Cache file read with ``Utilities.loadCache``.
        lastIdx: Index of the command that just finished. When given, that
            entry is flagged as done and the list is written back with
            ``Utilities.saveCache``. When ``None``, scanning starts at the
            first entry whose last element is false.
        test: When true, print the command that would be submitted and
            return without submitting anything.

    Returns:
        None. Prints ``Finished recursive cmd`` and returns early when no
        pending command is left.
    """
    self._recursive = True
    cmdList = Utilities.loadCache(dumpFileName)
    nbCmds = len(cmdList)

    if lastIdx is None:
        # Default to "nothing left" so nextIdx is always bound, even when
        # the list is empty or every entry is already flagged as done.
        nextIdx = nbCmds
        for idx, entry in enumerate(cmdList):
            if not entry[-1]:
                nextIdx = idx
                break
    else:
        cmdList[lastIdx][1] = True
        nextIdx = lastIdx + 1
        Utilities.saveCache(cmdList, dumpFileName)

    # Skip forward over entries that are already done. An entry counts as
    # done when its flag is set, or when _getSubmitParamListFromCmd returns
    # something other than a tuple for it.
    paramList = None
    submitParams = None
    while nextIdx < nbCmds:
        nextCmdTuple = cmdList[nextIdx]
        if not nextCmdTuple[1]:
            paramList = list(nextCmdTuple[0])
            submitArgs = [paramList[0]] + paramList + \
                [dumpFileName, None, None, nextIdx]
            res = self._getSubmitParamListFromCmd(*submitArgs)
            if isinstance(res, types.TupleType):
                submitParams = res
                break
        cmdList[nextIdx][1] = True
        nextIdx += 1
        print('Passing done job: [%s]' % self.__getCmdFromParamList(
            list(nextCmdTuple[0])))

    if submitParams is None:
        # Ran off the end of the list: nothing left to submit.
        print('Finished recursive cmd')
        return

    if test:
        print('%s %s' % (nextIdx, paramList))
    currentCmd = self.__getCmdFromParamList(paramList)
    print('%s %s' % (nextIdx, currentCmd))

    # Reuse the tuple computed during the scan instead of calling
    # _getSubmitParamListFromCmd a second time with the same arguments.
    (cmd, fileName, outputFile, onSubmitAction, walltime, queue,
     machineToUseList, machineToExcludeList, nbCpus, nodeNb, memory,
     jobName, expectedFormat, otherExpectedFormatList, errorFile,
     outFile, jobId, dumpFileName2, email, scriptName, optionList
     ) = submitParams

    if test:
        print(cmd)
        return

    cluster = guessHpc()
    cluster.submitJobAndGetId(
        cmd,
        errorFile=errorFile,
        outputFile=outFile,
        dependentJobId=jobId,
        nbProc=nbCpus,
        jobName=jobName,
        memory=memory,
        machineToUseList=machineToUseList,
        node=nodeNb,
        machineToExcludeList=machineToExcludeList,
        walltime=walltime,
        queue=queue,
        scriptName=scriptName,
        optionList=optionList)
def process(self, dirName):
    """Submit one cluster job per ``vcf`` file of *dirName* that needs work.

    Every file accepted by ``self._isFileToProcess`` is handed back to this
    very script (``python <this file> -f <file>``) through the cluster
    object returned by ``guessHpc()``. The job's error and output logs are
    written next to the file as ``<file>.err`` and ``<file>.out``. Once all
    files have been looked at, the method waits until the cluster is free.

    Args:
        dirName: Directory scanned for files with the ``vcf`` extension.
    """
    hpc = guessHpc()
    roundNb = 1
    submittedAny = False
    print('Round nb %d' % roundNb)

    for vcfFile in FolderIterator().getFileListInDirWithExt(dirName, 'vcf'):
        if self._isFileToProcess(vcfFile):
            # Single-argument print keeps the output identical under
            # Python 2 and Python 3.
            print('%s %s' % ('File', vcfFile))
            jobCmd = 'python %s -f %s' % (os.path.abspath(__file__), vcfFile)
            hpc.submitJobAndGetId(jobCmd,
                                  errorFile=vcfFile + '.err',
                                  outputFile=vcfFile + '.out')
            submittedAny = True

    # The running-job count is only queried when something was submitted.
    if not (submittedAny and hpc.getNbRunningJobs()):
        print('Done')
    roundNb += 1
    hpc.waitUntilClusterIsFree()
def process(self, dirName, *args):
    """Build and dispatch the command for every data item found in *dirName*.

    Items come from ``FolderIterator().getDataIterator(dirName,
    self._fileExt)``. An item is skipped when ``self._isFileToProcess``
    rejects it, or when ``self._laneList`` is set and neither its
    ``(sampleName, fcName, laneNb)`` triple nor its sample name is listed.
    The command returned by ``self._getCmdToRunFromFile`` is then handled
    in one of three ways:

    * collected in a job list when both ``self._recursive`` and
      ``self._maxNbJobs`` are set (handed to ``__runMergedJobs`` afterwards);
    * submitted through ``cluster.submitJobAndGetId`` when it is a string,
      with a ``<output>_done`` marker touched on success;
    * otherwise run through ``self._runCmdList``, directly or via
      ``cluster.submit`` depending on the cluster type.

    ``self._finishProcess`` is always called at the end.

    Args:
        dirName: Directory whose data items are processed.
        *args: Forwarded unchanged to ``_getCmdToRunFromFile`` and
            ``_finishProcess``.
    """
    cluster = guessHpc(nbCpus=self._nbCpus)
    print(cluster)
    roundNb = 1
    hasJobBeenSubmitted = False
    jobList = []

    while True:
        print('Round nb %d' % roundNb)
        for fileName in FolderIterator().getDataIterator(dirName,
                                                         self._fileExt):
            print('C = [%s]' % str(fileName))
            if not self._isFileToProcess(fileName):
                print('Passing file "%s"' % str(fileName))
                continue

            if self._laneList:
                # With a lane filter the item is unpacked as a 3-tuple; only
                # its second element is used to look up the run metadata.
                _, seqFile1, _ = fileName
                sampleName, fcName, _ = IlluminaRun(
                ).getSampleNameRunNameAndUplexFromPath(seqFile1)
                laneNb, _ = IlluminaRun().getLaneAndReadNbfromFile(seqFile1)
                print('%s %s %s %s' % (fileName, sampleName, fcName, laneNb))
                isSelected = (
                    (sampleName, fcName, laneNb) in self._laneList or
                    sampleName in self._laneList)
                if not isSelected:
                    print('Passing file %s' % str(fileName))
                    continue

            print('%s %s' % ('Processing', fileName))
            cmd = self._getCmdToRunFromFile(fileName, *args)
            print('%s %s' % ('CMD', cmd))

            if isinstance(cmd, types.StringType):
                outputFile = self._outputFilePattern % fileName
                doneFile = outputFile + '_done'
                if os.path.isfile(doneFile):
                    print('Already processed file: %s' % fileName)
                    continue
                cmd += ' && touch %s' % doneFile
                if self._recursive and self._maxNbJobs:
                    jobList.append((cmd, outputFile))
                    continue
                cluster.submitJobAndGetId(
                    cmd,
                    errorFile=outputFile + '.err',
                    outputFile=outputFile + '.out',
                    machineToUseList=self._machineList)
            else:
                if self._recursive and self._maxNbJobs:
                    # NOTE(review): outputFile is only assigned in the
                    # string-command branch above, so here it is either
                    # unbound or left over from an earlier iteration.
                    # Confirm the intended value.
                    jobList.append((cmd, outputFile))
                    continue
                # Only a plain ClusterBase (neither HpcBase nor
                # RemoteThreadManager) gets the work through submit().
                runsDirectly = (
                    isinstance(cluster, (HpcBase, RemoteThreadManager)) or
                    not isinstance(cluster, ClusterBase))
                if runsDirectly:
                    self._runCmdList(cmd, fileName, cluster)
                else:
                    cluster.submit(self._runCmdList, (cmd, fileName))
            hasJobBeenSubmitted = True

        if isinstance(cluster, HpcBase):
            break
        if not (hasJobBeenSubmitted and cluster.getNbRunningJobs()):
            print('Done')
            break
        roundNb += 1
        # Only a MOABcluster goes round again after waiting.
        if not isinstance(cluster, MOABcluster):
            break
        cluster.waitUntilClusterIsFree()

    if self._recursive and self._maxNbJobs:
        self.__runMergedJobs(jobList, cluster, dirName)
    if isinstance(cluster, (ThreadManager, RemoteThreadManager)):
        cluster.wait()
    self._finishProcess(cluster, dirName, *args)
def _getSubmitParamListFromCmd(self, fileName, expectedFormat,
                               outputFilePattern, cmd, dumpFileName=None,
                               fileToJobIdDict=None, cmdList=None, i=None):
    """Work out everything needed to submit *cmd* as a cluster job.

    Args:
        fileName: Input file, list of input files, or ``None``.
        expectedFormat: Expected input extension. A tuple is unpacked as
            ``(expectedFormat, baseExt)``; a list is split into its first
            element and the remaining "other expected formats".
        outputFilePattern: Output file name; when it contains ``%s`` it is
            formatted with *fileName*.
        cmd: Either a command string or an object exposing ``cmd``,
            ``nbCpus``, ``jobName``, ``memory``, ``targetDir``,
            ``jobIdList``, ``machineToUseList``, ``machineToExcludeList``,
            ``walltime``, ``queue``, ``email``, ``scriptName``,
            ``optionList``, ``nodeNb`` and ``onSubmitAction``.
        dumpFileName: Existing command-list dump file. Only used when
            ``self._recursive`` is set; created on demand when missing.
        fileToJobIdDict: Maps file names / formats to the job id(s) the
            new job has to wait for. Never modified by this method.
        cmdList: Forwarded to ``__createCmdListDumpFileAndGetName``.
        i: Index of this command; appended to the recursive resume call.

    Returns:
        The output file name (a string) when ``<outputFile>_done`` already
        exists, otherwise the 21-element tuple ``(cmd, fileName,
        outputFile, onSubmitAction, walltime, queue, machineToUseList,
        machineToExcludeList, nbCpus, nodeNb, memory, jobName,
        expectedFormat, otherExpectedFormatList, errorFile, outFile,
        jobId, dumpFileName, email, scriptName, optionList)``.
    """
    if fileToJobIdDict is None:
        fileToJobIdDict = {}

    # Defaults, overridden further down when cmd is not a plain string.
    baseFile = onSubmitAction = walltime = queue = email = None
    machineToUseList = self._machineList
    machineToExcludeList = self._machineToExcludeList
    nbCpus = nodeNb = 1
    memory = 2 if _guessLocation() == HpcScriptBase.GUILLIMIN else 4
    jobName = otherExpectedFormatList = None

    if isinstance(expectedFormat, types.TupleType):
        expectedFormat, baseExt = expectedFormat
        baseFile = self.__getFileWithExtensionInDir(
            os.path.dirname(fileName), baseExt)
    elif isinstance(expectedFormat, types.ListType):
        otherExpectedFormatList = expectedFormat[1:]
        expectedFormat = expectedFormat[0]
    print('F %s %s' % (fileName, expectedFormat))

    # A single input file with the wrong extension is swapped for the file
    # with the expected extension found in the same directory.
    if fileName is not None and \
            not isinstance(fileName, types.ListType) and \
            expectedFormat != Utilities.getFileExtension(fileName):
        fileName = self.__getFileWithExtensionInDir(
            os.path.dirname(fileName), expectedFormat)

    outputFile = outputFilePattern
    if '%s' in outputFilePattern:
        outputFile = outputFilePattern % fileName
    doneFile = outputFile + '_done'
    if os.path.isfile(doneFile):
        print('Already processed file: %s' % fileName)
        if isinstance(cmd, Cmd) and cmd.onSubmitAction:
            self._onSubmitAction(None, fileName, outputFile, cmd)
        return outputFile

    cluster = guessHpc()
    cmdIsStr = isinstance(cmd, types.StringType)
    if not cmdIsStr:
        jobName = cmd.jobName

    # A '[' in the job or script name adds the cluster's task id variable
    # to the log and done-file names.
    logSuffix = ''
    if cluster.TASK_ID_VAR and (
            (jobName and '[' in jobName) or
            (not cmdIsStr and cmd.scriptName and '[' in cmd.scriptName)):
        logSuffix = '_${%s}' % cluster.TASK_ID_VAR
    errorFile = outputFile + '%s.err' % logSuffix
    outFile = outputFile + '%s.out' % logSuffix

    cmdJobIdList = []
    scriptName = optionList = None
    if not cmdIsStr:
        nbCpus = cmd.nbCpus
        memory = cmd.memory
        if cmd.targetDir:
            errorFile = os.path.join(
                cmd.targetDir, os.path.basename(errorFile))
            outFile = os.path.join(
                cmd.targetDir, os.path.basename(outFile))
        # Optional attributes only override the defaults when truthy.
        cmdJobIdList = cmd.jobIdList or cmdJobIdList
        machineToUseList = cmd.machineToUseList or machineToUseList
        machineToExcludeList = \
            cmd.machineToExcludeList or machineToExcludeList
        walltime = cmd.walltime or walltime
        queue = cmd.queue or queue
        email = cmd.email or email
        scriptName = cmd.scriptName or scriptName
        optionList = cmd.optionList or optionList
        nodeNb = cmd.nodeNb
        onSubmitAction = cmd.onSubmitAction
        cmd = cmd.cmd

    if jobName and '[' in jobName and not scriptName:
        # The command goes to its own script and redirects its streams
        # itself, so no separate log files are returned.
        scriptName = outputFile + '_cmd.sh'
        cmd += ' 1> %s 2> %s' % (outFile, errorFile)
        errorFile = outFile = None
    print('New cmd = [%s]' % cmd)

    cmd = self._replaceInputAndOutputInStr(cmd, fileName, outputFile)
    if baseFile:
        cmd = cmd.replace('[base]', baseFile)
    # endswith() is safe on an empty command, unlike cmd[-1].
    if cmd.endswith(';'):
        cmd = cmd[:-1]
    if logSuffix:
        # Rebuild the name rather than str.replace('_done', ...), which
        # would also rewrite any '_done' found earlier in the path.
        doneFile = outputFile + logSuffix + '_done'
    cmd = cmd + ' && touch %s' % doneFile

    # Collect the ids of the jobs this one depends on.
    jobId = None
    if isinstance(fileName, types.StringType):
        jobId = fileToJobIdDict.get(fileName)
    elif fileName is not None:
        jobId = [fileToJobIdDict[currentFileName]
                 for currentFileName in fileName
                 if currentFileName in fileToJobIdDict]
    if not jobId:
        jobId = []
    elif isinstance(jobId, types.ListType):
        # Copy: the list may be the object stored in fileToJobIdDict, and
        # it is extended in place below.
        jobId = list(jobId)
    else:
        jobId = [jobId]
    if otherExpectedFormatList:
        jobId.extend(fileToJobIdDict[otherExpectedFormat]
                     for otherExpectedFormat in otherExpectedFormatList
                     if otherExpectedFormat in fileToJobIdDict)
    jobId.extend(cmdJobIdList)

    if jobId and expectedFormat:
        # With dependencies, only run once every expected done-file exists.
        otherExpectedFormatStr = ''.join(
            ' && [ -f %s_done ]' % otherExpectedFormat
            for otherExpectedFormat in otherExpectedFormatList or [])
        cmd = 'if [ -f %s_done ]%s; then %s; fi' % (
            expectedFormat, otherExpectedFormatStr, cmd)

    if self._recursive:
        # Chain the next step: on success the job re-invokes this script
        # with the dump file and its own index.
        if not dumpFileName:
            dumpFileName = self.__createCmdListDumpFileAndGetName(
                fileName, cmdList, i)
        cmd += ' && python %s -p recursiveCmd -f %s -i %d' % (
            os.path.abspath(__file__), dumpFileName, i)

    return cmd, fileName, outputFile, onSubmitAction, walltime, queue, \
        machineToUseList, machineToExcludeList, nbCpus, nodeNb, memory, \
        jobName, expectedFormat, otherExpectedFormatList, errorFile, \
        outFile, jobId, dumpFileName, email, scriptName, optionList