Exemplo n.º 1
0
 def _resumeRecursiveCmd(self, dumpFileName, lastIdx=None, test=False):
     self._recursive = True
     # time.sleep(15)
     cmdList = Utilities.loadCache(dumpFileName)
     if lastIdx is None:
         for i, cmd in enumerate(cmdList):
             if not cmd[-1]:
                 nextIdx = i
                 break
     else:
         cmdList[lastIdx][1] = True
         nextIdx = lastIdx + 1
     Utilities.saveCache(cmdList, dumpFileName)
     if len(cmdList) == nextIdx:
         print 'Finished recursive cmd'
         return
     while True:
         nextCmdTuple = cmdList[nextIdx]
         if not nextCmdTuple[1]:
             paramList = list(nextCmdTuple[0])
             res = self._getSubmitParamListFromCmd(
                 *[paramList[0]] + paramList + [dumpFileName, None, None,
                                                nextIdx])
             if isinstance(res, types.TupleType):
                 break
             cmdList[nextIdx][1] = True
         nextIdx += 1
         print 'Passing done job: [%s]' % self.__getCmdFromParamList(
             list(nextCmdTuple[0]))
     if test:
         print nextIdx, paramList
         currentCmd = self.__getCmdFromParamList(paramList)
         print nextIdx, currentCmd
     (cmd, fileName, outputFile, onSubmitAction, walltime, queue,
      machineToUseList, machineToExcludeList, nbCpus, nodeNb, memory,
      jobName, expectedFormat, otherExpectedFormatList, errorFile, outFile,
      jobId, dumpFileName2, email, scriptName, optionList
      ) = self._getSubmitParamListFromCmd(
         *[paramList[0]] + paramList + [dumpFileName, None, None,
                                        nextIdx])
     # fileName, expectedFormat,
     # outputFilePattern, cmd, dumpFileName=None,
     # fileToJobIdDict=None, cmdList=None, i=None
     if test:
         print cmd
         return
     cluster = guessHpc()
     jobId = cluster.\
         submitJobAndGetId(cmd, errorFile=errorFile,
                           outputFile=outFile,
                           dependentJobId=jobId, nbProc=nbCpus,
                           jobName=jobName, memory=memory,
                           machineToUseList=machineToUseList,
                           node=nodeNb,
                           machineToExcludeList=machineToExcludeList,
                           walltime=walltime, queue=queue,
                           scriptName=scriptName, optionList=optionList)
Exemplo n.º 2
0
 def process(self, dirName):
     cluster = guessHpc()
     roundNb = 1
     hasJobBeenSubmitted = False
     print 'Round nb %d' % roundNb
     for fileName in FolderIterator().getFileListInDirWithExt(dirName,
                                                              'vcf'):
         if not self._isFileToProcess(fileName):
             continue
         print 'File', fileName
         cluster.submitJobAndGetId('python %s -f %s' % (os.path.abspath(
             __file__), fileName), errorFile=fileName + '.err',
             outputFile=fileName + '.out')
         hasJobBeenSubmitted = True
     if not hasJobBeenSubmitted or not cluster.getNbRunningJobs():
         print 'Done'
         # break
     roundNb += 1
     cluster.waitUntilClusterIsFree()
Exemplo n.º 3
0
 def process(self, dirName, *args):
     # if self._machineList:
     # cluster = RemoteThreadManager(self._machineList)
     # else:
     cluster = guessHpc(nbCpus=self._nbCpus)
     print cluster
     roundNb = 1
     hasJobBeenSubmitted = False
     jobList = []
     while True:
         print 'Round nb %d' % roundNb
         for fileName in FolderIterator().getDataIterator(dirName,
                                                          self._fileExt):
             print 'C = [%s]' % str(fileName)
             if not self._isFileToProcess(fileName):
                 print 'Passing file "%s"' % str(fileName)
                 continue
             if self._laneList:
                 laneNb, seqFile1, seqFile2 = fileName
                 sampleName, fcName, uplex = IlluminaRun(
                 ).getSampleNameRunNameAndUplexFromPath(seqFile1)
                 laneNb, readNb = IlluminaRun().getLaneAndReadNbfromFile(
                     seqFile1)
                 print fileName, sampleName, fcName, laneNb
                 if (sampleName, fcName, laneNb) not in self._laneList and \
                    sampleName not in self._laneList:
                     print 'Passing file %s' % str(fileName)
                     continue
             # self._runCmdList(self, cmdList, fileName, cluster = None)
             print 'Processing', fileName
             cmd = self._getCmdToRunFromFile(fileName, *args)
             print 'CMD', cmd
             if isinstance(cmd, types.StringType):
                 outputFile = self._outputFilePattern % fileName
                 doneFile = outputFile + '_done'
                 if os.path.isfile(doneFile):
                     print 'Already processed file: %s' % fileName
                     continue
                 cmd += ' && touch %s' % doneFile
                 if self._recursive and self._maxNbJobs:
                     jobList.append((cmd, outputFile))
                     continue
                 cluster.submitJobAndGetId(
                     cmd, errorFile=outputFile + '.err',
                     outputFile=outputFile + '.out',
                     machineToUseList=self._machineList)
             else:
                 if self._recursive and self._maxNbJobs:
                     jobList.append((cmd, outputFile))
                     continue
                 if isinstance(cluster, HpcBase) or \
                    isinstance(cluster, RemoteThreadManager):
                     self._runCmdList(cmd, fileName, cluster)
                 elif isinstance(cluster, ClusterBase):
                     cluster.submit(self._runCmdList, (cmd, fileName))
                 else:
                     self._runCmdList(cmd, fileName, cluster)
                     # break
             hasJobBeenSubmitted = True
         if isinstance(cluster, HpcBase):
             break
         if not hasJobBeenSubmitted or not cluster.getNbRunningJobs():
             print 'Done'
             break
         roundNb += 1
         if not isinstance(cluster, MOABcluster):
             break
         cluster.waitUntilClusterIsFree()
     if self._recursive and self._maxNbJobs:
         self.__runMergedJobs(jobList, cluster, dirName)
     if isinstance(cluster, ThreadManager) or \
        isinstance(cluster, RemoteThreadManager):
         cluster.wait()
     self._finishProcess(cluster, dirName, *args)
Exemplo n.º 4
0
 def _getSubmitParamListFromCmd(self, fileName, expectedFormat,
                                outputFilePattern, cmd, dumpFileName=None,
                                fileToJobIdDict=None, cmdList=None, i=None):
     if fileToJobIdDict is None:
         fileToJobIdDict = {}
     baseFile = onSubmitAction = walltime = queue = email = None
     machineToUseList = self._machineList
     machineToExcludeList = self._machineToExcludeList
     nbCpus = nodeNb = 1
     memory = 4
     if _guessLocation() == HpcScriptBase.GUILLIMIN:
         memory = 2
     jobName = otherExpectedFormatList = None
     if isinstance(expectedFormat, types.TupleType):
         expectedFormat, baseExt = expectedFormat
         baseFile = self.__getFileWithExtensionInDir(
             os.path.dirname(fileName), baseExt)
     elif isinstance(expectedFormat, types.ListType):
         otherExpectedFormatList = expectedFormat[1:]
         expectedFormat = expectedFormat[0]
     print 'F', fileName, expectedFormat  # , myStr(cmd)
     # print cmdList
     if fileName is not None and not \
        isinstance(fileName, types.ListType) and \
        expectedFormat != Utilities.getFileExtension(fileName):
         fileName = self.__getFileWithExtensionInDir(
             os.path.dirname(fileName), expectedFormat)
     outputFile = outputFilePattern
     if '%s' in outputFilePattern:
         outputFile = outputFilePattern % fileName
     doneFile = outputFile + '_done'
     if os.path.isfile(doneFile):
         print 'Already processed file: %s' % fileName
         if isinstance(cmd, Cmd) and cmd.onSubmitAction:
             self._onSubmitAction(None, fileName, outputFile, cmd)
         return outputFile
     cluster = guessHpc()
     logSuffix = ''
     if not jobName and not isinstance(cmd, types.StringType):
         jobName = cmd.jobName
     if cluster.TASK_ID_VAR and \
        ((jobName and '[' in jobName) or
         (not isinstance(cmd, types.StringType) and
          cmd.scriptName and '[' in cmd.scriptName)):
         logSuffix = '_${%s}' % cluster.TASK_ID_VAR
     errorFile = outputFile + '%s.err' % logSuffix
     outFile = outputFile + '%s.out' % logSuffix
     cmdJobIdList = []
     scriptName = optionList = None
     if not isinstance(cmd, types.StringType):
         nbCpus = cmd.nbCpus
         jobName = cmd.jobName
         memory = cmd.memory
         if cmd.targetDir:
             errorFile = os.path.join(
                 cmd.targetDir, os.path.basename(errorFile))
             outFile = os.path.join(
                 cmd.targetDir, os.path.basename(outFile))
         if cmd.jobIdList:
             cmdJobIdList = cmd.jobIdList
         if cmd.machineToUseList:
             machineToUseList = cmd.machineToUseList
         if cmd.machineToExcludeList:
             machineToExcludeList = cmd.machineToExcludeList
         if cmd.walltime:
             walltime = cmd.walltime
         if cmd.queue:
             queue = cmd.queue
         if cmd.email:
             email = cmd.email
         if cmd.scriptName:
             scriptName = cmd.scriptName
         if cmd.optionList:
             optionList = cmd.optionList
         nodeNb = cmd.nodeNb
         onSubmitAction = cmd.onSubmitAction
         cmd = cmd.cmd
     if jobName and '[' in jobName and not scriptName:
         # scriptName = FileNameGetter(outputFile).get('_cmd.sh')
         scriptName = outputFile + '_cmd.sh'
         cmd += ' 1> %s 2> %s' % (outFile, errorFile)
         errorFile = outFile = None
         print 'New cmd = [%s]' % cmd
     cmd = self._replaceInputAndOutputInStr(cmd, fileName, outputFile)
     if baseFile:
         cmd = cmd.replace('[base]', baseFile)
     if cmd[-1] == ';':
         cmd = cmd[:-1]
     if logSuffix:
         doneFile = doneFile.replace('_done', '%s_done' % logSuffix)
     cmd = cmd + ' && touch %s' % doneFile
     # print cmd
     # if isinstance(cluster, HpcBase):
     jobId = None
     if isinstance(fileName, types.StringType):
         jobId = fileToJobIdDict.get(fileName)
     elif fileName is not None:
         jobId = [fileToJobIdDict.get(
             currentFileName) for currentFileName in fileName if
             currentFileName in fileToJobIdDict]
     if not jobId:
         jobId = []
     if not isinstance(jobId, types.ListType):
         jobId = [jobId]
     if otherExpectedFormatList:
         jobId += [fileToJobIdDict.get(otherExpectedFormat)
                   for otherExpectedFormat in otherExpectedFormatList if
                   otherExpectedFormat in fileToJobIdDict]
     jobId += cmdJobIdList
     if jobId:
         otherExpectedFormatStr = ''
         if otherExpectedFormatList:
             otherExpectedFormatStr += ' && ' + \
                 ' && '.join(
                     ['[ -f %s_done ]' % otherExpectedFormat for
                      otherExpectedFormat in otherExpectedFormatList])
         if expectedFormat:
             cmd = 'if [ -f %s_done ]%s; then %s; fi' % (
                 expectedFormat, otherExpectedFormatStr, cmd)
     if self._recursive:
         if not dumpFileName:
             dumpFileName = self.__createCmdListDumpFileAndGetName(
                 fileName, cmdList, i)
         cmd += ' && python %s -p recursiveCmd -f %s -i %d' % (
             os.path.abspath(__file__), dumpFileName, i)
     return cmd, fileName, outputFile, onSubmitAction, walltime, queue, \
         machineToUseList, machineToExcludeList, nbCpus, nodeNb, memory,\
         jobName, expectedFormat, otherExpectedFormatList, errorFile,\
         outFile, jobId, dumpFileName, email, scriptName, optionList