def writeCombineBAMJobsFromSAI(outDir, fastqDir, fastaPath, lockDirPath):
    """Write the "combinedBAMFrmSingleSAIJob.jobs" file and return its path.

    The jobs file is (re)created inside outDir, and
    workerForBam.writeSAIIToBAMJob is called once for every (fastq, fasta)
    combination, with the fastqs in the outer position.

    outDir      -- directory that receives the jobs file
    fastqDir    -- handed to workerForBam.getFastqFilePaths to list the fastqs
    fastaPath   -- handed to workerForBam.getFastaFilePaths to list the fastas
    lockDirPath -- forwarded unchanged to every job that is written
    """
    jobsPath = os.path.join(outDir, "combinedBAMFrmSingleSAIJob.jobs")
    toolsDict = workerForBam.getToolsDict()

    # all fastas to map against, then all fastqs to be mapped
    fastas = workerForBam.getFastaFilePaths(fastaPath)
    fastqs = workerForBam.getFastqFilePaths(fastqDir)
    print("%s %s" % ("fastaFilePaths: ", fastas))

    with open(jobsPath, "w") as jobsFile:
        # flattened form of "for each fastq: for each fasta"
        for fastq, fasta in ((q, a) for q in fastqs for a in fastas):
            workerForBam.writeSAIIToBAMJob(jobsFile, fasta, fastq,
                                           lockDirPath, toolsDict)
    return jobsPath
def writeCombineSAIJobs(outDir, fastqDir, fastaPath, lockDirPath, numThreads):
    """Write the 'combinedSAIJob.jobs' file and return its path.

    The jobs file is (re)created inside outDir, and workerForBam.writeSAIJob
    is called once for every (fastq, fasta) combination, with the fastqs in
    the outer position.

    outDir      -- directory that receives the jobs file
    fastqDir    -- handed to workerForBam.getFastqFilePaths to list the fastqs
    fastaPath   -- handed to workerForBam.getFastaFilePaths to list the fastas
    lockDirPath -- forwarded unchanged to every job that is written
    numThreads  -- forwarded unchanged to every job that is written
    """
    jobsPath = os.path.join(outDir, 'combinedSAIJob.jobs')
    toolsDict = workerForBam.getToolsDict()

    # all fastas to map against, then all fastqs to be mapped
    fastas = workerForBam.getFastaFilePaths(fastaPath)
    fastqs = workerForBam.getFastqFilePaths(fastqDir)
    print('%s %s' % ('fastaFilePaths: ', fastas))

    with open(jobsPath, 'w') as jobsFile:
        for fastq in fastqs:
            for fasta in fastas:
                workerForBam.writeSAIJob(
                    jobsFile, fasta, fastq, lockDirPath, toolsDict, numThreads
                )
    return jobsPath
def writeCombineBAMJobsFromSAI(outDir, fastqDir, fastaPath, lockDirPath):
    """Write the 'combinedBAMFrmPairedSAIsJob.jobs' file and return its path.

    The jobs file is (re)created inside outDir. The read pairs come from
    getPairedReads(fastqDir), and workerForBam.writePairedSAIToBAMJob is
    called once for every (read pair, fasta) combination, with the read
    pairs in the outer position.

    outDir      -- directory that receives the jobs file
    fastqDir    -- handed to getPairedReads to obtain the read pairs
    fastaPath   -- handed to workerForBam.getFastaFilePaths to list the fastas
    lockDirPath -- forwarded unchanged to every job that is written
    """
    jobsPath = os.path.join(outDir, 'combinedBAMFrmPairedSAIsJob.jobs')
    toolsDict = workerForBam.getToolsDict()

    # all fastas against which the reads are mapped
    fastas = workerForBam.getFastaFilePaths(fastaPath)
    print('%s %s' % ('fastaFilePaths: ', fastas))

    with open(jobsPath, 'w') as jobsFile:
        # the pairs are looked up only after the jobs file has been opened
        readPairs = getPairedReads(fastqDir)
        print('%s %s' % ('pairedReads: ', readPairs))
        for readPair, fasta in ((p, a) for p in readPairs for a in fastas):
            workerForBam.writePairedSAIToBAMJob(jobsFile, fasta, readPair,
                                                lockDirPath, toolsDict)
    return jobsPath
def main():
    """Command-line entry point: run the SNP finder for the given directories.

    Positional arguments read from sys.argv:
        1. directory containing the fastq library
        2. directory containing other directories with fasta names
        3. directory containing file locks
        4. directory containing temp output (fastqs, jobs file)

    When fewer than four arguments are supplied, 'err: files missing' is
    printed and nothing else is done. The function always returns None.
    """
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

    # sys.argv[0] is the script name, and the last argument is read as
    # sys.argv[4], so at least five entries are required. The previous
    # ">= 4" check let a three-argument call through to an IndexError.
    if len(sys.argv) < 5:
        print('err: files missing')
        return

    # directory containing fastq library
    fastqsDir = workerForBam.getAbsPath(sys.argv[1])
    # directory containing other directories with fasta names
    fastaDir = workerForBam.getAbsPath(sys.argv[2])
    # directory containing file locks
    lockDirPath = workerForBam.getAbsPath(sys.argv[3])
    # directory containing temp output -> fastQ's, jobsFile
    outDir = workerForBam.getAbsPath(sys.argv[4])

    # NOTE: the step that wrote all fastq processing into a job file
    # (writeCombineFastqJobs) and ran it through
    # workerForBam.callParallelDrone with tools['PARALLEL_DRONE'] is
    # currently disabled; fastqsDir and tools are only consumed by that step.
    tools = workerForBam.getToolsDict()

    # now for all scaffolds combined bams and look for SNPs
    parallelSNPsFinder.snpsFinder(fastaDir, outDir, lockDirPath)
def snpsFinder(fastaPath, outDir, lockDirPath):
    """Write a jobs file holding one SNP-finder job per scaffold folder.

    The jobs file is named "<last '/'-separated component of fastaPath>
    SNPFinderJobs.jobs" and is created inside outDir. For every name
    returned by workerForBam.getAllFastas(fastaPath), writeJob is called
    with the fasta file located at <fastaPath>/<name>/<name>.fasta.

    fastaPath   -- directory holding one folder per scaffold
    outDir      -- directory that receives the jobs file
    lockDirPath -- forwarded unchanged to every job that is written

    Returns None on success and -1 when an IOError occurs (the error is
    printed, not re-raised).
    """
    # function-scope import: this block did not previously depend on os
    import os

    # get all tools
    tools = workerForBam.getToolsDict()

    # get all scaffold folder names inside fasta dir
    fastaDirs = workerForBam.getAllFastas(fastaPath)

    # file containing snpfinder job for each scaffold
    snpsFinderJobFileName = fastaPath.split('/')[-1] + "SNPFinderJobs.jobs"
    print(snpsFinderJobFileName + ' job file to be created ')

    # os.path.join gives the same result as plain concatenation when the
    # directory already ends with a separator, and a correct path when it
    # does not.
    snpsFinderJobPath = os.path.join(outDir, snpsFinderJobFileName)

    try:
        # "with" closes the jobs file even when writing a job fails
        with open(snpsFinderJobPath, 'w') as snpsFinderJobFile:
            for fastaDir in fastaDirs:
                fastaFilePath = os.path.join(fastaPath, fastaDir,
                                             fastaDir + ".fasta")
                writeJob(snpsFinderJobFile, fastaFilePath, lockDirPath, tools)
    except IOError as err:
        print("I/O error({0}): {1}".format(err.errno, err.strerror))
        return -1
def writeCombineFastqJobs(outDir, fastqDir, fastaPath, lockDirPath):
    """Write the 'combinedBAMJob.jobs' file and return its path.

    The jobs file is (re)created inside outDir. Every regular file in
    fastqDir whose name ends with 'fastq' is combined with every fasta, and
    workerForBam.writeJob is called once per combination (fastq files in
    the outer position, in os.listdir order).

    outDir      -- directory that receives the jobs file
    fastqDir    -- directory scanned (non-recursively) for fastq files
    fastaPath   -- directory holding, for each name returned by
                   workerForBam.getAllFastas, a folder <name> containing
                   <name>.fasta
    lockDirPath -- forwarded unchanged to every job that is written
    """
    combinedBAMJobsPath = os.path.join(outDir, 'combinedBAMJob.jobs')
    tools = workerForBam.getToolsDict()

    # All fastas against which the fastqs are mapped. os.path.join equals
    # the former string concatenation when fastaPath ends with a separator
    # and yields a correct path when it does not.
    fastaFilePaths = [
        os.path.join(fastaPath, fastaDir, fastaDir + ".fasta")
        for fastaDir in workerForBam.getAllFastas(fastaPath)
    ]
    print('%s %s' % ('fastaFilePaths: ', fastaFilePaths))

    with open(combinedBAMJobsPath, 'w') as combinedBAMJobsFile:
        for fileName in os.listdir(fastqDir):
            fastqPath = os.path.join(fastqDir, fileName)
            # skip directories and anything that is not a fastq file
            if not (os.path.isfile(fastqPath) and fileName.endswith('fastq')):
                continue
            for fastaFilePath in fastaFilePaths:
                workerForBam.writeJob(combinedBAMJobsFile, fastaFilePath,
                                      fastqPath, lockDirPath, tools)
    return combinedBAMJobsPath