# NOTE(review): this statement is the tail of a BAM-matching loop whose header
# lies before the visible chunk; re-indent it under that loop when merging.
if bam.startswith(prefix):
    sampleDict[individual][tissue].append(bam)

# Check each individual has a normal and every tissue has a BAM
for individual in sampleDict:
    tissueTable = sampleDict[individual]
    # Check for normal
    if 'NORM' not in tissueTable:
        raise IOError('no normal for %s' % (individual))
    # Check for individual BAM files. Each entry is unpacked further down as
    # (name, bam), so exactly two items are required; presumably the entry is
    # seeded with the sample name before this point - TODO confirm.
    for tissue in tissueTable:
        entryCount = len(tissueTable[tissue])
        if entryCount < 2:
            raise IOError('no bam found for %s %s' % (individual, tissue))
        elif entryCount > 2:
            raise IOError(
                'multiple bams found for %s %s' % (individual, tissue))

# Loop through individuals data
slurmJobs = slurm.submitJobs()
for individual, indvData in sampleDict.items():
    # Fresh per-individual lists for pileup files and job IDs
    pileupFileList, pileupJobList, varscanJobList = [], [], []
    # Create the per-individual log directory inside the output directory
    logDir = os.path.join(args['<outdir>'], individual + '.log')
    if not os.path.isdir(logDir):
        os.mkdir(logDir)
    # Create and store pileup commands
    for tissue, tissueData in indvData.items():
        # Extract sample name and BAM file
        name, bam = tissueData
        # Create and store file names
        pileup = os.path.join(args['<outdir>'], name + '.mpileup')
read1List.extend(read1) read1List = [os.path.abspath(x) for x in read1List] else: for prefix in args['<prefix>']: read1, read2 = fastqFind.findFastq(prefix=prefix, dirList=args['<indir>'], pair=True) read1List.extend(read1) read2List.extend(read2) read1List = [os.path.abspath(x) for x in read1List] read2List = [os.path.abspath(x) for x in read2List] # Raise Error if no Fastq files identified if len(read1List) == 0: raise IOError('Failed to find FASTQ files') # Create object to store jobs and create log folder jobObject = slurm.submitJobs() bamList = [] jobList = [] args['<logfolder>'] = args['<outbam>'][:-4] os.mkdir(args['<logfolder>']) # Perform alignment for each pair of FASTQ files for readgroup, (read1, read2) in enumerate( itertools.izip_longest(read1List, read2List)): # Modify and format read group and library id readgroup = format(readgroup + 1, '03d') # Create file names for alignment alignLog = os.path.join(args['<logfolder>'], 'rg{}.align.log'.format(readgroup)) alignBam = args['<outbam>'][:-4] + '.rg{}.sort.bam'.format(readgroup) # Generate command for alignment alignCommand = fastqAlign.bwaMemAlign(
# Generate and store standard output directories args['fastqDir'] = os.path.join(args['<outdir>'], 'trimFastq') args['fastqSampleDir'] = os.path.join(args['fastqDir'], args['name']) args['rsemTranDir'] = os.path.join(args['<outdir>'], 'rsemAlign') args['rsemTranSampleDir'] = os.path.join(args['rsemTranDir'], args['name']) args['tophatDir'] = os.path.join(args['<outdir>'], 'tophat2Align') args['tophatSampleDir'] = os.path.join(args['tophatDir'], args['name']) dirList = [args['fastqDir'], args['fastqSampleDir'], args['rsemTranDir'], args['rsemTranSampleDir'], args['tophatDir'], args['tophatSampleDir']] # Create output directories for directory in dirList: if not os.path.exists(directory): os.mkdir(directory) # Extract path and module data and create object to submit commands pmDict = slurm.parsePathModule(args['<paths>']) slurmJobs = slurm.submitJobs() ############################################################################### ## Find and process fastq files ############################################################################### # Extract fastq files and check if args['--singleend']: args['read1'] = fastqFind.findFastq( prefix = args['prefix'], dirList = args['<indir>'].split(','), pair = False ) else: args['read1'], args['read2'] = fastqFind.findFastq( prefix = args['prefix'], dirList = args['<indir>'].split(','),