def bowtie2Align(
    index, outFile, read1, read2 = None, bowtie2Path = 'bowtie2',
    threads = 1, readGroup = 1, sampleName = None, libraryID = None,
    platform = None, discordant = False, mixed = False, upto = None,
    maxInsert = None, check = True, samtoolsPath = 'samtools', memory = 2,
    nameSort = False
):
    ''' Function to generate a command to perform Bowtie2 alignment of
    single end or paired end FASTQ files. If the supplied output file name
    ends with '.bam' the Bowtie2 command is followed by a samtools sort
    command, else if the file name ends with '.sam' only the Bowtie2
    command is returned.

    Args:
        index (str)- Prefix of Bowtie2 index.
        outFile (str)- Name of output file; must end '.sam' or '.bam'.
        read1 (str or list)- Read1 FASTQ file(s). Lists are comma joined.
        read2 (str or list)- Read2 FASTQ file(s). If absent read1 is
            supplied as unpaired reads ('-U').
        bowtie2Path (str)- Bowtie2 executable.
        threads (int)- Number of threads to use.
        readGroup - Read group ID ('--rg-id'). No read group data is
            added if this is false.
        sampleName - Sample name added to read group as 'SM:'.
        libraryID - Library ID added to read group as 'LB:'.
        platform - Platform added to read group as 'PL:'.
        discordant (bool)- If False '--no-discordant' is added.
        mixed (bool)- If False '--no-mixed' is added.
        upto (int)- Value supplied to '-u' argument, if set.
        maxInsert (int)- Value supplied to '-X' argument, if set.
        check (bool)- Check that Bowtie2 index files are present.
        samtoolsPath (str)- Passed to samtools.sort as 'path'.
        memory - Passed to samtools.sort as 'memory'.
        nameSort (bool)- Passed to samtools.sort as 'name'.

    Returns:
        completeCommand (str)- Command to perform alignment.

    Raises:
        IOError - If check is True and an index file is absent.
        TypeError - If check, discordant or mixed are not bool or if
            upto or maxInsert are not integer.
        ValueError - If upto or maxInsert are < 1 or if outFile does not
            end '.sam' or '.bam'.

    '''
    # Validate check flag and look for every expected index file
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        indexSuffixes = (
            '.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2', '.rev.1.bt2',
            '.rev.2.bt2'
        )
        for suffix in indexSuffixes:
            if not os.path.isfile(index + suffix):
                raise IOError('Index file %s no found' %(index + suffix))
    # Convert discordant boolean to command line flag
    if not isinstance(discordant, bool):
        raise TypeError('discordant argument must be bool')
    discordantFlag = '' if discordant else '--no-discordant'
    # Convert mixed boolean to command line flag
    if not isinstance(mixed, bool):
        raise TypeError('mixed argument must be bool')
    mixedFlag = '' if mixed else '--no-mixed'
    # Validate upto argument
    if upto is not None:
        if not isinstance(upto, int):
            raise TypeError('upto argument must be integer')
        if upto < 1:
            raise ValueError('upto argument must be >= 1')
    # Validate maximum insert argument
    if maxInsert is not None:
        if not isinstance(maxInsert, int):
            raise TypeError('maxInsert argument must be integer')
        if maxInsert < 1:
            raise ValueError('maxInsert argument must be >= 1')
    # Derive SAM and BAM file names from the output file name
    if outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    elif outFile.endswith('.sam'):
        outBam = ''
        outSam = outFile
    else:
        raise ValueError("'outFile' argument must end '.sam' or '.bam'")
    # Collapse lists of FASTQ files into comma separated strings
    if isinstance(read1, list):
        read1 = ','.join(read1)
    if isinstance(read2, list):
        read2 = ','.join(read2)
    # Assemble fixed portion of command
    arguments = [
        bowtie2Path, '--phred33', '--very-sensitive', mixedFlag,
        discordantFlag, '-p', str(threads), '-x', index, '-S', outSam
    ]
    # Supply reads as pairs only if read2 has been provided
    if read2:
        arguments += ['-1', read1, '-2', read2]
    else:
        arguments += ['-U', read1]
    # Append optional arguments
    if upto:
        arguments += ['-u', str(upto)]
    if maxInsert:
        arguments += ['-X', str(maxInsert)]
    # Append read group ID followed by any additional read group fields
    if readGroup:
        arguments += ['--rg-id', str(readGroup)]
        rgFields = (
            ('SM:', sampleName), ('LB:', libraryID), ('PL:', platform)
        )
        for tag, value in rgFields:
            if value:
                arguments += ['--rg', tag + str(value)]
    # Drop empty elements and join into a single string
    alignCommand = ' '.join([arg for arg in arguments if arg])
    # Return alignment command alone if no BAM file is required
    if not outBam:
        return(alignCommand)
    # Otherwise chain the samtools sort command onto the alignment
    sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
        name = nameSort, memory = memory, delete = True,
        path = samtoolsPath, threads = threads)
    return(alignCommand + ' && ' + sortCommand)
def bowtie2Align(
    index, outFile, read1, read2 = None, bowtie2Path = 'bowtie2',
    threads = 1, readGroup = 1, sampleName = None, libraryID = None,
    platform = None, discordant = False, mixed = False, upto = None,
    maxInsert = None, check = True, samtoolsPath = 'samtools',
    memory = '2', nameSort = False
):
    ''' Function to generate a command to perform Bowtie2 alignment of
    single end or paired end FASTQ files. If the supplied output file name
    ends with '.bam' the Bowtie2 command is followed by a samtools sort
    command, else if the file name ends with '.sam' only the Bowtie2
    command is returned.

    Args:
        index (str)- Prefix of Bowtie2 index.
        outFile (str)- Name of output file; must end '.sam' or '.bam'.
        read1 (str or list)- Read1 FASTQ file(s). Lists are comma joined.
        read2 (str or list)- Read2 FASTQ file(s). If absent read1 is
            supplied as unpaired reads ('-U').
        bowtie2Path (str)- Bowtie2 executable.
        threads (int)- Number of threads to use.
        readGroup - Read group ID ('--rg-id'). No read group data is
            added if this is false.
        sampleName - Sample name added to read group as 'SM:'.
        libraryID - Library ID added to read group as 'LB:'.
        platform - Platform added to read group as 'PL:'.
        discordant (bool)- If False '--no-discordant' is added.
        mixed (bool)- If False '--no-mixed' is added.
        upto (int)- Value supplied to '-u' argument, if set.
        maxInsert (int)- Value supplied to '-X' argument, if set.
        check (bool)- Check that Bowtie2 index files are present.
        samtoolsPath (str)- Passed to samtools.sort as 'path'.
        memory - Passed to samtools.sort as 'memory'.
        nameSort (bool)- Passed to samtools.sort as 'name'.

    Returns:
        completeCommand (str)- Command to perform alignment.

    Raises:
        IOError - If check is True and an index file is absent.
        TypeError - If check, discordant or mixed are not bool or if
            upto or maxInsert are not integer.
        ValueError - If upto or maxInsert are < 1 or if outFile does not
            end '.sam' or '.bam'.

    '''
    # Validate check flag and look for every expected index file
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        indexSuffixes = (
            '.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2', '.rev.1.bt2',
            '.rev.2.bt2'
        )
        for suffix in indexSuffixes:
            if not os.path.isfile(index + suffix):
                raise IOError('Index file %s no found' %(index + suffix))
    # Convert discordant boolean to command line flag
    if not isinstance(discordant, bool):
        raise TypeError('discordant argument must be bool')
    discordantFlag = '' if discordant else '--no-discordant'
    # Convert mixed boolean to command line flag
    if not isinstance(mixed, bool):
        raise TypeError('mixed argument must be bool')
    mixedFlag = '' if mixed else '--no-mixed'
    # Validate upto argument
    if upto is not None:
        if not isinstance(upto, int):
            raise TypeError('upto argument must be integer')
        if upto < 1:
            raise ValueError('upto argument must be >= 1')
    # Validate maximum insert argument
    if maxInsert is not None:
        if not isinstance(maxInsert, int):
            raise TypeError('maxInsert argument must be integer')
        if maxInsert < 1:
            raise ValueError('maxInsert argument must be >= 1')
    # Derive SAM and BAM file names from the output file name
    if outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    elif outFile.endswith('.sam'):
        outBam = ''
        outSam = outFile
    else:
        raise ValueError("'outFile' argument must end '.sam' or '.bam'")
    # Collapse lists of FASTQ files into comma separated strings
    if isinstance(read1, list):
        read1 = ','.join(read1)
    if isinstance(read2, list):
        read2 = ','.join(read2)
    # Assemble fixed portion of command
    arguments = [
        bowtie2Path, '--phred33', '--very-sensitive', mixedFlag,
        discordantFlag, '-p', str(threads), '-x', index, '-S', outSam
    ]
    # Supply reads as pairs only if read2 has been provided
    if read2:
        arguments += ['-1', read1, '-2', read2]
    else:
        arguments += ['-U', read1]
    # Append optional arguments
    if upto:
        arguments += ['-u', str(upto)]
    if maxInsert:
        arguments += ['-X', str(maxInsert)]
    # Append read group ID followed by any additional read group fields
    if readGroup:
        arguments += ['--rg-id', str(readGroup)]
        rgFields = (
            ('SM:', sampleName), ('LB:', libraryID), ('PL:', platform)
        )
        for tag, value in rgFields:
            if value:
                arguments += ['--rg', tag + str(value)]
    # Drop empty elements and join into a single string
    alignCommand = ' '.join([arg for arg in arguments if arg])
    # Return alignment command alone if no BAM file is required
    if not outBam:
        return(alignCommand)
    # Otherwise chain the samtools sort command onto the alignment
    sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
        name = nameSort, memory = memory, delete = True,
        path = samtoolsPath, threads = threads)
    return(alignCommand + ' && ' + sortCommand)
def bwaMemAlign(
    index, outFile, read1, read2 = None, bwaPath = 'bwa', threads = 1,
    readGroup = '1', sampleName = None, libraryID = None, platform = None,
    markSecondary = True, check = True, samtoolsPath = 'samtools',
    memory = '2', nameSort = False
):
    ''' Function to generate command to perform BWA mem alignment of
    single end or paired end FASTQ files. If the supplied output file name
    ends with '.bam' then the BWA command is followed by a samtools sort
    command, else if the file name ends with '.sam' only the BWA command
    is returned. Function takes the following 15 arguments:

    Args:
        index (str)- Full path BWA index prefix.
        outFile (str)- Output sam or bam file. A name without a directory
            component is taken to be in the current directory.
        read1 (str or list)- Read1 FASTQ file. A list of files is passed
            to BWA as a single quoted '< zcat ...' command, so listed
            files are expected to be readable by zcat.
        read2 (str or list)- Read2 FASTQ file; handled as for read1.
        bwaPath (str)- BWA exectuable.
        threads (int)- Number of threads to use.
        readGroup (str)- Read group to be used in SAM/BAM. No read group
            data is added if this is false.
        sampleName (str)- Name of sample to be used in header.
        libraryID (str)- Library ID to be used in SAM/BAM.
        platform (str)- Platform to be used in SAM/BAM.
        markSecondary (bool)- Mark secondary alignments ('-M').
        check (bool)- Check for index extensions and output directory.
        samtoolsPath (str)- Samtools executable.
        memory - Memory setting passed to samtools.sort when generating
            BAM file (originally documented as gigabytes).
        nameSort (bool)- Generate a name sorted BAM file.

    Returns:
        bwaCommand (str)- Command to perform BWA alignment.

    Raises:
        IOError - If index suffixes or output directory are absent.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.

    '''
    # Check index extensions and output directory, if required
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        for suffix in ('.amb', '.ann', '.bwt', '.pac', '.sa'):
            if not os.path.isfile(index + suffix):
                raise IOError('Genome index file %s no found' %(
                    index + suffix))
        # dirname is empty for a bare file name; treat that as the current
        # directory instead of reporting a missing directory
        outDir = os.path.dirname(outFile) or '.'
        if not os.path.isdir(outDir):
            raise IOError('Could not find output directory {}'.format(
                outDir))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam = outFile
        outBam = ''
    elif outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    else:
        raise ValueError('outFile argument must end .sam or .bam')
    # Process secondary command
    if not isinstance(markSecondary, bool):
        raise TypeError('markSecondary argument must be bool')
    secondaryFlag = '-M' if markSecondary else ''
    # Process multiple input fastq files
    if isinstance(read1, list):
        read1 = "'< zcat " + ' '.join(read1) + "'"
    if isinstance(read2, list):
        read2 = "'< zcat " + ' '.join(read2) + "'"
    # Create read group arguments. '\\t' is a literal backslash followed
    # by 't' so that the escape is still present in the generated command
    rgArgs = []
    if readGroup:
        rgFields = ['@RG', 'ID:' + str(readGroup)]
        if sampleName:
            rgFields.append('SM:' + str(sampleName))
        if libraryID:
            rgFields.append('LB:' + str(libraryID))
        if platform:
            rgFields.append('PL:' + str(platform))
        rgArgs = ['-R', "'" + '\\t'.join(rgFields) + "'"]
    # Create command with read group arguments directly after 'mem'
    bwaArgs = [bwaPath, 'mem'] + rgArgs + [
        secondaryFlag, '-t', str(threads), index, read1, read2]
    # Remove missing elements from command. A list is built explicitly as
    # filter() returns an iterator, not a list, in Python 3
    bwaArgs = [arg for arg in bwaArgs if arg]
    # Complete BWA command
    bwaCommand = '%s > %s' %(' '.join(bwaArgs), outSam)
    # Supplement BWA command with sort command, if required, and return
    if outBam:
        sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
            name = nameSort, memory = memory, delete = True,
            path = samtoolsPath, threads = threads)
        bwaCommand = bwaCommand + ' && ' + sortCommand
    return(bwaCommand)
def bwaMemAlign(
    index, outFile, read1, read2 = None, bwaPath = 'bwa', threads = 1,
    readGroup = '1', sampleName = None, libraryID = None, platform = None,
    markSecondary = True, check = True, samtoolsPath = 'samtools',
    memory = '2', nameSort = False
):
    ''' Function to generate command to perform BWA mem alignment of
    single end or paired end FASTQ files. If the supplied output file name
    ends with '.bam' then the BWA command is followed by a samtools sort
    command, else if the file name ends with '.sam' only the BWA command
    is returned. Function takes the following 15 arguments:

    Args:
        index (str)- Full path BWA index prefix.
        outFile (str)- Output sam or bam file. A name without a directory
            component is taken to be in the current directory.
        read1 (str or list)- Read1 FASTQ file. A list of files is passed
            to BWA as a single quoted '< zcat ...' command, so listed
            files are expected to be readable by zcat.
        read2 (str or list)- Read2 FASTQ file; handled as for read1.
        bwaPath (str)- BWA exectuable.
        threads (int)- Number of threads to use.
        readGroup (str)- Read group to be used in SAM/BAM. No read group
            data is added if this is false.
        sampleName (str)- Name of sample to be used in header.
        libraryID (str)- Library ID to be used in SAM/BAM.
        platform (str)- Platform to be used in SAM/BAM.
        markSecondary (bool)- Mark secondary alignments ('-M').
        check (bool)- Check for index extensions and output directory.
        samtoolsPath (str)- Samtools executable.
        memory - Memory setting passed to samtools.sort when generating
            BAM file (originally documented as gigabytes).
        nameSort (bool)- Generate a name sorted BAM file.

    Returns:
        bwaCommand (str)- Command to perform BWA alignment.

    Raises:
        IOError - If index suffixes or output directory are absent.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.

    '''
    # Check index extensions and output directory, if required
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        for suffix in ('.amb', '.ann', '.bwt', '.pac', '.sa'):
            if not os.path.isfile(index + suffix):
                raise IOError('Genome index file %s no found' %(
                    index + suffix))
        # dirname is empty for a bare file name; treat that as the current
        # directory instead of reporting a missing directory
        outDir = os.path.dirname(outFile) or '.'
        if not os.path.isdir(outDir):
            raise IOError('Could not find output directory {}'.format(
                outDir))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam = outFile
        outBam = ''
    elif outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    else:
        raise ValueError('outFile argument must end .sam or .bam')
    # Process secondary command
    if not isinstance(markSecondary, bool):
        raise TypeError('markSecondary argument must be bool')
    secondaryFlag = '-M' if markSecondary else ''
    # Process multiple input fastq files; without this a list of files
    # cannot be joined into the command string
    if isinstance(read1, list):
        read1 = "'< zcat " + ' '.join(read1) + "'"
    if isinstance(read2, list):
        read2 = "'< zcat " + ' '.join(read2) + "'"
    # Create read group arguments. '\\t' is a literal backslash followed
    # by 't' so that the escape is still present in the generated command
    rgArgs = []
    if readGroup:
        rgFields = ['@RG', 'ID:' + str(readGroup)]
        if sampleName:
            rgFields.append('SM:' + str(sampleName))
        if libraryID:
            rgFields.append('LB:' + str(libraryID))
        if platform:
            rgFields.append('PL:' + str(platform))
        rgArgs = ['-R', "'" + '\\t'.join(rgFields) + "'"]
    # Create command with read group arguments directly after 'mem'
    bwaArgs = [bwaPath, 'mem'] + rgArgs + [
        secondaryFlag, '-t', str(threads), index, read1, read2]
    # Remove missing elements from command. A list is built explicitly as
    # filter() returns an iterator, not a list, in Python 3
    bwaArgs = [arg for arg in bwaArgs if arg]
    # Complete BWA command
    bwaCommand = '%s > %s' %(' '.join(bwaArgs), outSam)
    # Supplement BWA command with sort command, if required, and return
    if outBam:
        sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
            name = nameSort, memory = memory, delete = True,
            path = samtoolsPath, threads = threads)
        bwaCommand = bwaCommand + ' && ' + sortCommand
    return(bwaCommand)