示例#1
0
def bowtie2Align(
        index, outFile, read1, read2 = None, bowtie2Path = 'bowtie2',
        threads = 1, readGroup = 1, sampleName = None, libraryID = None,
        platform = None, discordant = False, mixed = False, upto = None,
        maxInsert = None, check = True, samtoolsPath = 'samtools',
        memory = 2, nameSort = False
    ):
    ''' Function to generate command to perform Bowtie2 alignment of single
    end or paired end FASTQ files. The alignment is always run with the
    '--phred33' and '--very-sensitive' options. If the supplied output file
    name ends with '.bam' the alignment command is chained ('&&') to a sort
    command built by samtools.sort, else if the file name ends with '.sam'
    only the alignment command is returned.
    
    Args:
        index (str)- Bowtie2 index prefix.
        outFile (str)- Output file name; must end '.sam' or '.bam'.
        read1 (str/list)- Read1 FASTQ file, or list of files which are
            joined with commas.
        read2 (str/list)- Read2 FASTQ file, or list of files. If absent
            then read1 is aligned as unpaired reads ('-U').
        bowtie2Path (str)- Bowtie2 executable.
        threads (int)- Number of threads to use.
        readGroup - Read group ID ('--rg-id'). If false no read group
            data is added to the command.
        sampleName (str)- Sample name added to read group as 'SM:'.
        libraryID (str)- Library ID added to read group as 'LB:'.
        platform (str)- Platform added to read group as 'PL:'.
        discordant (bool)- Whether to output discordant pairs.
        mixed (bool)- Whether to output mixed pairs.
        upto (int)- Number of reads to align ('-u').
        maxInsert (int)- Maximum insert size ('-X').
        check (bool)- Whether to check for index extensions.
        samtoolsPath (str)- Samtools executable passed to samtools.sort.
        memory - Memory setting passed to samtools.sort.
        nameSort (bool)- Passed to samtools.sort as 'name' argument.
    
    Returns:
        completeCommand (str)- Command to perform Bowtie2 alignment.
    
    Raises:
        IOError - If an index file is absent and check is True.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.
    
    '''
    # Check for index extensions
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        suffixes = ('.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2', '.rev.1.bt2',
            '.rev.2.bt2')
        for suffix in suffixes:
            if not os.path.isfile(index + suffix):
                raise IOError('Index file %s not found' %(index + suffix))
    # Check discordant and convert to command line flag
    if not isinstance(discordant, bool):
        raise TypeError('discordant argument must be bool')
    discordantFlag = '' if discordant else '--no-discordant'
    # Check mixed and convert to command line flag
    if not isinstance(mixed, bool):
        raise TypeError('mixed argument must be bool')
    mixedFlag = '' if mixed else '--no-mixed'
    # Check optional integer arguments. bool is a subclass of int and is
    # rejected explicitly, otherwise True would be written as '-u True'.
    for argName, argValue in (('upto', upto), ('maxInsert', maxInsert)):
        if argValue is None:
            continue
        if isinstance(argValue, bool) or not isinstance(argValue, int):
            raise TypeError('%s argument must be integer' %(argName))
        if argValue < 1:
            raise ValueError('%s argument must be >= 1' %(argName))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam = outFile
        outBam = ''
    elif outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    else:
        raise ValueError("'outFile' argument must end '.sam' or '.bam'")
    # Join multiple fastq files
    if isinstance(read1, list):
        read1 = ','.join(read1)
    if isinstance(read2, list):
        read2 = ','.join(read2)
    # Create initial command
    bowtie2Command = [bowtie2Path, '--phred33', '--very-sensitive',
        mixedFlag, discordantFlag, '-p', str(threads), '-x', index, '-S',
        outSam]
    # Extend command depending on if read2 is supplied
    if read2:
        bowtie2Command.extend(['-1', read1, '-2', read2])
    else:
        bowtie2Command.extend(['-U', read1])
    # Supplement additional commands
    if upto is not None:
        bowtie2Command.extend(['-u', str(upto)])
    if maxInsert is not None:
        bowtie2Command.extend(['-X', str(maxInsert)])
    # Add read group data
    if readGroup:
        bowtie2Command.extend(['--rg-id', str(readGroup)])
        if sampleName:
            bowtie2Command.extend(['--rg', 'SM:' + str(sampleName)])
        if libraryID:
            bowtie2Command.extend(['--rg', 'LB:' + str(libraryID)])
        if platform:
            bowtie2Command.extend(['--rg', 'PL:' + str(platform)])
    # Remove empty elements (e.g. unset flags) and concatenate command
    bowtie2Command = ' '.join([arg for arg in bowtie2Command if arg])
    # Return alignment command alone if SAM output was requested
    if not outBam:
        return(bowtie2Command)
    # Supplement Bowtie2 command with sort command; the sort only runs if
    # the alignment succeeds ('&&'). NOTE(review): delete = True presumably
    # removes the intermediate SAM file - confirm against samtools.sort.
    sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
        name = nameSort, memory = memory, delete = True,
        path = samtoolsPath, threads = threads)
    return(bowtie2Command + ' && ' + sortCommand)
示例#2
0
def bowtie2Align(
        index, outFile, read1, read2 = None, bowtie2Path = 'bowtie2',
        threads = 1, readGroup = 1, sampleName = None, libraryID = None,
        platform = None, discordant = False, mixed = False, upto = None,
        maxInsert = None, check = True, samtoolsPath = 'samtools',
        memory = '2', nameSort = False
    ):
    ''' Function to generate command to perform Bowtie2 alignment of single
    end or paired end FASTQ files. The alignment is always run with the
    '--phred33' and '--very-sensitive' options. If the supplied output file
    name ends with '.bam' the alignment command is chained ('&&') to a sort
    command built by samtools.sort, else if the file name ends with '.sam'
    only the alignment command is returned.
    
    Args:
        index (str)- Bowtie2 index prefix.
        outFile (str)- Output file name; must end '.sam' or '.bam'.
        read1 (str/list)- Read1 FASTQ file, or list of files which are
            joined with commas.
        read2 (str/list)- Read2 FASTQ file, or list of files. If absent
            then read1 is aligned as unpaired reads ('-U').
        bowtie2Path (str)- Bowtie2 executable.
        threads (int)- Number of threads to use.
        readGroup - Read group ID ('--rg-id'). If false no read group
            data is added to the command.
        sampleName (str)- Sample name added to read group as 'SM:'.
        libraryID (str)- Library ID added to read group as 'LB:'.
        platform (str)- Platform added to read group as 'PL:'.
        discordant (bool)- Whether to output discordant pairs.
        mixed (bool)- Whether to output mixed pairs.
        upto (int)- Number of reads to align ('-u').
        maxInsert (int)- Maximum insert size ('-X').
        check (bool)- Whether to check for index extensions.
        samtoolsPath (str)- Samtools executable passed to samtools.sort.
        memory (str)- Memory setting passed to samtools.sort.
        nameSort (bool)- Passed to samtools.sort as 'name' argument.
    
    Returns:
        completeCommand (str)- Command to perform Bowtie2 alignment.
    
    Raises:
        IOError - If an index file is absent and check is True.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.
    
    '''
    # Validate the three boolean switches lazily, in the order they are
    # used, so the first problem encountered is the one reported
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        # All six Bowtie2 index files must be present
        for extension in ('.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2',
                '.rev.1.bt2', '.rev.2.bt2'):
            indexFile = index + extension
            if not os.path.isfile(indexFile):
                raise IOError('Index file %s not found' %(indexFile))
    if not isinstance(discordant, bool):
        raise TypeError('discordant argument must be bool')
    if not isinstance(mixed, bool):
        raise TypeError('mixed argument must be bool')
    # Check optional integer arguments. bool is a subclass of int and is
    # rejected explicitly, otherwise True would be written as '-u True'.
    for argName, argValue in (('upto', upto), ('maxInsert', maxInsert)):
        if argValue is None:
            continue
        if isinstance(argValue, bool) or not isinstance(argValue, int):
            raise TypeError('%s argument must be integer' %(argName))
        if argValue < 1:
            raise ValueError('%s argument must be >= 1' %(argName))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam, outBam = outFile, ''
    elif outFile.endswith('.bam'):
        outSam, outBam = outFile[:-4] + '.sam', outFile
    else:
        raise ValueError("'outFile' argument must end '.sam' or '.bam'")
    # Join multiple fastq files
    if isinstance(read1, list):
        read1 = ','.join(read1)
    if isinstance(read2, list):
        read2 = ','.join(read2)
    # Create initial command; pairs are suppressed unless requested
    bowtie2Command = [bowtie2Path, '--phred33', '--very-sensitive']
    bowtie2Command.append('' if mixed else '--no-mixed')
    bowtie2Command.append('' if discordant else '--no-discordant')
    bowtie2Command.extend(['-p', str(threads), '-x', index, '-S', outSam])
    # Extend command depending on if read2 is supplied
    if read2:
        bowtie2Command.extend(['-1', read1, '-2', read2])
    else:
        bowtie2Command.extend(['-U', read1])
    # Supplement additional commands
    if upto is not None:
        bowtie2Command.extend(['-u', str(upto)])
    if maxInsert is not None:
        bowtie2Command.extend(['-X', str(maxInsert)])
    # Add read group data
    if readGroup:
        bowtie2Command.extend(['--rg-id', str(readGroup)])
        rgFields = (('SM:', sampleName), ('LB:', libraryID),
            ('PL:', platform))
        for rgTag, rgValue in rgFields:
            if rgValue:
                bowtie2Command.extend(['--rg', rgTag + str(rgValue)])
    # Remove empty elements (e.g. unset flags) and concatenate command
    completeCommand = ' '.join([arg for arg in bowtie2Command if arg])
    # Supplement Bowtie2 command with sort command; the sort only runs if
    # the alignment succeeds ('&&'). NOTE(review): delete = True presumably
    # removes the intermediate SAM file - confirm against samtools.sort.
    if outBam:
        sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
            name = nameSort, memory = memory, delete = True,
            path = samtoolsPath, threads = threads)
        completeCommand = completeCommand + ' && ' + sortCommand
    # Return complete command
    return(completeCommand)
示例#3
0
def bwaMemAlign(
        index, outFile, read1, read2 = None, bwaPath = 'bwa', threads = 1,
        readGroup = '1', sampleName = None, libraryID = None, platform = None,
        markSecondary = True, check = True, samtoolsPath = 'samtools',
        memory = '2', nameSort = False
    ):
    ''' Function to generate command to perform BWA mem alignment of single
    end or paired end FASTQ files. If the supplied output file name ends with
    '.bam' then the alignment command is chained ('&&') to a sort command
    built by samtools.sort, else if the file name ends with '.sam' only the
    alignment command, redirected to the SAM file, is returned.
    
    Args:
        index (str)- Full path BWA index prefix.
        outFile (str)- Full path to output sam or bam file. When check is
            True the name must include an existing directory; a bare file
            name has an empty directory component and fails the check.
        read1 (str/list)- Read1 FASTQ file. A list of files is converted
            to a quoted '< zcat file1 file2' input command.
        read2 (str/list)- Read2 FASTQ file, processed as read1. Omitted
            from the command if absent.
        bwaPath (str)- BWA executable.
        threads (int)- Number of threads to use.
        readGroup (str)- Read group to be used in SAM/BAM. If false no
            '-R' argument is added to the command.
        sampleName (str)- Name of sample to be used in header.
        libraryID (str)- Library ID to be used in SAM/BAM.
        platform (str)- Platform to be used in SAM/BAM.
        markSecondary (bool)- Mark secondary alignments ('-M').
        check (bool)- Check for index extensions and output directory.
        samtoolsPath (str)- Samtools executable.
        memory (str)- Gigabytes of memory to use in generating BAM file;
            passed unaltered to samtools.sort.
        nameSort (bool)- Generate a name sorted BAM file.
    
    Returns:
        bwaCommand (str)- Command to perform BWA alignment.
    
    Raises:
        IOError - If index suffixes or output directory are absent.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.
    
    '''
    # Check index extensions and output directory, if required
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        for suffix in ('.amb', '.ann', '.bwt', '.pac', '.sa'):
            if not os.path.isfile(index + suffix):
                raise IOError('Genome index file %s not found' %(
                    index + suffix))
        outDir = os.path.dirname(outFile)
        if not os.path.isdir(outDir):
            raise IOError('Could not find output directory {}'.format(
                outDir))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam = outFile
        outBam = ''
    elif outFile.endswith('.bam'):
        outBam = outFile
        outSam = outFile[:-4] + '.sam'
    else:
        raise ValueError('outFile argument must end .sam or .bam')
    # Process secondary command
    if not isinstance(markSecondary, bool):
        raise TypeError('markSecondary argument must be bool')
    secondaryFlag = '-M' if markSecondary else ''
    # Process multiple input fastq files
    if isinstance(read1, list):
        read1 = "'< zcat " + ' '.join(read1) + "'"
    if isinstance(read2, list):
        read2 = "'< zcat " + ' '.join(read2) + "'"
    # Create command and remove missing elements. A list is built, rather
    # than using filter(), as filter() returns an iterator in Python 3
    # which cannot have the read group arguments inserted into it.
    bwaCommand = [bwaPath, 'mem', secondaryFlag, '-t', str(threads), index,
        read1, read2]
    bwaCommand = [arg for arg in bwaCommand if arg]
    # Add read group data
    if readGroup:
        # Create read group string; tabs are written as a literal
        # backslash-t inside single quotes
        rgString = "'@RG\\tID:" + str(readGroup)
        if sampleName:
            rgString += '\\tSM:' + str(sampleName)
        if libraryID:
            rgString += '\\tLB:' + str(libraryID)
        if platform:
            rgString += '\\tPL:' + str(platform)
        rgString += "'"
        # Insert read group arguments directly after 'bwa mem'
        bwaCommand[2:2] = ['-R', rgString]
    # Complete BWA command
    bwaCommand = '%s > %s' %(' '.join(bwaCommand), outSam)
    # Supplement BWA command with sort command, if required, and return
    if outBam:
        sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
            name = nameSort, memory = memory, delete = True,
            path = samtoolsPath, threads = threads)
        bwaCommand = bwaCommand + ' && ' + sortCommand
    return(bwaCommand)
示例#4
0
def bwaMemAlign(
        index, outFile, read1, read2 = None, bwaPath = 'bwa', threads = 1,
        readGroup = '1', sampleName = None, libraryID = None, platform = None,
        markSecondary = True, check = True, samtoolsPath = 'samtools',
        memory = '2', nameSort = False
    ):
    ''' Function to generate command to perform BWA mem alignment of single
    end or paired end FASTQ files. If the supplied output file name ends with
    '.bam' then the alignment command is chained ('&&') to a sort command
    built by samtools.sort, else if the file name ends with '.sam' only the
    alignment command, redirected to the SAM file, is returned.
    
    Args:
        index (str)- Full path BWA index prefix.
        outFile (str)- Full path to output sam or bam file. When check is
            True the name must include an existing directory; a bare file
            name has an empty directory component and fails the check.
        read1 (str)- Read1 FASTQ file.
        read2 (str)- Read2 FASTQ file. Omitted from the command if absent.
        bwaPath (str)- BWA executable.
        threads (int)- Number of threads to use.
        readGroup (str)- Read group to be used in SAM/BAM. If false no
            '-R' argument is added to the command.
        sampleName (str)- Name of sample to be used in header.
        libraryID (str)- Library ID to be used in SAM/BAM.
        platform (str)- Platform to be used in SAM/BAM.
        markSecondary (bool)- Mark secondary alignments ('-M').
        check (bool)- Check for index extensions and output directory.
        samtoolsPath (str)- Samtools executable.
        memory (str)- Gigabytes of memory to use in generating BAM file;
            passed unaltered to samtools.sort.
        nameSort (bool)- Generate a name sorted BAM file.
    
    Returns:
        bwaCommand (str)- Command to perform BWA alignment.
    
    Raises:
        IOError - If index suffixes or output directory are absent.
        TypeError - If arguments are of the wrong type.
        ValueError - If arguments have an unexpected value.
    
    '''
    # Check index extensions and output directory, if required
    if not isinstance(check, bool):
        raise TypeError('check argument must be bool')
    if check:
        for extension in ('.amb', '.ann', '.bwt', '.pac', '.sa'):
            indexFile = index + extension
            if not os.path.isfile(indexFile):
                raise IOError('Genome index file %s not found' %(indexFile))
        outDir = os.path.dirname(outFile)
        if not os.path.isdir(outDir):
            raise IOError('Could not find output directory {}'.format(
                outDir))
    # Check output file name and generate intermediate file names
    if outFile.endswith('.sam'):
        outSam, outBam = outFile, ''
    elif outFile.endswith('.bam'):
        outSam, outBam = outFile[:-4] + '.sam', outFile
    else:
        raise ValueError('outFile argument must end .sam or .bam')
    # Process secondary command
    if not isinstance(markSecondary, bool):
        raise TypeError('markSecondary argument must be bool')
    # Create command, keeping only the elements that are present. A list
    # is built, rather than using filter(), as filter() returns an iterator
    # in Python 3 which cannot have the read group arguments inserted.
    candidates = [bwaPath, 'mem', '-M' if markSecondary else '', '-t',
        str(threads), index, read1, read2]
    commandList = [arg for arg in candidates if arg]
    # Add read group data
    if readGroup:
        # Create read group string; tabs are written as a literal
        # backslash-t inside single quotes
        rgFields = ['@RG', 'ID:' + str(readGroup)]
        if sampleName:
            rgFields.append('SM:' + str(sampleName))
        if libraryID:
            rgFields.append('LB:' + str(libraryID))
        if platform:
            rgFields.append('PL:' + str(platform))
        rgString = "'" + '\\t'.join(rgFields) + "'"
        # Insert read group arguments directly after 'bwa mem'
        commandList[2:2] = ['-R', rgString]
    # Complete BWA command
    bwaCommand = '%s > %s' %(' '.join(commandList), outSam)
    # Return alignment command alone if SAM output was requested
    if not outBam:
        return(bwaCommand)
    # Supplement BWA command with sort command and return
    sortCommand = samtools.sort(inFile = outSam, outFile = outBam,
        name = nameSort, memory = memory, delete = True,
        path = samtoolsPath, threads = threads)
    return(bwaCommand + ' && ' + sortCommand)