示例#1
0
def sortBed(inBed, outBed, path = 'bedtools', delete = True):
    ''' Function to build a shell command that sorts a bed file, first by
    chromosome and then by start position, using 'bedtools sort'. The
    command is returned as a string and is not executed by this function.
    Function takes four arguments:
    
    1)  inBed - Input bed file.
    2)  outBed - Output bed file; the sorted output is redirected here.
    3)  path - Path to bedtools executable.
    4)  delete - Boolean, whether to delete the input bed file.
    
    Returns the command as a string.
    
    '''
    # Check arguments
    toolbox.checkArg(delete, 'bool')
    # Create sort command
    sortCommand = '%s sort -i %s > %s' %(path, inBed, outBed)
    # Append deletion command. Chaining with '&&' means the input file is
    # only removed if the sort command exits successfully.
    if delete:
        sortCommand += ' && rm %s' %(inBed)
    # Return command
    return sortCommand
示例#2
0
def bed2bedGraph(inBed, outBG, chrFile, path = 'bedtools', delete = False):
    ''' Function to build a shell command that creates a bedgraph from a
    sorted bed file using 'bedtools genomecov -bg'. The command is returned
    as a string and is not executed by this function. Function takes five
    arguments:
    
    1)  inBed - Input bed file.
    2)  outBG - Output bedgraph file; the output is redirected here.
    3)  chrFile - A tab delimited text file of chromosome name and sizes.
    4)  path - Path to bedtools executable.
    5)  delete - Boolean, whether to delete input BED file.
    
    Returns the command as a string.
    
    '''
    # Check arguments
    toolbox.checkArg(delete, 'bool')
    # Create bedgraph command
    bgCommand = '%s genomecov -bg -i %s -g %s > %s' %(path, inBed, chrFile,
        outBG)
    # Append deletion command. Chaining with '&&' means the input file is
    # only removed if the bedgraph command exits successfully.
    if delete:
        bgCommand += ' && rm %s' %(inBed)
    # Return command
    return bgCommand
示例#3
0
    
    --quality=<quality>  Trimming quality [default: 20]
    --adapter=<adapter>  Adapter sequence [default: AGATCGGAAGAGC]
    --path=<path>        Path to cutadapt [default: cutadapt]
    --help               Output this message
    
"""
# Import required modules
import os
from ngs_python.fastq import fastqFind, fastqTrim
from general_python import docopt, toolbox, moab
# Extract and process arguments
args = docopt.docopt(__doc__,version = 'v1')
args['--quality'] = int(args['--quality'])
# Split the input prefix into the directory to search and the file prefix
inDir, inPrefix = os.path.split(args['<inprefix>'])
toolbox.checkArg(args['--path'], 'exc')
# Extract fastq files and generate output file names
read1In, read2In = fastqFind.findFastq(prefix = inPrefix, dirList = [inDir],
    pair = True, gzip = True)
read1Out = args['<outprefix>'] + '.R1.fastq.gz'
read2Out = args['<outprefix>'] + '.R2.fastq.gz'
trimLog = args['<outprefix>'] + '.log'
# Generate and submit trim command. The adapter is read from the --adapter
# option (default: AGATCGGAAGAGC) rather than being hard-coded, so that an
# adapter supplied on the command line is actually used.
trimCommand = fastqTrim.cutadaptTrimPaired(read1In = read1In,
    read2In = read2In, read1Out = read1Out, read2Out = read2Out,
    quality = args['--quality'], adapter = args['--adapter'], length = 25,
    path = args['--path']
)
# Standard output and standard error of the job share one log file
jobID = moab.submitJob(trimCommand, stdout = trimLog, stderr = trimLog)
print(jobID)
示例#4
0
    
    --quality=<quality>  Trimming quality [default: 20]
    --adapter=<adapter>  Adapter sequence [default: AGATCGGAAGAGC]
    --path=<path>        Path to cutadapt [default: cutadapt]
    --help               Output this message
    
"""
# Import required modules
import os
from ngs_python.fastq import fastqFind, fastqTrim
from general_python import docopt, toolbox, moab
# Parse the command line against the module docstring and convert the
# trimming quality to an integer
args = docopt.docopt(__doc__, version='v1')
args['--quality'] = int(args['--quality'])
# Split the input prefix into the directory to search and the file prefix
inDir, inPrefix = os.path.split(args['<inprefix>'])
# Validate the supplied cutadapt path
# NOTE(review): 'exc' presumably requests an executable check - confirm
# against toolbox.checkArg
toolbox.checkArg(args['--path'], 'exc')
# Find the paired, gzipped fastq files matching the prefix in the input
# directory
read1In, read2In = fastqFind.findFastq(prefix=inPrefix,
                                       dirList=[inDir],
                                       pair=True,
                                       gzip=True)
# Derive the trimmed read 1, trimmed read 2 and log file names from the
# output prefix
read1Out = args['<outprefix>'] + '.R1.fastq.gz'
read2Out = args['<outprefix>'] + '.R2.fastq.gz'
trimLog = args['<outprefix>'] + '.log'
# Generate and submit trim command
trimCommand = fastqTrim.cutadaptTrimPaired(read1In=read1In,
                                           read2In=read2In,
                                           read1Out=read1Out,
                                           read2Out=read2Out,
                                           quality=args['--quality'],
                                           adapter='AGATCGGAAGAGC',
示例#5
0
def mpileup(
        inBam, outFile, reference = '', minMapQ = 20, minBaseQ = 20,
        countOrphans = False, countDup = False, disableBAQ = True,
        maxDepth = 10000, minFilter = None, path = 'samtools'
    ):
    ''' Function to build a 'samtools mpileup' shell command. The command
    is returned as a string and is not executed by this function. Function
    takes eleven arguments:
    
    1)  inBam - Input BAM file.
    2)  outFile - Output file.
    3)  reference - Reference sequence passed with '-f'; omitted if empty.
    4)  minMapQ - Minimum mapping quality, passed with '-q'.
    5)  minBaseQ - Minimum base quality, passed with '-Q'.
    6)  countOrphans - Boolean, whether to add the '-A' flag.
    7)  countDup - Boolean; when False '--ff 1024' is added.
    8)  disableBAQ - Boolean, whether to add the '-B' flag.
    9)  maxDepth - Maximum depth, passed with '-d'.
    10) minFilter - If set to a non-zero value the output is piped through
        awk and only lines whose fourth tab-delimited column is greater
        than or equal to this value are written to outFile.
    11) path - Path to samtools executable.
    
    Returns the command as a string.
    
    NOTE(review): the samtools 1.x manual lists a default '--ff' of
    UNMAP,SECONDARY,QCFAIL,DUP. If that applies to the version in use then
    '--ff 1024' replaces that default, and omitting it when countDup is
    True leaves duplicates excluded - confirm against the target samtools
    version.
    
    '''
    # Check arguments, in the order value, type, limits
    argumentChecks = (
        (minMapQ, 'int', {'mn': 2}),
        (minBaseQ, 'int', {'mn': 0}),
        (countOrphans, 'bool', {}),
        (countDup, 'bool', {}),
        (disableBAQ, 'bool', {}),
        (maxDepth, 'int', {'gt': 0}),
        (minFilter, 'int', {'mn': 0}),
    )
    for value, kind, limits in argumentChecks:
        toolbox.checkArg(value, kind, **limits)
    # Create initial command
    tokens = [path, 'mpileup']
    tokens += ['-q', str(minMapQ)]
    tokens += ['-Q', str(minBaseQ)]
    tokens += ['-d', str(maxDepth)]
    # Add options whose condition holds, preserving their order
    optionalTokens = (
        (countOrphans, ['-A']),
        (disableBAQ, ['-B']),
        (reference, ['-f', reference]),
        (not countDup, ['--ff', '1024']),
    )
    for wanted, option in optionalTokens:
        if wanted:
            tokens.extend(option)
    # Without a filter samtools writes the output file itself
    if not minFilter:
        tokens.extend(['-o', outFile, inBam])
        return ' '.join(tokens)
    # With a filter the pileup is piped through awk and then redirected
    tokens.append(inBam)
    depthFilter = 'awk \'BEGIN{FS = "\\t"};{if ($4 >= %s) print $0}\'' %(
        minFilter)
    return '%s | %s > %s' %(' '.join(tokens), depthFilter, outFile)
示例#6
0
# Import required modules
import os
import sys
from ngs_python.fastq import fastqFind, fastqTrim, fastqQC, fastqAlign
from ngs_python.bam import picard
from ngs_python.bed import bedtools
from general_python import docopt, moab, toolbox
# Print command and extract arguments
print('%s\n' % (' '.join(sys.argv)))
args = docopt.docopt(__doc__, version='v1')
# Split sample data into the sample prefix and the sample name
args['prefix'], args['name'] = args['<sampledata>'].split(',')
# Read the tab-separated paths file and check each path is executable
paths = toolbox.fileDict(args['<paths>'], sep='\t')
for program in paths:
    toolbox.checkArg(paths[program], 'exc')
# Convert numeric arguments
for option in ('--minMapQ', '--threads', '--interval'):
    args[option] = int(args[option])
# Find paired FASTQ files in the comma-separated input directories
inputDirs = args['<indir>'].split(',')
args['read1'], args['read2'] = fastqFind.findFastq(
    prefix=args['prefix'], dirList=inputDirs, pair=True)
# Both reads must have the same, non-zero number of files
if len(args['read1']) != len(args['read2']):
    raise IOError('Unequal number of FASTQ files identified')
if len(args['read1']) == 0:
    raise IOError('Insufficient number of FASTQ files identified')
# Check output directory exists
outDirFound = os.path.isdir(args['<outdir>'])
if not outDirFound:
    raise IOError('Output directory not found')
# Create directory names
示例#7
0
# Import required modules
import os
import re
import numpy as np
from ngs_python.structure import interactionMatrix, analyseInteraction
from general_python import docopt, toolbox
# Extract arguments
args = docopt.docopt(__doc__,version = 'v1')
# Check numerical arguments
args['<mincount>'] = int(args['<mincount>'])
args['--threads'] = int(args['--threads'])
# The bin size is only converted when the 'nobed' command is used
if args['nobed']:
    args['<binsize>'] = int(args['<binsize>'])
# Check input files and output directory
if args['bed']:
    toolbox.checkArg(args['<bedfile>'], 'file')
else:
    toolbox.checkArg(args['<chrfile>'], 'file')
for f in args['<inputfiles>']:
    toolbox.checkArg(f, 'file')
toolbox.checkArg(args['<outdir>'], 'dir')
# Modify label argument so that it is prefixed with an underscore
if args['--label']:
    toolbox.checkArg(args['--label'], 'str')
    args['--label'] = '_' + args['--label']
# Extract and print sample names. The sample name is the file name with
# the '.fragLigations.gz' suffix removed. A file without the suffix raises
# a descriptive error instead of an AttributeError on a failed match.
samplePattern = re.compile(r'([^/]*)\.fragLigations\.gz$')
sampleNames = []
for f in args['<inputfiles>']:
    sampleMatch = samplePattern.search(f)
    if sampleMatch is None:
        raise ValueError(
            "Input file '%s' does not end with '.fragLigations.gz'" %(f))
    sampleNames.append(sampleMatch.group(1))
logData = 'Samples:\n  %s\n' %(
    '\n  '.join(sampleNames)
)
# Extract and print parameters to create bins