Exemplo n.º 1
0
# Parse the command line and normalise the argument types
args = docopt.docopt(__doc__, version='v1')
args['<threads>'] = int(args['<threads>'])
# Comma-separated arguments are converted to lists
for listArg in ('<prefix>', '<indir>'):
    args[listArg] = args[listArg].split(',')
# The output must be a BAM file; store its absolute path
if not args['<outbam>'].endswith('.bam'):
    raise ValueError("Output file must end with '.bam'")
args['<outbam>'] = os.path.abspath(args['<outbam>'])
# Parse module file
pmDict = slurm.parsePathModule(args['<modules>'])
# Collect FASTQ files for every prefix. For paired data findFastq returns
# a (read1, read2) pair; for unpaired data it returns the read1 files only.
read1List = []
read2List = []
pairedReads = not args['--unpaired']
for prefix in args['<prefix>']:
    found = fastqFind.findFastq(prefix=prefix,
                                dirList=args['<indir>'],
                                pair=pairedReads)
    if pairedReads:
        read1, read2 = found
        read1List.extend(read1)
        read2List.extend(read2)
    else:
        read1 = found
        read1List.extend(read1)
# Store absolute paths (read2List stays empty for unpaired data)
read1List = list(map(os.path.abspath, read1List))
read2List = list(map(os.path.abspath, read2List))
# Stop if no FASTQ files were identified
if not read1List:
    raise IOError('Failed to find FASTQ files')
Exemplo n.º 2
0
# Echo the command line used to start the script
print('%s\n' % ' '.join(sys.argv))

###############################################################################
## Process command line arguments and create output directories
###############################################################################
# Extract arguments
args = docopt.docopt(__doc__, version='v1')
# <sampledata> must hold exactly two comma-separated fields: prefix and name
sampleFields = args['<sampledata>'].split(',')
args['prefix'], args['name'] = sampleFields
# Check supplied files
# NOTE(review): check_var presumably verifies each path is a file - confirm
for fileArg in ('<gtf>', '<rrna>'):
    toolbox.check_var(args[fileArg], 'file')
# Find FASTQ files in the comma-separated input directories
inputDirs = args['<indir>'].split(',')
if args['--singleend']:
    args['read1'] = fastqFind.findFastq(prefix=args['prefix'],
                                        dirList=inputDirs,
                                        pair=False)
else:
    args['read1'], args['read2'] = fastqFind.findFastq(prefix=args['prefix'],
                                                       dirList=inputDirs,
                                                       pair=True)
    # Paired data needs the same number of read1 and read2 files
    if len(args['read1']) != len(args['read2']):
        raise IOError('Unequal number of FASTQ files identified')
# At least one FASTQ file is required
if len(args['read1']) == 0:
    raise IOError('Insufficient number of FASTQ files identified')
# Convert numerical arguments from strings
numericOptions = (
    ('--threads', int),
    ('--forprob', float),
    ('--minlength', int),
    ('--trimqual', int),
)
for option, convert in numericOptions:
    args[option] = convert(args[option])
# Generate and store standard output directories
args['fastqDir'] = os.path.join(args['<outdir>'], 'fastq')
Exemplo n.º 3
0
    
    --quality=<quality>  Trimming quality [default: 20]
    --adapter=<adapter>  Adapter sequence [default: AGATCGGAAGAGC]
    --path=<path>        Path to cutadapt [default: cutadapt]
    --help               Output this message
    
"""
# Import required modules
import os
from ngs_python.fastq import fastqFind, fastqTrim
from general_python import docopt, toolbox, moab
# Extract and process arguments
args = docopt.docopt(__doc__,version = 'v1')
args['--quality'] = int(args['--quality'])
# Split the input prefix into its directory and file-name prefix
inDir, inPrefix = os.path.split(args['<inprefix>'])
# NOTE(review): checkArg presumably verifies the path is executable - confirm
toolbox.checkArg(args['--path'], 'exc')
# Extract fastq files and generate output file names
read1In, read2In = fastqFind.findFastq(prefix = inPrefix, dirList = [inDir],
    pair = True, gzip = True)
read1Out = args['<outprefix>'] + '.R1.fastq.gz'
read2Out = args['<outprefix>'] + '.R2.fastq.gz'
trimLog = args['<outprefix>'] + '.log'
# Generate and submit trim command. The adapter is taken from the --adapter
# option, which was previously parsed but ignored in favour of a hard-coded
# copy of its default value; the default behaviour is unchanged.
# NOTE(review): the minimum length of 25 is still hard-coded as no option
# for it is visible in the usage text.
trimCommand = fastqTrim.cutadaptTrimPaired(read1In = read1In,
    read2In = read2In, read1Out = read1Out, read2Out = read2Out,
    quality = args['--quality'], adapter = args['--adapter'], length = 25,
    path = args['--path']
)
# stdout and stderr of the job are both written to the trim log
jobID = moab.submitJob(trimCommand, stdout = trimLog, stderr = trimLog)
print(jobID)
Exemplo n.º 4
0
    'maximum distance of concordant pairs: %s' %(args.maxSize),
    'remove duplicate pairs: %s' %(args.rmDuplicates),
    'remove concordant pairs: %s' %(args.rmConcordant)
)
# Create output file names; each one is the output directory followed by the
# sample name and a file-specific suffix
samplePrefix = args.outDir + args.sampleName
args.logFile = samplePrefix + '.log'
args.outFastq = samplePrefix + '_trimmed.fastq.gz'
args.nameSortBam = samplePrefix + "_nSort.bam"
args.outPairs = samplePrefix + ".readPairs.gz"
args.outFrags = samplePrefix + ".fragLigations.gz"

###############################################################################
## Process FASTQ files and perform alignment
###############################################################################
# Extract fastq file names from the comma-separated list of directories
args.read1, args.read2 = fastqFind.findFastq(prefix = args.fastqPrefix,
    dirList = args.fastqDir.split(','), pair = True)
# Only a single read1/read2 pair is supported. NotImplementedError is the
# exception class; 'NotImplemented' is a non-callable constant and raising
# it would produce an unrelated TypeError.
if len(args.read1) > 1 or len(args.read2) > 1:
    raise NotImplementedError('Multiple FASTQ file input not implemented')
# Report missing input clearly instead of failing with an IndexError below
if len(args.read1) < 1 or len(args.read2) < 1:
    raise IOError('Failed to find paired FASTQ files')
# Trim and merge fastq files
pf = fastqIO.parseFastq(
    fastq1 = args.read1[0],
    fastq2 = args.read2[0]
)
trimMetrics = pf.interleave_trim_reads(
    outFastq = args.outFastq,
    trim = args.cutSite,
    minLength = args.minLength
)
# Print trim metrics
print '\nTrim Metrics:\n\t%s\n\t%s\n\t%s\n\t%s' %(
    'total: ' + str(trimMetrics['total']),
Exemplo n.º 5
0
    --path=<path>        Path to cutadapt [default: cutadapt]
    --help               Output this message
    
"""
# Import required modules
import os
from ngs_python.fastq import fastqFind, fastqTrim
from general_python import docopt, toolbox, moab
# Parse the command line; the trimming quality is supplied as a string
args = docopt.docopt(__doc__, version='v1')
args['--quality'] = int(args['--quality'])
# Split the input prefix into its directory and file-name prefix
inDir, inPrefix = os.path.split(args['<inprefix>'])
# NOTE(review): checkArg presumably verifies the path is executable - confirm
toolbox.checkArg(args['--path'], 'exc')
# Locate the gzipped, paired input FASTQ files
read1In, read2In = fastqFind.findFastq(
    prefix=inPrefix, dirList=[inDir], pair=True, gzip=True)
# Derive output and log file names from the output prefix
outPrefix = args['<outprefix>']
read1Out = outPrefix + '.R1.fastq.gz'
read2Out = outPrefix + '.R2.fastq.gz'
trimLog = outPrefix + '.log'
# Build the trim command
# NOTE(review): adapter and minimum length are hard-coded here; if the usage
# text declares an --adapter option it is not passed through - confirm intent
trimSettings = {
    'read1In': read1In,
    'read2In': read2In,
    'read1Out': read1Out,
    'read2Out': read2Out,
    'quality': args['--quality'],
    'adapter': 'AGATCGGAAGAGC',
    'length': 25,
    'path': args['--path'],
}
trimCommand = fastqTrim.cutadaptTrimPaired(**trimSettings)
# Submit the command, sending stdout and stderr to the trim log, and report
# the returned job ID
jobID = moab.submitJob(trimCommand, stdout=trimLog, stderr=trimLog)
print(jobID)
Exemplo n.º 6
0
###############################################################################
## Process command line arguments and create output directories
###############################################################################
# Extract arguments
args = docopt.docopt(__doc__, version='v1')
# <sampledata> must hold exactly two comma-separated fields: prefix and name
samplePrefix, sampleName = args['<sampledata>'].split(',')
args['prefix'] = samplePrefix
args['name'] = sampleName
# Check supplied files
# NOTE(review): check_var presumably verifies each path is a file - confirm
for fileArg in ('<gtf>', '<rrna>'):
    toolbox.check_var(args[fileArg], 'file')
# Search settings shared by the single-end and paired-end lookups
searchArgs = {
    'prefix': args['prefix'],
    'dirList': args['<indir>'].split(','),
}
if args['--singleend']:
    args['read1'] = fastqFind.findFastq(pair=False, **searchArgs)
else:
    args['read1'], args['read2'] = fastqFind.findFastq(pair=True,
                                                       **searchArgs)
    # Paired data needs the same number of read1 and read2 files
    if len(args['read1']) != len(args['read2']):
        raise IOError('Unequal number of FASTQ files identified')
# At least one FASTQ file is required
if len(args['read1']) == 0:
    raise IOError('Insufficient number of FASTQ files identified')
# Convert numerical arguments from strings
for option, convert in (('--threads', int), ('--forprob', float)):
    args[option] = convert(args[option])
Exemplo n.º 7
0
    concatFastq.py prefix <outfastq> <prefix>
    concatFastq.py specify <outfastq> <infastq>..
    
'''
# Import modules
import os
import sys
from general_python import moab, docopt, toolbox
from ngs_python.fastq import fastqFind
# Parse the command line
args = docopt.docopt(__doc__, version='v1')
# In 'prefix' mode the input files are found by searching the directory part
# of <prefix> for files matching its file-name part
if args['prefix']:
    indir, prefix = os.path.split(args['<prefix>'])
    print('%s %s' % (indir, prefix))
    foundFastq = fastqFind.findFastq(prefix=prefix, dirList=[indir],
                                     pair=False)
    foundFastq.sort()
    args['<infastq>'] = foundFastq
# Concatenation needs at least two input files
fastqCount = len(args['<infastq>'])
if fastqCount < 2:
    sys.exit('\nCannot concatenate %s files\n' % fastqCount)
# Never overwrite an existing output file
if os.path.isfile(args['<outfastq>']):
    sys.exit('\nOutput file exists. No command submitted\n')
# Show the input and output files to the user
inputListing = '\n'.join(args['<infastq>'])
print('\nInput files:\n%s\n\nOutput file:\n%s\n' % (
    inputListing, args['<outfastq>']))
# Ask for explicit confirmation before concatenating
print("Enter 'concat' to concatenate: ")
response = raw_input()
# Submit command
if response == 'concat':
Exemplo n.º 8
0
# <sampledata> must hold exactly two comma-separated fields: prefix and name
args['prefix'], args['name'] = args['<sampledata>'].split(',')
# Split input directories into a list
args['<indir>'] = args['<indir>'].split(',')
# Read in path file: each line holds a program name and its path separated
# by a single tab
paths = {}
with open(args['<pathfile>'], 'r') as pfile:
    for line in pfile:
        line = line.strip()
        # Skip blank lines (e.g. a trailing newline) rather than failing
        # on the tab split
        if not line:
            continue
        program, path = line.split('\t')
        paths[program] = path
# Create folder for log files
args['logdir'] = os.path.join(args['<outdir>'], args['name'] + '_log')
if not os.path.isdir(args['logdir']):
    os.mkdir(args['logdir'])
# Find fastq files
read1, read2 = fastqFind.findFastq(
    prefix = args['prefix'], dirList = args['<indir>'], pair = True,
    gzip = True
)
# Exactly one read1 and one read2 file are required. The test must use 'or':
# with 'and' a mismatched result (e.g. one read1 file but two read2 files)
# was silently accepted.
if len(read1) != 1 or len(read2) != 1:
    raise IOError('Failure to find single paired FASTQ files')
# Generate output files
bamPrefix = os.path.join(args['<outdir>'], args['name'])
logPrefix = os.path.join(args['logdir'], args['name'])
outfiles = {
    'initialbam' : bamPrefix + '.bam',
    'dedupbam' : bamPrefix + '_dedup.bam',
    'realignbam' : bamPrefix + '_dedup_realign.bam',
    'recalbam' : bamPrefix + '_dedup_realign_recal.bam',
    'listfile' : logPrefix + '_target.list',
    'bsqrfile' : logPrefix + '_bsqr.grp',
    'alignlog' : logPrefix + '_align.log',
    'deduplog1' : logPrefix + '_dedup_1.log',