def sortBed(inBed, outBed, path = 'bedtools', delete = True):
    ''' Function to build a command that sorts a bed file, first by
    chromosome and then by start position, using 'bedtools sort'. The
    command is returned as a string and is not executed here. Function
    takes four arguments:

    1)  inBed - Input bed file.
    2)  outBed - Output bed file, written by shell redirection.
    3)  path - Path to bedtools executable.
    4)  delete - Boolean, whether to delete the input bed file.

    '''
    # Check arguments
    toolbox.checkArg(delete, 'bool')
    # Create sort command
    sortCommand = '%s sort -i %s > %s' %(path, inBed, outBed)
    # Chaining with '&&' means the input is only removed if the sort
    # command itself succeeded
    if delete:
        sortCommand += ' && rm %s' %(inBed)
    # Return command
    return sortCommand
def bed2bedGraph(inBed, outBG, chrFile, path = 'bedtools', delete = False):
    ''' Function to build a command that creates a bedgraph from a sorted
    bed file using 'bedtools genomecov -bg'. The command is returned as a
    string and is not executed here. Function takes five arguments:

    1)  inBed - Input bed file.
    2)  outBG - Output bedgraph file, written by shell redirection.
    3)  chrFile - A tab delimited text file of chromosome name and sizes.
    4)  path - Path to bedtools executable.
    5)  delete - Boolean, whether to delete input BED file.

    '''
    # Check arguments
    toolbox.checkArg(delete, 'bool')
    # Create bedgraph command
    bgCommand = '%s genomecov -bg -i %s -g %s > %s' %(
        path, inBed, chrFile, outBG
    )
    # Chaining with '&&' means the input is only removed if the
    # bedgraph command itself succeeded
    if delete:
        bgCommand += ' && rm %s' %(inBed)
    # Return command
    return bgCommand
--quality=<quality> Trimming quality [default: 20] --adapter=<adapter> Adapter sequence [default: AGATCGGAAGAGC] --path=<path> Path to cutadapt [default: cutadapt] --help Output this message """ # Import required modules import os from ngs_python.fastq import fastqFind, fastqTrim from general_python import docopt, toolbox, moab # Extract and process arguments args = docopt.docopt(__doc__,version = 'v1') args['--quality'] = int(args['--quality']) inDir, inPrefix = os.path.split(args['<inprefix>']) toolbox.checkArg(args['--path'], 'exc') # Extract fastq files and generate output file names read1In, read2In = fastqFind.findFastq(prefix = inPrefix, dirList = [inDir], pair = True, gzip = True) read1Out = args['<outprefix>'] + '.R1.fastq.gz' read2Out = args['<outprefix>'] + '.R2.fastq.gz' trimLog = args['<outprefix>'] + '.log' # Generate and submit trim command trimCommand = fastqTrim.cutadaptTrimPaired(read1In = read1In, read2In = read2In, read1Out = read1Out, read2Out = read2Out, quality = args['--quality'], adapter = 'AGATCGGAAGAGC', length = 25, path = args['--path'] ) jobID = moab.submitJob(trimCommand, stdout = trimLog, stderr = trimLog) print jobID
--quality=<quality> Trimming quality [default: 20] --adapter=<adapter> Adapter sequence [default: AGATCGGAAGAGC] --path=<path> Path to cutadapt [default: cutadapt] --help Output this message """ # Import required modules import os from ngs_python.fastq import fastqFind, fastqTrim from general_python import docopt, toolbox, moab # Extract and process arguments args = docopt.docopt(__doc__, version='v1') args['--quality'] = int(args['--quality']) inDir, inPrefix = os.path.split(args['<inprefix>']) toolbox.checkArg(args['--path'], 'exc') # Extract fastq files and generate output file names read1In, read2In = fastqFind.findFastq(prefix=inPrefix, dirList=[inDir], pair=True, gzip=True) read1Out = args['<outprefix>'] + '.R1.fastq.gz' read2Out = args['<outprefix>'] + '.R2.fastq.gz' trimLog = args['<outprefix>'] + '.log' # Generate and submit trim command trimCommand = fastqTrim.cutadaptTrimPaired(read1In=read1In, read2In=read2In, read1Out=read1Out, read2Out=read2Out, quality=args['--quality'], adapter='AGATCGGAAGAGC',
def mpileup(
        inBam, outFile, reference = '', minMapQ = 20, minBaseQ = 20,
        countOrphans = False, countDup = False, disableBAQ = True,
        maxDepth = 10000, minFilter = None, path = 'samtools'
    ):
    ''' Function to build a 'samtools mpileup' command. The command is
    returned as a string and is not executed here. Function takes eleven
    arguments:

    1)  inBam - Input BAM file.
    2)  outFile - Output file.
    3)  reference - Reference passed with '-f'; omitted if empty.
    4)  minMapQ - Integer passed with '-q'.
    5)  minBaseQ - Integer passed with '-Q'.
    6)  countOrphans - Boolean; '-A' is added when True.
    7)  countDup - Boolean; '--ff 1024' is added when False.
    8)  disableBAQ - Boolean; '-B' is added when True.
    9)  maxDepth - Integer passed with '-d'.
    10) minFilter - Optional integer. When set to a non-zero value the
        pileup is piped through awk and only lines whose fourth
        tab-delimited column is >= minFilter are written to outFile.
    11) path - Path to samtools executable.

    '''
    # Check numeric and boolean arguments
    toolbox.checkArg(minMapQ, 'int', mn = 2)
    toolbox.checkArg(minBaseQ, 'int', mn = 0)
    toolbox.checkArg(countOrphans, 'bool')
    toolbox.checkArg(countDup, 'bool')
    toolbox.checkArg(disableBAQ, 'bool')
    toolbox.checkArg(maxDepth, 'int', gt = 0)
    # None is the documented default and means no filter is applied, so
    # only a supplied value is validated as an integer
    if minFilter is not None:
        toolbox.checkArg(minFilter, 'int', mn = 0)
    # Create initial command
    command = [path, 'mpileup', '-q', str(minMapQ), '-Q', str(minBaseQ),
        '-d', str(maxDepth)]
    # Add options
    if countOrphans:
        command.append('-A')
    if disableBAQ:
        command.append('-B')
    if reference:
        command.extend(['-f', reference])
    # NOTE(review): '--ff' appears to replace the default samtools
    # exclusion flags rather than add to them, and those defaults seem
    # to exclude duplicates already - confirm against the installed
    # samtools that countDup = True really counts duplicates.
    if not countDup:
        command.extend(['--ff', '1024'])
    # Process filter
    if minFilter:
        # The pileup is written to stdout and filtered by awk, so the
        # output file is set by redirection rather than with '-o'
        command.append(inBam)
        awk = 'awk \'BEGIN{FS = "\\t"};{if ($4 >= %s) print $0}\'' %(minFilter)
        finalCommand = '%s | %s > %s' %(' '.join(command), awk, outFile)
    else:
        command.extend(['-o', outFile, inBam])
        finalCommand = ' '.join(command)
    # Return command
    return finalCommand
# Import required modules
import os
import sys
from ngs_python.fastq import fastqFind, fastqTrim, fastqQC, fastqAlign
from ngs_python.bam import picard
from ngs_python.bed import bedtools
from general_python import docopt, moab, toolbox
# Print command and extract arguments
print('%s\n' % (' '.join(sys.argv)))
args = docopt.docopt(__doc__, version='v1')
# Extract sample prefix and name from the comma separated sample data
args['prefix'], args['name'] = args['<sampledata>'].split(',')
# Extract path data and check that every listed program is executable
paths = toolbox.fileDict(args['<paths>'], sep='\t')
for program in paths:
    toolbox.checkArg(paths[program], 'exc')
# Convert numeric arguments, which docopt supplies as strings
for numericArg in ('--minMapQ', '--threads', '--interval'):
    args[numericArg] = int(args[numericArg])
# Extract paired fastq files from the comma separated input directories
args['read1'], args['read2'] = fastqFind.findFastq(
    prefix=args['prefix'],
    dirList=args['<indir>'].split(','),
    pair=True
)
# Both reads must have the same, non-zero, number of files
if len(args['read1']) != len(args['read2']):
    raise IOError('Unequal number of FASTQ files identified')
if len(args['read1']) < 1:
    raise IOError('Insufficient number of FASTQ files identified')
# Check output directories
if not os.path.isdir(args['<outdir>']):
    raise IOError('Output directory not found')
# Create directory names
# Import required modules
import os
import re
import numpy as np
from ngs_python.structure import interactionMatrix, analyseInteraction
from general_python import docopt, toolbox
# Extract arguments
args = docopt.docopt(__doc__,version = 'v1')
# Convert numerical arguments; the bin size is only converted when the
# 'nobed' command was used
args['<mincount>'] = int(args['<mincount>'])
args['--threads'] = int(args['--threads'])
if args['nobed']:
    args['<binsize>'] = int(args['<binsize>'])
# Check input files and output directory
if args['bed']:
    toolbox.checkArg(args['<bedfile>'], 'file')
else:
    toolbox.checkArg(args['<chrfile>'], 'file')
for f in args['<inputfiles>']:
    toolbox.checkArg(f, 'file')
toolbox.checkArg(args['<outdir>'], 'dir')
# Modify label argument so that it carries a leading underscore
if args['--label']:
    toolbox.checkArg(args['--label'], 'str')
    args['--label'] = '_' + args['--label']
# Extract sample names: the file name with its directory and the
# '.fragLigations.gz' suffix removed
sampleNames = []
for f in args['<inputfiles>']:
    nameMatch = re.search(r'([^/]*)\.fragLigations\.gz$', f)
    # re.search returns None for a non-matching name; report the file
    # explicitly instead of failing on the missing match object
    if nameMatch is None:
        raise IOError(
            "Input file '%s' does not end with '.fragLigations.gz'" %(f)
        )
    sampleNames.append(nameMatch.group(1))
# Start the log text with the sample names
logData = 'Samples:\n %s\n' %(
    '\n '.join(sampleNames)
)
# Extract and print parameters to create bins