def __init__(self, **kwargs):
    """Set up the trim_galore wrapper.

    Parameters
    ----------
    kwargs:
        trim_galore arguments, stored as passed (could be overridden later
        too).

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for the
        dependency list (trim_galore and cutadapt).
    """
    # Run the parent initialiser to inherit parent class properties.
    super().__init__()

    tool = "trim_galore"
    self.programName = tool
    self.dep_list = [tool, 'cutadapt']

    # Option names recorded for trim_galore.
    self.valid_args = [
        '--cores', '-v', '-q', '--phred33', '--phred64',
        '--fastqc', '--fastqc_args', '-a', '-a2',
        '--illumina', '--nextera', '--small_rna',
        '--consider_already_trimmed', '--max_length', '--stringency',
        '-e', '--gzip', '--dont_gzip', '--length', '--max_n', '--trim-n',
        '-o', '--no_report_file', '--suppress_warn',
        '--clip_R1', '--clip_R2',
        '--three_prime_clip_R1', '--three_prime_clip_R2',
        '--2colour', '--path_to_cutadapt', '--basename', '-j',
        '--hardtrim5', '--hardtrim3', '--clock', '--polyA', '--rrbs',
        '--non_directional', '--keep', '--paired', '-t',
        '--retain_unpaired', '-r1', '-r2',
    ]

    # Check that trim_galore and its dependency are available.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Keep the arguments passed by the caller.
    self.passedArgumentDict = kwargs
def __init__(self, **kwargs):
    """Set up the Trinity wrapper.

    Parameters
    ----------
    kwargs:
        Trinity arguments, stored as passed in ``passed_args_dict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for Trinity,
        jellyfish and bowtie2.
    """
    super().__init__()

    tool = "Trinity"
    self.program_name = tool
    self.dep_list = [tool, 'jellyfish', 'bowtie2']

    # Check that Trinity and its dependencies are available.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.program_name + " not found."
        raise Exception(message)

    # Option names recorded for Trinity.
    self.valid_args_list = [
        '--seqType', '--max_memory', '--left', '--right', '--single',
        '--SS_lib_type', '--CPU', '--min_contig_length', '--long_reads',
        '--genome_guided_bam', '--jaccard_clip', '--trimmomatic',
        '--normalize_reads', '--no_distributed_trinity_exec', '--output',
        '--full_cleanup', '--cite', '--verbose', '--version',
        '--show_full_usage_info', '--KMER_SIZE', '--prep', '--no_cleanup',
        '--no_version_check', '--min_kmer_cov', '--inchworm_cpu',
        '--no_run_inchworm', '--max_reads_per_graph', '--min_glue',
        '--no_bowtie', '--no_run_chrysalis', '--bfly_opts', '--PasaFly',
        '--CuffFly', '--group_pairs_distance',
        '--path_reinforcement_distance', '--no_path_merging',
        '--min_per_id_same_path', '--max_diffs_same_path',
        '--max_internal_gap_same_path', '--bflyHeapSpaceMax',
        '--bflyHeapSpaceInit', '--bflyGCThreads', '--bflyCPU',
        '--bflyCalculateCPU', '--bfly_jar', '--quality_trimming_params',
        '--normalize_max_read_cov', '--normalize_by_read_set',
        '--genome_guided_max_intron', '--genome_guided_min_coverage',
        '--genome_guided_min_reads_per_partition', '--grid_conf',
        '--grid_node_CPU', '--grid_node_max_memory',
    ]

    # Keep the arguments passed by the caller.
    self.passed_args_dict = kwargs
def __init__(self, kallisto_index, **kwargs):
    """Set up the kallisto wrapper.

    Parameters
    ----------
    kallisto_index:
        Path to a kallisto index. It is recorded (and passed as ``-i``) only
        when it is non-empty and ``pu.check_files_exist`` accepts it.
    kwargs:
        kallisto arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for kallisto.
    """
    super().__init__()

    self.programName = "kallisto"
    self.dep_list = [self.programName]
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Option names per kallisto sub-command.
    # kallisto index
    self.validArgsIndex = [
        '-i', '--index', '-k', '--kmer-size', '--make-unique',
    ]
    # kallisto quant
    self.validArgsQuant = [
        '-i', '--index', '-o', '--output-dir', '--bias', '-b',
        '--bootstrap-samples', '--seed', '--plaintext', '--fusion',
        '--single', '--fr-stranded', '--rf-stranded', '-l',
        '--fragment-length', '-s', '--sd', '-t', '--threads', '--pseudobam',
    ]
    # kallisto pseudo
    self.validArgsPseudo = [
        '-i', '--index', '-o', '--output-dir', '-u', '--umi', '-b',
        '--batch', '--single', '-l', '--fragment-length', '-s', '--sd',
        '-t', '--threads', '--pseudobam',
    ]
    # kallisto h5dump
    self.validArgsh5dump = ['-o', '--output-dir']

    # Combine the per-sub-command lists via the project helper.
    self.valid_args = pu.get_union(
        self.validArgsIndex,
        self.validArgsQuant,
        self.validArgsPseudo,
        self.validArgsh5dump,
    )

    # Keep the arguments passed by the caller.
    self.passedArgumentDict = kwargs

    # Without a usable index there is nothing more to record.
    index_usable = (len(kallisto_index) > 0
                    and pu.check_files_exist(kallisto_index))
    if not index_usable:
        print("No kallisto index provided. Please use build_index() now to generate an index...")
        return

    print("kallisto index is: " + kallisto_index)
    self.kallisto_index = kallisto_index
    self.passedArgumentDict['-i'] = self.kallisto_index
def __init__(self, threads=None):
    """Set up the cufflinks wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for cufflinks.
    """
    super().__init__()
    self.program_name = "cufflinks"

    # Check that cufflinks is available.
    deps_ok = pe.check_dependencies([self.program_name])
    if not deps_ok:
        message = "ERROR: " + self.program_name + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.cufflinksArgsList=['-h','--help','-o','--output-dir','-p','--num-threads','--seed','-G','--GTF','-g','--GTF-guide','-M','--mask-file','-b','--frag-bias-correct','-u','--multi-read-correct','--library-type','--library-norm-method',
    '-m','--frag-len-mean','-s','--frag-len-std-dev','--max-mle-iterations','--compatible-hits-norm','--total-hits-norm','--num-frag-count-draws','--num-frag-assign-draws','--max-frag-multihits','--no-effective-length-correction',
    '--no-length-correction','-N','--upper-quartile-norm','--raw-mapped-norm','-L','--label','-F','--min-isoform-fraction','-j','--pre-mrna-fraction','-I','--max-intron-length','-a','--junc-alpha','-A','--small-anchor-fraction',
    '--min-frags-per-transfrag','--overhang-tolerance','--max-bundle-length','--max-bundle-frags','--min-intron-length','--trim-3-avgcov-thresh','--trim-3-dropoff-frac','--max-multiread-fraction','--overlap-radius',
    '--no-faux-reads','--3-overhang-tolerance','--intron-overhang-tolerance','-v','--verbose','-q','--quiet','--no-update-check']
    self.cuffcompareArgsList=['-h','-i','-r','-R','-Q','-M','-N','-s','-e','-d','-p','-C','-F','-G','-T','-V']
    self.cuffquantArgsList=['-o','--output-dir','-p','--num-threads','-M','--mask-file','-b','--frag-bias-correct','-u','--multi-read-correct','--library-type','-m','--frag-len-mean','-s','--frag-len-std-dev','-c','--min-alignment-count',
    '--max-mle-iterations','-v','--verbose','-q','--quiet','--seed','--no-update-check','--max-bundle-frags','--max-frag-multihits','--no-effective-length-correction','--no-length-correction','--read-skip-fraction',
    '--no-read-pairs','--trim-read-length','--no-scv-correction']
    self.cuffdiffArgsList=['-o','--output-dir','-L','--labels','--FDR','-M','--mask-file','-C','--contrast-file','-b','--frag-bias-correct','-u','--multi-read-correct','-p','--num-threads','--no-diff','--no-js-tests','-T','--time-series',
    '--library-type','--dispersion-method','--library-norm-method','-m','--frag-len-mean','-s','--frag-len-std-dev','-c','--min-alignment-count','--max-mle-iterations','--compatible-hits-norm','--total-hits-norm',
    ' -v','--verbose','-q','--quiet','--seed','--no-update-check','--emit-count-tables','--max-bundle-frags','--num-frag-count-draws','--num-frag-assign-draws','--max-frag-multihits','--min-outlier-p','--min-reps-for-js-test',
    '--no-effective-length-correction','--no-length-correction','-N','--upper-quartile-norm','--geometric-norm','--raw-mapped-norm','--poisson-dispersion','--read-skip-fraction','--no-read-pairs','--trim-read-length','--no-scv-correction']
    self.cuffnormArgsList=['-o','--output-dir','-L','--labels','--norm-standards-file','-p','--num-threads','--library-type','--library-norm-method','--output-format','--compatible-hits-norm','--total-hits-norm','-v','--verbose','-q','--quiet','--seed','--no-update-check']
    self.cuffmergeArgsList=['h','--help','-o','-g','–-ref-gtf','-p','–-num-threads','-s','-–ref-sequence']
    self.valid_args_list=pu.get_union(self.cufflinksArgsList,self.cuffcompareArgsList,self.cuffquantArgsList,self.cuffdiffArgsList,self.cuffnormArgsList,self.cuffmergeArgsList)
    """

    # Fall back to the CPU count when no thread count was given.
    self.threads = threads if threads else os.cpu_count()
def init_from_accession(self, srr_accession, directory):
    """Initialise this SRA object from an accession and a base directory.

    Records the accession, derives ``<directory>/<srr_accession>`` as the
    working directory, and downloads data only when ``search_fastq`` does
    not find existing fastq files there.

    Parameters
    ----------
    srr_accession:
        SRR accession; must be truthy.
    directory:
        Base directory under which a folder named after the accession is
        used. A falsy value falls back to the current working directory.

    Raises
    ------
    OSError
        If ``pe.check_dependencies`` returns a falsy value for prefetch and
        fasterq-dump.
    ValueError
        If ``srr_accession`` is falsy.

    Returns
    -------
    ``True`` when ``search_fastq`` finds existing files; otherwise the
    result of ``self.download_fastq()``.
    """
    # Check that the required programs exist.
    self.dep_list = ['prefetch', "fasterq-dump"]
    if not pe.check_dependencies(self.dep_list):
        raise OSError("ERROR: Please install missing programs.")

    if not srr_accession:
        raise ValueError("Please provide a valid accession")

    if not directory:
        directory = os.getcwd()

    # Store the validated accession before it is read below; previously the
    # argument was checked but self.srr_accession had to be set elsewhere.
    self.srr_accession = srr_accession

    # Use a directory named <srr_accession> as the working directory.
    self.directory = os.path.join(directory, self.srr_accession)

    # Nothing to download when fastq files already exist.
    if self.search_fastq(self.directory):
        return True

    # Try to download the SRA file first; a failure here is not fatal.
    if not self.download_sra():
        pu.print_boldred('prefetch failed!!! Trying fasterq-dump...')

    # Run fasterq-dump either on the downloaded SRA file or directly.
    return self.download_fastq()
def __init__(self, threads=None):
    """Set up the trim_galore wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to 8.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for trim_galore
        and cutadapt.
    """
    # Run the parent initialiser to inherit parent class properties.
    super().__init__()

    tool = "trim_galore"
    self.programName = tool
    self.dep_list = [tool, 'cutadapt']

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.valid_args=['--cores','-v','-q','--phred33','--phred64','--fastqc','--fastqc_args','-a','-a2',
    '--illumina','--nextera','--small_rna','--consider_already_trimmed',
    '--max_length','--stringency','-e','--gzip','--dont_gzip','--length',
    '--max_n','--trim-n','-o','--no_report_file','--suppress_warn',
    '--clip_R1','--clip_R2','--three_prime_clip_R1','--three_prime_clip_R2',
    '--2colour','--path_to_cutadapt','--basename','-j','--hardtrim5','--hardtrim3',
    '--clock','--polyA','--rrbs','--non_directional','--keep','--paired','-t',
    '--retain_unpaired','-r1','-r2']
    """

    # Check that the dependencies exist.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # trimgalore recommends max 8 threads, so 8 is the default.
    self.threads = threads if threads else 8
def __init__(self, threads=None, max_memory=None):
    """Set up the Trinity wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.
    max_memory:
        Memory limit; a falsy value falls back to the floor of 80% of the
        total memory in GiB, computed from ``os.sysconf`` page size and
        physical page count.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for Trinity,
        jellyfish and bowtie2.
    """
    super().__init__()

    tool = "Trinity"
    self.program_name = tool
    self.dep_list = [tool, 'jellyfish', 'bowtie2']

    # Check that Trinity and its dependencies are available.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.program_name + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.valid_args_list=['--seqType','--max_memory','--left','--right','--single','--SS_lib_type','--CPU','--min_contig_length',
    '--long_reads','--genome_guided_bam','--jaccard_clip','--trimmomatic','--normalize_reads','--no_distributed_trinity_exec',
    '--output','--full_cleanup','--cite','--verbose','--version','--show_full_usage_info','--KMER_SIZE','--prep','--no_cleanup',
    '--no_version_check','--min_kmer_cov','--inchworm_cpu','--no_run_inchworm','--max_reads_per_graph','--min_glue','--no_bowtie',
    '--no_run_chrysalis','--bfly_opts','--PasaFly','--CuffFly','--group_pairs_distance','--path_reinforcement_distance','--no_path_merging',
    '--min_per_id_same_path','--max_diffs_same_path','--max_internal_gap_same_path','--bflyHeapSpaceMax','--bflyHeapSpaceInit',
    '--bflyGCThreads','--bflyCPU','--bflyCalculateCPU','--bfly_jar','--quality_trimming_params','--normalize_max_read_cov',
    '--normalize_by_read_set','--genome_guided_max_intron','--genome_guided_min_coverage','--genome_guided_min_reads_per_partition',
    '--grid_conf','--grid_node_CPU','--grid_node_max_memory']
    """

    # Fall back to the CPU count when no thread count was given.
    self.threads = threads if threads else os.cpu_count()

    # Use floor(80% of total memory in GiB) by default.
    if not max_memory:
        page_size = os.sysconf('SC_PAGE_SIZE')
        page_count = os.sysconf('SC_PHYS_PAGES')
        gib_total = (page_size * page_count) / (1024.**3)
        max_memory = math.floor(gib_total * 0.8)
    self.max_memory = max_memory
def __init__(self, **kwargs):
    """Set up the RiboCode wrapper.

    Parameters
    ----------
    kwargs:
        RiboCode arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for RiboCode.
    """
    tool = "RiboCode"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # No option names are recorded for this tool.
    self.valid_args = []
    self.passedArgumentDict = kwargs
def __init__(self, salmon_index, threads=None):
    """Set up the salmon wrapper.

    Parameters
    ----------
    salmon_index:
        Path to a salmon index. It is recorded only when it is non-empty
        and ``pu.check_salmon_index`` accepts it.
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for salmon.
    """
    super().__init__()

    tool = "salmon"
    self.programName = tool
    self.dep_list = [tool]
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    ##salmon index
    self.validArgsIndex=['-v','--version','-h','--help','-t','--transcripts','-k','--kmerLen','-i',
    '--index','--gencode','--keepDuplicates','-p','--threads','--perfectHash',
    '--type','-s','--sasamp']
    ##salmon quant read
    self.validArgsQuantReads=['--help-reads','-i','--index','-l','--libType','-r','--unmatedReads',
    '-1','--mates1','-2','--mates2','-o','--output','--discardOrphansQuasi',
    '--allowOrphansFMD','--seqBias','--gcBias','-p','--threads','--incompatPrior',
    '-g','--geneMap','-z','--writeMappings','--meta','--alternativeInitMode',
    '--auxDir','-c','--consistentHits','--dumpEq','-d','--dumpEqWeights',
    '--fasterMapping','--minAssignedFrags','--reduceGCMemory','--biasSpeedSamp',
    '--strictIntersect','--fldMax','--fldMean','--fldSD','-f','--forgettingFactor',
    '-m','--maxOcc','--initUniform','-w','--maxReadOcc','--noLengthCorrection',
    '--noEffectiveLengthCorrection','--noFragLengthDist','--noBiasLengthThreshold',
    '--numBiasSamples','--numAuxModelSamples','--numPreAuxModelSamples','--useVBOpt',
    '--rangeFactorizationBins','--numGibbsSamples','--numBootstraps','--thinningFactor',
    '-q','--perTranscriptPrior','--vbPrior','--writeOrphanLinks','--writeUnmappedNames',
    '-x','--quasiCoverage']
    ##salmon quant alignment
    self.validArgsQuantAlign=['--help-alignment','-l','--libType','-a','--alignments','-t','--targets','-p',
    '--threads','--seqBias','--gcBias','--incompatPrior','--useErrorModel',
    '-o','--output','--meta','-g','--geneMap','--alternativeInitMode','--auxDir'
    ,'--noBiasLengthThreshold','--dumpEq','-d','--dumpEqWeights','--fldMax',
    '--fldMean','--fldSD','-f','--forgettingFactor','--minAssignedFrags',
    '--gencode','--reduceGCMemory','--biasSpeedSamp','--mappingCacheMemoryLimit',
    '-w','--maxReadOcc','--noEffectiveLengthCorrection','--noFragLengthDist',
    '-v','--useVBOpt','--rangeFactorizationBins','--perTranscriptPrior','--vbPrior',
    '--numErrorBins','--numBiasSamples','--numPreAuxModelSamples','--numAuxModelSamples',
    '-s','--sampleOut','-u','--sampleUnaligned','--numGibbsSamples','--numBootstraps',
    '--thinningFactor']
    ##salmon quantmerge
    self.validArgsQuantMerge=['--quants','--names','-c','--column','-o','--output']
    self.valid_args=pu.get_union(self.validArgsIndex,self.validArgsQuantReads,self.validArgsQuantAlign,self.validArgsQuantMerge)
    """

    # Fall back to the CPU count when no thread count was given.
    self.threads = threads if threads else os.cpu_count()

    # Record the index only when it passes the project's index check.
    index_usable = (len(salmon_index) > 0
                    and pu.check_salmon_index(salmon_index))
    if not index_usable:
        print(
            "No salmon index provided. Please build index now to generate an index..."
        )
        return

    print("salmon index is: " + salmon_index)
    self.salmon_index = salmon_index
def __init__(self, threads=None, max_memory=None):
    """Set up the portcullis wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.
    max_memory:
        Accepted but not used by this initialiser.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for portcullis.
    """
    tool = "portcullis"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Use the CPU count by default.
    self.threads = threads if threads else os.cpu_count()
def __init__(self, threads=None, max_memory=None):
    """Set up the samtools wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use. A falsy value falls back to 80% of the
        CPU count (rounded down), but never fewer than one thread.
    max_memory:
        Stored unchanged in ``self.max_memory``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for samtools.
    """
    self.programName = "samtools"

    # Check that samtools exists.
    if not pe.check_dependencies([self.programName]):
        raise Exception("ERROR: " + self.programName + " not found.")

    self.threads = threads
    # Default: use 80% of the CPUs to avoid memory issues.
    if not self.threads:
        # os.cpu_count() may return None, and int(1 * 0.8) is 0, so guard
        # both cases to always end up with at least one thread.
        cpu_total = os.cpu_count() or 1
        self.threads = max(1, int(cpu_total * 0.8))

    self.max_memory = max_memory
def __init__(self, **kwargs):
    """Set up the samtools wrapper.

    Parameters
    ----------
    kwargs:
        samtools arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for samtools.
    """
    tool = "samtools"
    self.programName = tool

    # Check that samtools exists.
    deps_ok = pe.check_dependencies([self.programName])
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Option names recorded for samtools.
    self.valid_args = [
        '-b', '-C', '-1', '-u', '-h', '-H', '-c',
        '-o', '-U', '-t', '-L', '-r', '-R', '-q',
        '-l', '-m', '-f', '-F', '-G', '-s', '-M',
        '-x', '-B', '-?', '-S', '-O', '-T', '-@',
    ]
    self.passedArgumentDict = kwargs
def __init__(self, threads=None):
    """Set up the stringtie wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for stringtie.
    """
    super().__init__()
    self.program_name = "stringtie"

    # Check that stringtie exists.
    if not pe.check_dependencies([self.program_name]):
        raise Exception("ERROR: " + self.program_name + " not found.")

    # Fall back to the CPU count when no thread count was given.
    if not threads:
        threads = os.cpu_count()
    self.threads = threads
    # NOTE(review): the original block ended with an unterminated opening
    # triple-quote (presumably a truncated option listing); it was removed
    # so the method is syntactically complete.
def __init__(self, threads=None, max_memory=None):
    """Set up the bbduk.sh wrapper.

    Parameters
    ----------
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.
    max_memory:
        Max memory to use in GB; a falsy value falls back to the floor of
        the total memory in GiB, computed from ``os.sysconf`` page size and
        physical page count.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for bbduk.sh.
    """
    # Run the parent initialiser to inherit parent class properties.
    super().__init__()

    tool = "bbduk.sh"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.valid_args=['in','in2','ref','literal','touppercase','interleaved','qin','reads','copyundefined',
    'samplerate','samref','out','out2','outm','outm2','outs','stats','refstats','rpkm',
    'dump','duk','nzo','overwrite','showspeed','ziplevel','fastawrap','qout','statscolumns',
    'rename','refnames','trd','ordered','maxbasesout','maxbasesoutm','','json','bhist','qhist',
    'qchist','aqhist','bqhist','lhist','phist','gchist','ihist','gcbins','maxhistlen','histbefore',
    'ehist','qahist','indelhist','mhist','idhist','idbins','varfile','vcf','ignorevcfindels',
    'k','rcomp','maskmiddle','minkmerhits','minkmerfraction','mincovfraction','hammingdistance',
    'qhdist','editdistance','hammingdistance2','qhdist2','editdistance2','forbidn','removeifeitherbad',
    'trimfailures','findbestmatch','skipr1','skipr2','ecco','recalibrate','sam','le.','amino',
    'threads','prealloc','monitor','minrskip','maxrskip','rskip','qskip','speed','ktrim','kmask',
    'maskfullycovered','ksplit','mink','qtrim','trimq','trimclip','minlength','mlf','maxlength',
    'minavgquality','maqb','minbasequality','maxns','mcb','ottm','tp','tbo','strictoverlap',
    'minoverlap','mininsert','tpe','forcetrimleft','forcetrimright','forcetrimright2',
    'forcetrimmod','restrictleft','restrictright','mingc','maxgc','gcpairs','tossjunk',
    'swift','chastityfilter','barcodefilter','barcodes','xmin','ymin','xmax','ymax','trimpolya',
    'trimpolygleft','trimpolygright','trimpolyg','filterpolyg','pratio','plen','entropy','entropywindow',
    'entropyk','minbasefrequency','entropytrim','entropymask','entropymark','cardinality',
    'cardinalityout','loglogk','loglogbuckets','-Xmx','-eoom','-da']
    """

    # Use the CPU count by default.
    self.threads = threads if threads else os.cpu_count()

    # Use floor(total memory in GiB) by default.
    if not max_memory:
        page_size = os.sysconf('SC_PAGE_SIZE')
        page_count = os.sysconf('SC_PHYS_PAGES')
        gib_total = (page_size * page_count) / (1024.**3)
        max_memory = math.floor(gib_total)
    self.max_memory = max_memory
def __init__(self, **kwargs):
    """Set up the bbduk.sh wrapper.

    Parameters
    ----------
    kwargs:
        bbduk.sh arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for bbduk.sh.
    """
    # Run the parent initialiser to inherit parent class properties.
    super().__init__()

    tool = "bbduk.sh"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Option names recorded for bbduk.sh.
    self.valid_args = [
        'in', 'in2', 'ref', 'literal', 'touppercase', 'interleaved', 'qin',
        'reads', 'copyundefined', 'samplerate', 'samref', 'out', 'out2',
        'outm', 'outm2', 'outs', 'stats', 'refstats', 'rpkm', 'dump', 'duk',
        'nzo', 'overwrite', 'showspeed', 'ziplevel', 'fastawrap', 'qout',
        'statscolumns', 'rename', 'refnames', 'trd', 'ordered',
        'maxbasesout', 'maxbasesoutm', '', 'json', 'bhist', 'qhist',
        'qchist', 'aqhist', 'bqhist', 'lhist', 'phist', 'gchist', 'ihist',
        'gcbins', 'maxhistlen', 'histbefore', 'ehist', 'qahist',
        'indelhist', 'mhist', 'idhist', 'idbins', 'varfile', 'vcf',
        'ignorevcfindels', 'k', 'rcomp', 'maskmiddle', 'minkmerhits',
        'minkmerfraction', 'mincovfraction', 'hammingdistance', 'qhdist',
        'editdistance', 'hammingdistance2', 'qhdist2', 'editdistance2',
        'forbidn', 'removeifeitherbad', 'trimfailures', 'findbestmatch',
        'skipr1', 'skipr2', 'ecco', 'recalibrate', 'sam', 'le.', 'amino',
        'threads', 'prealloc', 'monitor', 'minrskip', 'maxrskip', 'rskip',
        'qskip', 'speed', 'ktrim', 'kmask', 'maskfullycovered', 'ksplit',
        'mink', 'qtrim', 'trimq', 'trimclip', 'minlength', 'mlf',
        'maxlength', 'minavgquality', 'maqb', 'minbasequality', 'maxns',
        'mcb', 'ottm', 'tp', 'tbo', 'strictoverlap', 'minoverlap',
        'mininsert', 'tpe', 'forcetrimleft', 'forcetrimright',
        'forcetrimright2', 'forcetrimmod', 'restrictleft', 'restrictright',
        'mingc', 'maxgc', 'gcpairs', 'tossjunk', 'swift', 'chastityfilter',
        'barcodefilter', 'barcodes', 'xmin', 'ymin', 'xmax', 'ymax',
        'trimpolya', 'trimpolygleft', 'trimpolygright', 'trimpolyg',
        'filterpolyg', 'pratio', 'plen', 'entropy', 'entropywindow',
        'entropyk', 'minbasefrequency', 'entropytrim', 'entropymask',
        'entropymark', 'cardinality', 'cardinalityout', 'loglogk',
        'loglogbuckets', '-Xmx', '-eoom', '-da',
    ]

    # Keep the arguments passed by the caller.
    self.passedArgumentDict = kwargs
def __init__(self, **kwargs):
    """Set up the portcullis wrapper.

    Parameters
    ----------
    kwargs:
        portcullis arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for portcullis.
    """
    tool = "portcullis"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Option names recorded for portcullis.
    self.valid_args = [
        '-t', '--threads', '-v', '--verbose', '--help', '-o',
        '-b', '--bam_filter', '--exon_gff', '--intron_gff', '--source',
        '--force', '--copy', '--use_csi', '--orientation',
        '--strandedness', '--separate', '--extra', '-r', '--max_length',
        '--canonical', '--min_cov', '--save_bad',
    ]
    self.passedArgumentDict = kwargs
def init_from_accession(self, srr_accession, location):
    """Initialise this SRA object from an accession and a save location.

    Parameters
    ----------
    srr_accession:
        SRR accession; must not be ``None``.
    location:
        Base directory for the data. ``None`` falls back to the current
        working directory. A folder named after the accession under it is
        recorded as ``self.location``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for prefetch and
        fasterq-dump, or if ``srr_accession`` is ``None``.
    """
    self.dep_list = ['prefetch', "fasterq-dump"]
    tools_present = pe.check_dependencies(self.dep_list)
    if not tools_present:
        raise Exception("ERROR: Please install missing programs.")

    if srr_accession is None:
        raise Exception("Please provide a valid accession")

    base_dir = os.getcwd() if location is None else location

    self.srr_accession = srr_accession
    # Use a directory named <srr_accession> as the location.
    self.location = os.path.join(base_dir, self.srr_accession)
def __init__(self, bowtie2_index, **kwargs):
    """Set up the bowtie2 wrapper.

    Parameters
    ----------
    bowtie2_index:
        Path to a bowtie2 index. It is recorded (and passed as ``-x``) only
        when it is non-empty and ``pu.check_bowtie2index`` accepts it.
    kwargs:
        bowtie2 arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for bowtie2.
    """
    super().__init__()

    tool = "bowtie2"
    self.programName = tool
    self.dep_list = [tool]
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # Option names recorded for bowtie2.
    self.valid_args = [
        '-x', '-1', '-2', '-U', '--interleaved', '-S', '-b', '-q',
        '--tab5', '--tab6', '--qseq', '-f', '-r', '-F', '-c', '-s', '-u',
        '-5', '-3', '--trim-to', '--phred33', '--phred64', '--int-quals',
        '--very-fast', '--fast', '--sensitive', '--very-sensitive',
        '--very-fast-local', '--fast-local', '--sensitive-local',
        '--very-sensitive-local', '-N', '-L', '-i', '--n-ceil', '--dpad',
        '--gbar', '--ignore-quals', '--nofw', '--norc', '--no-1mm-upfront',
        '--end-to-end', '--local', '--ma', '--mp', '--np', '--rdg',
        '--rfg', '--score-min', '-k', '-a', '-D', '-R', '-I', '-X', '--fr',
        '--rf', '--ff', '--no-mixed', '--no-discordant', '--dovetail',
        '--no-contain', '--no-overlap', '--align-paired-reads',
        '--preserve-tags', '-t', '--un', '--al', '--un-conc', '--al-conc',
        '--un-gz', '--quiet', '--met-file', '--met-stderr', '--met',
        '--no-unal', '--no-head', '--no-sq', '--rg-id', '--rg',
        '--omit-sec-seq', '--sam-no-qname-trunc', '--xeq',
        '--soft-clipped-unmapped-tlen', '-p', '--threads', '--reorder',
        '--mm', '--qc-filter', '--seed', '--non-deterministic',
        '--version', '-h', '--help',
    ]

    # Keep the arguments passed by the caller.
    self.passedArgumentDict = kwargs

    # Record the index only when it passes the project's index check.
    index_usable = (len(bowtie2_index) > 0
                    and pu.check_bowtie2index(bowtie2_index))
    if not index_usable:
        print(
            "No Bowtie2 index provided. Please build index now to generate an index..."
        )
        return

    print("Bowtie2 index is: " + bowtie2_index)
    self.bowtie2_index = bowtie2_index
    self.passedArgumentDict['-x'] = self.bowtie2_index
def __init__(self, hisat2_index="", **kwargs):
    """Set up the hisat2 wrapper.

    Parameters
    ----------
    hisat2_index:
        Path to a hisat2 index. It is recorded (and passed as ``-x``) only
        when it is non-empty and ``pu.check_hisatindex`` accepts it. An
        empty or ``None`` value is treated as "no index provided".
    kwargs:
        hisat2 arguments, stored as passed in ``passedArgumentDict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for hisat2.
    """
    super().__init__()
    self.programName = "hisat2"

    # Check that hisat2 exists.
    if not pe.check_dependencies([self.programName]):
        raise Exception("ERROR: " + self.programName + " not found.")

    # Option names recorded for hisat2. The previous list contained the
    # fused entry '--rgit-sec-seq' instead of '--rg' and '--omit-sec-seq',
    # and listed '--sra-acc' twice.
    self.valid_args = [
        '-x', '-1', '-2', '-U', '--sra-acc', '-S', '-q', '--qseq', '-f',
        '-r', '-c', '-s', '-u', '-5', '-3', '--phred33', '--phred64',
        '--int-quals', '--n-ceil', '--ignore-quals', '--nofw', '--norc',
        '--pen-cansplice', '--pen-noncansplice', '--pen-canintronlen',
        '--pen-noncanintronlen', '--min-intronlen', '--max-intronlen',
        '--known-splicesite-infile', '--novel-splicesite-outfile',
        '--novel-splicesite-infile', '--no-temp-splicesite',
        '--no-spliced-alignment', '--rna-strandness', '--tmo', '--dta',
        '--dta-cufflinks', '--avoid-pseudogene',
        '--no-templatelen-adjustment', '--mp', '--sp', '--no-softclip',
        '--np', '--rdg', '--rfg', '--score-min', '-k', '-I', '-X', '--fr',
        '--rf', '--ff', '--no-mixed', '--no-discordant', '-t', '--un',
        '--al', '--un-conc', '--al-conc', '--un-gz', '--summary-file',
        '--new-summary', '--quiet', '--met-file', '--met-stderr', '--met',
        '--no-head', '--no-sq', '--rg-id', '--rg', '--omit-sec-seq', '-o',
        '-p', '--reorder', '--mm', '--qc-filter', '--seed',
        '--non-deterministic', '--remove-chrname', '--add-chrname',
        '--version',
    ]

    # Keep the arguments passed by the caller.
    self.passedArgumentDict = kwargs

    # Truthiness (rather than len()) keeps a None index from raising.
    if hisat2_index and pu.check_hisatindex(hisat2_index):
        print("HISAT2 index is: " + hisat2_index)
        self.hisat2_index = hisat2_index
        self.passedArgumentDict['-x'] = self.hisat2_index
        self.index = self.hisat2_index
    else:
        print(
            "No Hisat2 index provided. Please build index now to generate an index using build_Index()...."
        )
def __init__(self, reference_gtf="", **kwargs):
    """Set up the stringtie wrapper.

    Parameters
    ----------
    reference_gtf:
        Path to a reference GTF. It is recorded (and passed as ``-G``) only
        when it is non-empty and ``pu.check_files_exist`` accepts it.
    kwargs:
        stringtie arguments, stored as passed in ``passed_args_dict``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for stringtie.
    """
    super().__init__()

    tool = "stringtie"
    self.program_name = tool

    # Check that stringtie exists.
    deps_ok = pe.check_dependencies([self.program_name])
    if not deps_ok:
        message = "ERROR: " + self.program_name + " not found."
        raise Exception(message)

    # Option names recorded for stringtie.
    self.valid_args_list = [
        '-G', '--version', '--conservative', '--rf', '--fr', '-o', '-l',
        '-f', '-L', '-m', '-a', '-j', '-t', '-c', '-s', '-v', '-g', '-M',
        '-p', '-A', '-B', '-b', '-e', '-x', '-u', '-h', '--merge', '-F',
        '-T', '-i',
    ]

    # Keep the arguments passed by the caller.
    self.passed_args_dict = kwargs

    # Record the reference GTF only when it is given and accepted.
    gtf_usable = (len(reference_gtf) > 0
                  and pu.check_files_exist(reference_gtf))
    if gtf_usable:
        self.reference_gtf = reference_gtf
        self.passed_args_dict['-G'] = reference_gtf
def __init__(self, index="", threads=None):
    """Set up the STAR wrapper.

    Parameters
    ----------
    index:
        Path to a STAR index. It is recorded only when it is truthy and
        ``pu.check_starindex`` accepts it.
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for STAR.
    """
    super().__init__()

    tool = "STAR"
    self.programName = tool
    self.dep_list = [tool]

    # Check that STAR exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.valid_args=['--help','--parametersFiles','--sysShell','--runMode','--runThreadN','--runDirPerm','--runRNGseed','--quantMode','--quantTranscriptomeBAMcompression','--quantTranscriptomeBan','--twopassMode','--twopass1readsN',
    '--genomeDir','--genomeLoad','--genomeFastaFiles','--genomeChrBinNbits','--genomeSAindexNbases','--genomeSAsparseD','--genomeSuffixLengthMax','--genomeChainFiles','--genomeFileSizes',
    '--sjdbFileChrStartEnd','--sjdbGTFfile','--sjdbGTFchrPrefix','--sjdbGTFfeatureExon','--sjdbGTFtagExonParentTranscript','--sjdbGTFtagExonParentGene','--sjdbOverhang','--sjdbScore','--sjdbInsertSave',
    '--inputBAMfile','--readFilesIn','--readFilesCommand','--readMapNumber','--readMatesLengthsIn','--readNameSeparator','--clip3pNbases','--clip5pNbases','--clip3pAdapterSeq','--clip3pAdapterMMp','--clip3pAfterAdapterNbases',
    '--limitGenomeGenerateRAM','--limitIObufferSize','--limitOutSAMoneReadBytes','--limitOutSJoneRead','--limitOutSJcollapsed','--limitBAMsortRAM ','--limitSjdbInsertNsj','--outFileNamePrefix','--outTmpDir','--outTmpKeep',
    '--outStd','--outReadsUnmapped','--outQSconversionAdd','--outMultimapperOrder','--outSAMtype','--outSAMmode','--outSAMstrandField','--outSAMattributes','--outSAMattrIHstart','--outSAMunmapped','--outSAMorder',
    '--outSAMprimaryFlag','--outSAMreadID','--outSAMmapqUnique','--outSAMflagOR','--outSAMflagAND','--outSAMattrRGline','--outSAMheaderHD','--outSAMheaderPG','--outSAMheaderCommentFile','--outSAMfilter','--outSAMmultNmax',
    '--outBAMcompression','--outBAMsortingThreadN','--bamRemoveDuplicatesType','--bamRemoveDuplicatesMate2basesN','--outWigType','--outWigStrand','--outWigReferencesPrefix','--outWigNorm','--outFilterType',
    '--outFilterMultimapScoreRange','--outFilterMultimapNmax','--outFilterMismatchNmax','--outFilterMismatchNoverLmax','--outFilterMismatchNoverReadLmax','--outFilterScoreMin','--outFilterScoreMinOverLread',
    '--outFilterMatchNmin','--outFilterMatchNminOverLread','--outFilterIntronMotifs','--outSJfilterReads','--outSJfilterOverhangMin','--outSJfilterCountUniqueMin','--outSJfilterCountTotalMin','--outSJfilterDistToOtherSJmin',
    '--outSJfilterIntronMaxVsReadN','--scoreGap','--scoreGapNoncan','--scoreGapGCAG ','--scoreGapATAC','--scoreGenomicLengthLog2scale','--scoreDelOpen','--scoreDelBase','--scoreInsOpen','--scoreInsBase','--scoreStitchSJshift',
    '--seedSearchStartLmax','--seedSearchStartLmaxOverLread','--seedSearchLmax','--seedMultimapNmax','--seedPerReadNmax','--seedPerWindowNmax','--seedNoneLociPerWindow','--alignIntronMin','--alignIntronMax','--alignMatesGapMax',
    '--alignSJoverhangMin','--alignSJstitchMismatchNmax','--alignSJDBoverhangMin','--alignSplicedMateMapLmin','--alignSplicedMateMapLminOverLmate','--alignWindowsPerReadNmax','--alignTranscriptsPerWindowNmax','--alignTranscriptsPerReadNmax',
    '--alignEndsType','--alignEndsProtrude','--alignSoftClipAtReferenceEnds','--winAnchorMultimapNmax','--winBinNbits','--winAnchorDistNbins','--winFlankNbins','--winReadCoverageRelativeMin','--winReadCoverageBasesMin',
    '--chimOutType','--chimSegmentMin','--chimScoreMin','--chimScoreDropMax','--chimScoreSeparation','--chimScoreJunctionNonGTAG','--chimJunctionOverhangMin','--chimSegmentReadGapMax','--chimFilter','--chimMainSegmentMultNmax']
    """

    # Fall back to the CPU count when no thread count was given.
    self.threads = threads if threads else os.cpu_count()

    # Record the index only when it passes the project's index check.
    index_usable = index and pu.check_starindex(index)
    if not index_usable:
        print(
            "No STAR index provided. Please build index now to generate an index using build_index()...."
        )
        return

    print("STAR index is: " + index)
    self.star_index = index
def __init__(self, index, threads=None, mode=None):
    """Set up the diamond wrapper.

    Parameters
    ----------
    index:
        Index to use; stored in ``self.index`` and then checked through
        ``self.check_index()``.
    threads: int
        Number of threads to use; a falsy value falls back to
        ``os.cpu_count()``.
    mode:
        One of 'fast', 'sensitive' or 'more-sensitive'. It is stored with a
        leading ``--``; any other value results in ``'--fast'``.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for diamond.
    """
    tool = "diamond"
    self.programName = tool
    self.dep_list = [tool]

    # Check that the program exists.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + self.programName + " not found."
        raise Exception(message)

    # The string below is an inert listing of option names; it is never
    # assigned or executed.
    """
    self.valid_args=['-p','--db','-d','--out','-o','--outfmt','-f','--verbose','--log','--quiet','--in','--query','-q','--strand','--un','--al','--unal','--max-target-seqs','-k','--top','--range-culling','--compress','--evalue','-e',
    '--min-score','--id','--query-cover','--subject-cover','--sensitive','--more-sensitive','--block-size','-b','--index-chunks','-c','--tmpdir','-t','--gapopen','--gapextend','--frameshift','-F','--long-reads','--matrix','--custom-matrix',
    '--lambda','--K','--comp-based-stats','--masking','--query-gencode','--salltitles','--sallseqid','--no-self-hits','--taxonmap','--taxonnodes','--taxonlist','--algo','--bin','--min-orf','-l','--freq-sd','--id2','--window','-w',
    '--xdrop','-x','--ungapped-score','--hit-band','--hit-score','--gapped-xdrop','-X','--band','--shapes','-s','--shape-mask','--index-mode','--rank-ratio','--rank-ratio2','--max-hsps','--range-cover','--dbsize','--no-auto-append',
    '--xml-blord-format','--daa','-a','--forwardonly','--seq']
    """

    # Sub-command names recorded for diamond.
    self.valid_commands = [
        'makedb', 'blastp', 'blastx', 'view',
        'help', 'version', 'getseq', 'dbinfo',
    ]

    # Use the CPU count by default.
    self.threads = threads if threads else os.cpu_count()

    # Select the mode flag, defaulting to --fast.
    valid_modes = ['fast', 'sensitive', 'more-sensitive']
    self.mode = ('--' + mode) if mode in valid_modes else '--fast'

    # The index is stored before check_index() is called on the instance.
    self.index = index
    if self.check_index():
        self.index = index
    else:
        print("No valid index provided. Please build index...")
def init_from_accession(self, srr_accession, location):
    """Initialise this SRA object from an accession and a save location.

    Records the accession and its data folder, picks up an existing
    ``<accession>.sra`` file in that folder (path, size and layout), and
    then runs ``search_fastq`` on the folder.

    Parameters
    ----------
    srr_accession:
        SRR accession; must not be ``None``.
    location:
        Base directory for the data. ``None`` falls back to the current
        working directory.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for prefetch and
        fasterq-dump, or if ``srr_accession`` is ``None``.
    """
    self.dep_list = ['prefetch', "fasterq-dump"]
    tools_present = pe.check_dependencies(self.dep_list)
    if not tools_present:
        raise Exception("ERROR: Please install missing programs.")

    if srr_accession is None:
        raise Exception("Please provide a valid accession")

    base_dir = os.getcwd() if location is None else location

    self.srr_accession = srr_accession
    # Use a directory named <srr_accession> as the location.
    self.location = os.path.join(base_dir, self.srr_accession)

    # Check for an already present SRA file.
    sra_candidate = os.path.join(self.location, self.srr_accession + ".sra")
    if pu.check_files_exist(sra_candidate):
        pu.print_green(self.srr_accession + ".sra exists.")
        self.localSRAFilePath = sra_candidate
        self.sraFileSize = pu.get_file_size(self.localSRAFilePath)
        # Test whether the file is paired or single end.
        paired = pe.is_paired(self.localSRAFilePath)
        self.layout = "PAIRED" if paired else "SINGLE"

    # Check for fastq files.
    self.search_fastq(self.location)
def check_dependency(self, deps_list):
    """Check the dependencies of a tool/command.

    Parameters
    ----------
    deps_list : List
        Commands to check. An empty or ``None`` list is accepted without
        any check.

    Raises
    ------
    OSError
        If ``pe.check_dependencies`` returns a falsy value for a non-empty
        ``deps_list``.

    Returns
    -------
    bool
        ``True`` when no exception was raised.
    """
    # Nothing to verify for an empty/None list.
    if not deps_list:
        return True

    if pe.check_dependencies(deps_list):
        return True

    raise OSError("CommandNotFoundException")
def __init__(self):
    """Set up the TransDecoder wrapper.

    Raises
    ------
    Exception
        If ``pe.check_dependencies`` returns a falsy value for
        TransDecoder.LongOrfs and TransDecoder.Predict.
    """
    self.programName = "TransDecoder.LongOrfs"
    self.dep_list = ["TransDecoder.LongOrfs", "TransDecoder.Predict"]

    # Check that both programs exist.
    deps_ok = pe.check_dependencies(self.dep_list)
    if not deps_ok:
        message = "ERROR: " + str(self.dep_list) + " not found."
        raise Exception(message)