Exemplo n.º 1
0
    def __init__(self, **kwargs):
        """
        Set up the trim_galore wrapper and remember the caller's options.

        Parameters
        ----------
        kwargs:
            trim_galore arguments, stored unchanged in
            ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            ``self.dep_list`` (trim_galore and cutadapt).
        """
        # parent initialisation comes first
        super().__init__()

        tool = "trim_galore"
        self.programName = tool
        # cutadapt is checked together with trim_galore itself
        self.dep_list = [tool, "cutadapt"]

        # option names this wrapper lists for trim_galore
        self.valid_args = [
            "--cores", "-v", "-q", "--phred33", "--phred64",
            "--fastqc", "--fastqc_args",
            "-a", "-a2", "--illumina", "--nextera", "--small_rna",
            "--consider_already_trimmed", "--max_length", "--stringency",
            "-e", "--gzip", "--dont_gzip", "--length", "--max_n", "--trim-n",
            "-o", "--no_report_file", "--suppress_warn",
            "--clip_R1", "--clip_R2",
            "--three_prime_clip_R1", "--three_prime_clip_R2",
            "--2colour", "--path_to_cutadapt", "--basename", "-j",
            "--hardtrim5", "--hardtrim3", "--clock", "--polyA",
            "--rrbs", "--non_directional", "--keep",
            "--paired", "-t", "--retain_unpaired", "-r1", "-r2",
        ]

        # stop here if any of the required programs is unavailable
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # keep whatever options were supplied at construction time
        self.passedArgumentDict = kwargs
Exemplo n.º 2
0
    def __init__(self, **kwargs):
        """
        Set up the Trinity wrapper and remember the caller's options.

        Parameters
        ----------
        kwargs:
            Trinity arguments, stored unchanged in ``self.passed_args_dict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            ``self.dep_list`` (Trinity, jellyfish and bowtie2).
        """
        super().__init__()

        tool = "Trinity"
        self.program_name = tool
        self.dep_list = [tool, "jellyfish", "bowtie2"]

        # Trinity and its helper programs must all be available
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # option names this wrapper lists for Trinity
        self.valid_args_list = [
            "--seqType", "--max_memory", "--left", "--right", "--single",
            "--SS_lib_type", "--CPU", "--min_contig_length", "--long_reads",
            "--genome_guided_bam", "--jaccard_clip", "--trimmomatic",
            "--normalize_reads", "--no_distributed_trinity_exec",
            "--output", "--full_cleanup", "--cite", "--verbose", "--version",
            "--show_full_usage_info", "--KMER_SIZE", "--prep",
            "--no_cleanup", "--no_version_check", "--min_kmer_cov",
            "--inchworm_cpu", "--no_run_inchworm", "--max_reads_per_graph",
            "--min_glue", "--no_bowtie", "--no_run_chrysalis", "--bfly_opts",
            "--PasaFly", "--CuffFly", "--group_pairs_distance",
            "--path_reinforcement_distance", "--no_path_merging",
            "--min_per_id_same_path", "--max_diffs_same_path",
            "--max_internal_gap_same_path",
            "--bflyHeapSpaceMax", "--bflyHeapSpaceInit", "--bflyGCThreads",
            "--bflyCPU", "--bflyCalculateCPU", "--bfly_jar",
            "--quality_trimming_params", "--normalize_max_read_cov",
            "--normalize_by_read_set", "--genome_guided_max_intron",
            "--genome_guided_min_coverage",
            "--genome_guided_min_reads_per_partition",
            "--grid_conf", "--grid_node_CPU", "--grid_node_max_memory",
        ]

        # remember the options supplied at construction time
        self.passed_args_dict = kwargs
Exemplo n.º 3
0
 def __init__(self,kallisto_index,**kwargs):
     """
     Set up the kallisto wrapper.

     Parameters
     ----------
     kallisto_index:
         Index passed to ``pu.check_files_exist``; when it is non-empty and
         that check succeeds it is stored and set as the ``-i`` argument.
     kwargs:
         kallisto arguments, stored in ``self.passedArgumentDict``.

     Raises
     ------
     Exception
         When ``pe.check_dependencies`` returns a falsy value for kallisto.
     """
     super().__init__()
     self.programName="kallisto"
     self.dep_list=[self.programName]
     deps_found=pe.check_dependencies(self.dep_list)
     if not deps_found:
         raise Exception("ERROR: {} not found.".format(self.programName))

     # option names per kallisto sub-command
     # kallisto index
     self.validArgsIndex=[
         '-i','--index','-k','--kmer-size','--make-unique',
     ]
     # kallisto quant
     self.validArgsQuant=[
         '-i','--index','-o','--output-dir','--bias',
         '-b','--bootstrap-samples','--seed','--plaintext','--fusion',
         '--single','--fr-stranded','--rf-stranded',
         '-l','--fragment-length','-s','--sd',
         '-t','--threads','--pseudobam',
     ]
     # kallisto pseudo
     self.validArgsPseudo=[
         '-i','--index','-o','--output-dir',
         '-u','--umi','-b','--batch','--single',
         '-l','--fragment-length','-s','--sd',
         '-t','--threads','--pseudobam',
     ]
     # kallisto h5dump
     self.validArgsh5dump=['-o','--output-dir']

     # combined list across all sub-commands, as produced by pu.get_union
     self.valid_args=pu.get_union(
         self.validArgsIndex,
         self.validArgsQuant,
         self.validArgsPseudo,
         self.validArgsh5dump,
     )

     # keep the options supplied at construction time
     self.passedArgumentDict=kwargs

     # the file check is only attempted for a non-empty index value
     index_usable=len(kallisto_index)>0 and pu.check_files_exist(kallisto_index)
     if not index_usable:
         print("No kallisto index provided. Please use build_index() now to generate an index...")
     else:
         print("kallisto index is: "+kallisto_index)
         self.kallisto_index=kallisto_index
         self.passedArgumentDict['-i']=self.kallisto_index
Exemplo n.º 4
0
    def __init__(self,threads=None):
        
        super().__init__()
        self.program_name="cufflinks"
        #check if stringtie exists
        if not pe.check_dependencies([self.program_name]):
            raise Exception("ERROR: "+ self.program_name+" not found.")
            
        
        #define valid arguments
        """
        self.cufflinksArgsList=['-h','--help','-o','--output-dir','-p','--num-threads','--seed','-G','--GTF','-g','--GTF-guide','-M','--mask-file','-b','--frag-bias-correct','-u','--multi-read-correct','--library-type','--library-norm-method',
'-m','--frag-len-mean','-s','--frag-len-std-dev','--max-mle-iterations','--compatible-hits-norm','--total-hits-norm','--num-frag-count-draws','--num-frag-assign-draws','--max-frag-multihits','--no-effective-length-correction',
'--no-length-correction','-N','--upper-quartile-norm','--raw-mapped-norm','-L','--label','-F','--min-isoform-fraction','-j','--pre-mrna-fraction','-I','--max-intron-length','-a','--junc-alpha','-A','--small-anchor-fraction',
'--min-frags-per-transfrag','--overhang-tolerance','--max-bundle-length','--max-bundle-frags','--min-intron-length','--trim-3-avgcov-thresh','--trim-3-dropoff-frac','--max-multiread-fraction','--overlap-radius',
'--no-faux-reads','--3-overhang-tolerance','--intron-overhang-tolerance','-v','--verbose','-q','--quiet','--no-update-check']

        self.cuffcompareArgsList=['-h','-i','-r','-R','-Q','-M','-N','-s','-e','-d','-p','-C','-F','-G','-T','-V']
        self.cuffquantArgsList=['-o','--output-dir','-p','--num-threads','-M','--mask-file','-b','--frag-bias-correct','-u','--multi-read-correct','--library-type','-m','--frag-len-mean','-s','--frag-len-std-dev','-c','--min-alignment-count',
'--max-mle-iterations','-v','--verbose','-q','--quiet','--seed','--no-update-check','--max-bundle-frags','--max-frag-multihits','--no-effective-length-correction','--no-length-correction','--read-skip-fraction',
'--no-read-pairs','--trim-read-length','--no-scv-correction']
        self.cuffdiffArgsList=['-o','--output-dir','-L','--labels','--FDR','-M','--mask-file','-C','--contrast-file','-b','--frag-bias-correct','-u','--multi-read-correct','-p','--num-threads','--no-diff','--no-js-tests','-T','--time-series',
'--library-type','--dispersion-method','--library-norm-method','-m','--frag-len-mean','-s','--frag-len-std-dev','-c','--min-alignment-count','--max-mle-iterations','--compatible-hits-norm','--total-hits-norm',
' -v','--verbose','-q','--quiet','--seed','--no-update-check','--emit-count-tables','--max-bundle-frags','--num-frag-count-draws','--num-frag-assign-draws','--max-frag-multihits','--min-outlier-p','--min-reps-for-js-test',
'--no-effective-length-correction','--no-length-correction','-N','--upper-quartile-norm','--geometric-norm','--raw-mapped-norm','--poisson-dispersion','--read-skip-fraction','--no-read-pairs','--trim-read-length','--no-scv-correction']
        self.cuffnormArgsList=['-o','--output-dir','-L','--labels','--norm-standards-file','-p','--num-threads','--library-type','--library-norm-method','--output-format','--compatible-hits-norm','--total-hits-norm','-v','--verbose','-q','--quiet','--seed','--no-update-check']
        self.cuffmergeArgsList=['h','--help','-o','-g','–-ref-gtf','-p','–-num-threads','-s','-–ref-sequence']
        
        self.valid_args_list=pu.get_union(self.cufflinksArgsList,self.cuffcompareArgsList,self.cuffquantArgsList,self.cuffdiffArgsList,self.cuffnormArgsList,self.cuffmergeArgsList)
        """
        
        if not threads:
            threads=os.cpu_count()
        
        self.threads=threads
Exemplo n.º 5
0
 def init_from_accession(self,srr_accession,directory):
     """
     Initialise this SRA object from an accession and a base directory.

     The working directory becomes ``<directory>/<srr_accession>``. If
     ``self.search_fastq`` reports nothing for that directory, the method
     tries ``self.download_sra()`` and then ``self.download_fastq()``.

     Parameters
     ----------
     srr_accession:
         Accession to use; must be truthy.
     directory:
         Base directory; a falsy value falls back to the current working
         directory.

     Returns
     -------
     True when ``self.search_fastq`` succeeds, otherwise the return value of
     ``self.download_fastq()``.

     Raises
     ------
     OSError
         When ``pe.check_dependencies`` fails for prefetch/fasterq-dump.
     ValueError
         When ``srr_accession`` is falsy.
     """
     #check if programs exist
     self.dep_list=['prefetch',"fasterq-dump"]
     if not pe.check_dependencies(self.dep_list):
         raise OSError("ERROR: Please install missing programs.")

     if not srr_accession:
         raise ValueError("Please provide a valid accession")

     if not directory:
         directory=os.getcwd()

     # Store the validated accession on the instance. Previously the path
     # below read self.srr_accession without this method ever assigning it,
     # so the call failed (or used a stale value) unless the attribute had
     # been set elsewhere.
     self.srr_accession=srr_accession

     #create a dir named <srr_accession> and use as directory
     self.directory=os.path.join(directory,self.srr_accession)

     #check if fastq files exist
     if not self.search_fastq(self.directory):
         #download sra and run fqdump
         if not self.download_sra():
             pu.print_boldred('prefetch failed!!! Trying fasterq-dump...')
         #run fasterq-dump whether or not the prefetch step succeeded
         return self.download_fastq()

     return True
Exemplo n.º 6
0
    def __init__(self, threads=None):
        """
        Set up the trim_galore wrapper.

        Parameters
        ----------
        threads: int
            Num threads to use; a falsy value falls back to 8.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            ``self.dep_list`` (trim_galore and cutadapt).
        """
        # parent initialisation comes first
        super().__init__()

        tool = "trim_galore"
        self.programName = tool
        self.dep_list = [tool, 'cutadapt']
        # The string below is an inert literal (never assigned or executed);
        # it holds the formerly maintained valid-argument list.
        """
        self.valid_args=['--cores','-v','-q','--phred33','--phred64','--fastqc','--fastqc_args','-a','-a2',
                            '--illumina','--nextera','--small_rna','--consider_already_trimmed',
                            '--max_length','--stringency','-e','--gzip','--dont_gzip','--length',
                            '--max_n','--trim-n','-o','--no_report_file','--suppress_warn',
                            '--clip_R1','--clip_R2','--three_prime_clip_R1','--three_prime_clip_R2',
                            '--2colour','--path_to_cutadapt','--basename','-j','--hardtrim5','--hardtrim3',
                            '--clock','--polyA','--rrbs','--non_directional','--keep','--paired','-t',
                            '--retain_unpaired','-r1','-r2']
        """
        # stop here if any of the required programs is unavailable
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # trimgalore recommends max 8 threads, so that is the fallback
        self.threads = threads if threads else 8
Exemplo n.º 7
0
 def __init__(self,threads=None,max_memory=None):
     """
     Set up the Trinity wrapper.

     Parameters
     ----------
     threads: int
         Number of threads; a falsy value falls back to ``os.cpu_count()``.
     max_memory:
         Memory limit; a falsy value falls back to floor(80%) of the
         physical memory reported by ``os.sysconf``, expressed in GiB.

     Raises
     ------
     Exception
         When ``pe.check_dependencies`` returns a falsy value for
         ``self.dep_list`` (Trinity, jellyfish and bowtie2).
     """
     super().__init__()

     tool="Trinity"
     self.program_name=tool
     self.dep_list=[tool,'jellyfish','bowtie2']

     # Trinity and its helper programs must all be available
     deps_found=pe.check_dependencies(self.dep_list)
     if not deps_found:
         raise Exception("ERROR: {} not found.".format(tool))

     # The string below is an inert literal (never assigned or executed);
     # it holds the formerly maintained valid-argument list.
     """
     self.valid_args_list=['--seqType','--max_memory','--left','--right','--single','--SS_lib_type','--CPU','--min_contig_length',
                           '--long_reads','--genome_guided_bam','--jaccard_clip','--trimmomatic','--normalize_reads','--no_distributed_trinity_exec',
                           '--output','--full_cleanup','--cite','--verbose','--version','--show_full_usage_info','--KMER_SIZE','--prep','--no_cleanup',
                           '--no_version_check','--min_kmer_cov','--inchworm_cpu','--no_run_inchworm','--max_reads_per_graph','--min_glue','--no_bowtie',
                           '--no_run_chrysalis','--bfly_opts','--PasaFly','--CuffFly','--group_pairs_distance','--path_reinforcement_distance','--no_path_merging',
                           '--min_per_id_same_path','--max_diffs_same_path','--max_internal_gap_same_path','--bflyHeapSpaceMax','--bflyHeapSpaceInit',
                           '--bflyGCThreads','--bflyCPU','--bflyCalculateCPU','--bfly_jar','--quality_trimming_params','--normalize_max_read_cov',
                           '--normalize_by_read_set','--genome_guided_max_intron','--genome_guided_min_coverage','--genome_guided_min_reads_per_partition',
                           '--grid_conf','--grid_node_CPU','--grid_node_max_memory']
     """

     # default to os.cpu_count() when no (or a falsy) thread count is given
     self.threads=threads if threads else os.cpu_count()

     # default memory: floor of 80% of the physical memory, in GiB
     if not max_memory:
         page_size=os.sysconf('SC_PAGE_SIZE')
         page_count=os.sysconf('SC_PHYS_PAGES')
         physical_gib=(page_size*page_count)/(1024.**3)
         max_memory=math.floor(physical_gib*0.8)

     self.max_memory=max_memory
Exemplo n.º 8
0
    def __init__(self, **kwargs):
        """
        Set up the RiboCode wrapper.

        Parameters
        ----------
        kwargs:
            RiboCode arguments, stored unchanged in
            ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for RiboCode.
        """
        tool = "RiboCode"
        self.programName = tool
        self.dep_list = [tool]

        # RiboCode itself is the only dependency checked
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # no option names are listed for this tool
        self.valid_args = []
        self.passedArgumentDict = kwargs
Exemplo n.º 9
0
    def __init__(self, salmon_index, threads=None):
        """
        Set up the salmon wrapper.

        Parameters
        ----------
        salmon_index:
            Index passed to ``pu.check_salmon_index``; stored as
            ``self.salmon_index`` when it is non-empty and that check succeeds.
        threads: int
            Number of threads; a falsy value falls back to ``os.cpu_count()``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for salmon.
        """
        super().__init__()

        tool = "salmon"
        self.programName = tool
        self.dep_list = [tool]
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))
        # The string below is an inert literal (never assigned or executed);
        # it holds the formerly maintained valid-argument lists.
        """
        ##salmon index
        self.validArgsIndex=['-v','--version','-h','--help','-t','--transcripts','-k','--kmerLen','-i',
                             '--index','--gencode','--keepDuplicates','-p','--threads','--perfectHash',
                             '--type','-s','--sasamp']
        ##salmon quant read
        self.validArgsQuantReads=['--help-reads','-i','--index','-l','--libType','-r','--unmatedReads',
                                  '-1','--mates1','-2','--mates2','-o','--output','--discardOrphansQuasi',
                                  '--allowOrphansFMD','--seqBias','--gcBias','-p','--threads','--incompatPrior',
                                  '-g','--geneMap','-z','--writeMappings','--meta','--alternativeInitMode',
                                  '--auxDir','-c','--consistentHits','--dumpEq','-d','--dumpEqWeights',
                                  '--fasterMapping','--minAssignedFrags','--reduceGCMemory','--biasSpeedSamp',
                                  '--strictIntersect','--fldMax','--fldMean','--fldSD','-f','--forgettingFactor',
                                  '-m','--maxOcc','--initUniform','-w','--maxReadOcc','--noLengthCorrection',
                                  '--noEffectiveLengthCorrection','--noFragLengthDist','--noBiasLengthThreshold',
                                  '--numBiasSamples','--numAuxModelSamples','--numPreAuxModelSamples','--useVBOpt',
                                  '--rangeFactorizationBins','--numGibbsSamples','--numBootstraps','--thinningFactor',
                                  '-q','--perTranscriptPrior','--vbPrior','--writeOrphanLinks','--writeUnmappedNames',
                                  '-x','--quasiCoverage']
        ##salmon quant alignment
        self.validArgsQuantAlign=['--help-alignment','-l','--libType','-a','--alignments','-t','--targets','-p',
                                  '--threads','--seqBias','--gcBias','--incompatPrior','--useErrorModel',
                                  '-o','--output','--meta','-g','--geneMap','--alternativeInitMode','--auxDir'
                                  ,'--noBiasLengthThreshold','--dumpEq','-d','--dumpEqWeights','--fldMax',
                                  '--fldMean','--fldSD','-f','--forgettingFactor','--minAssignedFrags',
                                  '--gencode','--reduceGCMemory','--biasSpeedSamp','--mappingCacheMemoryLimit',
                                  '-w','--maxReadOcc','--noEffectiveLengthCorrection','--noFragLengthDist',
                                  '-v','--useVBOpt','--rangeFactorizationBins','--perTranscriptPrior','--vbPrior',
                                  '--numErrorBins','--numBiasSamples','--numPreAuxModelSamples','--numAuxModelSamples',
                                  '-s','--sampleOut','-u','--sampleUnaligned','--numGibbsSamples','--numBootstraps',
                                  '--thinningFactor']
        ##salmon quantmerge
        self.validArgsQuantMerge=['--quants','--names','-c','--column','-o','--output']

        self.valid_args=pu.get_union(self.validArgsIndex,self.validArgsQuantReads,self.validArgsQuantAlign,self.validArgsQuantMerge)
        """

        # default to os.cpu_count() when no (or a falsy) thread count is given
        self.threads = threads if threads else os.cpu_count()

        # the index check is only attempted for a non-empty index value
        index_usable = len(salmon_index) > 0 and pu.check_salmon_index(salmon_index)
        if not index_usable:
            print(
                "No salmon index provided. Please build index now to generate an index..."
            )
        else:
            print("salmon index is: " + salmon_index)
            self.salmon_index = salmon_index
Exemplo n.º 10
0
    def __init__(self, threads=None, max_memory=None):
        """
        Set up the portcullis wrapper.

        Parameters
        ----------
        threads: int
            Number of threads; a falsy value falls back to ``os.cpu_count()``.
        max_memory:
            Accepted but not used by this constructor.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            portcullis.
        """
        tool = "portcullis"
        self.programName = tool
        self.dep_list = [tool]

        # portcullis itself is the only dependency checked
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # every CPU reported by the OS is used unless a count was given
        self.threads = threads if threads else os.cpu_count()
Exemplo n.º 11
0
    def __init__(self, threads=None, max_memory=None):
        """
        Set up the samtools wrapper.

        Parameters
        ----------
        threads: int
            Number of threads to use. A falsy value falls back to 80% of the
            CPUs reported by ``os.cpu_count()``, but never fewer than 1.
        max_memory:
            Stored unchanged in ``self.max_memory``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for samtools.
        """
        self.programName = "samtools"
        #check if samtools exists
        if not pe.check_dependencies([self.programName]):
            raise Exception("ERROR: " + self.programName + " not found.")

        #Default: if threads are not given use 80% of the CPUs to avoid memory issues
        if not threads:
            # os.cpu_count() may return None when the count is undetermined,
            # and int(1 * 0.8) is 0 on a single-core host; both cases would
            # previously yield an unusable thread count, so clamp to >= 1.
            available_cpus = os.cpu_count() or 1
            threads = max(1, int(available_cpus * 0.8))
        self.threads = threads

        self.max_memory = max_memory
Exemplo n.º 12
0
    def __init__(self, **kwargs):
        """
        Set up the samtools wrapper and remember the caller's options.

        Parameters
        ----------
        kwargs:
            samtools arguments, stored unchanged in
            ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for samtools.
        """
        tool = "samtools"
        self.programName = tool

        # samtools itself is the only dependency checked
        samtools_found = pe.check_dependencies([tool])
        if not samtools_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # single-letter flags listed for samtools, split on whitespace
        self.valid_args = (
            "-b -C -1 -u -h -H -c -o -U -t -L "
            "-r -R -q -l -m -f -F -G -s -M -x "
            "-B -? -S -O -T -@"
        ).split()

        self.passedArgumentDict = kwargs
Exemplo n.º 13
0
 def __init__(self,threads=None):
     """
     Set up the stringtie wrapper.

     Parameters
     ----------
     threads: int
         Number of threads; a falsy value falls back to ``os.cpu_count()``.

     Raises
     ------
     Exception
         When ``pe.check_dependencies`` returns a falsy value for stringtie.
     """
     super().__init__()

     tool="stringtie"
     self.program_name=tool

     # stringtie itself is the only dependency checked
     stringtie_found=pe.check_dependencies([tool])
     if not stringtie_found:
         raise Exception("ERROR: {} not found.".format(tool))

     # every CPU reported by the OS is used unless a count was given
     self.threads=threads if threads else os.cpu_count()
     
     """
Exemplo n.º 14
0
    def __init__(self, threads=None, max_memory=None):
        """
        Set up the bbduk.sh wrapper.

        Parameters
        ----------
        threads: int
            num threads to use; a falsy value falls back to
            ``os.cpu_count()``.
        max_memory:
            Max memory to use in GB; a falsy value falls back to the floor of
            the physical memory reported by ``os.sysconf``, in GiB.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for bbduk.sh.
        """
        # parent initialisation comes first
        super().__init__()

        tool = "bbduk.sh"
        self.programName = tool
        self.dep_list = [tool]

        # bbduk.sh itself is the only dependency checked
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))
        # The string below is an inert literal (never assigned or executed);
        # it holds the formerly maintained valid-argument list.
        """
        self.valid_args=['in','in2','ref','literal','touppercase','interleaved','qin','reads','copyundefined',
                            'samplerate','samref','out','out2','outm','outm2','outs','stats','refstats','rpkm',
                            'dump','duk','nzo','overwrite','showspeed','ziplevel','fastawrap','qout','statscolumns',
                            'rename','refnames','trd','ordered','maxbasesout','maxbasesoutm','','json','bhist','qhist',
                            'qchist','aqhist','bqhist','lhist','phist','gchist','ihist','gcbins','maxhistlen','histbefore',
                            'ehist','qahist','indelhist','mhist','idhist','idbins','varfile','vcf','ignorevcfindels',
                            'k','rcomp','maskmiddle','minkmerhits','minkmerfraction','mincovfraction','hammingdistance',
                            'qhdist','editdistance','hammingdistance2','qhdist2','editdistance2','forbidn','removeifeitherbad',
                            'trimfailures','findbestmatch','skipr1','skipr2','ecco','recalibrate','sam','le.','amino',
                            'threads','prealloc','monitor','minrskip','maxrskip','rskip','qskip','speed','ktrim','kmask',
                            'maskfullycovered','ksplit','mink','qtrim','trimq','trimclip','minlength','mlf','maxlength',
                            'minavgquality','maqb','minbasequality','maxns','mcb','ottm','tp','tbo','strictoverlap',
                            'minoverlap','mininsert','tpe','forcetrimleft','forcetrimright','forcetrimright2',
                            'forcetrimmod','restrictleft','restrictright','mingc','maxgc','gcpairs','tossjunk',
                            'swift','chastityfilter','barcodefilter','barcodes','xmin','ymin','xmax','ymax','trimpolya',
                            'trimpolygleft','trimpolygright','trimpolyg','filterpolyg','pratio','plen','entropy','entropywindow',
                            'entropyk','minbasefrequency','entropytrim','entropymask','entropymark','cardinality',
                            'cardinalityout','loglogk','loglogbuckets','-Xmx','-eoom','-da']
        """

        # every CPU reported by the OS is used unless a count was given
        self.threads = threads if threads else os.cpu_count()

        # default memory: floor of the total physical memory, in GiB
        if not max_memory:
            page_size = os.sysconf('SC_PAGE_SIZE')
            page_count = os.sysconf('SC_PHYS_PAGES')
            physical_gib = (page_size * page_count) / (1024.**3)
            max_memory = math.floor(physical_gib)

        self.max_memory = max_memory
Exemplo n.º 15
0
    def __init__(self, **kwargs):
        """
        Set up the bbduk.sh wrapper and remember the caller's options.

        Parameters
        ----------
        kwargs:
            bbduk.sh arguments, stored unchanged in
            ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for bbduk.sh.
        """
        # parent initialisation comes first
        super().__init__()

        tool = "bbduk.sh"
        self.programName = tool
        self.dep_list = [tool]

        # bbduk.sh itself is the only dependency checked
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # Option names listed for bbduk.sh.
        # NOTE(review): the list contains an empty string '' and the entry
        # 'le.'; both look unintended -- confirm before relying on them.
        self.valid_args = [
            'in', 'in2', 'ref', 'literal', 'touppercase', 'interleaved',
            'qin', 'reads', 'copyundefined', 'samplerate', 'samref',
            'out', 'out2', 'outm', 'outm2', 'outs',
            'stats', 'refstats', 'rpkm', 'dump', 'duk', 'nzo',
            'overwrite', 'showspeed', 'ziplevel', 'fastawrap', 'qout',
            'statscolumns', 'rename', 'refnames', 'trd', 'ordered',
            'maxbasesout', 'maxbasesoutm', '', 'json',
            'bhist', 'qhist', 'qchist', 'aqhist', 'bqhist', 'lhist',
            'phist', 'gchist', 'ihist', 'gcbins', 'maxhistlen',
            'histbefore', 'ehist', 'qahist', 'indelhist', 'mhist',
            'idhist', 'idbins', 'varfile', 'vcf', 'ignorevcfindels',
            'k', 'rcomp', 'maskmiddle', 'minkmerhits', 'minkmerfraction',
            'mincovfraction', 'hammingdistance', 'qhdist', 'editdistance',
            'hammingdistance2', 'qhdist2', 'editdistance2', 'forbidn',
            'removeifeitherbad', 'trimfailures', 'findbestmatch',
            'skipr1', 'skipr2', 'ecco', 'recalibrate', 'sam', 'le.',
            'amino', 'threads', 'prealloc', 'monitor',
            'minrskip', 'maxrskip', 'rskip', 'qskip', 'speed',
            'ktrim', 'kmask', 'maskfullycovered', 'ksplit', 'mink',
            'qtrim', 'trimq', 'trimclip', 'minlength', 'mlf', 'maxlength',
            'minavgquality', 'maqb', 'minbasequality', 'maxns', 'mcb',
            'ottm', 'tp', 'tbo', 'strictoverlap', 'minoverlap',
            'mininsert', 'tpe',
            'forcetrimleft', 'forcetrimright', 'forcetrimright2',
            'forcetrimmod', 'restrictleft', 'restrictright',
            'mingc', 'maxgc', 'gcpairs', 'tossjunk', 'swift',
            'chastityfilter', 'barcodefilter', 'barcodes',
            'xmin', 'ymin', 'xmax', 'ymax',
            'trimpolya', 'trimpolygleft', 'trimpolygright', 'trimpolyg',
            'filterpolyg', 'pratio', 'plen',
            'entropy', 'entropywindow', 'entropyk', 'minbasefrequency',
            'entropytrim', 'entropymask', 'entropymark',
            'cardinality', 'cardinalityout', 'loglogk', 'loglogbuckets',
            '-Xmx', '-eoom', '-da',
        ]

        self.passedArgumentDict = kwargs
Exemplo n.º 16
0
    def __init__(self, **kwargs):
        """
        Set up the portcullis wrapper and remember the caller's options.

        Parameters
        ----------
        kwargs:
            portcullis arguments, stored unchanged in
            ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            portcullis.
        """
        tool = "portcullis"
        self.programName = tool
        self.dep_list = [tool]

        # portcullis itself is the only dependency checked
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # option names this wrapper lists for portcullis
        self.valid_args = [
            "-t", "--threads", "-v", "--verbose", "--help",
            "-o", "-b", "--bam_filter",
            "--exon_gff", "--intron_gff", "--source", "--force", "--copy",
            "--use_csi", "--orientation", "--strandedness", "--separate",
            "--extra", "-r", "--max_length", "--canonical", "--min_cov",
            "--save_bad",
        ]

        self.passedArgumentDict = kwargs
Exemplo n.º 17
0
    def init_from_accession(self, srr_accession, location):
        """Prepare this SRA object from an accession and a base location.

        Stores the accession in ``self.srr_accession`` and sets
        ``self.location`` to ``<location>/<srr_accession>``. ``location``
        falls back to the current working directory when it is None.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` fails for prefetch/fasterq-dump,
            or when ``srr_accession`` is None.
        """
        required_tools = ['prefetch', "fasterq-dump"]
        self.dep_list = required_tools
        if not pe.check_dependencies(required_tools):
            raise Exception("ERROR: Please install missing programs.")

        if srr_accession is None:
            raise Exception("Please provide a valid accession")

        # only an explicit None triggers the working-directory fallback
        base_dir = os.getcwd() if location is None else location

        self.srr_accession = srr_accession
        # data for this accession lives in a sub-directory named after it
        self.location = os.path.join(base_dir, self.srr_accession)
Exemplo n.º 18
0
    def __init__(self, bowtie2_index, **kwargs):
        """Bowtie2 constructor. Initialize bowtie2 index and other parameters.

        Parameters
        ----------
        bowtie2_index:
            Index passed to ``pu.check_bowtie2index``; when it is non-empty
            and that check succeeds it is stored and set as the ``-x``
            argument.
        kwargs:
            bowtie2 arguments, stored in ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for bowtie2.
        """
        super().__init__()

        tool = "bowtie2"
        self.programName = tool
        self.dep_list = [tool]
        deps_found = pe.check_dependencies(self.dep_list)
        if not deps_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # option names this wrapper lists for bowtie2
        self.valid_args = [
            "-x", "-1", "-2", "-U", "--interleaved", "-S", "-b",
            "-q", "--tab5", "--tab6", "--qseq", "-f", "-r", "-F", "-c",
            "-s", "-u", "-5", "-3", "--trim-to",
            "--phred33", "--phred64", "--int-quals",
            "--very-fast", "--fast", "--sensitive", "--very-sensitive",
            "--very-fast-local", "--fast-local", "--sensitive-local",
            "--very-sensitive-local",
            "-N", "-L", "-i", "--n-ceil", "--dpad", "--gbar",
            "--ignore-quals", "--nofw", "--norc", "--no-1mm-upfront",
            "--end-to-end", "--local",
            "--ma", "--mp", "--np", "--rdg", "--rfg", "--score-min",
            "-k", "-a", "-D", "-R", "-I", "-X",
            "--fr", "--rf", "--ff", "--no-mixed", "--no-discordant",
            "--dovetail", "--no-contain", "--no-overlap",
            "--align-paired-reads", "--preserve-tags",
            "-t", "--un", "--al", "--un-conc", "--al-conc", "--un-gz",
            "--quiet", "--met-file", "--met-stderr", "--met",
            "--no-unal", "--no-head", "--no-sq", "--rg-id", "--rg",
            "--omit-sec-seq", "--sam-no-qname-trunc", "--xeq",
            "--soft-clipped-unmapped-tlen",
            "-p", "--threads", "--reorder", "--mm", "--qc-filter",
            "--seed", "--non-deterministic", "--version", "-h", "--help",
        ]

        # keep the options supplied at construction time
        self.passedArgumentDict = kwargs

        # the index check is only attempted for a non-empty index value
        index_usable = len(bowtie2_index) > 0 and pu.check_bowtie2index(bowtie2_index)
        if not index_usable:
            print(
                "No Bowtie2 index provided. Please build index now to generate an index..."
            )
        else:
            print("Bowtie2 index is: " + bowtie2_index)
            self.bowtie2_index = bowtie2_index
            self.passedArgumentDict['-x'] = self.bowtie2_index
Exemplo n.º 19
0
    def __init__(self, hisat2_index="", **kwargs):
        """
        Set up the HISAT2 wrapper.

        Parameters
        ----------
        hisat2_index:
            Index passed to ``pu.check_hisatindex``; when it is non-empty and
            that check succeeds it is stored in ``self.hisat2_index`` and
            ``self.index`` and set as the ``-x`` argument. An empty string or
            None is treated as "no index".
        kwargs:
            hisat2 arguments, stored in ``self.passedArgumentDict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for hisat2.
        """

        super().__init__()
        self.programName = "hisat2"
        #check if hisat2 exists
        if not pe.check_dependencies([self.programName]):
            raise Exception("ERROR: " + self.programName + " not found.")

        # Option names listed for hisat2. The previous list contained the
        # garbled entry '--rgit-sec-seq' (a merge of '--rg' and
        # '--omit-sec-seq') and listed '--sra-acc' twice.
        self.valid_args = [
            '-x', '-1', '-2', '-U', '--sra-acc', '-S', '-q', '--qseq', '-f',
            '-r', '-c', '-s', '-u', '-5', '-3', '--phred33', '--phred64',
            '--int-quals', '--n-ceil', '--ignore-quals', '--nofw',
            '--norc', '--pen-cansplice', '--pen-noncansplice',
            '--pen-canintronlen', '--pen-noncanintronlen', '--min-intronlen',
            '--max-intronlen', '--known-splicesite-infile',
            '--novel-splicesite-outfile', '--novel-splicesite-infile',
            '--no-temp-splicesite', '--no-spliced-alignment',
            '--rna-strandness', '--tmo', '--dta', '--dta-cufflinks',
            '--avoid-pseudogene', '--no-templatelen-adjustment', '--mp',
            '--sp', '--no-softclip', '--np', '--rdg', '--rfg', '--score-min',
            '-k', '-I', '-X', '--fr', '--rf', '--ff', '--no-mixed',
            '--no-discordant', '-t', '--un', '--al', '--un-conc', '--al-conc',
            '--un-gz', '--summary-file', '--new-summary', '--quiet',
            '--met-file', '--met-stderr', '--met', '--no-head', '--no-sq',
            '--rg-id', '--rg', '--omit-sec-seq', '-o', '-p', '--reorder',
            '--mm', '--qc-filter', '--seed', '--non-deterministic',
            '--remove-chrname', '--add-chrname', '--version'
        ]

        #initialize the passed arguments
        self.passedArgumentDict = kwargs

        #if index is passed, update the passed arguments
        # A truthiness test (rather than len()) lets None fall through to the
        # "no index" message instead of raising TypeError.
        if hisat2_index and pu.check_hisatindex(hisat2_index):
            print("HISAT2 index is: " + hisat2_index)
            self.hisat2_index = hisat2_index
            self.passedArgumentDict['-x'] = self.hisat2_index
            self.index = self.hisat2_index
        else:
            print(
                "No Hisat2 index provided. Please build index now to generate an index using build_Index()...."
            )
Exemplo n.º 20
0
    def __init__(self, reference_gtf="", **kwargs):
        """
        Set up the stringtie wrapper and remember the caller's options.

        Parameters
        ----------
        reference_gtf:
            Reference GTF passed to ``pu.check_files_exist``; when it is
            non-empty and that check succeeds it is stored and set as the
            ``-G`` argument.
        kwargs:
            stringtie arguments, stored in ``self.passed_args_dict``.

        Raises
        ------
        Exception
            When ``pe.check_dependencies`` returns a falsy value for
            stringtie.
        """
        super().__init__()

        tool = "stringtie"
        self.program_name = tool

        # stringtie itself is the only dependency checked
        stringtie_found = pe.check_dependencies([tool])
        if not stringtie_found:
            raise Exception("ERROR: {} not found.".format(tool))

        # option names listed for stringtie, split on whitespace
        self.valid_args_list = (
            "-G --version --conservative --rf --fr -o -l "
            "-f -L -m -a -j -t -c -s -v -g -M "
            "-p -A -B -b -e -x -u -h --merge -F "
            "-T -i"
        ).split()

        # remember the options supplied at construction time
        self.passed_args_dict = kwargs

        # the file check is only attempted for a non-empty GTF value
        gtf_usable = len(reference_gtf) > 0 and pu.check_files_exist(reference_gtf)
        if gtf_usable:
            self.reference_gtf = reference_gtf
            self.passed_args_dict['-G'] = reference_gtf
Exemplo n.º 21
0
    def __init__(self, index="", threads=None):
        """Set up a STAR wrapper.

        Parameters
        ----------
        index : str, optional
            STAR index location. It is stored in ``self.star_index`` only
            when ``pu.check_starindex`` accepts it; otherwise a hint to
            build an index is printed and ``self.star_index`` is left
            undefined.
        threads : int, optional
            Stored in ``self.threads``. A falsy value selects
            ``os.cpu_count()``, or 1 when the CPU count cannot be
            determined.

        Raises
        ------
        Exception
            If the dependency check for ``STAR`` fails.
        """
        super().__init__()
        self.programName = "STAR"

        self.dep_list = [self.programName]
        #check if star exists
        if not pe.check_dependencies(self.dep_list):
            raise Exception("ERROR: " + self.programName + " not found.")
        #the string literal below is a bare expression with no runtime effect;
        #it is kept only as a reference list of STAR options
        """
        self.valid_args=['--help','--parametersFiles','--sysShell','--runMode','--runThreadN','--runDirPerm','--runRNGseed','--quantMode','--quantTranscriptomeBAMcompression','--quantTranscriptomeBan','--twopassMode','--twopass1readsN',
                            '--genomeDir','--genomeLoad','--genomeFastaFiles','--genomeChrBinNbits','--genomeSAindexNbases','--genomeSAsparseD','--genomeSuffixLengthMax','--genomeChainFiles','--genomeFileSizes',
                            '--sjdbFileChrStartEnd','--sjdbGTFfile','--sjdbGTFchrPrefix','--sjdbGTFfeatureExon','--sjdbGTFtagExonParentTranscript','--sjdbGTFtagExonParentGene','--sjdbOverhang','--sjdbScore','--sjdbInsertSave',
                            '--inputBAMfile','--readFilesIn','--readFilesCommand','--readMapNumber','--readMatesLengthsIn','--readNameSeparator','--clip3pNbases','--clip5pNbases','--clip3pAdapterSeq','--clip3pAdapterMMp','--clip3pAfterAdapterNbases',
                            '--limitGenomeGenerateRAM','--limitIObufferSize','--limitOutSAMoneReadBytes','--limitOutSJoneRead','--limitOutSJcollapsed','--limitBAMsortRAM ','--limitSjdbInsertNsj','--outFileNamePrefix','--outTmpDir','--outTmpKeep',
                            '--outStd','--outReadsUnmapped','--outQSconversionAdd','--outMultimapperOrder','--outSAMtype','--outSAMmode','--outSAMstrandField','--outSAMattributes','--outSAMattrIHstart','--outSAMunmapped','--outSAMorder',
                            '--outSAMprimaryFlag','--outSAMreadID','--outSAMmapqUnique','--outSAMflagOR','--outSAMflagAND','--outSAMattrRGline','--outSAMheaderHD','--outSAMheaderPG','--outSAMheaderCommentFile','--outSAMfilter','--outSAMmultNmax',
                            '--outBAMcompression','--outBAMsortingThreadN','--bamRemoveDuplicatesType','--bamRemoveDuplicatesMate2basesN','--outWigType','--outWigStrand','--outWigReferencesPrefix','--outWigNorm','--outFilterType',
                            '--outFilterMultimapScoreRange','--outFilterMultimapNmax','--outFilterMismatchNmax','--outFilterMismatchNoverLmax','--outFilterMismatchNoverReadLmax','--outFilterScoreMin','--outFilterScoreMinOverLread',
                            '--outFilterMatchNmin','--outFilterMatchNminOverLread','--outFilterIntronMotifs','--outSJfilterReads','--outSJfilterOverhangMin','--outSJfilterCountUniqueMin','--outSJfilterCountTotalMin','--outSJfilterDistToOtherSJmin',
                            '--outSJfilterIntronMaxVsReadN','--scoreGap','--scoreGapNoncan','--scoreGapGCAG ','--scoreGapATAC','--scoreGenomicLengthLog2scale','--scoreDelOpen','--scoreDelBase','--scoreInsOpen','--scoreInsBase','--scoreStitchSJshift',
                            '--seedSearchStartLmax','--seedSearchStartLmaxOverLread','--seedSearchLmax','--seedMultimapNmax','--seedPerReadNmax','--seedPerWindowNmax','--seedNoneLociPerWindow','--alignIntronMin','--alignIntronMax','--alignMatesGapMax',
                            '--alignSJoverhangMin','--alignSJstitchMismatchNmax','--alignSJDBoverhangMin','--alignSplicedMateMapLmin','--alignSplicedMateMapLminOverLmate','--alignWindowsPerReadNmax','--alignTranscriptsPerWindowNmax','--alignTranscriptsPerReadNmax',
                            '--alignEndsType','--alignEndsProtrude','--alignSoftClipAtReferenceEnds','--winAnchorMultimapNmax','--winBinNbits','--winAnchorDistNbins','--winFlankNbins','--winReadCoverageRelativeMin','--winReadCoverageBasesMin',
                            '--chimOutType','--chimSegmentMin','--chimScoreMin','--chimScoreDropMax','--chimScoreSeparation','--chimScoreJunctionNonGTAG','--chimJunctionOverhangMin','--chimSegmentReadGapMax','--chimFilter','--chimMainSegmentMultNmax']
        """

        #use every available CPU by default; os.cpu_count() may return None,
        #so fall back to a single thread rather than storing None
        if not threads:
            threads = os.cpu_count() or 1

        self.threads = threads

        #if index is passed, update the passed arguments
        if index and pu.check_starindex(index):
            print("STAR index is: " + index)
            self.star_index = index
        else:
            print(
                "No STAR index provided. Please build index now to generate an index using build_index()...."
            )
Exemplo n.º 22
0
    def __init__(self, index, threads=None, mode=None):
        """Set up a diamond wrapper.

        Parameters
        ----------
        index : str
            Diamond index location, stored in ``self.index``. When
            ``self.check_index()`` rejects it, a message asking the user
            to build an index is printed; the value is kept either way.
        threads : int, optional
            Stored in ``self.threads``. A falsy value selects
            ``os.cpu_count()``, or 1 when the CPU count cannot be
            determined.
        mode : str, optional
            One of ``'fast'``, ``'sensitive'`` or ``'more-sensitive'``.
            Stored in ``self.mode`` with a leading ``--``; any other
            value results in ``'--fast'``.

        Raises
        ------
        Exception
            If the dependency check for ``diamond`` fails.
        """
        self.programName = "diamond"
        self.dep_list = [self.programName]
        #check if program exists
        if not pe.check_dependencies(self.dep_list):
            raise Exception("ERROR: " + self.programName + " not found.")
        #the string literal below is a bare expression with no runtime effect;
        #it is kept only as a reference list of diamond options
        """
        self.valid_args=['-p','--db','-d','--out','-o','--outfmt','-f','--verbose','--log','--quiet','--in','--query','-q','--strand','--un','--al','--unal','--max-target-seqs','-k','--top','--range-culling','--compress','--evalue','-e',
                         '--min-score','--id','--query-cover','--subject-cover','--sensitive','--more-sensitive','--block-size','-b','--index-chunks','-c','--tmpdir','-t','--gapopen','--gapextend','--frameshift','-F','--long-reads','--matrix','--custom-matrix',
                         '--lambda','--K','--comp-based-stats','--masking','--query-gencode','--salltitles','--sallseqid','--no-self-hits','--taxonmap','--taxonnodes','--taxonlist','--algo','--bin','--min-orf','-l','--freq-sd','--id2','--window','-w',
                         '--xdrop','-x','--ungapped-score','--hit-band','--hit-score','--gapped-xdrop','-X','--band','--shapes','-s','--shape-mask','--index-mode','--rank-ratio','--rank-ratio2','--max-hsps','--range-cover','--dbsize','--no-auto-append',
                         '--xml-blord-format','--daa','-a','--forwardonly','--seq']     
        """
        self.valid_commands = [
            'makedb', 'blastp', 'blastx', 'view', 'help', 'version', 'getseq',
            'dbinfo'
        ]

        #use max threads by default; os.cpu_count() may return None, so fall
        #back to a single thread rather than storing None
        if not threads:
            threads = os.cpu_count() or 1
        self.threads = threads

        #select mode; anything unrecognised (including None) becomes --fast
        valid_modes = ['fast', 'sensitive', 'more-sensitive']
        self.mode = '--' + mode if mode in valid_modes else '--fast'

        #check index; the attribute must be set first because check_index()
        #is called on self
        #NOTE(review): assumes check_index() does not rebind self.index, so
        #re-assigning it after a successful check is unnecessary - confirm
        self.index = index
        if not self.check_index():
            print("No valid index provided. Please build index...")
Exemplo n.º 23
0
    def init_from_accession(self, srr_accession, location):
        """Initialise this SRA object from an accession and a parent directory.

        Parameters
        ----------
        srr_accession : str
            Accession identifier; stored in ``self.srr_accession`` and used
            as the name of the data directory and of the ``.sra`` file.
        location : str or None
            Parent directory. ``None`` selects the current working
            directory. ``self.location`` becomes
            ``<location>/<srr_accession>``.

        Raises
        ------
        Exception
            If the dependency check for ``prefetch`` / ``fasterq-dump``
            fails, or if ``srr_accession`` is ``None``.
        """
        self.dep_list = ['prefetch', "fasterq-dump"]
        tools_available = pe.check_dependencies(self.dep_list)
        if not tools_available:
            raise Exception("ERROR: Please install missing programs.")

        if srr_accession is None:
            raise Exception("Please provide a valid accession")

        #fall back to the current working directory when no location is given
        parent_dir = os.getcwd() if location is None else location

        self.srr_accession = srr_accession
        #the data directory is named after the accession
        self.location = os.path.join(parent_dir, self.srr_accession)

        #record the .sra file details only if the file is already present;
        #otherwise the three attributes below are left unset
        sra_path = os.path.join(self.location, self.srr_accession + ".sra")
        if pu.check_files_exist(sra_path):
            pu.print_green(self.srr_accession + ".sra exists.")
            self.localSRAFilePath = sra_path
            self.sraFileSize = pu.get_file_size(self.localSRAFilePath)
            #layout is taken from pe.is_paired on the local file
            paired = pe.is_paired(self.localSRAFilePath)
            self.layout = "PAIRED" if paired else "SINGLE"

        #finally look for fastq files in the data directory
        self.search_fastq(self.location)
Exemplo n.º 24
0
    def check_dependency(self, deps_list):
        """
        Check the dependencies of a tool/command.

        Parameters
        ----------
        deps_list : List
            Commands to check. An empty or ``None`` value means there is
            nothing to check.

        Raises
        ------
        OSError
            If ``pe.check_dependencies`` reports failure for the list.

        Returns
        -------
        bool
            True when there is nothing to check or the check succeeds.

        """
        #nothing to verify: succeed without consulting pe
        if not deps_list:
            return True

        if pe.check_dependencies(deps_list):
            return True

        raise OSError("CommandNotFoundException")
Exemplo n.º 25
0
 def __init__(self):
     """Record the TransDecoder program names and check they are available.

     Sets ``self.programName`` to ``"TransDecoder.LongOrfs"`` and
     ``self.dep_list`` to both TransDecoder executables.

     Raises
     ------
     Exception
         If ``pe.check_dependencies`` reports failure for ``self.dep_list``.
     """
     long_orfs = "TransDecoder.LongOrfs"
     predict = "TransDecoder.Predict"
     self.programName = long_orfs
     self.dep_list = [long_orfs, predict]
     #both executables are checked together
     deps_found = pe.check_dependencies(self.dep_list)
     if not deps_found:
         raise Exception("ERROR: {} not found.".format(self.dep_list))