def DiscreteSubstitutionModel(*args, **kw):
    """Deprecated entry point that forwards to the relocated class.

    Reports the old dotted path
    (cogent.evolve.discrete_markov.DiscreteSubstitutionModel) as deprecated
    in favour of cogent.evolve.substitution_model.DiscreteSubstitutionModel,
    then constructs and returns an instance of the latter.

    Arguments:
        - *args, **kw: passed straight through to the relocated class.
    """
    deprecated(
        "class",
        "cogent.evolve.discrete_markov.DiscreteSubstitutionModel",
        "cogent.evolve.substitution_model.DiscreteSubstitutionModel",
        '1.6')
    # The import happens at call time, inside the function body.
    from cogent.evolve.substitution_model import \
        DiscreteSubstitutionModel as _Relocated
    instance = _Relocated(*args, **kw)
    return instance
def setMotifProbs(self, motif_probs, locus=None, bin=None, is_constant=None,
                  is_independent=None, auto=False, **kwargs):
    """Register motif probabilities with the model's parameter controller.

    Arguments:
        - motif_probs: the motif probabilities; they are first passed
          through self.model.adaptMotifProbs.
        - locus, bin: forwarded unchanged to
          self.model.setParamControllerMotifProbs.
        - is_constant: whether the probabilities are held constant. When
          None, it defaults to ``not self.optimise_motif_probs``.
        - is_independent: forwarded unchanged.
        - auto: forwarded to adaptMotifProbs. When false,
          self.mprobs_from_alignment is reset to False.
        - is_const: deprecated spelling of is_constant (read from kwargs).
        - **kwargs: remaining keywords are forwarded to
          setParamControllerMotifProbs.
    """
    if 'is_const' in kwargs:
        is_constant = kwargs.pop('is_const')
        # The version is passed as a string so it is reported verbatim.
        deprecated('argument', 'is_const', 'is_constant', '1.6')

    motif_probs = self.model.adaptMotifProbs(motif_probs, auto=auto)

    if is_constant is None:
        is_constant = not self.optimise_motif_probs

    self.model.setParamControllerMotifProbs(
        self, motif_probs,
        is_constant=is_constant,
        bin=bin,
        locus=locus,
        is_independent=is_independent,
        **kwargs)

    if not auto:
        self.mprobs_from_alignment = False  # should be done per-locus
def setMotifProbsFromData(self, align, locus=None, is_constant=None,
                          include_ambiguity=False, is_independent=None,
                          auto=False, pseudocount=None, **kwargs):
    """Estimate motif probabilities from align and apply them.

    Motif counts come from self.model.countMotifs; a pseudocount is added,
    the counts are normalised to sum to one, and the result is handed to
    self.setMotifProbs.

    Arguments:
        - align: the alignment passed to self.model.countMotifs.
        - locus, is_independent, auto: forwarded to setMotifProbs.
        - is_constant: when None, defaults to
          ``not self.optimise_motif_probs``.
        - include_ambiguity: forwarded to countMotifs.
        - pseudocount: value added to every count. When None it is 0.0 if
          the probabilities are constant, otherwise 0.5.
        - is_const: deprecated spelling of is_constant (read from kwargs).
        - **kwargs: remaining keywords are forwarded to setMotifProbs.
    """
    if 'is_const' in kwargs:
        is_constant = kwargs.pop('is_const')
        # The version is passed as a string so it is reported verbatim.
        deprecated('argument', 'is_const', 'is_constant', '1.6')

    counts = self.model.countMotifs(align, include_ambiguity=include_ambiguity)

    if is_constant is None:
        is_constant = not self.optimise_motif_probs

    if pseudocount is None:
        pseudocount = 0.0 if is_constant else 0.5

    # Add out-of-place: "+=" would modify the object returned by the model
    # and, for an integer array, cannot store a fractional pseudocount.
    counts = counts + pseudocount
    mprobs = counts / (1.0 * sum(counts))

    self.setMotifProbs(mprobs, locus=locus, is_constant=is_constant,
                       is_independent=is_independent, auto=auto, **kwargs)
def setParamRule(self, par_name, is_independent=None, is_constant=False,
                 value=None, lower=None, init=None, upper=None, **scope_info):
    """Define a model constraint for par_name. Parameters can be set
    constant or split according to tree/bin scopes.

    Arguments:
        - par_name: The model parameter being modified.
        - is_constant, value: if True, the parameter is held constant at
          value, if provided, or the likelihood function's current value.
        - is_independent: whether the partition specified by scope/bin
          arguments are to be considered independent.
        - lower, init, upper: specify the lower bound, initial value and
          upper bound for optimisation. Can be set separately.
        - bin, bins: the name(s) of the bin to apply rule.
        - locus, loci: the name of the locus/loci to apply rule.
        - position, positions / motif, motifs: handled the same way as
          bin/bins: the singular form takes one name, the plural a
          collection of names.
        - is_const: deprecated spelling of is_constant.
        - **scope_info: tree scope arguments, passed on to
          self._process_scope_info
            - edge, edges: The name of the tree edge(s) affected by rule.
            - tip_names: a tuple of two tip names, specifying a tree scope
              to apply rule.
            - outgroup_name: A tip name that, provided along with
              tip_names, ensures a consistently specified tree scope.
            - is_clade: The rule applies to all edges descending from the
              most recent common ancestor defined by the
              tip_names+outgroup_name arguments.
            - is_stem: The rule applies to the edge preceding the most
              recent common ancestor defined by the
              tip_names+outgroup_name arguments.
    """
    if 'is_const' in scope_info:
        is_constant = scope_info.pop('is_const')
        # The version is passed as a string so it is reported verbatim.
        deprecated('argument', 'is_const', 'is_constant', '1.6')

    # basestring exists only on Python 2; fall back to str elsewhere so the
    # singular-scope check below does not fail with a NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    par_name = str(par_name)
    scopes = {}
    for (single, plural) in [
            ('bin', 'bins'),
            ('locus', 'loci'),
            ('position', 'positions'),
            ('motif', 'motifs'),
            ]:
        v = scope_info.pop(single, None)
        if v:
            # A singular scope must be one name, not a collection.
            assert isinstance(v, string_types), ('%s=, maybe?' % plural)
            assert plural not in scope_info
            scopes[single] = [v]
        elif plural in scope_info:
            # Also reached when the singular form was given but empty
            # (e.g. bin=None), so the plural keyword is always consumed here
            # instead of leaking into the tree-scope arguments below.
            v = scope_info.pop(plural)
            if v:
                scopes[single] = v

    # Whatever is left describes a tree scope.
    edges = self._process_scope_info(**scope_info)
    if edges:
        scopes['edge'] = edges

    # NOTE(review): these checks use truthiness, so 0 / 0.0 for init, lower,
    # upper or value is not detected as "provided". Kept as-is to avoid
    # rejecting calls that are accepted today -- confirm intended behaviour.
    if is_constant:
        assert not (init or lower or upper)
    elif init is not None:
        assert not value
        value = init

    self.assignAll(par_name, scopes, value, lower, upper, is_constant,
                   is_independent)
class Muscle(CommandLineApplication):
    """Muscle application controller

    Deprecated location: defining this class reports cogent.app.muscle.Muscle
    as superseded by cogent.app.muscle_v38.Muscle.
    """

    # Runs once, when the class body is executed.
    deprecated('class',
               'cogent.app.muscle.Muscle',
               'cogent.app.muscle_v38.Muscle',
               '1.6')

    _options = {
        # Minimum spacing between anchor columns. [Integer]
        '-anchorspacing': ValuedParameter('-', Name='anchorspacing', Delimiter=' '),
        # Center parameter. Should be negative [Float]
        '-center': ValuedParameter('-', Name='center', Delimiter=' '),
        # Clustering method. cluster1 is used in iteration 1
        # and 2, cluster2 in later iterations
        '-cluster1': ValuedParameter('-', Name='cluster1', Delimiter=' '),
        '-cluster2': ValuedParameter('-', Name='cluster2', Delimiter=' '),
        # Minimum length of diagonal.
        '-diaglength': ValuedParameter('-', Name='diaglength', Delimiter=' '),
        # Discard this many positions at ends of diagonal.
        '-diagmargin': ValuedParameter('-', Name='diagmargin', Delimiter=' '),
        # Distance measure for iteration 1.
        '-distance1': ValuedParameter('-', Name='distance1', Delimiter=' '),
        # Distance measure for iterations 2, 3 ...
        '-distance2': ValuedParameter('-', Name='distance2', Delimiter=' '),
        # The gap open score. Must be negative.
        '-gapopen': ValuedParameter('-', Name='gapopen', Delimiter=' '),
        # Window size for determining whether a region is hydrophobic.
        '-hydro': ValuedParameter('-', Name='hydro', Delimiter=' '),
        # Multiplier for gap open/close penalties in hydrophobic regions.
        '-hydrofactor': ValuedParameter('-', Name='hydrofactor', Delimiter=' '),
        # Where to find the input sequences.
        '-in': ValuedParameter('-', Name='in', Delimiter=' ', Quote="\""),
        '-in1': ValuedParameter('-', Name='in1', Delimiter=' ', Quote="\""),
        '-in2': ValuedParameter('-', Name='in2', Delimiter=' ', Quote="\""),
        # Log file name (delete existing file).
        '-log': ValuedParameter('-', Name='log', Delimiter=' '),
        # Log file name (append to existing file).
        '-loga': ValuedParameter('-', Name='loga', Delimiter=' '),
        # Maximum distance between two diagonals that allows them to merge
        # into one diagonal.
        '-maxdiagbreak': ValuedParameter('-', Name='maxdiagbreak', Delimiter=' '),
        # Maximum time to run in hours. The actual time may exceed the
        # requested limit by a few minutes. Decimals are allowed, so 1.5
        # means one hour and 30 minutes.
        '-maxhours': ValuedParameter('-', Name='maxhours', Delimiter=' '),
        # Maximum number of iterations.
        '-maxiters': ValuedParameter('-', Name='maxiters', Delimiter=' '),
        # Maximum memory in Mb
        '-maxmb': ValuedParameter('-', Name='maxmb', Delimiter=' '),
        # Maximum number of new trees to build in iteration 2.
        '-maxtrees': ValuedParameter('-', Name='maxtrees', Delimiter=' '),
        # Minimum score a column must have to be an anchor.
        '-minbestcolscore': ValuedParameter('-', Name='minbestcolscore', Delimiter=' '),
        # Minimum smoothed score a column must have to be an anchor.
        '-minsmoothscore': ValuedParameter('-', Name='minsmoothscore', Delimiter=' '),
        # Objective score used by tree dependent refinement.
        # sp=sum-of-pairs score.
        # spf=sum-of-pairs score (dimer approximation)
        # spm=sp for < 100 seqs, otherwise spf
        # dp=dynamic programming score.
        # ps=average profile-sequence score.
        # xp=cross profile score.
        '-objscore': ValuedParameter('-', Name='objscore', Delimiter=' '),
        # Where to write the alignment.
        '-out': ValuedParameter('-', Name='out', Delimiter=' ', Quote="\""),
        # Where to write the file in phylip sequential format (v3.6 only).
        '-physout': ValuedParameter('-', Name='physout', Delimiter=' '),
        # Where to write the file in phylip interleaved format (v3.6 only).
        '-phyiout': ValuedParameter('-', Name='phyiout', Delimiter=' '),
        # Set to profile for aligning two alignments and adding seqs to an
        # existing alignment
        '-profile': FlagParameter(Prefix='-', Name='profile'),
        # Method used to root tree; root1 is used in iteration 1 and 2, root2
        # in later iterations.
        '-root1': ValuedParameter('-', Name='root1', Delimiter=' '),
        '-root2': ValuedParameter('-', Name='root2', Delimiter=' '),
        # Sequence type.
        '-seqtype': ValuedParameter('-', Name='seqtype', Delimiter=' '),
        # Maximum value of column score for smoothing purposes.
        '-smoothscoreceil': ValuedParameter('-', Name='smoothscoreceil', Delimiter=' '),
        # Constant used in UPGMB clustering. Determines the relative fraction
        # of average linkage (SUEFF) vs. nearest-neighbor linkage (1 - SUEFF).
        '-SUEFF': ValuedParameter('-', Name='SUEFF', Delimiter=' '),
        # Save tree produced in first or second iteration to given file in
        # Newick (Phylip-compatible) format.
        '-tree1': ValuedParameter('-', Name='tree1', Delimiter=' ', Quote="\""),
        '-tree2': ValuedParameter('-', Name='tree2', Delimiter=' ', Quote="\""),
        # Sequence weighting scheme.
        # weight1 is used in iterations 1 and 2.
        # weight2 is used for tree-dependent refinement.
        # none=all sequences have equal weight.
        # henikoff=Henikoff & Henikoff weighting scheme.
        # henikoffpb=Modified Henikoff scheme as used in PSI-BLAST.
        # clustalw=CLUSTALW method.
        # threeway=Gotoh three-way method.
        '-weight1': ValuedParameter('-', Name='weight1', Delimiter=' '),
        '-weight2': ValuedParameter('-', Name='weight2', Delimiter=' '),
        # Use anchor optimization in tree dependent refinement iterations
        '-anchors': FlagParameter(Prefix='-', Name='anchors'),
        # Write output in CLUSTALW format (default is FASTA).
        '-clw': FlagParameter(Prefix='-', Name='clw'),
        # Cluster sequences
        '-cluster': FlagParameter(Prefix='-', Name='cluster'),
        # neighborjoining is "unrecognized"
        #'-neighborjoining':FlagParameter(Prefix='-',Name='neighborjoining'),
        # Write output in CLUSTALW format with the "CLUSTAL W (1.81)" header
        # rather than the MUSCLE version. This is useful when a post-processing
        # step is picky about the file header.
        '-clwstrict': FlagParameter(Prefix='-', Name='clwstrict'),
        # Do not catch exceptions.
        '-core': FlagParameter(Prefix='-', Name='core'),
        # Write output in FASTA format. Alternatives include -clw,
        # -clwstrict, -msf and -html.
        '-fasta': FlagParameter(Prefix='-', Name='fasta'),
        # Group similar sequences together in the output. This is the default.
        # See also -stable.
        '-group': FlagParameter(Prefix='-', Name='group'),
        # Write output in HTML format (default is FASTA).
        '-html': FlagParameter(Prefix='-', Name='html'),
        # Use log-expectation profile score (VTML240). Alternatives are to use
        # -sp or -sv. This is the default for amino acid sequences.
        '-le': FlagParameter(Prefix='-', Name='le'),
        # Write output in MSF format (default is FASTA).
        '-msf': FlagParameter(Prefix='-', Name='msf'),
        # Disable anchor optimization. Default is -anchors.
        '-noanchors': FlagParameter(Prefix='-', Name='noanchors'),
        # Catch exceptions and give an error message if possible.
        '-nocore': FlagParameter(Prefix='-', Name='nocore'),
        # Do not display progress messages.
        '-quiet': FlagParameter(Prefix='-', Name='quiet'),
        # Input file is already aligned, skip first two iterations and begin
        # tree dependent refinement.
        '-refine': FlagParameter(Prefix='-', Name='refine'),
        # Use sum-of-pairs protein profile score (PAM200). Default is -le.
        '-sp': FlagParameter(Prefix='-', Name='sp'),
        # Use sum-of-pairs nucleotide profile score (BLASTZ parameters). This
        # is the only option for nucleotides, and is therefore the default.
        '-spn': FlagParameter(Prefix='-', Name='spn'),
        # Preserve input order of sequences in output file. Default is to group
        # sequences by similarity (-group).
        '-stable': FlagParameter(Prefix='-', Name='stable'),
        # Use sum-of-pairs profile score (VTML240). Default is -le.
        '-sv': FlagParameter(Prefix='-', Name='sv'),
        # Diagonal optimization
        '-diags': FlagParameter(Prefix='-', Name='diags'),
        '-diags1': FlagParameter(Prefix='-', Name='diags1'),
        '-diags2': FlagParameter(Prefix='-', Name='diags2'),
        # Terminal gaps penalized with full penalty.
        # [1] Not fully supported in this version.
        '-termgapsfull': FlagParameter(Prefix='-', Name='termgapsfull'),
        # Terminal gaps penalized with half penalty.
        # [1] Not fully supported in this version.
        '-termgapshalf': FlagParameter(Prefix='-', Name='termgapshalf'),
        # Terminal gaps penalized with half penalty if gap relative to
        # longer sequence, otherwise with full penalty.
        # [1] Not fully supported in this version.
        '-termgapshalflonger': FlagParameter(Prefix='-', Name='termgapshalflonger'),
        # Write parameter settings and progress messages to log file.
        '-verbose': FlagParameter(Prefix='-', Name='verbose'),
        # Write version string to stdout and exit.
        '-version': FlagParameter(Prefix='-', Name='version'),
        }

    _parameters = {}
    _parameters.update(_options)
    _command = "muscle"

    def _input_as_seqs(self, data):
        """Label each sequence in data '>1', '>2', ... and pass the
        resulting lines to _input_as_lines.

        Returns whatever _input_as_lines returns (the empty string).
        """
        lines = []
        for i, s in enumerate(data):
            # will number the sequences 1,2,3,etc.
            lines.append(''.join(['>', str(i + 1)]))
            lines.append(s)
        return self._input_as_lines(lines)

    def _input_as_lines(self, data):
        """Turn on -in with the value produced by the base class's
        _input_as_lines(data), when data is non-empty.

        Always returns the empty string.
        """
        if data:
            self.Parameters['-in']\
                .on(super(Muscle, self)._input_as_lines(data))
        return ''

    def _input_as_string(self, data):
        """Makes data the value of a specific parameter

        This method returns the empty string. The parameter will be printed
        automatically once set.
        """
        if data:
            self.Parameters['-in'].on(str(data))
        return ''

    def _input_as_multiline_string(self, data):
        """Turn on -in with the value produced by the base class's
        _input_as_multiline_string(data), when data is non-empty.

        Always returns the empty string.
        """
        if data:
            self.Parameters['-in']\
                .on(super(Muscle, self)._input_as_multiline_string(data))
        return ''

    def _input_as_multifile(self, data):
        """For use with the -profile option

        This input handler expects data to be a tuple containing two
        filenames. Index 0 will be set to -in1 and index 1 to -in2

        Raises ValueError if data cannot be unpacked into exactly two items.
        Always returns the empty string.
        """
        if data:
            try:
                filename1, filename2 = data
            except (TypeError, ValueError):
                # TypeError: data is not iterable; ValueError: wrong number
                # of items. Anything else (e.g. KeyboardInterrupt) is left
                # to propagate instead of being relabelled.
                raise ValueError("Expected two filenames")

            self.Parameters['-in'].off()
            self.Parameters['-in1'].on(filename1)
            self.Parameters['-in2'].on(filename2)
        return ''

    def _align_out_filename(self):
        """Return the -out value passed through self._absolute.

        Raises ValueError if -out is not turned on.
        """
        if self.Parameters['-out'].isOn():
            aln_filename = self._absolute(str(self.Parameters['-out'].Value))
        else:
            raise ValueError("No output file specified.")
        return aln_filename

    def _tree1_out_filename(self):
        """Return the -tree1 value passed through self._absolute.

        Raises ValueError if -tree1 is not turned on.
        """
        if self.Parameters['-tree1'].isOn():
            tree_filename = self._absolute(str(self.Parameters['-tree1'].Value))
        else:
            raise ValueError("No tree output file specified.")
        return tree_filename

    def _tree2_out_filename(self):
        """Return the -tree2 value passed through self._absolute.

        Raises ValueError if -tree2 is not turned on.
        """
        if self.Parameters['-tree2'].isOn():
            tree_filename = self._absolute(str(self.Parameters['-tree2'].Value))
        else:
            raise ValueError("No tree output file specified.")
        return tree_filename

    def _get_result_paths(self, data):
        """Map result names to ResultPath objects for the outputs in use.

        An entry is added under 'MuscleOut', 'Tree1Out' or 'Tree2Out' only
        when the matching -out / -tree1 / -tree2 parameter is turned on.
        data is not used.
        """
        result = {}
        if self.Parameters['-out'].isOn():
            out_name = self._align_out_filename()
            result['MuscleOut'] = ResultPath(Path=out_name, IsWritten=True)
        if self.Parameters['-tree1'].isOn():
            out_name = self._tree1_out_filename()
            result['Tree1Out'] = ResultPath(Path=out_name, IsWritten=True)
        if self.Parameters['-tree2'].isOn():
            out_name = self._tree2_out_filename()
            result['Tree2Out'] = ResultPath(Path=out_name, IsWritten=True)
        return result

    def getHelp(self):
        """Muscle help"""
        # NOTE(review): the help text holds only whitespace.
        help_str = """ """
        return help_str
def getLocalViterbiScoreAndAlignment(self, posterior_probs=False, **kw):
    """Deprecated spelling of getViterbiScoreAndAlignment(local=True).

    Arguments:
        - posterior_probs: forwarded as the keyword of the same name.
        - **kw: any further keywords, forwarded unchanged.

    Returns whatever getViterbiScoreAndAlignment returns.
    """
    deprecated(
        'method',
        'getLocalViterbiScoreAndAlignment',
        'getViterbiScoreAndAlignment(local=True)',
        '1.7',
        stack_level=3)
    forwarded = dict(kw, posterior_probs=posterior_probs)
    return self.getViterbiScoreAndAlignment(local=True, **forwarded)
def testfunction(self):
    """Deprecated name for getLogLikelihood.

    Reports the old name as deprecated, then returns the value of
    self.getLogLikelihood().
    """
    deprecated('method', 'testfunction', 'getLogLikelihood', '1.6')
    log_likelihood = self.getLogLikelihood()
    return log_likelihood
def setpar(self, param_name, value, edge=None, **scope):
    """Deprecated name for fixing a parameter through setParamRule.

    Arguments:
        - param_name: passed as the first positional argument.
        - value: forwarded as value=, together with is_constant=True.
        - edge: forwarded as edge=.
        - **scope: any further keywords, forwarded unchanged.

    Returns whatever setParamRule returns.
    """
    deprecated('method', 'setpar', 'setParamRule', '1.6')
    outcome = self.setParamRule(
        param_name,
        edge=edge,
        value=value,
        is_constant=True,
        **scope)
    return outcome
def testfunction(self):
    """Deprecated name for getLogLikelihood.

    Reports the old name as deprecated, then hands back the result of
    self.getLogLikelihood().
    """
    deprecated(
        'method',
        'testfunction',
        'getLogLikelihood',
        '1.6')
    result = self.getLogLikelihood()
    return result