Пример #1
0
def DiscreteSubstitutionModel(*args, **kw):
    """Deprecated shim that builds a DiscreteSubstitutionModel.

    Calls ``deprecated`` to flag the move of the class from
    cogent.evolve.discrete_markov to cogent.evolve.substitution_model, then
    forwards every positional and keyword argument to the class imported
    from the new location and returns the object it produces.
    """
    old_location = "cogent.evolve.discrete_markov.DiscreteSubstitutionModel"
    new_location = "cogent.evolve.substitution_model.DiscreteSubstitutionModel"
    deprecated("class", old_location, new_location, '1.6')
    # Imported at call time, under an alias so the real class is not
    # confused with this wrapper of the same name.
    from cogent.evolve.substitution_model import (
        DiscreteSubstitutionModel as _RelocatedModel)
    return _RelocatedModel(*args, **kw)
Пример #2
0
 def setMotifProbs(self, motif_probs, locus=None, bin=None, is_constant=None, 
             is_independent=None, auto=False, **kwargs):
     """Hand a set of motif probabilities to the model's parameter controller.

     Arguments:
         - motif_probs: passed through self.model.adaptMotifProbs (together
           with auto) before being given to the model.
         - locus, bin, is_independent: forwarded unchanged to
           self.model.setParamControllerMotifProbs.
         - is_constant: when None, falls back to
           ``not self.optimise_motif_probs``.
         - auto: when false, self.mprobs_from_alignment is reset to False.
         - **kwargs: forwarded to setParamControllerMotifProbs. The legacy
           keyword 'is_const' is accepted as a deprecated spelling of
           is_constant and overrides it.
     """
     try:
         legacy_flag = kwargs.pop('is_const')
     except KeyError:
         pass
     else:
         is_constant = legacy_flag
         deprecated('argument', 'is_const', 'is_constant', 1.6)

     adapted_probs = self.model.adaptMotifProbs(motif_probs, auto=auto)
     if is_constant is None:
         is_constant = not self.optimise_motif_probs

     self.model.setParamControllerMotifProbs(
         self,
         adapted_probs,
         is_constant=is_constant,
         bin=bin,
         locus=locus,
         is_independent=is_independent,
         **kwargs)

     if auto:
         return
     self.mprobs_from_alignment = False  # should be done per-locus
Пример #3
0
 def setMotifProbsFromData(self, align, locus=None, is_constant=None, 
             include_ambiguity=False, is_independent=None, auto=False,
             pseudocount=None, **kwargs):
     """Derive motif probabilities from motif counts and apply them.

     Arguments:
         - align: handed to self.model.countMotifs to obtain the counts.
         - include_ambiguity: forwarded to countMotifs.
         - is_constant: when None, falls back to
           ``not self.optimise_motif_probs``.
         - pseudocount: added to every count before normalising. When None
           it is 0.0 if is_constant is true and 0.5 otherwise.
         - locus, is_independent, auto, **kwargs: forwarded to
           self.setMotifProbs. The legacy keyword 'is_const' is accepted as
           a deprecated spelling of is_constant and overrides it.
     """
     try:
         legacy_flag = kwargs.pop('is_const')
     except KeyError:
         pass
     else:
         is_constant = legacy_flag
         deprecated('argument', 'is_const', 'is_constant', 1.6)

     counts = self.model.countMotifs(
         align, include_ambiguity=include_ambiguity)

     if is_constant is None:
         is_constant = not self.optimise_motif_probs
     if pseudocount is None:
         pseudocount = 0.0 if is_constant else 0.5

     # In-place add, exactly as before: the object returned by countMotifs
     # is the one that gets modified.
     counts += pseudocount
     total = 1.0 * sum(counts)
     mprobs = counts / total

     self.setMotifProbs(
         mprobs,
         locus=locus,
         is_constant=is_constant,
         is_independent=is_independent,
         auto=auto,
         **kwargs)
Пример #4
0
 def setParamRule(self, par_name, is_independent=None, is_constant=False,
         value=None, lower=None, init=None, upper=None, **scope_info):
     """Apply a constraint to the model parameter par_name.

     A parameter can be held constant, given bounds / a starting value, or
     split across the scopes named in the keyword arguments.

     Arguments:
         - par_name: the model parameter being modified (converted with
           str()).
         - is_constant, value: if is_constant is true the parameter is held
           constant at value, if provided, or the likelihood function's
           current value. init, lower and upper must then be unset.
         - is_independent: whether the partition specified by the scope
           arguments is to be considered independent.
         - lower, init, upper: lower bound, initial value and upper bound
           for optimisation. Can be set separately. init is used as value
           and must not be combined with a value.
         - bin/bins, locus/loci, position/positions, motif/motifs: name(s)
           of the scope the rule applies to. The singular form takes one
           string and may not be combined with its plural form.
         - is_const: deprecated spelling of is_constant.
         - **scope_info: remaining keywords are tree scope arguments,
           passed to self._process_scope_info:

           - edge, edges: the name of the tree edge(s) affected by rule.
           - tip_names: a tuple of two tip names, specifying a tree scope
             to apply rule.
           - outgroup_name: a tip name that, provided along with tip_names,
             ensures a consistently specified tree scope.
           - is_clade: the rule applies to all edges descending from the
             most recent common ancestor defined by the
             tip_names+outgroup_name arguments.
           - is_stem: the rule applies to the edge preceding the most
             recent common ancestor defined by the tip_names+outgroup_name
             arguments.
     """
     try:
         legacy_flag = scope_info.pop('is_const')
     except KeyError:
         pass
     else:
         is_constant = legacy_flag
         deprecated('argument', 'is_const', 'is_constant', 1.6)

     par_name = str(par_name)

     scope_keywords = (
         ('bin', 'bins'),
         ('locus', 'loci'),
         ('position', 'positions'),
         ('motif', 'motifs'),
     )
     scopes = {}
     for singular, plural in scope_keywords:
         if singular in scope_info:
             chosen = scope_info.pop(singular)
             if not chosen:
                 # An empty singular is ignored; a plural given alongside
                 # it stays in scope_info, as it always has.
                 continue
             assert isinstance(chosen, basestring), ('%s=, maybe?' % plural)
             assert plural not in scope_info
             scopes[singular] = [chosen]
             continue
         if plural in scope_info:
             chosen = scope_info.pop(plural)
             if chosen:
                 scopes[singular] = chosen

     # Whatever is left over describes a tree scope.
     edges = self._process_scope_info(**scope_info)
     if edges:
         scopes['edge'] = edges

     if is_constant:
         assert not init and not lower and not upper
     elif init is not None:
         assert not value
         value = init

     self.assignAll(
         par_name, scopes, value, lower, upper, is_constant, is_independent)
 def setParamRule(self, par_name, is_independent=None, is_constant=False,
         value=None, lower=None, init=None, upper=None, **scope_info):
     """Apply a constraint to the model parameter par_name.

     A parameter can be held constant, given bounds / a starting value, or
     split across the scopes named in the keyword arguments.

     Arguments:
         - par_name: the model parameter being modified (converted with
           str()).
         - is_constant, value: if is_constant is true the parameter is held
           constant at value, if provided, or the likelihood function's
           current value. init, lower and upper must then be unset.
         - is_independent: whether the partition specified by the scope
           arguments is to be considered independent.
         - lower, init, upper: lower bound, initial value and upper bound
           for optimisation. Can be set separately. init is used as value
           and must not be combined with a value.
         - bin/bins, locus/loci, position/positions, motif/motifs: name(s)
           of the scope the rule applies to. The singular form takes one
           string and may not be combined with its plural form.
         - is_const: deprecated spelling of is_constant.
         - **scope_info: remaining keywords are tree scope arguments,
           passed to self._process_scope_info:

           - edge, edges: the name of the tree edge(s) affected by rule.
           - tip_names: a tuple of two tip names, specifying a tree scope
             to apply rule.
           - outgroup_name: a tip name that, provided along with tip_names,
             ensures a consistently specified tree scope.
           - is_clade: the rule applies to all edges descending from the
             most recent common ancestor defined by the
             tip_names+outgroup_name arguments.
           - is_stem: the rule applies to the edge preceding the most
             recent common ancestor defined by the tip_names+outgroup_name
             arguments.
     """
     try:
         legacy_flag = scope_info.pop('is_const')
     except KeyError:
         pass
     else:
         is_constant = legacy_flag
         deprecated('argument', 'is_const', 'is_constant', 1.6)

     par_name = str(par_name)

     scope_keywords = (
         ('bin', 'bins'),
         ('locus', 'loci'),
         ('position', 'positions'),
         ('motif', 'motifs'),
     )
     scopes = {}
     for singular, plural in scope_keywords:
         if singular in scope_info:
             chosen = scope_info.pop(singular)
             if not chosen:
                 # An empty singular is ignored; a plural given alongside
                 # it stays in scope_info, as it always has.
                 continue
             assert isinstance(chosen, basestring), ('%s=, maybe?' % plural)
             assert plural not in scope_info
             scopes[singular] = [chosen]
             continue
         if plural in scope_info:
             chosen = scope_info.pop(plural)
             if chosen:
                 scopes[singular] = chosen

     # Whatever is left over describes a tree scope.
     edges = self._process_scope_info(**scope_info)
     if edges:
         scopes['edge'] = edges

     if is_constant:
         assert not init and not lower and not upper
     elif init is not None:
         assert not value
         value = init

     self.assignAll(
         par_name, scopes, value, lower, upper, is_constant, is_independent)
Пример #6
0
class Muscle(CommandLineApplication):
    """Muscle application controller.

    Declares the command-line options of the ``muscle`` executable as
    ValuedParameter / FlagParameter entries and provides the input handlers
    and result-path lookups used to drive it.
    """

    # NOTE: this call sits in the class body, so it runs once when the class
    # is defined (i.e. when the module is imported), not per instance.
    deprecated('class',
               'cogent.app.muscle.Muscle',
               'cogent.app.muscle_v38.Muscle',
               '1.6')

    _options = {
        # Minimum spacing between anchor columns. [Integer]
        '-anchorspacing':ValuedParameter('-',Name='anchorspacing',Delimiter=' '),
        # Center parameter. Should be negative [Float]
        '-center':ValuedParameter('-',Name='center',Delimiter=' '),

        # Clustering method. cluster1 is used in iteration 1
        # and 2, cluster2 in later iterations
        '-cluster1':ValuedParameter('-',Name='cluster1',Delimiter=' '),
        '-cluster2':ValuedParameter('-',Name='cluster2',Delimiter=' '),

        # Minimum length of diagonal.
        '-diaglength':ValuedParameter('-',Name='diaglength',Delimiter=' '),

        # Discard this many positions at ends of diagonal.
        '-diagmargin':ValuedParameter('-',Name='diagmargin',Delimiter=' '),

        # Distance measure for iteration 1.
        '-distance1':ValuedParameter('-',Name='distance1',Delimiter=' '),

        # Distance measure for iterations 2, 3 ...
        '-distance2':ValuedParameter('-',Name='distance2',Delimiter=' '),

        # The gap open score. Must be negative.
        '-gapopen':ValuedParameter('-',Name='gapopen',Delimiter=' '),

        # Window size for determining whether a region is hydrophobic.
        '-hydro':ValuedParameter('-',Name='hydro',Delimiter=' '),

        # Multiplier for gap open/close penalties in hydrophobic regions.
        '-hydrofactor':ValuedParameter('-',Name='hydrofactor',Delimiter=' '),

        # Where to find the input sequences.
        '-in':ValuedParameter('-',Name='in',Delimiter=' ', Quote="\""),
        '-in1':ValuedParameter('-',Name='in1',Delimiter=' ', Quote="\""),
        '-in2':ValuedParameter('-',Name='in2',Delimiter=' ', Quote="\""),

        # Log file name (delete existing file).
        '-log':ValuedParameter('-',Name='log',Delimiter=' '),

        # Log file name (append to existing file).
        '-loga':ValuedParameter('-',Name='loga',Delimiter=' '),

        # Maximum distance between two diagonals that allows them to merge
        # into one diagonal.
        '-maxdiagbreak':ValuedParameter('-',Name='maxdiagbreak',Delimiter=' '),

        # Maximum time to run in hours. The actual time may exceed the
        # requested limit by a few minutes. Decimals are allowed, so 1.5
        # means one hour and 30 minutes.
        '-maxhours':ValuedParameter('-',Name='maxhours',Delimiter=' '),

        # Maximum number of iterations.
        '-maxiters':ValuedParameter('-',Name='maxiters',Delimiter=' '),

        # Maximum memory in Mb
        '-maxmb': ValuedParameter('-', Name='maxmb', Delimiter=' '),

        # Maximum number of new trees to build in iteration 2.
        '-maxtrees':ValuedParameter('-',Name='maxtrees',Delimiter=' '),

        # Minimum score a column must have to be an anchor.
        '-minbestcolscore':ValuedParameter('-',Name='minbestcolscore',Delimiter=' '),

        # Minimum smoothed score a column must have to be an anchor.
        '-minsmoothscore':ValuedParameter('-',Name='minsmoothscore',Delimiter=' '),

        # Objective score used by tree dependent refinement.
        # sp=sum-of-pairs score.
        # spf=sum-of-pairs score (dimer approximation)
        # spm=sp for < 100 seqs, otherwise spf
        # dp=dynamic programming score.
        # ps=average profile-sequence score.
        # xp=cross profile score.
        '-objscore':ValuedParameter('-',Name='objscore',Delimiter=' '),

        # Where to write the alignment.
        '-out':ValuedParameter('-',Name='out',Delimiter=' ', Quote="\""),

        # Where to write the file in phylip sequential format (v3.6 only).
        '-physout':ValuedParameter('-',Name='physout',Delimiter=' '),

        # Where to write the file in phylip interleaved format (v3.6 only).
        '-phyiout':ValuedParameter('-',Name='phyiout',Delimiter=' '),

        # Set to profile for aligning two alignments and adding seqs to an
        # existing alignment
        '-profile':FlagParameter(Prefix='-',Name='profile'),

        # Method used to root tree; root1 is used in iteration 1 and 2, root2
        # in later iterations.
        '-root1':ValuedParameter('-',Name='root1',Delimiter=' '),
        '-root2':ValuedParameter('-',Name='root2',Delimiter=' '),

        # Sequence type.
        '-seqtype':ValuedParameter('-',Name='seqtype',Delimiter=' '),

        # Maximum value of column score for smoothing purposes.
        '-smoothscoreceil':ValuedParameter('-',Name='smoothscoreceil',Delimiter=' '),

        # Constant used in UPGMB clustering. Determines the relative fraction
        # of average linkage (SUEFF) vs. nearest-neighbor linkage (1 - SUEFF).
        '-SUEFF':ValuedParameter('-',Name='SUEFF',Delimiter=' '),

        # Save tree produced in first or second iteration to given file in
        # Newick (Phylip-compatible) format.
        '-tree1':ValuedParameter('-',Name='tree1',Delimiter=' ', Quote="\""),
        '-tree2':ValuedParameter('-',Name='tree2',Delimiter=' ', Quote="\""),

        # Sequence weighting scheme.
        # weight1 is used in iterations 1 and 2.
        # weight2 is used for tree-dependent refinement.
        # none=all sequences have equal weight.
        # henikoff=Henikoff & Henikoff weighting scheme.
        # henikoffpb=Modified Henikoff scheme as used in PSI-BLAST.
        # clustalw=CLUSTALW method.
        # threeway=Gotoh three-way method.
        '-weight1':ValuedParameter('-',Name='weight1',Delimiter=' '),
        '-weight2':ValuedParameter('-',Name='weight2',Delimiter=' '),

        # Use anchor optimization in tree dependent refinement iterations
        '-anchors':FlagParameter(Prefix='-',Name='anchors'),

        # Write output in CLUSTALW format (default is FASTA).
        '-clw':FlagParameter(Prefix='-',Name='clw'),

        # Cluster sequences
        '-cluster':FlagParameter(Prefix='-',Name='cluster'),
        # neighborjoining is "unrecognized"
        #'-neighborjoining':FlagParameter(Prefix='-',Name='neighborjoining'),


        # Write output in CLUSTALW format with the "CLUSTAL W (1.81)" header
        # rather than the MUSCLE version. This is useful when a post-processing
        # step is picky about the file header.
        '-clwstrict':FlagParameter(Prefix='-',Name='clwstrict'),

        # Do not catch exceptions.
        '-core':FlagParameter(Prefix='-',Name='core'),

        # Write output in FASTA format. Alternatives include -clw,
        # -clwstrict, -msf and -html.
        '-fasta':FlagParameter(Prefix='-',Name='fasta'),

        # Group similar sequences together in the output. This is the default.
        # See also -stable.
        '-group':FlagParameter(Prefix='-',Name='group'),

        # Write output in HTML format (default is FASTA).
        '-html':FlagParameter(Prefix='-',Name='html'),

        # Use log-expectation profile score (VTML240). Alternatives are to use
        # -sp or -sv. This is the default for amino acid sequences.
        '-le':FlagParameter(Prefix='-',Name='le'),

        # Write output in MSF format (default is FASTA).
        '-msf':FlagParameter(Prefix='-',Name='msf'),

        # Disable anchor optimization. Default is -anchors.
        '-noanchors':FlagParameter(Prefix='-',Name='noanchors'),

        # Catch exceptions and give an error message if possible.
        '-nocore':FlagParameter(Prefix='-',Name='nocore'),

        # Do not display progress messages.
        '-quiet':FlagParameter(Prefix='-',Name='quiet'),

        # Input file is already aligned, skip first two iterations and begin
        # tree dependent refinement.
        '-refine':FlagParameter(Prefix='-',Name='refine'),

        # Use sum-of-pairs protein profile score (PAM200). Default is -le.
        '-sp':FlagParameter(Prefix='-',Name='sp'),

        # Use sum-of-pairs nucleotide profile score (BLASTZ parameters). This
        # is the only option for nucleotides, and is therefore the default.
        '-spn':FlagParameter(Prefix='-',Name='spn'),

        # Preserve input order of sequences in output file. Default is to group
        # sequences by similarity (-group).
        '-stable':FlagParameter(Prefix='-',Name='stable'),

        # Use sum-of-pairs profile score (VTML240). Default is -le.
        '-sv':FlagParameter(Prefix='-',Name='sv'),

        # Diagonal optimization
        '-diags':FlagParameter(Prefix='-',Name='diags'),
        '-diags1':FlagParameter(Prefix='-',Name='diags1'),
        '-diags2':FlagParameter(Prefix='-',Name='diags2'),


        # Terminal gaps penalized with full penalty.
        # [1] Not fully supported in this version.
        '-termgapsfull':FlagParameter(Prefix='-',Name='termgapsfull'),

        # Terminal gaps penalized with half penalty.
        # [1] Not fully supported in this version.
        '-termgapshalf':FlagParameter(Prefix='-',Name='termgapshalf'),

        # Terminal gaps penalized with half penalty if gap relative to
        # longer sequence, otherwise with full penalty.
        # [1] Not fully supported in this version.
        '-termgapshalflonger':FlagParameter(Prefix='-',Name='termgapshalflonger'),

        # Write parameter settings and progress messages to log file.
        '-verbose':FlagParameter(Prefix='-',Name='verbose'),

        # Write version string to stdout and exit.
        '-version':FlagParameter(Prefix='-',Name='version'),
    }

    # _parameters starts as a separate dict holding the same entries as
    # _options.
    _parameters = {}
    _parameters.update(_options)
    _command = "muscle"

    def _input_as_seqs(self,data):
        """Label each sequence with its 1-based position and pass them on.

        Builds '>1', seq1, '>2', seq2, ... from the items of data and hands
        the resulting list to _input_as_lines, returning its result.
        """
        lines = []
        for i,s in enumerate(data):
            #will number the sequences 1,2,3,etc.
            lines.append(''.join(['>',str(i+1)]))
            lines.append(s)
        return self._input_as_lines(lines)

    def _input_as_lines(self,data):
        """Turn -in on with the parent class's handling of the lines.

        When data is non-empty, the value returned by the parent's
        _input_as_lines(data) becomes the -in value. Always returns the
        empty string.
        """
        if data:
            self.Parameters['-in']\
                .on(super(Muscle,self)._input_as_lines(data))

        return ''

    def _input_as_string(self,data):
        """Makes data the value of a specific parameter

        This method returns the empty string. The parameter will be printed
        automatically once set.
        """
        if data:
            self.Parameters['-in'].on(str(data))
        return ''

    def _input_as_multiline_string(self, data):
        """Turn -in on with the parent class's handling of the string.

        When data is non-empty, the value returned by the parent's
        _input_as_multiline_string(data) becomes the -in value. Always
        returns the empty string.
        """
        if data:
            self.Parameters['-in']\
                .on(super(Muscle,self)._input_as_multiline_string(data))
        return ''

    def _input_as_multifile(self, data):
        """For use with the -profile option

        This input handler expects data to be a tuple containing two
        filenames. Index 0 will be set to -in1 and index 1 to -in2

        Raises ValueError when data is non-empty but cannot be unpacked
        into exactly two items. Always returns the empty string.
        """
        if data:
            try:
                filename1, filename2 = data
            except (TypeError, ValueError):
                # Only unpacking failures (not iterable / wrong number of
                # items) are translated; a bare except would also swallow
                # KeyboardInterrupt and SystemExit.
                raise ValueError("Expected two filenames")

            self.Parameters['-in'].off()
            self.Parameters['-in1'].on(filename1)
            self.Parameters['-in2'].on(filename2)
        return ''

    def _align_out_filename(self):
        """Return the absolute path of the -out value.

        Raises ValueError when -out is not switched on.
        """
        if not self.Parameters['-out'].isOn():
            raise ValueError("No output file specified.")
        return self._absolute(str(self.Parameters['-out'].Value))

    def _tree1_out_filename(self):
        """Return the absolute path of the -tree1 value.

        Raises ValueError when -tree1 is not switched on.
        """
        if not self.Parameters['-tree1'].isOn():
            raise ValueError("No tree output file specified.")
        tree_filename = self._absolute(str(self.Parameters['-tree1'].Value))
        return tree_filename

    def _tree2_out_filename(self):
        """Return the absolute path of the -tree2 value.

        Raises ValueError when -tree2 is not switched on.
        """
        if not self.Parameters['-tree2'].isOn():
            raise ValueError("No tree output file specified.")
        tree_filename = self._absolute(str(self.Parameters['-tree2'].Value))
        return tree_filename

    def _get_result_paths(self,data):
        """Map result names to ResultPath objects for the enabled outputs.

        The keys 'MuscleOut', 'Tree1Out' and 'Tree2Out' are present only
        when -out, -tree1 and -tree2 respectively are switched on. The data
        argument is not used here.
        """
        result = {}
        if self.Parameters['-out'].isOn():
            out_name = self._align_out_filename()
            result['MuscleOut'] = ResultPath(Path=out_name,IsWritten=True)
        if self.Parameters['-tree1'].isOn():
            out_name = self._tree1_out_filename()
            result['Tree1Out'] = ResultPath(Path=out_name,IsWritten=True)
        if self.Parameters['-tree2'].isOn():
            out_name = self._tree2_out_filename()
            result['Tree2Out'] = ResultPath(Path=out_name,IsWritten=True)
        return result


    def getHelp(self):
        """Muscle help"""
        # No help text is bundled: the returned string is a single newline.
        help_str = "\n"
        return help_str
Пример #7
0
 def getLocalViterbiScoreAndAlignment(self, posterior_probs=False, **kw):
     """Deprecated spelling of getViterbiScoreAndAlignment(local=True).

     Calls ``deprecated`` and then returns whatever
     self.getViterbiScoreAndAlignment gives back when invoked with
     local=True, posterior_probs and any further keyword arguments.
     """
     deprecated(
         'method',
         'getLocalViterbiScoreAndAlignment',
         'getViterbiScoreAndAlignment(local=True)',
         '1.7',
         stack_level=3)
     forwarded = dict(kw, posterior_probs=posterior_probs)
     return self.getViterbiScoreAndAlignment(local=True, **forwarded)
Пример #8
0
 def testfunction(self):
     """Deprecated alias: return the result of self.getLogLikelihood().

     Calls ``deprecated`` naming getLogLikelihood as the replacement before
     delegating.
     """
     deprecated('method', 'testfunction','getLogLikelihood', '1.6')
     log_likelihood = self.getLogLikelihood()
     return log_likelihood
Пример #9
0
 def setpar(self, param_name, value, edge=None, **scope):
     """Deprecated alias: fix a parameter's value through setParamRule.

     Calls ``deprecated`` naming setParamRule as the replacement, then
     returns the result of self.setParamRule called with is_constant=True,
     the given value and edge, and any further scope keyword arguments.
     """
     deprecated('method', 'setpar','setParamRule', '1.6')
     outcome = self.setParamRule(
         param_name,
         edge=edge,
         value=value,
         is_constant=True,
         **scope)
     return outcome
 def testfunction(self):
     """Deprecated alias: return the result of self.getLogLikelihood().

     Calls ``deprecated`` naming getLogLikelihood as the replacement before
     delegating.
     """
     deprecated('method', 'testfunction', 'getLogLikelihood', '1.6')
     log_likelihood = self.getLogLikelihood()
     return log_likelihood