Пример #1
0
    def add_param_logs(self, logger):
        """
        Add entries to the logfile for this model's parameters.

        When fine-grained probability logging is enabled
        (``config.admin.log_fine_probs``), the per-feature likelihoods are
        logged (unless a single site model is used) together with the two
        rate-variation priors (if rate variation is configured).

        Independently of that setting, estimated frequencies are logged via
        ``add_frequency_logs`` and, if rate variation is configured, the
        individual feature clock rates and the scale of their Gamma prior.

        :param logger: parent element to which the log entries are added.
        """
        if self.config.admin.log_fine_probs:
            if not self.single_sitemodel:
                plate = xml.plate(logger, var="feature", range=self.features)
                xml.log(plate,
                        idref="featureLikelihood:%s:$(feature)" % self.name)
            if self.rate_variation:
                xml.log(logger, idref="featureClockRatePrior.s:%s" % self.name)
                xml.log(logger,
                        idref="featureClockRateGammaScalePrior.s:%s" %
                        self.name)

        if self.frequencies == "estimate":
            self.add_frequency_logs(logger)
        if self.rate_variation:
            plate = xml.plate(logger, var="rate", range=self.all_rates)
            xml.log(plate, idref="featureClockRate:%s:$(rate)" % self.name)
            # Log the scale, but not the shape, as it is always 1 / scale.
            # We prefer the scale because it is positively correlated with
            # the extent of variation.
            xml.log(logger, idref="featureClockRateGammaScale:%s" % self.name)
Пример #2
0
    def add_state(self, state):
        """Construct the model's state nodes.

        Adds the frequency state nodes when frequencies are estimated and,
        if rate variation is configured, the parameters for
        Gamma-distributed rate heterogeneity: one clock rate per entry of
        ``self.all_rates`` plus the shape and scale of the Gamma prior.

        :param state: parent element to which the state nodes are added.
        """

        if self.frequencies == "estimate":
            self.add_frequency_state(state)

        if self.rate_variation:
            if self.feature_rates:
                # If user specified rates have been provided for either
                # features or partitions, we need to list each rate
                # individually; rates without a user value start at 1.0
                for rate in self.all_rates:
                    xml.parameter(state,
                                  text=self.feature_rates.get(rate, 1.0),
                                  id="featureClockRate:%s:%s" %
                                  (self.name, rate),
                                  name="stateNode")
            else:
                # If not, and everything is initialised to the same
                # value, we can just whack 'em all in a big plate
                plate = xml.plate(state, var="rate", range=self.all_rates)
                xml.parameter(plate,
                              text="1.0",
                              id="featureClockRate:%s:$(rate)" % self.name,
                              name="stateNode")

            # Give Gamma shape parameter a finite domain.
            # Must be > 1.0 for the distribution to be bell-shaped,
            # rather than L-shaped.  The domain [1.1,1000] limits feature
            # rate variation to the realms of vague plausibility.
            xml.parameter(state,
                          text="5.0",
                          id="featureClockRateGammaShape:%s" % self.name,
                          lower="1.1",
                          upper="1000.0",
                          name="stateNode")
            # Gamma scale parameter's domain is defined *implicitly*
            # by the fact that the operators maintain shape*scale = 1.0
            # (the initial values 5.0 and 0.2 already satisfy this).
            xml.parameter(state,
                          text="0.2",
                          id="featureClockRateGammaScale:%s" % self.name,
                          name="stateNode")
Пример #3
0
 def add_operators(self):
     """
     Add all <operator> elements.

     Tree operators come first, then the operators of every clock and
     every model.  Finally, each clock that has at least one model with
     rate variation gets a single DeltaExchangeOperator acting on the
     feature clock rates of all those models.
     """
     self.add_tree_operators()
     for clock in self.config.clocks:
         clock.add_operators(self.run)
     for model in self.config.all_models:
         model.add_operators(self.run)

     # One DeltaExchangeOperator for feature rates per clock
     for clock in self.config.clocks:
         varying = []
         for candidate in self.config.models:
             if candidate.rate_variation and candidate.clock == clock:
                 varying.append(candidate)
         if not varying:
             continue

         # A single big operator covering the feature clock rates of
         # every rate-varying model on this clock
         exchanger = xml.operator(
             self.run,
             id="featureClockRateDeltaExchanger:%s" % clock.name,
             spec="DeltaExchangeOperator",
             weight="3.0")
         for model in varying:
             rate_plate = xml.plate(exchanger,
                                    var="rate",
                                    range=model.all_rates)
             xml.parameter(rate_plate,
                           idref="featureClockRate:%s:$(rate)" % model.name)

         # A weight vector is only needed if there has been any
         # binarisation, i.e. if some weight differs from 1
         weights = list(
             itertools.chain.from_iterable(m.weights for m in varying))
         if any(weight != 1 for weight in weights):
             xml.weightvector(
                 exchanger,
                 text=" ".join(map(str, weights)),
                 id="featureClockRateWeightParameter:%s" % clock.name,
                 spec="parameter.IntegerParameter",
                 dimension=str(len(weights)),
                 estimate="false")
Пример #4
0
 def add_prior(self, prior):
     """
     Add prior distributions for Gamma-distributed rate heterogeneity.

     Nothing is added unless rate variation is configured.

     :param prior: parent element to which the distributions are added.
     """
     if not self.rate_variation:
         return

     name = self.name
     shape_ref = "@featureClockRateGammaShape:%s" % name
     scale_ref = "@featureClockRateGammaScale:%s" % name

     # Gamma prior with mean 1 over all mutation rates
     rate_prior = xml.prior(prior,
                            id="featureClockRatePrior.s:%s" % name,
                            name="distribution")
     all_rates = xml.input(rate_prior,
                           id="featureClockRateCompound:%s" % name,
                           spec="beast.core.parameter.CompoundValuable",
                           name="x")
     rate_plate = xml.plate(all_rates, var="rate", range=self.all_rates)
     xml.var(rate_plate, idref="featureClockRate:%s:$(rate)" % name)
     xml.input(rate_prior,
               id="featureClockRatePriorGamma:%s" % name,
               spec="beast.math.distributions.Gamma",
               name="distr",
               alpha=shape_ref,
               beta=scale_ref)

     # Exponential hyperprior on the scale of the Gamma prior.
     # An Exponential favours small scales over large ones, i.e. less
     # rate variation.  The mean of 0.23 was chosen for general
     # sensibility, e.g.:
     #   - the prior is roughly 50/50 on whether the ratio of fastest to
     #     slowest feature rate in a dataset of size 200 is below or
     #     above 10;
     #   - the prior probability that this ratio is below 100 is
     #     roughly 0.90.
     scale_prior = xml.prior(
         prior,
         id="featureClockRateGammaScalePrior.s:%s" % name,
         name="distribution",
         x=scale_ref)
     # NOTE(review): the id below says "Shape" although this prior is
     # placed on the scale parameter; left unchanged as it is emitted
     # at runtime.
     xml.Exponential(
         scale_prior,
         id="featureClockRateGammaShapePriorExponential.s:%s" % name,
         mean="0.23",
         name="distr")
Пример #5
0
    def add_taxon_set(self, parent, label, langs, define_taxa=False):
        """
        Add a TaxonSet element for the given languages below ``parent``.

        Duplicate languages are dropped.  If a TaxonSet registered earlier
        by this method holds exactly the same languages, only a reference
        to it is added.  Otherwise a new TaxonSet is created under
        ``label`` and registered in ``self._taxon_sets``.

        By default the TaxonSet refers to its taxa via ``idref``, assuming
        they have been defined previously (most probably in the definition
        of the tree).  Pass ``define_taxa=True`` to define the taxa here
        (via ``id``) instead.
        """
        # Sorted, duplicate-free list so that sets can be compared directly
        unique_langs = sorted(set(langs))

        # Being asked for an empty TaxonSet means something is very wrong,
        # so fail loudly and early
        assert unique_langs

        # Re-use an earlier TaxonSet with the same languages, if any
        for known_id, known_langs in self._taxon_sets.items():
            if unique_langs == known_langs:
                xml.taxonset(parent, idref=known_id)
                return

        # A single taxon already owns its name as an ID, so the set
        # containing only that taxon needs a distinct one
        if len(unique_langs) == 1 and label == unique_langs[0]:
            label = "tx_{:}".format(label)

        # Create the new TaxonSet
        taxonset = xml.taxonset(parent, id=label, spec="TaxonSet")
        ref_attr = "id" if define_taxa else "idref"
        if len(unique_langs) <= 3:
            # Small sets: spell out every taxon for readability
            for lang in unique_langs:
                xml.taxon(taxonset, attrib={ref_attr: lang})
        else:
            # Larger sets: plate notation keeps the XML file small
            lang_plate = xml.plate(taxonset,
                                   var="language",
                                   range=unique_langs)
            xml.taxon(lang_plate, attrib={ref_attr: "$(language)"})

        # Register it so later calls can refer back to it
        self._taxon_sets[label] = unique_langs