def add_param_logs(self, logger):
    """
    Add entries to the logfile corresponding to individual feature
    substitution rates if rate variation is configured.

    When ``self.config.admin.log_fine_probs`` is set, per-feature
    likelihoods (unless a single site model is used) and the rate
    variation priors are logged as well.  Frequency logs are delegated to
    ``add_frequency_logs`` when frequencies are estimated.

    ``logger`` is handed to the ``xml`` helpers as their first argument
    (presumably the parent logger element -- confirm against ``xml``).
    """
    if self.config.admin.log_fine_probs:
        if not self.single_sitemodel:
            plate = xml.plate(logger, var="feature", range=self.features)
            xml.log(plate, idref="featureLikelihood:%s:$(feature)" % self.name)
        if self.rate_variation:
            xml.log(logger, idref="featureClockRatePrior.s:%s" % self.name)
            xml.log(logger, idref="featureClockRateGammaScalePrior.s:%s" % self.name)

    if self.frequencies == "estimate":
        self.add_frequency_logs(logger)

    if self.rate_variation:
        plate = xml.plate(logger, var="rate", range=self.all_rates)
        xml.log(plate, idref="featureClockRate:%s:$(rate)" % self.name)
        # Log the scale, but not the shape, as it is always 1 / scale.
        # We prefer the scale because it is positively correlated with the
        # extent of variation.
        xml.log(logger, idref="featureClockRateGammaScale:%s" % self.name)
def add_state(self, state):
    """Construct the model's state nodes.

    Add parameters for Gamma-distributed rate heterogeneity, if
    configured.  Frequency state nodes are delegated to
    ``add_frequency_state`` when frequencies are estimated.

    ``state`` is handed to the ``xml`` helpers as their first argument
    (presumably the parent state element -- confirm against ``xml``).
    """
    if self.frequencies == "estimate":
        self.add_frequency_state(state)

    if self.rate_variation:
        if self.feature_rates:
            # If user specified rates have been provided for either
            # features or partitions, we need to list each rate
            # individually; rates without a user value default to 1.0.
            for rate in self.all_rates:
                xml.parameter(
                    state,
                    text=self.feature_rates.get(rate, 1.0),
                    id="featureClockRate:%s:%s" % (self.name, rate),
                    name="stateNode")
        else:
            # If not, and everything is initialised to the same
            # value, we can just whack 'em all in a big plate
            plate = xml.plate(state, var="rate", range=self.all_rates)
            xml.parameter(
                plate,
                text="1.0",
                id="featureClockRate:%s:$(rate)" % self.name,
                name="stateNode")

        # Give Gamma shape parameter a finite domain.
        # Must be > 1.0 for the distribution to be bell-shaped,
        # rather than L-shaped.  The domain [1.1, 1000] limits feature
        # rate variation to the realms of vague plausibility.
        xml.parameter(
            state,
            text="5.0",
            id="featureClockRateGammaShape:%s" % self.name,
            lower="1.1",
            upper="1000.0",
            name="stateNode")
        # Gamma scale parameter's domain is defined *implicitly*
        # by the fact that the operators maintain shape*scale = 1.0
        # (initial values 5.0 * 0.2 = 1.0).
        xml.parameter(
            state,
            text="0.2",
            id="featureClockRateGammaScale:%s" % self.name,
            name="stateNode")
def add_operators(self):
    """
    Add all <operator> elements.

    Tree operators come first, then the operators of every clock and of
    every model.  Finally, each clock that has at least one model with
    rate variation gets a single DeltaExchangeOperator covering the
    feature clock rates of all those models.
    """
    self.add_tree_operators()
    for clock in self.config.clocks:
        clock.add_operators(self.run)
    for model in self.config.all_models:
        model.add_operators(self.run)

    # One DeltaExchangeOperator for feature rates per clock
    for clock in self.config.clocks:
        rate_models = [
            candidate for candidate in self.config.models
            if candidate.rate_variation and candidate.clock == clock
        ]
        if not rate_models:
            continue

        # A single big operator spanning the feature clock rates of every
        # model attached to this clock
        exchanger = xml.operator(
            self.run,
            id="featureClockRateDeltaExchanger:%s" % clock.name,
            spec="DeltaExchangeOperator",
            weight="3.0")
        for rate_model in rate_models:
            rate_plate = xml.plate(
                exchanger, var="rate", range=rate_model.all_rates)
            xml.parameter(
                rate_plate,
                idref="featureClockRate:%s:$(rate)" % rate_model.name)

        # A weight vector is only needed if there has been any
        # binarisation, i.e. if some weight differs from 1
        flat_weights = [
            weight
            for rate_model in rate_models
            for weight in rate_model.weights
        ]
        if any(weight != 1 for weight in flat_weights):
            xml.weightvector(
                exchanger,
                text=" ".join(map(str, flat_weights)),
                id="featureClockRateWeightParameter:%s" % clock.name,
                spec="parameter.IntegerParameter",
                dimension=str(len(flat_weights)),
                estimate="false")
def add_prior(self, prior):
    """
    Add prior distributions for Gamma-distributed rate heterogeneity, if
    configured.

    Does nothing unless ``self.rate_variation`` is truthy.  Otherwise two
    priors are created under ``prior``: a Gamma prior over all feature
    clock rates and an Exponential hyperprior on the Gamma scale.
    """
    if not self.rate_variation:
        return

    shape_ref = "@featureClockRateGammaShape:%s" % self.name
    scale_ref = "@featureClockRateGammaScale:%s" % self.name

    # Gamma prior with mean 1 over all mutation rates
    rate_prior = xml.prior(
        prior,
        id="featureClockRatePrior.s:%s" % self.name,
        name="distribution")
    all_rates_input = xml.input(
        rate_prior,
        id="featureClockRateCompound:%s" % self.name,
        spec="beast.core.parameter.CompoundValuable",
        name="x")
    rate_plate = xml.plate(all_rates_input, var="rate", range=self.all_rates)
    xml.var(rate_plate, idref="featureClockRate:%s:$(rate)" % self.name)
    xml.input(
        rate_prior,
        id="featureClockRatePriorGamma:%s" % self.name,
        spec="beast.math.distributions.Gamma",
        name="distr",
        alpha=shape_ref,
        beta=scale_ref)

    # Exponential hyperprior on scale of Gamma prior.
    # An Exponential prior favours small scales over large scales, i.e.
    # less rate variation.
    # Mean scale 0.23 chosen for general sensibility, e.g.:
    #   - Prior distribution is roughly 50/50 that ratio of fastest
    #     to slowest feature rate in a dataset of size 200 is below
    #     or above 10.
    #   - Prior probability of roughly 0.90 that this ratio is below
    #     100.
    scale_prior = xml.prior(
        prior,
        id="featureClockRateGammaScalePrior.s:%s" % self.name,
        name="distribution",
        x=scale_ref)
    xml.Exponential(
        scale_prior,
        id="featureClockRateGammaShapePriorExponential.s:%s" % self.name,
        mean="0.23",
        name="distr")
def add_taxon_set(self, parent, label, langs, define_taxa=False):
    """
    Add a TaxonSet element with the specified set of languages.

    If a TaxonSet previously registered by this method contains exactly
    the same set of taxa, a reference to that TaxonSet is added instead
    and nothing new is registered.

    By default, each TaxonSet contains references (``idref``) to the taxa,
    assuming that they have been defined previously (most probably in the
    definition of the tree).  Passing define_taxa=True makes the TaxonSet
    define the taxa (``id``) rather than refer to them.

    An empty ``langs`` trips an assertion.
    """
    # Deduplicate and put into a canonical order so sets can be compared
    unique_langs = sorted(set(langs))

    # Being asked to build an empty TaxonSet means something is very
    # wrong, so better to die loud and early
    assert unique_langs

    # Reuse any previously registered TaxonSet with the same languages
    for known_id, known_langs in self._taxon_sets.items():
        if unique_langs == known_langs:
            xml.taxonset(parent, idref=known_id)
            return

    if len(unique_langs) == 1 and label == unique_langs[0]:
        # Single taxa are IDs already. They cannot also be taxon set ids.
        label = "tx_{:}".format(label)

    # Nothing to reuse: create and register a new TaxonSet
    new_set = xml.taxonset(parent, id=label, spec="TaxonSet")
    taxon_key = "id" if define_taxa else "idref"
    if len(unique_langs) > 3:
        # More than 3 languages: plate notation minimises XML filesize
        language_plate = xml.plate(
            new_set, var="language", range=unique_langs)
        xml.taxon(language_plate, attrib={taxon_key: "$(language)"})
    else:
        # Small sets use the more readable one-element-per-taxon notation
        for lang in unique_langs:
            xml.taxon(new_set, attrib={taxon_key: lang})
    self._taxon_sets[label] = unique_langs