def add_substmodel(self, sitemodel, feature, fname):
    """Add a ``LewisMK`` substitution model for one feature to ``sitemodel``.

    If ``self._get_substmodel_key(feature)`` yields a key that is already
    registered in ``self.subst_models``, only an ``idref`` to the existing
    model is emitted.  Otherwise a new model with id ``mk.s:<fname>`` is
    created, given a ``frequencies`` child when ``self.frequencies`` is
    "empirical", "approx" or "estimate", and registered under the key.

    Returns the ``substModel`` element produced by the ``xml`` helper.
    """
    key = self._get_substmodel_key(feature)

    # A model registered under this key already exists: just point at it.
    if key and key in self.subst_models:
        return xml.substModel(sitemodel, idref=self.subst_models[key])

    sm_id = "mk.s:%s" % fname
    substmodel = xml.substModel(
        sitemodel,
        id=sm_id,
        spec="LewisMK",
        datatype="@featureDataType.%s" % fname)

    # Work out which single attribute distinguishes the frequencies element.
    # Any other setting (e.g. uniform) needs no element at all, because the
    # LewisMK implementation handles that case itself.
    freq_mode = self.frequencies
    extra_attr = None
    if freq_mode == "empirical":
        extra_attr = ("data", "@feature_data_%s" % fname)
    elif freq_mode == "approx":
        extra_attr = ("frequencies", self._get_approx_freq_string(feature))
    elif freq_mode == "estimate":
        extra_attr = ("frequencies", "@feature_freqs_param.s:%s" % fname)

    if extra_attr is not None:
        attr_name, attr_value = extra_attr
        freq_attrs = {
            "id": "feature_freqs.s:%s" % fname,
            "spec": "Frequencies",
        }
        freq_attrs[attr_name] = attr_value
        xml.frequencies(substmodel, **freq_attrs)

    # Remember the freshly created model so later calls can reference it.
    self.subst_models[key] = sm_id
    return substmodel
def add_sitemodel(self, distribution):
    """Attach the spherical-diffusion site model to ``distribution``.

    Creates a ``siteModel`` element (id ``sphericalGeoSiteModel``) under
    ``distribution`` and gives it a ``substModel`` child using the
    ``sphericalGeo.SphericalDiffusionModel`` spec, whose precision refers to
    ``@sphericalPrecision``.  Nothing is returned.
    """
    site_model = xml.siteModel(
        distribution,
        id="sphericalGeoSiteModel",
        spec="SiteModel",
    )

    diffusion_attrs = {
        "id": "sphericalDiffusionSubstModel",
        "spec": "sphericalGeo.SphericalDiffusionModel",
        "precision": "@sphericalPrecision",
        "fast": "true",
        "threshold": "1",
    }
    xml.substModel(site_model, **diffusion_attrs)
def _add_substmodel(self, sitemodel, feature, name):
    """Attach a ``BirthDeathCovarion2`` substitution model to ``sitemodel``.

    When ``self.share_params`` is set, ``self.name`` is used instead of
    ``name`` for all ids and the model id is remembered in
    ``self.subst_model_id``.  ``feature`` is accepted for interface
    compatibility but not used here.

    Raises:
        ValueError: if ``self.use_robust_eigensystem`` is set, or if
            ``self.frequencies`` is "empirical"; neither is supported by
            this model.  Both checks run before anything is modified, so a
            rejected configuration leaves ``sitemodel`` and ``self``
            untouched.
    """
    # Reject unsupported configurations before touching the XML tree or any
    # instance state, so a failure never leaves a half-built model behind.
    if self.use_robust_eigensystem:  # pragma: no cover
        raise ValueError(
            "Currently, Beast's pseudo-Dollo covarion model does not "
            "support robust eigensystems.")
    if self.frequencies == "empirical":  # pragma: no cover
        raise ValueError("Dollo model {:} cannot derive empirical "
                         "frequencies from data".format(self.name))

    if self.share_params:
        name = self.name
    subst_model_id = "%s:pdcovarion.s" % name
    if self.share_params:
        self.subst_model_id = subst_model_id

    substmodel = xml.substModel(
        sitemodel,
        id=subst_model_id,
        spec="BirthDeathCovarion2",
        deathprob="@{:}:pdcovarion_death.s".format(name),
        originLength="@{:s}:pdcovarion_origin.s".format(name),
        switchRate="@{:}:pdcovarion_s.s".format(name))

    # The "vfrequencies" parameter here is the frequencies
    # of the *visible* states (present/absent).
    if self.frequencies == "estimate":
        substmodel.set("vfrequencies", "@%s:visiblefrequencies.s" % name)
    else:
        vfreq = xml.vfrequencies(
            substmodel,
            id="%s:visiblefrequencies.s" % name,
            dimension="3",
            spec="parameter.RealParameter")
        vfreq.text = "0.94 0.05 0.01"

    # These are the frequencies of the *hidden* states
    # (fast / slow), and are just set to 50:50.  They could be estimated,
    # in principle, but this seems to lead to serious instability problems
    # so we don't expose that possibility to the user.
    xml.parameter(
        substmodel,
        text="0.5 0.5",
        # No whitespace inside the id, matching the other ids built above.
        id="%s:hiddenfrequencies.s" % name,
        dimension="2",
        name="hfrequencies",
        lower="0.0",
        upper="1.0")
def add_substmodel(self, sitemodel, feature, fname):
    """Attach a binary CTMC substitution model to ``sitemodel``.

    If a shared model id has already been recorded in
    ``self.subst_model_id``, ``sitemodel`` simply gets a ``substModel``
    attribute referencing it and the method returns.  Otherwise a
    ``GeneralSubstitutionModel`` element is created (named after
    ``self.name`` when parameters are shared, else after ``fname``), with a
    fixed ``rates`` parameter and a ``frequencies`` child chosen according to
    ``self.frequencies`` ("estimate", "empirical" or "uniform").

    Always returns ``None``.
    """
    # Shared model already built on an earlier call: reference it and stop.
    existing_id = self.subst_model_id
    if existing_id:
        sitemodel.set("substModel", "@%s" % existing_id)
        return

    shared = self.share_params
    if shared:
        name = self.name
    else:
        name = fname

    subst_model_id = "binaryCTMC.s:%s" % name
    if shared:
        # Record the id so subsequent calls take the reference path above.
        self.subst_model_id = subst_model_id

    substmodel = xml.substModel(
        sitemodel,
        id=subst_model_id,
        spec="GeneralSubstitutionModel")
    xml.parameter(
        substmodel,
        text="1.0 1.0",
        id="rates.s:%s" % name,
        dimension=2,
        estimate="false",
        name="rates")

    mode = self.frequencies
    if mode == "uniform":
        xml.frequencies(
            substmodel,
            text="0.5 0.5",
            id="frequencies.s:%s" % name,
            dimension="2",
            spec="parameter.RealParameter")
    elif mode == "estimate":
        xml.frequencies(
            substmodel,
            id="estimatedFrequencies.s:%s" % name,
            spec="Frequencies",
            frequencies="@freqs_param.s:%s" % name)
    elif mode == "empirical":
        freq_attrs = {
            "id": "empiricalFrequencies.s:%s" % name,
            "spec": "Frequencies",
        }
        # Per-feature models read their own data block; shared models either
        # read the filtered data (single site model) or get an explicit
        # frequency string.
        if not shared:
            freq_attrs["data"] = "@feature_data_%s" % name
        elif self.single_sitemodel:
            freq_attrs["data"] = "@filtered_data_%s" % name
        else:
            freq_attrs["frequencies"] = self.build_freq_str()
        xml.frequencies(substmodel, attrib=freq_attrs)
def add_substmodel(self, sitemodel, feature, fname):
    """Attach an ``SVSGeneralSubstitutionModel`` for one feature.

    Builds the ``substModel`` element under ``sitemodel`` and a
    ``frequencies`` child whose content depends on ``self.frequencies``:

    * "estimate": references ``@feature_freqs_param.s:<fname>``;
    * "uniform": a parameter holding ``1 / self.valuecounts[feature]``;
    * "empirical": a parameter holding normalised counts taken from
      ``self.counts[feature]``, smoothed when any frequency is zero.

    Raises:
        ValueError: if ``self.frequencies`` is none of the above.  The check
            happens before any XML is created, so ``sitemodel`` is left
            untouched in that case.
    """
    mode = self.frequencies
    # Fail fast: validate the setting before attaching anything to the tree.
    if mode not in ("estimate", "uniform", "empirical"):
        raise ValueError(
            "Model BSVS does not recognize frequencies %r, "
            "should be 'uniform', 'empirical' or 'estimate'." % (mode,))

    subst_attribs = {
        "id": "svs.s:%s" % fname,
        "rateIndicator": "@rateIndicator.s:%s" % fname,
        "rates": "@relativeGeoRates.s:%s" % fname,
        "spec": "SVSGeneralSubstitutionModel"}
    if not self.symmetric:
        subst_attribs['symmetric'] = 'false'
    if self.use_robust_eigensystem:
        subst_attribs["eigenSystem"] = \
            "beast.evolution.substitutionmodel.RobustEigenSystem"
    substmodel = xml.substModel(sitemodel, **subst_attribs)

    freq_attribs = {
        "id": "feature_freqs.s:%s" % fname,
        "spec": "Frequencies",
    }

    if mode == "estimate":
        # Estimated frequencies live in a separate parameter; just refer to it.
        freq_attribs["frequencies"] = "@feature_freqs_param.s:%s" % fname
        xml.frequencies(substmodel, **freq_attribs)
        return

    n_values = self.valuecounts[feature]
    if mode == "uniform":
        freq_string = str(1.0 / n_values)
    else:
        # TODO: Do this in the BEASTly way
        observed = self.counts[feature]
        values = self.unique_values[feature]
        raw_counts = [observed.get(values[i], 0) for i in range(n_values)]
        total = float(sum(raw_counts))
        freqs = [count / total for count in raw_counts]
        # Sometimes, due to WALS oddities, there's a zero frequency, and
        # that makes BEAST sad.  So do some smoothing in these cases:
        if 0 in freqs:
            freqs = [0.1 / n_values + 0.9 * f for f in freqs]
        # Renormalise so the emitted values sum to one.
        total = float(sum(freqs))
        freq_string = " ".join(str(f / total) for f in freqs)

    freq = xml.frequencies(substmodel, **freq_attribs)
    xml.parameter(
        freq,
        text=freq_string,
        dimension=n_values,
        id="feature_frequencies.s:%s" % fname,
        name="frequencies")
def add_substmodel(self, sitemodel, feature, fname):
    """Attach a ``BinaryCovarion`` substitution model to ``sitemodel``.

    When parameters are shared and a model id is already stored in
    ``self.subst_model_id``, ``sitemodel`` only receives a ``substModel``
    attribute referencing that id.  Otherwise a new model element is built,
    named after ``self.name`` (shared) or ``fname`` (per feature), together
    with its visible, hidden and dummy frequency children.

    Always returns ``None``.
    """
    shared = self.share_params

    # Shared model already exists: reference it and do nothing else.
    if shared and self.subst_model_id:
        sitemodel.set("substModel", "@%s" % self.subst_model_id)
        return

    if shared:
        name = self.name
    else:
        name = fname
    subst_model_id = "covarion.s:%s" % name
    if shared:
        self.subst_model_id = subst_model_id

    substmodel = xml.substModel(
        sitemodel,
        id=subst_model_id,
        spec="BinaryCovarion",
        alpha="@covarion_alpha.s:%s" % name,
        switchRate="@covarion_s.s:%s" % name)

    # This model is prone to numerical instability, hence the option of a
    # more robust way of computing eigenvectors.
    if self.use_robust_eigensystem:
        substmodel.set(
            "eigenSystem",
            "beast.evolution.substitutionmodel.RobustEigenSystem")

    # "vfrequencies" are the frequencies of the *visible* states
    # (present/absent); for an empirical analysis they come from the data.
    mode = self.frequencies
    if mode == "estimate":
        substmodel.set("vfrequencies", "@freqs_param.s:%s" % name)
    else:
        vfreq = xml.vfrequencies(
            substmodel,
            id="%s:visiblefrequencies.s" % name,
            dimension="2",
            spec="parameter.RealParameter")
        if mode != "empirical":
            vfreq.text = "0.5 0.5"
        elif shared:
            vfreq.text = self.build_freq_str()
        else:
            vfreq.text = self.build_freq_str(feature)

    # Frequencies of the *hidden* states (fast / slow) are fixed at 50:50.
    # Estimating them is possible in principle but seems to cause serious
    # instability, so that option is not offered to the user.
    hidden_attrs = {
        "text": "0.5 0.5",
        "id": "%s:hiddenfrequencies.s" % name,
        "dimension": "2",
        "lower": "0.0",
        "name": "hfrequencies",
        "upper": "1.0",
    }
    xml.parameter(substmodel, **hidden_attrs)

    # Dummy frequencies: they do nothing, but keep the BinaryCovarion model
    # from complaining that the "frequencies" input (inherited from
    # GeneralSubstitutionModel) is not specified.
    xml.frequencies(
        substmodel,
        id="%s:dummyfrequences.s" % name,
        spec="Frequencies",
        frequencies="0.5 0.5")