def get_beta_diversity(self):
    """Compute, store and return the beta diversity of the aligned population.

    Every unordered pair of distinct sequences found in ``self.alignment``
    contributes ``freq_a * freq_b * different_element(a, b) /
    self.number_of_environmental_species``, where a frequency is the number
    of occurrences of the sequence in ``self.alignment`` divided by
    ``self.number_of_host_in_population``.  The sum over all pairs is then
    multiplied by ``self.beta_diversity_coef``.

    Returns:
        The value stored in ``self.beta_diversity``.  It is ``0`` when the
        alignment holds fewer than two distinct sequences (no pair exists).
    """
    # FIXME: you can't call this unless you call
    # microbiome_sequence_alignment() first (self.alignment must exist).
    from collections import Counter

    # One pass over the alignment instead of one alignment.count() scan per
    # sequence pair inside the nested loops.
    occurrences = Counter(self.alignment)

    self.beta_diversity = 0
    if len(occurrences) > 1:
        host_count = float(self.number_of_host_in_population)
        species_count = self.number_of_environmental_species
        frequencies = [
            (sequence, count / host_count)
            for sequence, count in occurrences.items()
        ]

        total = 0
        for i in range(1, len(frequencies)):
            seq_i, fre_i = frequencies[i]
            # NOTE: fre_i could be factored out of the inner sum; it is kept
            # inside each term so the formula reads as a plain pairwise sum.
            for seq_j, fre_j in frequencies[:i]:
                total += (fre_i * fre_j
                          * different_element(seq_i, seq_j) / species_count)

        # NOTE(review): the coefficient presumably equals
        # 2 * n / (n - 1) for n hosts -- confirm where it is assigned.
        self.beta_diversity = total * self.beta_diversity_coef
    return self.beta_diversity
def watterson_tajima(self):
    """Compute Watterson's theta and Tajima's D and store them on ``self``.

    Reads ``self.number_of_host_in_population`` (sample size n),
    ``self.number_of_segregating_site`` (S) and ``self.alignment``.
    Writes ``self.theta`` (S / a_1) and ``self.D``; returns nothing.

    ``self.D`` is set to ``None`` when the statistic is undefined:

    * theta is zero (no segregating sites), or
    * the variance term ``e_1 * S + e_2 * S * (S - 1)`` is not positive.
      It is exactly zero for n == 2, which previously raised
      ``ZeroDivisionError``; a slightly negative value produced by rounding
      would make ``** 0.5`` return a complex number on Python 3.

    Formulas follow http://en.wikipedia.org/wiki/Tajima%27s_D

    Raises:
        ZeroDivisionError: if ``self.number_of_host_in_population`` is less
            than 2, because the harmonic sum a_1 is then zero.
    """
    host_count = self.number_of_host_in_population
    segregating_sites = self.number_of_segregating_site

    # a_1 and a_2: sums of 1/i and 1/i**2 for i = 1 .. n-1.
    a_1 = 0
    a_2 = 0
    for i in range(1, host_count):
        a_1 += 1 / float(i)
        a_2 += 1 / float(i ** 2)

    self.theta = segregating_sites / a_1
    if self.theta == 0:
        # D is undefined; skip the O(n**2) pairwise comparison whose result
        # would be discarded anyway.
        self.D = None
        return

    b_1 = (host_count + 1) / float(3 * host_count - 3)
    b_2 = (float(2) / 9 * (host_count ** 2 + host_count + 3)
           / (host_count ** 2 - host_count))
    c_1 = b_1 - 1 / a_1
    c_2 = b_2 - (host_count + 2) / (a_1 * host_count) + a_2 / (a_1 ** 2)
    e_1 = c_1 / a_1
    e_2 = c_2 / (a_1 ** 2 + a_2)

    variance = (e_1 * segregating_sites
                + e_2 * segregating_sites * (segregating_sites - 1))
    if variance <= 0:
        # Dividing by sqrt(variance) would fail or yield a complex value.
        self.D = None
        return

    # k: mean of different_element() over all unordered pairs of the first
    # n aligned sequences.
    difference_total = 0
    pair_count = 0
    for i in range(1, host_count):
        for j in range(i):
            difference_total += different_element(self.alignment[i],
                                                  self.alignment[j])
            pair_count += 1
    k = difference_total / float(pair_count)

    self.D = (k - self.theta) / variance ** 0.5