def __init__(self, *args, **kwargs):
    """Prepare the DNA fixtures shared by the tests of this case.

    After the base-class initialisation and the common test-data setup,
    a word pattern is created from the DNA sequence list (second argument
    ``2`` -- presumably the word size, TODO confirm) and a ``Counts`` and
    a ``Freqs`` vector are built from it.
    """
    super(DistanceTest, self).__init__(*args, **kwargs)
    utils.ModulesCommonTest.set_test_data()

    records = self.dna_records
    self.pattern = word_pattern.create(records.seq_list, 2)

    # Both vectors are built from the same length list and pattern.
    seq_lengths = self.dna_records.length_list
    self.counts = word_vector.Counts(seq_lengths, self.pattern)
    self.freqs = word_vector.Freqs(seq_lengths, self.pattern)
def test_counts_pattern1(self):
    """Check ``Counts`` built from ``self.pattern1``.

    The formatted output (zero decimal places) must equal the expected
    tab-separated rows, and every row of ``counts.data`` must sum to the
    corresponding hard-coded length.
    """
    expected_rows = ("A\t8 4 6", "G\t6 6 3", "C\t6 3 4", "T\t5 5 2")
    expected_lengths = (25, 18, 15)

    counts = word_vector.Counts(self.dna_records.length_list, self.pattern1)
    formatted = counts.format(decimal_places=0)
    self.assertEqual(formatted, "\n".join(expected_rows))

    # Counts in a sequence should sum to sequence length.
    for idx, row in enumerate(counts.data):
        self.assertEqual(sum(row), expected_lengths[idx])
def test_counts_pattern2(self):
    """Check ``Counts`` built from ``self.pattern2``.

    The formatted output (zero decimal places) must equal the expected
    tab-separated rows, and every row of ``counts.data`` must sum to the
    matching entry of ``counts.seq_lengths`` minus one.
    """
    expected_rows = (
        "TA\t3 3 2",
        "AC\t4 1 2",
        "GG\t1 4 2",
        "AG\t1 2 1",
        "CT\t0 3 1",
        "AA\t2 0 1",
        "AT\t1 1 1",
        "CA\t1 0 2",
        "CC\t2 0 1",
        "CG\t3 0 0",
        "GA\t1 1 1",
        "GT\t3 0 0",
        "TT\t1 1 0",
        "TC\t0 1 0",
        "TG\t1 0 0",
    )

    counts = word_vector.Counts(self.dna_records.length_list, self.pattern2)
    formatted = counts.format(decimal_places=0)
    self.assertEqual(formatted, "\n".join(expected_rows))

    # Each row is expected to total the recorded sequence length minus one.
    for idx, row in enumerate(counts.data):
        self.assertEqual(sum(row), counts.seq_lengths[idx] - 1)
def __init__(self, *args, **kwargs):
    """Prepare the peptide fixtures shared by the tests of this case.

    After the base-class initialisation and the common test-data setup,
    three parallel lists are filled -- ``self.patterns``, ``self.counts``
    and ``self.freqs`` -- with one entry for each value 1 through 4 passed
    as the second argument of ``word_pattern.create`` (presumably the
    word size, TODO confirm).
    """
    super(DistanceTest, self).__init__(*args, **kwargs)
    utils.ModulesCommonTest.set_test_data()

    self.patterns, self.counts, self.freqs = [], [], []

    for size in range(1, 5):
        pattern = word_pattern.create(self.pep_records.seq_list, size)
        self.patterns.append(pattern)

        count_vector = word_vector.Counts(self.pep_records.length_list, pattern)
        self.counts.append(count_vector)

        freq_vector = word_vector.Freqs(self.pep_records.length_list, pattern)
        self.freqs.append(freq_vector)
elif method == "wmetric": matrix = subsmat.get('blosum62') dist = wmetric.Distance(seq_records, matrix) matrix = distmatrix.create(seq_records.id_list, dist) matrix.display() elif method == "d2": patterns = [] for i in range(1, 5 + 1): p = word_pattern.create(seq_records.seq_list, i) patterns.append(p) counts = [] for p in patterns: c = word_vector.Counts(seq_records.length_list, p) counts.append(c) countsweight = [] weights = seqcontent.get_weights('protein') weightmodel = word_vector.WeightModel(weights) for p in patterns: c = word_vector.CountsWeight(seq_records, p, weightmodel) countsweight.append(c) dist = word_d2.Distance(countsweight) matrix = distmatrix.create(seq_records.id_list, dist) matrix.display() elif method == "lempelziv": distance = lempelziv.Distance(seq_records) l = ['d', 'd_star', 'd1', 'd1_star', 'd1_star2']