def main():
    """Command-line entry point: build a word-based distance matrix.

    Parses and validates the arguments, reads the FASTA file named by
    ``args.fasta`` and obtains a word pattern: created from the sequences
    when ``args.word_size`` is set, otherwise read from
    ``args.word_pattern``. The pattern is optionally passed through
    ``reduce_alphabet`` and ``merge_revcomp``. Word frequencies and the
    ``args.distance`` measure are then used to create the matrix, which is
    written to ``args.out`` when given, or displayed otherwise, using
    ``args.outfmt``.
    """
    parser = get_parser()
    args = validate_args(parser)
    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)
    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises;
        # the previous explicit open()/close() pair leaked it on error.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def test_distance_hamming(self):
    """Check the 'hamming' word-sets distance matrix for self.pep_records.

    The matrix is rendered with ``format(0)`` and compared to the
    expected text.
    """
    expected = "\n".join((
        " 4",
        "seq1 0 22 44 37",
        "seq2 22 0 26 31",
        "seq3 44 26 0 29",
        "seq4 37 31 29 0",
    ))
    measure = word_sets_distance.Distance(self.pep_records, 2, 'hamming')
    table = distmatrix.create(self.pep_records.id_list, measure)
    self.assertEqual(table.format(0), expected)
def test_kld_freqs(self):
    """Check the formatted 'kld' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.0932800 0.0435210",
        "seq2 0.0932800 0.0000000 0.0447391",
        "seq3 0.0435210 0.0447391 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'kld')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_manhattan_freqs(self):
    """Check the formatted 'manhattan' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 1.2156863 0.7619048",
        "seq2 1.2156863 0.0000000 0.7899160",
        "seq3 0.7619048 0.7899160 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'manhattan')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_canberra_freqs(self):
    """Check the formatted 'canberra' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 10.3372258 7.1836838",
        "seq2 10.3372258 0.0000000 6.6280959",
        "seq3 7.1836838 6.6280959 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'canberra')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance_dna_euclidnorm(self):
    """Check the bbc distance matrix for self.vector.

    ``bbc.Distance`` is called without an explicit distance type, so its
    default is used.
    """
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 1.0227476 1.9351116",
        "seq2 1.0227476 0.0000000 1.4469591",
        "seq3 1.9351116 1.4469591 0.0000000",
    ))
    measure = bbc.Distance(self.vector)
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance_dna_google(self):
    """Check the bbc 'google' distance matrix for self.vector."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 73.1311144 37.1219467",
        "seq2 73.1311144 0.0000000 33.2221873",
        "seq3 37.1219467 33.2221873 0.0000000",
    ))
    measure = bbc.Distance(self.vector, 'google')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_lcc_freqs(self):
    """Check the formatted 'lcc' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.6205496 0.4017554",
        "seq2 0.6205496 0.0000000 0.2550506",
        "seq3 0.4017554 0.2550506 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'lcc')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_braycurtis_freqs(self):
    """Check the formatted 'braycurtis' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.6078431 0.3809524",
        "seq2 0.6078431 0.0000000 0.3949580",
        "seq3 0.3809524 0.3949580 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'braycurtis')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_diff_abs_mult_freqs(self):
    """Check the formatted 'diff_abs_mult' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.0621975 0.0404611",
        "seq2 0.0621975 0.0000000 0.0531478",
        "seq3 0.0404611 0.0531478 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'diff_abs_mult')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_jsd_freqs(self):
    """Check the formatted 'jsd' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.4608882 0.2550278",
        "seq2 0.4608882 0.0000000 0.2457790",
        "seq3 0.2550278 0.2457790 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'jsd')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_chebyshev_freqs(self):
    """Check the formatted 'chebyshev' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.1936275 0.1250000",
        "seq2 0.1936275 0.0000000 0.1428571",
        "seq3 0.1250000 0.1428571 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'chebyshev')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_google_freqs(self):
    """Check the formatted 'google' distance matrix built from self.freqs."""
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.6078431 0.3809524',
        'seq2 0.6078431 0.0000000 0.3949580',
        'seq3 0.3809524 0.3949580 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'google')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_minkowski_freqs(self):
    """Check the formatted 'minkowski' distance matrix built from self.freqs."""
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.3763512 0.2532387",
        "seq2 0.3763512 0.0000000 0.2603008",
        "seq3 0.2532387 0.2603008 0.0000000",
    ))
    measure = word_distance.Distance(self.freqs, 'minkowski')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance1(self):
    """Check the fcgr distance matrix for self.dna_records.

    Vectors come from ``fcgr.create_vectors(self.dna_records, 2)`` and
    ``fcgr.Distance`` is called without an explicit distance type.
    """
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 7.5498344 5.7445626",
        "seq2 7.5498344 0.0000000 4.2426407",
        "seq3 5.7445626 4.2426407 0.0000000",
    ))
    vectors = fcgr.create_vectors(self.dna_records, 2)
    measure = fcgr.Distance(vectors)
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance_d(self):
    """Check the matrix built from self.dist, rendered with zero decimals."""
    expected = "\n".join((
        " 4",
        "seq1 0 9 15 20",
        "seq2 9 0 10 18",
        "seq3 15 10 0 17",
        "seq4 20 18 17 0",
    ))
    table = distmatrix.create(self.pep_records.id_list, self.dist)
    rendered = table.format(decimal_places=0)
    self.assertEqual(rendered, expected)
def test_distance(self):
    """Check the formatted ncd distance matrix for self.pep_records."""
    expected = "\n".join((
        " 4",
        "seq1 0.0000000 0.2698413 0.3809524 0.5238095",
        "seq2 0.2698413 0.0000000 0.2950820 0.4666667",
        "seq3 0.3809524 0.2950820 0.0000000 0.4754098",
        "seq4 0.5238095 0.4666667 0.4754098 0.0000000",
    ))
    measure = ncd.Distance(self.pep_records)
    table = distmatrix.create(self.pep_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_euclid_squared_counts(self):
    """Check the formatted 'euclid_squared' distance matrix built from self.counts."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 57.0000000 30.0000000',
        'seq2 57.0000000 0.0000000 19.0000000',
        'seq3 30.0000000 19.0000000 0.0000000',
    ))
    measure = distance.Distance(self.counts, 'euclid_squared')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def calc_distances(seqs):
    """Return the distance-matrix data computed with ncd.Distance.

    Each item of ``seqs`` is added to a fresh ``seqrecords.SeqRecords``
    under its ``name``, with its ``sequence`` joined into one string.
    The ``data`` attribute of the matrix created from those records is
    returned.
    """
    records = seqrecords.SeqRecords()
    for entry in seqs:
        label = entry.name
        residues = "".join(entry.sequence)
        records.add(label, residues)
    measure = ncd.Distance(records)
    return distmatrix.create(records.id_list, measure).data
def test_euclid_seqlen2_freqs(self):
    """Check the formatted 'euclid_seqlen2' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.0072101 0.0038263',
        'seq2 0.0072101 0.0000000 0.0039866',
        'seq3 0.0038263 0.0039866 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'euclid_seqlen2')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance2(self):
    """Check the fcgr 'google' distance matrix for self.dna_records.

    Vectors come from ``fcgr.create_vectors(self.dna_records, 2)``.
    """
    expected = "\n".join((
        " 3",
        "seq1 0.0000000 0.5833333 0.5416667",
        "seq2 0.5833333 0.0000000 0.4210526",
        "seq3 0.5416667 0.4210526 0.0000000",
    ))
    vectors = fcgr.create_vectors(self.dna_records, 2)
    measure = fcgr.Distance(vectors, 'google')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_diff_abs_add_freqs(self):
    """Check the formatted 'diff_abs_add' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.0810458 0.0507937',
        'seq2 0.0810458 0.0000000 0.0526611',
        'seq3 0.0507937 0.0526611 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'diff_abs_add')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_diff_abs_mult2_freqs(self):
    """Check the formatted 'diff_abs_mult2' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.0621975 0.0404611',
        'seq2 0.0621975 0.0000000 0.0531478',
        'seq3 0.0404611 0.0531478 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'diff_abs_mult2')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_euclid_norm_freqs(self):
    """Check the formatted 'euclid_norm' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.3763512 0.2532387',
        'seq2 0.3763512 0.0000000 0.2603008',
        'seq3 0.2532387 0.2603008 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'euclid_norm')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_angle_cos_evol_freqs(self):
    """Check the formatted 'angle_cos_evol' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.3281368 0.1625980',
        'seq2 0.3281368 0.0000000 0.1347925',
        'seq3 0.1625980 0.1347925 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'angle_cos_evol')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_euclid_norm_counts(self):
    """Check the formatted 'euclid_norm' distance matrix built from self.counts."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 7.5498344 5.4772256',
        'seq2 7.5498344 0.0000000 4.3588989',
        'seq3 5.4772256 4.3588989 0.0000000',
    ))
    measure = word_distance.Distance(self.counts, 'euclid_norm')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_angle_cos_diss_freqs(self):
    """Check the formatted 'angle_cos_diss' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.2797355 0.1500672',
        'seq2 0.2797355 0.0000000 0.1261027',
        'seq3 0.1500672 0.1261027 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'angle_cos_diss')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_euclid_squared_freqs(self):
    """Check the formatted 'euclid_squared' distance matrix built from self.freqs."""
    # The result of this method is identical to that from decaf+py.
    expected = "\n".join((
        ' 3',
        'seq1 0.0000000 0.1416402 0.0641298',
        'seq2 0.1416402 0.0000000 0.0677565',
        'seq3 0.0641298 0.0677565 0.0000000',
    ))
    measure = word_distance.Distance(self.freqs, 'euclid_squared')
    table = distmatrix.create(self.dna_records.id_list, measure)
    self.assertEqual(table.format(), expected)
def test_distance_d1_star(self):
    """Check the matrix built from self.dist after switching it to 'd1_star'."""
    expected = "\n".join((
        " 4",
        "seq1 0.0000000 0.3404255 0.5283019 0.5348837",
        "seq2 0.3404255 0.0000000 0.4042553 0.5121951",
        "seq3 0.5283019 0.4042553 0.0000000 0.5135135",
        "seq4 0.5348837 0.5121951 0.5135135 0.0000000",
    ))
    # The distance type must be switched before the matrix is created.
    self.dist.set_disttype('d1_star')
    table = distmatrix.create(self.pep_records.id_list, self.dist)
    self.assertEqual(table.format(), expected)
def test_distance_d1_star2(self):
    """Check the matrix built from self.dist after switching it to 'd1_star2'."""
    expected = "\n".join((
        " 4",
        "seq1 0.0000000 0.3404255 0.5436893 0.5609756",
        "seq2 0.3404255 0.0000000 0.4130435 0.5384615",
        "seq3 0.5436893 0.4130435 0.0000000 0.5205479",
        "seq4 0.5609756 0.5384615 0.5205479 0.0000000",
    ))
    # The distance type must be switched before the matrix is created.
    self.dist.set_disttype('d1_star2')
    table = distmatrix.create(self.pep_records.id_list, self.dist)
    self.assertEqual(table.format(), expected)