Exemplo n.º 1
0
def main():
    """Command-line entry point: compute and emit a word-based distance matrix.

    Steps, in order:

    1. Parse and validate the command-line arguments.
    2. Read the sequences from the FASTA input given by ``args.fasta``.
    3. Obtain a word pattern: built from the sequences when
       ``args.word_size`` is set, otherwise read from ``args.word_pattern``.
    4. Optionally apply ``reduce_alphabet`` and/or ``merge_revcomp`` to the
       pattern, as requested by the corresponding arguments.
    5. Turn the pattern into word frequencies, compute the distance selected
       by ``args.distance`` and build the matrix keyed by sequence ids.
    6. Write the matrix to ``args.out`` when given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)

    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises,
        # which the previous open()/close() pair did not guarantee.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Exemplo n.º 2
0
 def __init__(self, *args, **kwargs):
     """Prepare the DNA fixtures shared by the tests of this class.

     After the base-class initialisation and the common test-data setup,
     a word pattern of size 2 is created from the DNA sequences, and both
     a ``Counts`` and a ``Freqs`` vector are derived from it.
     """
     super(DistanceTest, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     # NOTE(review): dna_records is presumably provided by set_test_data().
     records = self.dna_records
     self.pattern = word_pattern.create(records.seq_list, 2)
     self.counts = word_vector.Counts(records.length_list, self.pattern)
     self.freqs = word_vector.Freqs(records.length_list, self.pattern)
Exemplo n.º 3
0
    def test_freqs_pattern1(self):
        """Check the formatted ``Freqs`` output built from ``self.pattern1``.

        Also verifies that every row of ``data`` sums to 1 when rounded to
        three decimals.
        """
        result = word_vector.Freqs(self.dna_records.length_list, self.pattern1)

        expected_rows = (
            "A\t0.320 0.222 0.400",
            "G\t0.240 0.333 0.200",
            "C\t0.240 0.167 0.267",
            "T\t0.200 0.278 0.133",
        )
        expected = "\n".join(expected_rows)
        self.assertEqual(result.format(), expected)

        # Frequencies within one sequence must add up to 1; compare the
        # 3-decimal rendering to sidestep floating-point noise.
        for row in result.data:
            total = sum(row)
            self.assertEqual('{:.3f}'.format(total), '1.000')
Exemplo n.º 4
0
 def __init__(self, *args, **kwargs):
     """Prepare peptide fixtures for word sizes 1 through 4.

     For each word size a pattern is created from the peptide sequences,
     followed by the matching ``Counts`` and ``Freqs`` vectors. The results
     are stored in ``self.patterns``, ``self.counts`` and ``self.freqs``,
     so index ``k`` in each list corresponds to word size ``k + 1``.
     """
     super(DistanceTest, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     self.patterns, self.counts, self.freqs = [], [], []
     for word_size in range(1, 5):
         pattern = word_pattern.create(self.pep_records.seq_list, word_size)
         self.patterns.append(pattern)
         self.counts.append(
             word_vector.Counts(self.pep_records.length_list, pattern))
         self.freqs.append(
             word_vector.Freqs(self.pep_records.length_list, pattern))
Exemplo n.º 5
0
 def test_freqs_pattern2(self):
     """Check the formatted ``Freqs`` output built from ``self.pattern2``.

     Each row of ``data`` must first sum to 1 (at three decimals); the
     formatted output is then compared against the expected rows.
     """
     result = word_vector.Freqs(self.dna_records.length_list, self.pattern2)
     for row in result.data:
         self.assertEqual('{:.3f}'.format(sum(row)), '1.000')

     expected = "\n".join((
         "TA\t0.125 0.176 0.143",
         "GG\t0.042 0.235 0.143",
         "AC\t0.167 0.059 0.143",
         "CT\t0.000 0.176 0.071",
         "AG\t0.042 0.118 0.071",
         "CA\t0.042 0.000 0.143",
         "AT\t0.042 0.059 0.071",
         "GA\t0.042 0.059 0.071",
         "AA\t0.083 0.000 0.071",
         "CC\t0.083 0.000 0.071",
         "CG\t0.125 0.000 0.000",
         "GT\t0.125 0.000 0.000",
         "TT\t0.042 0.059 0.000",
         "TC\t0.000 0.059 0.000",
         "TG\t0.042 0.000 0.000",
     ))
     self.assertEqual(result.format(), expected)
Exemplo n.º 6
0
 def test_equal_freqs_pattern2(self):
     """Check ``FreqsStd`` output for a size-2 pattern with ``EqualFreqs``.

     The expected values are identical to those produced by decaf+py.
     """
     pattern = word_pattern.create(self.dna_records.seq_list, 2, True)
     # NOTE(review): this Freqs instance is not used by the assertion
     # below; it is still constructed, exactly as before.
     freq = word_vector.Freqs(self.dna_records.length_list, pattern)
     model = word_vector.EqualFreqs(alphabet_size=4)
     standardized = word_vector.FreqsStd(
         self.dna_records.length_list, pattern, model)

     expected = "\n".join((
         "TA\t0.113 0.189 0.169",
         "AC\t0.150 0.063 0.169",
         "GG\t0.030 0.201 0.135",
         "CT\t0.000 0.189 0.084",
         "AG\t0.038 0.126 0.084",
         "CA\t0.038 0.000 0.169",
         "AT\t0.038 0.063 0.084",
         "GA\t0.038 0.063 0.084",
         "AA\t0.060 0.000 0.067",
         "CC\t0.060 0.000 0.067",
         "CG\t0.113 0.000 0.000",
         "GT\t0.113 0.000 0.000",
         "TT\t0.030 0.050 0.000",
         "TC\t0.000 0.063 0.000",
         "TG\t0.038 0.000 0.000",
     ))
     self.assertEqual(standardized.format(), expected)