Exemplo n.º 1
0
    def test_parses_a_graph(self, tmpdir):
        """Build a k=3 graph from 'ACGTT' and check the parsed k-mer records.

        The graph file produced by the Mccortex builder is streamed through
        ``kmer_generator_from_stream`` and every yielded record is compared
        field by field (kmer, coverage, edges) against the expected records.
        """
        # given
        kmer_size = 3
        mc_builder = (builder.Mccortex().with_dna_sequence(
            'ACGTT').with_kmer_size(kmer_size))

        expected_kmers = [
            KmerRecord('AAC', (1, ), [as_edge_set('......G.')]),
            KmerRecord('ACG', (2, ), [as_edge_set('a......T')]),
        ]

        # when
        output_graph = mc_builder.build(tmpdir)

        # Drain the generator while the file is still open; the context
        # manager then closes the handle instead of leaking it.
        with open(output_graph, 'rb') as graph_handle:
            actual_kmers = list(kmer_generator_from_stream(graph_handle))

        # then
        for kmer in actual_kmers:
            logger.info(kmer)
        for expected_kmer, kmer in zip(expected_kmers, actual_kmers):
            assert kmer.kmer == expected_kmer.kmer
            assert kmer.coverage == expected_kmer.coverage
            assert kmer.edges == expected_kmer.edges
        # zip() stops at the shorter sequence, so check the lengths as well.
        assert len(actual_kmers) == len(expected_kmers)
Exemplo n.º 2
0
    def test_gets_aaa_for_ttt_query(self, RAClass):
        """Query a graph holding only 'AAA' with both 'AAA' and 'TTT'.

        A k=3, single-color graph containing one 'AAA' record is wrapped in
        ``RAClass``; ``get_kmer_for_string`` must return a record whose
        ``kmer`` matches the stored one for either query string.
        """
        # given
        stored_record = KmerRecord('AAA', [1], [as_edge_set('........')])

        graph_builder = builder.Graph()
        graph_builder.with_kmer_size(3)
        graph_builder.with_num_colors(1)
        graph_builder.with_kmer_record(stored_record)

        graph = RAClass(graph_builder.build())

        # when / then
        for query in ('AAA', 'TTT'):
            found = graph.get_kmer_for_string(query)
            assert stored_record.kmer == found.kmer
Exemplo n.º 3
0
    def test_gets_aaa(self):
        """Check that iterating the graph's values yields the stored record.

        A k=3, single-color graph containing one 'AAA' record is wrapped in
        ``self.RAClass``; each record returned by ``values()`` is compared
        with the stored one on kmer, coverage and edges.
        """
        # given
        sized_builder = builder.Graph().with_kmer_size(3)
        graph_builder = sized_builder.with_num_colors(1)

        stored_record = KmerRecord('AAA', (1, ), [as_edge_set('........')])
        graph_builder.with_kmer_record(stored_record)

        graph = self.RAClass(graph_builder.build())

        # when / then
        records = graph.values()
        for found in records:
            assert stored_record.kmer == found.kmer
            # np.all collapses an element-wise comparison to a single bool.
            assert np.all(stored_record.coverage == found.coverage)
            assert stored_record.edges == found.edges
Exemplo n.º 4
0
    def test_retrieves_kmer_by_random_access(self, tmpdir):
        """Look up the 'AAC' record by key in a k=3 graph built from 'ACGTTT'.

        The graph file is opened in binary mode, wrapped in ``RandomAccess``
        and indexed with the k-mer string; the returned record is compared
        with the expected one on kmer, coverage and edges.
        """
        # given
        kmer_size = 3
        output_graph = (builder.Mccortex().with_dna_sequence(
            'ACGTTT').with_kmer_size(kmer_size).build(tmpdir))

        expected = KmerRecord('AAC', (1, ), [as_edge_set('A.....G.')])

        # Keep the lookup and the assertions inside the ``with`` block: the
        # handle is closed on exit instead of leaking, and nothing here
        # depends on whether RandomAccess reads from the file lazily.
        with open(output_graph, 'rb') as graph_handle:
            cg = RandomAccess(graph_handle)

            # when
            actual = cg['AAC']

            # then
            logger.info(actual)

            assert actual.kmer == expected.kmer
            assert actual.coverage == expected.coverage
            assert actual.edges == expected.edges
Exemplo n.º 5
0
 def with_kmer(self,
               kmer_string,
               color_coverage=1,
               edges='........',
               repeat_color_edges_n_times=None):
     """Add one k-mer record to this builder and return the result.

     ``kmer_string`` is either a bare k-mer, or a space-separated string
     made of the k-mer followed by N coverage integers and then N edge
     strings. In the second form the number of colors is set to N and the
     ``color_coverage`` and ``edges`` arguments are replaced by the parsed
     values.

     When ``repeat_color_edges_n_times`` is truthy and ``edges`` is a single
     string and ``color_coverage`` a single int, the number of colors is set
     to that value and both are repeated that many times. Otherwise a single
     string / int is wrapped in a one-element list.

     The k-mer size is set from the k-mer's length if not already set.

     Raises:
         AssertionError: if ``kmer_string`` contains a double space, has an
             odd number of words after the k-mer, or its length differs
             from an already-set k-mer size.
         Exception: if the reverse complement of the k-mer sorts before the
             k-mer itself.

     Returns whatever ``self.with_kmer_record`` returns for the new record.
     """
     if ' ' in kmer_string:
         assert '  ' not in kmer_string
         fields = kmer_string.split(' ')
         kmer_string = fields[0]
         per_color_fields = fields[1:]
         # Coverage words come first, then an equal number of edge words.
         assert len(per_color_fields) % 2 == 0
         num_colors = len(per_color_fields) // 2
         self.with_num_colors(num_colors)
         color_coverage = [
             int(word) for word in per_color_fields[:num_colors]
         ]
         edges = per_color_fields[num_colors:]

     if str(Seq(kmer_string).reverse_complement()) < kmer_string:
         raise Exception(
             "kmer_string '{}' is not lexlo.  Please fix.".format(
                 kmer_string))

     n_repeats = repeat_color_edges_n_times
     single_edge_string = isinstance(edges, str)
     single_coverage_int = isinstance(color_coverage, int)
     if n_repeats and single_edge_string and single_coverage_int:
         self.with_num_colors(n_repeats)
         edges = [edges for _ in range(n_repeats)]
         color_coverage = [color_coverage for _ in range(n_repeats)]
     else:
         # Normalize scalar arguments to one-element lists.
         if single_edge_string:
             edges = [edges]
         if single_coverage_int:
             color_coverage = [color_coverage]

     kmer_length = len(kmer_string)
     if not self.kmer_size_is_set:
         self.with_kmer_size(kmer_length)
     else:
         assert self.kmer_size == kmer_length

     edge_sets = [as_edge_set(edge_string) for edge_string in edges]
     record = KmerRecord(kmer_string, color_coverage, tuple(edge_sets))
     return self.with_kmer_record(record)
Exemplo n.º 6
0
    def test_parses_a_graph_with_kmer_size_32(self, tmpdir):
        """Build a graph from a poly-A contig and check the single parsed record.

        The contig is ``kmer_size`` copies of 'A', so the builder is given a
        sequence exactly one k-mer long. The graph file is streamed through
        ``kmer_generator_from_stream`` and compared with the expected record.

        NOTE(review): the test name says 32 but the k-mer size used is 33;
        confirm which one is intended.
        """
        # given
        kmer_size = 33
        contig = 'A' * kmer_size
        mc_builder = (builder.Mccortex().with_dna_sequence(
            contig).with_kmer_size(kmer_size))

        expected_kmers = [
            KmerRecord(contig, (1, ), [as_edge_set('........')]),
        ]

        # when
        output_graph = mc_builder.build(tmpdir)

        # Drain the generator while the file is still open; the context
        # manager then closes the handle instead of leaking it.
        with open(output_graph, 'rb') as graph_handle:
            actual_kmers = list(kmer_generator_from_stream(graph_handle))

        # then
        for kmer in actual_kmers:
            logger.info(kmer)
        for expected_kmer, kmer in zip(expected_kmers, actual_kmers):
            assert kmer.kmer == expected_kmer.kmer
            assert kmer.coverage == expected_kmer.coverage
            assert kmer.edges == expected_kmer.edges
        # zip() stops at the shorter sequence; without this check the test
        # would pass vacuously if no k-mers were parsed at all.
        assert len(actual_kmers) == len(expected_kmers)