示例#1
0
def test_simple_model_initialisation_1():
    """Check the initial table counts of a model built from single bases.

    The model is built from the four one-base sequences 'A', 'C', 'G' and
    'T', so each base has been emitted once from the empty context.  The
    root context must therefore hold exactly one table per base and no
    other context may hold any: after subtracting one from the root's
    entry, every table count must be zero.
    """
    model = cacto.cactomodelfromseqs(('A', 'C', 'G', 'T'))
    # Work on a copy so the model's own counts are not modified.
    remaining = model.t.copy()
    root_id = model.prefixindex.topdown().value.id
    remaining[root_id] -= 1
    assert (remaining == 0).all()
示例#2
0
def test_simple_model_initialisation_1():
    """Verify table-count initialisation for the simplest possible model.

    Training on 'A', 'C', 'G' and 'T' emits each base once from the empty
    context.  The expected state is one table per base in the root context
    and nothing anywhere else, which is checked by removing the root's
    expected tables from a copy of the counts and requiring all zeros.
    """
    single_bases = ('A', 'C', 'G', 'T')
    model = cacto.cactomodelfromseqs(single_bases)
    leftover = model.t.copy()  # copy: leave the model's counts intact
    root = model.prefixindex.topdown()
    leftover[root.value.id] -= 1
    all_zero = (leftover == 0).all()
    assert all_zero
示例#3
0
def test_seqs_log_likelihood():
    """Log the per-base likelihood for every training/prediction pairing.

    A model is built from each entry of ``prefix_seq_sets`` and then scored
    against every entry of ``prefix_seq_sets``.  The log likelihood is
    divided by the total number of bases in the prediction sequences and
    exponentiated before being logged.  Nothing is asserted: the test only
    checks that the computation runs.
    """
    for trainingseqs in prefix_seq_sets:
        model = cacto.cactomodelfromseqs(trainingseqs)
        for predictionseqs in prefix_seq_sets:
            log_likelihood = model.seqsloglikelihood(predictionseqs)
            num_bases = sum(len(seq) for seq in predictionseqs)
            per_base = math.exp(log_likelihood / num_bases)
            logging.info('likelihood/base: %.3f', per_base)
示例#4
0
def test_model_initialisation_1():
    """Check the table counts stored for the context 'CGA' in a 'CGAT' model.

    The model is built from the single sequence 'CGAT'.  The context is
    located by descending the prefix index along the reversed text ('AGC'),
    and its table counts must equal ``[0, 0, 0, 1]`` -- presumably a single
    table for the base 'T' that follows 'CGA' (base ordering not visible
    here; confirm against cacto).

    Raises:
        ValueError: if the context 'CGA' cannot be found in the index.
    """
    model = cacto.cactomodelfromseqs(('CGAT',))
    # Log the table counts of every node in the prefix index.
    seqan.traverse.depthfirsttraversal(model.prefixindex, model.log_table_counts)
    counts = model.t.copy()
    expected = [0, 0, 0, 1]
    cursor = model.prefixindex.topdown()
    reversed_context = 'CGA'[::-1]
    found = cursor.goDown(reversed_context)
    if not found:
        raise ValueError('Should have been able to find prefix "CGA"')
    assert (expected == counts[cursor.value.id]).all()
示例#5
0
def test_model_initialisation_1():
    """Verify the initial table counts for context 'CGA' after seeing 'CGAT'.

    After building a model from 'CGAT' the prefix index is traversed
    depth-first with ``model.log_table_counts``, then the node for 'CGA'
    is found by walking down the index with the reversed string.  Its
    entry in the table counts must be ``[0, 0, 0, 1]``.

    Raises:
        ValueError: if the index has no node for 'CGA'.
    """
    training = ('CGAT', )
    model = cacto.cactomodelfromseqs(training)
    seqan.traverse.depthfirsttraversal(
        model.prefixindex,
        model.log_table_counts,
    )
    table_counts = model.t.copy()
    node = model.prefixindex.topdown()
    # The index is searched with the context reversed, i.e. 'AGC'.
    if not node.goDown('CGA'[::-1]):
        raise ValueError('Should have been able to find prefix "CGA"')
    observed = table_counts[node.value.id]
    assert ([0, 0, 0, 1] == observed).all()
示例#6
0
def _test_empty_model_predictions():
    """Check that a model built from no sequences predicts uniformly.

    Disabled: currently dumps core due to a seqan bug.  The leading
    underscore presumably keeps the function out of test discovery.

    For each context the probability of 'A' must be 1/4 to within 1e-15.
    The probability is computed once with ``p_x_given_u`` (the same method
    the sibling simple-model test uses) and that value is both logged and
    asserted on, so the log line and the check cannot disagree.
    """
    # The empty tuple: no training sequences at all.  (Note tuple('') is
    # (), not ('',) -- the model is not given a single empty sequence.)
    seqs = ()
    model = cacto.cactomodelfromseqs(seqs)
    #
    # No matter what the context we should see p = 1/4
    #
    for u in ('', 'A', 'GC'):
        x = cacto.Value('A')
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        assert abs(.25 - p) < 1e-15
示例#7
0
def _test_empty_model_predictions():
    """Check that an empty model assigns probability 1/4 in every context.

    Disabled: currently dumps core due to a seqan bug.  The leading
    underscore presumably keeps the function out of test discovery.

    The logged value and the asserted value both come from a single call
    to ``p_x_given_u``, matching the method used by the simple-model
    prediction test, rather than from two differently named methods.
    """
    # No training sequences: tuple('') evaluates to the empty tuple, so
    # it is written as () to make that explicit.
    seqs = ()
    model = cacto.cactomodelfromseqs(seqs)
    contexts = ('', 'A', 'GC')
    #
    # No matter what the context we should see p = 1/4
    #
    for u in contexts:
        x = cacto.Value('A')
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        assert abs(.25 - p) < 1e-15
示例#8
0
def test_simple_model_predictions():
    """Check that a model trained on one of each base predicts uniformly.

    The model is built from the sequences 'A', 'C', 'G' and 'T'.  For each
    of the contexts '', 'A' and 'GC' the probability of 'A' must be 1/4 to
    within 1e-15.

    Raises:
        ValueError: if a predicted probability is not close to 1/4.
    """
    seqs = (
        'A',
        'C',
        'G',
        'T',
    )
    model = cacto.cactomodelfromseqs(seqs)
    #
    # No matter what the context we should see p(x|u) = 1/4
    #
    for u in (
            '',
            'A',
            'GC',
    ):
        x = cacto.Value('A')
        # Compute the probability once; log and check the same value.
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        if abs(.25 - p) >= 1e-15:
            raise ValueError('p not close to 1/4')
示例#9
0
def test_simple_model_predictions():
    """Check uniform predictions from a model that saw each base once.

    Trains on 'A', 'C', 'G' and 'T' and then asks for the probability of
    'A' in the contexts '', 'A' and 'GC'.  Each answer must be within
    1e-15 of 1/4.

    Raises:
        ValueError: if a predicted probability is not close to 1/4.
    """
    seqs = ('A', 'C', 'G', 'T')
    model = cacto.cactomodelfromseqs(seqs)
    contexts = ('', 'A', 'GC')
    #
    # No matter what the context we should see p(x|u) = 1/4
    #
    for u in contexts:
        x = cacto.Value('A')
        # One evaluation per context: the value logged is the value checked.
        p = model.p_x_given_u(x, u)
        logger.info('p(%s|%s) = %.3e', x, u, p)
        if abs(.25 - p) >= 1e-15:
            raise ValueError('p not close to 1/4')
示例#10
0
def test_model_predictions():
    """Cross-check three ways of computing p(x|u) on each prediction set.

    For every ``(seqs, test_xs_us)`` pair in ``prediction_sets`` a model is
    built from ``seqs`` and its posterior is calculated.  For each ``(x, u)``
    the probability is obtained from ``p_x_given_u``, from
    ``p_xord_given_ui`` on the located context, and from the posterior
    entry for that context.  All three must be close, and the posterior
    entry must sum to 1.

    A graph-drawing branch is kept for debugging but is switched off.
    """
    import seqan
    draw_graph = False  # Choose whether to build graph or not
    for seqs, test_xs_us in prediction_sets:
        model = cacto.cactomodelfromseqs(seqs)
        posterior = model.calculateposterior()
        for x, u in test_xs_us:
            logging.debug('%s|%s', x, u)
            p_direct = model.p_x_given_u(cacto.Value(x), u)
            context = model._locate_context(u, topdownhistory=True)
            context_posterior = posterior[context.value.id]
            p_from_context = model.p_xord_given_ui(
                cacto.Value(x).ordValue, context)
            # Check that the three different methods of calculating likelihoods
            # give similar results
            assertareclose(p_direct, p_from_context)
            assertareclose(p_direct, context_posterior[cacto.Value(x).ordValue])
            # Check posterior adds to 1
            assertareclose(1., context_posterior.sum())
        if draw_graph:
            import seqan.io.graphtool
            gt = seqan.io.graphtool
            builder = gt.Builder(model.prefixindex)
            layout = gt.GT.sfdp_layout(builder.graph)
            vertex_labels = builder.map_vertices(
                lambda it: '{0} {1} {2} {3}'.format(*map(int, model._su(it))))
            gt.GT.graph_draw(
                builder.graph,
                pos=layout,
                vertex_size=2,
                vertex_fill_color="lightgrey",
                vertex_font_size=8,
                vertex_text=vertex_labels,
                vertex_pen_width=gt.root_vertex_property(builder),
                edge_text=gt.edge_labels_for_output(builder),
                edge_color=gt.color_edges_by_first_symbol(builder),
                edge_end_marker="none",
                edge_pen_width=2,
                # Optional extras, currently unused:
                #edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(builder, suffix),
                #edge_pen_width=builder.edge_lengths,
                #output="graphtool.png"
            )
示例#11
0
def test_model_predictions():
    """Check that three routes to p(x|u) agree for every prediction set.

    Each entry of ``prediction_sets`` supplies training sequences and a
    list of ``(x, u)`` queries.  For every query the value returned by
    ``p_x_given_u`` is compared with ``p_xord_given_ui`` evaluated at the
    located context and with the matching element of the calculated
    posterior; the posterior for the context must also sum to 1.

    The trailing ``if False`` branch draws the prefix index as a graph and
    is disabled; flip the condition to enable it when debugging.
    """
    import seqan
    for seqs, test_xs_us in prediction_sets:
        model = cacto.cactomodelfromseqs(seqs)
        posterior = model.calculateposterior()
        for x, u in test_xs_us:
            logging.debug('%s|%s', x, u)
            p = model.p_x_given_u(cacto.Value(x), u)
            located = model._locate_context(u, topdownhistory=True)
            post = posterior[located.value.id]
            p2 = model.p_xord_given_ui(cacto.Value(x).ordValue, located)
            # Check that the three different methods of calculating likelihoods
            # give similar results
            assertareclose(p, p2)
            p3 = post[cacto.Value(x).ordValue]
            assertareclose(p, p3)
            total = post.sum()
            assertareclose(1., total)  # Check posterior adds to 1
        if False:  # Choose whether to build graph or not
            import seqan.io.graphtool
            graphtool = seqan.io.graphtool
            builder = graphtool.Builder(model.prefixindex)
            draw_options = dict(
                pos=graphtool.GT.sfdp_layout(builder.graph),
                vertex_size=2,
                vertex_fill_color="lightgrey",
                vertex_font_size=8,
                vertex_text=builder.map_vertices(
                    lambda it: '{0} {1} {2} {3}'.format(
                        *map(int, model._su(it)))),
                vertex_pen_width=graphtool.root_vertex_property(builder),
                edge_text=graphtool.edge_labels_for_output(builder),
                edge_color=graphtool.color_edges_by_first_symbol(builder),
                edge_end_marker="none",
                edge_pen_width=2,
                # Optional extras, currently unused:
                #edge_dash_style=seqan.io.graphtool.dash_non_suffix_edges(builder, suffix),
                #edge_pen_width=builder.edge_lengths,
                #output="graphtool.png"
            )
            graphtool.GT.graph_draw(builder.graph, **draw_options)
示例#12
0
def test_seqs_log_likelihood():
    """Report the likelihood per base for all pairs of sequence sets.

    Every entry of ``prefix_seq_sets`` is used once as training data and
    every entry is scored against each resulting model.  The logged figure
    is ``exp(log likelihood / total number of bases)``.  The test makes no
    assertions; it only exercises ``seqsloglikelihood``.
    """
    for trainingseqs in prefix_seq_sets:
        model = cacto.cactomodelfromseqs(trainingseqs)
        for predictionseqs in prefix_seq_sets:
            total_log_likelihood = model.seqsloglikelihood(predictionseqs)
            total_length = sum(len(s) for s in predictionseqs)
            likelihood_per_base = math.exp(total_log_likelihood / total_length)
            logging.info('likelihood/base: %.3f', likelihood_per_base)