Example #1
def test2(num_obs):
    # Each of the 2 nodes contains a 4-node order-2 Hmm; the nodes are connected in a single chain
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two Hmms with 4 states and order 2 (self loop, forward 1)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    arc_id = gb.new_arc(node_id0, node_id1)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, dict())

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #2
def test1(num_obs):
    # 1 node contains a 4-node order-2 Hmm
    dimension = 2
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make one Hmm with 4 states and order 2 (self loop, forward 1)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((hmm0, ))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, dict())

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #3
    def __init__(self, processors, sendee=None, sending=True):
        super(ChainProcessor, self).__init__(sendee, sending=sending)
        processors = tuple(processors)
        if not processors:
            raise ValueError("expected at least one element in processors chain, got zero")

        # front of the chain, where we push stuff
        self.head = processors[0]


        # create chain of processors, linking each processor's send function to
        # each successor's process() function....
        gb = GraphBuilder()
        nodes, starts, ends = gb.add_graph(processors[0].graph)
        assert len(starts) == 1 and len(ends) == 1
        if len(processors) > 1:
            succ_iter = iter(processors)
            succ_iter.next()
            for pred, succ in izip(processors, succ_iter):
                pred.set_sendee(succ.process)
                nodes, new_starts, new_ends = gb.add_graph(succ.graph)
                gb.new_arc(ends[0], new_starts[0])
                starts, ends = new_starts, new_ends
                assert len(starts) == 1 and len(ends) == 1
            assert succ == processors[-1]

        # set up the list that will collect what the final element pushes
        self.collector = list()
        processors[-1].set_sendee(self.collector.append)

        self._graph = FrozenGraph(gb)
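
A minimal usage sketch of the chaining pattern above; proc_a, proc_b, and input_items are hypothetical stand-ins, and only the attributes visible in the constructor (head, collector, and each processor's process/set_sendee) are assumed:

# Hypothetical sketch: chain two existing processors and read back whatever
# the final processor pushes into the collector.
chain = ChainProcessor((proc_a, proc_b))
for item in input_items:        # input_items: assumed iterable of inputs
    chain.head.process(item)    # push at the front; results flow down the chain
results = chain.collector       # holds what the last processor in the chain sent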
Example #4
def _test11():
    # A reduced version of test10
    ret = ""
    # GmmMgr setup

    num_states = 2
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 2))

    # The topology here is slightly more complex than in the previous example
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id4)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id2, node_id4)
    gr0 = FrozenGraph(gb)

    # Make two Hmms with 2 states and order 2 (self loop, forward 1)
    # The models in the middle are special and can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, )
    spd[(0, 2)] = (0.6, )

    spd[(2, 3)] = (0.4, )
    spd[(2, 4)] = (0.6, )

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
Example #5
def test4(num_passes, num_obs):
    # Each of the 4 nodes contains a 4 (or 6)-node order-3 Hmm; the nodes are connected in a
    # diamond pattern
    ret = ""

    dimension = 2

    # Data generator setup and data generation
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 10
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr = GmmMgr(models)

    # Hmm setup
    # Make three Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    num_states = 4
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm1 = make_forward_hmm(gmm_mgr, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    # Note that here we are using the same HMM in two different TG nodes
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 0))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    # Now adapt original TrainingGraph
    for i in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        tg0.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in obs_list:
            tg0.train_one_sequence(obs)
        tg0.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    ret = tg0.to_string(full=True)
    return ret
Example #6
def _sequence_to_linear_graph(s):
    gb = GraphBuilder()
    start = gb.new_node()
    for item in s:
        end = gb.new_node()
        gb.new_arc(start, end, item)
        start = end
    return FrozenGraph(gb)
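
An illustrative call, assuming arc labels are taken directly from the sequence items (node labels are whatever new_node() assigns by default):

# Builds a 4-node, 3-arc chain whose arcs are labeled 'A', 'B', 'C' in order.
g = _sequence_to_linear_graph(('A', 'B', 'C'))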
Example #7
    def __init__(self, sendee=None, sending=True, label=None):
        self._sendee = None
        self._sending = sending

        if sendee is not None:
            self.set_sendee(sendee)
        
        self._label = label if label is not None else type(self).__name__
        gb = GraphBuilder()
        node = gb.new_node(self._label)
        self._graph = FrozenGraph(gb)
Example #8
def _test9():
    # Like test8, but now HMMs have multiple inputs and outputs.
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two Hmms with 3 states and order 3 (self loop, forward 1, forward 2)
    # The models in the middle are special and can skip directly
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    with DebugPrint("bwt_vrfy") if False else DebugPrint():
        tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret
Example #9
def build_it(linesstring):
    builder = SetGraphBuilder()
    boxen = set()
    for arc in StringIO.StringIO(linesstring):
        parts = arc.split()
        if not parts or parts[0].startswith('#'):
            continue
        if len(parts) == 1:
            builder.add_node(*parts)
        elif len(parts) == 2:
            builder.add_arc(*parts)
        else:
            assert False, str(parts)
    return FrozenGraph(builder)
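
An illustrative call in the format the parser above expects: one token per line adds a node, two tokens add an arc, and '#' lines and blank lines are skipped (the node names here are hypothetical):

# Two arcs (a -> b, b -> c) plus an isolated node d.
g = build_it("""
# tiny example
a b
b c
d
""")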
Example #10
def test5(num_obs, do_display=False):
    # A test in which one of the HMMs has a transition from an input directly to
    # an output, so it can behave as an epsilon.  This node is between two other
    # nodes in a linear arrangement.

    # Data generator setup and data generation
    dimension = 2
    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make two Hmms with 2 states and order 2 (self loop, forward 1).  The model
    # in the middle is special in that it can skip directly from the input state
    # to the output state.
    seed(0)
    num_states = 2
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5), (0.0, 0.5, 0.5), (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr1, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    # node_id2 = gb.new_node((2,2))
    arc_id = gb.new_arc(node_id0, node_id1)
    # arc_id = gb.new_arc(node_id1, node_id2)
    gr0 = FrozenGraph(gb)
    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=dict())

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #11
def test3(num_obs):
    # Each of the 4 nodes contains a 4 (or 6)-node order-3 Hmm; the nodes are connected in a
    # diamond pattern
    dimension = 2

    obs_gen = make_data_generator(dimension)
    obs_list = [obs_gen.next() for i in xrange(num_obs)]

    # GmmMgr setup
    num_states = 4
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1 = GmmMgr(models[0:10])
    gmm_mgr2 = GmmMgr(models[10:20])

    # Hmm setup
    # Make four Hmms with 4 (or 6) states and order 3 (self loop, forward 1, forward 2)
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    # NB: the asymmetry between the two successors is a key part of this test; otherwise,
    # there are no differences between the transition probs going to these successors,
    # which is the tricky case
    hmm1 = make_forward_hmm(gmm_mgr1, num_states + 2, 3, exact=True)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm3 = make_forward_hmm(gmm_mgr1, num_states, 3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2, hmm3))

    # TrainingGraph setup
    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 2))
    node_id3 = gb.new_node((3, 3))
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id1, node_id3)
    arc_id = gb.new_arc(node_id2, node_id3)
    gr0 = FrozenGraph(gb)
    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)
    tg0 = TrainingGraph(gr0, hmm_mgr, spd)

    valid, ret = validate_training_graph(tg0, gmm_mgr1, hmm_mgr, obs_list, 1,
                                         gmm_mgr2)
    return ret
Example #12
def labels_to_lattice(labels):
    """
    From a sequence of labels, build a linear lattice with labels on the arcs.
    The result will have node labels which are guaranteed to be unique.

    >>> label_dict = {'A': 1, 'B': 4, 'C': 9}
    >>> labels_to_lattice(('A', 'B', 'C'))
    FrozenGraph(GraphTables(((0, 1, 2, 3), (0, 1, 2), (1, 2, 3), ('A', 'B', 'C'))))
    """
    gb = SetGraphBuilder()
    counter = itertools.count()
    start = gb.add_node(counter.next())
    for l in labels:
        end = gb.add_node(counter.next())
        gb.add_arc(start, end, l)
        start = end
    return FrozenGraph(gb)
Example #13
def _lattice_nbest_align(l1, l2, cost_fn=None, substring_cost=False):
    if not (isinstance(l1, FrozenGraph) and isinstance(l2, FrozenGraph)):
        raise ValueError("lattice_nbest_align needs two FrozenGraph arguments")

    if cost_fn is None:
        cost_fn = _std_cost
    else:
        cost_fn = _make_safe_cost_fn(cost_fn)

    l1_canon = l1.get_canonical_DAG()
    l2_canon = l2.get_canonical_DAG()
    len1 = l1_canon.get_num_arcs()
    len2 = l2_canon.get_num_arcs()

    assert l1_canon.is_lattice()
    assert l2_canon.is_lattice()

    terms = l1_canon.get_terminals()
    lat_start1 = terms[0][0]
    lat_end1 = terms[1][0]
    terms = l2_canon.get_terminals()
    lat_start2 = terms[0][0]
    lat_end2 = terms[1][0]

    # The lattices to be aligned, l1 and l2, have labels on the arcs;
    # any labels on their nodes will be ignored.  We begin by finding
    # a topological ordering of the lattice arcs, so that each arc is
    # assigned a non-negative index.  We construct a 2-D lattice, g,
    # with nodes representing pairs of arcs in l1 and l2.  An
    # additional row and column on the top and left of g represent an
    # initial position prior to consuming any tokens from l1 and l2,
    # respectively.  The arcs in g are labeled with triples giving the
    # original labels from l1 and l2 (or None if the arc's start node
    # is on the left side or top row) and the local cost of the edit
    # represented by the arc.  There are two slightly tricky parts.
    # First, because the alignment is done on lattices, the cost
    # lattice may have links which cross several rows or columns,
    # depending on the adjacencies of l1 and l2.  Second, there's an
    # offset of 1 between the arc numbering for l1 and l2 and the node
    # numbering in g, because of the initial row and column.  Thus,
    # the node in g corresponding to the arc pair <a1, a2> is at
    # indices <a1+1, a2+1>.  One egregious (but very handy) abuse of
    # this arrangement is the occasional use of -1 as an ersatz arcID,
    # which will be converted to a 0 index into g.  Finally, because
    # the graph iterpaths function requires a single start and end
    # node, we tie all the potential end nodes in g to a special end
    # node with "ground" arcs.  A node in g is a potential end node if
    # the pair of arcs it represents are each incident on the terminal
    # node of their respective lattices.

    gb = GraphBuilder()
    # Initialize cost lattice nodes
    node_array = [[gb.new_node() for j in range(len2 + 1)]
                  for i in range(len1 + 1)]
    # We use this single node to tie all end nodes together
    end_node = gb.new_node()

    # Add first row of arcs
    for i in xrange(len1):
        start, end, x = l1_canon.get_arc(i)
        insert_cost = cost_fn(x, None)
        if end == lat_end1 and len2 == 0:
            gb.new_arc(node_array[i + 1][0], end_node, (None, None, 0))
            # print "Added ground arc for l1 arc from %d to %d with label %s" % (start, end, x)
        pred_arcs = l1_canon.get_node_in_arcs(start)
        if start == lat_start1:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[arc + 1][0], node_array[i + 1][0],
                       (x, None, insert_cost))
            # print ("Processed l1 arc from %d to %d with label %s - added %d arcs"
            #         % (start, end, x, len(pred_arcs)))

    # Add first column of arcs
    for j in xrange(len2):
        start, end, y = l2_canon.get_arc(j)
        delete_cost = 0 if substring_cost else cost_fn(None, y)
        if end == lat_end2 and len1 == 0:
            gb.new_arc(node_array[0][j + 1], end_node, (None, None, 0))
            # print "Added ground arc for l1 arc from %d to %d with label %s" % (start, end, y)
        pred_arcs = l2_canon.get_node_in_arcs(start)
        if start == lat_start2:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[0][arc + 1], node_array[0][j + 1],
                       (None, y, delete_cost))
            # print ("Processed l1 arc from %d to %d with label %s - added %d arcs"
            #         % (start, end, y, len(pred_arcs)))

    # Construct remainder of cost lattice
    for i in xrange(len1):
        for j in xrange(len2):
            start1, end1, x = l1_canon.get_arc(i)
            start2, end2, y = l2_canon.get_arc(j)
            pred_arcs1 = l1_canon.get_node_in_arcs(start1)
            pred_arcs2 = l2_canon.get_node_in_arcs(start2)
            if start1 == lat_start1:
                pred_arcs1.append(-1)
            if start2 == lat_start2:
                pred_arcs2.append(-1)
            insert_cost = cost_fn(x, None)
            delete_cost = 0 if (substring_cost and i == len1 - 1) else cost_fn(
                None, y)
            subst_cost = cost_fn(x, y)

            num_added = 0
            if end1 == lat_end1 and end2 == lat_end2:
                gb.new_arc(node_array[i + 1][j + 1], end_node, (None, None, 0))
                # print "Added ground arc for l1,l1 arcs with labels %s, %s" % (x,y)

            for arc in pred_arcs1:
                gb.new_arc(node_array[arc + 1][j + 1],
                           node_array[i + 1][j + 1], (x, None, insert_cost))
                num_added += 1

            for arc in pred_arcs2:
                gb.new_arc(node_array[i + 1][arc + 1],
                           node_array[i + 1][j + 1], (None, y, delete_cost))
                num_added += 1

            for arc1 in pred_arcs1:
                for arc2 in pred_arcs2:
                    gb.new_arc(node_array[arc1 + 1][arc2 + 1],
                               node_array[i + 1][j + 1], (x, y, subst_cost))
                    num_added += 1
                    # print ("Processed l1 arc from %d to %d with label %s "
                    #        "and l1 arc from %d to %d with label %s  - added %d arcs"
                    #          % (start1, end1, x, start2, end2, y, num_added))

    g = FrozenGraph(gb)
    assert g.is_lattice()

    # print "g.get_terminals() = %s, %s" % g.get_terminals()

    def graph_cost_fn(label):
        return label[2]

    def iter_helper(path):
        arc_labels = [path[2].get_arc_label(arc) for arc in path[1][:-1]]
        return (path[0], tuple(arc_labels))

    return imap(iter_helper,
                g.iterpaths(graph_cost_fn, node_array[0][0], end_node))
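
A hedged usage sketch, assuming labels_to_lattice from the earlier example is in scope and that iterpaths yields paths in increasing-cost order as (cost, ...) items, so the first result is the cheapest alignment:

# Align two small linear lattices with the default cost function and take the
# first alignment from the returned iterator (Python 2 iterator protocol).
lat1 = labels_to_lattice(('A', 'B', 'C'))
lat2 = labels_to_lattice(('A', 'C'))
best_cost, best_arcs = _lattice_nbest_align(lat1, lat2).next()
# best_arcs is a tuple of (l1_label, l2_label, local_cost) triples; None in the
# first or second position marks an arc consumed from only one of the lattices.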
Example #14
def go(wordnames, do_display=False):
    """
    A first example of composing finite CFGs.
    """

    comlexcfg = make_recognizer(StringIO(comlextop))
    #for non_terminal in sorted(comlexcfg.non_terminals):
    #    print non_terminal
    #for terminal in sorted(comlexcfg.terminals):
    #    print terminal
    phonecfg = make_recognizer(StringIO(comlexphones))

    wordrecognizer = comlexcfg.recognizer(wordnames)
    links, (start_id,
            is_sentential) = explore_finite(comlexcfg.recognizer(wordnames))
    g1 = FrozenGraph(make_initialized_set_graph_builder(links))

    printgraph(g1, 'Pronunciation lattice')
    do_display and display(g1, '\\n'.join(wordnames))

    breadth_first = deque()
    global_start = start_id = unstarted = object()
    seen_symbols = set()
    links = set()
    links2 = set()
    send_arg = None
    count = -1
    while True:
        is_sentential, end_id, legal, exception = wordrecognizer(send_arg)
        if global_start is unstarted:
            global_start = end_id
        if exception is not None: raise exception
        if not legal: assert is_sentential

        #print 'legal:', ' '.join(legal)

        if start_id is not unstarted:
            links.add(
                ((start_id, was_sentential), (end_id, is_sentential), symbol))

            count += 1
            count = 0
            substart2 = start_id, substart
            links2.add(((start_id, was_sentential), (substart2, False), '(-'))
            for (sub_start_id, sub_was_sentential), (
                    sub_end_id, sub_is_sentential), subsymbol in sublinks:
                sub_start_id = start_id, sub_start_id
                sub_end_id = start_id, sub_end_id
                ##                 if sub_start_id == substart:
                ##                     links2.add(((start_id, was_sentential), (sub_start_id, False), '(-'))
                links2.add((
                    (sub_start_id, False),
                    #((end_id, is_sentential) if sub_is_sentential else (sub_end_id, False)),
                    (sub_end_id, False),
                    subsymbol))
                if sub_is_sentential:
                    links2.add(
                        ((sub_end_id, False), (end_id, is_sentential), '(-'))

        for symbol in sorted(legal):
            breadth_first.appendleft(
                ((end_id, is_sentential), symbol,
                 explore_finite(phonecfg.recognizer([symbol]))))
            sublinks, (sub_start_id, sub_is_sentential) = explore_finite(
                phonecfg.recognizer([symbol]))
            if symbol not in seen_symbols:
                seen_symbols.add(symbol)
                subgraph = FrozenGraph(
                    make_initialized_set_graph_builder(sublinks))
                printgraph(subgraph, 'Phoneme %s' % (symbol, ))
                do_display and display(subgraph, symbol)
        if not breadth_first:
            break

        (start_id, was_sentential), symbol, (sublinks, (
            substart, substart_sentential)) = breadth_first.pop()
        send_arg = start_id, symbol

    g = FrozenGraph(make_initialized_set_graph_builder(links))
    None and do_display and display(g)

    g3 = FrozenGraph(make_initialized_set_graph_builder(links2))
    printgraph(g3, 'HMM graph')
    do_display and display(g3)
Example #15
def go(do_display=False):
    """
    Generate a dependency graph, and display it if optional do_display is True.

    >>> go(do_display=False)
    digraph  { 
      node [shape=box];
      ranksep=0.4;
      {rank=same; "n05";}
      {rank=same; "n01"; "n02";}
      {rank=same; "n04"; "n06";}
      {rank=same; "n00"; "n03"; "n08"; "n10";}
      {rank=same; "n07"; "n09";}
      n00  [label="Acoustic Models", style=bold, shape=box];
      n01  [label="Graph  /  Lattice", style=bold, shape=octagon];
      n02  [label="Serialization", style=bold, shape=octagon];
      n03  [label="Audio", style=bold, shape=box];
      n04  [label="Dataflow", style=bold, shape=octagon];
      n05  [label="          Utilities  /  Containers          ", style=bold, shape=octagon];
      n06  [label="CFG", style=bold, shape=octagon];
      n07  [label="Decoding", style=bold, shape=box];
      n08  [label="Lexicon", style=bold, shape=box];
      n09  [label="HTK Files", style=bold, shape=octagon];
      n10  [label="Signal Processing", style=bold, shape=box];
      n00 -> n01;
      n00 -> n02;
      n03 -> n04;
      n03 -> n05;
      n06 -> n01;
      n06 -> n05;
      n04 -> n01;
      n04 -> n05;
      n07 -> n06;
      n07 -> n04;
      n07 -> n08;
      n01 -> n02;
      n01 -> n05;
      n09 -> n00;
      n09 -> n04;
      n09 -> n08;
      n08 -> n06;
      n02 -> n05;
      n10 -> n04;
    }
    """

    g = FrozenGraph(make_initialized_set_graph_builder(dependencies))

    # make the rank sub graphs
    ranks = dict_of(set)
    for id in xrange(g.num_nodes):
        name, rank, color = g.get_node_label(id)
        ranks[rank].add('n%02d' %(id,))
    rankglobals = list()
    for rank, names in sorted(ranks.iteritems()):
        rankglobals.append('{rank=same; "' + '"; "'.join(sorted(names)) + '";}')

    # log it
    globals=['node [shape=box];', 'ranksep=0.4;'] + rankglobals
    node_label_callback=lambda x, *_: str(x[0])    
    #node_attributes_callback=lambda x, *_: ['color=%s' % (x[2],)]
    node_attributes_callback=lambda x, *_: ['style=bold', 'shape=octagon'] if x[2] else ['style=bold', 'shape=box']
    for line in g.dot_iter(globals=globals, node_label_callback=node_label_callback, node_attributes_callback=node_attributes_callback):
        print line,

    # display it
    do_display and g.dot_display(globals=globals, node_label_callback=node_label_callback, node_attributes_callback=node_attributes_callback)
Example #16
def build_model_lattice(label_lattice, model_dict, epsilon_index):
    """
    From a lattice with labels on the arcs and a dict mapping labels to model
    indices, build a lattice with (node-index, model index) pairs on the nodes,
    usable for constructing a TrainingGraph.

    The resulting lattice may have new epsilon nodes as the new start and end
    nodes; these will be given epsilon_index as their model indices.  Note that
    this function requires that label_lattice have unique labels on nodes.  XXX
    maybe do this node-labeling ourselves here?

    >>> label_dict = {'A': 1, 'B': 4, 'C': 9}
    >>> lat = labels_to_lattice(('A', 'B', 'C'))
    >>> lat
    FrozenGraph(GraphTables(((0, 1, 2, 3), (0, 1, 2), (1, 2, 3), ('A', 'B', 'C'))))

    >>> result = build_model_lattice(lat, label_dict, 15)
    >>> print result
    FrozenGraph(GraphTables((((0, 1), (1, 4), (2, 9)), (0, 1), (1, 2), (None, None))))

    # >>> result.dot_display()

    """
    if not label_lattice.is_lattice() or label_lattice.has_self_loop():
        raise ValueError("label_lattice is not a lattice or has a self loop")

    counter = itertools.count()

    # we need our node labels to be pairs of ints in which the first int is
    # unique and the second is the index of the model from the caller's
    # label_dict
    def model_node_labeler(pred_node_label, arc_label, succ_node_label):
        if not model_dict.has_key(arc_label):
            raise KeyError("Failed on lookup of label %s" % (arc_label))
        model_index = model_dict[arc_label]
        return (counter.next(), model_index)

    def empty_arc_labeler(in_arc_label, node_label, out_arc_label):
        return None

    line_graph = label_lattice.get_line_graph(model_node_labeler,
                                              empty_arc_labeler)
    starts, ends = line_graph.get_terminals()
    num_starts = len(starts)
    num_ends = len(ends)
    # If we started with a lattice, the line graph must have some terminals
    assert num_starts >= 1 and num_ends >= 1

    start_labels = (line_graph.get_label(node_id) for node_id in starts)
    end_labels = (line_graph.get_label(node_id) for node_id in ends)
    gb = SetGraphBuilder(line_graph)

    # Tie terminals together with epsilons if necessary
    if num_starts > 1:
        new_start_label = gb.add_node((counter.next(), epsilon_index))
        for node_label in start_labels:
            gb.add_arc(new_start_label, node_label)

    if num_ends > 1:
        new_end_label = gb.add_node((counter.next(), epsilon_index))
        for node_label in end_labels:
            gb.add_arc(node_label, new_end_label)

    return FrozenGraph(gb)
Example #17
def _test10():
    # Like test9, but now HMMs are arranged in a diamond pattern so inter-HMM
    # probabilities come into play
    ret = ""
    # GmmMgr setup

    num_states = 3
    dimension = 2
    models = []
    for i in xrange(num_states):
        dm = DummyModel(dimension, 1.0)
        models.append(dm)

    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 1))
    node_id5 = gb.new_node((5, 2))

    # The topology here is more complex than previous examples
    arc_id = gb.new_arc(node_id0, node_id1)
    arc_id = gb.new_arc(node_id1, node_id5)
    arc_id = gb.new_arc(node_id0, node_id2)
    arc_id = gb.new_arc(node_id2, node_id3)
    arc_id = gb.new_arc(node_id3, node_id4)
    arc_id = gb.new_arc(node_id3, node_id5)
    arc_id = gb.new_arc(node_id4, node_id5)
    gr0 = FrozenGraph(gb)

    # Make two Hmms with 3 states and order 3 (self loop, forward 1, forward 2)
    # The models in the middle are special and can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
                   (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
                   (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
                   (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    spd = {}
    spd[(0, 1)] = (0.4, 0.3, 0.8)
    spd[(0, 2)] = (0.6, 0.7, 0.2)

    spd[(3, 4)] = (0.4, 0.3, 0.8)
    spd[(3, 5)] = (0.6, 0.7, 0.2)

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)

    return ret