def test2(num_obs):
    """
    Validate a two-node chain TrainingGraph.

    Each of the two nodes holds a 4-state, order-2 Hmm (self loop, forward 1)
    and a single arc joins the first node to the second.  num_obs
    observations are drawn from the data generator and handed to
    validate_training_graph(); the second item of its result is returned.
    """
    dimension = 2
    generator = make_data_generator(dimension)
    obs_list = [next(generator) for _ in xrange(num_obs)]

    # GmmMgr setup: the first ten models back the Hmms, the remaining ten go
    # into a second manager that is handed to the validator
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1, gmm_mgr2 = GmmMgr(models[0:10]), GmmMgr(models[10:20])

    # Hmm setup: two Hmms, each with 4 states and order 2
    num_states = 4
    seed(0)
    hmms = tuple(make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
                 for _ in xrange(2))
    hmm_mgr = HmmMgr(hmms)

    # TrainingGraph setup: two nodes joined by one arc
    builder = GraphBuilder()
    first = builder.new_node((0, 0))
    second = builder.new_node((1, 1))
    builder.new_arc(first, second)
    training_graph = TrainingGraph(FrozenGraph(builder), hmm_mgr, dict())

    _valid, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                             obs_list, 1, gmm_mgr2)
    return report
def test1(num_obs):
    """
    Validate a TrainingGraph consisting of a single node.

    The node holds a 4-state, order-2 Hmm (self loop, forward 1).  num_obs
    observations are drawn from the data generator and handed to
    validate_training_graph(); the second item of its result is returned.
    """
    dimension = 2
    generator = make_data_generator(dimension)
    obs_list = [next(generator) for _ in xrange(num_obs)]

    # GmmMgr setup: the first ten models back the Hmm, the remaining ten go
    # into a second manager that is handed to the validator
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1, gmm_mgr2 = GmmMgr(models[0:10]), GmmMgr(models[10:20])

    # Hmm setup: one Hmm with 4 states and order 2
    num_states = 4
    seed(0)
    only_hmm = make_forward_hmm(gmm_mgr1, num_states, 2, exact=True)
    hmm_mgr = HmmMgr((only_hmm, ))

    # TrainingGraph setup: a lone node and no arcs
    builder = GraphBuilder()
    builder.new_node((0, 0))
    training_graph = TrainingGraph(FrozenGraph(builder), hmm_mgr, dict())

    _valid, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                             obs_list, 1, gmm_mgr2)
    return report
def __init__(self, processors, sendee=None, sending=True):
    """
    Link the given processors into a chain and build the chain's graph.

    processors is an iterable holding at least one processor; ValueError is
    raised when it is empty.  sendee and sending are forwarded to the base
    class initializer.  Whatever the last processor sends is appended to
    self.collector.
    """
    super(ChainProcessor, self).__init__(sendee, sending=sending)
    chain = tuple(processors)
    if len(chain) == 0:
        raise ValueError("expected at least one element in processors chain, got zero")

    # front of the chain, where we push stuff
    self.head = chain[0]

    # Seed the graph with the first processor's graph; every graph that is
    # added must expose exactly one start node and one end node.
    builder = GraphBuilder()
    _nodes, starts, ends = builder.add_graph(chain[0].graph)
    assert len(starts) == 1 and len(ends) == 1

    if len(chain) > 1:
        # Hook each processor's send function to its successor's process()
        # and splice the successor's graph onto the current end node.
        for pred, succ in izip(chain, chain[1:]):
            pred.set_sendee(succ.process)
            _nodes, succ_starts, succ_ends = builder.add_graph(succ.graph)
            builder.new_arc(ends[0], succ_starts[0])
            starts, ends = succ_starts, succ_ends
            assert len(starts) == 1 and len(ends) == 1
        assert succ == chain[-1]

    # set up the list that will collect what the final element pushes
    self.collector = list()
    chain[-1].set_sendee(self.collector.append)

    self._graph = FrozenGraph(builder)
def _test11(do_display=None):
    """
    A reduced version of test10: convert a small branching TrainingGraph to
    a standalone Hmm and return its full string dump.

    do_display -- when true, the TrainingGraph is shown via dot_display(),
    both collapsed and with Hmms expanded.  The original body read
    do_display without defining it; to stay backward compatible, leaving
    the argument as None picks up a module-level do_display flag if one
    exists and otherwise behaves as False.
    """
    if do_display is None:
        do_display = globals().get('do_display', False)

    ret = ""

    # GmmMgr setup
    num_states = 2
    dimension = 2
    models = [DummyModel(dimension, 1.0) for _ in xrange(num_states)]
    gmm_mgr = GmmMgr(models)

    gb = GraphBuilder()
    node_id0 = gb.new_node((0, 0))
    node_id1 = gb.new_node((1, 1))
    node_id2 = gb.new_node((2, 1))
    node_id3 = gb.new_node((3, 1))
    node_id4 = gb.new_node((4, 2))

    # The topology here is slightly more complex than a simple chain: node 0
    # branches to nodes 1 and 2, and node 2 branches to nodes 3 and 4
    gb.new_arc(node_id0, node_id1)
    gb.new_arc(node_id1, node_id4)
    gb.new_arc(node_id0, node_id2)
    gb.new_arc(node_id2, node_id3)
    gb.new_arc(node_id3, node_id4)
    gb.new_arc(node_id2, node_id4)
    gr0 = FrozenGraph(gb)

    # Make two forward Hmms with num_states (2) states and order 2 (self
    # loop, forward 1).  The model used by the middle nodes is special and
    # can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=False)
    hmm1 = Hmm(1)
    trans = array(((0.0, 0.5, 0.5),
                   (0.0, 0.5, 0.5),
                   (0.0, 0.0, 0.0)))
    hmm1.build_model(gmm_mgr, (0, ), 1, 1, trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=2, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Split probabilities for the two branch points (nodes 0 and 2)
    spd = {}
    spd[(0, 1)] = (0.4, )
    spd[(0, 2)] = (0.6, )
    spd[(2, 3)] = (0.4, )
    spd[(2, 4)] = (0.6, )

    tg0 = TrainingGraph(gr0, hmm_mgr, split_prob_dict=spd)

    if do_display:
        tg0.dot_display()
        tg0.dot_display(expand_hmms=True)

    # The constant condition is a hand-edited switch for the debug stream
    with DebugPrint("bwt_ctsh") if True else DebugPrint():
        result_hmm = tg0.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + result_hmm.to_string(
        full=True)
    return ret
def test4(num_passes, num_obs):
    """
    Train a diamond-shaped TrainingGraph and return its full string dump.

    The graph has four nodes; three order-3 Hmms (self loop, forward 1,
    forward 2) with 4, 6 and 4 states are created, and the first one is used
    by both the entry and the exit node.  num_passes training passes are run
    over num_obs observations drawn from the data generator.
    """
    dimension = 2

    # Data generator setup and data generation
    generator = make_data_generator(dimension)
    obs_list = [next(generator) for _ in xrange(num_obs)]

    # GmmMgr setup
    num_models = 10
    gmm_mgr = GmmMgr(make_standard_gmms(dimension, num_models))

    # Hmm setup
    num_states = 4
    seed(0)
    state_counts = (num_states, num_states + 2, num_states)
    hmm_mgr = HmmMgr(tuple(make_forward_hmm(gmm_mgr, count, 3, exact=True)
                           for count in state_counts))

    # TrainingGraph setup; note that Hmm 0 appears in two different nodes
    builder = GraphBuilder()
    node_ids = [builder.new_node(label)
                for label in ((0, 0), (1, 1), (2, 2), (3, 0))]
    for src, dst in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(node_ids[src], node_ids[dst])

    spd = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
    }
    training_graph = TrainingGraph(FrozenGraph(builder), hmm_mgr, spd)

    # Now adapt the original TrainingGraph
    for _ in xrange(num_passes):
        gmm_mgr.set_adaptation_state("INITIALIZING")
        gmm_mgr.clear_all_accumulators()
        training_graph.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        for obs in obs_list:
            training_graph.train_one_sequence(obs)
        training_graph.end_training()
        gmm_mgr.set_adaptation_state("APPLYING")
        gmm_mgr.apply_all_accumulators()
        gmm_mgr.set_adaptation_state("NOT_ADAPTING")

    return training_graph.to_string(full=True)
def _sequence_to_linear_graph(s):
    """
    Build a linear FrozenGraph with one arc per item of s.

    A fresh node is created for the start and after every item; each item is
    passed as the third argument of new_arc() for the arc joining the
    previous node to the new one.
    """
    builder = GraphBuilder()
    previous = builder.new_node()
    for element in s:
        current = builder.new_node()
        builder.new_arc(previous, current, element)
        previous = current
    return FrozenGraph(builder)
def __init__(self, sendee=None, sending=True, label=None):
    """
    Initialize sending state, label and the one-node graph of the processor.

    sendee, when not None, is installed through set_sendee().  label
    defaults to the name of the instance's class.
    """
    self._sendee = None
    self._sending = sending
    if sendee is not None:
        self.set_sendee(sendee)

    if label is None:
        label = type(self).__name__
    self._label = label

    # The graph of a plain processor is a single node carrying its label
    builder = GraphBuilder()
    builder.new_node(self._label)
    self._graph = FrozenGraph(builder)
def _test9():
    """
    Like test8, but now HMMs have multiple inputs and outputs.

    Builds a six-node linear TrainingGraph, converts it to a standalone Hmm
    and returns a banner followed by that Hmm's full string dump.
    """
    ret = ""

    # GmmMgr setup
    num_states = 3
    dimension = 2
    gmm_mgr = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # Six nodes in a single chain; the four middle nodes share Hmm 1
    builder = GraphBuilder()
    node_labels = ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))
    node_ids = [builder.new_node(label) for label in node_labels]
    for src, dst in zip(node_ids[:-1], node_ids[1:]):
        builder.new_arc(src, dst)
    frozen = FrozenGraph(builder)

    # Two forward Hmms with 3 states and order 3 (self loop, forward 1,
    # forward 2).  The model in the middle is special and can skip directly.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    skip_trans = array((
        (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
        (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
    ))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, skip_trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Hand-edited switch for the "bwt_vrfy" debug stream (currently off)
    trace_verify = False
    with DebugPrint("bwt_vrfy") if trace_verify else DebugPrint():
        training_graph = TrainingGraph(frozen, hmm_mgr, split_prob_dict=dict())

    standalone = training_graph.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + standalone.to_string(
        full=True)
    return ret
def build_it(linesstring):
    """
    Build a FrozenGraph from a multi-line textual description.

    Each line is split on whitespace.  Blank lines and lines whose first
    field starts with '#' are skipped, a one-field line adds a node, and a
    two-field line adds an arc between the two named nodes.  Any other
    field count trips an assertion.
    """
    builder = SetGraphBuilder()
    for line in StringIO.StringIO(linesstring):
        fields = line.split()
        if not fields or fields[0].startswith('#'):
            continue
        field_count = len(fields)
        if field_count == 1:
            builder.add_node(fields[0])
        elif field_count == 2:
            builder.add_arc(fields[0], fields[1])
        else:
            assert False, str(fields)
    return FrozenGraph(builder)
def test5(num_obs, do_display=False):
    """
    Validate a TrainingGraph in which one Hmm can behave as an epsilon.

    The second Hmm has a transition straight from its input to its output.
    Three Hmms are created, but only the first two are placed in the graph;
    the third node and its arc are currently disabled.  When do_display is
    true the graph is shown with and without Hmms expanded.  Returns the
    second item of the validate_training_graph() result.
    """
    # Data generator setup and data generation
    dimension = 2
    generator = make_data_generator(dimension)
    obs_list = [next(generator) for _ in xrange(num_obs)]

    # GmmMgr setup
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1, gmm_mgr2 = GmmMgr(models[0:10]), GmmMgr(models[10:20])

    # Hmm setup: two forward Hmms with 2 states and order 2 (self loop,
    # forward 1), plus a special model that can skip directly from the input
    # state to the output state.
    seed(0)
    num_states = 2
    hmm0 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm1 = Hmm(1)
    skip_trans = array((
        (0.0, 0.5, 0.5),
        (0.0, 0.5, 0.5),
        (0.0, 0.0, 0.0),
    ))
    hmm1.build_model(gmm_mgr1, (0, ), 1, 1, skip_trans)
    hmm2 = make_forward_hmm(gmm_mgr1, num_states, 2, exact=False)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # TrainingGraph setup
    builder = GraphBuilder()
    first = builder.new_node((0, 0))
    second = builder.new_node((1, 1))
    # third = builder.new_node((2, 2))
    builder.new_arc(first, second)
    # builder.new_arc(second, third)
    training_graph = TrainingGraph(FrozenGraph(builder), hmm_mgr,
                                   split_prob_dict=dict())

    if do_display:
        training_graph.dot_display()
        training_graph.dot_display(expand_hmms=True)

    _valid, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                             obs_list, 1, gmm_mgr2)
    return report
def test3(num_obs):
    """
    Validate a diamond-shaped TrainingGraph of four nodes.

    Each node holds its own order-3 Hmm (self loop, forward 1, forward 2);
    the Hmms have 4, 6, 4 and 4 states.  Returns the second item of the
    validate_training_graph() result.
    """
    dimension = 2
    generator = make_data_generator(dimension)
    obs_list = [next(generator) for _ in xrange(num_obs)]

    # GmmMgr setup
    num_states = 4
    num_models = 20
    models = make_standard_gmms(dimension, num_models)
    gmm_mgr1, gmm_mgr2 = GmmMgr(models[0:10]), GmmMgr(models[10:20])

    # Hmm setup.  NB: the asymmetry between the two successors of the first
    # node (6 states versus 4) is a key part of this test; otherwise there
    # are no differences between the transition probs going to these
    # successors, which is the tricky case.
    seed(0)
    state_counts = (num_states, num_states + 2, num_states, num_states)
    hmm_mgr = HmmMgr(tuple(make_forward_hmm(gmm_mgr1, count, 3, exact=True)
                           for count in state_counts))

    # TrainingGraph setup: node 0 fans out to nodes 1 and 2, which both
    # lead to node 3
    builder = GraphBuilder()
    node_ids = [builder.new_node(label)
                for label in ((0, 0), (1, 1), (2, 2), (3, 3))]
    for src, dst in ((0, 1), (0, 2), (1, 3), (2, 3)):
        builder.new_arc(node_ids[src], node_ids[dst])

    spd = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
    }
    training_graph = TrainingGraph(FrozenGraph(builder), hmm_mgr, spd)

    _valid, report = validate_training_graph(training_graph, gmm_mgr1, hmm_mgr,
                                             obs_list, 1, gmm_mgr2)
    return report
def labels_to_lattice(labels):
    """
    From a sequence of labels, build a linear lattice with labels on the arcs.
    The result will have node labels which are guaranteed to be unique.

    >>> label_dict = {'A': 1, 'B': 4, 'C': 9}
    >>> labels_to_lattice(('A', 'B', 'C'))
    FrozenGraph(GraphTables(((0, 1, 2, 3), (0, 1, 2), (1, 2, 3), ('A', 'B', 'C'))))
    """
    builder = SetGraphBuilder()
    # Successive integers from the counter serve as the node labels
    node_labels = itertools.count()
    tail = builder.add_node(next(node_labels))
    for arc_label in labels:
        head = builder.add_node(next(node_labels))
        builder.add_arc(tail, head, arc_label)
        tail = head
    return FrozenGraph(builder)
def _lattice_nbest_align(l1, l2, cost_fn=None, substring_cost=False):
    """
    Build the edit-cost lattice for two arc-labeled lattices and return an
    iterator over its paths.

    l1, l2 -- FrozenGraph instances; ValueError is raised otherwise.  Their
    canonical DAGs must be lattices (checked with assert).

    cost_fn -- called as cost_fn(x, None), cost_fn(None, y) and
    cost_fn(x, y), where x is an l1 arc label and y an l2 arc label.  When
    None, _std_cost is used; otherwise the function is wrapped with
    _make_safe_cost_fn.

    substring_cost -- when true, arcs that consume only an l2 label cost 0
    along the initial l1 position (before any l1 arc is consumed) and at
    the l1 arc with index len1 - 1.

    Returns an iterator of (path[0], labels) pairs, one per path produced by
    iterpaths() from the initial node to the end node.  labels is the tuple
    of arc labels (l1_label, l2_label, cost) along the path with the final
    arc (the ground arc into the end node) dropped.  path[0] is presumably
    the total path cost -- TODO confirm against iterpaths().
    """
    if not (isinstance(l1, FrozenGraph) and isinstance(l2, FrozenGraph)):
        raise ValueError("lattice_nbest_align needs two FrozenGraph arguments")
    if cost_fn is None:
        cost_fn = _std_cost
    else:
        cost_fn = _make_safe_cost_fn(cost_fn)
    l1_canon = l1.get_canonical_DAG()
    l2_canon = l2.get_canonical_DAG()
    len1 = l1_canon.get_num_arcs()
    len2 = l2_canon.get_num_arcs()
    assert l1_canon.is_lattice()
    assert l2_canon.is_lattice()
    # The first start terminal and first end terminal of each lattice
    terms = l1_canon.get_terminals()
    lat_start1 = terms[0][0]
    lat_end1 = terms[1][0]
    terms = l2_canon.get_terminals()
    lat_start2 = terms[0][0]
    lat_end2 = terms[1][0]

    # The lattices to be aligned, l1 and l2, have labels on the arcs;
    # any labels on their nodes will be ignored.  We begin by finding
    # a topological ordering of the lattice arcs, so that each arc is
    # assigned a non-negative index.  We construct a 2-D lattice, g,
    # with nodes representing pairs of arcs in l1 and l2.  An
    # additional row and column on the top and left of g represent an
    # initial position prior to consuming any tokens from l1 and l2,
    # respectively.  The arcs in g are labeled with triples giving the
    # original labels from l1 and l2 (or None if the arc's start node
    # is on the left side or top row) and the local cost of the edit
    # represented by the arc.  There are two slightly tricky parts.
    # First, because the alignment is done on lattices, the cost
    # lattice may have links which cross several rows or columns,
    # depending on the adjacencies of l1 and l2.  Second, there's an
    # offset of 1 between the arc numbering for l1 and l2 and the node
    # numbering in g, because of the initial row and column.  Thus,
    # the node in g corresponding to the arc pair <a1, a2> is at
    # indices <a1+1, a2+1>.  One egregious (but very handy) abuse of
    # this arrangement is the occasional use of -1 as an ersatz arcID,
    # which will be converted to a 0 index into g.  Finally, because
    # the graph iterpaths function requires a single start and end
    # node, we tie all the potential end nodes in g to a special end
    # node with "ground" arcs.  A node in g is a potential end node if
    # the pair of arcs it represents are each incident on the terminal
    # node of their respective lattices.
    #
    # NOTE(review): ground arcs are only created inside the loops below, so
    # when both lattices have zero arcs nothing is connected to end_node;
    # confirm whether that input can occur.

    gb = GraphBuilder()
    # Initialize cost lattice nodes; the first index follows l1 arcs, the
    # second follows l2 arcs, and index 0 is the initial position
    node_array = [[gb.new_node() for j in range(len2 + 1)] for i in range(len1 + 1)]
    # We use this single node to tie all end nodes together
    end_node = gb.new_node()

    # Add the arcs that consume only l1 labels (second index fixed at 0)
    for i in xrange(len1):
        start, end, x = l1_canon.get_arc(i)
        insert_cost = cost_fn(x, None)
        # With no l2 arcs at all, these nodes are the only candidate ends
        if end == lat_end1 and len2 == 0:
            gb.new_arc(node_array[i + 1][0], end_node, (None, None, 0))
            # print "Added ground arc for l1 arc from %d to %d with label %s" % (start, end, x)
        pred_arcs = l1_canon.get_node_in_arcs(start)
        # -1 stands for "no arc consumed yet"; it maps to index 0 below.
        # NOTE(review): this appends to the list returned by
        # get_node_in_arcs(); presumably that is a fresh list -- confirm.
        if start == lat_start1:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[arc + 1][0], node_array[i + 1][0], (x, None, insert_cost))
        # print ("Processed l1 arc from %d to %d with label %s - added %d arcs"
        #        % (start, end, x, len(pred_arcs)))

    # Add the arcs that consume only l2 labels (first index fixed at 0)
    for j in xrange(len2):
        start, end, y = l2_canon.get_arc(j)
        delete_cost = 0 if substring_cost else cost_fn(None, y)
        # With no l1 arcs at all, these nodes are the only candidate ends
        if end == lat_end2 and len1 == 0:
            gb.new_arc(node_array[0][j + 1], end_node, (None, None, 0))
            # print "Added ground arc for l2 arc from %d to %d with label %s" % (start, end, y)
        pred_arcs = l2_canon.get_node_in_arcs(start)
        if start == lat_start2:
            pred_arcs.append(-1)
        for arc in pred_arcs:
            gb.new_arc(node_array[0][arc + 1], node_array[0][j + 1], (None, y, delete_cost))
        # print ("Processed l2 arc from %d to %d with label %s - added %d arcs"
        #        % (start, end, y, len(pred_arcs)))

    # Construct remainder of cost lattice
    for i in xrange(len1):
        for j in xrange(len2):
            start1, end1, x = l1_canon.get_arc(i)
            start2, end2, y = l2_canon.get_arc(j)
            pred_arcs1 = l1_canon.get_node_in_arcs(start1)
            pred_arcs2 = l2_canon.get_node_in_arcs(start2)
            if start1 == lat_start1:
                pred_arcs1.append(-1)
            if start2 == lat_start2:
                pred_arcs2.append(-1)
            insert_cost = cost_fn(x, None)
            # In substring mode, consuming l2 labels is free only once the
            # l1 arc with the highest index has been reached
            delete_cost = 0 if (substring_cost and i == len1 - 1) else cost_fn(
                None, y)
            subst_cost = cost_fn(x, y)
            # num_added is only reported by the commented-out print below
            num_added = 0
            # Both arcs end at their lattice's end node: tie to end_node
            if end1 == lat_end1 and end2 == lat_end2:
                gb.new_arc(node_array[i + 1][j + 1], end_node, (None, None, 0))
                # print "Added ground arc for l1,l2 arcs with labels %s, %s" % (x,y)
            # Consume x only: arrive from each l1 predecessor, same l2 index
            for arc in pred_arcs1:
                gb.new_arc(node_array[arc + 1][j + 1], node_array[i + 1][j + 1],
                           (x, None, insert_cost))
                num_added += 1
            # Consume y only: arrive from each l2 predecessor, same l1 index
            for arc in pred_arcs2:
                gb.new_arc(node_array[i + 1][arc + 1], node_array[i + 1][j + 1],
                           (None, y, delete_cost))
                num_added += 1
            # Consume x and y together: arrive from every predecessor pair
            for arc1 in pred_arcs1:
                for arc2 in pred_arcs2:
                    gb.new_arc(node_array[arc1 + 1][arc2 + 1], node_array[i + 1][j + 1],
                               (x, y, subst_cost))
                    num_added += 1
            # print ("Processed l1 arc from %d to %d with label %s "
            #        "and l2 arc from %d to %d with label %s - added %d arcs"
            #        % (start1, end1, x, start2, end2, y, num_added))

    g = FrozenGraph(gb)
    assert g.is_lattice()
    # print "g.get_terminals() = %s, %s" % g.get_terminals()

    # The cost of an arc in g is the third element of its label triple
    def graph_cost_fn(label):
        return label[2]

    # Replace the arc ids in a path by their labels, dropping the last arc
    def iter_helper(path):
        arc_labels = [path[2].get_arc_label(arc) for arc in path[1][:-1]]
        return (path[0], tuple(arc_labels))

    return imap(iter_helper, g.iterpaths(graph_cost_fn, node_array[0][0], end_node))
def go(wordnames, do_display=False):
    """
    A first example of composing finite CFGs.

    Explores the word-level recognizer for wordnames breadth first and, for
    every symbol it allows, splices in the links found by exploring the
    phone-level recognizer for that symbol.  The pronunciation lattice, one
    graph per distinct symbol, and the composed graph are reported through
    printgraph(); when do_display is true they are also shown with
    display().  Nothing is returned.

    NOTE(review): comlextop and comlexphones are not defined in this
    function; presumably module-level grammar text -- confirm.
    """
    comlexcfg = make_recognizer(StringIO(comlextop))
    #for non_terminal in sorted(comlexcfg.non_terminals):
    #    print non_terminal
    #for terminal in sorted(comlexcfg.terminals):
    #    print terminal
    phonecfg = make_recognizer(StringIO(comlexphones))

    wordrecognizer = comlexcfg.recognizer(wordnames)

    # Word-level graph on its own, built from a second recognizer instance
    links, (start_id, is_sentential) = explore_finite(comlexcfg.recognizer(wordnames))
    g1 = FrozenGraph(make_initialized_set_graph_builder(links))
    printgraph(g1, 'Pronunciation lattice')
    do_display and display(g1, '\\n'.join(wordnames))

    # Pending (state, symbol, phone-level exploration) work items
    breadth_first = deque()
    # unstarted is a unique sentinel marking "no work item popped yet";
    # global_start is assigned below but never read
    global_start = start_id = unstarted = object()
    seen_symbols = set()
    # links collects the word-level arcs, links2 the composed arcs
    links = set()
    links2 = set()
    send_arg = None
    # count is updated below but never read
    count = -1
    while True:
        is_sentential, end_id, legal, exception = wordrecognizer(send_arg)
        if global_start is unstarted:
            global_start = end_id
        if exception is not None:
            raise exception
        # A state with no legal continuation must be sentential
        if not legal:
            assert is_sentential
        #print 'legal:', ' '.join(legal)

        # Skipped on the first pass: was_sentential, symbol, sublinks and
        # substart are only bound once a work item has been popped below
        if start_id is not unstarted:
            links.add(
                ((start_id, was_sentential), (end_id, is_sentential), symbol))
            count += 1
            count = 0
            # Sub-graph node ids are paired with start_id to keep each
            # spliced-in copy distinct
            substart2 = start_id, substart
            links2.add(((start_id, was_sentential), (substart2, False), '(-'))
            for (sub_start_id, sub_was_sentential), (
                    sub_end_id, sub_is_sentential), subsymbol in sublinks:
                sub_start_id = start_id, sub_start_id
                sub_end_id = start_id, sub_end_id
                ## if sub_start_id == substart:
                ##     links2.add(((start_id, was_sentential), (sub_start_id, False), '(-'))
                links2.add((
                    (sub_start_id, False),
                    #((end_id, is_sentential) if sub_is_sentential else (sub_end_id, False)),
                    (sub_end_id, False),
                    subsymbol))
                # Sentential sub-graph ends connect on to the word-level
                # end state
                if sub_is_sentential:
                    links2.add(
                        ((sub_end_id, False), (end_id, is_sentential), '(-'))

        for symbol in sorted(legal):
            breadth_first.appendleft(
                ((end_id, is_sentential), symbol,
                 explore_finite(phonecfg.recognizer([symbol]))))
            # NOTE(review): the phone-level exploration for symbol is run a
            # second time here; this result only feeds the per-symbol graph
            # below
            sublinks, (sub_start_id, sub_is_sentential) = explore_finite(
                phonecfg.recognizer([symbol]))
            # Report each distinct symbol's graph once
            if symbol not in seen_symbols:
                seen_symbols.add(symbol)
                subgraph = FrozenGraph(
                    make_initialized_set_graph_builder(sublinks))
                printgraph(subgraph, 'Phoneme %s' % (symbol, ))
                do_display and display(subgraph, symbol)

        if not breadth_first:
            break
        # appendleft() plus pop() makes the deque first-in, first-out
        (start_id, was_sentential), symbol, (sublinks, (
            substart, substart_sentential)) = breadth_first.pop()
        send_arg = start_id, symbol

    g = FrozenGraph(make_initialized_set_graph_builder(links))
    # The leading None short-circuits this expression, so g is never shown
    None and do_display and display(g)

    g3 = FrozenGraph(make_initialized_set_graph_builder(links2))
    printgraph(g3, 'HMM graph')
    do_display and display(g3)
def go(do_display=False): """ Generate a dependency graph, and display it if optional do_display is True. >>> go(do_display=False) digraph { node [shape=box]; ranksep=0.4; {rank=same; "n05";} {rank=same; "n01"; "n02";} {rank=same; "n04"; "n06";} {rank=same; "n00"; "n03"; "n08"; "n10";} {rank=same; "n07"; "n09";} n00 [label="Acoustic Models", style=bold, shape=box]; n01 [label="Graph / Lattice", style=bold, shape=octagon]; n02 [label="Serialization", style=bold, shape=octagon]; n03 [label="Audio", style=bold, shape=box]; n04 [label="Dataflow", style=bold, shape=octagon]; n05 [label=" Utilities / Containers ", style=bold, shape=octagon]; n06 [label="CFG", style=bold, shape=octagon]; n07 [label="Decoding", style=bold, shape=box]; n08 [label="Lexicon", style=bold, shape=box]; n09 [label="HTK Files", style=bold, shape=octagon]; n10 [label="Signal Processing", style=bold, shape=box]; n00 -> n01; n00 -> n02; n03 -> n04; n03 -> n05; n06 -> n01; n06 -> n05; n04 -> n01; n04 -> n05; n07 -> n06; n07 -> n04; n07 -> n08; n01 -> n02; n01 -> n05; n09 -> n00; n09 -> n04; n09 -> n08; n08 -> n06; n02 -> n05; n10 -> n04; } """ g = FrozenGraph(make_initialized_set_graph_builder(dependencies)) # make the rank sub graphs ranks = dict_of(set) for id in xrange(g.num_nodes): name, rank, color = g.get_node_label(id) ranks[rank].add('n%02d' %(id,)) rankglobals = list() for rank, names in sorted(ranks.iteritems()): rankglobals.append('{rank=same; "' + '"; "'.join(sorted(names)) + '";}') # log it globals=['node [shape=box];', 'ranksep=0.4;'] + rankglobals node_label_callback=lambda x, *_: str(x[0]) #node_attributes_callback=lambda x, *_: ['color=%s' % (x[2],)] node_attributes_callback=lambda x, *_: ['style=bold', 'shape=octagon'] if x[2] else ['style=bold', 'shape=box'] for line in g.dot_iter(globals=globals, node_label_callback=node_label_callback, node_attributes_callback=node_attributes_callback): print line, # display it do_display and g.dot_display(globals=globals, 
node_label_callback=node_label_callback, node_attributes_callback=node_attributes_callback)
def build_model_lattice(label_lattice, model_dict, epsilon_index):
    """
    From a lattice with labels on the arcs and a dict mapping labels to model
    indices, build a lattice with (node-index, model index) pairs on the
    nodes, usable for constructing a TrainingGraph.  The resulting lattice may
    have new epsilon nodes as the new start and end nodes; these will be given
    epsilon_index as their model indices.  Note that this function requires
    that label_lattice have unique labels on nodes.  XXX maybe do this
    node-labeling ourselves here?

    Raises ValueError if label_lattice is not a lattice or has a self loop,
    and KeyError if an arc label is missing from model_dict.

    >>> label_dict = {'A': 1, 'B': 4, 'C': 9}
    >>> lat = labels_to_lattice(('A', 'B', 'C'))
    >>> lat
    FrozenGraph(GraphTables(((0, 1, 2, 3), (0, 1, 2), (1, 2, 3), ('A', 'B', 'C'))))
    >>> result = build_model_lattice(lat, label_dict, 15)
    >>> print result
    FrozenGraph(GraphTables((((0, 1), (1, 4), (2, 9)), (0, 1), (1, 2), (None, None))))

    # >>> result.dot_display()
    """
    if not label_lattice.is_lattice() or label_lattice.has_self_loop():
        raise ValueError("label_lattice is not a lattice or has a self loop")

    counter = itertools.count()

    # We need our node labels to be pairs of ints in which the first int is
    # unique and the second is the index of the model from the caller's
    # model_dict
    def model_node_labeler(pred_node_label, arc_label, succ_node_label):
        if arc_label not in model_dict:
            # One-element tuple so that tuple-valued labels format correctly
            raise KeyError("Failed on lookup of label %s" % (arc_label,))
        model_index = model_dict[arc_label]
        return (next(counter), model_index)

    def empty_arc_labeler(in_arc_label, node_label, out_arc_label):
        return None

    # In the line graph each arc of label_lattice becomes a node
    line_graph = label_lattice.get_line_graph(model_node_labeler, empty_arc_labeler)
    starts, ends = line_graph.get_terminals()
    num_starts = len(starts)
    num_ends = len(ends)
    # If we started with a lattice, the line graph must have some terminals
    assert num_starts >= 1 and num_ends >= 1

    gb = SetGraphBuilder(line_graph)

    # Tie terminals together with epsilons if necessary.  The builder is
    # keyed by node label, so arcs are added by label with add_arc() on both
    # sides.
    # NOTE(review): confirm get_label() is the intended node-label accessor
    # on the line graph; it is only reached when there are several terminals.
    if num_starts > 1:
        new_start_label = gb.add_node((next(counter), epsilon_index))
        for node_id in starts:
            gb.add_arc(new_start_label, line_graph.get_label(node_id))

    if num_ends > 1:
        new_end_label = gb.add_node((next(counter), epsilon_index))
        for node_id in ends:
            gb.add_arc(line_graph.get_label(node_id), new_end_label)

    return FrozenGraph(gb)
def _test10():
    """
    Like test9, but now HMMs are arranged in a diamond pattern so inter-HMM
    probabilities come into play.

    Builds a six-node branching TrainingGraph with split probabilities,
    converts it to a standalone Hmm and returns a banner followed by that
    Hmm's full string dump.
    """
    ret = ""

    # GmmMgr setup
    num_states = 3
    dimension = 2
    gmm_mgr = GmmMgr([DummyModel(dimension, 1.0) for _ in xrange(num_states)])

    # The topology here is more complex than previous examples: nodes 0 and
    # 3 each have two successors
    builder = GraphBuilder()
    node_labels = ((0, 0), (1, 1), (2, 1), (3, 1), (4, 1), (5, 2))
    node_ids = [builder.new_node(label) for label in node_labels]
    arc_ends = ((0, 1), (1, 5), (0, 2), (2, 3), (3, 4), (3, 5), (4, 5))
    for src, dst in arc_ends:
        builder.new_arc(node_ids[src], node_ids[dst])
    frozen = FrozenGraph(builder)

    # Two forward Hmms with 3 states and order 3 (self loop, forward 1,
    # forward 2).  The model in the middle is special and can skip.
    seed(0)
    hmm0 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm1 = Hmm(1)
    skip_trans = array((
        (0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0),
        (0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.5),
        (0.0, 0.0, 0.0, 0.5, 0.35, 0.1, 0.05),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
    ))
    hmm1.build_model(gmm_mgr, (0, ), 3, 3, skip_trans)
    hmm2 = make_forward_hmm(gmm_mgr, num_states, order=3, exact=True)
    hmm_mgr = HmmMgr((hmm0, hmm1, hmm2))

    # Split probabilities for the two branch points (nodes 0 and 3)
    spd = {
        (0, 1): (0.4, 0.3, 0.8),
        (0, 2): (0.6, 0.7, 0.2),
        (3, 4): (0.4, 0.3, 0.8),
        (3, 5): (0.6, 0.7, 0.2),
    }
    training_graph = TrainingGraph(frozen, hmm_mgr, split_prob_dict=spd)

    # Hand-edited switch for the "bwt_ctsh" debug stream (currently on)
    trace_conversion = True
    with DebugPrint("bwt_ctsh") if trace_conversion else DebugPrint():
        standalone = training_graph.convert_to_standalone_hmm()
    ret += "\n\n========= TG CONVERTED TO Hmm =========\n\n" + standalone.to_string(
        full=True)
    return ret