def _read_slf_header(self, fields):
    """Reads SLF lattice header fields and saves them in member variables.

    :type fields: list of strs
    :param fields: fields, such as name="value"
    """

    for field in fields:
        name, value = _split_slf_field(field)
        if (name == 'UTTERANCE') or (name == 'U'):
            self.utterance_id = value
        elif (name == 'SUBLAT') or (name == 'S'):
            raise InputError("Sub-lattices are not supported.")
        elif name == 'base':
            value = numpy.float64(value)
            if value == 0.0:
                self._log_scale = None
            else:
                self._log_scale = logprob_type(numpy.log(value))
        elif name == 'lmscale':
            self.lm_scale = logprob_type(value)
        elif name == 'wdpenalty':
            self.wi_penalty = logprob_type(value)
        elif name == 'start':
            self._initial_node_id = int(value)
        elif name == 'end':
            self._final_node_ids.append(int(value))
        elif (name == 'NODES') or (name == 'N'):
            self._num_nodes = int(value)
        elif (name == 'LINKS') or (name == 'L'):
            self._num_links = int(value)
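# For illustration, a minimal sketch of the header section of an SLF file;
# the field values below are hypothetical, not taken from this project:
#
#     VERSION=1.0
#     UTTERANCE=utterance1
#     lmscale=12.0  wdpenalty=-0.5
#     N=5  L=7
#
# Assuming _split_slf_line() splits such a line into "name=value" fields,
# _read_slf_header() would store lm_scale=12.0, wi_penalty=-0.5,
# _num_nodes=5, and _num_links=7 in member variables.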
def _read_slf_link(self, link_id, fields):
    """Reads SLF lattice link fields and creates such a link.

    :type link_id: int
    :param link_id: ID of the link

    :type fields: list of strs
    :param fields: the rest of the link fields after ID
    """

    start_node = None
    end_node = None
    word = None
    ac_logprob = None
    lm_logprob = None
    for field in fields:
        name, value = _split_slf_field(field)
        if (name == 'START') or (name == 'S'):
            start_node = self.nodes[int(value)]
        elif (name == 'END') or (name == 'E'):
            end_node = self.nodes[int(value)]
        elif (name == 'WORD') or (name == 'W'):
            word = value
        elif (name == 'acoustic') or (name == 'a'):
            if self._log_scale is None:
                # The lattice contains linear probabilities; convert to log.
                ac_logprob = logprob_type(numpy.log(numpy.float64(value)))
            else:
                ac_logprob = logprob_type(value) * self._log_scale
        elif (name == 'language') or (name == 'l'):
            if self._log_scale is None:
                lm_logprob = logprob_type(numpy.log(numpy.float64(value)))
            else:
                lm_logprob = logprob_type(value) * self._log_scale

    if start_node is None:
        raise InputError(
            "Start node is not specified for link {}.".format(link_id))
    if end_node is None:
        raise InputError(
            "End node is not specified for link {}.".format(link_id))

    link = self._add_link(start_node, end_node)
    link.word = word
    link.ac_logprob = ac_logprob
    link.lm_logprob = lm_logprob

    # Words that start with '!' or '#' mark null links.
    if link.word is not None and \
       (link.word.startswith('!') or link.word.startswith('#')):
        link.word = None
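# A sketch of an SLF link definition that this method would parse; the
# values are hypothetical:
#
#     J=0  S=0  E=1  W=hello  a=-1142.3  l=-18.2
#
# _read_slf_link(0, ...) would create a link from node 0 to node 1 carrying
# the word "hello". If the header's "base" field set _log_scale, the
# acoustic score -1142.3 and LM score -18.2 are multiplied by it; if
# _log_scale is None, the values are treated as linear probabilities and
# converted with numpy.log().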
def __init__(self, history=(), state=None, ac_logprob=logprob_type(0.0),
             lat_lm_logprob=logprob_type(0.0),
             nn_lm_logprob=logprob_type(0.0)):
    """Constructs a token with given recurrent state and logprobs.

    The constructor won't compute the total logprob. The user is
    responsible for computing it when necessary, to avoid unnecessary
    overhead. New tokens will not have the recombination hash or the
    total log probability set.

    :type history: list of ints
    :param history: word IDs that the token has passed

    :type state: RecurrentState
    :param state: the state of the recurrent layers for a single sequence

    :type ac_logprob: logprob_type
    :param ac_logprob: sum of the acoustic log probabilities of the
                       lattice links

    :type lat_lm_logprob: logprob_type
    :param lat_lm_logprob: sum of the LM log probabilities of the lattice
                           links

    :type nn_lm_logprob: logprob_type
    :param nn_lm_logprob: sum of the NNLM log probabilities of the
                          lattice links
    """

    self.history = history
    self.state = [] if state is None else state
    self.ac_logprob = ac_logprob
    self.lat_lm_logprob = lat_lm_logprob
    self.nn_lm_logprob = nn_lm_logprob
    self.recombination_hash = None
    self.graph_logprob = None
    self.total_logprob = None
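# Illustrative usage sketch: LatticeDecoder.Token is assumed to be the
# enclosing class, and the recompute_* calls are those used by decode()
# below:
#
#     token = LatticeDecoder.Token(history=(sos_id,), state=initial_state)
#     token.recompute_hash(recombination_order)
#     token.recompute_total(nnlm_weight, lm_scale, wi_penalty,
#                           linear_interpolation)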
def __init__(self, lattice_file):
    """Reads an SLF lattice file.

    If ``lattice_file`` is ``None``, creates an empty lattice (useful for
    testing).

    :type lattice_file: file object
    :param lattice_file: a file in SLF lattice format
    """

    super().__init__()

    # No log conversion by default. "None" means the lattice file uses
    # linear probabilities.
    self._log_scale = logprob_type(1.0)

    self._initial_node_id = None
    self._final_node_ids = []

    if lattice_file is None:
        self._num_nodes = 0
        self._num_links = 0
        return

    self._num_nodes = None
    self._num_links = None
    for line in lattice_file:
        fields = _split_slf_line(line)
        self._read_slf_header(fields)
        if (self._num_nodes is not None) and (self._num_links is not None):
            break

    if self.wi_penalty is not None:
        if self._log_scale is None:
            self.wi_penalty = numpy.log(self.wi_penalty)
        else:
            self.wi_penalty *= self._log_scale

    self.nodes = [self.Node(node_id) for node_id in range(self._num_nodes)]

    for line in lattice_file:
        fields = _split_slf_line(line)
        if not fields:
            continue
        name, value = _split_slf_field(fields[0])
        if name == 'I':
            self._read_slf_node(int(value), fields[1:])
        elif name == 'J':
            self._read_slf_link(int(value), fields[1:])

    if len(self.links) != self._num_links:
        raise InputError(
            "Number of links in SLF lattice doesn't match the LINKS "
            "field.")

    if self._initial_node_id is not None:
        self.initial_node = self.nodes[self._initial_node_id]
    else:
        # If the header doesn't specify the initial node, find the node
        # with no incoming links.
        self.initial_node = None
        for node in self.nodes:
            if len(node.in_links) == 0:
                self.initial_node = node
                break
        if self.initial_node is None:
            raise InputError("Could not find initial node in SLF lattice.")

    final_nodes_found = 0
    for node in self.nodes:
        if node.id in self._final_node_ids or len(node.out_links) == 0:
            node.final = True
            final_nodes_found += 1

    if final_nodes_found == 0:
        raise InputError("Could not find final node in SLF lattice.")
    elif final_nodes_found > 1:
        # It is unclear whether SLF allows multiple final nodes. The
        # decoder supports them, but for now raise an input error.
        raise InputError("More than one final node in SLF lattice.")

    # If word identity information is not present in the node
    # definitions, it must appear in the link definitions.
    self._move_words_to_links()
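# Illustrative usage sketch, assuming this constructor belongs to a class
# named SLFLattice and "lattice.slf" is a hypothetical input file:
#
#     with open('lattice.slf', 'r') as lattice_file:
#         lattice = SLFLattice(lattice_file)
#     print(lattice.initial_node.id, len(lattice.links))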
def decode(self, lattice):
    """Propagates tokens through given lattice and returns a list of
    tokens in the final nodes.

    Propagates tokens at a node to every outgoing link by creating a copy
    of each token and updating the language model scores according to the
    link.

    The function returns two lists. The first list contains the final
    tokens, sorted in descending order of total log probability, i.e. the
    first token in the list represents the best path through the lattice.
    The second list contains the tokens that were dropped during
    recombination. This is needed for constructing a new rescored
    lattice.

    :type lattice: Lattice
    :param lattice: a word lattice to be decoded

    :rtype: a tuple of two lists of LatticeDecoder.Tokens
    :returns: a list of the final tokens sorted by probability (most
              likely token first), and a list of the tokens that were
              dropped during recombination
    """

    if self._lm_scale is not None:
        lm_scale = logprob_type(self._lm_scale)
    elif lattice.lm_scale is not None:
        lm_scale = logprob_type(lattice.lm_scale)
    else:
        lm_scale = logprob_type(1.0)

    if self._wi_penalty is not None:
        wi_penalty = logprob_type(self._wi_penalty)
    elif lattice.wi_penalty is not None:
        wi_penalty = logprob_type(lattice.wi_penalty)
    else:
        wi_penalty = logprob_type(0.0)

    tokens = [list() for _ in lattice.nodes]
    recomb_tokens = []
    initial_state = RecurrentState(self._network.recurrent_state_size)
    initial_token = self.Token(history=(self._sos_id,),
                               state=initial_state)
    initial_token.recompute_hash(self._recombination_order)
    initial_token.recompute_total(self._nnlm_weight, lm_scale, wi_penalty,
                                  self._linear_interpolation)
    tokens[lattice.initial_node.id].append(initial_token)
    lattice.initial_node.best_logprob = initial_token.total_logprob

    sorted_nodes = lattice.sorted_nodes()
    self._nodes_processed = 0
    final_tokens = []
    for node in sorted_nodes:
        stats = self._prune(node, sorted_nodes, tokens, recomb_tokens)

        num_new_tokens = 0
        node_tokens = tokens[node.id]
        assert node_tokens
        if node.final:
            new_tokens = self._propagate(node_tokens, None, lm_scale,
                                         wi_penalty)
            final_tokens.extend(new_tokens)
            num_new_tokens += len(new_tokens)
        for link in node.out_links:
            new_tokens = self._propagate(node_tokens, link, lm_scale,
                                         wi_penalty)
            tokens[link.end_node.id].extend(new_tokens)
            # If there are lots of tokens in the end node, prune already
            # to conserve memory.
            if self._max_tokens_per_node is not None and \
               len(tokens[link.end_node.id]) > \
               self._max_tokens_per_node * 2:
                self._prune(link.end_node, sorted_nodes, tokens,
                            recomb_tokens)
            num_new_tokens += len(new_tokens)
        stats['new'] = num_new_tokens

        self._nodes_processed += 1
        self._log_stats(stats, node.id, len(sorted_nodes))

    if len(final_tokens) == 0:
        raise InputError("Could not reach a final node of word lattice.")

    final_tokens = self._sorted_recombined_tokens(final_tokens,
                                                  recomb_tokens)
    return final_tokens, recomb_tokens
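# Illustrative usage sketch; "decoder" and "lattice" are assumed to be
# constructed as in the examples above, and the vocabulary is assumed to
# provide an ID-to-word mapping:
#
#     final_tokens, recomb_tokens = decoder.decode(lattice)
#     best_token = final_tokens[0]  # highest total log probability
#     words = [vocabulary.id_to_word[word_id]
#              for word_id in best_token.history]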
def __init__(self, network, decoding_options, profile=False):
    """Creates a Theano function that computes the output probabilities
    for a single time step.

    Creates the function self._step_function that takes as input a set of
    word sequences and the current recurrent states. It uses the previous
    states and word IDs to compute the output distributions, and computes
    the probabilities of the target words.

    All invocations of ``decode()`` will use the given NNLM weight and LM
    scale when computing the total probability. If LM scale is not given,
    uses the value provided in the lattice files. If it's not provided in
    a lattice file either, performs no scaling of LM log probabilities.

    ``decoding_options`` should contain the following elements:

    nnlm_weight : float
      weight of the neural network probabilities when interpolating with
      the lattice probabilities

    lm_scale : float
      if other than ``None``, the decoder will scale language model log
      probabilities by this factor; otherwise the scaling factor will be
      read from the lattice file

    wi_penalty : float
      penalize word insertion by adding this value to the total log
      probability of a token as many times as there are words

    unk_penalty : float
      if set to other than ``None``, used as the <unk> token score

    use_shortlist : bool
      if set to ``True``, the <unk> token probability is distributed
      among the out-of-shortlist words according to their unigram
      probabilities

    unk_from_lattice : bool
      if set to ``True``, the probability for <unk> tokens is taken from
      the lattice alone

    linear_interpolation : bool
      if set to ``True``, use linear instead of (pseudo) log-linear
      interpolation of language model probabilities

    max_tokens_per_node : int
      if set to other than ``None``, leave only this many tokens at each
      node

    beam : float
      if set to other than ``None``, prune tokens whose total log
      probability is further than this from the best token at each point
      in time

    recombination_order : int
      number of words to consider when deciding whether two tokens should
      be recombined, or ``None`` for the entire word history

    prune_extra_limit : float
      if set, adjust the beam and max_tokens_per_node pruning relative to
      the number of tokens; the limits are divided by the number of
      tokens and multiplied by this factor

    abs_min_beam : float
      when using prune_extra_limit, this is the minimum that the beam
      will be adjusted to

    abs_min_max_tokens : float
      when using prune_extra_limit, this is the minimum that the maximum
      number of tokens will be adjusted to

    :type network: Network
    :param network: the neural network object

    :type decoding_options: dict
    :param decoding_options: a dictionary of decoding options (see above)

    :type profile: bool
    :param profile: if set to ``True``, creates a Theano profile object
    """

    self._network = network
    self._vocabulary = network.vocabulary
    self._nnlm_weight = logprob_type(decoding_options['nnlm_weight'])
    self._lm_scale = decoding_options['lm_scale']
    self._wi_penalty = decoding_options['wi_penalty']
    self._unk_penalty = decoding_options['unk_penalty']
    self._unk_from_lattice = decoding_options['unk_from_lattice']
    self._linear_interpolation = decoding_options['linear_interpolation']
    self._max_tokens_per_node = decoding_options['max_tokens_per_node']
    self._beam = decoding_options['beam']
    if self._beam is not None:
        self._beam = logprob_type(self._beam)
    self._recombination_order = decoding_options['recombination_order']
    self._prune_extra_limit = decoding_options.get('prune_extra_limit',
                                                   None)
    self._abs_min_beam = decoding_options.get('abs_min_beam', 0)
    self._abs_min_max_tokens = decoding_options.get('abs_min_max_tokens',
                                                    0)
    if self._prune_extra_limit is None:
        self._abs_min_beam = self._abs_min_max_tokens = 0

    if decoding_options['use_shortlist'] and \
       self._vocabulary.has_unigram_probs():
        oos_logprobs = numpy.log(self._vocabulary.get_oos_probs())
        self._oos_logprobs = oos_logprobs.astype(theano.config.floatX)
    else:
        self._oos_logprobs = None

    self._sos_id = self._vocabulary.word_to_id['<s>']
    self._eos_id = self._vocabulary.word_to_id['</s>']
    self._unk_id = self._vocabulary.word_to_id['<unk>']

    inputs = [network.input_word_ids,
              network.input_class_ids,
              network.target_class_ids]
    inputs.extend(network.recurrent_state_input)

    outputs = [tensor.log(network.target_probs())]
    outputs.extend(network.recurrent_state_output)

    # Ignore unused input, because is_training is only used by the
    # dropout layer.
    self._step_function = theano.function(
        inputs,
        outputs,
        givens=[(network.is_training, numpy.int8(0))],
        name='step_predictor',
        profile=profile,
        on_unused_input='ignore')
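# Illustrative sketch of a decoding_options dictionary; the keys are those
# documented above, while the values here are hypothetical:
#
#     decoding_options = {
#         'nnlm_weight': 0.5,
#         'lm_scale': None,
#         'wi_penalty': None,
#         'unk_penalty': None,
#         'use_shortlist': True,
#         'unk_from_lattice': False,
#         'linear_interpolation': False,
#         'max_tokens_per_node': 64,
#         'beam': 650.0,
#         'recombination_order': None}
#     decoder = LatticeDecoder(network, decoding_options)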