def _get_initial_hypos(self):
    """Get the list of initial ``PartialHypothesis``. """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    init_hypo = PartialHypothesis(self.get_predictor_states())
    init_hypo.fst_node = self._find_start_node()
    return [init_hypo]
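# _find_start_node() is not shown in this section. The sketch below assumes
# it simply returns the start state of the lattice loaded above (or None if
# no lattice could be loaded); the actual helper may do more work, so treat
# this as an illustration rather than the real implementation.
def _find_start_node_sketch(self):
    """Sketch: return the start state of the current lattice, or None."""
    return self.cur_fst.start() if self.cur_fst else None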
def __init__(self, path, slave_predictor):
    """Constructor for the fsttok wrapper.

    Args:
        path (string): Path to an FST which transduces characters
                       to predictor tokens
        slave_predictor (Predictor): Wrapped predictor
    """
    super(FSTTokPredictor, self).__init__()
    self.max_pending_score = 5.0  # TODO: Add to config
    self.slave_predictor = slave_predictor
    if isinstance(slave_predictor, UnboundedVocabularyPredictor):
        logging.fatal("fsttok cannot wrap an unbounded "
                      "vocabulary predictor.")
    self.trans_fst = utils.load_fst(path)
def initialize(self, src_sentence):
    """Loads the FST from the file system and consumes the start
    of sentence symbol.

    Args:
        src_sentence (list): Not used
    """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    self.cur_nodes = []
    if self.cur_fst:
        self.cur_nodes = self._follow_eps({self.cur_fst.start(): 0.0})
    self.consume(utils.GO_ID)
    if not self.cur_nodes:
        logging.warn("The lattice for sentence %d does not contain any "
                     "valid path. Please double-check that the lattice "
                     "is not empty and that paths start with the begin-of-"
                     "sentence symbol." % (self.current_sen_id + 1))
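# _follow_eps() is referenced above but not shown. The sketch below shows one
# way such a helper could expand epsilon (label 0) arcs from a {state: score}
# dictionary, assuming pywrapfst-style arcs and numeric weights interpreted
# as costs (lower is better). The real helper may use a different scoring
# convention; this is an assumption for illustration only.
def _follow_eps_sketch(self, nodes):
    """Sketch: add all states reachable via epsilon arcs to ``nodes``."""
    result = dict(nodes)
    stack = list(nodes.items())
    while stack:
        state, score = stack.pop()
        for arc in self.cur_fst.arcs(state):
            if arc.ilabel != 0:  # 0 is the reserved epsilon label
                continue
            next_score = score + float(arc.weight)
            if next_score < result.get(arc.nextstate, float("inf")):
                result[arc.nextstate] = next_score
                stack.append((arc.nextstate, next_score))
    return result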
def __init__(self, path):
    """Loads the subword->char FST, determinizes and minimizes it.

    Args:
        path (string): Path to an FST from subword unit to char sequence
    """
    self.token2char_fst = utils.load_fst(path)
    self.token2char_fst.rmepsilon()
    self.token2char_fst.determinize()
    self.token2char_fst.minimize()
    # Tokens with an arc out of the start state can begin a word
    self.word_begin_tokens = {
        arc.ilabel: True
        for arc in self.token2char_fst.arcs(self.token2char_fst.start())}
    # Inverting swaps input and output labels: char sequence -> subword token
    self.char2token_fst = fst.Fst(self.token2char_fst)
    self.char2token_fst.invert()
    # Replace the end-of-word entry </w> in the character map by a space
    self.cmap = dict(utils.trg_cmap)
    self.cmap[" "] = self.cmap["</w>"]
    del self.cmap["</w>"]
    self.inv_cmap = {(i, c) for c, i in self.cmap.iteritems()}
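# The </w> -> space remapping above is easiest to see on a toy map. The IDs
# below are made up; the real map comes from utils.trg_cmap.
def _toy_cmap_example():
    """Sketch: illustrate the </w> -> space remapping on a toy map."""
    cmap = {"a": 4, "b": 5, "</w>": 6}
    cmap[" "] = cmap["</w>"]  # the space character reuses the </w> ID
    del cmap["</w>"]
    inv_cmap = {(i, c) for c, i in cmap.items()}
    # cmap     -> {"a": 4, "b": 5, " ": 6}
    # inv_cmap -> {(4, "a"), (5, "b"), (6, " ")}
    return cmap, inv_cmap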
def __init__(self, path, fst_unk_id, max_pending_score, slave_predictor):
    """Constructor for the fsttok wrapper.

    Args:
        path (string): Path to an FST which transduces characters
                       to predictor tokens
        fst_unk_id (int): ID used to represent UNK in the FSTs
                          (usually 999999998)
        max_pending_score (float): Maximum pending score in a
                                   ``CombinedState`` instance.
        slave_predictor (Predictor): Wrapped predictor
    """
    super(FSTTokPredictor, self).__init__()
    self.max_pending_score = max_pending_score
    self.fst_unk_id = fst_unk_id
    self.slave_predictor = slave_predictor
    if isinstance(slave_predictor, UnboundedVocabularyPredictor):
        logging.fatal("fsttok cannot wrap an unbounded "
                      "vocabulary predictor.")
    self.trans_fst = utils.load_fst(path)
def initialize(self, src_sentence):
    """Loads the FST from the file system and consumes the start
    of sentence symbol.

    Args:
        src_sentence (list): Not used
    """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    self.cur_node = self.cur_fst.start() if self.cur_fst else None
    self.bos_score = self.consume(utils.GO_ID)
    if not self.bos_score:  # Override None
        self.bos_score = 0.0
    if self.cur_node is None:
        logging.warn("The lattice for sentence %d does not contain any "
                     "valid path. Please double-check that the lattice "
                     "is not empty and that paths contain the begin-of-"
                     "sentence symbol %d. If you are using a different "
                     "begin-of-sentence symbol, double-check --indexing_"
                     "scheme." % (self.current_sen_id + 1, utils.GO_ID))
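# utils.get_path() resolves the per-sentence lattice file. Assuming it fills
# a %d placeholder in the --fst_path pattern with the (1-based) sentence
# index -- the placeholder convention is an assumption here -- its behaviour
# is roughly as sketched below.
def _get_path_sketch(pattern, sen_id):
    """Sketch: substitute the sentence index into the path pattern."""
    return pattern % sen_id if "%d" in pattern else pattern

# _get_path_sketch("lattices/%d.fst", 3)  ->  "lattices/3.fst"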