def _get_initial_hypos(self):
    """Get the list of initial ``PartialHypothesis``. """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    init_hypo = PartialHypothesis(self.get_predictor_states())
    init_hypo.fst_node = self._find_start_node()
    return [init_hypo]
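# _find_start_node() is not shown in this section. The sketch below assumes
# it simply returns the start state of the lattice loaded above (or None if
# no lattice could be loaded); the actual helper may do more work, so treat
# this as an illustration rather than the real implementation.
def _find_start_node_sketch(self):
    """Sketch: return the start state of the current lattice, or None."""
    return self.cur_fst.start() if self.cur_fst else None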
def __init__(self, path, slave_predictor):
    """Constructor for the fsttok wrapper.

    Args:
        path (string): Path to an FST which transduces characters
                       to predictor tokens
        slave_predictor (Predictor): Wrapped predictor
    """
    super(FSTTokPredictor, self).__init__()
    self.max_pending_score = 5.0  # TODO: Add to config
    self.slave_predictor = slave_predictor
    if isinstance(slave_predictor, UnboundedVocabularyPredictor):
        logging.fatal("fsttok cannot wrap an unbounded "
                      "vocabulary predictor.")
    self.trans_fst = utils.load_fst(path)
def initialize(self, src_sentence):
    """Loads the FST from the file system and consumes the start
    of sentence symbol.

    Args:
        src_sentence (list): Not used
    """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    self.cur_nodes = []
    if self.cur_fst:
        self.cur_nodes = self._follow_eps({self.cur_fst.start(): 0.0})
    self.consume(utils.GO_ID)
    if not self.cur_nodes:
        logging.warn("The lattice for sentence %d does not contain any "
                     "valid path. Please double-check that the lattice "
                     "is not empty and that paths start with the begin-of-"
                     "sentence symbol." % (self.current_sen_id + 1))
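# _follow_eps() is referenced above but not shown. The sketch below shows one
# way such a helper could expand epsilon (label 0) arcs from a {state: score}
# dictionary, assuming pywrapfst-style arcs and numeric weights interpreted
# as costs (lower is better). The real helper may use a different scoring
# convention; this is an assumption for illustration only.
def _follow_eps_sketch(self, nodes):
    """Sketch: add all states reachable via epsilon arcs to ``nodes``."""
    result = dict(nodes)
    stack = list(nodes.items())
    while stack:
        state, score = stack.pop()
        for arc in self.cur_fst.arcs(state):
            if arc.ilabel != 0:  # 0 is the reserved epsilon label
                continue
            next_score = score + float(arc.weight)
            if next_score < result.get(arc.nextstate, float("inf")):
                result[arc.nextstate] = next_score
                stack.append((arc.nextstate, next_score))
    return result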
def __init__(self, path):
    """Loads the subword->char FST, determinizes and minimizes it.

    Args:
        path (string): Path to an FST from subword unit to char sequence
    """
    self.token2char_fst = utils.load_fst(path)
    self.token2char_fst.rmepsilon()
    self.token2char_fst.determinize()
    self.token2char_fst.minimize()
    # Tokens with an arc out of the start state can begin a word
    self.word_begin_tokens = {
        arc.ilabel: True
        for arc in self.token2char_fst.arcs(self.token2char_fst.start())}
    # Inverting swaps input and output labels: char sequence -> subword token
    self.char2token_fst = fst.Fst(self.token2char_fst)
    self.char2token_fst.invert()
    # Replace the end-of-word entry </w> in the character map by a space
    self.cmap = dict(utils.trg_cmap)
    self.cmap[" "] = self.cmap["</w>"]
    del self.cmap["</w>"]
    self.inv_cmap = {(i, c) for c, i in self.cmap.iteritems()}
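# The </w> -> space remapping above is easiest to see on a toy map. The IDs
# below are made up; the real map comes from utils.trg_cmap.
def _toy_cmap_example():
    """Sketch: illustrate the </w> -> space remapping on a toy map."""
    cmap = {"a": 4, "b": 5, "</w>": 6}
    cmap[" "] = cmap["</w>"]  # the space character reuses the </w> ID
    del cmap["</w>"]
    inv_cmap = {(i, c) for c, i in cmap.items()}
    # cmap     -> {"a": 4, "b": 5, " ": 6}
    # inv_cmap -> {(4, "a"), (5, "b"), (6, " ")}
    return cmap, inv_cmap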
def __init__(self, path, fst_unk_id, max_pending_score, slave_predictor):
    """Constructor for the fsttok wrapper.

    Args:
        path (string): Path to an FST which transduces characters
                       to predictor tokens
        fst_unk_id (int): ID used to represent UNK in the FSTs
                          (usually 999999998)
        max_pending_score (float): Maximum pending score in a
                                   ``CombinedState`` instance.
        slave_predictor (Predictor): Wrapped predictor
    """
    super(FSTTokPredictor, self).__init__()
    self.max_pending_score = max_pending_score
    self.fst_unk_id = fst_unk_id
    self.slave_predictor = slave_predictor
    if isinstance(slave_predictor, UnboundedVocabularyPredictor):
        logging.fatal("fsttok cannot wrap an unbounded "
                      "vocabulary predictor.")
    self.trans_fst = utils.load_fst(path)
def initialize(self, src_sentence):
    """Loads the FST from the file system and consumes the start
    of sentence symbol.

    Args:
        src_sentence (list): Not used
    """
    self.cur_fst = load_fst(
        utils.get_path(self.fst_path, self.current_sen_id + 1))
    self.cur_node = self.cur_fst.start() if self.cur_fst else None
    self.bos_score = self.consume(utils.GO_ID)
    if not self.bos_score:  # Override None
        self.bos_score = 0.0
    if self.cur_node is None:
        logging.warn("The lattice for sentence %d does not contain any "
                     "valid path. Please double-check that the lattice "
                     "is not empty and that paths contain the begin-of-"
                     "sentence symbol %d. If you are using a different "
                     "begin-of-sentence symbol, double-check --indexing_"
                     "scheme." % (self.current_sen_id + 1, utils.GO_ID))
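# utils.get_path() resolves the per-sentence lattice file. Assuming it fills
# a %d placeholder in the --fst_path pattern with the (1-based) sentence
# index -- the placeholder convention is an assumption here -- its behaviour
# is roughly as sketched below.
def _get_path_sketch(pattern, sen_id):
    """Sketch: substitute the sentence index into the path pattern."""
    return pattern % sen_id if "%d" in pattern else pattern

# _get_path_sketch("lattices/%d.fst", 3)  ->  "lattices/3.fst"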