# The constructor snippets below assume the modules they reference (`math`, `os`,
# `torch`) are imported in their own files, and that the `make_lexical_tree` helper
# (sketched at the end of this section) is available.
def __init__(
    self,
    wordlm,
    subwordlm,
    word_dict,
    subword_dict,
    subwordlm_weight=0.8,
    oov_penalty=1.0,
    open_vocab=True,
):
    super(MultiLevelLM, self).__init__()
    self.wordlm = wordlm
    self.subwordlm = subwordlm
    # <eos>/<unk> ids in the word-level vocabulary
    self.word_eos = word_dict["<eos>"]
    self.word_unk = word_dict["<unk>"]
    self.var_word_eos = torch.LongTensor([self.word_eos])
    self.var_word_unk = torch.LongTensor([self.word_unk])
    # word-boundary and end-of-sentence ids in the subword vocabulary
    self.space = subword_dict["<space>"]
    self.eos = subword_dict["<eos>"]
    # prefix tree over the word vocabulary, keyed by subword ids
    self.lexroot = make_lexical_tree(word_dict, subword_dict, self.word_unk)
    self.log_oov_penalty = math.log(oov_penalty)
    self.open_vocab = open_vocab
    self.subword_dict_size = len(subword_dict)
    self.subwordlm_weight = subwordlm_weight
    self.normalized = True
def __init__(
    self, wordlm, word_dict, subword_dict, oov_penalty=0.0001, open_vocab=True
):
    super(LookAheadWordLM, self).__init__()
    self.wordlm = wordlm
    self.word_eos = word_dict["<eos>"]
    self.word_unk = word_dict["<unk>"]
    self.var_word_eos = torch.LongTensor([self.word_eos])
    self.var_word_unk = torch.LongTensor([self.word_unk])
    self.space = subword_dict["<space>"]
    self.eos = subword_dict["<eos>"]
    self.lexroot = make_lexical_tree(word_dict, subword_dict, self.word_unk)
    self.oov_penalty = oov_penalty
    self.open_vocab = open_vocab
    self.subword_dict_size = len(subword_dict)
    # `self.zero` is not set in this constructor; it is assumed to be a small
    # floor-probability constant defined as a class attribute.
    self.zero_tensor = torch.FloatTensor([self.zero])
    self.normalized = True
def __init__(self, wordlm, word_dict, subword_dict, oov_penalty=0.0001, open_vocab=True):
    super(LookAheadWordLM, self).__init__()
    self.wordlm = wordlm
    self.word_eos = word_dict["<eos>"]
    self.word_unk = word_dict["<unk>"]
    # self.xp is the array module (numpy or cupy) provided by the parent class
    # in Chainer-style code
    self.xp_word_eos = self.xp.full(1, self.word_eos, "i")
    self.xp_word_unk = self.xp.full(1, self.word_unk, "i")
    self.space = subword_dict["<space>"]
    self.eos = subword_dict["<eos>"]
    self.lexroot = make_lexical_tree(word_dict, subword_dict, self.word_unk)
    self.oov_penalty = oov_penalty
    self.open_vocab = open_vocab
    self.subword_dict_size = len(subword_dict)
    self.normalized = True
def __init__(self, vocabulary, meetingpath, charlist, bpe=False):
    """Meeting-wise KB in decoder."""
    self.meetingdict = {}
    self.meetingdict_sym = {}
    self.meetingmask = {}
    self.meetinglextree = {}
    self.chardict = {}
    self.charlist = charlist
    self.bpe = bpe
    # map each character (or BPE piece) to its index
    for i, char in enumerate(charlist):
        self.chardict[char] = i
    self.maxlen = 0
    self.unkidx = vocabulary.get_idx('<unk>')
    # read one biasing-word list per meeting file
    for filename in os.listdir(meetingpath):
        worddict, wordlist = {}, []
        with open(os.path.join(meetingpath, filename)) as fin:
            for word in fin:
                word = tuple(word.split()) if bpe else word.strip()
                worddict[word] = len(wordlist) + 1
                wordlist.append(word)
        self.meetingdict[filename] = vocabulary.get_ids(wordlist, oov_sym='<blank>')
        # per-meeting prefix tree over the biasing words, keyed by character/BPE ids
        self.meetinglextree[filename] = make_lexical_tree(worddict, self.chardict, -1)
        self.maxlen = len(wordlist) if len(wordlist) > self.maxlen else self.maxlen
    # pad meeting wordlist to a common length and build the corresponding mask
    for meeting, wordlist in self.meetingdict.items():
        self.meetingdict_sym[meeting] = vocabulary.get_syms(self.meetingdict[meeting])
        self.meetingdict[meeting] = wordlist + [self.unkidx] * (self.maxlen - len(wordlist) + 1)
        self.meetingmask[meeting] = [0] * len(wordlist) + [1] * (self.maxlen - len(wordlist)) + [0]
    self.unkidx = self.maxlen
    self.maxlen = self.maxlen + 1
    self.vocab = vocabulary
    self.char_worddict, self.char_dictmask, self.charind, self.char_wordlist = self.get_character_dict()
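# A quick, self-contained illustration of the padding and mask layout built above.
# The sizes and ids below are made-up placeholders, not values from any real meeting.
maxlen, unkidx = 5, 0            # toy values: longest biasing list has 5 words, <unk> id is 0
word_ids = [11, 12, 13]          # a meeting whose biasing list has 3 words
padded = word_ids + [unkidx] * (maxlen - len(word_ids) + 1)
mask = [0] * len(word_ids) + [1] * (maxlen - len(word_ids)) + [0]
assert len(padded) == len(mask) == maxlen + 1
# padded -> [11, 12, 13, 0, 0, 0]; mask -> [0, 0, 0, 1, 1, 0]
# Positions with mask == 1 are padding; the final slot (index maxlen) stays unmasked,
# which appears to act as the shared "no match" entry that `self.unkidx = self.maxlen`
# points at after __init__.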
def __init__(self, wordlm, subwordlm, word_dict, subword_dict,
             subwordlm_weight=0.8, oov_penalty=1.0, open_vocab=True):
    super(MultiLevelLM, self).__init__()
    self.wordlm = wordlm
    self.subwordlm = subwordlm
    self.word_eos = word_dict['<eos>']
    self.word_unk = word_dict['<unk>']
    self.xp_word_eos = self.xp.full(1, self.word_eos, 'i')
    self.xp_word_unk = self.xp.full(1, self.word_unk, 'i')
    self.space = subword_dict['<space>']
    self.eos = subword_dict['<eos>']
    self.lexroot = make_lexical_tree(word_dict, subword_dict, self.word_unk)
    self.log_oov_penalty = math.log(oov_penalty)
    self.open_vocab = open_vocab
    self.subword_dict_size = len(subword_dict)
    self.subwordlm_weight = subwordlm_weight
    self.normalized = True
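# All of the constructors above rely on `make_lexical_tree`, which is not shown in
# this section. The function below is a minimal sketch of a prefix-tree builder that
# is consistent with how it is called here (a word-to-id dict, a subword-to-id dict,
# and an unknown-word id to skip); the exact node layout is an assumption based on
# common look-ahead word LM implementations, not necessarily the helper used above.
def make_lexical_tree(word_dict, subword_dict, word_unk):
    """Build a prefix tree over the word vocabulary, keyed by subword ids.

    Each node is [children, word_id, word_id_range]:
      children      -- dict mapping a subword id to a child node
      word_id       -- id of the word ending exactly at this node, or -1
      word_id_range -- (low, high) bounds of word ids sharing this prefix (None at root)
    """
    root = [{}, -1, None]
    for word, wid in word_dict.items():
        # skip reserved ids (e.g. 0) and the unknown-word entry
        if wid <= 0 or wid == word_unk:
            continue
        # skip words containing subwords missing from the subword vocabulary
        if any(token not in subword_dict for token in word):
            continue
        succ = root[0]
        for i, token in enumerate(word):
            sid = subword_dict[token]
            if sid not in succ:
                succ[sid] = [{}, -1, (wid - 1, wid)]
            else:
                low, high = succ[sid][2]
                succ[sid][2] = (min(low, wid - 1), max(high, wid))
            if i == len(word) - 1:
                succ[sid][1] = wid  # a complete word ends at this node
            succ = succ[sid][0]
    return root

# Toy usage: character-level subwords, two in-vocabulary words.
word_dict = {'<eos>': 0, '<unk>': 1, 'go': 2, 'goal': 3}
subword_dict = {'g': 0, 'o': 1, 'a': 2, 'l': 3}
root = make_lexical_tree(word_dict, subword_dict, word_dict['<unk>'])
node_go = root[0][subword_dict['g']][0][subword_dict['o']]
assert node_go[1] == 2  # the node reached via 'g', 'o' marks the word 'go'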