"""Converts a list of indices to words.""" if until_eos: try: idxs = idxs[:idxs.index(self.word2idx('</s>'))] except ValueError: pass return ' '.join(self.idx2word[idx] for idx in idxs) def convert_words_to_idxs(self, words, add_bos=False, add_eos=False): """Converts a list of words to a list of indices.""" idxs = [self.word2idx(w) for w in words] if add_bos: idxs.insert(0, self.word2idx('<s>')) if add_eos: idxs.append(self.word2idx('</s>')) return idxs def __len__(self): """Returns the size of the vocabulary.""" return len(self.idx2word) def __repr__(self): return "Vocabulary with {} items".format(self.__len__()) tokenizer = DistilBertTokenizer() model = DistilBertModel() tokenizer = tokenizer.from_pretrained(pretrained_weights) model = model.from_pretrained(pretrained_weights)
app = dash.Dash( __name__, server=server, routes_pathname_prefix='/dash/' ) nltk.download('stopwords') bert_model_name = "uncased" bert_ckpt_dir = os.path.join("MODEL/", bert_model_name) bert_ckpt_file = os.path.join(bert_ckpt_dir, "distilbert-base-uncased-tf_model.h5") bert_config_file = os.path.join(bert_ckpt_dir, "distilbert-base-uncased-config.json") tokenizer = DistilBertTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "distilbert-base-uncased-vocab.txt")) with open('lxkeywords.pkl', 'rb') as handle: words = pickle.load(handle) with open('classes/move_mainc1122.pkl', 'rb') as handle: classes_main = pickle.load(handle) with open('classes/move1.pkl', 'rb') as handle1: subclasses1 = pickle.load(handle1) with open('classes/move2.pkl', 'rb') as handle2: subclasses2 = pickle.load(handle2) with open('classes/move3.pkl', 'rb') as handle3: