def predict(self, sent, asp="null", k=1):
    """
    :param sent: processed sentence
    :param asp: an aspect mentioned inside sent
    :param k: int
    :return: top k predictions
    """
    wl = self.args.vocab.wl
    ## set model in eval mode
    self.model.eval()
    fake_label = [0]
    words, asp_loc = self.word2idx(sent, asp)
    word_ids, sequence_lengths = seqPAD.pad_sequences([words], pad_tok=0, wthres=wl)
    data_tensors = Data2tensor.sort_tensors(fake_label, [asp_loc], word_ids,
                                            sequence_lengths, self.device)
    fake_label_tensor, aspect_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
    arange_tensor = Data2tensor.idx2tensor(list(range(word_tensor.size(0))), self.device)
    label_score = self.model(word_tensor, sequence_lengths, aspect_tensor, arange_tensor)
    label_prob, label_pred = self.model.inference(label_score, k)
    return label_prob, label_pred
def evaluate_batch(self, eva_data):
    with torch.no_grad():
        wl = self.args.vocab.wl
        batch_size = self.args.batch_size
        ## set model in eval mode
        self.model.eval()
        start = time.time()
        y_true = Data2tensor.idx2tensor([], self.device)
        y_pred = Data2tensor.idx2tensor([], self.device)
        for i, (words, label_ids) in enumerate(
                self.args.vocab.minibatches(eva_data, batch_size=batch_size)):
            word_ids, sequence_lengths = seqPAD.pad_sequences(words, pad_tok=0, wthres=wl)
            data_tensors = Data2tensor.sort_tensors(label_ids, word_ids,
                                                    sequence_lengths, self.device)
            label_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
            y_true = torch.cat([y_true, label_tensor])
            label_score = self.model(word_tensor, sequence_lengths)
            label_prob, label_pred = self.model.inference(label_score, k=1)
            y_pred = torch.cat([y_pred, label_pred])
        # measures = Classifier.class_metrics(y_true, y_pred.squeeze())
        measures = Classifier.class_metrics(y_true.data.cpu().numpy(),
                                            y_pred.squeeze().data.cpu().numpy())
        end = time.time() - start
        speed = len(y_true) / end
    return measures, speed
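# `Classifier.class_metrics` is defined elsewhere in the repo. A minimal, self-contained
# sketch of what the call site above suggests it computes (accuracy plus macro-averaged
# precision/recall/F1 via scikit-learn) is given below for illustration only; the actual
# helper and its return order may differ.
def _class_metrics_sketch(y_true, y_pred):
    from sklearn import metrics
    acc = metrics.accuracy_score(y_true, y_pred)
    p, r, f1, _ = metrics.precision_recall_fscore_support(y_true, y_pred, average="macro")
    return acc, p, r, f1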
def predict(self, doc="", topk=5):
    """
    Inputs:
        doc: a document
        topk: number of recommended tokens
    Outputs:
        A list of predicted labels and their probabilities,
            e.g. [('5_star', 0.2020701915025711), ('3_star', 0.2010505348443985),
                  ('2_star', 0.2006799429655075), ('1_star', 0.1990940123796463),
                  ('4_star', 0.1971053034067154)]
    """
    doc_ids = self.word2idx(doc.split())
    #######################
    # YOUR CODE STARTS HERE
    pred_lb, pred_probs = None, None
    # convert to tensors
    doc_tensor = Data2tensor.idx2tensor(doc_ids)
    doc_lengths_tensor = Data2tensor.idx2tensor(len(doc_tensor))
    # call the model
    output, _, _ = self.model(doc_tensor.unsqueeze(0), doc_lengths_tensor.unsqueeze(0))
    # get the probabilities and predicted labels
    pred_probs, pred_lb = self.model.inference(output, topk)
    # flatten the tensors into plain Python lists so each predicted index can be
    # used to look up its label in args.vocab.i2l
    pred_probs = pred_probs.flatten().tolist()
    pred_lb = pred_lb.flatten().tolist()
    # map predicted indices to label names
    pred_lb = [self.args.vocab.i2l[x] for x in pred_lb]
    # YOUR CODE ENDS HERE
    #######################
    return list(zip(pred_lb, pred_probs))
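# Usage sketch (hypothetical; assumes `clf` is a trained instance of the class that
# owns `predict` above, loaded elsewhere):
#     ranked = clf.predict("the room was clean but the staff was rude", topk=3)
#     # -> e.g. [('3_star', 0.41), ('2_star', 0.33), ('4_star', 0.26)]  (illustrative numbers)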
def train_batch(self, train_data):
    wl = self.args.vocab.wl
    clip_rate = self.args.clip
    batch_size = self.args.batch_size
    num_train = len(train_data)
    total_batch = num_train // batch_size + 1
    prog = Progbar(target=total_batch)
    ## set model in train mode
    self.model.train()
    train_loss = []
    for i, (words, label_ids) in enumerate(
            self.args.vocab.minibatches(train_data, batch_size=batch_size)):
        word_ids, sequence_lengths = seqPAD.pad_sequences(words, pad_tok=0, wthres=wl)
        data_tensors = Data2tensor.sort_tensors(label_ids, word_ids,
                                                sequence_lengths, self.device)
        label_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
        self.model.zero_grad()
        label_score = self.model(word_tensor, sequence_lengths)
        batch_loss = self.model.NLL_loss(label_score, label_tensor)
        train_loss.append(batch_loss.item())
        batch_loss.backward()
        if clip_rate > 0:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip_rate)
        self.optimizer.step()
        prog.update(i + 1, [("Train loss", batch_loss.item())])
    return np.mean(train_loss)
def evaluate_batch(self, eval_data):
    start_time = time.time()
    eval_batch = self.args.vocab.minibatches(eval_data, batch_size=self.args.batch_size)
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    total_loss = 0.
    total_word = 0
    with torch.no_grad():
        for seq_batch in eval_batch:
            word_pad_ids, seq_lens = seqPAD.pad_sequences(seq_batch,
                                                          pad_tok=self.args.vocab.w2i[PAD])
            seq_tensor = Data2tensor.idx2tensor(word_pad_ids, self.device)
            hidden = self.model.init_hidden(seq_tensor.size(0))
            for i in range(0, seq_tensor.size(1) - 1, self.args.bptt):
                data, target = self.bptt_batch(seq_tensor, i)
                mask_target = target > 0
                output, hidden = self.model(data, hidden)
                batch_loss = self.model.NLL_loss(output, target)
                total_loss += batch_loss.item()
                hidden = self.repackage_hidden(hidden)
                total_word = total_word + mask_target.sum().item()
    cur_loss = total_loss / total_word
    elapsed = time.time() - start_time
    print('-' * 89)
    print('| EVALUATION | words {:5d} | lr {:02.2f} | words/s {:5.2f} | '
          'loss {:5.2f} | ppl {:8.2f}'.format(total_word, self.args.lr,
                                              total_word / elapsed, cur_loss,
                                              math.exp(cur_loss)))
    print('-' * 89)
    return cur_loss, total_word, elapsed
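# `repackage_hidden` is referenced above but defined elsewhere. A minimal sketch,
# assuming it follows the standard PyTorch word-language-model recipe of detaching
# the hidden state (a tensor, or a tuple of tensors for LSTMs) from the graph so
# back-propagation does not reach into earlier BPTT windows:
def _repackage_hidden_sketch(h):
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(_repackage_hidden_sketch(v) for v in h)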
def recommend(self, context="", topk=5):
    """
    Inputs:
        context: the text form of a given context
        topk: number of recommended tokens
    Outputs:
        A list of recommended words and their probabilities,
            e.g. [('i', 0.044447630643844604), ('it', 0.027285737916827202),
                  ("don't", 0.026111900806427002), ('will', 0.023868300020694733),
                  ('had', 0.02248169668018818)]
    """
    rec_wds, rec_probs = [], []
    #######################
    # YOUR CODE STARTS HERE
    # vocabulary list mapping indices to words
    data = self.args.vocab.i2w
    # split the context into tokens
    context = context.split()
    # look up the vocabulary index of every context word (words not in the vocabulary are skipped)
    idx = []
    for i in context:
        for j in range(len(data)):
            if i == data[j]:
                idx.append(j)
    # convert the indices to a tensor
    idx = Data2tensor.idx2tensor(idx)
    batch_size = 1
    hidden = self.model.init_hidden(batch_size)
    output, hidden = self.model(idx.reshape(1, -1), hidden)
    # take the top-k predictions at the last time step
    p, l = self.model.inference(output, topk)
    prob = list(p[0][-1])
    label = list(l[0][-1])
    # map each predicted index back to its word via i2w
    rec_wds += [data[k.item()] for k in label]
    # .item() converts a one-element tensor into a plain Python number
    rec_probs += [k.item() for k in prob]
    # YOUR CODE ENDS HERE
    #######################
    return list(zip(rec_wds, rec_probs))
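# Usage sketch (hypothetical; assumes `lm` is a trained instance of the class that
# owns `recommend` above):
#     suggestions = lm.recommend("this hotel was", topk=5)
#     # -> e.g. [('very', 0.08), ('a', 0.05), ('the', 0.04), ...]  (illustrative numbers)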
def evaluate_batch(self, eval_data):
    start_time = time.time()
    eval_batch = self.args.vocab.minibatches_with_label(eval_data,
                                                        batch_size=self.args.batch_size)
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    total_loss = 0.
    total_docs = 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for doc_batch, lb_batch in eval_batch:
            doc_pad_ids, doc_lengths = seqPAD.pad_sequences(doc_batch,
                                                            pad_tok=self.args.vocab.w2i[PAD])
            #######################
            # YOUR CODE STARTS HERE
            doc_tensor = Data2tensor.idx2tensor(doc_pad_ids, self.device)
            doc_lengths_tensor = Data2tensor.idx2tensor(doc_lengths, self.device)
            lb_tensor = Data2tensor.idx2tensor(lb_batch, self.device)
            total_docs += doc_tensor.size(0)
            output, _, _ = self.model(doc_tensor, doc_lengths_tensor)
            loss = self.model.NLL_loss(output, lb_tensor)
            label_prob, label_pred = self.model.inference(output, k=1)
            y_true.extend(lb_tensor)
            y_pred.extend(label_pred.squeeze(1))
            total_loss += loss.item()
            # YOUR CODE ENDS HERE
            #######################
    precision, recall, f1, acc = Sentimentmodel.cal_metrics(y_true, y_pred)
    cur_loss = total_loss / total_docs
    elapsed = time.time() - start_time
    metrics = {
        "precision": precision * 100,
        "recall": recall * 100,
        "f1": f1 * 100,
        "acc": acc * 100,
        "loss": cur_loss
    }
    return metrics, total_docs, elapsed
def train_batch(self, train_data):
    total_loss = 0.
    total_docs = 0
    start_time = time.time()
    train_batch = self.args.vocab.minibatches_with_label(train_data,
                                                         batch_size=self.args.batch_size)
    # Turn on training mode which enables dropout.
    self.model.train()
    for batch, (doc_batch, lb_batch) in enumerate(train_batch):
        doc_pad_ids, doc_lengths = seqPAD.pad_sequences(doc_batch,
                                                        pad_tok=self.args.vocab.w2i[PAD])
        doc_tensor = Data2tensor.idx2tensor(doc_pad_ids, self.device)
        doc_lengths_tensor = Data2tensor.idx2tensor(doc_lengths, self.device)
        lb_tensor = Data2tensor.idx2tensor(lb_batch, self.device)
        # doc_tensor = [batch_size, max_doc_length]
        total_docs += doc_tensor.size(0)
        self.model.zero_grad()
        output, _, _ = self.model(doc_tensor, doc_lengths_tensor)
        loss = self.model.NLL_loss(output, lb_tensor)
        avg_loss = loss / doc_tensor.size(0)
        avg_loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.clip)
        # update parameters in all sub-graphs
        self.model_optimizer.step()
        # for p in self.model.parameters():
        #     p.data.add_(p.grad.data, alpha=-self.args.lr)
        total_loss += loss.item()
    cur_loss = total_loss / total_docs
    elapsed = time.time() - start_time
    # print('-' * 89)
    # print('| TRAINING | epoch {:3d} | documents {:5d} | lr {:02.2f} | documents/s {:5.2f} | '
    #       'loss {:5.2f}'.format(epoch, total_docs, self.args.lr, total_docs / elapsed, cur_loss))
    # print('-' * 89)
    return cur_loss, total_docs, elapsed
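# Hypothetical outer training loop for the sentiment model above (illustration only;
# the driver script, argument names, and checkpoint path are assumptions, not the
# repo's actual code):
#     best_f1 = 0.0
#     for epoch in range(args.max_epochs):
#         train_loss, n_docs, t_train = sentiment_model.train_batch(train_data)
#         dev_metrics, _, _ = sentiment_model.evaluate_batch(dev_data)
#         if dev_metrics["f1"] > best_f1:
#             best_f1 = dev_metrics["f1"]
#             torch.save(sentiment_model.model.state_dict(), "best_model.pt")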
def train_batch(self, train_data):
    wl = self.args.vocab.wl
    cl = self.args.vocab.cl
    clip_rate = self.args.clip
    batch_size = self.args.batch_size
    num_train = len(train_data)
    total_batch = num_train // batch_size + 1
    prog = Progbar(target=total_batch)
    ## set model in train mode
    self.model.train()
    train_loss = []
    for i, (words, label_ids) in enumerate(
            self.args.vocab.minibatches(train_data, batch_size=batch_size)):
        char_ids, word_ids = zip(*words)
        word_ids, sequence_lengths = seqPAD.pad_sequences(word_ids, pad_tok=0,
                                                          wthres=wl, cthres=cl)
        char_ids, word_lengths = seqPAD.pad_sequences(char_ids, pad_tok=0, nlevels=2,
                                                      wthres=wl, cthres=cl)
        label_ids, _ = seqPAD.pad_sequences(label_ids, pad_tok=0, wthres=wl, cthres=cl)
        data_tensors = Data2tensor.sort_tensors(label_ids, word_ids, sequence_lengths,
                                                char_ids, word_lengths)
        label_tensor, word_tensor, sequence_lengths, word_seq_recover, char_tensor, word_lengths, char_seq_recover = data_tensors
        mask_tensor = word_tensor > 0
        label_score = self.model(word_tensor, sequence_lengths, char_tensor,
                                 word_lengths, char_seq_recover)
        batch_loss = self.model.NLL_loss(label_score, mask_tensor, label_tensor)
        train_loss.append(batch_loss.item())
        self.model.zero_grad()
        batch_loss.backward()
        if clip_rate > 0:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), clip_rate)
        self.optimizer.step()
        prog.update(i + 1, [("Train loss", batch_loss.item())])
    return np.mean(train_loss)
def predict_null(classifier, sent, asp, i2l):
    from utils.data_utils import Data2tensor, seqPAD
    wl = classifier.args.vocab.wl
    ## set model in eval mode
    classifier.model.eval()
    fake_label = [0]
    words, asp_loc = classifier.word2idx(sent, asp)
    word_ids, sequence_lengths = seqPAD.pad_sequences([words], pad_tok=0, wthres=wl)
    data_tensors = Data2tensor.sort_tensors(fake_label, [asp_loc], word_ids,
                                            sequence_lengths, classifier.device)
    fake_label_tensor, aspect_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
    arange_tensor = Data2tensor.idx2tensor(list(range(word_tensor.size(0))), classifier.device)
    word_h_n = classifier.model.rnn.get_all_hiddens(word_tensor, sequence_lengths).mean(1)
    label_score = classifier.model.hidden2tag(word_h_n)
    label_score = classifier.model.dropfinal(label_score)
    label_prob, label_pred = classifier.model.inference(label_score, len(i2l))
    return label_prob, label_pred
def predict(self, sent, k=1):
    cl = self.args.vocab.cl
    ## set model in eval mode
    self.model.eval()
    fake_label = [0]
    words = self.word2idx(sent)
    word_ids, sequence_lengths = seqPAD.pad_sequences([words], pad_tok=0, wthres=cl)
    data_tensors = Data2tensor.sort_tensors(fake_label, word_ids, sequence_lengths,
                                            volatile_flag=True)
    fake_label_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
    label_score = self.model(word_tensor, sequence_lengths)
    label_prob, label_pred = self.model.inference(label_score, k)
    return label_prob, label_pred
def evaluate_batch(self, eva_data):
    with torch.no_grad():
        wl = self.args.vocab.wl
        batch_size = self.args.batch_size
        ## set model in eval mode
        self.model.eval()
        start = time.time()
        y_true = Data2tensor.idx2tensor([], self.device)
        y_pred = Data2tensor.idx2tensor([], self.device)
        for i, (words, asp_locs, label_ids) in enumerate(
                self.args.vocab.minibatches(eva_data, batch_size=batch_size)):
            word_ids, sequence_lengths = seqPAD.pad_sequences(words, pad_tok=0, wthres=wl)
            data_tensors = Data2tensor.sort_tensors(label_ids, asp_locs, word_ids,
                                                    sequence_lengths, self.device)
            label_tensor, aspect_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
            arange_tensor = Data2tensor.idx2tensor(list(range(word_tensor.size(0))), self.device)
            y_true = torch.cat([y_true, label_tensor])
            label_score = self.model(word_tensor, sequence_lengths, aspect_tensor, arange_tensor)
            label_prob, label_pred = self.model.inference(label_score, k=1)
            y_pred = torch.cat([y_pred, label_pred])
        measures = Classifier.class_metrics(y_true, y_pred.squeeze())
        end = time.time() - start
        speed = len(y_true) / end
    return measures, speed
def train_batch(self, train_data, epoch=0):
    total_loss = 0.
    total_word = 0
    total_seq = 0
    start_time = time.time()
    train_batch = self.args.vocab.minibatches(train_data, batch_size=self.args.batch_size)
    # Turn on training mode which enables dropout.
    self.model.train()
    for batch, seq_batch in enumerate(train_batch):
        word_pad_ids, seq_lens = seqPAD.pad_sequences(seq_batch,
                                                      pad_tok=self.args.vocab.w2i[PAD])
        seq_tensor = Data2tensor.idx2tensor(word_pad_ids, self.device)
        # seq_tensor = [batch_size, seq_len]
        total_seq += seq_tensor.size(0)
        hidden = self.model.init_hidden(seq_tensor.size(0))
        for i in range(0, seq_tensor.size(1) - 1, self.args.bptt):
            # data = [batch_size, bptt]
            # target = [batch_size, bptt]
            data, target = self.bptt_batch(seq_tensor, i)
            mask_target = target > 0
            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to the start of the dataset.
            hidden = self.repackage_hidden(hidden)
            self.model.zero_grad()
            output, hidden = self.model(data, hidden)
            loss = self.model.NLL_loss(output, target)
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.clip)
            for p in self.model.parameters():
                p.data.add_(p.grad.data, alpha=-self.args.lr)
            total_loss += loss.item()
            total_word = total_word + mask_target.sum().item()
    cur_loss = total_loss / total_word
    elapsed = time.time() - start_time
    print('-' * 89)
    print('| TRAINING | epoch {:3d} | batch {:5d} | sequences {:5d} | words {:5d} | lr {:02.2f} | '
          'words/s {:5.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
              epoch, batch + 1, total_seq, total_word, self.args.lr,
              total_word / elapsed, cur_loss, math.exp(cur_loss)))
    print('-' * 89)
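# `bptt_batch` is referenced above but defined elsewhere; the method presumably reads
# the window size from self.args.bptt. A minimal sketch under that assumption, for
# batch-first tensors of shape [batch_size, seq_len], taking a window of at most
# `bptt` steps with the target shifted one position to the right:
def _bptt_batch_sketch(source, i, bptt):
    seq_len = min(bptt, source.size(1) - 1 - i)
    data = source[:, i:i + seq_len]
    target = source[:, i + 1:i + 1 + seq_len]
    return data, target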
def wd_pred(model, vocab, sentence):
    """
    Predict the next word
    """
    with torch.no_grad():
        words = sentence.split(' ')
        for i, word in enumerate(words):
            # transform word to tensor
            word_idx = vocab.w2i[word]
            word_tensor = Data2tensor.idx2tensor([[word_idx]])
            if i == 0:
                hidden = model.init_hidden(word_tensor.size(0))
            output, hidden = model(word_tensor, hidden)
        label_prob, label_pred = model.inference(output)
        word_idx = label_pred.data[0][0].data.numpy()[0]
    return vocab.i2w[word_idx]
def rev_gen(model, vocab, start_word=SOS):
    """
    Generate a review that starts with 'start_word' and ends with '</s>'
    """
    print('Generating sample review .....................')
    with torch.no_grad():
        word_idx = vocab.w2i[start_word]
        all_words = [start_word]
        # initialise the hidden state once and carry it across time steps,
        # so the generator is conditioned on the whole prefix rather than
        # only on the previous word
        hidden = model.init_hidden(1)
        while word_idx != vocab.w2i[EOS]:
            word_tensor = Data2tensor.idx2tensor([[word_idx]])
            output, hidden = model(word_tensor, hidden)
            label_prob, label_pred = model.inference(output)
            word_idx = label_pred.data[0][0].data.numpy()[0]
            all_words.append(vocab.i2w[word_idx])
    return ' '.join(all_words)
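# Hypothetical usage of the two generation helpers above (loading of `model` and
# `vocab` from a trained checkpoint is elided):
#     next_word = wd_pred(model, vocab, "the food was really")
#     sample_review = rev_gen(model, vocab, start_word=SOS)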
def scoring(sent, args, classifier):
    cl = args.vocab.cl
    ## set model in eval mode
    classifier.model.eval()
    fake_label = [0]
    words = classifier.word2idx(sent)
    word_ids, sequence_lengths = seqPAD.pad_sequences([words], pad_tok=0, wthres=cl)
    data_tensors = Data2tensor.sort_tensors(fake_label, word_ids, sequence_lengths,
                                            volatile_flag=True)
    fake_label_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
    label_score = classifier.model(word_tensor, sequence_lengths)
    # label_prob, label_pred = classifier.model.inference(label_score)
    return label_score
def predict_null(self, sent, asp):
    wl = self.classifier.args.vocab.wl
    ## set model in eval mode
    self.classifier.model.eval()
    fake_label = [0]
    words, asp_loc = self.classifier.word2idx(sent, asp)
    word_ids, sequence_lengths = seqPAD.pad_sequences([words], pad_tok=0, wthres=wl)
    data_tensors = Data2tensor.sort_tensors(fake_label, [asp_loc], word_ids,
                                            sequence_lengths, self.classifier.device)
    fake_label_tensor, aspect_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
    word_h_n = self.classifier.model.rnn.get_all_hiddens(word_tensor, sequence_lengths).mean(1)
    label_score = self.classifier.model.hidden2tag(word_h_n)
    label_score = self.classifier.model.dropfinal(label_score)
    label_prob, label_pred = self.classifier.model.inference(label_score, len(self.i2l))
    return label_prob, label_pred
def predict(self, sent):
    numtags = len(self.args.vocab.l2i)
    wl = self.args.vocab.wl
    cl = self.args.vocab.cl
    ## set model in eval mode
    self.model.eval()
    words = self.word2idx(sent)
    char_ids, word_ids = zip(*words)
    fake_label = [[0] * len(word_ids)]
    word_ids, sequence_lengths = seqPAD.pad_sequences([word_ids], pad_tok=0,
                                                      wthres=wl, cthres=cl)
    char_ids, word_lengths = seqPAD.pad_sequences([char_ids], pad_tok=0, nlevels=2,
                                                  wthres=wl, cthres=cl)
    data_tensors = Data2tensor.sort_tensors(fake_label, word_ids, sequence_lengths,
                                            char_ids, word_lengths, volatile_flag=True)
    fake_label_tensor, word_tensor, sequence_lengths, word_seq_recover, char_tensor, word_lengths, char_seq_recover = data_tensors
    label_score = self.model(word_tensor, sequence_lengths, char_tensor,
                             word_lengths, char_seq_recover)
    if numtags > 2:
        label_prob, label_pred = label_score.data.max(1)
    else:
        label_prob = torch.sigmoid(label_score.squeeze())
        label_pred = (label_prob >= 0.5).data.long()
    return label_prob, label_pred
def evaluate_batch(self, eva_data):
    cl = self.args.vocab.cl
    batch_size = self.args.batch_size
    ## set model in eval mode
    self.model.eval()
    num_label = 0
    num_correct = 0
    for i, (words, label_ids) in enumerate(
            self.args.vocab.minibatches(eva_data, batch_size=batch_size)):
        word_ids, sequence_lengths = seqPAD.pad_sequences(words, pad_tok=0, wthres=cl)
        data_tensors = Data2tensor.sort_tensors(label_ids, word_ids, sequence_lengths,
                                                volatile_flag=True)
        label_tensor, word_tensor, sequence_lengths, word_seq_recover = data_tensors
        label_score = self.model(word_tensor, sequence_lengths)
        label_prob, label_pred = self.model.inference(label_score, k=1)
        assert len(label_pred) == len(label_tensor)
        correct_pred = (label_pred.squeeze() == label_tensor.data).sum().item()
        assert correct_pred <= batch_size
        num_label += len(label_tensor)
        num_correct += correct_pred
    acc = num_correct / num_label
    return acc
def evaluate_batch(self, eva_data):
    wl = self.args.vocab.wl
    cl = self.args.vocab.cl
    batch_size = self.args.batch_size
    ## set model in eval mode
    self.model.eval()
    correct_preds = 0.
    total_preds = 0.
    total_correct = 0.
    accs = []
    pred_results = []
    gold_results = []
    for i, (words, label_ids) in enumerate(
            self.args.vocab.minibatches(eva_data, batch_size=batch_size)):
        char_ids, word_ids = zip(*words)
        word_ids, sequence_lengths = seqPAD.pad_sequences(word_ids, pad_tok=0,
                                                          wthres=wl, cthres=cl)
        char_ids, word_lengths = seqPAD.pad_sequences(char_ids, pad_tok=0, nlevels=2,
                                                      wthres=wl, cthres=cl)
        label_ids, _ = seqPAD.pad_sequences(label_ids, pad_tok=0, wthres=wl, cthres=cl)
        data_tensors = Data2tensor.sort_tensors(label_ids, word_ids, sequence_lengths,
                                                char_ids, word_lengths, volatile_flag=True)
        label_tensor, word_tensor, sequence_lengths, word_seq_recover, char_tensor, word_lengths, char_seq_recover = data_tensors
        mask_tensor = word_tensor > 0
        label_score = self.model(word_tensor, sequence_lengths, char_tensor,
                                 word_lengths, char_seq_recover)
        label_prob, label_pred = self.model.inference(label_score, mask_tensor)
        pred_label, gold_label = recover_label(label_pred, label_tensor, mask_tensor,
                                               self.args.vocab.l2i, word_seq_recover)
        pred_results += pred_label
        gold_results += gold_label
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results)
    # label_pred = label_pred.cpu().data.numpy()
    # label_tensor = label_tensor.cpu().data.numpy()
    # sequence_lengths = sequence_lengths.cpu().data.numpy()
    #
    # for lab, lab_pred, length in zip(label_tensor, label_pred, sequence_lengths):
    #     lab = lab[:length]
    #     lab_pred = lab_pred[:length]
    #     accs += [a == b for (a, b) in zip(lab, lab_pred)]
    #
    #     lab_chunks = set(NERchunks.get_chunks(lab, self.args.vocab.l2i))
    #     lab_pred_chunks = set(NERchunks.get_chunks(lab_pred, self.args.vocab.l2i))
    #
    #     correct_preds += len(lab_chunks & lab_pred_chunks)
    #     total_preds += len(lab_pred_chunks)
    #     total_correct += len(lab_chunks)
    #
    # p = correct_preds / total_preds if correct_preds > 0 else 0
    # r = correct_preds / total_correct if correct_preds > 0 else 0
    # f = 2 * p * r / (p + r) if correct_preds > 0 else 0
    # acc = np.mean(accs)
    return acc, f
# coding: utf-8
import argparse
import time
import math
import torch
import os
import torch.onnx
import torch.optim as optim
from sklearn import metrics

from utils.data_utils import Vocab, Txtfile, Data2tensor, SaveloadHP, seqPAD, PAD
from utils.core_nns_emb import UniLSTMModel, BiLSTMModel

# Set the random seed manually for reproducibility.
Data2tensor.set_randseed(1234)


class Sentimentmodel(object):
    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda" if self.args.use_cuda else "cpu")
        self.word2idx = self.args.vocab.wd2idx(self.args.vocab.w2i,
                                               allow_unk=self.args.allow_unk,
                                               start_end=self.args.se_words)
        self.label2idx = self.args.vocab.tag2idx(self.args.vocab.l2i)
        self.ntokens = len(self.args.vocab.w2i)
        self.nlabels = len(self.args.vocab.l2i)
        if args.bidirect:
            self.model = BiLSTMModel(args.model, self.ntokens, args.emb_size, args.hidden_size,