def main():
    Config = config.get_args()
    set_seed(Config.seed)
    word2ix, ix2word, max_len, avg_len = build_word_dict(Config.train_path)

    test_data = CommentDataSet(Config.test_path, word2ix, ix2word)
    test_loader = DataLoader(test_data, batch_size=16, shuffle=False,
                             num_workers=0, collate_fn=mycollate_fn)

    weight = torch.zeros(len(word2ix), Config.embedding_dim)

    model = SentimentModel(embedding_dim=Config.embedding_dim,
                           hidden_dim=Config.hidden_dim,
                           LSTM_layers=Config.LSTM_layers,
                           drop_prob=Config.drop_prob,
                           pre_weight=weight)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    criterion = nn.CrossEntropyLoss()

    model.load_state_dict(torch.load(Config.model_save_path), strict=True)  # load the trained model
    confuse_meter = ConfuseMeter()
    confuse_meter = test(test_loader, device, model, criterion)
def main():
    Config = config.get_args()
    set_seed(Config.seed)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    word2ix, ix2word, max_len, avg_len = build_word_dict(Config.train_path)

    weight = torch.zeros(len(word2ix), Config.embedding_dim)

    model = SentimentModel(embedding_dim=Config.embedding_dim,
                           hidden_dim=Config.hidden_dim,
                           LSTM_layers=Config.LSTM_layers,
                           drop_prob=Config.drop_prob,
                           pre_weight=weight)
    model.load_state_dict(torch.load(Config.model_save_path), strict=True)  # load the trained model

    # Example input (a Chinese movie review):
    # comment_str = "忘不掉的一句台词,是杜邦公司笑着对男主说:“Sue me”。我记得前段时间某件事,也是同样的说辞,“欢迎来起诉中华有为”。也是同样的跋扈。若干年后,会看到改编的电影吗。"
    result = predict(Config.comment_str, model, device, word2ix)
    print(Config.comment_str, result)
def create_model(log_dir=None, val_tuple=None, vocab=None):
    return SentimentModel(
        config={
            'vocab_size': MAX_WORDS,
            'log_dir': log_dir,
            'tensorboard': {
                'vocab': vocab,
                'val_tuple': val_tuple,
            },
        })
def test(args):
    if args.device == "gpu":
        cfg_proto = tf.ConfigProto(intra_op_parallelism_threads=2)
    else:
        cfg_proto = None
    with tf.Session(config=cfg_proto) as sess:
        # Loading the vocabulary files
        vocab, rev_vocab = load_vocab(args)
        args.vocab_size = len(rev_vocab)

        # Creating test model
        with tf.variable_scope("model", reuse=None):
            model_test = SentimentModel(args, None, mode='eval')

        # Reload model from checkpoints, if any
        steps_done = initialize_weights(sess, model_test, args, mode='test')
        logger.info("loaded %d completed steps", steps_done)

        test_set = load_eval_data(args, split='test')
        correct, incorrect, losses = evaluate(sess, model_test, test_set, args)
        with open(os.path.join(args.train_dir, 'incorrect.txt'), 'w') as f:
            f.write(str(incorrect))
        percent_correct = float(correct) * 100.0 / len(test_set)
        logger.info("Correct Predictions - %.4f. Eval Losses - %.4f", percent_correct, losses)
from model import SentimentModel
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import csv
import torch

s = ["Negative", "Random", "Positive"]

bert_model = AutoModel.from_pretrained('google/bert_uncased_L-4_H-256_A-4')
bert_tokenizer = AutoTokenizer.from_pretrained('google/bert_uncased_L-4_H-256_A-4')

model = SentimentModel(bert_model)
model.freeze_weights()
model.lstm.load_state_dict(torch.load('rnn.pth'))
model.layers.load_state_dict(torch.load('checkpoint.pth'))
model.eval()

f = open('result.csv', 'w', newline='')
out_file = csv.writer(f)
data = pd.read_csv('results_ocr.csv')
out_file.writerow(['Filename', 'Category'])

for idx, row in data.iterrows():
    if type(row['Text']) == float:
        # missing OCR text is read as NaN (a float), so label it "Random"
        out_file.writerow([row['Filename'], "Random"])
    else:
        scores = model([row['Text']], bert_tokenizer)
        sentiment = torch.argmax(torch.exp(scores), dim=1)
        out_file.writerow([row['Filename'], s[sentiment]])
import nltk, re
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from math import ceil
from flask import Flask
import config
#from tqdm import tqdm

# create and configure the app
app = Flask(__name__, instance_relative_config=True)
app.config.from_object("config.Config")

filename = app.config['MODEL']
max_length = app.config['SENTENCE_MAX_LENGTH']
embedding_dim = app.config['EMBEDDING_DIM']
vocab_size = app.config['VOCABULARY_SIZE']

model = SentimentModel(embedding_dim, vocab_size, max_length)
model.load_weights(filename)

def preprocess(text, stem=False):
    stemmer = SnowballStemmer('english')
    text_cleaning_re = r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"
    text = re.sub(text_cleaning_re, ' ', str(text).lower()).strip()
    tokens = []
    stop_words = stopwords.words('english')
    for token in text.split():
        if token not in stop_words:
            if stem:
                tokens.append(stemmer.stem(token))
            else:
                tokens.append(token)
    return " ".join(tokens)
import torch
import torch.nn as nn

from data import load_file
from model import SentimentModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

TEXT, LABEL, train, valid, test, train_iter, valid_iter, test_iter = load_file(filepath='data/', device=device)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = SentimentModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
optimizer = torch.optim.SGD(model.parameters(), lr=3e-3)
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

def binary_accuracy(preds, y):
    # round sigmoid outputs to 0/1 and compare against the labels
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
def run(fold=0):
    # kfold type of data input
    data = pd.read_csv(config.TRAIN_FOLDS_FILE)
    df_train = data[data['kfold'] != fold].reset_index(drop=True)
    df_valid = data[data['kfold'] == fold].reset_index(drop=True)

    train_data = CommentData(comments=df_train['Comment'],
                             labels=df_train['Label_encoded'],
                             sentiments=df_train['Sentiment_encoded'])
    train_dataloader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config.TRAIN_BATCH_SIZE,
        # num_workers = 4
    )

    valid_data = CommentData(comments=df_valid['Comment'],
                             labels=df_valid['Label_encoded'],
                             sentiments=df_valid['Sentiment_encoded'])
    valid_dataloader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=config.VALID_BATCH_SIZE,
        # num_workers = 4
    )

    device = torch.device('cuda')
    model_config = RobertaConfig.from_pretrained(config.ROBERTA_PATH)
    model_config.output_hidden_states = True
    model = SentimentModel(model_config, config.OUTPUT_SIZE)
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        },
    ]

    num_train_steps = int(len(df_train) / config.TRAIN_BATCH_SIZE * config.EPOCHS)
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=num_train_steps)

    # train_fn(data_loader, model, device, optimizer, scheduler=None)
    train_loss_rec = []
    eval_loss_rec = []
    early_stopping = utils.EarlyStopping(patience=5, mode='min')

    for epoch in range(config.EPOCHS):
        print(f'########### fold = {fold} epoch = {epoch} ############')
        loss_train = engine.train_fn(data_loader=train_dataloader,
                                     model=model,
                                     device=device,
                                     optimizer=optimizer,
                                     scheduler=scheduler)
        train_loss_rec.append(loss_train)

        losses_eval = engine.eval_fn(valid_dataloader, model, device)
        eval_loss_rec.append(losses_eval)
        print(f'train_loss = {loss_train} eval_loss = {losses_eval}')

        # print(f'save model_{fold}.bin')
        # torch.save(model.state_dict(), config.OUTPUT_PATH + f'/model_{fold}.bin')
        early_stopping(losses_eval, model,
                       model_path=config.OUTPUT_PATH + f'/model_label_{fold}.bin')
        if early_stopping.early_stop:
            print('Early stopping')
            break
def train(args):
    max_epochs = args.config.max_epochs
    batch_size = args.config.batch_size

    if args.device == "gpu":
        cfg_proto = tf.ConfigProto(intra_op_parallelism_threads=2)
        cfg_proto.gpu_options.allow_growth = True
    else:
        cfg_proto = None

    with tf.Session(config=cfg_proto) as sess:
        # Loading the vocabulary files
        vocab, rev_vocab = load_vocab(args)
        args.vocab_size = len(rev_vocab)

        # Loading all the training data
        train_files, training_size = load_train_data(args)
        queue = tf.train.string_input_producer(train_files, num_epochs=max_epochs, shuffle=True)

        # Creating training model
        with tf.variable_scope("model", reuse=None):
            model = SentimentModel(args, queue, mode='train')

        # Reload model from checkpoints, if any
        steps_done = initialize_weights(sess, model, args, mode='train')
        logger.info("loaded %d completed steps", steps_done)

        # Load the w2v embeddings
        if steps_done == 0 and args.config.cnn_mode != 'rand':
            w2v_array = load_w2v(args, rev_vocab)
            initialize_w2v(sess, model, w2v_array)

        # Reusing weights for evaluation model
        with tf.variable_scope("model", reuse=True):
            model_eval = SentimentModel(args, None, mode='eval')

        dev_set = load_eval_data(args, split='dev')

        # This need not be zero due to incomplete runs
        epoch = model.epoch.eval()
        remaining_examples = training_size * max_epochs - (model.global_step.eval() * batch_size)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        percent_best = 0.0

        while epoch < max_epochs:
            logger.info("Epochs done - %d", epoch)
            frac_num_batches = float(remaining_examples) / (max_epochs - epoch) / batch_size
            if epoch == max_epochs - 1:
                # Last batch may have some extra elements
                num_batches = math.ceil(frac_num_batches)
            else:
                num_batches = round(frac_num_batches)
            num_batches = int(num_batches)
            logger.info(
                "%d remaining examples, %d epochs left, %.4f fractional number of batches, %d chosen",
                remaining_examples, max_epochs - epoch, frac_num_batches, num_batches)
            remaining_examples -= num_batches * batch_size
            epoch_start = time.time()

            if coord.should_stop():
                break

            for i in range(1, num_batches + 1):
                output_feed = [model.updates, model.clip, model.losses]
                _, _, losses = sess.run(output_feed)
                if i % 100 == 0:
                    logger.info(
                        "minibatches done %d. Training Loss %.4f. Time elapsed in epoch %.4f.",
                        i, losses, (time.time() - epoch_start) / 3600.0)
                if i % args.config.eval_frequency == 0 or i == num_batches:
                    logger.info("Evaluating model after %d minibatches", i)
                    correct, _, losses = evaluate(sess, model_eval, dev_set, args)
                    percent_correct = float(correct) * 100.0 / len(dev_set)
                    logger.info("Correct Predictions - %.4f. Eval Loss - %.4f", percent_correct, losses)
                    if percent_correct > percent_best:
                        percent_best = percent_correct
                        logger.info("Saving Best Model")
                        checkpoint_path = os.path.join(args.best_dir, "sentence.ckpt")
                        model.best_saver.save(sess, checkpoint_path,
                                              global_step=model.global_step, write_meta_graph=False)
                    # Also save the model for continuing in future
                    checkpoint_path = os.path.join(args.train_dir, "sentence.ckpt")
                    model.saver.save(sess, checkpoint_path,
                                     global_step=model.global_step, write_meta_graph=False)

            # Update epoch counter
            sess.run(model.epoch_incr)
            epoch += 1

        checkpoint_path = os.path.join(args.train_dir, "sentence.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step, write_meta_graph=False)
        coord.request_stop()
        coord.join(threads)
# preprocess and save word encodings
preprocessor = Preprocessor(max_vocab=args.max_vocab)
data = preprocessor.fit_transform(dataset=data)
preprocessor.save(args.prepro_save_path)

# validation split
data.split_data(validation_count=args.validation_count)
train_ds, val_ds = data.to_dataset()

# to DataLoaders
train_set = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
val_set = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)

print('Initializing model...')
mod = SentimentModel(len(preprocessor.vocab2enc) + 3, args.embedding_dim, args.hidden_dim)
opt = Adam(mod.parameters(), lr=args.lr)

print('Training...')
fit(training=train_set, model=mod, validation=val_set, optimizer=opt,
    loss=torch.nn.BCELoss(), epochs=args.epochs)

# Saving model
print('Saving model...')
torch.save(mod, args.model_save_path)
print('Done!')
def predict():
    # kfold type of data input
    data = pd.read_csv(config.TEST_FILE)
    data['Label_encoded'] = 0
    data['Sentiment_encoded'] = 0
    df_test = data

    test_data = CommentData(comments=df_test['Comment'],
                            labels=df_test['Label_encoded'],
                            sentiments=df_test['Sentiment_encoded'])
    test_dataloader = torch.utils.data.DataLoader(
        test_data,
        batch_size=config.TEST_BATCH_SIZE,
        # num_workers = 4
    )

    # load one model per training fold; their outputs are averaged below
    device = torch.device('cuda')
    model_config = BertConfig.from_pretrained(config.BERT_PATH)
    model_config.output_hidden_states = True

    models = []
    for fold in range(5):
        model = SentimentModel(model_config, config.OUTPUT_SIZE_SENTIMENT)
        model.to(device)
        # model = nn.DataParallel(model)
        model.load_state_dict(torch.load(config.SAVED_MODEL_PATH + f'/model_{fold}.bin'))
        model.eval()
        models.append(model)

    # process raw output
    model_prediction = []
    with torch.no_grad():
        tq0 = tqdm(test_dataloader, total=len(test_dataloader))
        for bi, data in enumerate(tq0):
            # load data / ready to input
            input_ids = data['input_ids'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            attention_mask = data['attention_mask'].to(device, dtype=torch.long)
            label = data['label'].to(device, dtype=torch.long)
            sentiment = data['sentiment'].to(device, dtype=torch.long)

            # forward(self, ids, mask, type_ids); average the five fold models
            outs = [m(ids=input_ids, mask=attention_mask, type_ids=token_type_ids) for m in models]
            out = sum(outs) / len(outs)
            out = torch.softmax(out, dim=1).cpu().detach().numpy()

            for ix, result in enumerate(out):
                pred = np.argmax(result)
                model_prediction.append(pred)

    sample = pd.read_csv(config.TEST_FILE)
    sample['sentiment_pred'] = model_prediction
    sample.to_csv(config.OUTPUT_PATH + '/pred_sentiment.csv', index=False)
def main():
    Config = config.get_args()
    set_seed(Config.seed)
    word2ix, ix2word, max_len, avg_len = build_word_dict(Config.train_path)

    train_data = CommentDataSet(Config.train_path, word2ix, ix2word)
    train_loader = DataLoader(train_data, batch_size=16, shuffle=True,
                              num_workers=0, collate_fn=mycollate_fn)
    validation_data = CommentDataSet(Config.validation_path, word2ix, ix2word)
    validation_loader = DataLoader(validation_data, batch_size=16, shuffle=True,
                                   num_workers=0, collate_fn=mycollate_fn)
    test_data = CommentDataSet(Config.test_path, word2ix, ix2word)
    test_loader = DataLoader(test_data, batch_size=16, shuffle=False,
                             num_workers=0, collate_fn=mycollate_fn)

    weight = pre_weight(len(word2ix), Config.pred_word2vec_path, Config.embedding_dim,
                        word2ix, ix2word)

    model = SentimentModel(embedding_dim=Config.embedding_dim,
                           hidden_dim=Config.hidden_dim,
                           LSTM_layers=Config.LSTM_layers,
                           drop_prob=Config.drop_prob,
                           pre_weight=weight)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    optimizer = optim.Adam(model.parameters(), lr=Config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # learning-rate schedule
    criterion = nn.CrossEntropyLoss()

    # TensorBoard writes many log files, so clear the log directory before training
    if os.path.exists(Config.tensorboard_path):
        shutil.rmtree(Config.tensorboard_path)
    os.mkdir(Config.tensorboard_path)

    for epoch in range(Config.epochs):
        train_loader = tqdm(train_loader)
        train_loader.set_description('[%s%04d/%04d %s%f]' % ('Epoch:', epoch + 1, Config.epochs, 'lr:', scheduler.get_lr()[0]))
        train(epoch, Config.epochs, train_loader, device, model, criterion, optimizer, scheduler, Config.tensorboard_path)
        validate(epoch, validation_loader, device, model, criterion, Config.tensorboard_path)

    # save the trained model
    if not os.path.exists(Config.model_save_path):
        os.mkdir('./modelDict/')
    torch.save(model.state_dict(), Config.model_save_path)

    confuse_meter = ConfuseMeter()
    confuse_meter = test(test_loader, device, model, criterion)
test_data = batched_dataset.take(TEST_BATCHES)
train_data = batched_dataset.skip(TEST_BATCHES)

VOCAB_LENGTH = len(tokenizer.vocab)
EMB_DIM = 200
CNN_FILTERS = 100
DNN_UNITS = 256
OUTPUT_CLASSES = 6
DROPOUT_RATE = 0.2
NB_EPOCHS = 10

model = SentimentModel(vocabulary_size=VOCAB_LENGTH,
                       embedding_dimensions=EMB_DIM,
                       cnn_filters=CNN_FILTERS,
                       dnn_units=DNN_UNITS,
                       model_output_classes=OUTPUT_CLASSES,
                       dropout_rate=DROPOUT_RATE)

if OUTPUT_CLASSES == 2:
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
else:
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="adam",
                  metrics=["sparse_categorical_accuracy"])

model.fit(train_data, epochs=NB_EPOCHS)

# -------------------------------------------------------------------------------
def preprocess_and_train():
    # read dataset
    data = pd.read_csv('./training.1600000.processed.noemoticon.csv', encoding='latin-1', header=None)
    data.columns = ('target', 'uid', 'time', 'query', 'user', 'text')

    # create new dataframe
    sent_df = pd.DataFrame(None, columns=('target', 'text'))
    sent_df['target'] = data['target']
    sent_df['text'] = data['text'].apply(preprocess_text)
    sent_df['tweet_size'] = data['text'].apply(lambda x: len(x.split()))

    # select a random sample of 400,000 tweets from the full dataset (training on a smaller dataset)
    sent_df_sample = sent_df[(sent_df['tweet_size'] > 10) & (sent_df['target'] == 0)].sample(n=200000, random_state=SentConfig.SEED)
    sent_df_sample = sent_df_sample.append(
        sent_df[(sent_df['tweet_size'] > 10) & (sent_df['target'] == 4)].sample(n=200000, random_state=SentConfig.SEED))

    # split dataset into train, test, validation sets
    train, test = train_test_split(sent_df_sample, test_size=0.1)
    train, val = train_test_split(train, test_size=0.05)

    # create dataloaders to take advantage of PyTorch batching
    train_dl = SentimentDL(train)
    val_dl = SentimentDL(val)
    test_dl = SentimentDL(test)
    train_loader = DataLoader(train_dl, batch_size=SentConfig.TRAIN_BATCH_SIZE, shuffle=True)
    validation_loader = DataLoader(val_dl, batch_size=SentConfig.VALID_BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dl, batch_size=SentConfig.VALID_BATCH_SIZE, shuffle=True)

    # select the cuda device if available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # create model object
    model = SentimentModel()
    model.to(device)

    # set up optimizer and scheduler
    # do not apply weight decay in AdamW to bias and normalization terms
    no_decay = ['bias', 'LayerNorm.weight', 'LayerNorm.bias']
    # taken from https://huggingface.co/transformers/training.html
    # more named parameters in model.named_parameters()
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    # optim = AdamW(model.parameters(), lr=5e-5)
    optim = AdamW(optimizer_grouped_parameters, lr=5e-5)

    # learning rate scheduling
    num_train_steps = int((train_dl.__len__() / SentConfig.TRAIN_BATCH_SIZE) * SentConfig.EPOCHS)
    num_warmup_steps = int(0.05 * num_train_steps)
    scheduler = get_cosine_schedule_with_warmup(optim, num_warmup_steps, num_train_steps)

    # Training: keep the checkpoint with the best F1 score on the validation dataset
    scores = []
    best_f1 = 0
    for epoch in range(SentConfig.EPOCHS):
        _ = train_function(train_loader, model, optim, scheduler, device)
        _, results = evaluation_function(validation_loader, model, device)
        validation_f1 = round(f1_score(results[:, 1], results[:, 0]), 4)
        accuracy = round(accuracy_score(results[:, 1], results[:, 0]), 4)
        scores.append((validation_f1, accuracy))
        print('epoch num: ', epoch, 'f1 score: ', validation_f1, 'accuracy: ', accuracy)
        if validation_f1 > best_f1:
            # save model if validation f1 score improved
            torch.save(model.state_dict(), "SentimentModel.bin")
            # update best f1
            best_f1 = validation_f1

    # plotting scores
    scores = np.array(scores)
    fig, ax = plt.subplots(1, 2, figsize=(14, 6))
    ax[0].plot(range(SentConfig.EPOCHS), scores[:, 0], 'r')
    ax[1].plot(range(SentConfig.EPOCHS), scores[:, 1])
    ax[0].set(xlabel='Epoch num', ylabel='F1 Score')
    ax[1].set(xlabel='Epoch num', ylabel='Accuracy')
    ax[0].set_title('validation set f1 score at each epoch')
    ax[1].set_title('validation set accuracy at each epoch')

    # F1 score calculation on test predictions
    state_dict_ = torch.load('SentimentModel.bin')
    model = SentimentModel()
    model.load_state_dict(state_dict_)
    model.to(device)
    _, results = evaluation_function(test_loader, model, device, inference=True)
    print(classification_report(results[:, 1], results[:, 0]))
    print(round(accuracy_score(results[:, 1], results[:, 0]), 4))
to_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

VOCAB_LENGTH = len(tokenizer.vocab)
EMB_DIM = 200
CNN_FILTERS = 100
DNN_UNITS = 256
OUTPUT_CLASSES = 6
DROPOUT_RATE = 0.2
NB_EPOCHS = 8

model = SentimentModel(vocabulary_size=VOCAB_LENGTH,
                       embedding_dimensions=EMB_DIM,
                       cnn_filters=CNN_FILTERS,
                       dnn_units=DNN_UNITS,
                       model_output_classes=OUTPUT_CLASSES,
                       dropout_rate=DROPOUT_RATE)

if OUTPUT_CLASSES == 2:
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
else:
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="adam",
                  metrics=["sparse_categorical_accuracy"])

# restore the most recent checkpoint
latest = tf.train.latest_checkpoint('./new_weights')
model.load_weights(latest)
test_data = TensorDataset(torch.LongTensor(x_test), torch.LongTensor(y_test))

batch_size = 50
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int) + 1  # +1 for the 0 padding
output_size = 1
embedding_dim = 400
hidden_dim = 256
n_layers = 2
model = SentimentModel(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
# print(model)

# loss and optimization functions
lr = 0.01
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# training params
epochs = 2
counter = 0
print_every = 100
clip = 5

model.train()