def getLSTMModel(item_train_dl, item_valid_dl, item_vocab, user_train_dl,
                 user_valid_dl, user_vocab, embedding_dim, hidden_dim):
    print('Finding the best LSTM Models....')
    # Train one LSTM per sequence type (items and users).
    lstm_item_model = LSTM(item_vocab, embedding_dim, hidden_dim)
    lstm_user_model = LSTM(user_vocab, embedding_dim, hidden_dim)
    lstm_item_output = train_LSTMmodel(lstm_item_model, item_train_dl,
                                       item_valid_dl, 1, epochs=30, lr=0.01)
    lstm_user_output = train_LSTMmodel(lstm_user_model, user_train_dl,
                                       user_valid_dl, 2, epochs=30, lr=0.01)

    # Reload the checkpoints (presumably written by train_LSTMmodel) into fresh models.
    item_model_PATH = 'model/model1.pt'
    user_model_PATH = 'model/model2.pt'
    item_LSTMmodel = LSTM(item_vocab, embedding_dim, hidden_dim)
    user_LSTMmodel = LSTM(user_vocab, embedding_dim, hidden_dim)
    item_LSTMmodel.load_state_dict(torch.load(item_model_PATH))
    user_LSTMmodel.load_state_dict(torch.load(user_model_PATH))
    return item_LSTMmodel, user_LSTMmodel
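# The `LSTM` class used above is not shown in this snippet. A minimal sketch of
# what such a model might look like, assuming token-id inputs, an embedding
# layer, a single LSTM layer, and a linear head; the layer structure and the
# output dimension are assumptions, not the original implementation.
import torch
import torch.nn as nn


class LSTMSketch(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim=1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch, seq_len) of token ids
        embedded = self.embedding(x)          # (batch, seq_len, embedding_dim)
        _, (hidden, _) = self.lstm(embedded)  # hidden: (1, batch, hidden_dim)
        return self.fc(hidden[-1])            # (batch, output_dim)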
def Train(train_loader, val_loader, weight_pos):
    print("Start Training!")
    if sys.argv[1] == "LSTM":
        model = LSTM(NUM_TASKS, BATCH_SIZE, DIM_EMB).cuda()
    elif sys.argv[1] == "CNN":
        model = CNN(NUM_TASKS, BATCH_SIZE, DIM_EMB).cuda()
    else:
        raise ValueError("Unknown model type: " + sys.argv[1])
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_criterion = nn.BCEWithLogitsLoss(pos_weight=weight_pos.cuda())

    last_val_score = 0.0
    for epoch in range(N_EPOCH):
        print("epoch " + str(epoch) + ": ")
        total_loss = 0.0
        for x, y in train_loader:
            x = x.cuda()
            y = y.cuda()
            model.zero_grad()
            logits = model(x)
            loss = loss_criterion(logits, y)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f"loss on epoch {epoch} = {total_loss}")

        val_score = Val(val_loader, model)
        print(f"val_score on epoch {epoch} = {val_score}")
        # Early stopping: stop once the validation score no longer improves.
        if val_score <= last_val_score:
            break
        last_val_score = val_score
    return model
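# Val() above is referenced but not shown here. A minimal sketch of such a
# validation scorer, assuming binary labels, sigmoid outputs thresholded at
# 0.5, and mean accuracy as the score; these choices are assumptions, not the
# original implementation.
import torch


def ValSketch(val_loader, model):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.cuda(), y.cuda()
            preds = (torch.sigmoid(model(x)) > 0.5).float()
            correct += (preds == y).sum().item()
            total += y.numel()
    model.train()
    return correct / total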
def main(hparams):
    datamodule = DrivingDataMadule('v0.1', 1000, 60, 1000)

    # model = LSTM.load_from_checkpoint(
    #     "/home/sepehr/PycharmProjects/Neuropad/DAD/model/lightning_logs/version_31/checkpoints/checkpoint.ckpt"
    # )
    model = LSTM()

    trainer = pl.Trainer(gpus=-1,
                         max_epochs=100,
                         accelerator='ddp',
                         callbacks=[LSTMCallback()],
                         precision=16,
                         num_nodes=1)
    trainer.fit(model=model, datamodule=datamodule)
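# The LSTM fitted above is a PyTorch Lightning LightningModule defined
# elsewhere. A minimal sketch of the interface Trainer.fit() expects from it;
# the layer sizes, loss, and optimizer below are assumptions, not the original
# implementation.
import torch
import torch.nn as nn
import pytorch_lightning as pl


class LSTMSketchModule(pl.LightningModule):
    def __init__(self, input_size=1, hidden_size=64, output_size=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.head = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)          # out: (batch, seq_len, hidden_size)
        return self.head(out[:, -1])   # predict from the last time step

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = nn.functional.mse_loss(self(x), y)
        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)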
def get_model():
    if exp_model == 'MLP':
        return MLP(hist_len, pred_len, in_dim)
    elif exp_model == 'LSTM':
        return LSTM(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'GRU':
        return GRU(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'nodesFC_GRU':
        return nodesFC_GRU(hist_len, pred_len, in_dim, city_num, batch_size, device)
    elif exp_model == 'GC_LSTM':
        return GC_LSTM(hist_len, pred_len, in_dim, city_num, batch_size, device,
                       graph.edge_index)
    elif exp_model == 'PM25_GNN':
        return PM25_GNN(hist_len, pred_len, in_dim, city_num, batch_size, device,
                        graph.edge_index, graph.edge_attr, wind_mean, wind_std)
    elif exp_model == 'PM25_GNN_nosub':
        return PM25_GNN_nosub(hist_len, pred_len, in_dim, city_num, batch_size, device,
                              graph.edge_index, graph.edge_attr, wind_mean, wind_std)
    else:
        raise Exception('Wrong model name!')
test_iter = BucketIterator(test,
                           batch_size=opt.batch_size,
                           sort=False,
                           train=False,
                           shuffle=False)
# endregion

# %%
# region Define the model
# if opt.notrain:
#     model = torch.load(opt.weight_datapath + "model.pt")
#     model.state_dict = torch.load(opt.weight_datapath + './state.pt')
if not opt.notrain:
    if opt.model == 'LSTM':
        model = LSTM().to(device)
    elif opt.model == 'GRU':
        model = GRU().to(device)
    elif opt.model == 'AGRU':
        model = AGRU().to(device)
    elif opt.model == 'SharedGRU':
        model = SharedGRU().to(device)
    elif opt.model == 'SharedAGRU':
        model = SharedAGRU().to(device)
    elif opt.model == 'CNN':
        model = CNN().to(device)
    model.text_embedding_layer.weight.data.copy_(TITLE.vocab.vectors).to(device)
    for para in model.text_embedding_layer.parameters():
        para.requires_grad = False  # FIXME speedup
TEXT.build_vocab(train, max_size=10000)
LABEL.build_vocab(train)

BATCH_SIZE = 1

train_iterator, valid_iterator = data.BucketIterator.splits(
    (train, val),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    repeat=False)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BATCH_SIZE)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

device = torch.device('cuda')
model = model.to(device)
criterion = criterion.to(device)


def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    # round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()  # convert into float for division
    return correct.sum() / len(correct)
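# The snippet above stops right after binary_accuracy. A minimal sketch of a
# training epoch that would use it, assuming batch.text / batch.label fields as
# produced by the torchtext iterators defined above and float 0/1 labels; the
# function name and these assumptions are not part of the original code.
def train_epoch_sketch(model, iterator, optimizer, criterion):
    epoch_loss, epoch_acc = 0.0, 0.0
    model.train()
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)  # (batch,) of logits
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)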
def main():
    global epoch

    # Get arguments, setup, prepare data and print some info
    args = parse()
    log_path = os.path.join("logs", args.name)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    writer = SummaryWriter(log_path)

    if args.task == 'babi':
        train_dataset = bAbIDataset(args.dataset_path, args.babi_task)
        val_dataset = bAbIDataset(args.dataset_path, args.babi_task, train=False)
    else:
        raise NotImplementedError

    # Setting up the Model
    if args.model == 'lstm':
        model = LSTM(40,
                     train_dataset.num_vocab,
                     100,
                     args.device,
                     sentence_size=max(train_dataset.sentence_size,
                                       train_dataset.query_size))
        print("Using LSTM")
    else:
        # model = REN(args.num_blocks, train_dataset.num_vocab, 100, args.device,
        #             train_dataset.sentence_size, train_dataset.query_size).to(args.device)
        model = RecurrentEntityNetwork(train_dataset.num_vocab,
                                       device=args.device,
                                       sequence_length=max(
                                           train_dataset.sentence_size,
                                           train_dataset.query_size))
        print("Using EntNet")

    if args.multi:  # TODO: What's this?
        model = torch.nn.DataParallel(model, device_ids=args.gpu_range)

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    weight_decay=args.weight_decay)
    else:
        raise Exception("Invalid optimizer")

    if args.cyc_lr:
        cycle_momentum = True if args.optimizer == 'sgd' else False
        lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer, 5e-5, args.lr,
            cycle_momentum=cycle_momentum,
            step_size_up=args.cyc_step_size_up)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5)

    # Before we are getting started, let's get ready to give some feedback
    print("Dataset size: ", len(train_dataset))
    print("Sentence size:", train_dataset.sentence_size)
    print("Vocab set", [
        str(i) + ': ' + str(train_dataset.vocab[i])
        for i in range(len(train_dataset.vocab))
    ])

    # Prepare Visdom
    Visdom.start()
    lr_plt = Visdom.Plot2D("Current learning rate",
                           store_interval=1,
                           xlabel="Epochs",
                           ylabel="Learning Rate")
    # TODO: Check legend
    train_loss = Visdom.Plot2D("Loss on Train Data",
                               store_interval=1,
                               xlabel="iteration",
                               ylabel="loss",
                               legend=['one', 2, 'three'])
    train_accuracy = Visdom.Plot2D("Accuracy on Train Data",
                                   store_interval=1,
                                   xlabel="iteration",
                                   ylabel="accuracy")
    validation_loss = Visdom.Plot2D("Loss on Validation Set",
                                    store_interval=1,
                                    xlabel="epoch",
                                    ylabel="loss")
    validation_accuracy = Visdom.Plot2D("Accuracy on Validation Set",
                                        store_interval=1,
                                        xlabel="epoch",
                                        ylabel="accuracy")
    babi_text_plt = Visdom.Text("Network Output")

    train_plots = {'loss': train_loss, 'accuracy': train_accuracy}
    val_plots = {'text': babi_text_plt}

    epoch = 0

    # Register Variables and plots to save
    saver = Saver(os.path.join(args.output_path, args.name),
                  short_interval=args.save_interval)
    saver.register('train_loss', StateSaver(train_loss))
    saver.register('train_accuracy', StateSaver(train_accuracy))
    saver.register('validation_loss', StateSaver(validation_loss))
    saver.register('validation_accuracy', StateSaver(validation_accuracy))
    saver.register('lr_plot', StateSaver(lr_plt))
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("epoch", GlobalVarSaver('epoch'))
    # saver.register("train_dataset", StateSaver(train_dataset))
    # saver.register("val_dataset", StateSaver(val_dataset))

    eval_on_start = False
    print("Given model argument to load from: ", args.load_model)
    # TODO: Load learning rate scheduler
    if args.load_model:
        if not saver.load(args.load_model):
            # model.reset_parameters()
            print('Not loading, something went wrong', args.load_model)
        else:
            eval_on_start = False

    start_epoch = epoch
    end_epoch = start_epoch + args.epochs
    model.to(args.device)

    # TODO: Use saver only on full epochs or use it on certain iteration

    """ TRAIN START """
    # Eval on Start
    if eval_on_start:
        val_result = val_dataset.eval(args, model, plots=val_plots)
        validation_loss.add_point(0, val_result['loss'])
        validation_accuracy.add_point(0, val_result['accuracy'])
        saver.write(epoch)

    for epoch in range(start_epoch, end_epoch):
        train_result = train_dataset.test(args, model, optimizer,
                                          epoch=epoch,
                                          plots=train_plots,
                                          scheduler=lr_scheduler)
        val_result = val_dataset.eval(args, model, epoch=epoch + 1, plots=val_plots)
        validation_loss.add_point(epoch, val_result['loss'])
        validation_accuracy.add_point(epoch, val_result['accuracy'])

        current_lr = None
        for param_group in optimizer.param_groups:
            current_lr = param_group['lr']
            break
        lr_plt.add_point(epoch, current_lr if current_lr else 0)

        saver.tick(epoch + 1)
        if not args.cyc_lr:
            lr_scheduler.step()

        # TODO: Add writer
        # Log
        if epoch % args.save_interval == 0 or epoch == args.epochs - 1:
            for param_group in optimizer.param_groups:
                log_lr = param_group['lr']
                break
            log = 'Epoch: [{epoch}]\t Train Loss {tl} Acc {ta}\t Val Loss {vl} Acc {va} lr {lr}'.format(
                epoch=epoch,
                tl=round(train_result['loss'], 3),
                ta=round(train_result['accuracy'], 3),
                vl=round(val_result['loss'], 3),
                va=round(val_result['accuracy'], 3),
                lr=log_lr)
            print(log)
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help="The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument("--word_embedding_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Word Embedding Path")
    parser.add_argument("--model",
                        default='CNN',
                        type=str,
                        required=True,
                        help="CNN/LSTM/LSTM+Attention")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The result path")
    parser.add_argument("--output_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The result file name")
    parser.add_argument(
        "--max_length",
        default=25,
        type=int,
        help="Maximum sequence length, sequences longer than this are truncated")
    parser.add_argument("--epochs",
                        default=15,
                        type=int,
                        help="Number of epochs to train for")
    parser.add_argument("--learning_rate",
                        default=0.001,
                        type=float,
                        dest="learning_rate",
                        help="Learning rate for optimizer")
    parser.add_argument(
        "--device",
        default="cuda:0",
        dest="device",
        help="Device to use for training and evaluation e.g. (cpu, cuda:0)")
    parser.add_argument(
        "--dropout",
        default=0.1,
        type=float,
        dest="dropout",
        help="Dropout (not keep_prob, but probability of ZEROING during training, i.e. keep_prob = 1 - dropout)")
    parser.add_argument("--batch_size",
                        default=64,
                        type=int,
                        help="Batch size")
    parser.add_argument("--filter_sizes",
                        default=[1, 2, 3, 4, 5],
                        type=list,
                        help="The filter sizes (CNN model)")
    parser.add_argument("--num_filters",
                        default=50,
                        type=int,
                        help="The number of filters (CNN model)")
    parser.add_argument("--hidden_size",
                        default=64,
                        type=int,
                        help="The hidden size (LSTM/LSTM_Attention model)")
    parser.add_argument("--layer_num",
                        default=1,
                        type=int,
                        help="The number of layers (LSTM/LSTM_Attention model)")
    parser.add_argument("--bidirectional",
                        default=True,
                        type=bool,
                        help="Whether the model is bidirectional (LSTM/LSTM_Attention model)")
    parser.add_argument("--attention_size",
                        default=32,
                        type=int,
                        help="The dim of attention (LSTM_Attention model)")
    parser.add_argument("--model_size",
                        default=128,
                        type=int,
                        help="The size of the transformer's model (Transformer model)")
    parser.add_argument("--num_heads",
                        default=4,
                        type=int,
                        help="The number of heads (Transformer model)")
    parser.add_argument("--num_blocks",
                        default=2,
                        type=int,
                        help="The number of blocks (Transformer model)")
    args = parser.parse_args()

    print('......................Loading Data......................')
    x_trainval, y_trainval, x_test, y_test = get_data(args.data_dir)
    x_train, x_val, y_train, y_val = train_test_split(x_trainval,
                                                      y_trainval,
                                                      test_size=0.2,
                                                      random_state=66)
    word_embedding = load_word_embedding(args.word_embedding_path)
    x_train, x_val, x_test, average_len, vocab = pre_process(x_train, x_val, x_test)
    vocab = set(vocab)

    # Create the dictionaries word_to_id and target_to_id
    word_to_id = {word: index for index, word in enumerate(vocab)}
    target_to_id = {target: index for index, target in enumerate(set(y_trainval))}
    id_to_target = {value: key for key, value in target_to_id.items()}

    # Define some hyperparameters
    embedding_dim = 300
    vocab_size = len(vocab)
    output_size = 3

    pre_trained_embedding = torch.zeros(vocab_size, embedding_dim)
    for key, value in word_to_id.items():
        if key in word_embedding and (key != '<pad>'):
            pre_trained_embedding[value, :] = torch.from_numpy(word_embedding[key])

    # Transform data from text to tensors and put them in DataLoaders (for batching)
    train_loader = prepare_data(x_train, y_train, average_len, word_to_id,
                                target_to_id, vocab, args.batch_size)
    val_loader = prepare_data(x_val, y_val, average_len, word_to_id,
                              target_to_id, vocab, args.batch_size)
    test_loader = prepare_data(x_test, y_test, average_len, word_to_id,
                               target_to_id, vocab, args.batch_size)

    # Build Model
    if args.model == 'CNN':
        model = CNN(vocab_size, embedding_dim, pre_trained_embedding,
                    args.filter_sizes, args.num_filters, args.dropout,
                    output_size).to(args.device)
    if args.model == 'LSTM':
        model = LSTM(vocab_size, embedding_dim, pre_trained_embedding,
                     args.hidden_size, args.layer_num, args.bidirectional,
                     output_size).to(args.device)
    if args.model == 'LSTM_Attention':
        model = LSTM_Attention(vocab_size, embedding_dim, pre_trained_embedding,
                               args.hidden_size, args.layer_num,
                               args.bidirectional, args.attention_size,
                               output_size).to(args.device)
    if args.model == 'Transformer':
        model = TransformerModel(vocab_size, average_len, args.batch_size,
                                 embedding_dim, pre_trained_embedding,
                                 args.model_size, args.num_heads,
                                 args.num_blocks, args.dropout,
                                 output_size).to(args.device)
    print(model)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    # Training
    print('......................Training Data......................')
    print(' ')
    losses = []
    best_recall_score = 0.0
    with open(
            os.path.join(args.output_dir,
                         args.model + args.output_name + '_training_result.csv'),
            'w') as csvfile:
        fieldnames = [
            'Epoch', 'Loss', 'train_accuracy_score', 'train_recall_score',
            'train_f1_score', 'val_accuracy_score', 'val_recall_score',
            'val_f1_score', 'test_accuracy_score', 'test_recall_score',
            'test_f1_score'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for epoch in range(args.epochs):
            total_loss = 0
            for batch_x, batch_y in train_loader:
                if torch.cuda.is_available():
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
                optimizer.zero_grad()
                logits = model(batch_x)
                loss = loss_function(logits, torch.max(batch_y, 1)[0])
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            losses.append(total_loss)

            # print('......................Epoch: %d, Loss: %f......................' % (epoch, total_loss))
            # print('......................Training Data Performance......................')
            train_accuracy_score, train_recall_score, train_f1_score = evaluate(
                train_loader, model, id_to_target)
            # print('......................Validation Data Performance......................')
            val_accuracy_score, val_recall_score, val_f1_score = evaluate(
                val_loader, model, id_to_target)
            if val_recall_score > best_recall_score:
                best_recall_score = val_recall_score
                # real_model = model.module
                torch.save(model.state_dict(), args.model + '2model_best.pth')
            # print('......................Test Data Performance......................')
            test_accuracy_score, test_recall_score, test_f1_score = evaluate(
                test_loader, model, id_to_target)
            writer.writerow({
                'Epoch': epoch,
                'Loss': total_loss,
                'train_accuracy_score': train_accuracy_score,
                'train_recall_score': train_recall_score,
                'train_f1_score': train_f1_score,
                'val_accuracy_score': val_accuracy_score,
                'val_recall_score': val_recall_score,
                'val_f1_score': val_f1_score,
                'test_accuracy_score': test_accuracy_score,
                'test_recall_score': test_recall_score,
                'test_f1_score': test_f1_score
            })
    print(' ')

    # Reload the best checkpoint and report final test performance
    if args.model == 'CNN':
        model = CNN(vocab_size, embedding_dim, pre_trained_embedding,
                    args.filter_sizes, args.num_filters, args.dropout,
                    output_size)
    if args.model == 'LSTM':
        model = LSTM(vocab_size, embedding_dim, pre_trained_embedding,
                     args.hidden_size, args.layer_num, args.bidirectional,
                     output_size)
    if args.model == 'LSTM_Attention':
        model = LSTM_Attention(vocab_size, embedding_dim, pre_trained_embedding,
                               args.hidden_size, args.layer_num,
                               args.bidirectional, args.attention_size,
                               output_size)
    if args.model == 'Transformer':
        model = TransformerModel(vocab_size, average_len, args.batch_size,
                                 embedding_dim, pre_trained_embedding,
                                 args.model_size, args.num_heads,
                                 args.num_blocks, args.dropout, output_size)
    checkpoint = torch.load(args.model + '2model_best.pth')
    model.load_state_dict(checkpoint)
    model.to(args.device)

    print('......................Test Data Performance......................')
    test_accuracy_score, test_recall_score, test_f1_score = evaluate(
        test_loader, model, id_to_target)
    print('The accuracy is:%f ' % test_accuracy_score)
    print('The macro_recall is:%f ' % test_recall_score)
    print('The macro_F_score is:%f ' % test_f1_score)
    print(' ')
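# evaluate() above is referenced but not shown in this snippet. A minimal
# sketch of what it might compute, assuming argmax decoding of the logits,
# macro-averaged metrics from scikit-learn, and the same torch.max(batch_y, 1)
# target construction as the training loop; all of these are assumptions, not
# the original implementation (id_to_target is unused in this simplification).
import torch
from sklearn.metrics import accuracy_score, f1_score, recall_score


def evaluate_sketch(data_loader, model, id_to_target):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            if torch.cuda.is_available():
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
            logits = model(batch_x)
            all_preds.extend(logits.argmax(dim=1).cpu().tolist())
            all_labels.extend(torch.max(batch_y, 1)[0].cpu().tolist())
    model.train()
    return (accuracy_score(all_labels, all_preds),
            recall_score(all_labels, all_preds, average='macro'),
            f1_score(all_labels, all_preds, average='macro'))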
print('start to generate result excel')
df = pd.read_excel('./data/sourceData/test.xlsx')
new_df = df[['_id', '_id_x', '_id_y', 'nick_name', 'content']]
nationalism_predictions = []
for index, each in tqdm(new_df.iterrows()):
    nationalism_predictions.append(
        weibo_id_prediction_dic.get(int(each["_id"]), ""))
new_df['{}_prediction'.format(CURRENT_MODEL_NAME)] = nationalism_predictions
new_df.to_excel('./predictionResults/{}_prediction_result.xlsx'.format(CURRENT_MODEL_NAME))


if __name__ == "__main__":
    if IS_TRAIN:
        model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BATCH_SIZE)
    else:
        model = torch.load('./trainedModel/best_{}_model.pkl'.format(CURRENT_MODEL_NAME))
        print('load model successfully')

    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCEWithLogitsLoss()
    device = torch.device('cuda')
    model = model.to(device)
    criterion = criterion.to(device)

    if IS_TRAIN:
        for i in range(5):
            train(model, train_iterator, optimizer, criterion)
    else:
        test(model, test_iterator)
from model.LSTM import LSTM
from Tool.get_preprocess_data import *

train_x, test_x, train_y, test_y = get_train_test_set()
embedding_matrix, word_list = get_embedding_matrix()

lstm = LSTM(embedding_matrix, train_x, train_y, test_x, test_y)
lstm.run()