def main(opt):
    model = LSTM(opt, batch_first=True, dropout=opt.dropout)
    if opt.pre_train:
        model.load_state_dict(torch.load(opt.save_path))
    optimizer = optim.Adam(model.parameters(), opt.learning_rate)
    mseloss = nn.MSELoss()

    dataset = PowerDataset(opt,
                           prepocess_path=opt.prepocess_path,
                           transform=transforms.Compose([transforms.ToTensor()]))
    train_dataset = data.Subset(dataset, indices=range(8664))
    test_dataset = data.Subset(dataset, indices=range(8664, len(dataset)))
    train_dataloader = data.DataLoader(train_dataset,
                                       num_workers=opt.n_threads,
                                       batch_size=opt.batch_size,
                                       shuffle=True)
    test_sampler = data.SequentialSampler(test_dataset)
    test_dataloader = data.DataLoader(test_dataset,
                                      num_workers=opt.n_threads,
                                      batch_size=opt.test_batch_size,
                                      shuffle=False,
                                      sampler=test_sampler)

    for e in range(opt.epochs):
        if opt.test_only:
            test(model, test_dataloader)
            break
        print('epoch: ', e)
        train(model, mseloss, optimizer, train_dataloader)
        test(model, test_dataloader)
        torch.save(model.state_dict(), opt.save_path)
def main():
    global args, best_prec1
    best_prec1 = 1e6

    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq = 5

    wandb.config.update(args)
    wandb.run.name = (f"Default_{wandb.run.name}" if args.task == wandb.run.name
                      else f"{args.task}_{wandb.run.name}")

    conf = configparser.ConfigParser()
    conf.read(args.config)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)

    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))

    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.9, 0.999), eps=1e-08,
                                 weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)

        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + " MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))

        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
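# adjust_learning_rate is called above but not shown. A plausible sketch,
# consistent with the args.steps / args.scales schedule set in main() (each
# entry in args.steps marks the epoch at which the rate is multiplied by the
# matching entry in args.scales); treat this as an assumption, not the
# original implementation:
def adjust_learning_rate(optimizer, epoch):
    lr = args.original_lr
    for i, step in enumerate(args.steps):
        scale = args.scales[i] if i < len(args.scales) else 1
        if epoch >= step:
            lr = lr * scale
        else:
            break
    args.lr = lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr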
def train_model(stock, col):
    data = pd.read_csv(
        f'../data/ashare/{stock}.csv',
        encoding='gbk',
        converters={0: lambda x: datetime.strptime(x, '%Y-%m-%d')})
    data = data.sort_index(ascending=False)
    training_set = data.iloc[:, col].values

    sc = MinMaxScaler()
    training_data = sc.fit_transform(training_set.reshape(-1, 1))

    num_classes = 2
    seq_length = 8
    x, y = sliding_windows(training_data, seq_length, num_classes)
    print(x.shape)
    print(y.shape)

    train_size = int(len(y) * 0.67)
    test_size = len(y) - train_size

    trainX = Variable(torch.Tensor(np.array(x[0:train_size])))
    trainY = Variable(torch.Tensor(np.array(y[0:train_size])))
    testX = Variable(torch.Tensor(np.array(x[train_size:len(x)])))
    testY = Variable(torch.Tensor(np.array(y[train_size:len(y)])))

    num_epochs = 1500
    learning_rate = 0.01
    input_size = 1
    hidden_size = 2
    num_layers = 1

    lstm = LSTM(num_classes, input_size, hidden_size, num_layers, seq_length)
    train(lstm, num_epochs, num_classes, trainX, trainY, learning_rate)
    torch.save(lstm.state_dict(), f'../data/ashare/models/{stock}-col{col}-8-2.pt')
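# sliding_windows is assumed, not shown above. A minimal sketch of one
# plausible implementation: each sample x is a window of seq_length values
# and each target y is the following num_classes values. Hypothetical helper;
# the original may differ.
def sliding_windows(data, seq_length, num_classes):
    x, y = [], []
    for i in range(len(data) - seq_length - num_classes + 1):
        x.append(data[i:i + seq_length])
        y.append(data[i + seq_length:i + seq_length + num_classes, 0])
    return np.array(x), np.array(y)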
def train():
    train_writer = SummaryWriter(os.path.join(LOG_DIR, 'train7-64-LSTM-Doppler'))
    test_writer = SummaryWriter(os.path.join(LOG_DIR, 'test7-64-LSTM-Doppler'))
    train_loader, test_loader = load_data(TRAIN_DIR, TEST_DIR)

    lstm = LSTM().to(DEVICE)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(DEVICE)

    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %3d ****' % (epoch))
        sys.stdout.flush()
        train_one_epoch(epoch, train_writer, train_loader, lstm, loss_func, optimizer)
        eval_one_epoch(epoch, test_writer, test_loader, lstm, loss_func)
        # save model parameters to file
        torch.save(lstm.state_dict(), MODEL_DIR)
def main():
    names_str = read_csv(filname='data/names/names.csv')
    all_char_str = set([char for name in names_str for char in name])
    char2idx = {char: i for i, char in enumerate(all_char_str)}
    char2idx['EOS'] = len(char2idx)

    # save char dictionary
    cPickle.dump(char2idx, open("dic.p", "wb"))

    names_idx = [[char2idx[char_str] for char_str in name_str]
                 for name_str in names_str]

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    n_iters = 5
    for iter in range(1, n_iters + 1):
        # shuffle the data each pass
        random.shuffle(names_idx)
        total_loss = 0
        for i, name_idx in enumerate(names_idx):
            input = inputTensor(name_idx)
            target = targetTensor(name_idx, char2idx)
            loss = train(model, criterion, input, target)
            total_loss += loss
            optimizer.step()
        print(iter, "/", n_iters)
        print("loss {:.4}".format(float(total_loss / len(names_idx))))

    # save trained model
    torch.save(model.state_dict(), "model.pt")
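# inputTensor and targetTensor are assumed helpers. One plausible pairing for
# next-character prediction with an EOS terminator, given that the model above
# has an embedding layer and so takes index tensors (a sketch, not
# necessarily the original implementation):
def inputTensor(name_idx):
    # sequence of character indices, shaped (seq_len, 1) for the LSTM
    return torch.tensor(name_idx, dtype=torch.long).unsqueeze(1)

def targetTensor(name_idx, char2idx):
    # targets are the input shifted left by one, terminated by EOS
    return torch.tensor(name_idx[1:] + [char2idx['EOS']], dtype=torch.long)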
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train model
for epoch in range(num_epochs):
    for (inputs, labels) in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        output, hidden = model(inputs)
        loss = criterion(output, labels)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch+1}/{num_epochs}')
        print(f'loss={loss.item():.4f}')

data = {
    "model_state": model.state_dict(),
    "seq_length": seq_length,
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "num_classes": num_classes,
    "word_list": word_list,
    "tags": tags
}

FILE = "dataserialized3.pth"
torch.save(data, FILE)
print(f"Training complete, file saved to {FILE}")
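# A minimal sketch of loading the serialized dictionary above back for
# inference. Key names mirror what was saved; the LSTM constructor argument
# order is an assumption:
data = torch.load("dataserialized3.pth")
model = LSTM(data["input_size"], data["hidden_size"],
             data["num_layers"], data["num_classes"])  # assumed constructor order
model.load_state_dict(data["model_state"])
model.eval()  # disable dropout etc. for inference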
# sort batch based on sequence length
sort_batch(batch_data)

# put batch on GPU
batch_data = to_cuda(batch_data)

# feed batch through model
Y_output = lstm_model(batch_data[0], batch_data[2],
                      cfg['hyperparams']['sequence_length'])
Y_target = batch_data[1]
Y_lengths = batch_data[2]

# calculate loss
loss = ce_loss(Y_output, Y_target, Y_lengths)

# backprop (gradients must be zeroed each step before backward)
optimiser.zero_grad()
loss.backward()
optimiser.step()

# log
if batch_idx % 100 == 0:
    print("Epoch ", batch_idx, "CE: ", loss.item())
    log_writer.add_scalar('ce_loss', loss.item(), batch_idx)

# save model
if batch_idx % 5000 == 0 and batch_idx >= 5000:
    torch.save(
        lstm_model.state_dict(),
        os.path.join(args.experiment_name, f'epoch_{batch_idx}.model'))
stocks[i] = random.choices(csv["Symbol"], k=batch_size)
ppo[i] = DQN(model, lr, stocks[i], output_size, hidden[i], batch_size)
reward_list[i] = deque(maxlen=100)
last_profit[i] = 0

rewards = {}
for e in tqdm(range(epochs)):
    for i in range(num_models):
        rewards[i] = 0
        reward, profits, stocks_owned, hidden_layer, cash, total, value, transactions = \
            ppo[i].compute_loss(rewards[i], hidden[i])
        hidden[i] = hidden_layer

        data = {
            "Stocks": list(stocks_owned.keys()),
            "Number Owned": list(stocks_owned.values()),
            "Value": list(value.values()),
            "Transactions": transactions
        }
        stocks_csv = pd.DataFrame(data)
        stocks_csv.to_csv(f"stocks_owned{i+1}.csv")

        iter_profit = profits - last_profit[i]
        last_profit[i] = profits
        reward_list[i].append(float(reward))

        print(f"\nModel {i+1}, Reward: {reward}, Mean Reward: {mean(reward_list[i])}")
        print(f"\nTotal profits: {profits}, Profits this run: {iter_profit}, "
              f"Money in cash: {cash}, Value in stocks: {total - cash}, "
              f"Total money: {total}, stocks owned: \n{stocks_csv.head()}")
        # time.sleep(300)
        torch.save(model.state_dict(), "model.pt")
for data, target in tqdm(train_loader, leave=False):
    loss = model.step(Variable(data), Variable(target))
    losses.append(loss)

mean_loss = np.mean(losses)
print('epoch: {}, loss: {:.5f} ({:.5f},{})'.format(
    epoch, mean_loss, best_loss[0], epoch - best_loss[1]))

if mean_loss < best_loss[0]:
    best_loss = (mean_loss, epoch)
    # save model
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': model.optimizer.state_dict(),
            'loss': mean_loss,
        },
        'saved_models/lstm_adam_b10_lb168_model')
elif epoch - best_loss[1] > patience:
    still_learning = False
epoch += 1

# get just one sample for prediction
data_pred, target_pred = next(iter(test_loader))

# prepare data for comparison
pred = model.predict(Variable(data_pred), look_ahead)
target_scaled = scaler.inverse_transform(
    target_pred[0, :].detach().view(-1, 1).numpy())
pred_scaled = scaler.inverse_transform(
    pred[0, :].detach().view(-1, 1).numpy())
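# A minimal sketch of resuming from the checkpoint saved above. Key names
# match the dictionary written by torch.save; model construction is assumed
# to happen beforehand:
checkpoint = torch.load('saved_models/lstm_adam_b10_lb168_model')
model.load_state_dict(checkpoint['model_state_dict'])
model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch'] + 1
best_loss = (checkpoint['loss'], checkpoint['epoch'])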
try:
    corr_train, corr_val, corr_test = train(TrainX, TrainY, TestX, TestY,
                                            net, lossfunc, optimizer,
                                            num_epoch=200, clip=5,
                                            Finger=Finger)
except KeyboardInterrupt:
    # save the model on interrupt
    print("saving...")
    torch.save(net.state_dict(), PATH_pre_trained)
    print("model saved")

## test baseline model
net.eval()
pred, h = net(torch.from_numpy(TestX).float(), net.init_hidden(TestX.shape[0]))
pred = pred.detach().numpy()[-1, :, :]
pred = y_scaler.inverse_transform(pred)
TestY = y_scaler.inverse_transform(TestY)
pred = pred.reshape((-1, ))
corrcoef = np.corrcoef(pred, TestY.reshape((-1, )))
TestYShifted = TestY
writer.add_scalar('loss_long_d_real', long_d_real_loss.item(),
                  global_step=epoch * 317 + i_batch)
writer.add_scalar('loss_long_d_fake', long_d_fake_loss.item(),
                  global_step=epoch * 317 + i_batch)

loss_total_list.append(loss_total.item())
loss_total_cur = np.mean(loss_total_list)
if loss_total_cur < loss_total_min:
    loss_total_min = loss_total_cur
    torch.save(state_encoder.state_dict(), model_dir + '/state_encoder.pkl')
    torch.save(target_encoder.state_dict(), model_dir + '/target_encoder.pkl')
    torch.save(offset_encoder.state_dict(), model_dir + '/offset_encoder.pkl')
    torch.save(lstm.state_dict(), model_dir + '/lstm.pkl')
    torch.save(decoder.state_dict(), model_dir + '/decoder.pkl')
    torch.save(optimizer_g.state_dict(), model_dir + '/optimizer_g.pkl')
    if opt['train']['use_adv']:
        torch.save(short_discriminator.state_dict(),
                   model_dir + '/short_discriminator.pkl')
        torch.save(long_discriminator.state_dict(),
                   model_dir + '/long_discriminator.pkl')
        torch.save(optimizer_d.state_dict(), model_dir + '/optimizer_d.pkl')

print("train epoch: %03d, cur total loss:%.3f, cur best loss:%.3f"
      % (epoch, loss_total_cur, loss_total_min))
print("0 Epoch loss : " + str(loss)) for ei in range(args.max_epochs): print('Epoch: ' + str(ei+1)) eNum += 1 # train train(args, model, args.trainPath, criterion, optimizer) # valid test loss= test(args, model, args.validPath, criterion) print('valid loss : {}'.format(loss.tolist())) if loss < best_loss or best_loss < 0: print('find best') best_loss = loss bad_counter = 0 torch.save(model.state_dict(), args.saveModel) else: bad_counter += 1 if bad_counter > args.patience: print('Early Stopping') break print('-----------------test-----------------') loss= test(args, model, args.testPath, criterion) print("Test loss : " + str(loss))
def train(args):
    prefix = ''
    f_prefix = '.'
    if not os.path.isdir("log/"):
        print("Directory creation script is running...")
        subprocess.call([f_prefix + '/make_directories.sh'])

    args.freq_validation = np.clip(args.freq_validation, 0, args.num_epochs)
    validation_epoch_list = list(range(args.freq_validation,
                                       args.num_epochs + 1,
                                       args.freq_validation))
    validation_epoch_list[-1] -= 1

    # Create the data loader object. It preprocesses the data into batches
    # of size args.batch_size, each of length args.seq_length.
    dataloader = DataLoader(f_prefix, args.batch_size, args.seq_length,
                            args.num_validation, forcePreProcess=True)

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name + "_lstm_model_"
    if args.gru:
        model_name = "GRU"
        save_tar_name = method_name + "_gru_model_"

    # Log directory
    log_directory = os.path.join(prefix, 'log/')
    plot_directory = os.path.join(prefix, 'plot/', method_name, model_name)
    plot_train_file_directory = 'validation'

    # Logging files
    log_file_curve = open(os.path.join(log_directory, method_name, model_name,
                                       'log_curve.txt'), 'w+')
    log_file = open(os.path.join(log_directory, method_name, model_name,
                                 'val.txt'), 'w+')

    # Model directory
    save_directory = os.path.join(f_prefix, 'model')

    # Save the arguments in the config file
    with open(os.path.join(save_directory, method_name, model_name,
                           'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Path to store the checkpoint file
    def checkpoint_path(x):
        return os.path.join(save_directory, method_name, model_name,
                            save_tar_name + str(x) + '.tar')

    # Model creation
    net = LSTM(args)
    if args.use_cuda:
        net = net.cuda()

    # optimizer = torch.optim.Adagrad(net.parameters(), weight_decay=args.lambda_param)
    optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate)
    loss_f = torch.nn.MSELoss()

    learning_rate = args.learning_rate
    best_val_loss = 100
    best_val_data_loss = 100
    smallest_err_val = 100000
    smallest_err_val_data = 100000
    best_epoch_val = 0
    best_epoch_val_data = 0
    best_err_epoch_val = 0
    best_err_epoch_val_data = 0
    all_epoch_results = []
    grids = []
    num_batch = 0
    validation_dataset_executed = False  # referenced after the loop; initialize here

    # Training
    for epoch in range(args.num_epochs):
        print('****************Training epoch beginning******************')
        if dataloader.additional_validation and (epoch - 1) in validation_epoch_list:
            dataloader.switch_to_dataset_type(True)
        dataloader.reset_batch_pointer(valid=False)
        loss_epoch = 0

        # For each batch (num_batches: how many iterations one epoch takes)
        for batch in range(dataloader.num_batches):
            start = time.time()

            # Get batch data
            x, y, d = dataloader.next_batch(randomUpdate=False)
            loss_batch = 0

            x_seq = Variable(torch.from_numpy(np.array(x)).float())
            y_seq = Variable(torch.from_numpy(np.array(y)).float())
            temp = x_seq[:, :, -2:]
            x_seq = x_seq[:, :, :-2]
            y_seq = y_seq[:, :, :3]

            hidden_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))
            cell_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))

            if args.use_cuda:
                x_seq = x_seq.cuda()
                y_seq = y_seq.cuda()
                temp = temp.cuda()
                hidden_states = hidden_states.cuda()
                cell_states = cell_states.cuda()

            # Zero out gradients
            net.zero_grad()
            optimizer.zero_grad()

            outputs, _, _ = net(x_seq, temp, hidden_states, cell_states)
            loss = loss_f(outputs, y_seq)
            loss_batch = loss.detach().item()

            # Compute gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)
            # Update parameters
            optimizer.step()

            end = time.time()
            loss_epoch += loss_batch
            print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format(
                (batch + 1) * dataloader.batch_size,
                dataloader.num_batches * dataloader.batch_size,
                epoch, loss_batch, end - start))

        loss_epoch /= dataloader.num_batches
        print("Training epoch: " + str(epoch) + " loss: " + str(loss_epoch))
        # Log loss values
        log_file_curve.write("Training epoch: " + str(epoch) +
                             " loss: " + str(loss_epoch) + '\n')

        # Validation dataset
        if dataloader.additional_validation and epoch in validation_epoch_list:
            dataloader.switch_to_dataset_type()
            print('****************Validation with dataset epoch beginning******************')
            dataloader.reset_batch_pointer(valid=False)
            dataset_pointer_ins = dataloader.dataset_pointer
            validation_dataset_executed = True

            loss_epoch = 0
            err_epoch = 0
            num_of_batch = 0
            smallest_err = 100000

            # results of one epoch for all validation datasets
            epoch_result = []
            # results of one validation dataset
            results = []

            # For each batch
            for batch in range(dataloader.num_batches):
                # Get batch data
                x, y, d = dataloader.next_batch(randomUpdate=False)

                # Loss for this batch
                loss_batch = 0
                err_batch = 0

                # For each sequence
                for sequence in range(len(x)):
                    # Get the sequence
                    x_seq = np.array(x[sequence])
                    y_seq = np.array(y[sequence])[:, :3]
                    x_seq = Variable(torch.from_numpy(x_seq).float())
                    y_seq = Variable(torch.from_numpy(y_seq).float())
                    temp = x_seq[:, -2:]
                    x_seq = x_seq[:, :-2]

                    if args.use_cuda:
                        x_seq = x_seq.cuda()
                        y_seq = y_seq.cuda()
                        temp = temp.cuda()

                    # will be used for error calculation
                    orig_x_seq = y_seq.clone()

                    with torch.no_grad():
                        hidden_states = Variable(torch.zeros(1, args.rnn_size))
                        cell_states = Variable(torch.zeros(1, args.rnn_size))
                        ret_x_seq = Variable(torch.zeros(args.seq_length,
                                                         net.input_size))

                        # Initialize the return data structure
                        if args.use_cuda:
                            ret_x_seq = ret_x_seq.cuda()
                            hidden_states = hidden_states.cuda()
                            cell_states = cell_states.cuda()

                        total_loss = 0
                        # For the observed part of the trajectory
                        for tstep in range(args.seq_length):
                            outputs, hidden_states, cell_states = net(
                                x_seq[tstep].view(1, 1, net.input_size),
                                temp[tstep].view(1, 1, temp.size()[-1]),
                                hidden_states, cell_states)
                            ret_x_seq[tstep, 0] = outputs[0, 0, 0]
                            ret_x_seq[tstep, 1] = outputs[0, 0, 1]
                            ret_x_seq[tstep, 2] = outputs[0, 0, 2]
                            loss = loss_f(outputs,
                                          y_seq[tstep].view(1, 1, y_seq.size()[1]))
                            total_loss += loss

                        total_loss = total_loss / args.seq_length

                    # get mean and final error
                    err = get_mean_error(ret_x_seq.data, y_seq.data, args.use_cuda)
                    loss_batch += total_loss.item()
                    err_batch += err
                    print('Current file : ', ' Batch : ', batch + 1,
                          ' Sequence: ', sequence + 1,
                          ' Sequence mean error: ', err,
                          'valid_loss: ', total_loss.item())
                    results.append((y_seq.data.cpu().numpy(),
                                    ret_x_seq.data.cpu().numpy()))

                loss_batch = loss_batch / dataloader.batch_size
                err_batch = err_batch / dataloader.batch_size
                num_of_batch += 1
                loss_epoch += loss_batch
                err_epoch += err_batch

            epoch_result.append(results)
            all_epoch_results.append(epoch_result)

            if dataloader.num_batches != 0:
                loss_epoch = loss_epoch / dataloader.num_batches
                err_epoch = err_epoch / dataloader.num_batches

                # Update best validation loss so far
                if loss_epoch < best_val_data_loss:
                    best_val_data_loss = loss_epoch
                    best_epoch_val_data = epoch

                if err_epoch < smallest_err_val_data:
                    # Save the model whenever the error improves
                    print('Saving model')
                    torch.save({
                        'epoch': epoch,
                        'state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, checkpoint_path(epoch))
                    smallest_err_val_data = err_epoch
                    best_err_epoch_val_data = epoch

                print('(epoch {}), valid_loss = {:.3f}, valid_mean_err = {:.3f}'.format(
                    epoch, loss_epoch, err_epoch))
                print('Best epoch', best_epoch_val_data,
                      'Best validation loss', best_val_data_loss,
                      'Best error epoch', best_err_epoch_val_data,
                      'Best error', smallest_err_val_data)
                log_file_curve.write("Validation dataset epoch: " + str(epoch) +
                                     " loss: " + str(loss_epoch) +
                                     " mean_err: " + str(err_epoch.data.cpu().numpy()) + '\n')

        optimizer = time_lr_scheduler(optimizer, epoch,
                                      lr_decay_epoch=args.freq_optimizer)

    if dataloader.valid_num_batches != 0:
        print('Best epoch', best_epoch_val,
              'Best validation Loss', best_val_loss,
              'Best error epoch', best_err_epoch_val,
              'Best error', smallest_err_val)
        # Log the best epoch and best validation loss
        log_file.write('Validation Best epoch:' + str(best_epoch_val_data) + ',' +
                       ' Best validation Loss: ' + str(best_val_data_loss))

    if dataloader.additional_validation:
        print('Best epoch according to validation dataset', best_epoch_val_data,
              'Best validation Loss', best_val_data_loss,
              'Best error epoch', best_err_epoch_val_data,
              'Best error', smallest_err_val_data)
        log_file.write("Validation dataset Best epoch: " + str(best_epoch_val_data) +
                       ',' + ' Best validation Loss: ' + str(best_val_data_loss) +
                       ' Best error epoch: ' + str(best_err_epoch_val_data) + '\n')
        # dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)

    if validation_dataset_executed:
        dataloader.switch_to_dataset_type(load_data=False)
        create_directories(plot_directory, [plot_train_file_directory])
        dataloader.write_to_plot_file(
            all_epoch_results[len(all_epoch_results) - 1],
            os.path.join(plot_directory, plot_train_file_directory))

    # Close logging files
    log_file.close()
    log_file_curve.close()
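# time_lr_scheduler is referenced above but not shown. A plausible sketch
# (decay the learning rate by a fixed factor every lr_decay_epoch epochs);
# the decay factor is an assumption:
def time_lr_scheduler(optimizer, epoch, lr_decay=0.1, lr_decay_epoch=10):
    # only decay on epochs that are non-zero multiples of lr_decay_epoch
    if epoch % lr_decay_epoch or epoch == 0:
        return optimizer
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay
    return optimizer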
        total_loss += loss

        if iter % print_every == 0:
            print('%s (%d %d%%) %.4f' % (timeSince(start), iter,
                                         iter / n_iters * 100, loss))

        if iter % plot_every == 0:
            all_losses.append(total_loss / plot_every)
            print(total_loss / plot_every)
            total_loss = 0
            ax.set_xlim([0, len(all_losses) + 10])
            ax.plot(all_losses)
            plt.draw()
            plt.pause(0.0001)

    torch.save(rnn.state_dict(),
               './data/' + args.player_w_underscore + '.model')
else:
    rnn = LSTM(12, 300, 2)
    rnn.load_state_dict(
        torch.load('./data/' + args.player_w_underscore + '.model'))

    num_test_examples = 100
    for i in range(3):
        court = plt.imread('halfcourt.png')
        fig = plt.figure(figsize=(15, 11.5))
        ax = plt.axes(xlim=(-10, 60), ylim=(-10, 60))
        line = ax.scatter([], [], s=50)
def classify(tokenizerType):
    # Load dataset
    TEXT = data.Field(tokenize=tokenizerOptions[tokenizerType],
                      include_lengths=True, lower=True)
    LABEL = data.LabelField(dtype=torch.float, sequential=False, use_vocab=False)
    fields = [('text', TEXT), ('label', LABEL)]
    train_data = data.TabularDataset(path='amazon_reviews.txt',
                                     format='tsv', fields=fields)

    # Split dataset into train, validation and test
    train_data, valid_data, test_data = train_data.split(
        split_ratio=[0.64, 0.2, 0.16], random_state=random.seed(SEED))

    # Build vocabulary using pre-trained vectors
    TEXT.build_vocab(train_data, vectors="glove.6B.100d",
                     unk_init=torch.Tensor.normal_)
    LABEL.build_vocab(train_data)

    # Use GPU, if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Create iterators to get data in batches
    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        datasets=(train_data, valid_data, test_data),
        batch_size=BATCH_SIZE,
        device=device,
        sort_key=lambda x: len(x.text),
        sort=False,
        sort_within_batch=True)

    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    HIDDEN_DIM = 256
    OUTPUT_DIM = 1

    model = LSTM(vocab_size=INPUT_DIM,
                 embedding_dim=EMBEDDING_DIM,
                 hidden_dim=HIDDEN_DIM,
                 output_dim=OUTPUT_DIM,
                 n_layers=3,
                 bidirectional=True,
                 dropout=0.5,
                 pad_idx=TEXT.vocab.stoi[TEXT.pad_token])

    # Replace initial weights of embedding with the pre-trained embedding
    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)

    # Set UNK and PAD embeddings to zero
    model.embedding.weight.data[TEXT.vocab.stoi[TEXT.unk_token]] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[TEXT.vocab.stoi[TEXT.pad_token]] = torch.zeros(EMBEDDING_DIM)

    # Adam optimizer and binary cross-entropy loss
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    # Transfer model and criterion to GPU
    model = model.to(device)
    criterion = criterion.to(device)

    best_valid_loss = float('inf')
    train_loss_list = []
    valid_loss_list = []
    for epoch in range(N_EPOCHS):
        train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'best-model.pt')
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

    print(tokenizerType + ":")
    plotLoss(train_loss_list, valid_loss_list)

    model.load_state_dict(torch.load('best-model.pt'))
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
    print("\n")
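# A minimal inference sketch for the classifier trained above: tokenize a
# review, map tokens through TEXT.vocab, and squash the logit with a sigmoid.
# The tokenizer lookup mirrors tokenizerOptions used in classify(), and the
# model(text, lengths) call signature is an assumption; treat this helper as
# hypothetical, not part of the original script.
def predict_sentiment(model, sentence, tokenizerType, device):
    model.eval()
    tokens = tokenizerOptions[tokenizerType](sentence.lower())
    indexed = [TEXT.vocab.stoi[t] for t in tokens]
    lengths = torch.LongTensor([len(indexed)])
    tensor = torch.LongTensor(indexed).unsqueeze(1).to(device)
    with torch.no_grad():
        prediction = torch.sigmoid(model(tensor, lengths))
    return prediction.item()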
class StockPrediction():
    def __init__(self, stock, time_window, batch_size, learning_rate=0.001):
        self.stock = stock
        self.time_window = time_window
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.input_size = 4
        self.output_size = 1
        self.nb_neurons = 200
        self.prepare_data()
        self.output = "/Users/baptiste/Desktop/training"

    def validate(self):
        self.lstm_model.eval()
        error = []
        loss_function = nn.MSELoss()
        it = iter(self.real_data_dataloader)
        real_data = next(it)
        for i, (x, _) in enumerate(self.testing_dataloader):
            try:
                with torch.no_grad():
                    pred = self.lstm_model(x.float())
                    pred = self.data.unnormalizeData(pred)
                    real_data = real_data.view(-1, 1)
                    error = self.compute_error(error, pred, real_data)
                    real_data = next(it)
            except StopIteration:
                # no more real-data batches; the original used a bare
                # except/pass here, which silently hides real errors
                break
        error_mean = np.mean(error) * 100
        print("Mean error percentage : ", error_mean)
        self.lstm_model.train()

    def compute_error(self, error, pred, target):
        # relative absolute error per sample in the batch
        for i in range(self.batch_size):
            error.append(abs(pred[i, 0] - target[i, 0]) / target[i, 0])
        return error

    def prepare_data(self):
        validation_split = 0
        test_split = 0.1
        train_split = 1 - validation_split - test_split

        self.data = Data(self.stock)
        df = self.data.getData()
        df_normalized = self.data.normalizeData(df)
        df_normalized = torch.FloatTensor(df_normalized.to_numpy())

        train_split = int(train_split * df.shape[0])
        validation_split = int(validation_split * df.shape[0])
        test_split = int(test_split * df.shape[0])

        training_split = df_normalized[:train_split, :]
        training_data = Dataset(training_split, self.time_window)
        self.training_dataloader = DataLoader(training_data,
                                              batch_size=self.batch_size)

        # testing data
        real_data_tensor = torch.FloatTensor(df.to_numpy())
        self.real_data_test = torch.FloatTensor(
            real_data_tensor[-test_split:-self.time_window, 3])
        testing_dataset = Dataset(df_normalized[-test_split:, :], self.time_window)
        self.testing_dataloader = DataLoader(testing_dataset,
                                             batch_size=self.batch_size)
        self.real_data_dataloader = DataLoader(self.real_data_test,
                                               batch_size=self.batch_size)

    def train(self):
        # Model
        self.lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons)
        self.lstm_model.load_state_dict(
            torch.load("/Users/baptiste/Desktop/training/AAPL_36.pth"))
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(self.lstm_model.parameters(),
                                     lr=self.learning_rate)
        print("Start training")
        for epoch in range(nb_epochs):
            for (x, y) in self.training_dataloader:
                optimizer.zero_grad()
                # reset the hidden state for each batch
                self.lstm_model.hidden_cell = (
                    torch.zeros(1, self.batch_size, self.lstm_model.nb_neurons),
                    torch.zeros(1, self.batch_size, self.lstm_model.nb_neurons))
                pred = self.lstm_model(x.float())
                y = y.view(self.batch_size, 1)
                loss = loss_function(pred, y)
                loss.backward()
                optimizer.step()
            print("epoch n°%s : loss = %s" % (epoch, loss.item()))
            self.validate()
            if epoch % 5 == 1:
                model_name = "%s_%s.pth" % (self.stock, epoch)
                # the original referenced an undefined output_path here
                torch.save(self.lstm_model.state_dict(),
                           os.path.join(self.output, model_name))

    def show_result(self):
        files = os.listdir(self.output)
        for file in files:
            if ".pth" in file:
                path = os.path.join(self.output, file)
                lstm_model = LSTM(self.input_size, self.output_size,
                                  self.nb_neurons)
                lstm_model.load_state_dict(torch.load(path))
                lstm_model.eval()
                print("model : %s loaded" % path)
                predictions = []
                for (x, _) in self.testing_dataloader:
                    if x.shape[0] == self.batch_size:
                        with torch.no_grad():
                            lstm_model.hidden_cell = (
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons),
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons))
                            output = lstm_model(x.float())
                            output = self.data.unnormalizeData(output).squeeze()
                            predictions += output.tolist()
                plt.plot(predictions, label="prediction")
                plt.plot(self.real_data_test, label="target")
                plt.title(file)
                plt.legend()
                plt.show()
with torch.no_grad():
    for i, (sequences, labels) in enumerate(validation_loader):
        sequences = sequences.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(sequences)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

validation_accuracy = 1.0 * correct / total
print(f"Epoch [{epoch + 1}/{num_epochs}], "
      f"Loss: {running_loss / len(training_set)}, "
      f"Validation Accuracy: {validation_accuracy * 100}%, "
      f"Time: {time.time() - start_time}")
validation_values.append(validation_accuracy)

if epoch % 5 == 0:
    print("Checkpointing...")
    torch.save(
        {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimiser_state_dict": optimiser.state_dict(),
            "loss_values": loss_values,
            "validation_values": validation_values,
        },
        f"{path}/checkpoint/epoch-{epoch}.tar",
    )

torch.save(model.state_dict(), f"{path}/model.pt")
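# A minimal sketch of restoring one of the periodic checkpoints above to
# resume training. Key names match the saved dictionary; model and optimiser
# are assumed to be constructed beforehand, and the epoch number is
# hypothetical:
checkpoint = torch.load(f"{path}/checkpoint/epoch-10.tar")
model.load_state_dict(checkpoint["model_state_dict"])
optimiser.load_state_dict(checkpoint["optimiser_state_dict"])
loss_values = checkpoint["loss_values"]
validation_values = checkpoint["validation_values"]
start_epoch = checkpoint["epoch"] + 1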
def main(train_type=None):
    model_path = './model.pth'
    train_data, weighted = strtolist()
    test_data = loadtestdata()
    preudo_list = []
    unlabeled_data = None
    vocab = None

    if train_type == 'self_train':
        unlabeled_data = pd.read_csv('/home/g19tka13/taskA/aclgenerate.csv', sep=',')
        unlabeled_data = unlabeled_data.head(3000)
        vocab = load_word_vector(train_data, test_data, 'self_train', unlabeled_data)
    else:
        vocab = load_word_vector(train_data, test_data)

    best_val_f1 = 0
    if train_type == 'self_train':
        prelabel_data = None
        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100,
                     num_layers=2, batch=10)
        model.embedding.weight.data = vocab.vectors
        # keep the pre-trained word vectors fixed (no gradient updates)
        model.embedding.weight.requires_grad = False
        print(model)

        while len(preudo_list) < 2700:
            class_id = []
            delete_id = []
            if len(preudo_list) == 0:
                # first round: train on the labeled data only
                train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
            else:
                # later rounds: add the pseudo-labeled data
                train_iter, val_iter, label_word_id = assemble(
                    train_data, vocab, 1, prelabeled_data=prelabel_data)
            test_iter, unlabel_iter = assemble(test_data, vocab, 0,
                                               unlabeled_data=unlabeled_data)
            weight = torch.tensor(weighted)
            train_iter = Data.DataLoader(train_iter, batch_size=10, shuffle=True)
            val_iter = Data.DataLoader(val_iter, batch_size=10, shuffle=True)
            test_iter = Data.DataLoader(test_iter, batch_size=10, shuffle=False)
            unlabel_iter = Data.DataLoader(unlabel_iter, batch_size=10, shuffle=False)

            optimizer = optim.Adam(model.parameters(), lr=0.0005)
            n_epoch = 10
            y = torch.ones(1).long()
            for epoch in range(n_epoch):
                model.train()
                for item_idx, item in enumerate(train_iter, 0):
                    label = item[2]
                    # per-batch class weights derived from the label distribution
                    unique_num, count = torch.unique(label, return_counts=True)
                    unique_num = unique_num.tolist()
                    real_weight = torch.ones(6, dtype=torch.float)
                    for i in range(6):
                        if i in unique_num:
                            idx = unique_num.index(i)
                            real_weight[i] = 1 / np.log(1.02 + count[idx] / 10)
                        else:
                            real_weight[i] = 1 / np.log(2.02)
                    optimizer.zero_grad()
                    out = model(item)
                    # weighted cross entropy with weights computed per batch
                    loss = F.cross_entropy(out, label.long(), weight=real_weight)
                    loss.backward()
                    optimizer.step()
                    if (item_idx + 1) % 5 == 0:
                        # torch.max returns (values, indices); the indices
                        # are the predicted classes
                        train_value, train_y_pre = torch.max(out, 1)
                        f1 = f1_score(label.long(), train_y_pre, average='macro')
                        print('epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f'
                              % (epoch, item_idx, loss, f1))

                # evaluate once per epoch
                model.eval()
                val_pre_label = []
                val_y_label = []
                with torch.no_grad():
                    for item in val_iter:
                        label = item[2]
                        out = model(item)
                        _, val_y_pre = torch.max(out, 1)
                        val_pre_label.extend(val_y_pre)
                        val_y_label.extend(label)
                f1 = f1_score(torch.Tensor(val_y_label).long(),
                              torch.Tensor(val_pre_label), average='macro')
                print(f1)
                if f1 > best_val_f1:
                    print('val acc: %.4f > %.4f saving model %.4f'
                          % (f1, best_val_f1, len(preudo_list)))
                    torch.save(model.state_dict(), model_path)
                    best_val_f1 = f1

            # after each round, create pseudo-labels on the unlabeled pool
            model.eval()
            with torch.no_grad():
                for item in unlabel_iter:
                    index = item[2]
                    out = model(item)
                    out = F.softmax(out, dim=1)
                    predict_value, predict_class = torch.max(out, 1)
                    for i in range(len(predict_value)):
                        if predict_value[i] > 0.9:
                            # record indices so the confidently predicted rows
                            # can be removed from the unlabeled pool
                            delete_id.append(index[i].item())
                            class_id.append(predict_class[i].item())
            preudo_list.extend(delete_id)
            if len(preudo_list) != 0:
                unlabeled_data, prelabel_data = split_unlabeled_data(
                    unlabeled_data, delete_id, class_id, prelabel_data)
    else:
        train_iter, val_iter, label_word_id, label_to_id = assemble(train_data, vocab, 1)
        test_iter, unlabel_iter = assemble(test_data, vocab, 0)
        weight = torch.tensor(weighted)
        train_iter = Data.DataLoader(train_iter, batch_size=batch_size, shuffle=True)
        val_iter = Data.DataLoader(val_iter, batch_size=batch_size, shuffle=True)
        test_iter = Data.DataLoader(test_iter, batch_size=batch_size, shuffle=False)

        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100,
                     num_layers=2, batch=batch_size)
        model.embedding.weight.data = vocab.vectors
        # keep the pre-trained word vectors fixed (no gradient updates)
        model.embedding.weight.requires_grad = False
        print(model)

        optimizer = optim.Adam(model.parameters(), lr=0.001)
        n_epoch = 50
        best_val_f1 = 0
        # nn.CosineEmbeddingLoss expects 2-D inputs, not 1-D vectors
        loss_fnc = nn.CosineEmbeddingLoss(reduction='mean')
        one_list = torch.ones((batch_size, 1), dtype=torch.float)
        zero_list = torch.zeros((batch_size, 1), dtype=torch.float)
        for epoch in range(n_epoch):
            model.train()
            for item_idx, item in enumerate(train_iter, 0):
                label = item[2]
                unique_num, count = torch.unique(label, return_counts=True)
                unique_num = unique_num.tolist()
                real_weight = torch.ones(6, dtype=torch.float)
                for i in range(6):
                    if i in unique_num:
                        idx = unique_num.index(i)
                        real_weight[i] = 1 / np.log(1.02 + count[idx] / batch_size)
                    else:
                        real_weight[i] = 1 / np.log(2.02)
                optimizer.zero_grad()
                out, out_o, label_matrix, out_len, label_id = model(item, label_to_id)
                p_rep, n_rep = confusion(out_o, label_matrix, out_len, label_id)
                # weighted cross entropy plus cosine losses that pull the output
                # towards the positive representation and away from the negative one
                loss1 = F.cross_entropy(out, label.long(), weight=real_weight)
                loss2 = loss_fnc(out, p_rep, one_list)
                loss3 = loss_fnc(out, n_rep, zero_list)
                loss = loss1 + loss2 + loss3
                loss.backward()
                optimizer.step()
                if (item_idx + 1) % 5 == 0:
                    _, train_y_pre = torch.max(out, 1)
                    f1 = f1_score(label.long(), train_y_pre, average='macro')
                    print('epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f'
                          % (epoch, item_idx, loss, f1))

            # evaluate once per epoch
            model.eval()
            val_pre_label = []
            val_y_label = []
            with torch.no_grad():
                for item in val_iter:
                    label = item[2]
                    out = model(item)
                    _, val_y_pre = torch.max(out, 1)
                    val_pre_label.extend(val_y_pre)
                    val_y_label.extend(label)
            f1 = f1_score(torch.Tensor(val_y_label).long(),
                          torch.Tensor(val_pre_label), average='macro')
            print(f1)
            if f1 > best_val_f1:
                print('val acc: %.4f > %.4f saving model' % (f1, best_val_f1))
                torch.save(model.state_dict(), model_path)
                best_val_f1 = f1

    # final evaluation with the best saved model
    test_pre_label = []
    test_y_label = []
    model_state = torch.load(model_path)
    model.load_state_dict(model_state)
    model.eval()
    with torch.no_grad():
        for item_idx, item in enumerate(test_iter, 0):
            label = item[2]
            out = model(item)
            _, test_pre = torch.max(out, 1)
            test_pre_label.extend(test_pre)
            test_y_label.extend(label)
    final_f1 = f1_score(torch.Tensor(test_y_label).long(),
                        torch.Tensor(test_pre_label), average='macro')
    print('test_pre_label', collections.Counter(torch.Tensor(test_pre_label).tolist()))
    print('test_y_label', collections.Counter(torch.Tensor(test_y_label).tolist()))
    print('test f1 : %.4f' % final_f1)
    generate_submission(torch.Tensor(test_pre_label).tolist())

    # per-class count of correct predictions and the confusion matrix
    count = {}
    test_pre = torch.Tensor(test_pre_label).tolist()
    test_true = torch.Tensor(test_y_label).tolist()
    c_matrix = confusion_matrix(test_true, test_pre, labels=[0, 1, 2, 3, 4, 5])
    print(c_matrix)
    for i in range(len(test_true)):
        if test_true[i] == test_pre[i]:
            count[test_true[i]] = count.get(test_true[i], 0) + 1
    print(count)

    pre_true = pd.DataFrame(columns=['true_id', 'pre_id'])
    pre_true['true_id'] = pd.Series(test_true)
    pre_true['pre_id'] = pd.Series(test_pre)
    pre_true.to_csv('/home/g19tka13/taskA/true_predict.csv', sep=',', index=False)
def save_network(network: LSTM, path: str):
    torch.save(network.state_dict(), path)
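# A matching loader for the saver above; a sketch assuming the caller
# constructs the LSTM with the right hyperparameters first:
def load_network(network: LSTM, path: str) -> LSTM:
    network.load_state_dict(torch.load(path))
    network.eval()
    return network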
logger.debug(lstm_crf)

# Task
optimizer = optim.SGD(
    filter(lambda p: p.requires_grad, lstm_crf.parameters()),
    lr=args.lr, momentum=args.momentum)
processor = SeqLabelProcessor(gpu=use_gpu)

train_args = vars(args)
train_args['word_embed_size'] = word_embed.num_embeddings
state = {
    'model': {
        'word_embed': word_embed.state_dict(),
        'char_embed': char_embed.state_dict(),
        'char_hw': char_hw.state_dict(),
        'lstm': lstm.state_dict(),
        'crf': crf.state_dict(),
        'linear': linear.state_dict(),
        'lstm_crf': lstm_crf.state_dict()
    },
    'args': train_args,
    'vocab': {
        'token': token_vocab,
        'label': label_vocab,
        'char': char_vocab,
    }
}

try:
    global_step = 0
    best_dev_score = best_test_score = 0.0
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the data loader
    dataset, targets = load_dataset()
    print('\nThe data are loaded')

    # Build the model
    lstm = LSTM(args.input_size, args.output_size)
    print('The model is built')
    print(lstm)
    if torch.cuda.is_available():
        lstm.cuda()

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    # Train the model
    total_time = 0
    sm = 50  # start saving models at epoch 50
    for epoch in range(args.num_epochs):
        print('\nepoch ' + str(epoch) + ':')
        avg_loss = 0
        start = time.time()
        for i in range(0, len(dataset), args.batch_size):
            lstm.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi.view(-1, 1, 32))
            bt = to_var(bt)
            bo = lstm(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()

        epoch_avg_loss = avg_loss / (len(dataset) / args.batch_size)
        print('--average loss:', epoch_avg_loss)

        end = time.time()
        epoch_time = end - start
        total_time = total_time + epoch_time
        print('time per epoch:', epoch_time)

        # append the epoch loss to a csv log
        with open(args.model_path + 'lstm_loss.csv', 'a+') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([epoch_avg_loss])

        if epoch == sm:
            model_path = 'lstm_' + str(sm) + '.pkl'
            torch.save(lstm.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + args.save_step

    model_path = 'lstm_final.pkl'
    torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
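# to_var and get_input are assumed helpers that are not shown. A plausible
# to_var for this old-style (pre-0.4) PyTorch code would wrap a tensor in a
# Variable and move it to the GPU when available (a sketch, not the original):
from torch.autograd import Variable

def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)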
def train(feature, label, epochs, model, layer, hidden, save, postfix,
          index2char, index2phone, phone_map, phone2index):
    dataset = Feature_Dataset(feature, 'train')
    train_size = int(0.9 * len(dataset))
    if feature == 'mfcc':
        feature_dim = 39
    elif feature == 'fbank':
        feature_dim = 69
    elif feature == 'all':
        feature_dim = 108

    print("Building model and optimizer...")
    if model == 'LSTM':
        train_model = LSTM(feature_dim, hidden, layer)
    elif model == 'C_RNN':
        group_size = 5
        train_model = C_RNN(group_size, feature_dim, hidden, layer)
    elif model == 'BiLSTM':
        train_model = LSTM(feature_dim, hidden, layer, bi=True)
    if USE_CUDA:
        train_model = train_model.cuda()

    optimizer = optim.Adam(train_model.parameters(), lr=0.005)
    # optimizer = optim.SGD(train_model.parameters(), lr=0.1)
    criterion = nn.NLLLoss()
    if USE_CUDA:
        criterion = criterion.cuda()

    for epoch in range(1, epochs + 1):
        print("Epoch {}".format(epoch))
        epoch_loss = 0
        epoch_edit = 0
        for i in tqdm(range(1, train_size + 1)):
            data = dataset[i - 1]
            speaker = data[0]
            train_model.zero_grad()
            input_hidden = train_model.init_hidden()
            train_feature = Variable(data[1].float())
            output = train_model(train_feature, input_hidden)
            output_seq = test_trim(index2char, index2phone, phone_map, phone2index,
                                   torch.max(output, 1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char, index2phone, phone_map, phone2index,
                                      [[int(l)] for l in label[speaker]])
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            loss = criterion(output, target)
            edit = editdistance.eval(output_seq, target_seq)
            epoch_loss += loss.item() / train_size  # loss.data[0] in pre-0.4 PyTorch
            epoch_edit += edit / train_size
            loss.backward()
            optimizer.step()
        print("Negative log-likelihood: {}".format(epoch_loss))
        print("Edit distance: {}".format(epoch_edit))

        # validation on the held-out 10%
        val_loss = 0
        val_edit = 0
        for i in tqdm(range(train_size + 1, len(dataset) + 1)):
            data = dataset[i - 1]
            speaker = data[0]
            val_feature = Variable(data[1].float())
            output = train_model(val_feature, train_model.init_hidden())
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            val_loss += criterion(output, target).item()
            output_seq = test_trim(index2char, index2phone, phone_map, phone2index,
                                   torch.max(output, 1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char, index2phone, phone_map, phone2index,
                                      [[int(l)] for l in label[speaker]])
            val_edit += editdistance.eval(output_seq, target_seq)
        print("Validation loss: {}".format(val_loss / (len(dataset) - train_size)))
        print("Validation edit distance: {}".format(val_edit / (len(dataset) - train_size)))

        if epoch % save == 0:
            directory = os.path.join(SAVE_DIR, feature, model,
                                     '{}-{}{}'.format(layer, hidden, postfix))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'model': train_model.state_dict(),
                'opt': optimizer.state_dict(),
                'val_loss': val_loss / (len(dataset) - train_size),
                'val_edit': val_edit / (len(dataset) - train_size),
            }, os.path.join(directory, '{}.tar'.format(epoch)))
    print("Finish training")
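# A minimal sketch of reloading one of the periodic checkpoints saved above.
# The feature/model/epoch values and the '2-128' directory (layer-hidden with
# an empty postfix) are hypothetical; key names match the saved dictionary:
checkpoint = torch.load(os.path.join(SAVE_DIR, 'mfcc', 'LSTM', '2-128', '10.tar'))
train_model = LSTM(39, 128, 2)  # must match the architecture that was saved
train_model.load_state_dict(checkpoint['model'])
print('restored model with validation loss', checkpoint['val_loss'])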