def train(opt, th):
    """Train a character-level RNN language model.

    Args:
        opt: config namespace (train_data_path, batch_size, num_workers,
            embed_size, hidden_size, n_layers, dropout_p, bidir,
            learning_rate, max_epochs, min_perplexity, model_path).
        th: TextConverter object providing vocab_size.

    Returns:
        None. The best/final model is saved to opt.model_path.
    """
    # 1. Training data.
    data_set = TextDataset(opt.train_data_path, th)
    train_data = DataLoader(data_set, opt.batch_size, shuffle=True,
                            num_workers=opt.num_workers)
    # 2. Model.
    model = CharRNN(th.vocab_size, opt.embed_size, opt.hidden_size,
                    opt.n_layers, opt.dropout_p, opt.bidir)
    if USE_CUDA:
        model = model.cuda(DEVICE_ID)
    # 3. Loss / optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)
    # 4. Training loop.
    for e in range(opt.max_epochs):
        epoch_loss = 0
        hidden = None
        # Each batch is [b, seq_len]; the last batch may be smaller than b.
        for input_seqs, labels in train_data:
            b = input_seqs.shape[0]
            if hidden is not None:
                # Truncate the carried hidden state to the current batch size
                # and DETACH it so gradients do not flow across batches.
                # FIX: the original kept the full history alive and needed
                # loss.backward(retain_graph=True), growing the graph (and
                # memory) without bound over the epoch.
                hidden = hidden[:, :b, :].detach()
            labels = labels.long().view(-1)
            input_seqs, labels = get_variable(input_seqs), get_variable(labels)
            # Forward pass.
            probs, hidden = model(input_seqs, hidden)
            probs = probs.view(-1, th.vocab_size)
            # Loss and backward.
            loss = criterion(probs, labels)
            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_ replaces the deprecated clip_grad_norm.
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()
            # loss.item() replaces the deprecated loss.data[0].
            epoch_loss += loss.item()
        # Perplexity = exp(mean cross-entropy over the epoch).
        entropy_loss = epoch_loss / len(train_data)
        perplexity = np.exp(entropy_loss)
        info = "epoch: {}, perp: {:.3f}".format(e + 1, perplexity)
        print(info)
        if perplexity <= opt.min_perplexity or e == opt.max_epochs - 1:
            print("best model")
            torch.save(model, opt.model_path)
            break
def main():
    """Train a sentiment-aware CharRNN on the 'imdb' dataset with truncated
    backpropagation through time (TBPTT).

    Periodically prints attention weights, generates a sample, and prints a
    sentiment-colored example from the current batch.
    """
    ds = Dataset('imdb')
    params = {
        'batch_size': 67,
        'shuffle': True,
        'num_workers': 8,
        'collate_fn': collate_fn
    }
    epochs = 4
    lr = 0.01
    tbptt_steps = 256  # TBPTT window length (characters per backward pass)
    training_generator = data.DataLoader(ds, **params)
    model = CharRNN(input_size=ds.encoder.get_vocab_size(),
                    embedding_size=8,
                    hidden_size=128,
                    output_size=ds.encoder.get_vocab_size(),
                    no_sentiments=3,
                    dense_size=32,
                    padding_idx=ds.encoder.get_id(PADDING_TOKEN),
                    n_layers=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    step_no = 0
    for epoch in range(epochs):
        print('Epoch: ', epoch)
        for x_i, y_i, l_i in training_generator:
            model.reset_intermediate_vars()
            step_no += 1
            print(x_i.size())
            batch_loss = 0
            # Walk the sequence in TBPTT windows; the ceil-div form covers a
            # ragged final window.
            for step in range(l_i[0] // tbptt_steps +
                              (l_i[0] % tbptt_steps != 0)):
                von = tbptt_steps * step
                bis = min(tbptt_steps * (step + 1), l_i[0])
                out = model(x_i[:, von:bis])
                if step % 25 == 0:
                    print(model.attn[0].detach().numpy(),
                          model.attn[-1].detach().numpy())
                loss = model.loss(out, y_i, l_i, von, bis)
                batch_loss += loss
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1.5)
                # BUG FIX: the original additionally ran a manual SGD update
                # (p.data.add_(-lr, p.grad.data)) right before
                # optimizer.step(), so every window stepped the weights twice
                # with two different update rules. Adam's step alone is the
                # intended update.
                optimizer.step()
                # Keep hidden-state values but cut their autograd history.
                model.detach_intermediate_vars()
            print('Total loss for this batch: ', batch_loss.item())
            if step_no % 30 == 1:
                gen_sample, sentis = model.generate_text(
                    ds.encoder, 'T', 200, 0.7)
                print_colored_text(gen_sample, sentis, ds.encoder)
                # Print an example with sentiments
                print_colored_text(x_i[-1].data.numpy(),
                                   get_sentiments(model, x_i[-1], 0.7),
                                   ds.encoder)
def main():
    """Restore a trained CharRNN from a checkpoint and emit a text sample."""
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])
    # coloredlogs.install(level='DEBUG')

    # Hyper-parameters; must mirror those used when the checkpoint was saved.
    hparams = dict(
        num_layers=2,
        rnn_type='lstm',
        dropout=0.5,
        emb_size=50,
        hidden_size=256,
    )
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(n_tokens=n_tokens,
                    pad_id=token_to_idx[PAD_TOKEN],
                    **hparams)
    if torch.cuda.is_available():
        model = model.cuda()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        generate_sample(model, token_to_idx, idx_to_token, n_tokens=20)
    except KeyboardInterrupt:
        print('Aborted!')
def run_training(model: CharRNN, dataset, config: dict, validation: bool,
                 valid_dataset):
    """Train `model` on `dataset` with checkpoint resume, periodic sampling
    and an epoch-indexed learning-rate schedule.

    Args:
        model: the CharRNN to train (mutated in place).
        dataset: training dataset yielding (x, y, lengths) batches.
        config: dict with 'initial_lr', 'filename', 'batch_size', 'epochs',
            'lr_schedule' (dict epoch -> lr) and 'is_bytes'.
        validation: when True, run BPC validation on odd-numbered epochs.
        valid_dataset: dataset used for validation.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=config['initial_lr'])
    # Resume: load_checkpoint returns the last finished epoch (falsy when
    # starting fresh).
    epoch = load_checkpoint(optimizer, model, config['filename'])
    if not epoch:
        epoch = 0
    epoch += 1
    params = {
        'batch_size': config['batch_size'],
        # Windows does not support forked DataLoader workers well.
        'shuffle': False,
        'num_workers': 0 if os.name == 'nt' else 8
    }
    data_generator = data.DataLoader(dataset, **params)
    while epoch < config['epochs'] + 1:
        model.reset_intermediate_vars()
        for step, (x_i, y_i, l_i) in enumerate(data_generator):
            loss = run_forward_pass_and_get_loss(model, x_i, y_i, l_i)
            # Gradient descent step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.5)
            optimizer.step()
            # Truncated BPTT: keep hidden values, drop autograd history.
            model.detach_intermediate_vars()
            if step % 100 == 0:
                print('Epoch: {} Loss for step {} : {}'.format(
                    epoch, step, round(loss.item(), 4)))
            if step % 1000 == 1:
                gen_sample = model.generate_text(dataset.encoder, 't', 200)
                print_tokens(dataset.encoder.map_ids_to_tokens(gen_sample),
                             config['is_bytes'])
        save_checkpoint(optimizer, model, epoch, config['filename'])
        if validation and epoch % 2:
            bpc = validate(valid_dataset, model)
            print('BPC on validation set: ', bpc)
        if epoch in config['lr_schedule']:
            # FIX: update the learning rate in place rather than rebuilding
            # the optimizer; recreating Adam silently discarded its running
            # moment estimates and any state restored from the checkpoint.
            for group in optimizer.param_groups:
                group['lr'] = config['lr_schedule'][epoch]
        epoch += 1
def main():
    """Load a trained CharRNN checkpoint and score four fixed test phrases."""
    logging.root.setLevel(logging.NOTSET)
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])
    # coloredlogs.install(level='DEBUG')

    # Hyper-parameters; must mirror those used when the checkpoint was saved.
    hparams = dict(
        num_layers=2,
        rnn_type='lstm',
        dropout=0.5,
        emb_size=50,
        hidden_size=256,
    )
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(n_tokens=n_tokens,
                    pad_id=token_to_idx[PAD_TOKEN],
                    **hparams)
    if torch.cuda.is_available():
        model = model.cuda()
    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    # Fixed phrases to score, in the original order.
    phrases = (
        "bababac bababa bacc bac bacc",
        "bababac baba bac bacc bac",
        "baba",
        "ccab cab ccab ababab cababab",
    )
    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        for phrase in phrases:
            score(model, token_to_idx, idx_to_token, seed_phrase=phrase)
    except KeyboardInterrupt:
        print('Aborted!')
def main(): """ Main function Here, you should instantiate 1) DataLoaders for training and validation. Try SubsetRandomSampler to create these DataLoaders. 3) model 4) optimizer 5) cost function: use torch.nn.CrossEntropyLoss """ parser = argparse.ArgumentParser() parser.add_argument('--val_ratio', type=float, default=.5, help='The ratio for valid set') parser.add_argument('--n_layers', type=int, default=4, help='Number of stacked RNN layers') parser.add_argument('--n_hidden', type=int, default=512, help='Number of hidden neurons of RNN cells') parser.add_argument('--drop_prob', type=float, default=0.1, help='Dropout probability') parser.add_argument('--num_epochs', type=int, default=100, help='The number of epochs') parser.add_argument('--lr', type=float, default=0.001, help='Learning rate') parser.add_argument('--device', type=str, default='gpu', help='For cpu: \'cpu\', for gpu: \'gpu\'') parser.add_argument('--batch_size', type=int, default=256, help='Size of batches for training') parser.add_argument('--model_save_dir', type=str, default='../model', help='Directory for saving model.') parser.add_argument('--results_save_dir', type=str, default='../results', help='Directory for saving results.') parser.add_argument('--rnn', type=bool, default=True, help='Train vanilla rnn model') parser.add_argument('--lstm', type=bool, default=True, help='Train lstm model') parser.add_argument('--chunk_size', type=int, default=30, help='Chunk size(sequence length)') parser.add_argument('--s_step', type=int, default=3, help='Sequence step') args = parser.parse_args() n_cpu = multiprocessing.cpu_count() if args.device == 'gpu': args.device = 'cuda' device = torch.device(args.device) chunk_size = args.chunk_size s_step = args.s_step num_epochs = args.num_epochs batch_size = args.batch_size val_ratio = args.val_ratio shuffle_dataset = True random_seed = 42 datasets = dataset.Shakespeare('shakespeare_train.txt', chunk_size, s_step) dataset_size = len(datasets) indices = 
list(range(dataset_size)) split = int(np.floor(val_ratio * dataset_size)) if shuffle_dataset: np.random.seed(random_seed) np.random.shuffle(indices) train_indices, val_indices = indices[split:], indices[:split] train_sampler = SubsetRandomSampler(train_indices) valid_sampler = SubsetRandomSampler(val_indices) trn_loader = DataLoader(datasets, batch_size=batch_size, sampler=train_sampler, num_workers=n_cpu) val_loader = DataLoader(datasets, batch_size=batch_size, sampler=valid_sampler, num_workers=n_cpu) chars = datasets.chars print('-----Train Vanilla RNN Model-----') if args.rnn: model = CharRNN(chars, args).to(device) optimizer = Adam(model.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() rnn_trn_loss, rnn_val_loss = [], [] best_val_loss = np.inf for epoch in range(args.num_epochs): epoch_time = time.time() trn_loss = train(model, trn_loader, device, criterion, optimizer) val_loss = validate(model, val_loader, device, criterion) rnn_trn_loss.append(trn_loss) rnn_val_loss.append(val_loss) print('Epoch: %3s/%s...' % (epoch + 1, num_epochs), 'Train Loss: %.4f...' % trn_loss, 'Val Loss: %.4f...' 
% val_loss, 'Time: %.4f' % (time.time() - epoch_time)) if val_loss < best_val_loss: best_val_loss = val_loss torch.save(model.state_dict(), '%s/rnn.pt' % args.model_save_dir) value, idx = np.array(rnn_val_loss).min(), np.array( rnn_val_loss).argmin() plt.figure(figsize=(8, 6)) plt.title('Vanilla RNN Model training and validation loss') plt.plot(np.arange(1, args.num_epochs + 1), rnn_trn_loss, 'g', label='Train Loss') plt.plot(np.arange(1, args.num_epochs + 1), rnn_val_loss, 'r', label='Val Loss') plt.grid(True) plt.legend(loc='upper right') plt.annotate('min epoch: %s \n\ min valid loss: %.5f' % (idx, value), (idx, value), xytext=(-60, 20), textcoords='offset points', arrowprops={'arrowstyle': '->'}) plt.savefig('%s/rnn_loss.png' % args.results_save_dir, dpi=300) print('-----Train LSTM Model-----') if args.lstm: model = CharLSTM(chars, args).to(device) optimizer = Adam(model.parameters(), lr=args.lr) criterion = nn.CrossEntropyLoss() lstm_trn_loss, lstm_val_loss = [], [] best_val_loss = np.inf for epoch in range(args.num_epochs): epoch_time = time.time() trn_loss = train(model, trn_loader, device, criterion, optimizer) val_loss = validate(model, val_loader, device, criterion) lstm_trn_loss.append(trn_loss) lstm_val_loss.append(val_loss) print('Epoch: %3s/%s...' % (epoch + 1, num_epochs), 'Train Loss: %.4f...' % trn_loss, 'Val Loss: %.4f...' 
% val_loss, 'Time: %.4f' % (time.time() - epoch_time)) if val_loss < best_val_loss: best_val_loss = val_loss torch.save(model.state_dict(), '%s/lstm.pt' % args.model_save_dir) value, idx = np.array(lstm_val_loss).min(), np.array( lstm_val_loss).argmin() plt.figure(figsize=(8, 6)) plt.title('LSTM Model training and validation loss') plt.plot(np.arange(1, args.num_epochs + 1), lstm_trn_loss, 'g', label='Train Loss') plt.plot(np.arange(1, args.num_epochs + 1), lstm_val_loss, 'r', label='Val Loss') plt.grid(True) plt.legend(loc='upper right') plt.annotate('min epoch: %s \n\ min valid loss: %.5f' % (idx, value), (idx, value), xytext=(-60, 20), textcoords='offset points', arrowprops={'arrowstyle': '->'}) plt.savefig('%s/lstm_loss.png' % args.results_save_dir, dpi=300)
def main():
    """Entry point: parse CLI args, build or resume a CharRNN decoder, and
    train it with per-epoch validation and best-model checkpointing.

    Training stops early once validation loss stops improving; the model is
    also saved on KeyboardInterrupt.
    """
    # Parse command line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--train_set', type=str, required=True)
    argparser.add_argument('--valid_set', type=str, required=True)
    argparser.add_argument('--model', type=str, default="gru")
    argparser.add_argument('--model_file', type=str, default='None')
    argparser.add_argument('--n_epochs', type=int, default=30)
    argparser.add_argument('--hidden_size', type=int, default=200)
    argparser.add_argument('--n_layers', type=int, default=3)
    argparser.add_argument('--learning_rate', type=float, default=0.01)
    argparser.add_argument('--chunk_len', type=int, default=200)
    argparser.add_argument('--batch_size', type=int, default=300)
    argparser.add_argument('--num_workers', type=int, default=8)
    argparser.add_argument('--cuda', action='store_true')
    argparser.add_argument('--cpu', action='store_true')
    args = argparser.parse_args()

    # Initialize models and start training
    if args.model_file == 'None':
        # Fresh model: input and output vocabularies are both n_characters.
        decoder = CharRNN(
            n_characters,
            args.hidden_size,
            n_characters,
            model=args.model,
            n_layers=args.n_layers,
        )
        epoch_from = 1
        prev_valid_loss = sys.maxsize  # sentinel: any real loss is smaller
        old_filename = None
    else:
        if args.cpu:
            # map_location forces CPU deserialization of a GPU-saved model.
            decoder = torch.load(args.model_file,
                                 map_location=lambda storage, loc: storage)
        else:
            decoder = torch.load(args.model_file)
        # Recover hyper-parameters from the checkpoint filename. This relies
        # on the exact '_'-separated layout emitted by model_file_name();
        # NOTE(review): fragile — verify the slice offsets against that
        # function if the naming scheme ever changes.
        info = args.model_file.split('_')
        args.model = info[0]
        epoch_from = int(info[1][5:]) + 1
        args.n_layers = int(info[2][7:])
        args.hidden_size = int(info[5][2:])
        prev_valid_loss = float(info[7][4:-3])
        old_filename = args.model_file
        print("successfully loaded model! Continuing from epoch {0} with valid loss {1}"
              .format(epoch_from, prev_valid_loss))

    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        decoder.cuda()
    start = time.time()

    train_dataset = WordDataset(args.train_set, args.chunk_len)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers,
                                  drop_last=True)
    valid_dataset = WordDataset(args.valid_set, args.chunk_len)
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers,
                                  drop_last=True)
    try:
        print('Training for maximum {} epochs...'.format(args.n_epochs))
        for epoch in range(epoch_from, args.n_epochs + 1):
            # Mean training loss over all batches this epoch.
            train_loss, num_samples = 0, 0
            for s in tqdm(train_dataloader):
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                train_loss += train(decoder, optimizer, criterion, input_,
                                    target, args.batch_size, args.chunk_len,
                                    args.cuda)
                num_samples += 1
            train_loss /= num_samples

            # Mean validation loss.
            valid_loss, num_samples = 0, 0
            for s in valid_dataloader:
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                valid_loss += evaluate(decoder, criterion, input_, target,
                                       args.batch_size, args.chunk_len,
                                       args.cuda)
                num_samples += 1
            valid_loss /= num_samples

            elapsed = time_since(start)
            pcnt = epoch / args.n_epochs * 100
            log = (
                '{} elapsed - epoch #{} ({:.1f}%) - training loss (BPC) {:.2f} '
                '- validation loss (BPC) {:.2f}')
            print(log.format(elapsed, epoch, pcnt, train_loss, valid_loss))

            if valid_loss > prev_valid_loss:
                # Early stopping on the first validation regression.
                print('No longer learning, just overfitting, stopping here.')
                break
            else:
                # Save this best-so-far model and drop the previous file.
                filename = model_file_name(decoder, epoch, train_loss,
                                           valid_loss)
                torch.save(decoder, filename)
                print('Saved as {}'.format(filename))
                if old_filename:
                    os.remove(old_filename)
                old_filename = filename
                prev_valid_loss = valid_loss
    except KeyboardInterrupt:
        print("Saving before quit...")
        # valid_loss is undefined if interrupted before the first validation
        # pass; fall back to a marker string in the filename.
        try:
            valid_loss
        except:
            valid_loss = 'no_val'
        filename = model_file_name(decoder, epoch, train_loss, valid_loss)
        torch.save(decoder, filename)
        print('Saved as {}'.format(filename))
class Trainer(object):
    """End-to-end char-RNN training harness: wires dataset, model and
    optimizer together and exposes train/test/predict/run."""

    def __init__(self, args):
        self.args = args
        self.device = torch.device('cuda' if self.args.cuda else 'cpu')
        self.convert = None        # TextConverter, set by get_data()
        self.model = None          # CharRNN, set by get_model()
        self.optimizer = None      # ScheduledOptim, set by get_optimizer()
        self.criterion = self.get_loss
        self.meter = AverageValueMeter()  # running mean of per-batch loss
        self.train_loader = None
        self.get_data()
        self.get_model()
        self.get_optimizer()

    def get_data(self):
        """Build the text converter, dataset and training DataLoader."""
        self.convert = TextConverter(self.args.txt, max_vocab=self.args.max_vocab)
        dataset = TextDataset(self.args.txt, self.args.len,
                              self.convert.text_to_arr)
        self.train_loader = DataLoader(dataset, self.args.batch_size,
                                       shuffle=True,
                                       num_workers=self.args.num_workers)

    def get_model(self):
        """Instantiate the CharRNN on the configured device."""
        self.model = CharRNN(self.convert.vocab_size, self.args.embed_dim,
                             self.args.hidden_size, self.args.num_layers,
                             self.args.dropout, self.args.cuda).to(self.device)
        if self.args.cuda:
            # Let cuDNN auto-tune kernels for the (fixed-size) inputs.
            cudnn.benchmark = True

    def get_optimizer(self):
        """Wrap Adam in the project's scheduled-optimizer helper."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.lr)
        self.optimizer = ScheduledOptim(optimizer)

    @staticmethod
    def get_loss(score, label):
        # Flatten labels so CrossEntropyLoss compares (N, C) scores
        # against (N,) targets.
        return nn.CrossEntropyLoss()(score, label.view(-1))

    def save_checkpoint(self, epoch):
        """Persist the full model every `save_interval` epochs."""
        if (epoch + 1) % self.args.save_interval == 0:
            model_out_path = self.args.save_file + "epoch_{}_model.pth".format(
                epoch + 1)
            torch.save(self.model, model_out_path)
            print("Checkpoint saved to {}".format(model_out_path))

    def save(self):
        """Persist the final model after all epochs complete."""
        model_out_path = self.args.save_file + "final_model.pth"
        torch.save(self.model, model_out_path)
        print("Final model saved to {}".format(model_out_path))

    @staticmethod
    def pick_top_n(predictions, top_n=5):
        """Sample one token id from the re-normalized top-n probabilities."""
        top_predict_prob, top_predict_label = torch.topk(predictions, top_n, 1)
        top_predict_prob /= torch.sum(top_predict_prob)
        top_predict_prob = top_predict_prob.squeeze(0).cpu().numpy()
        top_predict_label = top_predict_label.squeeze(0).cpu().numpy()
        c = np.random.choice(top_predict_label, size=1, p=top_predict_prob)
        return c

    def train(self):
        """Run one training epoch and print exp(mean loss) as perplexity."""
        self.meter.reset()
        self.model.train()
        for x, y in tqdm(self.train_loader):
            y = y.long()
            x, y = x.to(self.device), y.to(self.device)
            # Forward.
            score, _ = self.model(x)
            loss = self.criterion(score, y)
            # Backward.
            self.optimizer.zero_grad()
            loss.backward()
            # Clip gradient.
            nn.utils.clip_grad_norm_(self.model.parameters(), 5)
            self.optimizer.step()
            self.meter.add(loss.item())
        print('perplexity: {}'.format(np.exp(self.meter.value()[0])))

    def test(self):
        """Generate predict_len characters seeded with ONE random character
        drawn from args.begin, and print the decoded text."""
        self.model.eval()
        begin = np.array([i for i in self.args.begin])
        begin = np.random.choice(begin, size=1)
        text_len = self.args.predict_len
        samples = [self.convert.word_to_int(c) for c in begin]
        input_txt = torch.LongTensor(samples)[None]
        input_txt = input_txt.to(self.device)
        _, init_state = self.model(input_txt)  # warm up the hidden state
        result = samples
        model_input = input_txt[:, -1][:, None]
        with torch.no_grad():
            for i in range(text_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(
                    self.device)
                result.append(prediction[0])
        print(self.convert.arr_to_text(result))

    def predict(self):
        """Like test(), but seeds generation with the FULL args.begin text."""
        self.model.eval()
        samples = [self.convert.word_to_int(c) for c in self.args.begin]
        input_txt = torch.LongTensor(samples)[None].to(self.device)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]
        with torch.no_grad():
            for i in range(self.args.predict_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(
                    self.device)
                result.append(prediction[0])
        print(self.convert.arr_to_text(result))

    def run(self):
        """Full schedule: train + sample + periodic checkpoint per epoch,
        then save the final model."""
        for e in range(self.args.max_epoch):
            print('===> EPOCH: {}/{}'.format(e + 1, self.args.max_epoch))
            self.train()
            self.test()
            self.save_checkpoint(e)
        self.save()
def train(opt, x_train, x_val, dictionary_len):
    '''
    Train a CharRNN language model.

    Arguments
    ---------
    opt: options namespace; fields used here include batch_size, seq_length,
        nologs, onlytrain, dropout, hidden_size, layers, lr and scheduler.
    x_train: training text corpus.
    x_val: validation text corpus (unused when opt.onlytrain is set).
    dictionary_len: vocabulary size (also the model output dimension).

    Side effects: writes TensorBoard logs under logs/ and one checkpoint per
    epoch under weights/<timestamp>/.
    '''
    # Fix every RNG so runs are reproducible.
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # Declaring the hyperparameters
    batch_size = opt.batch_size
    seq_length = int(opt.seq_length)
    epochs = 50  # hard-coded number of training epochs
    if torch.cuda.is_available():
        device = "cuda"
        torch.cuda.manual_seed_all(0)
    else:
        device = "cpu"
    print(device)
    date = datetime.now().strftime('%y%m%d%H%M%S')
    if opt.nologs:
        writer = SummaryWriter(log_dir=f'logs/nologs/')
    else:
        writer = SummaryWriter(log_dir=f'logs/logs_{date}/')
    # Targets for next-character prediction (NOTE(review): presumed from the
    # helper's name — confirm against get_labels_text_prediction).
    y_train = get_labels_text_prediction(x_train)
    train_dataset = TextDataset(x_train, y_train, max_len=seq_length)
    if not opt.onlytrain:
        y_val = get_labels_text_prediction(x_val)
        val_dataset = TextDataset(x_val, y_val, max_len=seq_length)
        val_loader = DataLoader(dataset=val_dataset,
                                pin_memory=device == 'cuda',
                                batch_size=batch_size,
                                shuffle=False)
    train_loader = DataLoader(dataset=train_dataset,
                              pin_memory=device == 'cuda',
                              batch_size=batch_size,
                              shuffle=True)
    model_params = {
        'dictionary_len': dictionary_len,
        'dropout': opt.dropout,
        'hidden_size': opt.hidden_size,
        'layers': opt.layers,
        'embedding_len': 32,
        'device': device,
        'lr': opt.lr
    }
    model = CharRNN(**model_params).to(device)
    print(model)
    # embed()
    # summary(model, input_size=(channels, H, W))
    # summary(model, input_size=(dictionary_len, 28, 28))
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.scheduler:
        # Halve the learning rate when the validation loss plateaus.
        scheduler = ReduceLROnPlateau(optimizer, 'min', cooldown=3,
                                      factor=0.5, patience=10)
    global_step = 0
    for j in trange(epochs, desc='Training LSTM...'):
        for i, (x, y) in enumerate(train_loader):
            if i == len(train_loader) - 1:
                # The last (possibly ragged) batch is skipped; padding is not
                # implemented yet ("FER PADDING - DE MOMENT NO VA" is Catalan
                # for "do padding - doesn't work for now").
                print("FER PADDING - DE MOMENT NO VA")
                continue
            model.train()
            x = x.to(device)
            y = y.to(device)
            # state_h, state_c = model.zero_state(opt.batch_size)
            # # Transfer data to GPU
            # state_h = state_h.to(device)
            # state_c = state_c.to(device)
            # DELETE PAST GRADIENTS
            optimizer.zero_grad()
            # FORWARD PASS -> last state, (all) [state_h[-1] == pred]
            pred, (state_h, state_c) = model(x)
            # pred, (state_h, state_c) = model(x, (state_h, state_c))
            # CALCULATE LOSS
            # pred = pred.transpose(1, 2)
            # Flatten to (batch*seq, vocab) vs (batch*seq,) for CE loss.
            pred2 = pred.view(-1, dictionary_len)
            y2 = y.view(-1)
            loss = criterion(pred2, y2)
            loss_value = loss.item()
            # BACKWARD PASS
            loss.backward()
            # MINIMIZE LOSS
            optimizer.step()
            global_step += 1
            if i % 100 == 0:
                writer.add_scalar('train/loss', loss_value, global_step)
                print('[Training epoch {}: {}/{}] Loss: {}'.format(
                    j, i, len(train_loader), loss_value))
        if not opt.onlytrain:
            # Per-epoch validation pass (no gradient updates are applied).
            val_loss = []
            for i, (x, y) in enumerate(val_loader):
                if i == len(val_loader) - 1:
                    # print("FER PADDING - DE MOMENT NO VA")
                    continue
                model.eval()
                x = x.to(device)
                y = y.to(device)
                # state_h, state_c = model.zero_state(opt.batch_size)
                # state_h = state_h.to(device)
                # state_c = state_c.to(device)
                # NO BACKPROPAGATION
                # FORWARD PASS
                # pred, (state_h, state_c) = model(x, (state_h, state_c))
                pred, (state_h, state_c) = model(x)
                # CALCULATE LOSS
                # pred = pred.transpose(1, 2)
                # pred = [batch x 40 x diccionary_len]
                # y = [batch x 40]
                pred2 = pred.view(-1, dictionary_len)
                y2 = y.view(-1)
                loss = criterion(pred2, y2)
                # loss = criterion(pred, y)
                val_loss.append(loss.item())
                if i % 50 == 0:
                    print('[Validation epoch {}: {}/{}] Loss: {}'.format(
                        j, i, len(val_loader), loss.item()))
            writer.add_scalar('val/loss', np.mean(val_loss), j)
            if opt.scheduler:
                scheduler.step(np.mean(val_loss))
            writer.add_scalar("lr", optimizer.param_groups[0]["lr"], j)
        # Sample 500 characters from the current model for inspection.
        predicted_words = inference_prediction(model, device, 500)
        # output = pred[0].unsqueeze(0)  # [1, diccionary_len, 40]
        # predicted_words = do_inference_test(output, model, device)
        print(predicted_words)
        writer.add_text('val/Generated_Samples', predicted_words, j)
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        # if j % 5 == 0:
        os.makedirs("weights/{}".format(date), exist_ok=True)
        torch.save(checkpoint, "weights/{}/checkpoint_{}.pt".format(date, j))
def train(filename, rnn_type, num_layers, dropout, emb_size, hidden_size,
          num_epochs, batch_size, learning_rate, num_samples, seed_phrase,
          sample_every, checkpoint_path):
    """
    Trains a character-level Recurrent Neural Network in PyTorch.

    Args: optional arguments [python train.py --help]
    """
    logging.info('reading `{}` for character sequences'.format(filename))
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=filename)
    # Pin the start marker '~' to index 0 and the end marker '#' to the last
    # index. FIX: guarded removals — a vocabulary missing either marker no
    # longer raises ValueError.
    for marker in ('~', '#'):
        if marker in idx_to_token:
            idx_to_token.remove(marker)
    idx_to_token = ['~'] + idx_to_token + ['#']
    # FIX: O(n) dict build via enumerate; the original called list.index()
    # once per token, which is O(n^2) over the vocabulary.
    token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}
    logging.info(idx_to_token)
    logging.info(token_to_idx)
    n_tokens = len(idx_to_token)
    max_length = inputs.size(1)

    logging.debug('creating char-level RNN model')
    model = CharRNN(num_layers=num_layers, rnn_type=rnn_type, dropout=dropout,
                    n_tokens=n_tokens, emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    logging.debug('defining model training operations')
    # define training procedures and operations for training the model
    criterion = nn.NLLLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     min_lr=1e-6, factor=0.1,
                                                     patience=7, verbose=True)

    # train-val-test split: 90% train, then the remainder split 50/50.
    split_index = int(0.9 * inputs.size(0))
    train_tensors, inputs = inputs[:split_index], inputs[split_index:]
    split_index = int(0.5 * inputs.size(0))
    val_tensors, test_tensors = inputs[:split_index], inputs[split_index:]
    del inputs  # only the three named splits are needed from here on
    logging.info('train tensors: {}'.format(train_tensors.size()))
    logging.info('val tensors: {}'.format(val_tensors.size()))
    logging.info('test tensors: {}'.format(test_tensors.size()))

    logging.debug('training char-level RNN model')
    # loop over epochs
    for epoch in range(1, num_epochs + 1):
        epoch_loss, n_iter = 0.0, 0
        # loop over batches
        for tensors in tqdm(iterate_minibatches(train_tensors,
                                                batchsize=batch_size),
                            desc='Epoch[{}/{}]'.format(epoch, num_epochs),
                            leave=False,
                            total=train_tensors.size(0) // batch_size):
            # optimize model parameters
            epoch_loss += optimize(model, tensors, max_length, n_tokens,
                                   criterion, optimizer)
            n_iter += 1
        # evaluate model after every epoch
        val_loss = evaluate(model, val_tensors, max_length, n_tokens,
                            criterion)
        # lr_scheduler decreases lr when stuck at local minima
        scheduler.step(val_loss)
        # log epoch status info
        logging.info(
            'Epoch[{}/{}]: train_loss - {:.4f} val_loss - {:.4f}'.format(
                epoch, num_epochs, epoch_loss / n_iter, val_loss))
        # sample from the model every few epochs
        if epoch % sample_every == 0:
            print(
                'Epoch[{}/{}]: train_loss - {:.4f} val_loss - {:.4f}'.format(
                    epoch, num_epochs, epoch_loss / n_iter, val_loss))
            for _ in range(num_samples):
                sample = generate_sample(model, token_to_idx, idx_to_token,
                                         max_length, n_tokens,
                                         seed_phrase=seed_phrase)
                logging.debug(sample)
        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        best_model_path = checkpoint_path
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
# Train a CharRNN name generator over ./data/names.txt, one example at a time.
with open('./data/names.txt') as f:
    names = f.read().split('\n')

vocab = build_vocab_from_list(names)
names_dataset = NamesDataset(vocab, names)
dataloader = DataLoader(names_dataset, batch_size=32, shuffle=True,
                        collate_fn=collate)

model = CharRNN(len(vocab), 256, 2)
model.to(config.device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epoch = 8
total_loss_item = 0
total_loss = 0
total_mini_batch = len(dataloader)

for e in range(n_epoch):
    for i, example in enumerate(dataloader):
        # example is an (inputs, targets) pair of equal-length sequences;
        # zip pairs them directly instead of indexing by position.
        # FIX: the original loop variable `input` shadowed the builtin.
        for inp, target in zip(example[0], example[1]):
            output, loss = train(model, optimizer, criterion, inp, target)
            total_loss += loss
def train(
        filename="poets.txt",
        hidden_size=128,
        n_layers=2,
        learning_rate=0.01,
        n_epochs=10000,
        chunk_len=20,
        batch_size=1024,
        print_every=100):
    """Train (or resume training of) a CharRNN poem generator.

    Args:
        filename: corpus file; one poem/sentence per line.
        hidden_size: RNN hidden dimension.
        n_layers: number of stacked RNN layers.
        learning_rate: initial Adam learning rate, halved every 1000 epochs.
        n_epochs: number of random-batch training iterations.
        chunk_len: character length of each training chunk.
        batch_size: chunks per batch.
        print_every: iterations between progress prints / samples / saves.
    """
    # Global configuration: corpus text and character statistics.
    file, file_len, all_characters, n_characters = helpers.read_file(filename)
    sentences = file.split("\n")
    print("There are %d unique characters in the dataset" % n_characters)
    print("There are %d sentences in the dataset with total of %d characters"
          % (len(sentences), len(file)))

    # Model saving and loading: resume from an existing file when present.
    model_filename = helpers.pt_name
    if os.path.exists(model_filename):
        decoder = load(model_filename)
    else:
        decoder = CharRNN(
            n_characters,
            hidden_size,
            n_characters,
            model=helpers.mcell,
            n_layers=n_layers,
        )
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    if helpers.USE_CUDA:
        decoder.cuda()
    start = time.time()
    all_losses = []
    try:
        print("Training for %d epochs..." % n_epochs)
        for epoch in range(n_epochs):
            if epoch != 0 and epoch % 1000 == 0:
                # Halve the learning rate in place. FIX: the original rebuilt
                # the Adam optimizer here, discarding its accumulated moment
                # estimates; mutating param_groups keeps that state.
                learning_rate /= 2
                for group in decoder_optimizer.param_groups:
                    group['lr'] = learning_rate
            inp, target = random_training_set(sentences, chunk_len, batch_size)
            loss = train_one_entry(decoder, decoder_optimizer, criterion, inp,
                                   target, chunk_len, batch_size)
            all_losses.append(loss)
            if epoch != 0 and epoch % print_every == 0:
                print('%s: [%s (%d %d%%) %.4f]' % (
                    time.ctime(), helpers.time_since(start), epoch,
                    epoch / n_epochs * 100, loss))
                print(generate(decoder, '新年', 100, cuda=helpers.USE_CUDA),
                      '\n')
                save(decoder, model_filename)
    except KeyboardInterrupt:
        # Persist progress when interrupted from the keyboard.
        save(decoder, model_filename)
    import matplotlib.pyplot as plt
    plt.plot(all_losses)
    plt.xlabel("iteration")
    plt.ylabel("train loss")
def main(mode="RNN"):
    """
    Main function

    Instantiates DataLoaders for training and validation (via
    SubsetRandomSampler), the model, optimizer and the
    torch.nn.CrossEntropyLoss cost function, then trains for 50 epochs.

    Args:
        mode: "RNN" for CharRNN, "LSTM" for CharLSTM.

    Returns:
        (losses, val_losses): per-epoch training and validation losses.

    Raises:
        NotImplementedError: if `mode` is neither "RNN" nor "LSTM".
    """
    start_time = time.time()
    data = "./shakespeare_train.txt"
    data_set = dataset.Shakespeare(data)
    all_characters = string.printable
    input_size = len(all_characters)

    # FIX: pick the device first and use .to(device); the original called
    # .cuda() unconditionally — crashing on CPU-only machines — even though
    # it computed `device` afterwards.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if mode == "RNN":
        model = CharRNN(input_size, 512, input_size, 4).to(device)
    elif mode == "LSTM":
        model = CharLSTM(input_size, 512, input_size, 4).to(device)
    else:
        raise NotImplementedError
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # 70/30 sequential index split; SubsetRandomSampler shuffles within each.
    total_idx = list(range(len(data_set)))
    split_idx = int(len(data_set) * 0.7)
    trn_idx = total_idx[:split_idx]
    val_idx = total_idx[split_idx:]
    trn_loader = da.DataLoader(data_set, batch_size=64,
                               sampler=SubsetRandomSampler(trn_idx))
    val_loader = da.DataLoader(data_set, batch_size=64,
                               sampler=SubsetRandomSampler(val_idx))

    losses = []
    val_losses = []
    for epoch in range(1, 51):
        loss = train(model, trn_loader, device, criterion, optimizer, epoch,
                     input_size, mode=mode)
        val_loss = validate(model, val_loader, device, criterion, epoch,
                            input_size)
        losses.append(loss)
        val_losses.append(val_loss)
    return losses, val_losses
if args.cuda: print("Using CUDA") file, file_len = read_file(args.filename) n_characters = len(string.printable) decoder = CharRNN( n_characters, args.hidden_size, n_characters, model=args.model, n_layers=args.n_layers, ) decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate) criterion = nn.CrossEntropyLoss() if args.cuda: decoder.cuda() start = time.time() all_losses = [] loss_avg = 0 try: print("Training for %d epochs..." % args.n_epochs) for epoch in tqdm(range(1, args.n_epochs + 1)): loss = train(*random_training_set(args.chunk_len, args.batch_size)) loss_avg += loss
#构建字典 vo={} for sentance in data: for word in sentance: if word not in vo: vo[word]=len(vo) vo['<EOP>']=len(vo) vo['<START>']=len(vo) for i in range(len(data)): data[i]=toList(data[i]) data[i].append("<EOP>") #给每句诗加个换行结尾 model=CharRNN(len(vo),256,256) optimizer=opt.RMSprop(model.parameters(),lr=0.01,weight_decay=0.0001) #RMSprop算法 lr学习率 wd权重衰减 criterion=nn.NLLLoss() #Negative Log Likelihood负对数似然损失函数 one_hot_var_target={} for a in vo: one_hot_var_target.setdefault(a,make_one_hot_vec_target(a,vo)) #setdefault方法是如果之前没有就设置为default的值 这里好像是把字符映射到了tensor上 epoch=10 batch=10 Train_size=len(data) def test(): loss=0 counts=0 v=int(Train_size/batch)