import sys
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import data_parallel

# make_loader, Processer, reload_model and save_checkpoint are assumed to be
# project-local helpers imported elsewhere in this module.


def validation(model, args, lr, epoch, device):
    """Run one pass over the cross-validation set and return the average loss."""
    dataloader, dataset = make_loader(
        args.cv_list,
        args.batch_size,
        num_workers=args.num_threads,
        processer=Processer(
            win_len=args.win_len,
            win_inc=args.win_inc,
            left_context=args.left_context,
            right_context=args.right_context,
            fft_len=args.fft_len,
            window_type=args.win_type))
    model.eval()
    loss_total = 0.0
    num_batch = len(dataloader)
    stime = time.time()
    with torch.no_grad():
        for idx, data in enumerate(dataloader):
            inputs, labels, lengths = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Sequence lengths stay on the CPU.
            outputs, _ = data_parallel(model, (inputs, lengths))
            loss = model.loss(outputs, labels, lengths)
            loss_total += loss.item()
            del loss, data, inputs, labels, lengths, _, outputs
    etime = time.time()
    elapsed = (etime - stime) / num_batch
    loss_total_avg = loss_total / num_batch
    print('CROSSVAL AVG.LOSS | Epoch {:3d}/{:3d} '
          '| lr {:.6e} | {:2.3f}s/batch | time {:2.1f}mins '
          '| loss {:2.8f}'.format(
              epoch + 1, args.max_epoch, lr, elapsed,
              (etime - stime) / 60.0, loss_total_avg))
    sys.stdout.flush()
    return loss_total_avg
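# `get_learning_rate` is called below but not defined in this file; it is
# presumably another project-local helper. If it is not provided elsewhere,
# a minimal sketch (assuming a single parameter group, as the Adam optimizer
# is built below) could look like this:
def get_learning_rate(optimizer):
    """Return the current learning rate of the optimizer's first param group."""
    return optimizer.param_groups[0]['lr']
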
def train(model, args, device, writer):
    """Full training loop with periodic logging, checkpointing and LR scheduling."""
    print('preparing data...')
    dataloader, dataset = make_loader(
        args.tr_list,
        args.batch_size,
        num_workers=args.num_threads,
        processer=Processer(
            win_len=args.win_len,
            win_inc=args.win_inc,
            left_context=args.left_context,
            right_context=args.right_context,
            fft_len=args.fft_len,
            window_type=args.win_type))
    print_freq = 100
    num_batch = len(dataloader)
    params = model.get_params(args.weight_decay)
    optimizer = optim.Adam(params, lr=args.learn_rate)
    # Halve the learning rate whenever the validation loss stops improving.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.5, patience=1, verbose=True)

    if args.retrain:
        start_epoch, step = reload_model(model, optimizer, args.exp_dir,
                                         args.use_cuda)
    else:
        start_epoch, step = 0, 0

    print('---------PRERUN-----------')
    lr = get_learning_rate(optimizer)
    print('(Initialization)')
    # Seed both loss curves with the initial validation loss so the plots
    # start from a common reference point.
    val_loss = validation(model, args, lr, -1, device)
    writer.add_scalar('Loss/Train', val_loss, step)
    writer.add_scalar('Loss/Cross-Validation', val_loss, step)

    for epoch in range(start_epoch, args.max_epoch):
        # Re-seed per epoch: shuffling is reproducible yet differs across epochs.
        torch.manual_seed(args.seed + epoch)
        if args.use_cuda:
            torch.cuda.manual_seed(args.seed + epoch)
        model.train()
        loss_total = 0.0
        loss_print = 0.0
        stime = time.time()
        lr = get_learning_rate(optimizer)
        for idx, data in enumerate(dataloader):
            inputs, labels, lengths = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Sequence lengths stay on the CPU.
            model.zero_grad()
            outputs, _ = data_parallel(model, (inputs, lengths))
            loss = model.loss(outputs, labels, lengths)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad_norm)
            optimizer.step()
            step += 1
            loss_total += loss.item()
            loss_print += loss.item()
            del lengths, outputs, labels, inputs, loss, _

            # Mid-epoch checkpoint every 3000 batches.
            if (idx + 1) % 3000 == 0:
                save_checkpoint(model, optimizer, epoch + 1, step, args.exp_dir)

            if (idx + 1) % print_freq == 0:
                elapsed = time.time() - stime
                speed_avg = elapsed / (idx + 1)
                loss_print_avg = loss_print / print_freq
                print('Epoch {:3d}/{:3d} | batches {:5d}/{:5d} | lr {:1.4e} | '
                      '{:2.3f}s/batch | loss {:2.6f}'.format(
                          epoch + 1, args.max_epoch, idx + 1, num_batch, lr,
                          speed_avg, loss_print_avg))
                sys.stdout.flush()
                writer.add_scalar('Loss/Train', loss_print_avg, step)
                loss_print = 0.0

        elapsed = time.time() - stime
        loss_total_avg = loss_total / num_batch
        print('Training AVG.LOSS |'
              ' Epoch {:3d}/{:3d} | lr {:1.4e} |'
              ' {:2.3f}s/batch | time {:3.2f}mins |'
              ' loss {:2.6f}'.format(
                  epoch + 1, args.max_epoch, lr,
                  elapsed / num_batch, elapsed / 60.0, loss_total_avg))

        val_loss = validation(model, args, lr, epoch, device)
        writer.add_scalar('Loss/Cross-Validation', val_loss, step)
        writer.add_scalar('learn_rate', lr, step)
        # Only keep checkpoints that improve on the best validation loss so far.
        if val_loss > scheduler.best:
            print('Rejected !!! The best is {:2.6f}'.format(scheduler.best))
        else:
            save_checkpoint(model, optimizer, epoch + 1, step, args.exp_dir)
        scheduler.step(val_loss)
        sys.stdout.flush()
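

# A minimal sketch of how `train` might be driven; `parse_args` and `Net` are
# hypothetical names, not part of this file, and using args.exp_dir as the
# TensorBoard log directory is an assumption.
if __name__ == '__main__':
    from torch.utils.tensorboard import SummaryWriter

    args = parse_args()  # hypothetical CLI parser returning the fields used above
    device = torch.device('cuda' if args.use_cuda and torch.cuda.is_available()
                          else 'cpu')
    model = Net(args).to(device)  # hypothetical model exposing get_params()/loss()
    writer = SummaryWriter(args.exp_dir)
    train(model, args, device, writer)
    writer.close()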