def main(args):
    """Train a ConvNet on the one-hot game dataset with patience-based stopping.

    Builds a run name from the hyper-parameters, seeds the RNGs, trains with
    Adam on a KL-divergence loss, scores the network with ``eval_nn_min``
    after every epoch, checkpoints the best-scoring weights to
    ``models/<logname>_best.pt`` and finally writes the loss/score history to
    ``logs/<logname>.npz``.

    Args:
        args: Namespace-like object. The attributes read here are ``name``,
            ``t_tuple`` (indexed at 0 and 1), ``soft``, ``channels``,
            ``blocks``, ``patience``, ``batch_size``, ``lr``, ``decay``,
            ``seed``, ``path``, ``pretrained`` and ``epochs``. ``args`` is
            not modified.

    Notes:
        * ``patience == 0`` disables early stopping on the score (the
          patience window is set to ``epochs``).
        * ``pretrained``, when truthy, names a checkpoint that is loaded
          from ``models/<pretrained>.pt`` and prefixes the run name with
          ``pre_``.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Build the optional name prefix locally instead of doing
    # ``args.name += '_'``: mutating the caller's namespace made a second
    # call with the same ``args`` produce 'name__...', and a ``None`` name
    # used to be rendered as the literal string 'None'.
    prefix = f'{args.name}_' if args.name else ''
    logname = (
        f'{prefix}{args.t_tuple[0]}_{args.t_tuple[1]}_soft{args.soft}'
        f'c{args.channels}b{args.blocks}_p{args.patience}_'
        f'bs{args.batch_size}lr{args.lr}d{args.decay}_s{args.seed}'
    )
    print(logname)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using {}'.format(device))
    torch.backends.cudnn.benchmark = True

    # Make sure the output directories exist up front so that a missing
    # folder cannot abort the run after an epoch has already been trained.
    os.makedirs('models', exist_ok=True)
    os.makedirs('logs', exist_ok=True)

    train_set = OneHotConvGameDataset(args.path, args.t_tuple[0],
                                      args.t_tuple[1], device, soft=args.soft)
    train_dat = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)

    m = ConvNet(channels=args.channels, blocks=args.blocks)
    if args.pretrained:
        m.load_state_dict(torch.load('models/{}.pt'.format(args.pretrained),
                                     map_location=device))
        print('Loaded ' + args.pretrained)
        logname = 'pre_' + logname
    m.to(device)

    loss_fn = nn.KLDivLoss(reduction='batchmean')
    optimizer = torch.optim.Adam(m.parameters(), lr=args.lr,
                                 weight_decay=args.decay)

    t_loss = []    # mean training loss per epoch
    min_move = []  # eval_nn_min score per epoch
    best = 0.0
    timer = 0      # epochs since the score last matched or beat ``best``
    stop = args.epochs if args.patience == 0 else args.patience

    # Early-abort rule: if the mean loss at this (0-based) epoch is still
    # above the threshold, the patience window is collapsed to zero so the
    # patience check below ends the run in that same epoch.
    abort_check_epoch = 2
    abort_loss_threshold = 210 / 1000

    data_len = len(train_dat)  # number of batches per epoch
    for epoch in range(args.epochs):
        print('-' * 10)
        print('Epoch: {}'.format(epoch))
        timer += 1

        m.train()
        running_loss = 0
        for x, y in tqdm(train_dat):
            optimizer.zero_grad()
            pred = m(x)
            loss = loss_fn(pred, y)
            # ``.item()`` already returns a detached Python number; the
            # legacy ``.data`` hop is unnecessary.
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
        running_loss /= data_len

        if epoch == abort_check_epoch and running_loss > abort_loss_threshold:
            stop = 0
        print('Train mLoss: {:.3f}'.format(1e3 * running_loss))
        t_loss.append(running_loss)

        m.eval()
        time1 = time()
        ave_min_move = eval_nn_min(m, number=10, repeats=40, device=device)
        time_str = ', took {:.0f} seconds'.format(time() - time1)
        min_move.append(ave_min_move)

        # Ties count as a new best, so the checkpoint is refreshed and the
        # patience timer reset whenever the score is at least ``best``.
        if ave_min_move >= best:
            tqdm.write(str(ave_min_move) + ' ** Best' + time_str)
            best = ave_min_move
            timer = 0
            torch.save(m.state_dict(), 'models/' + logname + '_best.pt')
        else:
            tqdm.write(str(ave_min_move) + time_str)

        if timer >= stop:
            print('Ran out of patience')
            print(f'Best score: {best}')
            break
        print(f'{stop - timer} epochs remaining')

    # ``params=args`` is stored as a pickled object array, so reading it
    # back requires ``np.load(..., allow_pickle=True)``.
    np.savez('logs/' + logname, t_loss=t_loss, min_move=min_move, params=args)
valset = MyDataset('val') val_loader = DataLoader(dataset=valset, num_workers=4, batch_size=args.batch_size, pin_memory=True) testset = MyDataset('test') test_loader = DataLoader(dataset=testset, num_workers=4, batch_size=args.batch_size, pin_memory=True) model = ConvNet() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) model = model.cuda() best_acc = 0.0 for epoch in range(args.max_epoch): lr_scheduler.step() model.train() for i, batch in enumerate(train_loader): imgs, labels = batch[0].cuda(), batch[1].cuda() optimizer.zero_grad() logits = model(imgs) loss = F.cross_entropy(logits, labels) loss.backward() optimizer.step() acc = count_acc(logits, labels) print('epoch {}, train {}/{}, loss={:.4f}, acc={:.4f}' .format(epoch, i, len(train_loader), loss.item(), acc)) model.eval() tmp_acc = 0.0 tmp_num = 0.0 with torch.no_grad():