Exemplo n.º 1
0
def train(cfg):
    """Train the jigsaw-puzzle permutation model described by ``cfg``.

    Self-supervised setup: each image is cut into pieces, the pieces are
    shuffled, and the model must predict the permutation (via Gumbel-Sinkhorn
    soft permutation matrices) that restores the original order.  The final
    model weights are written to ``<cfg.out_dir>/model.pth``.

    Parameters
    ----------
    cfg : config object with attributes
        dataset, pieces, image_size, hid_c, stride, kernel_size, lr,
        batch_size, num_workers, epochs, tau, n_sink_iter, n_samples,
        display, out_dir.
    """
    logger = logging.getLogger("JigsawPuzzle")
    if torch.cuda.is_available():
        device = "cuda"
        # cuDNN autotuner is safe here: input shapes stay fixed for the run.
        torch.backends.cudnn.benchmark = True
    else:
        device = "cpu"

    # MNIST is grayscale; every other dataset handled here is RGB.
    in_c = 1 if cfg.dataset == "MNIST" else 3

    train_data = build_dataset(cfg, "train")

    model = ConvModel(in_c, cfg.pieces, cfg.image_size, cfg.hid_c, cfg.stride, cfg.kernel_size).to(device)
    optimizer = optim.Adam(model.parameters(), cfg.lr, eps=1e-8)

    train_loader = DataLoader(train_data, cfg.batch_size, shuffle=True, num_workers=cfg.num_workers, drop_last=True)

    logger.info("start training")
    for epoch in range(1, cfg.epochs+1):
        sum_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, _ = data  # labels are unused: the task is self-supervised
            # Split each image into ordered pieces and a shuffled copy.
            pieces, random_pieces, _ = batch_tch_divide_image(inputs, cfg.pieces)
            pieces, random_pieces = pieces.to(device), random_pieces.to(device)

            log_alpha = model(random_pieces)

            # Draw several Gumbel-Sinkhorn samples of soft permutation matrices.
            gumbel_sinkhorn_mat = [
                gumbel_sinkhorn_ops.gumbel_sinkhorn(log_alpha, cfg.tau, cfg.n_sink_iter)
                for _ in range(cfg.n_samples)
            ]

            # Apply each sampled (inverse) permutation to the shuffled pieces.
            est_ordered_pieces = [
                gumbel_sinkhorn_ops.inverse_permutation_for_image(random_pieces, gs_mat)
                for gs_mat in gumbel_sinkhorn_mat
            ]

            # Reconstruction loss summed over all permutation samples.
            # Generator form: no throwaway list is built.
            loss = sum(
                torch.nn.functional.mse_loss(X, pieces)
                for X in est_ordered_pieces
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss += loss.item()

            if cfg.display > 0 and ((i+1) % cfg.display) == 0:
                logger.info("epoch %i [%i/%i] loss %f", epoch, i+1, len(train_loader), loss.item())
        logger.info("epoch %i|  mean loss %f", epoch, sum_loss/len(train_loader))

    # Fix: the original crashed with FileNotFoundError when out_dir was missing.
    os.makedirs(cfg.out_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(cfg.out_dir, "model.pth"))
Exemplo n.º 2
0
    else:
        # Non-image variant: 46-dimensional feature input, no recurrent
        # encoder.  NOTE(review): the matching `if` branch lies above this
        # chunk and is not visible here.
        model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

    start_itr = 0
    c = []  # center vector for one-class estimation; empty until computed/loaded
    if args.one_class:
        if args.pretrained_ckpt is not None:
            # Resume: restore weights, iteration counter, and the saved
            # center `c` from the ./ckpt, ./iter_num and ./c directories.
            model.load_state_dict(
                torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
            start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
            c = torch.from_numpy(
                np.load('./c/' + args.pretrained_ckpt +
                        '.npy')).float().to(device)
        # computing initial c for one-class out-of-set estimation
        if len(c) == 0:
            c = get_c(dataset, model, args)

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)

    for itr in range(start_itr, args.max_iter):
        train(itr, dataset, args, model, optimizer, logger, device, c)
        if itr % 500 == 0:
            # Periodic checkpoint: weights, iteration number, and the
            # (possibly relabeled) dataset labels.
            torch.save(model.state_dict(),
                       './ckpt/' + args.model_name + '.pkl')
            np.save('./iter_num/' + args.model_name + '.npy', itr)
            np.save('./labels/' + args.model_name + '.npy', dataset.labels)
        if itr in change_itr:
            # At scheduled iterations, regenerate pseudo-labels for the dataset.
            gen_new_labels(dataset, model, args, device)
        if args.one_class and itr % 50 == 0 and itr <= 500:
            # Early in training, refresh the one-class center as the
            # model's representation shifts; frozen after iteration 500.
            c = get_c(dataset, model, args)
Exemplo n.º 3
0
    # Record this round's stats for agent 1.  NOTE(review): the loop header
    # and the first train_round call are above this chunk and not visible.
    agent1_accuracy_history.append(round_accuracy)
    agent1_message_length_history.append(round_sentence_length / 20)  # normalized by max length 20 — TODO confirm
    agent1_loss_history.append(round_loss)

    # NOTE(review): `round` shadows the builtin; presumably a round counter
    # defined before this chunk.
    round += 1
    print("replacing roles")
    print("********** round %d **********" % round)

    # Swap speaker/listener roles: agent2 now takes agent1's previous role.
    round_accuracy, round_loss, round_sentence_length = train_round(
        agent2, agent1, batches, optimizer1, args.max_sentence_len,
        args.vocab_size)
    print_round_stats(round_accuracy, round_loss, round_sentence_length)

    # Every 50 rounds, plot agent 1's training curves to graph.png
    # (overwritten each time).
    if round % 50 == 0:
        t = list(range(len(agent1_accuracy_history)))
        plt.plot(t, agent1_accuracy_history, label="Accuracy")
        plt.plot(t,
                 agent1_message_length_history,
                 label="Message length (/20)")
        plt.plot(t, agent1_loss_history, label="Training loss")

        plt.xlabel('# Rounds')
        plt.legend()
        plt.savefig("graph.png")
        plt.clf()

    # Every 500 rounds, checkpoint both agents.  Assumes ./checkpoints
    # exists — TODO confirm it is created elsewhere.
    if round % 500 == 0:
        torch.save(agent1.state_dict(),
                   os.path.join('checkpoints', 'agent1-%d.ckp' % round))
        torch.save(agent2.state_dict(),
                   os.path.join('checkpoints', 'agent2-%d.ckp' % round))