Example #1
0
def train(hparams):
    """Run a fake training loop and log its metrics to test-tube.

    :param hparams: Parsed HyperOptArgumentParser arguments.
    """
    # hpc_exp_number always exists on hparams; it is simply None when the
    # job was not launched through SLURM, so this never crashes.
    slurm_exp_version = hparams.hpc_exp_number

    # Track every hyperparameter in a test-tube Experiment. Using the
    # SLURM-provided version (when present) keeps concurrently running
    # SLURM jobs from colliding with each other's experiment versions.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        version=slurm_exp_version,
        autosave=False,
    )
    exp.argparse(hparams)

    # Fake training loop.
    x = hparams.x_val
    for _step in range(100):
        y = hparams.y_val
        fake_out = x * y
        exp.log({'fake_err': fake_out.item()})

    # Persist the experiment once training finishes.
    exp.save()
Example #2
0
def train(hparams, *args):
    """Train your awesome model.

    :param hparams: The arguments to run the model with.
    """
    # Register every hyperparameter with a test-tube Experiment.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,  # where the metrics are written
        # The experiment version is optional; using the SLURM-assigned
        # number keeps simultaneous SLURM runs from overwriting each other.
        version=hparams.hpc_exp_number,
        autosave=False,
    )
    exp.argparse(hparams)

    # Simulate a training loop.
    x_value = hparams.x_val
    for _step in range(100):
        y_value = hparams.y_val
        fake_out = x_value * y_value
        exp.log({'fake_err': fake_out.item()})  # log metrics

    # Flush the experiment to disk when done.
    exp.save()
Example #3
0
def train(hparams, *args):
    """Train the ESNN model and log the resulting loss to test-tube.

    :param hparams: The arguments to run the model with.
    :param args: Extra positional arguments from the cluster runner
        (printed for debugging, otherwise unused).
    """
    # Seed both torch and numpy so runs are reproducible.
    SEED = hparams.seed
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    print(hparams)
    print(args)

    # Initialize the experiment and track all the hyperparameters.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        # Location to save the metrics.
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # Train and evaluate. runevaler returns the dataset, the trained and
    # validated models, raw losses, a loss dataframe, and kNN results.
    dsl, \
        trainedmodels,\
        validatedmodels,\
        losses,\
        lossdf,\
        knnres = runevaler("opsitu", hparams.epochs, [ESNNSystem],
                           [TorchEvaler], [eval_dual_ann],
                           networklayers=[hparams.c_layers, hparams.g_layers],
                           lrs=[hparams.lr],
                           dropoutrates=[hparams.dropout],
                           validate_on_k=10, n=1,
                           filenamepostfixes=["esnn"])
    stats = stat(lossdf, hparams.epochs, "esnn")
    print(f"type : {type(stats)}")
    print(f"innertype : {type(stats[0])}")
    print(f"stats : {stats}")
    print(f"stats0 : {stats[0]}")
    exp.log({'loss': stats[0]})

    # Save exp when done.
    exp.save()
def train(hparams):
    """Log a fake training run to a test-tube Experiment.

    :param hparams: Parsed HyperOptArgumentParser arguments.
    """
    # Track all parameters coming from the HyperOptArgumentParser.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # Pretend to train: multiply two random matrices each step.
    lhs = torch.rand((1, hparams.x_val))
    for _step in range(100):
        rhs = torch.rand((hparams.x_val, 1))
        product = lhs.mm(rhs)
        exp.log({'fake_err': product.item()})

    # Write the experiment out once finished.
    exp.save()
Example #5
0
def train(hparams, *args):
    """Train your awesome model.

    :param hparams: The arguments to run the model with.
    """
    # Initialize the experiment and record every hyperparameter.
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,  # where metrics land on disk
        autosave=False,
    )
    exp.argparse(hparams)

    # Fake training: one random matrix product per step.
    features = torch.rand((1, hparams.x_val))
    for _step in range(100):
        weights = torch.rand((hparams.x_val, 1))
        fake_err = features.mm(weights)
        exp.log({'fake_err': fake_err.item()})

    # Save the experiment when done.
    exp.save()
Example #6
0
def train(hparams):
    """Run a trivial TF1 graph in a loop and log results to test-tube.

    :param hparams: Parsed HyperOptArgumentParser arguments; must provide
        test_tube_exp_name, log_path, x_val and y_val.
    """
    # init exp and track all the parameters from the HyperOptArgumentParser
    exp = Experiment(
        name=hparams.test_tube_exp_name,
        save_dir=hparams.log_path,
        autosave=False,
    )
    exp.argparse(hparams)

    # define tensorflow graph
    x = tf.placeholder(dtype=tf.int32, name='x')
    y = tf.placeholder(dtype=tf.int32, name='y')
    out = x * y

    # Use a context manager so the session is always released, even if a
    # run step raises (the original leaked the session).
    with tf.Session() as sess:
        # Run the tf op
        for train_step in range(0, 100):
            output = sess.run(out,
                              feed_dict={x: hparams.x_val,
                                         y: hparams.y_val})
            exp.log({'fake_err': output})

    # save exp when we're done
    exp.save()
def run(args):
    """Build a CNN-encoder seq2seq model, then train/validate or test it.

    :param args: Parsed command-line arguments; must provide data paths,
        model hyperparameters and the ``test`` switch used below.
    :return: The result of ``test(...)`` in test mode, otherwise None.
    """
    device = torch.device("cuda" if (
        not args.cpu) and torch.cuda.is_available() else "cpu")
    print("Using device", device)

    train_data, val_data, test_data, src, trg = loader.load_data(args)

    src_padding_idx = src.vocab.stoi['<pad>']
    trg_padding_idx = trg.vocab.stoi['<pad>']

    # Print the first few vocab entries as a sanity check.
    for i in range(5):
        print(i, src.vocab.itos[i])
        print(i, trg.vocab.itos[i])

    assert src_padding_idx == config.PAD_TOKEN
    assert trg_padding_idx == config.PAD_TOKEN

    src_vocab_size = len(src.vocab)
    trg_vocab_size = len(trg.vocab)

    encoder = models.CnnEncoder(args, src_padding_idx,
                                src_vocab_size).to(device)
    if args.attention:
        assert args.bidirectional, "if using attention model, bidirectional must be true"
        decoder = models.LuongAttnDecoderRNN(args, trg_padding_idx,
                                             trg_vocab_size).to(device)
    else:
        assert not args.bidirectional, "if not using attention model, bidirectional must be false"
        decoder = models.RnnDecoder(args, trg_padding_idx,
                                    trg_vocab_size).to(device)

    # Initialize weights: zeros for biases, Xavier-normal for weights.
    for net in [encoder, decoder]:
        for name, param in net.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal_(param)

    # Optionally load pretrained word embeddings and freeze them.
    if args.encoder_word_embedding is not None:
        encoder_embedding_dict = torch.load(args.encoder_word_embedding)
        encoder.word_embedding.load_state_dict(
            {'weight': encoder_embedding_dict['weight']})
        if args.freeze_all_words:
            # Freeze the embedding *parameter*. Setting requires_grad on the
            # Module itself (as the original did) only creates a plain Python
            # attribute and does not stop gradient updates.
            encoder.word_embedding.weight.requires_grad = False
    else:
        encoder_embedding_dict = None
    if args.decoder_word_embedding is not None:
        decoder_embedding_dict = torch.load(args.decoder_word_embedding)
        decoder.embedding.load_state_dict(
            {'weight': decoder_embedding_dict['weight']})
        if args.freeze_all_words:
            decoder.embedding.weight.requires_grad = False
    else:
        decoder_embedding_dict = None

    # TODO: other optimizers
    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.l2_penalty)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.l2_penalty)

    # TODO: use different loss?
    loss_function = nn.NLLLoss()

    # TODO: save/load weights
    # TODO: early stopping
    loss_history = defaultdict(list)
    bleu_history = defaultdict(list)

    # Initiate test-tube experiment object (training mode only).
    if not args.test:
        exp = Experiment(
            name=args.name,
            save_dir=args.logs_path,
            autosave=True,
        )
        exp.argparse(args)

        model_path = os.path.join(args.model_weights_path, exp.name)
        model_path = os.path.join(model_path, str(exp.version))
        pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)
        print(model_path)

    if args.test:
        encoder.load_state_dict(
            torch.load(
                os.path.join(args.model_weights_path, 'encoder_weights.pt')))
        decoder.load_state_dict(
            torch.load(
                os.path.join(args.model_weights_path, 'decoder_weights.pt')))
        # NOTE(review): `i` here is left over from the vocab-printing loop
        # above (always 4) — confirm test() really expects that value.
        return test(args, encoder, decoder, encoder_optimizer,
                    decoder_optimizer, loss_function, device, i, test_data,
                    trg, encoder_embedding_dict, decoder_embedding_dict)
    else:
        for i in range(args.epoch):
            train_loss, val_loss, val_bleu = train_and_val(
                args, encoder, decoder, encoder_optimizer, decoder_optimizer,
                loss_function, device, i, train_data, val_data, trg,
                encoder_embedding_dict, decoder_embedding_dict)
            loss_history["train"].append(train_loss)
            loss_history["val"].append(val_loss)
            bleu_history["val"].append(val_bleu)

            # Update the best model checkpoint when val BLEU reaches a new max.
            if val_bleu == np.max(bleu_history["val"]):
                torch.save(encoder.state_dict(),
                           os.path.join(model_path, 'encoder_weights.pt'))
                torch.save(decoder.state_dict(),
                           os.path.join(model_path, 'decoder_weights.pt'))
            if args.save_all_epoch:
                model_path_current_epoch = os.path.join(model_path, str(i))
                pathlib.Path(model_path_current_epoch).mkdir(parents=True,
                                                             exist_ok=True)
                torch.save(
                    encoder.state_dict(),
                    os.path.join(model_path_current_epoch,
                                 'encoder_weights.pt'))
                torch.save(
                    decoder.state_dict(),
                    os.path.join(model_path_current_epoch,
                                 'decoder_weights.pt'))

            # Log per-epoch metrics to test-tube.
            exp.log({
                'train epoch loss': train_loss,
                'val epoch loss': val_loss,
                'val epoch bleu': val_bleu
            })

            if early_stop(bleu_history["val"], args.early_stopping, max):
                print("Early stopped.")
                break
Example #8
0
def train_VI_classification(net,
                            name,
                            save_dir,
                            batch_size,
                            nb_epochs,
                            trainset,
                            valset,
                            cuda,
                            flat_ims=False,
                            nb_its_dev=1,
                            early_stop=None,
                            load_path=None,
                            save_freq=20,
                            stop_criteria='test_ELBO',
                            tags=None,
                            show=False):
    """Train a variational-inference classifier and track it with test-tube.

    Trains ``net`` for up to ``nb_epochs`` epochs, periodically evaluating on
    ``valset``, checkpointing the best model according to ``stop_criteria``,
    and saving diagnostic plots (cost, KL, ELBO, error) to the experiment's
    media directory.

    :param net: model wrapper exposing fit/eval/save/load/update_lr and .model.
    :param name: experiment name for test-tube.
    :param save_dir: directory test-tube saves under.
    :param batch_size: minibatch size for both data loaders.
    :param nb_epochs: maximum number of training epochs.
    :param trainset: training dataset.
    :param valset: validation dataset.
    :param cuda: if True, pin loader memory for faster GPU transfer.
    :param flat_ims: flatten images to vectors before feeding the net.
    :param nb_its_dev: run validation every this many epochs.
    :param early_stop: stop after this many epochs without a new best epoch.
    :param load_path: optional checkpoint to load before training.
    :param save_freq: save a 'last' checkpoint every this many epochs.
    :param stop_criteria: one of 'test_ELBO', 'test_LL', 'train_ELBO'.
    :param tags: optional extra test-tube tags.
    :param show: display figures interactively in addition to saving them.
    :return: (exp, mloglike_train, KL_train, ELBO_train, err_train,
              mloglike_dev, err_dev)
    """
    exp = Experiment(name=name, debug=False, save_dir=save_dir, autosave=True)

    if load_path is not None:
        net.load(load_path)

    exp_version = exp.version

    media_dir = exp.get_media_path(name, exp_version)
    models_dir = exp.get_data_path(name, exp_version) + '/models'
    mkdir(models_dir)

    # Record the run configuration as experiment tags.
    exp.tag({
        'n_layers': net.model.n_layers,
        'batch_size': batch_size,
        'init_lr': net.lr,
        'lr_schedule': net.schedule,
        'nb_epochs': nb_epochs,
        'early_stop': early_stop,
        'stop_criteria': stop_criteria,
        'nb_its_dev': nb_its_dev,
        'model_loaded': load_path,
        'cuda': cuda,
    })

    if net.model.__class__.__name__ == 'arq_uncert_conv2d_resnet':
        exp.tag({
            'outer_width': net.model.outer_width,
            'inner_width': net.model.inner_width
        })
    else:
        exp.tag({'width': net.model.width})

    exp.tag({
        'prob_model': net.model.prob_model.name,
        'prob_model_summary': net.model.prob_model.summary
    })
    if tags is not None:
        exp.tag(tags)

    # Pin memory only when training on GPU.
    if cuda:
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  pin_memory=True,
                                                  num_workers=3)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                pin_memory=True,
                                                num_workers=3)

    else:
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  pin_memory=False,
                                                  num_workers=3)
        valloader = torch.utils.data.DataLoader(valset,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                pin_memory=False,
                                                num_workers=3)
    ## ---------------------------------------------------------------------------------------------------------------------
    # net dims
    cprint('c', '\nNetwork:')
    epoch = 0
    ## ---------------------------------------------------------------------------------------------------------------------
    # train
    cprint('c', '\nTrain:')

    print('  init cost variables:')
    mloglike_train = np.zeros(nb_epochs)
    KL_train = np.zeros(nb_epochs)
    ELBO_train = np.zeros(nb_epochs)
    ELBO_test = np.zeros(nb_epochs)
    err_train = np.zeros(nb_epochs)
    mloglike_dev = np.zeros(nb_epochs)
    err_dev = np.zeros(nb_epochs)
    best_epoch = 0
    best_train_ELBO = -np.inf
    best_test_ELBO = -np.inf
    best_dev_ll = -np.inf

    tic0 = time.time()
    for i in range(epoch, nb_epochs):
        net.set_mode_train(True)
        tic = time.time()
        nb_samples = 0
        for x, y in trainloader:

            if flat_ims:
                x = x.view(x.shape[0], -1)

            KL, minus_loglike, err = net.fit(x, y)
            err_train[i] += err
            mloglike_train[i] += minus_loglike / len(trainloader)
            KL_train[i] += KL / len(trainloader)
            nb_samples += len(x)

        # Per-sample averages were accumulated above; ELBO is reported per epoch.
        ELBO_train[i] = (-KL_train[i] - mloglike_train[i]) * nb_samples
        err_train[i] /= nb_samples

        toc = time.time()

        # ---- print
        print("it %d/%d, sample minus loglike = %f, sample KL = %.10f, err = %f, ELBO = %f" % \
              (i, nb_epochs, mloglike_train[i], KL_train[i], err_train[i], ELBO_train[i]), end="")
        exp.log({
            'epoch': i,
            'MLL': mloglike_train[i],
            'KLD': KL_train[i],
            'err': err_train[i],
            'ELBO': ELBO_train[i]
        })
        cprint('r', '   time: %f seconds\n' % (toc - tic))
        net.update_lr(i, 0.1)

        # ---- dev
        if i % nb_its_dev == 0:
            tic = time.time()
            nb_samples = 0
            for j, (x, y) in enumerate(valloader):
                if flat_ims:
                    x = x.view(x.shape[0], -1)

                minus_loglike, err = net.eval(x, y)

                mloglike_dev[i] += minus_loglike / len(valloader)
                err_dev[i] += err
                nb_samples += len(x)

            # NOTE: the original computed this line twice; once is enough.
            ELBO_test[i] = (-KL_train[i] - mloglike_dev[i]) * nb_samples
            err_dev[i] /= nb_samples
            toc = time.time()

            cprint('g',
                   '    sample minus loglike = %f, err = %f, ELBO = %f\n' %
                   (mloglike_dev[i], err_dev[i], ELBO_test[i]),
                   end="")
            cprint(
                'g',
                '    (prev best it = %i, sample minus loglike = %f, ELBO = %f)\n'
                % (best_epoch, best_dev_ll, best_test_ELBO),
                end="")
            cprint('g', '    time: %f seconds\n' % (toc - tic))
            exp.log({
                'epoch': i,
                'MLL_val': mloglike_dev[i],
                'err_val': err_dev[i],
                'ELBO_val': ELBO_test[i]
            })

            if stop_criteria == 'test_LL' and -mloglike_dev[i] > best_dev_ll:
                best_dev_ll = -mloglike_dev[i]
                best_epoch = i
                # %f, not %d: the metric is a float and %d truncated it.
                cprint('b', 'best test loglike: %f' % best_dev_ll)
                net.save(models_dir + '/theta_best.dat')
                probs = net.model.prob_model.get_q_probs().data.cpu().numpy()
                cutoff = np.max(probs) * 0.95
                exp.tag({
                    "q_vec":
                    net.model.get_q_vector().cpu().detach().numpy(),
                    "q_probs":
                    net.model.prob_model.get_q_probs().cpu().detach().numpy(),
                    "expected_depth":
                    np.sum(probs * np.arange(net.model.n_layers + 1)),
                    "95th_depth":
                    np.argmax(probs > cutoff),
                    "best_epoch":
                    best_epoch,
                    "best_dev_ll":
                    best_dev_ll
                })

            if stop_criteria == 'test_ELBO' and ELBO_test[i] > best_test_ELBO:
                best_test_ELBO = ELBO_test[i]
                best_epoch = i
                cprint('b', 'best test ELBO: %f' % best_test_ELBO)
                net.save(models_dir + '/theta_best.dat')
                probs = net.model.prob_model.get_q_probs().data.cpu().numpy()
                cutoff = np.max(probs) * 0.95
                exp.tag({
                    "q_vec":
                    net.model.get_q_vector().cpu().detach().numpy(),
                    "q_probs":
                    net.model.prob_model.get_q_probs().cpu().detach().numpy(),
                    "expected_depth":
                    np.sum(probs * np.arange(net.model.n_layers + 1)),
                    "95th_depth":
                    np.argmax(probs > cutoff),
                    "best_epoch":
                    best_epoch,
                    "best_test_ELBO":
                    best_test_ELBO
                })

        if stop_criteria == 'train_ELBO' and ELBO_train[i] > best_train_ELBO:
            best_train_ELBO = ELBO_train[i]
            best_epoch = i
            cprint('b', 'best train ELBO: %f' % best_train_ELBO)
            net.save(models_dir + '/theta_best.dat')
            probs = net.model.prob_model.get_q_probs().data.cpu().numpy()
            cutoff = np.max(probs) * 0.95
            exp.tag({
                "q_vec":
                net.model.get_q_vector().cpu().detach().numpy(),
                "q_probs":
                net.model.prob_model.get_q_probs().cpu().detach().numpy(),
                "expected_depth":
                np.sum(probs * np.arange(net.model.n_layers + 1)),
                "95th_depth":
                np.argmax(probs > cutoff),
                "best_epoch":
                best_epoch,
                "best_train_ELBO":
                best_train_ELBO
            })

        if save_freq is not None and i % save_freq == 0:
            exp.tag({
                "final_q_vec":
                net.model.get_q_vector().cpu().detach().numpy(),
                "final_q_probs":
                net.model.prob_model.get_q_probs().cpu().detach().numpy(),
                "final_expected_depth":
                np.sum(net.model.prob_model.get_q_probs().data.cpu().numpy() *
                       np.arange(net.model.n_layers + 1))
            })
            net.save(models_dir + '/theta_last.dat')

        if early_stop is not None and (i - best_epoch) > early_stop:
            exp.tag({"early_stop_epoch": i})
            cprint('r', '   stopped early!\n')
            break

    toc0 = time.time()
    runtime_per_it = (toc0 - tic0) / float(i + 1)
    cprint('r', '   average time: %f seconds\n' % runtime_per_it)

    ## ---------------------------------------------------------------------------------------------------------------------
    # fig cost vs its
    textsize = 15
    marker = 5

    plt.figure(dpi=100)
    fig, ax1 = plt.subplots()
    ax1.plot(range(0, i, nb_its_dev),
             np.clip(mloglike_dev[:i:nb_its_dev], a_min=-5, a_max=5), 'b-')
    ax1.plot(np.clip(mloglike_train[:i], a_min=-5, a_max=5), 'r--')
    ax1.set_ylabel('Cross Entropy')
    plt.xlabel('epoch')
    plt.grid(b=True, which='major', color='k', linestyle='-')
    plt.grid(b=True, which='minor', color='k', linestyle='--')
    lgd = plt.legend(['test', 'train'],
                     markerscale=marker,
                     prop={
                         'size': textsize,
                         'weight': 'normal'
                     })
    ax = plt.gca()
    plt.title('classification costs')
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(textsize)
        item.set_weight('normal')
    plt.savefig(media_dir + '/cost.png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    if show:
        plt.show()

    plt.figure(dpi=100)
    fig, ax1 = plt.subplots()
    ax1.plot(range(0, i), KL_train[:i], 'b-')
    ax1.set_ylabel('KL')
    plt.xlabel('epoch')
    plt.grid(b=True, which='major', color='k', linestyle='-')
    plt.grid(b=True, which='minor', color='k', linestyle='--')
    lgd = plt.legend(['KL'],
                     markerscale=marker,
                     prop={
                         'size': textsize,
                         'weight': 'normal'
                     })
    ax = plt.gca()
    plt.title('KL divideed by number of samples')
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(textsize)
        item.set_weight('normal')
    plt.savefig(media_dir + '/KL.png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    if show:
        plt.show()

    plt.figure(dpi=100)
    fig, ax1 = plt.subplots()
    ax1.plot(range(0, i), ELBO_train[:i], 'b-')
    ax1.set_ylabel('nats')
    plt.xlabel('epoch')
    plt.grid(b=True, which='major', color='k', linestyle='-')
    plt.grid(b=True, which='minor', color='k', linestyle='--')
    lgd = plt.legend(['ELBO'],
                     markerscale=marker,
                     prop={
                         'size': textsize,
                         'weight': 'normal'
                     })
    ax = plt.gca()
    plt.title('ELBO')
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(textsize)
        item.set_weight('normal')
    plt.savefig(media_dir + '/ELBO.png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    if show:
        plt.show()

    plt.figure(dpi=100)
    fig, ax2 = plt.subplots()
    ax2.set_ylabel('% error')
    ax2.semilogy(range(0, i, nb_its_dev), err_dev[:i:nb_its_dev], 'b-')
    ax2.semilogy(err_train[:i], 'r--')
    ax2.set_ylim(top=1, bottom=1e-3)
    plt.xlabel('epoch')
    plt.grid(b=True, which='major', color='k', linestyle='-')
    plt.grid(b=True, which='minor', color='k', linestyle='--')
    ax2.get_yaxis().set_minor_formatter(matplotlib.ticker.ScalarFormatter())
    ax2.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
    lgd = plt.legend(['test error', 'train error'],
                     markerscale=marker,
                     prop={
                         'size': textsize,
                         'weight': 'normal'
                     })
    ax = plt.gca()
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                 ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(textsize)
        item.set_weight('normal')
    # Fixed: the original passed the misspelled kwarg 'box_inches'.
    plt.savefig(media_dir + '/err.png',
                bbox_extra_artists=(lgd, ),
                bbox_inches='tight')
    if show:
        plt.show()

    return exp, mloglike_train, KL_train, ELBO_train, err_train, mloglike_dev, err_dev