示例#1
0
def main(args: Args):
    """Train a MnistEncoder for one epoch, logging metrics to MLflow."""
    # Resolve the four dataset file locations given on the command line.
    paths = [
        Path(p) for p in (args.train_image, args.train_label,
                          args.test_image, args.test_label)
    ]
    data = Mnist(32, 0.9, *paths)
    model = MnistEncoder(28, 64, 3)

    # https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.loggers.mlflow.html
    trainer = pl.Trainer(max_epochs=1, logger=MLFlowLogger())

    trainer.fit(model, train_dataloader=data)
示例#2
0
def dataset_iterator(args):
    """Return (train_gen, dev_gen, test_gen) for the dataset named by args.

    test_gen is None for datasets that ship no dedicated test split
    (cifar10, imagenet, raise).

    Raises:
        ValueError: if args.dataset is not one of the supported names.
    """
    # BUG FIX: the original used independent `if` statements, so the final
    # `else` paired only with the last `if` — every dataset except 'raise'
    # fell through to `raise ValueError`. `elif` restores the intended
    # mutually-exclusive dispatch.
    if args.dataset == 'mnist':
        train_gen, dev_gen, test_gen = Mnist.load(args.batch_size,
                                                  args.batch_size)
    elif args.dataset == 'cifar10':
        data_dir = '../../../images/cifar-10-batches-py/'
        train_gen, dev_gen = Cifar10.load(args.batch_size, data_dir)
        test_gen = None
    elif args.dataset == 'imagenet':
        data_dir = '../../../images/imagenet12/imagenet_val_png/'
        train_gen, dev_gen = Imagenet.load(args.batch_size, data_dir)
        test_gen = None
    elif args.dataset == 'raise':
        data_dir = '../../../images/raise/'
        train_gen, dev_gen = Raise.load(args.batch_size, data_dir)
        test_gen = None
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))

    return (train_gen, dev_gen, test_gen)
示例#3
0
def main():
    """Build the EBM training graph for the dataset chosen by FLAGS, then
    train and/or test it.

    One graph tower is built per GPU. Each tower draws negative samples by
    running Langevin dynamics (or HMC when FLAGS.hmc is set) inside a
    tf.while_loop, then assembles a contrastive loss between positive-data
    and negative-sample energies. Tower gradients are averaged and applied
    once. Horovod coordinates multi-process runs; only rank 0 writes logs
    and restores checkpoints (other ranks receive weights via broadcast).
    """
    print("Local rank: ", hvd.local_rank(), hvd.size())

    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    # Only rank 0 owns the log directory and TensorBoard writer.
    if hvd.rank() == 0:
        if not osp.exists(logdir):
            os.makedirs(logdir)
        logger = TensorBoardOutputFormat(logdir)
    else:
        logger = None

    LABEL = None
    print("Loading data...")
    # Each dataset branch creates placeholders sized for that dataset and a
    # matching architecture. NOTE(review): an unrecognized FLAGS.dataset
    # leaves dataset/model/X undefined and fails later with a NameError —
    # confirm the flag is validated upstream.
    if FLAGS.dataset == 'cifar10':
        dataset = Cifar10(augment=FLAGS.augment, rescale=FLAGS.rescale)
        test_dataset = Cifar10(train=False, rescale=FLAGS.rescale)
        channel_num = 3

        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        if FLAGS.large_model:
            model = ResNet32Large(num_channels=channel_num,
                                  num_filters=128,
                                  train=True)
        elif FLAGS.larger_model:
            model = ResNet32Larger(num_channels=channel_num, num_filters=128)
        elif FLAGS.wider_model:
            model = ResNet32Wider(num_channels=channel_num, num_filters=192)
        else:
            model = ResNet32(num_channels=channel_num, num_filters=128)

    elif FLAGS.dataset == 'imagenet':
        dataset = Imagenet(train=True)
        test_dataset = Imagenet(train=False)
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet32Wider(num_channels=channel_num, num_filters=256)

    elif FLAGS.dataset == 'imagenetfull':
        # Full-resolution ImageNet: data comes from TFImagenetLoader below,
        # so no torch-style dataset object is created here.
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet128(num_channels=channel_num, num_filters=64)

    elif FLAGS.dataset == 'mnist':
        dataset = Mnist(rescale=FLAGS.rescale)
        test_dataset = dataset
        channel_num = 1
        X_NOISE = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        model = MnistNet(num_channels=channel_num,
                         num_filters=FLAGS.num_filters)

    elif FLAGS.dataset == 'dsprites':
        dataset = DSprites(cond_shape=FLAGS.cond_shape,
                           cond_size=FLAGS.cond_size,
                           cond_pos=FLAGS.cond_pos,
                           cond_rot=FLAGS.cond_rot)
        test_dataset = dataset
        channel_num = 1

        X_NOISE = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)

        # Label width depends on which latent factor(s) are conditioned on.
        if FLAGS.dpos_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.dsize_only:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.drot_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_size:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.cond_shape:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)
        elif FLAGS.cond_pos:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_rot:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        else:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)

        model = DspritesNet(num_channels=channel_num,
                            num_filters=FLAGS.num_filters,
                            cond_size=FLAGS.cond_size,
                            cond_shape=FLAGS.cond_shape,
                            cond_pos=FLAGS.cond_pos,
                            cond_rot=FLAGS.cond_rot)

    print("Done loading...")

    if FLAGS.dataset == "imagenetfull":
        # In the case of full imagenet, use custom_tensorflow dataloader
        data_loader = TFImagenetLoader('train',
                                       FLAGS.batch_size,
                                       hvd.rank(),
                                       hvd.size(),
                                       rescale=FLAGS.rescale)
    else:
        data_loader = DataLoader(dataset,
                                 batch_size=FLAGS.batch_size,
                                 num_workers=FLAGS.data_workers,
                                 drop_last=True,
                                 shuffle=True)

    batch_size = FLAGS.batch_size

    # Single shared weight set; every tower below reuses weights[0].
    weights = [model.construct_weights('context_0')]

    # Y is only exported through target_vars below; it is not wired into
    # the loss graph constructed in this function.
    Y = tf.placeholder(shape=(None), dtype=tf.int32)

    # Variables used during training, split evenly across the GPU towers.
    X_SPLIT = tf.split(X, FLAGS.num_gpus)
    X_NOISE_SPLIT = tf.split(X_NOISE, FLAGS.num_gpus)
    LABEL_SPLIT = tf.split(LABEL, FLAGS.num_gpus)
    LABEL_POS_SPLIT = tf.split(LABEL_POS, FLAGS.num_gpus)
    LABEL_SPLIT_INIT = list(LABEL_SPLIT)
    tower_grads = []
    tower_gen_grads = []
    x_mod_list = []

    optimizer = AdamOptimizer(FLAGS.lr, beta1=0.0, beta2=0.999)
    optimizer = hvd.DistributedOptimizer(optimizer)

    # Build one graph tower per GPU.
    for j in range(FLAGS.num_gpus):

        if FLAGS.model_cclass:
            # Conditional-class mode: tile each input across all 10 classes,
            # score every (input, class) pair, then sample one label per
            # example from the class-energy distribution.
            ind_batch_size = FLAGS.batch_size // FLAGS.num_gpus
            label_tensor = tf.Variable(tf.convert_to_tensor(np.reshape(
                np.tile(np.eye(10), (FLAGS.batch_size, 1, 1)),
                (FLAGS.batch_size * 10, 10)),
                                                            dtype=tf.float32),
                                       trainable=False,
                                       dtype=tf.float32)
            x_split = tf.tile(
                tf.reshape(X_SPLIT[j], (ind_batch_size, 1, 32, 32, 3)),
                (1, 10, 1, 1, 1))
            x_split = tf.reshape(x_split, (ind_batch_size * 10, 32, 32, 3))
            energy_pos = model.forward(x_split,
                                       weights[0],
                                       label=label_tensor,
                                       stop_at_grad=False)

            energy_pos_full = tf.reshape(energy_pos, (ind_batch_size, 10))
            energy_partition_est = tf.reduce_logsumexp(energy_pos_full,
                                                       axis=1,
                                                       keepdims=True)
            # Gumbel-max style draw: argmax of -energy plus -log(-log U)
            # noise samples a label from the softmax over class energies.
            uniform = tf.random_uniform(tf.shape(energy_pos_full))
            label_tensor = tf.argmax(-energy_pos_full -
                                     tf.log(-tf.log(uniform)) -
                                     energy_partition_est,
                                     axis=1)
            label = tf.one_hot(label_tensor, 10, dtype=tf.float32)
            label = tf.Print(label, [label_tensor, energy_pos_full])
            LABEL_SPLIT[j] = label
            energy_pos = tf.concat(energy_pos, axis=0)
        else:
            # Standard mode: score the real data against its positive label.
            energy_pos = [
                model.forward(X_SPLIT[j],
                              weights[0],
                              label=LABEL_POS_SPLIT[j],
                              stop_at_grad=False)
            ]
            energy_pos = tf.concat(energy_pos, axis=0)

        print("Building graph...")
        x_mod = x_orig = X_NOISE_SPLIT[j]

        x_grads = []

        energy_negs = []
        loss_energys = []

        # Energy of the initial (pre-sampling) negatives, kept for logging
        # as 'energy_start'.
        energy_negs.extend([
            model.forward(tf.stop_gradient(x_mod),
                          weights[0],
                          label=LABEL_SPLIT[j],
                          stop_at_grad=False,
                          reuse=True)
        ])
        eps_begin = tf.zeros(1)

        steps = tf.constant(0)
        c = lambda i, x: tf.less(i, FLAGS.num_steps)

        def langevin_step(counter, x_mod):
            """One MCMC step: inject noise, descend the energy gradient,
            clip back into the valid pixel range."""
            x_mod = x_mod + tf.random_normal(
                tf.shape(x_mod),
                mean=0.0,
                stddev=0.005 * FLAGS.rescale * FLAGS.noise_scale)

            energy_noise = energy_start = tf.concat([
                model.forward(x_mod,
                              weights[0],
                              label=LABEL_SPLIT[j],
                              reuse=True,
                              stop_at_grad=False,
                              stop_batch=True)
            ],
                                                    axis=0)

            x_grad, label_grad = tf.gradients(FLAGS.temperature * energy_noise,
                                              [x_mod, LABEL_SPLIT[j]])
            energy_noise_old = energy_noise

            lr = FLAGS.step_lr

            # Optional gradient projection (l2 ball or l-infinity box).
            if FLAGS.proj_norm != 0.0:
                if FLAGS.proj_norm_type == 'l2':
                    x_grad = tf.clip_by_norm(x_grad, FLAGS.proj_norm)
                elif FLAGS.proj_norm_type == 'li':
                    x_grad = tf.clip_by_value(x_grad, -FLAGS.proj_norm,
                                              FLAGS.proj_norm)
                else:
                    print("Other types of projection are not supported!!!")
                    assert False

            # Clip gradient norm for now
            if FLAGS.hmc:
                # Step size should be tuned to get around 65% acceptance
                def energy(x):
                    return FLAGS.temperature * \
                        model.forward(x, weights[0], label=LABEL_SPLIT[j], reuse=True)

                x_last = hmc(x_mod, 15., 10, energy)
            else:
                x_last = x_mod - (lr) * x_grad

            x_mod = x_last
            x_mod = tf.clip_by_value(x_mod, 0, FLAGS.rescale)

            counter = counter + 1

            return counter, x_mod

        # Run FLAGS.num_steps sampler iterations inside the graph.
        steps, x_mod = tf.while_loop(c, langevin_step, (steps, x_mod))

        energy_eval = model.forward(x_mod,
                                    weights[0],
                                    label=LABEL_SPLIT[j],
                                    stop_at_grad=False,
                                    reuse=True)
        x_grad = tf.gradients(FLAGS.temperature * energy_eval, [x_mod])[0]
        x_grads.append(x_grad)

        # Energy of the final negatives; stop_gradient so sampler outputs
        # act as constants in the loss.
        energy_negs.append(
            model.forward(tf.stop_gradient(x_mod),
                          weights[0],
                          label=LABEL_SPLIT[j],
                          stop_at_grad=False,
                          reuse=True))

        test_x_mod = x_mod

        temp = FLAGS.temperature

        energy_neg = energy_negs[-1]
        # Mean absolute deviation between samples and real data (diagnostic).
        x_off = tf.reduce_mean(
            tf.abs(x_mod[:tf.shape(X_SPLIT[j])[0]] - X_SPLIT[j]))

        loss_energy = model.forward(x_mod,
                                    weights[0],
                                    reuse=True,
                                    label=LABEL,
                                    stop_grad=True)

        print("Finished processing loop construction ...")

        target_vars = {}

        # Entropy of the sampled-label distribution (conditional modes only).
        if FLAGS.cclass or FLAGS.model_cclass:
            label_sum = tf.reduce_sum(LABEL_SPLIT[0], axis=0)
            label_prob = label_sum / tf.reduce_sum(label_sum)
            label_ent = -tf.reduce_sum(
                label_prob * tf.math.log(label_prob + 1e-7))
        else:
            label_ent = tf.zeros(1)

        target_vars['label_ent'] = label_ent

        if FLAGS.train:

            # Contrastive objective: push positive energies down and
            # negative-sample energies up, per the selected formulation.
            if FLAGS.objective == 'logsumexp':
                pos_term = temp * energy_pos
                energy_neg_reduced = (energy_neg - tf.reduce_min(energy_neg))
                coeff = tf.stop_gradient(tf.exp(-temp * energy_neg_reduced))
                norm_constant = tf.stop_gradient(tf.reduce_sum(coeff)) + 1e-4
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = coeff * (-1 * temp * energy_neg) / norm_constant
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'cd':
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = -tf.reduce_mean(temp * energy_neg)
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'softplus':
                loss_ml = FLAGS.ml_coeff * \
                    tf.nn.softplus(temp * (energy_pos - energy_neg))

            loss_total = tf.reduce_mean(loss_ml)

            if not FLAGS.zero_kl:
                loss_total = loss_total + tf.reduce_mean(loss_energy)

            # L2 regularization on both energy magnitudes.
            loss_total = loss_total + \
                FLAGS.l2_coeff * (tf.reduce_mean(tf.square(energy_pos)) + tf.reduce_mean(tf.square((energy_neg))))

            print("Started gradient computation...")
            gvs = optimizer.compute_gradients(loss_total)
            # Drop variables that received no gradient.
            gvs = [(k, v) for (k, v) in gvs if k is not None]

            print("Applying gradients...")

            tower_grads.append(gvs)

            print("Finished applying gradients.")

            target_vars['loss_ml'] = loss_ml
            target_vars['total_loss'] = loss_total
            target_vars['loss_energy'] = loss_energy
            target_vars['weights'] = weights
            target_vars['gvs'] = gvs

        target_vars['X'] = X
        target_vars['Y'] = Y
        target_vars['LABEL'] = LABEL
        target_vars['LABEL_POS'] = LABEL_POS
        target_vars['X_NOISE'] = X_NOISE
        target_vars['energy_pos'] = energy_pos
        target_vars['energy_start'] = energy_negs[0]

        if len(x_grads) >= 1:
            target_vars['x_grad'] = x_grads[-1]
            target_vars['x_grad_first'] = x_grads[0]
        else:
            target_vars['x_grad'] = tf.zeros(1)
            target_vars['x_grad_first'] = tf.zeros(1)

        target_vars['x_mod'] = x_mod
        target_vars['x_off'] = x_off
        target_vars['temp'] = temp
        target_vars['energy_neg'] = energy_neg
        target_vars['test_x_mod'] = test_x_mod
        target_vars['eps_begin'] = eps_begin

    if FLAGS.train:
        # Average tower gradients and apply them in a single op.
        grads = average_gradients(tower_grads)
        train_op = optimizer.apply_gradients(grads)
        target_vars['train_op'] = train_op

    config = tf.ConfigProto()

    # Pin each Horovod process to its own GPU in multi-process runs.
    if hvd.size() > 1:
        config.gpu_options.visible_device_list = str(hvd.local_rank())

    sess = tf.Session(config=config)

    saver = loader = tf.train.Saver(max_to_keep=30,
                                    keep_checkpoint_every_n_hours=6)

    # Count trainable parameters for the log line below.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("Model has a total of {} parameters".format(total_parameters))

    sess.run(tf.global_variables_initializer())

    resume_itr = 0

    # Only rank 0 restores from disk; the broadcast below propagates the
    # restored weights to every other rank.
    if (FLAGS.resume_iter != -1 or not FLAGS.train) and hvd.rank() == 0:
        model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
        resume_itr = FLAGS.resume_iter
        # saver.restore(sess, model_file)
        optimistic_restore(sess, model_file)

    sess.run(hvd.broadcast_global_variables(0))
    print("Initializing variables...")

    print("Start broadcast")
    print("End broadcast")

    if FLAGS.train:
        print("Training phase")
        train(target_vars, saver, sess, logger, data_loader, resume_itr,
              logdir)
    print("Testing phase")
    test(target_vars, saver, sess, logger, data_loader)
示例#4
0
def main():
    """Estimate log-partition-function bounds of a trained EBM with
    annealed chain sampling.

    Loads the dataset/model pair selected by FLAGS, restores a checkpoint,
    then runs FLAGS.pdist annealing steps forward (lower bound) and
    backward (upper bound), accumulating the chain weights produced by
    ancestral_sample.
    """

    # Initialize dataset
    # NOTE(review): '2d'/'gauss' leaves channel_num/dim_input unset and no
    # model branch below matches them, so `model` would be undefined —
    # confirm those datasets are handled elsewhere before this path is used.
    if FLAGS.dataset == 'cifar10':
        dataset = Cifar10(train=False, rescale=FLAGS.rescale)
        channel_num = 3
        dim_input = 32 * 32 * 3
    elif FLAGS.dataset == 'imagenet':
        dataset = ImagenetClass()
        channel_num = 3
        dim_input = 64 * 64 * 3
    elif FLAGS.dataset == 'mnist':
        dataset = Mnist(train=False, rescale=FLAGS.rescale)
        channel_num = 1
        dim_input = 28 * 28 * 1
    elif FLAGS.dataset == 'dsprites':
        dataset = DSprites()
        channel_num = 1
        dim_input = 64 * 64 * 1
    elif FLAGS.dataset == '2d' or FLAGS.dataset == 'gauss':
        dataset = Box2D()

    dim_output = 1
    data_loader = DataLoader(dataset,
                             batch_size=FLAGS.batch_size,
                             num_workers=FLAGS.data_workers,
                             drop_last=False,
                             shuffle=True)

    # Pick the architecture matching the dataset.
    if FLAGS.dataset == 'mnist':
        model = MnistNet(num_channels=channel_num)
    elif FLAGS.dataset == 'cifar10':
        if FLAGS.large_model:
            model = ResNet32Large(num_filters=128)
        elif FLAGS.wider_model:
            model = ResNet32Wider(num_filters=192)
        else:
            model = ResNet32(num_channels=channel_num, num_filters=128)
    elif FLAGS.dataset == 'dsprites':
        model = DspritesNet(num_channels=channel_num,
                            num_filters=FLAGS.num_filters)

    weights = model.construct_weights('context_{}'.format(0))

    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    saver = loader = tf.train.Saver(max_to_keep=10)

    sess.run(tf.global_variables_initializer())
    logdir = osp.join(FLAGS.logdir, FLAGS.exp)

    model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
    resume_itr = FLAGS.resume_iter

    # NOTE(review): compares resume_iter against the *string* "-1"; if the
    # flag is an integer (as in the sibling training script) this condition
    # is always true and a restore is always attempted — confirm flag type.
    if FLAGS.resume_iter != "-1":
        optimistic_restore(sess, model_file)
    else:
        print("WARNING, YOU ARE NOT LOADING A SAVE FILE")
    # saver.restore(sess, model_file)

    # Build the annealed-sampling graph once; the feeds below drive it.
    chain_weights, a_prev, a_new, x, x_init, approx_lr = ancestral_sample(
        model, weights, FLAGS.batch_size, temp=FLAGS.temperature)
    print("Finished constructing ancestral sample ...................")

    # Diagnostic: average energy of real data under the restored model.
    # `label_default` is presumably a module-level constant — TODO confirm.
    if FLAGS.dataset != "gauss":
        comb_weights_cum = []
        batch_size = tf.shape(x_init)[0]
        label_tiled = tf.tile(label_default, (batch_size, 1))
        e_compute = -FLAGS.temperature * model.forward(
            x_init, weights, label=label_tiled)
        e_pos_list = []

        for data_corrupt, data, label_gt in tqdm(data_loader):
            e_pos = sess.run([e_compute], {x_init: data})[0]
            e_pos_list.extend(list(e_pos))

        print(len(e_pos_list))
        print("Positive sample probability ", np.mean(e_pos_list),
              np.std(e_pos_list))

    # Per-dataset annealing learning rate (empirically tuned constants).
    if FLAGS.dataset == "2d":
        alr = 0.0045
    elif FLAGS.dataset == "gauss":
        alr = 0.0085
    elif FLAGS.dataset == "mnist":
        alr = 0.0065
        #90 alr = 0.0035
    else:
        # alr = 0.0125
        if FLAGS.rescale == 8:
            alr = 0.0085
        else:
            alr = 0.0045


# Bidirectional estimate: forward chain gives a lower bound, the reversed
# chain gives an upper bound.
    for i in range(1):
        tot_weight = 0
        for j in tqdm(range(1, FLAGS.pdist + 1)):
            # At the first step, initialize the chain from uniform noise
            # shaped for the active dataset.
            if j == 1:
                if FLAGS.dataset == "cifar10":
                    x_curr = np.random.uniform(0,
                                               FLAGS.rescale,
                                               size=(FLAGS.batch_size, 32, 32,
                                                     3))
                elif FLAGS.dataset == "gauss":
                    x_curr = np.random.uniform(0,
                                               FLAGS.rescale,
                                               size=(FLAGS.batch_size,
                                                     FLAGS.gauss_dim))
                elif FLAGS.dataset == "mnist":
                    x_curr = np.random.uniform(0,
                                               FLAGS.rescale,
                                               size=(FLAGS.batch_size, 28, 28))
                else:
                    x_curr = np.random.uniform(0,
                                               FLAGS.rescale,
                                               size=(FLAGS.batch_size, 2))

            alpha_prev = (j - 1) / FLAGS.pdist
            alpha_new = j / FLAGS.pdist
            cweight, x_curr = sess.run(
                [chain_weights, x], {
                    a_prev: alpha_prev,
                    a_new: alpha_new,
                    x_init: x_curr,
                    approx_lr: alr * (5**(2.5 * -alpha_prev))
                })
            tot_weight = tot_weight + cweight

        print("Total values of lower value based off forward sampling",
              np.mean(tot_weight), np.std(tot_weight))

        tot_weight = 0

        # Backward pass over the same schedule; weights are subtracted.
        for j in tqdm(range(FLAGS.pdist, 0, -1)):
            alpha_new = (j - 1) / FLAGS.pdist
            alpha_prev = j / FLAGS.pdist
            cweight, x_curr = sess.run(
                [chain_weights, x], {
                    a_prev: alpha_prev,
                    a_new: alpha_new,
                    x_init: x_curr,
                    approx_lr: alr * (5**(2.5 * -alpha_prev))
                })
            tot_weight = tot_weight - cweight

        print("Total values of upper value based off backward sampling",
              np.mean(tot_weight), np.std(tot_weight))
示例#5
0
文件: main.py 项目: RuiShu/bcde
def main():
    """Build, train and evaluate a BCDE/BJDE model on MNIST (Python 2).

    Wires up the TensorFlow graph for the model variant named by
    args.model, creates a FileWriter-based logger, then runs the epoch
    loop with per-epoch train/valid/test evaluation.
    """
    log_file = make_file_name()
    print args

    def evaluate(x, y, xu, yu, eval_tensors, iw=1):
        # Run eval_tensors on the given labeled (x, y) and unlabeled
        # (xu, yu) arrays. With iw > 1 the data is chunked into batches of
        # 2000 and the per-batch results averaged. Closes over `u` and `T`,
        # which are bound later in main's body before this is called.
        if iw == 1:
            xs, ys, xus, yus = [x], [y], [xu], [yu]
        else:
            batches = 2000
            xs, ys = list(tb.nputils.split(x, batches)), list(
                tb.nputils.split(y, batches))
            xus, yus = list(tb.nputils.split(xu, batches)), list(
                tb.nputils.split(yu, batches))

        values = []
        for x, y, xu, yu in zip(xs, ys, xus, yus):
            feed_dict = {
                T.x: x,
                T.xu: xu,
                T.y: y,
                T.yu: yu,
                T.phase: 0,
                T.u: u,
                T.iw: iw
            }
            v = T.sess.run(eval_tensors, feed_dict)
            values += [v]

        # Average each tensor across batches, preserving its dtype.
        values = [np.mean(v).astype(v[0].dtype) for v in zip(*values)]
        return values

    def train(T_train_step, T_loss, data, iterep, n_epochs):
        # Run n_epochs of optimization (iterep iterations per epoch),
        # aborting on NaN loss and logging train/valid/test metrics at
        # every epoch boundary.
        for i in xrange(iterep * n_epochs):
            x, y, xu, yu = data.next_batch(args.bs)
            feed_dict = {
                T.x: x,
                T.xu: xu,
                T.y: y,
                T.yu: yu,
                T.phase: 1,
                T.u: u,
                T.iw: 1
            }
            _, loss = T.sess.run([T_train_step, T_loss], feed_dict)

            message = "loss: {:.2e}".format(loss)
            end_epoch, epoch = tb.utils.progbar(i,
                                                iterep,
                                                message,
                                                bar_length=5)

            if np.isnan(loss):
                print "NaN detected"
                quit()

            if end_epoch:
                # Importance-weighted evaluation only every n_checks epochs.
                iw = 100 if epoch % args.n_checks == 0 else 1
                tr_values = evaluate(data.x_label,
                                     data.y_label,
                                     data.x_train,
                                     data.y_train,
                                     writer.tensors,
                                     iw=1)
                va_values = evaluate(data.x_valid,
                                     data.y_valid,
                                     data.x_valid,
                                     data.y_valid,
                                     writer.tensors[:-1],
                                     iw=iw)
                te_values = evaluate(data.x_test,
                                     data.y_test,
                                     data.x_test,
                                     data.y_test,
                                     writer.tensors[:-1],
                                     iw=iw)
                values = tr_values + va_values + te_values + [epoch]
                writer.write(values=values)

    def make_writer():
        # Make log file
        writer = tb.FileWriter(log_file,
                               args=args,
                               pipe_to_sys=True,
                               overwrite=args.run >= 999)
        # Train log
        writer.add_var('train_iw', '{:4d}', T.iw)
        for v in ['bcde', 'bjde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('train_{:s}'.format(v), '{:8.3f}', T[v])
        writer.add_var('l2_loss', '{:9.2e}', T.l2)

        # Validation log
        writer.add_var('valid_iw', '{:4d}')
        for v in ['bcde', 'bcde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('valid_{:s}'.format(v), '{:8.3f}')

        # Test log
        writer.add_var('test_iw', '{:4d}')
        for v in ['bcde', 'bcde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('test_{:s}'.format(v), '{:8.3f}')

        # Extra info
        writer.add_var('epoch', '{:>8d}')
        writer.initialize()
        return writer

    ###############
    # Build model #
    ###############
    tf.reset_default_graph()
    # T holds every graph tensor/placeholder plus the session; losses start
    # as constants and are overwritten by the model builders below.
    T = tb.utils.TensorDict(
        dict(bcde=constant(0),
             bjde_x=constant(0),
             bjde_xu=constant(0),
             bjde_yu=constant(0),
             bjde_xy=constant(0),
             l2=constant(0),
             loss=constant(0)))
    T.xu = placeholder((None, args.x_size), name='xu')
    T.yu = placeholder((None, args.y_size), name='yu')
    T.x = placeholder((None, args.x_size), name='x')
    T.y = placeholder((None, args.y_size), name='y')
    T.iw = placeholder(None, 'int32', name='iw') * 1  # hack for pholder eval
    T.u = placeholder(None, name='u')
    T.phase = placeholder(None, tf.bool, name='phase')

    if args.model == 'conditional':
        conditional(T)
    elif args.model in {'hybrid', 'hybrid_factored'}:
        hybrid(T)
    elif args.model == 'pretrained':
        pretrained(T)

    T.sess = tf.Session()
    T.sess.run(tf.global_variables_initializer())

    # Push all labeled data into unlabeled data set as well if using pretraining
    mnist = Mnist(args.n_label,
                  args.seed,
                  args.task,
                  shift=args.shift,
                  duplicate='pretrain' in args.model,
                  binarize=True)

    # Define remaining optimization hyperparameters
    # `u` is the unlabeled-data weight used by the evaluate/train closures.
    if args.model == 'conditional':
        iterep = args.n_label / args.bs
        u = 1
    elif args.model in {'hybrid', 'hybrid_factored'}:
        iterep = args.n_total / args.bs
        u = 1 - args.n_label / float(args.n_total)
    elif args.model == 'pretrained':
        pretrain_iterep = args.n_total / args.bs
        iterep = args.n_label / args.bs
        u = 1

    # Sanity checks and creation of logger
    print "Data/Task statistics"
    print "Task:", args.task
    print "Data shapes of (x, y) for Labeled/Train/Valid/Test sets"
    print(mnist.x_label.shape, mnist.y_label.shape)
    print(mnist.x_train.shape, mnist.y_train.shape)
    print(mnist.x_valid.shape, mnist.y_valid.shape)
    print(mnist.x_test.shape, mnist.y_test.shape)
    writer = make_writer()

    ###############
    # Train model #
    ###############
    if 'pretrained' in args.model:
        print "Pretrain epochs, iterep", args.n_pretrain_epochs, pretrain_iterep
        train(T.pre_train_step, T.pre_loss, mnist, pretrain_iterep,
              args.n_pretrain_epochs)

    if 'hybrid' in args.model:
        print "Hybrid weighting on x_train and x_label:", (u, 1 - u)
    print "Epochs, Iterep", args.n_epochs, iterep
    train(T.train_step, T.loss, mnist, iterep, args.n_epochs)
    # NOTE(review): the two lines below look like paste residue — `phase` is
    # assigned but never used and the bare `))` is unbalanced; the example
    # separator that should precede the following script appears to be
    # missing from this dump. Confirm against the original source.
    phase = placeholder((), tf.bool, name='phase')
))

# Dynamically import the model class named by args.model and wrap T with it
# (Python 2 `exec` statement syntax).
exec "from {0:s} import {0:s}".format(args.model)
exec "T = {:s}(T)".format(args.model)
T.sess.run(tf.global_variables_initializer())

# Warm-start the encoder ('enc' scope) from the latest checkpoint unless
# training the plain classifier.
if args.model != 'classifier':
    path = tf.train.latest_checkpoint('save')
    restorer = tf.train.Saver(tf.get_collection('trainable_variables', 'enc'))
    restorer.restore(T.sess, path)

#############
# Load data #
#############
mnist = Mnist(size=32)
svhn = Svhn(size=32)

#########
# Train #
#########
bs = 100
iterep = 600
n_epoch = 5000 if args.model != 'classifier' else 17
epoch = 0
feed_dict = {T.phase: 1}
saver = tf.train.Saver()

print "Batch size:", bs
print "Iterep:", iterep
print "Total iterations:", n_epoch * iterep
示例#7
0
# Gaussian posterior q(z|.): a linear mean head and a softplus-positive
# variance head, both reading a 256-dim feature vector into 50 dims.
q_net['z'].mu = Sequential([Dense(50, input_dim=256)])
q_net['z'].var = Sequential(
    [Dense(50, input_dim=256), Activation('softplus')])

# Decoder p(x|.): 60-dim code -> two 256-unit relu layers -> 784 sigmoid
# outputs (flattened 28x28 image).
decoder_layers = [Dense(256, input_dim=60), Activation('relu')]
decoder_layers += [Dense(256), Activation('relu')]
decoder_layers += [Dense(784), Activation('sigmoid')]
p_net['x'].net = Sequential(decoder_layers)

# Assemble and compile the VAE with equal weights on its three loss terms.
vae = VAE(u_net=u_net, q_net=q_net, p_net=p_net)
vae.compile('adam', loss_weights=[1.0, 1.0, 1.0])

# 100-sample MNIST loader; NLL callback evaluates train and validation
# likelihood every epoch.
dataloader = Mnist(nb_data=100, batchsize=100)
losslog = LossLog()
nll = NegativeLogLikelihood(dataloader,
                            n_samples=1,
                            run_every=1,
                            run_training=True,
                            run_validation=True,
                            display_epoch=True,
                            end_line=True)

vae.fit(dataloader,
        nb_epoch=1000,
        iter_per_epoch=600,
        callbacks=[losslog, nll],
        verbose=1)
示例#8
0
def main_single(gpu, FLAGS):
    """Entry point for one training process (one GPU).

    Builds the datasets/dataloaders, constructs an ensemble of models plus
    their EMA copies, optionally restores a checkpoint, and hands control to
    ``train``.

    Args:
        gpu: local GPU index for this process.
        FLAGS: argparse-style namespace holding the run configuration.

    Side effects: initializes ``torch.distributed`` when more than one GPU
    participates, and creates the log directory if missing.
    """
    if FLAGS.slurm:
        init_distributed_mode(FLAGS)

    os.environ['MASTER_ADDR'] = FLAGS.master_addr
    os.environ['MASTER_PORT'] = FLAGS.port

    # Global rank of this process across all nodes.
    rank_idx = FLAGS.node_rank * FLAGS.gpus + gpu
    world_size = FLAGS.nodes * FLAGS.gpus
    print("Values of args: ", FLAGS)

    if world_size > 1:
        if FLAGS.slurm:
            dist.init_process_group(backend='nccl', init_method='env://', world_size=world_size, rank=rank_idx)
        else:
            dist.init_process_group(backend='nccl', init_method='tcp://localhost:1700', world_size=world_size, rank=rank_idx)

    # ---- Dataset selection ----------------------------------------------
    if FLAGS.dataset == "cifar10":
        train_dataset = Cifar10(FLAGS)
        valid_dataset = Cifar10(FLAGS, train=False, augment=False)
        test_dataset = Cifar10(FLAGS, train=False, augment=False)
    elif FLAGS.dataset == "stl":
        train_dataset = STLDataset(FLAGS)
        valid_dataset = STLDataset(FLAGS, train=False)
        test_dataset = STLDataset(FLAGS, train=False)
    elif FLAGS.dataset == "object":
        train_dataset = ObjectDataset(FLAGS.cond_idx)
        valid_dataset = ObjectDataset(FLAGS.cond_idx)
        test_dataset = ObjectDataset(FLAGS.cond_idx)
    elif FLAGS.dataset == "imagenet":
        train_dataset = ImageNet()
        valid_dataset = ImageNet()
        test_dataset = ImageNet()
    elif FLAGS.dataset == "mnist":
        train_dataset = Mnist(train=True)
        valid_dataset = Mnist(train=False)
        test_dataset = Mnist(train=False)
    elif FLAGS.dataset == "celeba":
        train_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
        valid_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
        test_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
    elif FLAGS.dataset == "lsun":
        train_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
        valid_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
        test_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
    else:
        # Raise instead of `assert False`: asserts vanish under `python -O`.
        raise ValueError("Unknown dataset: {}".format(FLAGS.dataset))

    train_dataloader = DataLoader(train_dataset, num_workers=FLAGS.data_workers, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
    valid_dataloader = DataLoader(valid_dataset, num_workers=FLAGS.data_workers, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
    test_dataloader = DataLoader(test_dataset, num_workers=FLAGS.data_workers, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)

    FLAGS_OLD = FLAGS

    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    best_inception = 0.0

    # ---- Resume: adopt checkpointed FLAGS, keeping runtime overrides ----
    if FLAGS.resume_iter != 0:
        model_path = osp.join(logdir, "model_{}.pth".format(FLAGS.resume_iter))
        checkpoint = torch.load(model_path)
        best_inception = checkpoint['best_inception']
        FLAGS = checkpoint['FLAGS']

        # These settings must come from the current invocation, not the
        # checkpoint (cluster layout and sampling hyperparameters).
        FLAGS.resume_iter = FLAGS_OLD.resume_iter
        FLAGS.nodes = FLAGS_OLD.nodes
        FLAGS.gpus = FLAGS_OLD.gpus
        FLAGS.node_rank = FLAGS_OLD.node_rank
        FLAGS.master_addr = FLAGS_OLD.master_addr
        FLAGS.train = FLAGS_OLD.train
        FLAGS.num_steps = FLAGS_OLD.num_steps
        FLAGS.step_lr = FLAGS_OLD.step_lr
        FLAGS.batch_size = FLAGS_OLD.batch_size
        FLAGS.ensembles = FLAGS_OLD.ensembles
        FLAGS.kl_coeff = FLAGS_OLD.kl_coeff
        FLAGS.repel_im = FLAGS_OLD.repel_im
        FLAGS.save_interval = FLAGS_OLD.save_interval

        # Copy the merged config back onto the original namespace object.
        # BUGFIX: the original used `FLAGS_OLD[key] = ...`, which raises
        # TypeError on an argparse.Namespace (no item assignment).
        for key in dir(FLAGS):
            if "__" not in key:
                setattr(FLAGS_OLD, key, getattr(FLAGS, key))

        FLAGS = FLAGS_OLD

    # ---- Model architecture selection -----------------------------------
    if FLAGS.dataset == "cifar10":
        model_fn = ResNetModel
    elif FLAGS.dataset == "stl":
        model_fn = ResNetModel
    elif FLAGS.dataset == "object":
        model_fn = CelebAModel
    elif FLAGS.dataset == "mnist":
        model_fn = MNISTModel
    elif FLAGS.dataset == "celeba":
        model_fn = CelebAModel
    elif FLAGS.dataset == "lsun":
        model_fn = CelebAModel
    elif FLAGS.dataset == "imagenet":
        model_fn = ImagenetModel
    else:
        raise ValueError("No model registered for dataset: {}".format(FLAGS.dataset))

    models = [model_fn(FLAGS).train() for _ in range(FLAGS.ensembles)]
    models_ema = [model_fn(FLAGS).train() for _ in range(FLAGS.ensembles)]

    torch.cuda.set_device(gpu)
    if FLAGS.cuda:
        models = [model.cuda(gpu) for model in models]
        # BUGFIX: the original bound this list to `model_ema` and discarded
        # it; it only worked because nn.Module.cuda() also moves in place.
        models_ema = [model_ema.cuda(gpu) for model_ema in models_ema]

    if FLAGS.gpus > 1:
        sync_model(models)

    # Optimize all ensemble members jointly with a single Adam instance.
    parameters = []
    for model in models:
        parameters.extend(list(model.parameters()))

    optimizer = Adam(parameters, lr=FLAGS.lr, betas=(0.0, 0.9), eps=1e-8)

    # mu=0.0 presumably copies the live weights into the EMA models as an
    # initial snapshot -- TODO confirm against ema_model's definition.
    ema_model(models, models_ema, mu=0.0)

    logger = TensorBoardOutputFormat(logdir)

    it = FLAGS.resume_iter

    if not osp.exists(logdir):
        os.makedirs(logdir)

    checkpoint = None
    if FLAGS.resume_iter != 0:
        model_path = osp.join(logdir, "model_{}.pth".format(FLAGS.resume_iter))
        checkpoint = torch.load(model_path)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        for i, (model, model_ema) in enumerate(zip(models, models_ema)):
            model.load_state_dict(checkpoint['model_state_dict_{}'.format(i)])
            model_ema.load_state_dict(checkpoint['ema_model_state_dict_{}'.format(i)])

    print("New Values of args: ", FLAGS)

    # Parameter count of a single ensemble member (all members share the
    # architecture). The original relied on the leaked loop variable `model`.
    pytorch_total_params = sum(p.numel() for p in models[0].parameters() if p.requires_grad)
    print("Number of parameters for models", pytorch_total_params)

    train(models, models_ema, optimizer, logger, train_dataloader, FLAGS.resume_iter, logdir, FLAGS, gpu, best_inception)
示例#9
0
def test():
    """Train a model on MNIST and print its test-set confusion matrix."""
    from data import Mnist
    dataset = Mnist("./cache/")
    fitted = train(dataset.train_images, dataset.train_labels)
    matrix = get_confusion_matrix(fitted, dataset.test_images, dataset.test_labels)
    print(matrix.matrix_str())
示例#10
0
        kl_z = -log_norm(z, *z_prior) + log_norm(z, *z_post)
        u_loss = tf.transpose(tf.reshape(rec_x + rec_y + kl_z, (10, -1)))
        qy = tf.nn.softmax(y_logits)
        ln_qy = tf.nn.log_softmax(y_logits)
        u_loss = tf.reduce_mean(
            tf.reduce_sum(u_loss * qy + qy * ln_qy, axis=-1))

# Scalar loss weights fed at run time: labelled (wl), unlabelled (wu),
# and auxiliary (wa) terms.
wl = placeholder(None, name='wl')
wu = placeholder(None, name='wu')
wa = placeholder(None, name='wa')
with tf.name_scope('loss'):
    y = placeholder((None, 10), name='y')  # one-hot labels, 10 classes
    # Total objective: weighted sum of the three loss components.
    loss = wl * l_loss + wu * u_loss + wa * a_loss

train_step = tf.train.AdamOptimizer().minimize(loss)
mnist = Mnist(100, 0, binarize=False, duplicate=False)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# NOTE(review): these rebind the placeholder names above to plain floats --
# presumably the concrete weight values passed via feed_dict later; confirm
# the placeholder tensors are no longer needed by name past this point.
wu = 1.0
wl = 1.0
wa = 1.0
# Columns reported by the training log writer; the test_* columns are
# supplied with values elsewhere.
writer.add_var('train_acc', '{:8.3f}', acc)
writer.add_var('train_a_loss', '{:8.3f}', a_loss)
writer.add_var('train_l_loss', '{:8.3f}', l_loss)
writer.add_var('train_u_loss', '{:8.3f}', u_loss)
writer.add_var('test_acc', '{:8.3f}')
writer.add_var('test_a_loss', '{:8.3f}')
writer.add_var('test_l_loss', '{:8.3f}')
writer.add_var('test_u_loss', '{:8.3f}')
writer.add_var('epoch', '{:>8d}')
示例#11
0
def compute_inception(model):
    size = FLAGS.im_number
    num_steps = size // 1000

    images = []
    test_ims = []

    if FLAGS.dataset == "cifar10":
        test_dataset = Cifar10(FLAGS)
    elif FLAGS.dataset == "celeba":
        test_dataset = CelebAHQ()
    elif FLAGS.dataset == "mnist":
        test_dataset = Mnist(train=True)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=FLAGS.batch_size,
                                 num_workers=4,
                                 shuffle=True,
                                 drop_last=False)

    if FLAGS.dataset == "cifar10":
        for data_corrupt, data, label_gt in tqdm(test_dataloader):
            data = data.numpy()
            test_ims.extend(list(rescale_im(data)))

            if len(test_ims) > 10000:
                break
    elif FLAGS.dataset == "mnist":
        for data_corrupt, data, label_gt in tqdm(test_dataloader):
            data = data.numpy()
            test_ims.extend(list(np.tile(rescale_im(data), (1, 1, 3))))

            if len(test_ims) > 10000:
                break

    test_ims = test_ims[:10000]

    classes = 10

    print(FLAGS.batch_size)
    data_buffer = None

    for j in range(num_steps):
        itr = int(1000 / 500 * FLAGS.repeat_scale)

        if data_buffer is None:
            data_buffer = InceptionReplayBuffer(1000)

        curr_index = 0

        identity = np.eye(classes)

        if FLAGS.dataset == "celeba":
            n = 128
            c = 3
        elif FLAGS.dataset == "mnist":
            n = 28
            c = 1
        else:
            n = 32
            c = 3

        for i in tqdm(range(itr)):
            noise_scale = [1]
            if len(data_buffer) < 1000:
                x_init = np.random.uniform(0, 1, (FLAGS.batch_size, c, n, n))
                label = np.random.randint(0, classes, (FLAGS.batch_size))

                x_init = torch.Tensor(x_init).cuda()
                label = identity[label]
                label = torch.Tensor(label).cuda()

                x_new, _ = gen_image(label, FLAGS, model, x_init,
                                     FLAGS.num_steps)
                x_new = x_new.detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                data_buffer.add(x_new, label)
            else:
                if i < itr - FLAGS.nomix:
                    (x_init, label), idx = data_buffer.sample(
                        FLAGS.batch_size, transform=FLAGS.transform)
                else:
                    if FLAGS.dataset == "celeba":
                        n = 20
                    else:
                        n = 2

                    ix = i % n
                    # for i in range(n):
                    start_idx = (1000 // n) * ix
                    end_idx = (1000 // n) * (ix + 1)
                    (x_init, label) = data_buffer._encode_sample(
                        list(range(start_idx, end_idx)), transform=False)
                    idx = list(range(start_idx, end_idx))

                x_init = torch.Tensor(x_init).cuda()
                label = torch.Tensor(label).cuda()
                x_new, energy = gen_image(label, FLAGS, model, x_init,
                                          FLAGS.num_steps)
                energy = energy.cpu().detach().numpy()
                x_new = x_new.cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                data_buffer.set_elms(idx, x_new, label)

                if FLAGS.im_number != 50000:
                    print(np.mean(energy), np.std(energy))

            curr_index += 1

        ims = np.array(data_buffer._storage[:1000])
        ims = rescale_im(ims).transpose((0, 2, 3, 1))

        if FLAGS.dataset == "mnist":
            ims = np.tile(ims, (1, 1, 1, 3))

        images.extend(list(ims))

    random.shuffle(images)
    saveim = osp.join('sandbox_cachedir', FLAGS.exp,
                      "test{}.png".format(FLAGS.idx))

    if FLAGS.dataset == "cifar10":
        rix = np.random.permutation(1000)[:100]
        ims = ims[rix]
        im_panel = ims.reshape((10, 10, 32, 32, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((320, 320, 3))
        imsave(saveim, im_panel)

        print("Saved image!!!!")
        splits = max(1, len(images) // 5000)
        score, std = get_inception_score(images, splits=splits)
        print("Inception score of {} with std of {}".format(score, std))

        # FID score
        n = min(len(images), len(test_ims))
        fid = get_fid_score(images, test_ims)
        print("FID of score {}".format(fid))

    elif FLAGS.dataset == "mnist":
        # ims = ims[:100]
        # im_panel = ims.reshape((10, 10, 32, 32, 3)).transpose((0, 2, 1, 3, 4)).reshape((320, 320, 3))
        # imsave(saveim, im_panel)

        ims = ims[:100]
        im_panel = ims.reshape((10, 10, 28, 28, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((280, 280, 3))
        imsave(saveim, im_panel)

        print("Saved image!!!!")
        splits = max(1, len(images) // 5000)
        # score, std = get_inception_score(images, splits=splits)
        # print("Inception score of {} with std of {}".format(score, std))

        # FID score
        n = min(len(images), len(test_ims))
        fid = get_fid_score(images, test_ims)
        print("FID of score {}".format(fid))

    elif FLAGS.dataset == "celeba":

        ims = ims[:25]
        im_panel = ims.reshape((5, 5, 128, 128, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((5 * 128, 5 * 128, 3))
        imsave(saveim, im_panel)