Example #1
def build_nn_objective(num_hidden=5, num_data=1000):
    """Builds a neural net, creates weights and data from that net,
    then defines the objective as the training error."""

    # Load and process MNIST data (borrowing from Kayak)
    partial_flatten = lambda x: np.reshape(x,
                                           (x.shape[0], np.prod(x.shape[1:])))
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :],
                                    dtype=int)

    edge_pixels_removed = 4  # on each edge.
    subsample_pixels = 5  # in both directions.
    images, labels = mnist()
    # Remove border pixels, subsample, and flatten to an N by (x * y) matrix.
    images = images[:, edge_pixels_removed:-edge_pixels_removed,
                    edge_pixels_removed:-edge_pixels_removed]
    images = images[:num_data, ::subsample_pixels, ::subsample_pixels]
    images = partial_flatten(images) / 255.0
    labels = one_hot(labels, 10)[:num_data, :]  #TODO: Randomize order?

    # Build the network.
    # num_hidden is a single hidden-layer width, so wrap it in a list.
    layer_sizes = [images.shape[1]] + [num_hidden] + [10]
    L2_reg = 0
    parser, loss = make_nn_funs(layer_sizes, L2_reg)

    # Build functions to interrogate the objective at a particular set of parameters.
    def objective(x, idxs=slice(0, num_data)):
        return loss(x, X=images, T=labels, idxs=idxs)

    obj_grad = grad(objective)
    obj_hvp = sliced_hvp(obj_grad)
    weights_subsets = {k: v[0] for k, v in parser.idxs_and_shapes.items()}
    return parser.N, objective, obj_grad, obj_hvp, weights_subsets
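sliced_hvp is used above but not defined in this snippet. A minimal sketch of what such a helper could look like, assuming an autograd-style reverse-mode grad and a flat parameter vector; this is an illustration, not the source's actual implementation:

from autograd import grad
import autograd.numpy as np

def sliced_hvp(objective_grad):
    """Hypothetical helper: turn a gradient function over a flat parameter
    vector into a Hessian-vector product restricted to a parameter slice."""
    def hvp(x, v, idxs=slice(None)):
        # gdotv(w) = <objective_grad(w)[idxs], v>; its gradient, restricted to
        # idxs, is the (idxs, idxs) block of the Hessian applied to v.
        gdotv = lambda w: np.dot(objective_grad(w)[idxs], v)
        return grad(gdotv)(x)[idxs]
    return hvp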
Example #2
def build_logistic_objective():
    """Builds a neural net, creates weights and data from that net,
    then defines the objective as the training error."""

    # Load and process MNIST data (borrowing from Kayak)
    partial_flatten = lambda x: np.reshape(x,
                                           (x.shape[0], np.prod(x.shape[1:])))
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :],
                                    dtype=int)

    subsample_pixels = 5
    subsample_data = 10
    images, labels = mnist()
    # Subsample pixels and examples, then flatten to an N by (x * y) matrix.
    images = images[::subsample_data, ::subsample_pixels, ::subsample_pixels]
    images = partial_flatten(images) / 255.0
    labels = one_hot(labels, 10)[::subsample_data, :]

    # Build the network.
    L2_reg = 0
    parser, loss = make_logistic_funs(images.shape[1], 10, L2_reg)

    # Build functions to interrogate the objective at a particular set of parameters.
    objective = partial(loss, X=images, T=labels)
    obj_grad = grad(objective)
    obj_hvp = sliced_hvp(obj_grad)
    weights_subsets = {k: v[0] for k, v in parser.idxs_and_shapes.items()}
    return parser.N, objective, obj_grad, obj_hvp, weights_subsets
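A hypothetical way to exercise the functions returned above (assuming the helpers the snippet imports are available); it evaluates the loss and gradient at a random flat weight vector of length parser.N:

import numpy.random as npr

N, objective, obj_grad, obj_hvp, weights_subsets = build_logistic_objective()
w0 = 0.1 * npr.randn(N)  # random initial weights
print("loss at w0:", objective(w0))
print("gradient norm at w0:", np.linalg.norm(obj_grad(w0)))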
Example #3
    def get_mnist(self):
        X, Y = mnist()
        binary_y = np.logical_or(Y == 0, Y == 1)
        X = X[binary_y]
        Y = Y[binary_y]
        Y[Y == 0] = -1

        return X, Y
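A hypothetical usage, where data_source stands in for an instance of the enclosing class (which is not shown above):

import numpy as np

X, Y = data_source.get_mnist()  # data_source is a placeholder instance
print(X.shape)                  # only the 0 and 1 digits remain
print(np.unique(Y))             # labels are now in {-1, 1}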
Example #4
    axes = plt.gca()
    plt.colorbar()
    plt.savefig(name)


if __name__ == '__main__':

    pre_encoder = keras.models.load_model(name + '/pre.h5')
    encoder = keras.models.load_model(name + '/encoder.h5')
    encoder_style = keras.models.load_model(name + '/encoder0.h5')
    discriminator_style = keras.models.load_model(name + '/discriminator0.h5')
    decoder = keras.models.load_model(name + '/decoder.h5')
    autoencoder = keras.models.load_model(name + '/model.h5')

    from util import mnist
    x_train, y_train, x_test, y_test = mnist()

    style_test = encoder_style.predict(x_test)
    plot_latent(style_test, y_test, "style.png")

    style_train = encoder_style.predict(x_train)
    plot_latent(style_train, y_train, "style-train.png")

    encoder_digit = keras.models.load_model(name + '/encoder1.h5')
    discriminator_digit = keras.models.load_model(name + '/discriminator1.h5')

    digit_test = encoder_digit.predict(x_test)

    print(digit_test[:10])

    result_test = discriminator_digit.predict(digit_test)
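A small, hedged sanity check on the pieces loaded above; it inspects shapes only, since the snippet does not show how the latent codes or labels are encoded:

    print("digit codes:", digit_test.shape)
    print("discriminator output:", result_test.shape)
    print("labels:", y_test.shape)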
Example #5
def aae_train(name, epoch=128, computational_effort_factor=8):
    from keras.callbacks import TensorBoard, CSVLogger, ReduceLROnPlateau, EarlyStopping
    from keras.utils.generic_utils import Progbar
    from util import mnist, plot_examples
    batch_size = int(epoch * computational_effort_factor)
    print("epoch: {0}, batch: {1}".format(epoch, batch_size))
    x_train, y_train, x_test, y_test = mnist()
    from plot_all import plot_latent
    plot_latent(noise.predict(x_train), np.zeros_like(y_train),
                "style-noise.png")
    x_train = x_train[:36000, :]  # for removing residuals
    total = x_train.shape[0]
    real_train = np.ones([total, dimensions])
    fake_train = np.zeros([total, dimensions])
    r_loss, d_loss, g_loss = 0., 0., 0.
    try:
        for e in range(epoch):
            d = {'discriminator': 0, 'generator': 0}
            for i in range(total // batch_size):
                batch_pb = Progbar(total, width=25)

                def update(force=False):
                    batch_pb.update(
                        min((i + 1) * batch_size, total),
                        [
                            ('r', r_loss),
                            ('d', d_loss),
                            ('g', g_loss),
                            # ('d-g',(d_loss-g_loss))
                        ],
                        force=force)

                x_batch = x_train[i * batch_size:(i + 1) * batch_size]
                real_batch = real_train[i * batch_size:(i + 1) * batch_size]
                fake_batch = fake_train[i * batch_size:(i + 1) * batch_size]
                d_batch = np.concatenate((fake_batch, real_batch), 1)
                g_batch = np.concatenate((real_batch, real_batch), 1)

                def train_autoencoder():
                    set_trainable(encoder, True)
                    set_trainable(decoder, True)
                    # map() is lazy in Python 3, so loop explicitly.
                    for disc in discriminators:
                        set_trainable(disc, False)
                    return aae_r.train_on_batch(x_batch, x_batch)

                def test():
                    return \
                        aae_r.test_on_batch(x_batch, x_batch), \
                        aae_d.test_on_batch(x_batch, d_batch), \
                        aae_g.test_on_batch(x_batch, g_batch)

                def train_discriminator():
                    d['discriminator'] += 1
                    set_trainable(encoder, False)
                    set_trainable(decoder, False)
                    for disc in discriminators:
                        set_trainable(disc, True)
                    return aae_d.train_on_batch(x_batch, d_batch)

                def train_generator():
                    d['generator'] += 1
                    set_trainable(encoder, True)
                    set_trainable(decoder, False)
                    for disc in discriminators:
                        set_trainable(disc, False)
                    return aae_g.train_on_batch(x_batch, g_batch)

                # r_loss = train_autoencoder()
                d_loss = train_discriminator()
                g_loss = train_generator()
                r_loss, d_loss, g_loss = test()
                update()
            print "Epoch {}/{}: {}".format(e, epoch,
                                           [('r', r_loss), ('d', d_loss),
                                            ('g', g_loss),
                                            ('td', d['discriminator']),
                                            ('tg', d['generator'])])
            if (e % 120) == 0:
                from plot_all import plot_latent, plot_latent_nolimit
                r_loss, d_loss, g_loss = test()
                z_test = encoders[0].predict(x_test)
                plot_latent(z_test, y_test, "style-test-{}.png".format(e))
                plot_latent_nolimit(z_test, y_test,
                                    "style2-test-{}.png".format(e))
    except KeyboardInterrupt:
        print("learning stopped")
Example #6
def main():
    ap = argparse.ArgumentParser("SZO")
    ap.add_argument("--data",
                    choices=["mnist", "cifar10"],
                    default="mnist",
                    help="dataset")  #, "skewedmnist"
    ap.add_argument(
        "--opt",
        choices=["first", "flaxman", "dueling", "ghadimi", "agarwal"],
        help="optimizer type")
    ap.add_argument("--model", choices=["fc3", "cnn"], help="Model type")
    ap.add_argument("--depth", default=1, type=int, help="Depth of the cnn")
    ap.add_argument("--seed", default=12345, type=int, help="random seed")
    ap.add_argument("--num_epochs",
                    default=5,
                    type=int,
                    help="number of epochs")
    ap.add_argument("--num_rounds",
                    default=20,
                    type=int,
                    help="number of rounds")
    ap.add_argument("--lr",
                    default=0.1,
                    type=float,
                    help="initial learning rate")
    ap.add_argument("--pr", default=0.2, type=float, help="pruning rate")
    ap.add_argument("--mu",
                    default=0.1,
                    type=float,
                    help="exploration rate, smoothing parameter")
    ap.add_argument("--beta", default=0.0, type=float, help="momentum")
    ap.add_argument("--max_grad_norm",
                    default=0.0,
                    type=float,
                    help="maximum gradient norm")
    ap.add_argument("--var", default=1.0, type=float, help="noise variance")
    ap.add_argument("--eval_interval",
                    default=10000,
                    type=int,
                    help="evaluation interval")
    ap.add_argument("--batch_size", default=64, type=int, help="batch_size")
    ap.add_argument("--eval_batch_size",
                    default=1000,
                    type=int,
                    help="batch size used in evaluation")
    ap.add_argument("--cv",
                    default=True,
                    action="store_true",
                    help="whether to include control variates")  # type=bool,
    ap.add_argument(
        "--init",
        choices=["reset", "random", "last"],  #, 'rewind', 'best'
        help="initialization strategy in pruning: one of {reset, random, last}"
    )  #, rewind, best
    #ap.add_argument("--rewind_step", type=int, help="which epoch to return to after pruning")
    ap.add_argument(
        "--reward",
        choices=["nce", "acc", "expected_reward", "sampled_score"],
        help=
        "reward function: one of {nce, acc, expected_reward, sampled_score}")
    ap.add_argument("--prune_or_freeze",
                    choices=["none", "prune", "freeze"],
                    help="sparsification strategy: one of {prune or freeze}")
    ap.add_argument(
        "--masking_strategy",
        choices=["none", "L1", "heldout", "random"],
        help="masking strategy: one of {none, L1, heldout, random}")
    ap.add_argument(
        "--num_samples",
        type=int,
        help="number of samples to evaluate for gradient estimation")
    ap.add_argument("--device", choices=["cpu", "gpu"], default="cpu")
    ap.add_argument(
        '--affine',
        action="store_true",
        default=False,  # type=bool,
        help="if specified, turn on affine transform in normalization layers")
    ap.add_argument('--norm',
                    choices=["batch", "layer", "none"],
                    default="batch",
                    help="type of normalization to use between NN layeres")

    args = ap.parse_args()

    log_dir = f'runs-{args.seed}'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    #if not os.path.exists('logs/'+log_dir):
    #    os.mkdir('logs/'+log_dir)

    # logging
    label = f'{args.opt}-{args.reward}-{args.prune_or_freeze}-{args.init}-{args.masking_strategy}-{args.batch_size}'
    logging.basicConfig(
        filename=os.path.join(log_dir, f'{label}-train.log'),
        filemode='a',
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.addHandler(TqdmLoggingHandler())

    logger.info('Arguments:')
    for arg in vars(args):
        logger.info(f'\t{arg}: {getattr(args, arg)}')

    # data
    if args.data == 'mnist':
        trainset, testset, classes = mnist(data_path='data/MNIST_data/')
    elif args.data == 'cifar10':
        trainset, testset, classes = cifar10(data_path='data/CIFAR10_data/')
    trainloader, testloader, devloader = get_dataloader(
        trainset,
        testset,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        seed=args.seed)

    # model
    model = None
    model_kwargs = {
        'seed': args.seed,
        'class_names': classes,
        'output_dim': len(classes),
        'norm_affine': args.affine,
        'norm': args.norm
    }
    if args.model == 'cnn':
        assert args.data == 'cifar10'
        model_kwargs['modules'] = args.depth
        model_kwargs['input_size'] = 32
        model = ConvolutionalNN(**model_kwargs)
    elif args.model == 'fc3':
        if args.data == 'mnist':
            model_kwargs['input_dim'] = 28 * 28
        elif args.data == 'cifar10':
            model_kwargs['input_dim'] = 32 * 32 * 3
        model = FullyConnectedNN(**model_kwargs)
    else:
        raise ValueError("Unknown model type")

    # gpu
    device = None
    if args.device == 'gpu' and torch.cuda.is_available():
        device = 'cuda:0'
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = 'cpu'
    model.to(device)

    logger.info(f"Device: {device}")
    if torch.cuda.is_available():
        logger.info(f"\tn_gpu: {torch.cuda.device_count()}")

    # optimizer
    kwargs = {'prune_or_freeze': args.prune_or_freeze, 'init': args.init}
    if args.lr:
        kwargs['lr'] = args.lr
    if args.mu:
        kwargs['mu'] = args.mu
    if args.beta:
        kwargs['beta'] = args.beta
    if args.max_grad_norm:
        kwargs['max_grad_norm'] = args.max_grad_norm
    if args.var:
        kwargs['var'] = args.var
    if args.num_samples:
        kwargs['num_samples'] = args.num_samples
    #if args.init == 'rewind':
    #    print(args.rewind_step)

    opt = None
    if args.opt == 'first':
        if args.reward in ['sampled_score']:
            kwargs['cv'] = args.cv  # control variates
            opt = FirstOrderBanditOptimizer(model.parameters(), **kwargs)
        elif args.reward in ['nce', 'expected_reward']:
            opt = FirstOrderOptimizer(model.parameters(), **kwargs)
        else:
            raise ValueError
    elif args.opt == 'flaxman':
        opt = VanillaEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'dueling':
        opt = DuelingEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'ghadimi':
        opt = OneSideEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'agarwal':
        opt = TwoSideEvolutionOptimizer(model.parameters(), **kwargs)
    else:
        raise ValueError("Unknown optimizer type")

    #scheduler = lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=3, threshold=1e-2)
    scheduler = None  # constant learning rate
    # trainer
    pruning_rate = 0.0 if args.prune_or_freeze == 'none' or args.masking_strategy == 'none' else args.pr
    metrics = ['acc', 'f1-score', 'precision', 'recall']
    trainer = Trainer(model,
                      opt,
                      scheduler,
                      args.num_epochs,
                      args.num_rounds,
                      label,
                      seed=args.seed,
                      init=args.init,
                      pruning_rate=pruning_rate,
                      reward=args.reward,
                      metrics=metrics,
                      log_dir=log_dir,
                      eval_interval=args.eval_interval,
                      masking_strategy=args.masking_strategy,
                      device=device)

    trainer.train(trainloader, testloader, devloader)

    #del model
    #del opt
    #del scheduler
    #del trainer

    logging.shutdown()
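TqdmLoggingHandler is attached to the logger above but not defined in this snippet. A minimal sketch, assuming the standard logging module and tqdm, of a handler that routes records through tqdm.write so progress bars stay intact (an illustration, not the source's implementation):

import logging
import tqdm

class TqdmLoggingHandler(logging.Handler):
    """Hypothetical sketch: emit log records via tqdm.write()."""

    def emit(self, record):
        try:
            tqdm.tqdm.write(self.format(record))
            self.flush()
        except Exception:
            self.handleError(record)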
Example #7
        print(
            f'final w: {self.w}, final b: {self.b}, epochs: {epoch + 1}/{epochs}'
        )
        plt.plot(costs)
        plt.show()

    def predict(self, X):
        return np.sign(X.dot(self.w) + self.b)

    def score(self, X, Y):
        P = self.predict(X)
        return np.mean(P == Y)


if __name__ == '__main__':
    X, y = mnist()
    idx = np.logical_or(y == 0, y == 1)
    X = X[idx]
    y = y[idx]
    y[y == 0] = -1
    # the perceptron expects targets in {-1, 1}, so relabel the 0s as -1

    Ntrain = len(y) // 2

    X_train, y_train = X[:Ntrain], y[:Ntrain]
    X_test, y_test = X[Ntrain:], y[Ntrain:]

    classifier = Perceptron()
    t0 = datetime.now()
    classifier.fit(X_train, y_train)
    print(f"Training time is : {datetime.now()-t0}")