Example #1
def main():
  x, y = load_mnist()
  # split the data into training, validation and test sets
  m = x.shape[0]
  m = m - 20000
  sample_frac = 0.01 # sampling 1% of the points
  split = int(sample_frac*m)
  print(split)

  # the training set acts as the sample of data for which we query volunteer classifications.
  # Here the data is sampled uniformly at random from the entire data set, so the sample
  # naturally reflects the most densely populated regions of feature space.
  x_train = x[:split]
  y_train = y[:split]
  x_train_dev = x[split:2*split]
  y_train_dev = y[split:2*split]

  x_valid = x[50000:60000]
  y_valid = y[50000:60000]

  x_test  = x[60000:]
  y_test  = y[60000:]
  print(x_train.shape)

  clickable_analysis(x_test, y_test)
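For reference, assuming load_mnist() returns the full 70,000-image MNIST set as a single pair of arrays (an assumption; the loader itself is not shown here), m = 70000 - 20000 = 50000 and split = int(0.01 * 50000) = 500, so x_train and x_train_dev each hold 500 images, while the validation and test sets hold 10,000 images each.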
Example #2
def model_selection():
    random_state = 8888
    x_train, y_train, x_test, y_test = load_mnist()
    pipelines = []
    pipelines.append(('MLP',
                      Pipeline([('Scaler', StandardScaler()),
                                ('MLP', MLPClassifier())])))
    pipelines.append(('Fisher discriminant analysis',
                      Pipeline([('Scaler', StandardScaler()),
                                ('Fisher discriminant analysis',
                                 LinearDiscriminantAnalysis())])))
    pipelines.append(('Perceptron',
                      Pipeline([('Scaler', StandardScaler()),
                                ('Perceptron', Perceptron())])))
    pipelines.append(
        ('LogisticRegression',
         Pipeline([('Scaler', StandardScaler()),
                   ('LogisticRegression', LogisticRegression())])))
    pipelines.append(('Linear SVM',
                      Pipeline([('Scaler', StandardScaler()),
                                ('Linear SVM', SVC(kernel="linear",
                                                   C=0.025))])))
    pipelines.append(('SVM RBF',
                      Pipeline([('Scaler', StandardScaler()),
                                ('SVM RBF', SVC(gamma=2, C=1))])))
    for name, model in pipelines:
        model.fit(x_train, y_train)
        y_pred_class = model.predict(x_test)
        print(name, metrics.accuracy_score(y_test, y_pred_class))
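A hedged sketch of the imports this model-selection snippet appears to assume (the original import block is not shown; load_mnist is project-specific and not part of scikit-learn):

from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.svm import SVC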
Example #3
def idec(dataset="mnist",
         gamma=0.1,
         maxiter=2e4,
         update_interval=20,
         tol=0.00001,
         batch_size=256):
    maxiter = maxiter
    gamma = gamma
    update_interval = update_interval
    tol = tol
    batch_size = batch_size
    ae_weights = ("ae_weights/" + dataset + "_ae_weights/" + dataset +
                  "_ae_weights.h5")

    optimizer = SGD(lr=0.01, momentum=0.9)
    from datasets import load_mnist, load_usps, load_stl, load_cifar
    if dataset == 'mnist':  # recommends: n_clusters=10, update_interval=140
        x, y = load_mnist('./data/mnist/mnist.npz')
        update_interval = 140
    elif dataset == 'usps':  # recommends: n_clusters=10, update_interval=30
        x, y = load_usps('data/usps')
        update_interval = 30
    elif dataset == "stl":
        import numpy as np
        x, y = load_stl()
        update_interval = 20
    elif dataset == "cifar_10":
        x, y = load_cifar()
        update_interval = 140
    batch_size = 120
    print(gamma, dataset)
    try:
        count = Counter(y)
    except:
        count = Counter(y[:, 0])
    n_clusters = len(count)
    save_dir = 'results/idec_dataset:' + dataset + " gamma:" + str(gamma)
    # prepare the IDEC model
    idec = IDEC(dims=[x.shape[-1], 500, 500, 2000, 10],
                n_clusters=n_clusters,
                batch_size=batch_size)
    idec.initialize_model(ae_weights=ae_weights,
                          gamma=gamma,
                          optimizer=optimizer)
    plot_model(idec.model, to_file='idec_model.png', show_shapes=True)
    idec.model.summary()

    # begin clustering; the reported time does not include the pretraining part.
    t0 = time()
    y_pred = idec.clustering(x,
                             y=y,
                             tol=tol,
                             maxiter=maxiter,
                             update_interval=update_interval,
                             save_dir=save_dir)
    print('acc:', cluster_acc(y, y_pred))
    print('clustering time: ', (time() - t0))
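The cluster_acc helper used above is not shown; a minimal sketch, assuming the usual Hungarian-matching definition of unsupervised clustering accuracy, might look like this:

import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_acc(y_true, y_pred):
    # Map each predicted cluster id to the best-matching true label with the
    # Hungarian algorithm, then report the accuracy of that matching.
    y_true = np.asarray(y_true, dtype=np.int64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.int64).ravel()
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row_ind, col_ind = linear_sum_assignment(-w)  # maximize matched counts
    return w[row_ind, col_ind].sum() / y_pred.size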
Example #4
def load_data(folder):
    data = load_mnist(folder)
    train_data = data.get('train_data')
    train_labels = data.get('train_labels')
    test_data = data.get('test_data')
    test_labels = data.get('test_labels')

    # expand_dims for data
    train_data = np.expand_dims(train_data, axis=-1)
    test_data = np.expand_dims(test_data, axis=-1)

    # make one-hot labels
    train_labels = make_one_hot_labels(train_labels, NUM_CLASSES)
    test_labels = make_one_hot_labels(test_labels, NUM_CLASSES)

    return train_data, train_labels, test_data, test_labels
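
The make_one_hot_labels helper is referenced but not defined above; a plausible implementation (hypothetical, not the original) is:

import numpy as np

def make_one_hot_labels(labels, num_classes):
    # Convert an integer label vector into an (N, num_classes) one-hot matrix.
    labels = np.asarray(labels, dtype=np.int64).ravel()
    one_hot = np.zeros((labels.size, num_classes), dtype=np.float32)
    one_hot[np.arange(labels.size), labels] = 1.0
    return one_hot
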
def main():
    n_clusters = 10  # this is chosen based on prior knowledge of classes in the data set.
    batch_size = 256
    lr = 0.01  # learning rate
    momentum = 0.9
    # tolerance: clustering stops if less than this fraction of the data changes cluster on an iteration
    tol = 0.001

    maxiter = 2e4
    update_interval = 140
    save_dir = './results/dec'

    x, y = load_mnist()

    #training_set_sizes = [100]
    training_set_sizes = [500, 1000, 5000, 10000, 50000]
    # prepare the DEC model
    dec = DEC(dims=[x.shape[-1], 500, 500, 2000, 10],
              n_clusters=n_clusters,
              batch_size=batch_size)

    for training_set_size in training_set_sizes:
        x_train = x[:training_set_size]
        y_train = y[:training_set_size]
        ae_weights = './ae_weights_m%d.h5' % training_set_size
        dec.initialize_model(optimizer=SGD(lr=lr, momentum=momentum),
                             ae_weights=ae_weights,
                             x=x_train)
        t0 = time()
        y_pred = dec.clustering(x_train,
                                y=y_train,
                                tol=tol,
                                maxiter=maxiter,
                                update_interval=update_interval,
                                save_dir=save_dir + '/%d' % training_set_size)

        print('clustering time: ', (time() - t0))
        print('acc:', cluster_acc(y_train, y_pred))
Example #6

if len(sys.argv) < 2:
    terminate()
else:
    mode = sys.argv[1]
    if mode not in func_mode_list:
        terminate()


def show_plot_sample():
    fig = plt.figure(figsize=(8, 8))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in tqdm(range(25)):
        id = random.randint(0, len(testX) - 1)
        images = np.reshape(testX[id], [28, 28])
        ax = fig.add_subplot(5, 5, i + 1, xticks=[], yticks=[])
        ax.imshow(images, cmap=plt.cm.binary, interpolation='nearest')
        ax.text(0, 2, "label:" + str(testY[id]))
        ax.text(0, 4, "predict:" + str(knn.predict(testX[id])))
    plt.show()


if __name__ == '__main__':
    trainX, trainY, testX, testY = load_mnist()
    knn = KNNClassifier(train_data=trainX, train_labels=trainY, ord=2)
    if mode == 'run_sample':
        show_plot_sample()
    else:
        knn.test_acc(test_data=testX, test_label=testY, K=1)
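The KNNClassifier class is not part of this snippet; a minimal sketch consistent with the interface used above (train_data/train_labels/ord in the constructor, predict for one sample, test_acc over a set) could be:

import numpy as np
from collections import Counter

class KNNClassifier:
    def __init__(self, train_data, train_labels, ord=2):
        self.train_data = np.asarray(train_data, dtype=np.float64)
        self.train_labels = np.asarray(train_labels)
        self.ord = ord

    def predict(self, x, K=1):
        # distances from the query point to every training point
        dists = np.linalg.norm(self.train_data - np.asarray(x, dtype=np.float64),
                               ord=self.ord, axis=1)
        nearest = self.train_labels[np.argsort(dists)[:K]]
        return Counter(nearest).most_common(1)[0][0]

    def test_acc(self, test_data, test_label, K=1):
        preds = [self.predict(x, K=K) for x in test_data]
        acc = np.mean(np.asarray(preds) == np.asarray(test_label))
        print('accuracy:', acc)
        return acc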
Example #7
    def __init__(self, conf):
        self.conf = conf

        # determine and create result dir
        i = 1
        log_path = conf.result_path + 'run0'
        while os.path.exists(log_path):
            log_path = '{}run{}'.format(conf.result_path, i)
            i += 1
        os.makedirs(log_path)
        self.log_path = log_path

        if not os.path.exists(conf.checkpoint_dir):
            os.makedirs(conf.checkpoint_dir)

        self.checkpoint_file = os.path.join(self.conf.checkpoint_dir,
                                            "model.ckpt")
        input_shape = [
            conf.batch_size, conf.scene_width, conf.scene_height, conf.channels
        ]
        # build model
        with tf.device(conf.device):
            self.mdl = model.Supair(conf)
            self.in_ph = tf.placeholder(tf.float32, input_shape)
            self.elbo = self.mdl.elbo(self.in_ph)

            self.mdl.num_parameters()

            self.optimizer = tf.train.AdamOptimizer()
            self.train_op = self.optimizer.minimize(-1 * self.elbo)

        self.sess = tf.Session()

        self.saver = tf.train.Saver()
        if self.conf.load_params:
            self.saver.restore(self.sess, self.checkpoint_file)
        else:
            self.sess.run(tf.global_variables_initializer())
            self.sess.run(tf.local_variables_initializer())

        # load data
        bboxes = None
        if conf.dataset == 'MNIST':
            (x, counts, y,
             bboxes), (x_test, c_test, _,
                       _) = datasets.load_mnist(conf.scene_width,
                                                max_digits=2,
                                                path=conf.data_path)
            visualize.store_images(x[0:10], log_path + '/img_raw')
            if conf.noise:
                x = datasets.add_noise(x)
                x_test = datasets.add_noise(x_test)
                visualize.store_images(x[0:10], log_path + '/img_noisy')
            if conf.structured_noise:
                x = datasets.add_structured_noise(x)
                x_test = datasets.add_structured_noise(x_test)
                visualize.store_images(x[0:10], log_path + '/img_struc_noisy')
            x_color = np.squeeze(x)
        elif conf.dataset == 'sprites':
            (x_color, counts,
             _), (x_test, c_test,
                  _) = datasets.make_sprites(50000, path=conf.data_path)
            if conf.noise:
                x_color = datasets.add_noise(x_color)
            x = visualize.rgb2gray(x_color)
            x = np.clip(x, 0.0, 1.0)
            x_test = visualize.rgb2gray(x_test)
            x_test = np.clip(x_test, 0.0, 1.0)
            if conf.noise:
                x = datasets.add_noise(x)
                x_test = datasets.add_noise(x_test)
                x_color = datasets.add_noise(x_color)
        elif conf.dataset == 'omniglot':
            x = 1 - datasets.load_omniglot(path=conf.data_path)
            counts = np.ones(x.shape[0], dtype=np.int32)
            x_color = np.squeeze(x)
        elif conf.dataset == 'svhn':
            x, counts, objects, bgs = datasets.load_svhn(path=conf.data_path)
            self.pretrain(x, objects, bgs)
            x_color = np.squeeze(x)
        else:
            raise ValueError('unknown dataset', conf.dataset)

        self.x, self.x_color, self.counts = x, x_color, counts
        self.x_test, self.c_test = x_test, c_test
        self.bboxes = bboxes

        print('Built model')
        self.obj_reconstructor = SpnReconstructor(self.mdl.obj_spn)
        self.bg_reconstructor = SpnReconstructor(self.mdl.bg_spn)

        tfgraph = tf.get_default_graph()
        self.tensors_of_interest = {
            'z_where': tfgraph.get_tensor_by_name('z_where:0'),
            'z_pres': tfgraph.get_tensor_by_name('z_pres:0'),
            'bg_score': tfgraph.get_tensor_by_name('bg_score:0'),
            'y': tfgraph.get_tensor_by_name('y:0'),
            'obj_vis': tfgraph.get_tensor_by_name('obj_vis:0'),
            'bg_maps': tfgraph.get_tensor_by_name('bg_maps:0')
        }
Example #8
def run_experiment(settings):
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn

    exponential_family = settings.exponential_family

    classes = settings.classes

    K = settings.K

    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions_mixture

    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist()
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn()
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]
    # pick the selected classes
    if classes is not None:
        train_x = train_x[np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        classes = np.unique(train_labels).tolist()

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    einets = []
    ps = []
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[height / d, width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(height, width), delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth, num_repetitions=num_repetitions)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
                num_var=train_x.shape[1],
                num_dims=3 if svhn else 1,
                num_classes=1,
                num_sums=K,
                num_input_distributions=K,
                exponential_family=exponential_family,
                exponential_family_args=exponential_family_args,
                use_em=False)

        einet = EinsumNetwork.EinsumNetwork(graph, args)

        init_dict = get_init_dict(einet, train_x, train_labels=train_labels, einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einets.append(einet)

        # Calculate amount of training samples per class
        ps.append(train_labels.count(c))

        print(f'Einsum network for class {c}:')
        print(einet)

    # normalize ps, construct mixture component
    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)

    num_params = mixture.eval_size()

    #################################
    # Evaluate after initialization #
    #################################

    train_lls = []
    valid_lls = []
    test_lls = []
    train_accs = []
    valid_accs = []
    test_accs = []

    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]
    mixture.eval()
    train_ll_before = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size, skip_reparam=True)
    valid_ll_before = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size, skip_reparam=True)
    test_ll_before = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size, skip_reparam=True)
    print()
    print("Experiment 3: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}".format(
            train_ll_before / train_N,
            valid_ll_before / valid_N,
            test_ll_before / test_N))
    train_lls.append(train_ll_before / train_N)
    valid_lls.append(valid_ll_before / valid_N)
    test_lls.append(test_ll_before / test_N)
    
    ################
    # Experiment 4 #
    ################
    train_labelsz = torch.tensor(train_labels).to(torch.device(device))
    valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
    test_labelsz = torch.tensor(test_labels).to(torch.device(device))

    acc_train_before = mixture.eval_accuracy_batched(classes, train_x, train_labelsz, batch_size=batch_size, skip_reparam=True)
    acc_valid_before = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz, batch_size=batch_size, skip_reparam=True)
    acc_test_before = mixture.eval_accuracy_batched(classes, test_x, test_labelsz, batch_size=batch_size, skip_reparam=True)
    print()
    print("Experiment 4: Classification accuracies  --- train acc {}   valid acc {}   test acc {}".format(
            acc_train_before,
            acc_valid_before,
            acc_test_before))
    train_accs.append(acc_train_before)
    valid_accs.append(acc_valid_before)
    test_accs.append(acc_test_before)
    mixture.train()

    ##################
    # Training phase #
    ##################

    """ Learning each sub Network Generatively """

    sub_net_parameters = None
    for einet in mixture.einets:
        if sub_net_parameters is None:
            sub_net_parameters = list(einet.parameters())
        else:
            sub_net_parameters += list(einet.parameters())
    sub_net_parameters += list(mixture.parameters())

    optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)

    start_time = time.time()

    end_time = time.time()

    for epoch_count in range(num_epochs):
        for (einet, c) in zip(einets, classes):
            train_x_c = train_x[[l == c for l in train_labels]]
            valid_x_c = valid_x[[l == c for l in valid_labels]]
            test_x_c = test_x[[l == c for l in test_labels]]

            train_N = train_x_c.shape[0]
            valid_N = valid_x_c.shape[0]
            test_N = test_x_c.shape[0]

            idx_batches = torch.randperm(train_N, device=device).split(batch_size)

            total_loss = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :]
                optimizer.zero_grad()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                nll = log_likelihood * -1
                nll.backward()
                optimizer.step()
                total_loss += nll.detach().item()
            print(f'[{epoch_count}]   total loss: {total_loss}')

        mixture.eval()
        train_N = train_x.shape[0]
        valid_N = valid_x.shape[0]
        test_N = test_x.shape[0]
        train_ll = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size)
        valid_ll = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size)
        test_ll = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size)
        train_lls.append(train_ll / train_N)
        valid_lls.append(valid_ll / valid_N)
        test_lls.append(test_ll / test_N)

        train_labelsz = torch.tensor(train_labels).to(torch.device(device))
        valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
        test_labelsz = torch.tensor(test_labels).to(torch.device(device))

        acc_train = mixture.eval_accuracy_batched(classes, train_x, train_labelsz, batch_size=batch_size)
        acc_valid = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz, batch_size=batch_size)
        acc_test = mixture.eval_accuracy_batched(classes, test_x, test_labelsz, batch_size=batch_size)
        train_accs.append(acc_train)
        valid_accs.append(acc_valid)
        test_accs.append(acc_test)
        mixture.train()

    print()
    print("Experiment 3: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}".format(
            train_ll / train_N,
            valid_ll / valid_N,
            test_ll / test_N))

    print()
    print("Experiment 4: Classification accuracies  --- train acc {}   valid acc {}   test acc {}".format(
            acc_train,
            acc_valid,
            acc_test))

    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_lls': train_lls,
        'valid_lls': valid_lls,
        'test_lls': test_lls,
        'train_accs': train_accs,
        'valid_accs': valid_accs,
        'test_accs': test_accs,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
def check_einets_eq(e1, e2):
    assert len(e1.einet_layers) == len(e2.einet_layers)
    for l, l_p in zip(e1.einet_layers, e2.einet_layers):
        if hasattr(l, "params"):
            assert torch.all(torch.eq(l.params, l_p.params))


classes = [7]
num_epochs = 5
batch_size = 100

############################################################################


# get data
train_x, train_labels, test_x, test_labels = datasets.load_mnist()

# validation split
valid_x = train_x[-10000:, :]
train_x = train_x[:-10000, :]
valid_labels = train_labels[-10000:]
train_labels = train_labels[:-10000]

# pick the selected classes
if classes is not None:
    train_x = train_x[np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
    valid_x = valid_x[np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
    test_x = test_x[np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

train_x = torch.from_numpy(train_x).to(torch.device(device))
valid_x = torch.from_numpy(valid_x).to(torch.device(device))
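
The repeated class-selection expression in this example builds a boolean row mask by stacking per-class comparisons; for a 1-D integer label array, np.isin gives an equivalent mask more directly (a behaviour-equivalent sketch):

import numpy as np

labels = np.array([0, 7, 3, 7, 1])
classes = [7]
# Same mask as np.any(np.stack([labels == c for c in classes], 1), 1)
mask = np.isin(labels, classes)
print(mask)  # [False  True False  True False]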
Example #10
    cfg.read('config.ini')
    train_cfg = dict(zip([key for key, _ in cfg.items('train')], \
                         [int(val) if val.isdigit() else val for _, val in cfg.items('train')]))
    print('config:', train_cfg)
    # parameters from config
    context_size = train_cfg['context_size']
    x_dim = train_cfg['x_dim']
    h_dim = train_cfg['h_dim']
    r_dim = train_cfg['r_dim']
    z_dim = train_cfg['z_dim']
    y_dim = train_cfg['y_dim']
    batch_size = train_cfg['batch_size']
    n_iter = train_cfg['n_iter']
    n_epoch = train_cfg['n_epoch']
    n_display = train_cfg['n_display'] 
    
    device = torch.device('cuda') if torch.cuda.device_count() > 0 else torch.device('cpu')
    
    # load dataloader
    data_loader = datasets.load_mnist(batch_size=batch_size)
    # data_loader = datasets.load_celeba(batch_size=batch_size)
    
    model = NeuralProcess(x_dim=x_dim, h_dim=h_dim, r_dim=r_dim, z_dim=z_dim, y_dim=y_dim, device=device)
    optimizer = optim.Adam(model.parameters(), lr=4e-3)
    # print(model)
    
    ModelTrainer = trainer.NPTrainer(model=model, context_size=context_size, optimizer=optimizer, device=device)
    ModelTrainer.train(data_loader=data_loader, n_epoch=n_epoch, n_iter=n_iter, test_for_every=n_display)

    # import sys; sys.exit(0)
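
The snippet expects a config.ini with a [train] section holding integer hyperparameters; a hypothetical file matching the keys read above (values are illustrative assumptions, not from the original project) could be written like this:

sample_config = """\
[train]
context_size = 100
x_dim = 2
h_dim = 128
r_dim = 128
z_dim = 64
y_dim = 1
batch_size = 64
n_iter = 200
n_epoch = 50
n_display = 10
"""
with open('config.ini', 'w') as f:
    f.write(sample_config)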
Example #11
    def new_start(start_train_set, online_offset):
        ############################################################################
        fashion_mnist = settings.fashion_mnist
        svhn = settings.svhn

        exponential_family = settings.exponential_family

        classes = settings.classes

        K = settings.K

        structure = settings.structure

        # 'poon-domingos'
        pd_num_pieces = settings.pd_num_pieces

        # 'binary-trees'
        depth = settings.depth
        num_repetitions = settings.num_repetitions_mixture

        width = settings.width
        height = settings.height

        num_epochs = settings.num_epochs
        batch_size = settings.batch_size
        online_em_frequency = settings.online_em_frequency
        online_em_stepsize = settings.online_em_stepsize

        ############################################################################

        exponential_family_args = None
        if exponential_family == EinsumNetwork.BinomialArray:
            exponential_family_args = {'N': 255}
        if exponential_family == EinsumNetwork.CategoricalArray:
            exponential_family_args = {'K': 256}
        if exponential_family == EinsumNetwork.NormalArray:
            exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

        # get data
        if fashion_mnist:
            train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist()
        elif svhn:
            train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn()
        else:
            train_x, train_labels, test_x, test_labels = datasets.load_mnist()

        if exponential_family == EinsumNetwork.NormalArray:
            train_x /= 255.
            test_x /= 255.
            train_x -= .5
            test_x -= .5

        # validation split
        valid_x = train_x[-10000:, :]
        # online_x = train_x[-40000:, :]
        train_x = train_x[:-(10000+online_offset-start_train_set), :]
        valid_labels = train_labels[-10000:]
        # online_labels = train_labels[-40000:]
        train_labels = train_labels[:-(10000+online_offset-start_train_set)]
        
        # # debug setup
        # valid_x = train_x[-10000:, :]
        # online_x = train_x[-45000:, :]
        # train_x = train_x[:-55000, :]
        # valid_labels = train_labels[-10000:]
        # online_labels = train_labels[-45000:]
        # train_labels = train_labels[:-55000]

        # valid_x = train_x[-10000:, :]
        # online_x = train_x[-10000:, :]
        # train_x = train_x[:-20000, :]
        # valid_labels = train_labels[-10000:]
        # online_labels = train_labels[-10000:]
        # train_labels = train_labels[:-20000]

        # valid_x = train_x[-10000:, :]
        # online_x = train_x[-20000:, :]
        # train_x = train_x[:-30000, :]
        # valid_labels = train_labels[-10000:]
        # online_labels = train_labels[-20000:]
        # train_labels = train_labels[:-30000]

        # pick the selected classes
        if classes is not None:
            train_x = train_x[np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
            # online_x = online_x[np.any(np.stack([online_labels == c for c in classes], 1), 1), :]
            valid_x = valid_x[np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
            test_x = test_x[np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

            train_labels = [l for l in train_labels if l in classes]
            # online_labels = [l for l in online_labels if l in classes]
            valid_labels = [l for l in valid_labels if l in classes]
            test_labels = [l for l in test_labels if l in classes]
        else:
            classes = np.unique(train_labels).tolist()

            train_labels = [l for l in train_labels if l in classes]
            # online_labels = [l for l in online_labels if l in classes]
            valid_labels = [l for l in valid_labels if l in classes]
            test_labels = [l for l in test_labels if l in classes]

        train_x = torch.from_numpy(train_x).to(torch.device(device))
        # online_x = torch.from_numpy(online_x).to(torch.device(device))
        valid_x = torch.from_numpy(valid_x).to(torch.device(device))
        test_x = torch.from_numpy(test_x).to(torch.device(device))

        ######################################
        # Make EinsumNetworks for each class #
        ######################################
        einets = []
        ps = []
        for c in classes:
            if structure == 'poon-domingos':
                pd_delta = [[height / d, width / d] for d in pd_num_pieces]
                graph = Graph.poon_domingos_structure(shape=(height, width), delta=pd_delta)
            elif structure == 'binary-trees':
                graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth, num_repetitions=num_repetitions)
            else:
                raise AssertionError("Unknown Structure")

            args = EinsumNetwork.Args(
                    num_var=train_x.shape[1],
                    num_dims=3 if svhn else 1,
                    num_classes=1,
                    num_sums=K,
                    num_input_distributions=K,
                    exponential_family=exponential_family,
                    exponential_family_args=exponential_family_args,
                    online_em_frequency=online_em_frequency,
                    online_em_stepsize=online_em_stepsize)

            einet = EinsumNetwork.EinsumNetwork(graph, args)

            init_dict = get_init_dict(einet, train_x, train_labels=train_labels, einet_class=c)
            einet.initialize(init_dict)
            einet.to(device)
            einets.append(einet)

            # Calculate amount of training samples per class
            ps.append(train_labels.count(c))

            print(f'Einsum network for class {c}:')
            print(einet)

        # normalize ps, construct mixture component
        ps = [p / sum(ps) for p in ps]
        ps = torch.tensor(ps).to(torch.device(device))
        mixture = EinetMixture(ps, einets, classes=classes)

        num_params = mixture.eval_size()

        #################################
        # Evaluate after initialization #
        #################################

        train_N = train_x.shape[0]
        valid_N = valid_x.shape[0]
        test_N = test_x.shape[0]
        mixture.eval()
        train_ll_before = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size)
        valid_ll_before = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size)
        test_ll_before = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size)
        print()
        print("Experiment 3: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}".format(
                train_ll_before / train_N,
                valid_ll_before / valid_N,
                test_ll_before / test_N))
        train_lls.append(train_ll_before / train_N)
        valid_lls.append(valid_ll_before / valid_N)
        test_lls.append(test_ll_before / test_N)

        ################
        # Experiment 4 #
        ################
        train_labelsz = torch.tensor(train_labels).to(torch.device(device))
        valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
        test_labelsz = torch.tensor(test_labels).to(torch.device(device))

        acc_train_before = mixture.eval_accuracy_batched(classes, train_x, train_labelsz, batch_size=batch_size)
        acc_valid_before = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz, batch_size=batch_size)
        acc_test_before = mixture.eval_accuracy_batched(classes, test_x, test_labelsz, batch_size=batch_size)
        print()
        print("Experiment 8: Classification accuracies  --- train acc {}   valid acc {}   test acc {}".format(
                acc_train_before,
                acc_valid_before,
                acc_test_before))
        train_accs.append(acc_train_before)
        valid_accs.append(acc_valid_before)
        test_accs.append(acc_test_before)
Example #12
def sdec(dataset="mnist",
         gamma=0.1,
         beta=1,
         maxiter=2e4,
         update_interval=20,
         tol=0.00001,
         batch_size=256):
    """arguements:
    dataset:choice the datasets that you want to run
    gamma: The Lambda in the lecture
    beta: the proportion of information we have known about the sample
    """
    maxiter = maxiter
    gamma = gamma
    update_interval = update_interval
    tol = tol
    beta = beta
    batch_size = batch_size
    ae_weights = ("ae_weights/" + dataset + "_ae_weights/" + dataset +
                  "_ae_weights.h5")

    # load dataset
    from datasets import load_mnist, load_usps, load_stl, load_cifar
    if dataset == 'mnist':  # recommends: n_clusters=10, update_interval=140
        x, y = load_mnist('./data/mnist/mnist.npz')
        update_interval = 140
    elif dataset == 'usps':  # recommends: n_clusters=10, update_interval=30
        x, y = load_usps('data/usps')
        update_interval = 30
    elif dataset == "stl":
        import numpy as np
        x, y = load_stl()
        update_interval = 20
    elif dataset == "cifar_10":
        x, y = load_cifar()
        update_interval = 40
    beta = beta
    print(gamma, dataset, beta)
    # prepare the SDEC model
    try:
        count = Counter(y)
    except:
        count = Counter(y[:, 0])
    n_clusters = len(count)
    save_dir = 'results/sdec_dataset:' + dataset + " gamma:" + str(gamma)
    laster_batch_size = x.shape[0] % batch_size
    dec = SDEC(dims=[x.shape[-1], 500, 500, 2000, 10],
               n_clusters=n_clusters,
               N=x.shape[0],
               x=x,
               batch_size=batch_size,
               laster_batch_size=laster_batch_size,
               gamma=gamma,
               beta=beta)
    dec.initialize_model(optimizer=SGD(lr=0.01, momentum=0.9),
                         ae_weights=ae_weights)
    dec.model.summary()
    t0 = time()
    y_pred = dec.clustering(x,
                            y=y,
                            tol=tol,
                            maxiter=maxiter,
                            update_interval=update_interval,
                            save_dir=save_dir)
    plot_model(dec.model, to_file='sdecmodel.png', show_shapes=True)
    print('acc:', cluster_acc(y, y_pred))
    print('clustering time: ', (time() - t0))
Example #13
worker_HOSTS = [('lpdquad.epfl.ch', 5000), ('lpdquad.epfl.ch', 6000),
                ('lpdquad.epfl.ch', 7000), ('lpdquad.epfl.ch', 8000),
                ('lpdquad.epfl.ch', 9000)]
n = len(worker_HOSTS)
batch_size = 50
learning_rate = 0.05
activation_func = tf.nn.relu
max_train_epoch = 10000
max_train_accur = 0.97
builder_opt = tf.train.AdagradOptimizer(learning_rate)
builder_dims = [784, 100, 10]

# ------------------------------------------------------------------------- #

# Dataset instantiation
dataset = datasets.load_mnist()
train_set = dataset.cut(0, 50000, 50000).shuffle().cut(0, 50000, batch_size)
test_set = dataset.cut(50000, 60000, 10000)

# Model instantiation
graph = tf.Graph()
with graph.as_default():
    model = models.dense_classifier(builder_dims,
                                    inputs=None,
                                    act_fn=activation_func,
                                    optimizer=builder_opt,
                                    epoch=True)

# Establish connections with workers
sockets = []
for worker_HOST in worker_HOSTS:
    pass  # connection to each worker would be established here (omitted in this snippet)


def main():

    # constants
    batch_size = 256
    lr = 0.01
    momentum = 0.9
    tol = 0.001
    maxiter = 2e4
    update_interval = 140

    n_clusters = 10
    n_classes = 10

    lcolours = ['#D6FF79', '#B0FF92', '#A09BE7', '#5F00BA', '#56CBF9', \
                '#F3C969', '#ED254E', '#CAA8F5', '#D9F0FF', '#46351D']
    labels = [str(i) for i in range(n_clusters)]

    ae_weights = '../../../../DEC-keras/results/mnist/ae_weights.h5'
    dec_weights = '../../../../DEC-keras/results/mnist/%d/DEC_model_final.h5' % n_clusters

    # load mnist data set
    x, y = load_mnist()
    # split the data into training, validation and test sets
    m = x.shape[0]
    m = m - 20000
    sample_frac = 0.01
    split = int(sample_frac * m)
    print(split)
    x_train = x[:split]
    y_train = y[:split]
    x_valid = x[50000:60000]
    y_valid = y[50000:60000]
    x_test = x[60000:]
    y_test = y[60000:]

    # load pretrained DEC model
    dec = load_mnist_dec(x, ae_weights, dec_weights, n_clusters, \
      batch_size, lr, momentum)

    # predict training set cluster assignments
    y_pred = dec.predict_clusters(x_train)

    # inspect the clustering and simulate volunteer labelling of random sample (the training set)
    cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
      get_cluster_to_label_mapping(y_train, y_pred, n_classes, n_clusters)
    print(cluster_acc(y_train, y_pred))
    y_valid_pred = dec.predict_clusters(x_valid)
    print(cluster_acc(y_valid, y_valid_pred))

    # extract the cluster centres
    cluster_centres = get_cluster_centres(dec)

    # determine current unlabelled samples
    y_plot = np.array(y[:m], dtype='int')
    y_plot[split:] = -1

    # reduce embedding to 2D and plot labelled and unlabelled training set samples
    #pca_plot(dec.encoder, x[:m], cluster_centres, y=y_plot, labels=labels, \
    #           lcolours=lcolours)

    # get siamese training pairs
    im, cc, ls, cluster_to_label_mapping = \
      get_pairs_auto(dec, x_train, y_train, cluster_centres, \
        cluster_to_label_mapping, majority_class_fractions, n_clusters)

    #im, cc, ls, cluster_to_label_mapping = \
    #  get_pairs_auto_with_noise(dec, x_train, y_train, cluster_centres, \
    #    cluster_to_label_mapping, majority_class_fractions, n_clusters)
    """
  mcheckpointer = ModelCheckpoint(filepath='saved_models/weights.best..hdf5', \
                                  verbose=1, save_best_only=True)

  base_network = Model(dec.model.input, \
    dec.model.get_layer('encoder_%d' % (dec.n_stacks - 1)).output)
  fcheckpointer = FrameDumpCallback(base_network, x, cluster_centres, \
    './video', y=y_plot, labels=labels, lcolours=lcolours)
  """
    #callbacks = [mcheckpointer, fcheckpointer]
    callbacks = []

    model, base_network = train_siamese(dec, cluster_centres, im, cc, ls, \
      epochs=5, split_frac=0.75, callbacks=callbacks)
    #model, base_network = train_siamese_online(dec, x, cluster_centres, im, cc, ls, \
    #  epochs=1, split_frac=0.75, callbacks=[])

    y_pred = dec.predict_clusters(x_valid)

    cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
      get_cluster_to_label_mapping(y_valid, y_pred, n_classes, n_clusters)
    print(cluster_acc(y_valid, y_pred))
    #pca_plot(dec.encoder, x_valid, cluster_centres, y=y_valid, labels=labels, \
    #           lcolours=lcolours)

    y_pred = dec.predict_clusters(x[:m])
    print(np.argmin(majority_class_fractions))

    for j in range(1, 6):
        selection = np.where(
            y_pred[j * split:(j + 1) *
                   split] == np.argmin(majority_class_fractions))
        x_train = np.concatenate(
            (x_train, x[:m][j * split:(j + 1) * split][selection]))
        y_train = np.concatenate(
            (y_train, y[:m][j * split:(j + 1) * split][selection]))

        im, cc, ls, cluster_to_label_mapping = \
          get_pairs_auto(dec, x_train, y_train, cluster_centres, \
            cluster_to_label_mapping, majority_class_fractions, n_clusters)

        callbacks = []

        model, base_network = train_siamese(dec, cluster_centres, im, cc, ls, \
          epochs=1, split_frac=0.75, callbacks=callbacks)

        #x_train = x[:2*split]
        #y_train = y[:2*split]
        #y_pred = dec.predict_clusters(x_train)

        #cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
        #  get_cluster_to_label_mapping(y_train, y_pred, n_classes, n_clusters)

        y_pred = dec.predict_clusters(x_valid)

        cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
          get_cluster_to_label_mapping(y_valid, y_pred, n_classes, n_clusters)
        print(cluster_acc(y_valid, y_pred))
Example #15
    ('conv2', ConvLayer0, {'n_out'      : 16,
                           'image_shape': (13, 13),
                           'filter_size': (5, 5)}),
    ('pool2', PoolLayer),
    ('conv3', ConvLayer0, {'n_out'      : 120,
                           'image_shape': (4, 4),
                           'filter_size': (4, 4)}),
    ('reshape1', ReshapeLayer, {'n_out': 120}),
    ('fc1', WbLayer, {'n_out': 84, 'activation': 'tanh'}),
    ('fc2', WbLayer, {'n_out': 10, 'activation': 'softmax'}) # actually rbf
]



# load datasets
datasets = pre_process(load_mnist(), (5 / 6, 1 / 6), shuffle=True)
tdatasets = share_datasets(datasets)


# build model
options = get_options(lenet5, 1, datasets, batch_size=256, dispFreq=30,
                      use_BN=True)
tparams, BNparams, network = make_model(options, batch_size=256)


# add early stopping condition
temp = OrderedDict()
temp['valid_error'] = get_error(tdatasets[1], options, batch_size=256)
temp['test_error'] = get_error(tdatasets[2], options, batch_size=256)
options['model_test'] = temp
Example #16
def main(relaxation=None,
         learn_prior=True,
         max_iters=None,
         batch_size=24,
         num_latents=200,
         model_type=None,
         lr=None,
         test_bias=False,
         train_dir=None,
         iwae_samples=100,
         dataset="mnist",
         logf=None,
         var_lr_scale=10.,
         Q_wd=.0001,
         Q_depth=-1,
         checkpoint_path=None):

    valid_batch_size = 100

    if model_type == "L1":
        num_layers = 1
        layer_type = linear_layer
    elif model_type == "L2":
        num_layers = 2
        layer_type = linear_layer
    elif model_type == "NL1":
        num_layers = 1
        layer_type = nonlinear_layer
    else:
        assert False, "bad model type {}".format(model_type)

    sess = tf.Session()
    if dataset == "mnist":
        X_tr, X_va, X_te = datasets.load_mnist()
    elif dataset == "omni":
        X_tr, X_va, X_te = datasets.load_omniglot()
    else:
        assert False
    train_mean = np.mean(X_tr, axis=0, keepdims=True)
    train_output_bias = -np.log(1. / np.clip(train_mean, 0.001, 0.999) -
                                1.).astype(np.float32)

    x = tf.placeholder(tf.float32, [None, 784])
    x_im = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image("x_true", x_im)

    # make prior for top b
    p_prior = tf.Variable(
        tf.zeros([num_latents], dtype=tf.float32),
        trainable=learn_prior,
        name='p_prior',
    )
    # create rebar specific variables temperature and eta
    log_temperatures = [create_log_temp(1) for l in range(num_layers)]
    temperatures = [tf.exp(log_temp) for log_temp in log_temperatures]
    batch_temperatures = [tf.reshape(temp, [1, -1]) for temp in temperatures]
    etas = [create_eta(1) for l in range(num_layers)]
    batch_etas = [tf.reshape(eta, [1, -1]) for eta in etas]

    # random uniform samples
    u = [
        tf.random_uniform([tf.shape(x)[0], num_latents], dtype=tf.float32)
        for l in range(num_layers)
    ]
    # create binary sampler
    b_sampler = BSampler(u, "b_sampler")
    gen_b_sampler = BSampler(u, "gen_b_sampler")
    # generate hard forward pass
    encoder_name = "encoder"
    decoder_name = "decoder"
    inf_la_b, samples_b = inference_network(x, train_mean, layer_type,
                                            num_layers, num_latents,
                                            encoder_name, False, b_sampler)
    gen_la_b = generator_network(samples_b, train_output_bias, layer_type,
                                 num_layers, num_latents, decoder_name, False)
    log_image(gen_la_b[-1], "x_pred")
    # produce samples
    _samples_la_b = generator_network(None,
                                      train_output_bias,
                                      layer_type,
                                      num_layers,
                                      num_latents,
                                      decoder_name,
                                      True,
                                      sampler=gen_b_sampler,
                                      prior=p_prior)
    log_image(_samples_la_b[-1], "x_sample")

    # hard loss evaluation and log probs
    f_b, log_q_bs = neg_elbo(x,
                             samples_b,
                             inf_la_b,
                             gen_la_b,
                             p_prior,
                             log=True)
    batch_f_b = tf.expand_dims(f_b, 1)
    total_loss = tf.reduce_mean(f_b)
    tf.summary.scalar("fb", total_loss)
    # optimizer for model parameters
    model_opt = tf.train.AdamOptimizer(lr, beta2=.99999)
    # optimizer for variance reducing parameters
    variance_opt = tf.train.AdamOptimizer(var_lr_scale * lr, beta2=.99999)
    # get encoder and decoder variables
    encoder_params = get_variables(encoder_name)
    decoder_params = get_variables(decoder_name)
    if learn_prior:
        decoder_params.append(p_prior)
    # compute and store gradients of hard loss with respect to encoder_parameters
    encoder_loss_grads = {}
    for g, v in model_opt.compute_gradients(total_loss,
                                            var_list=encoder_params):
        encoder_loss_grads[v.name] = g
    # get gradients for decoder parameters
    decoder_gradvars = model_opt.compute_gradients(total_loss,
                                                   var_list=decoder_params)
    # will hold all gradvars for the model (non-variance adjusting variables)
    model_gradvars = [gv for gv in decoder_gradvars]

    # conditional samples
    v = [v_from_u(_u, log_alpha) for _u, log_alpha in zip(u, inf_la_b)]
    # need to create soft samplers
    sig_z_sampler = SIGZSampler(u, batch_temperatures, "sig_z_sampler")
    sig_zt_sampler = SIGZSampler(v, batch_temperatures, "sig_zt_sampler")

    z_sampler = ZSampler(u, "z_sampler")
    zt_sampler = ZSampler(v, "zt_sampler")

    rebars = []
    reinforces = []
    variance_objectives = []
    # have to produce 2 forward passes for each layer for z and zt samples
    for l in range(num_layers):
        cur_la_b = inf_la_b[l]

        # if standard rebar or additive relaxation
        if relaxation == "rebar" or relaxation == "add":
            # compute soft samples and soft passes through model and soft elbos
            cur_z_sample = sig_z_sampler.sample(cur_la_b, l)
            prev_samples_z = samples_b[:l] + [cur_z_sample]

            cur_zt_sample = sig_zt_sampler.sample(cur_la_b, l)
            prev_samples_zt = samples_b[:l] + [cur_zt_sample]

            prev_log_alphas = inf_la_b[:l] + [cur_la_b]

            # soft forward passes
            inf_la_z, samples_z = inference_network(x,
                                                    train_mean,
                                                    layer_type,
                                                    num_layers,
                                                    num_latents,
                                                    encoder_name,
                                                    True,
                                                    sig_z_sampler,
                                                    samples=prev_samples_z,
                                                    log_alphas=prev_log_alphas)
            gen_la_z = generator_network(samples_z, train_output_bias,
                                         layer_type, num_layers, num_latents,
                                         decoder_name, True)
            inf_la_zt, samples_zt = inference_network(
                x,
                train_mean,
                layer_type,
                num_layers,
                num_latents,
                encoder_name,
                True,
                sig_zt_sampler,
                samples=prev_samples_zt,
                log_alphas=prev_log_alphas)
            gen_la_zt = generator_network(samples_zt, train_output_bias,
                                          layer_type, num_layers, num_latents,
                                          decoder_name, True)
            # soft loss evaluations
            f_z, _ = neg_elbo(x, samples_z, inf_la_z, gen_la_z, p_prior)
            f_zt, _ = neg_elbo(x, samples_zt, inf_la_zt, gen_la_zt, p_prior)

        if relaxation == "add" or relaxation == "all":
            # sample z and zt
            prev_bs = samples_b[:l]
            cur_z_sample = z_sampler.sample(cur_la_b, l)
            cur_zt_sample = zt_sampler.sample(cur_la_b, l)

            q_z = Q_func(x,
                         train_mean,
                         cur_z_sample,
                         prev_bs,
                         Q_name(l),
                         False,
                         depth=Q_depth)
            q_zt = Q_func(x,
                          train_mean,
                          cur_zt_sample,
                          prev_bs,
                          Q_name(l),
                          True,
                          depth=Q_depth)
            tf.summary.scalar("q_z_{}".format(l), tf.reduce_mean(q_z))
            tf.summary.scalar("q_zt_{}".format(l), tf.reduce_mean(q_zt))
            if relaxation == "add":
                f_z = f_z + q_z
                f_zt = f_zt + q_zt
            elif relaxation == "all":
                f_z = q_z
                f_zt = q_zt
            else:
                assert False
        tf.summary.scalar("f_z_{}".format(l), tf.reduce_mean(f_z))
        tf.summary.scalar("f_zt_{}".format(l), tf.reduce_mean(f_zt))
        cur_samples_b = samples_b[l]
        # get gradient of sample log-likelihood wrt current parameter
        d_log_q_d_la = bernoulli_loglikelihood_derivitive(
            cur_samples_b, cur_la_b)
        # get gradient of soft-losses wrt current parameter
        d_f_z_d_la = tf.gradients(f_z, cur_la_b)[0]
        d_f_zt_d_la = tf.gradients(f_zt, cur_la_b)[0]
        batch_f_zt = tf.expand_dims(f_zt, 1)
        eta = batch_etas[l]
        # compute rebar and reinforce
        tf.summary.histogram("der_diff_{}".format(l), d_f_z_d_la - d_f_zt_d_la)
        tf.summary.histogram("d_log_q_d_la_{}".format(l), d_log_q_d_la)
        rebar = ((batch_f_b - eta * batch_f_zt) * d_log_q_d_la + eta *
                 (d_f_z_d_la - d_f_zt_d_la)) / batch_size
        reinforce = batch_f_b * d_log_q_d_la / batch_size
        rebars.append(rebar)
        reinforces.append(reinforce)
        tf.summary.histogram("rebar_{}".format(l), rebar)
        tf.summary.histogram("reinforce_{}".format(l), reinforce)
        # backpropagate rebar to individual layer parameters
        layer_params = get_variables(layer_name(l), arr=encoder_params)
        layer_rebar_grads = tf.gradients(cur_la_b, layer_params, grad_ys=rebar)
        # get direct loss grads for each parameter
        layer_loss_grads = [encoder_loss_grads[v.name] for v in layer_params]
        # each param's gradient should be rebar + the direct loss gradient
        layer_grads = [
            rg + lg for rg, lg in zip(layer_rebar_grads, layer_loss_grads)
        ]
        for rg, lg, v in zip(layer_rebar_grads, layer_loss_grads,
                             layer_params):
            tf.summary.histogram(v.name + "_grad_rebar", rg)
            tf.summary.histogram(v.name + "_grad_loss", lg)
        layer_gradvars = list(zip(layer_grads, layer_params))
        model_gradvars.extend(layer_gradvars)
        variance_objective = tf.reduce_mean(tf.square(rebar))
        variance_objectives.append(variance_objective)

    variance_objective = tf.add_n(variance_objectives)
    variance_vars = log_temperatures + etas
    if relaxation != "rebar":
        q_vars = get_variables("Q_")
        wd = tf.add_n([Q_wd * tf.nn.l2_loss(v) for v in q_vars])
        tf.summary.scalar("Q_weight_decay", wd)
        variance_vars = variance_vars + q_vars
    else:
        wd = 0.0
    variance_gradvars = variance_opt.compute_gradients(variance_objective + wd,
                                                       var_list=variance_vars)
    variance_train_op = variance_opt.apply_gradients(variance_gradvars)
    model_train_op = model_opt.apply_gradients(model_gradvars)
    with tf.control_dependencies([model_train_op, variance_train_op]):
        train_op = tf.no_op()

    for g, v in model_gradvars + variance_gradvars:
        print(g, v.name)
        if g is not None:
            tf.summary.histogram(v.name, v)
            tf.summary.histogram(v.name + "_grad", g)

    val_loss = tf.Variable(1000,
                           trainable=False,
                           name="val_loss",
                           dtype=tf.float32)
    train_loss = tf.Variable(1000,
                             trainable=False,
                             name="train_loss",
                             dtype=tf.float32)
    tf.summary.scalar("val_loss", val_loss)
    tf.summary.scalar("train_loss", train_loss)
    summ_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(train_dir)
    sess.run(tf.global_variables_initializer())

    # create savers
    train_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    val_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))

    if checkpoint_path is None:
        iters_per_epoch = X_tr.shape[0] // batch_size
        print("Train set has {} examples".format(X_tr.shape[0]))
        if relaxation != "rebar":
            print("Pretraining Q network")
            for i in range(1000):
                if i % 100 == 0:
                    print(i)
                idx = np.random.randint(0, iters_per_epoch - 1)
                batch_xs = X_tr[idx * batch_size:(idx + 1) * batch_size]
                sess.run(variance_train_op, feed_dict={x: batch_xs})
        t = time.time()
        best_val_loss = np.inf
        for epoch in range(10000000):
            train_losses = []
            for i in range(iters_per_epoch):
                cur_iter = epoch * iters_per_epoch + i
                if cur_iter > max_iters:
                    print("Training Completed")
                    return
                batch_xs = X_tr[i * batch_size:(i + 1) * batch_size]
                if i % 1000 == 0:
                    loss, _ = sess.run([total_loss, train_op], feed_dict={x: batch_xs})
                    #summary_writer.add_summary(sum_str, cur_iter)
                    time_taken = time.time() - t
                    t = time.time()
                    #print(cur_iter, loss, "{} / batch".format(time_taken / 1000))
                    if test_bias:
                        rebs = []
                        refs = []
                        for _i in range(100000):
                            if _i % 1000 == 0:
                                print(_i)
                            rb, re = sess.run([rebars[3], reinforces[3]],
                                              feed_dict={x: batch_xs})
                            rebs.append(rb[:5])
                            refs.append(re[:5])
                        rebs = np.array(rebs)
                        refs = np.array(refs)
                        re_var = np.log(refs.var(axis=0))
                        rb_var = np.log(rebs.var(axis=0))
                        print("rebar variance     = {}".format(rb_var))
                        print("reinforce variance = {}".format(re_var))
                        print("rebar     = {}".format(rebs.mean(axis=0)))
                        print("reinforce = {}\n".format(refs.mean(axis=0)))
                else:
                    loss, _ = sess.run([total_loss, train_op],
                                       feed_dict={x: batch_xs})

                train_losses.append(loss)

            # epoch over, run test data
            iwaes = []
            for x_va in X_va:
                x_va_batch = np.array([x_va for i in range(valid_batch_size)])
                iwae = sess.run(iwae_elbo, feed_dict={x: x_va_batch})
                iwaes.append(iwae)
            trl = np.mean(train_losses)
            val = np.mean(iwaes)
            print("({}) Epoch = {}, Val loss = {}, Train loss = {}".format(
                train_dir, epoch, val, trl))
            logf.write("{}: {} {}\n".format(epoch, val, trl))
            sess.run([val_loss.assign(val), train_loss.assign(trl)])
            if val < best_val_loss:
                print("saving best model")
                best_val_loss = val
                val_saver.save(sess,
                               '{}/best-model'.format(train_dir),
                               global_step=epoch)
            np.random.shuffle(X_tr)
            if epoch % 10 == 0:
                train_saver.save(sess,
                                 '{}/model'.format(train_dir),
                                 global_step=epoch)

    # run iwae elbo on test set
    else:
        val_saver.restore(sess, checkpoint_path)
        iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))
        iwaes = []
        elbos = []
        for x_te in X_te:
            x_te_batch = np.array([x_te for i in range(100)])
            iwae, elbo = sess.run([iwae_elbo, f_b], feed_dict={x: x_te_batch})
            iwaes.append(iwae)
            elbos.append(elbo)
        print("MEAN IWAE: {}".format(np.mean(iwaes)))
        print("MEAN ELBO: {}".format(np.mean(elbos)))
Example #17
0
import numpy as np
import torch
from torch.utils.data import DataLoader

import datasets
from simpledataset import SimpleDataset


DS_PATH='~/.pythondata/mnist'
BATCH_SIZE = 64
IN_SIZE = 28*28
HIDDEN_SIZE = 50
OUT_SIZE = 10
LR=0.001
NEPOCHS = 10

### Prepare Data ###
X_train, y_train, X_test, y_test = datasets.load_mnist(DS_PATH)
X_train, X_test = X_train.reshape(len(X_train), -1), X_test.reshape(len(X_test), -1)
X_train, X_test = X_train / 255, X_test / 255
y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)
train_dl = DataLoader(dataset=SimpleDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(dataset=SimpleDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False)


### Prepare Network ###
class Net(torch.nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(IN_SIZE , HIDDEN_SIZE)
        self.l2 = torch.nn.Linear(HIDDEN_SIZE, OUT_SIZE)
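SimpleDataset is imported from a local module that is not shown here. A minimal stand-in consistent with how it is passed to DataLoader (an assumption, not the project's actual class) could look like this:

# Hypothetical stand-in for simpledataset.SimpleDataset: a thin torch
# Dataset over two aligned arrays, indexable by the DataLoader above.
import torch
from torch.utils.data import Dataset

class SimpleDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]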
Example #18
0
def run_training():

    training_start_time = time.time()
    timeout_flag = False

    #############
    # Load data #
    #############
    if ARGS.data_set in ['mnist', 'fashion_mnist']:
        train_x, train_labels, valid_x, valid_labels, test_x, test_labels = datasets.load_mnist(
            ARGS.data_path)
    elif ARGS.data_set in DEBD:
        train_x, test_x, valid_x = datasets.load_debd(ARGS.data_path,
                                                      ARGS.data_set)
        train_labels = np.zeros(train_x.shape[0], dtype=np.int32)
        test_labels = np.zeros(test_x.shape[0], dtype=np.int32)
        valid_labels = np.zeros(valid_x.shape[0], dtype=np.int32)
    else:
        if ARGS.data_set == '20ng_classify':
            unpickled = pickle.load(
                open(ARGS.data_path + '/20ng-50-lda.pkl', "rb"))
        elif ARGS.data_set == 'higgs':
            unpickled = pickle.load(open(ARGS.data_path + '/higgs.pkl', "rb"))
        elif ARGS.data_set == 'wine':
            unpickled = pickle.load(open(ARGS.data_path + '/wine.pkl', "rb"))
        elif ARGS.data_set == 'wine_multiclass':
            unpickled = pickle.load(
                open(ARGS.data_path + '/wine_multiclass.pkl', "rb"))
        elif ARGS.data_set == 'theorem':
            unpickled = pickle.load(open(ARGS.data_path + '/theorem.pkl',
                                         "rb"))
        elif ARGS.data_set == 'imdb':
            unpickled = pickle.load(
                open(ARGS.data_path + '/imdb-dense-nmf-200.pkl', "rb"))
        train_x = unpickled[0]
        train_labels = unpickled[1]
        valid_x = unpickled[2]
        valid_labels = unpickled[3]
        test_x = unpickled[4]
        test_labels = unpickled[5]

    ######################
    # Data preprocessing #
    ######################
    if not ARGS.discrete_leaves:
        if ARGS.low_variance_threshold >= 0.0:
            v = np.var(train_x, 0)
            mu = np.mean(v)
            idx = v > ARGS.low_variance_threshold * mu
            train_x = train_x[:, idx]
            test_x = test_x[:, idx]
            if valid_x is not None:
                valid_x = valid_x[:, idx]

        # zero-mean, unit-variance
        if ARGS.normalization == "zmuv":
            train_x_mean = np.mean(train_x, 0)
            train_x_std = np.std(train_x, 0)

            train_x = (train_x - train_x_mean) / (train_x_std +
                                                  ARGS.zmuv_min_sigma)
            test_x = (test_x - train_x_mean) / (train_x_std +
                                                ARGS.zmuv_min_sigma)
            if valid_x is not None:
                valid_x = (valid_x - train_x_mean) / (train_x_std +
                                                      ARGS.zmuv_min_sigma)

    num_classes = len(np.unique(train_labels))
    train_n = int(train_x.shape[0])
    num_dims = int(train_x.shape[1])

    # stores evaluation metrics
    results = {
        'train_ACC': [],
        'train_CE': [],
        'train_LL': [],
        'train_MARG': [],
        'test_ACC': [],
        'test_CE': [],
        'test_LL': [],
        'test_MARG': [],
        'valid_ACC': [],
        'valid_CE': [],
        'valid_LL': [],
        'valid_MARG': [],
        'elapsed_wall_time_epoch': [],
        'best_valid_acc': None,
        'epoch_best_valid_acc': None,
        'best_valid_loss': None,
        'epoch_best_valid_loss': None
    }

    # try to restore model
    latest_model = tf.train.latest_checkpoint(ARGS.result_path +
                                              "/checkpoints/")
    if latest_model is not None:
        recovered_epoch = int(latest_model[latest_model.rfind('-') + 1:])

        if not os.path.isfile(ARGS.result_path + '/spn_description.pkl'):
            raise RuntimeError('Found checkpoint, but no description file.')
        if not os.path.isfile(ARGS.result_path + '/results.pkl'):
            raise RuntimeError('Found checkpoint, but no results file.')

        ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
            open(ARGS.result_path + '/spn_description.pkl', 'rb'))
        if ndo != num_dims or nco != num_classes:
            raise RuntimeError(
                'Inconsistent number of dimensions/classes when trying to retrieve model.'
            )

        results = pickle.load(open(ARGS.result_path + '/results.pkl', "rb"))
        for k in results:
            if type(results[k]) == list and len(
                    results[k]) != recovered_epoch + 1:
                raise AssertionError("Results seem corrupted.")

        # Make Tensorflow model
        rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)
        start_epoch_number = recovered_epoch + 1
    else:
        if ARGS.model_description_file:
            ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
                open(ARGS.model_description_file, 'rb'))
            if ndo != num_dims or nco != num_classes:
                raise RuntimeError(
                    'Inconsistent number of dimensions/classes when trying to retrieve model.'
                )

            # Make Tensorflow model
            rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)
        else:
            # Make Region Graph
            region_graph = RegionGraph(range(0, num_dims),
                                       np.random.randint(0, 1000000000))
            for _ in range(0, ARGS.num_recursive_splits):
                region_graph.random_split(2, ARGS.split_depth)
            region_graph_layers = region_graph.make_layers()

            # Make Tensorflow model
            rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)

        if not ARGS.no_save:
            pickle.dump((num_dims, num_classes, ARGS, region_graph_layers),
                        open(ARGS.result_path + '/spn_description.pkl', "wb"))

        start_epoch_number = 0

    # session
    if ARGS.GPU_fraction <= 0.95:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=ARGS.GPU_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    else:
        sess = tf.Session()

    # saver
    saver = tf.train.Saver(max_to_keep=ARGS.store_model_max)
    if ARGS.store_best_valid_acc:
        best_valid_acc_saver = tf.train.Saver(max_to_keep=1)
    if ARGS.store_best_valid_loss:
        best_valid_loss_saver = tf.train.Saver(max_to_keep=1)

    # init/load model
    if latest_model is not None:
        saver.restore(sess, latest_model)
        print("")
        print("restored model after epoch {}".format(recovered_epoch))
        print("")
    else:
        init = tf.global_variables_initializer()
        sess.run(init)
        if ARGS.model_init_file:
            init_saver = tf.train.Saver(rat_spn.all_params)
            init_saver.restore(sess, ARGS.model_init_file)
            print("")
            print("used {} to init model".format(ARGS.model_init_file))
            print("")

    print(rat_spn)
    print("num params: {}".format(get_num_params()))
    print("start training")

    # train_writer = tf.summary.FileWriter("/scratch/rp587/tensorflow_work/", sess.graph)

    ############
    # Training #
    ############

    epoch_elapsed_times = []
    batches_per_epoch = int(np.ceil(float(train_n) / float(ARGS.batch_size)))

    for epoch_n in range(start_epoch_number, ARGS.num_epochs):

        epoch_start_time = time.time()
        rp = np.random.permutation(train_n)

        batch_start_idx = 0
        elapsed_wall_time_epoch = 0.0
        for batch_n in range(0, batches_per_epoch):
            if batch_n + 1 < batches_per_epoch:
                cur_idx = rp[batch_start_idx:batch_start_idx + ARGS.batch_size]
            else:
                cur_idx = rp[batch_start_idx:]
            batch_start_idx += ARGS.batch_size

            feed_dict = {
                rat_spn.inputs: train_x[cur_idx, :],
                rat_spn.labels: train_labels[cur_idx]
            }

            if ARGS.dropout_rate_input is not None:
                feed_dict[rat_spn.
                          dropout_input_placeholder] = ARGS.dropout_rate_input
            if ARGS.dropout_rate_sums is not None:
                feed_dict[
                    rat_spn.dropout_sums_placeholder] = ARGS.dropout_rate_sums

            start_time = time.time()
            if ARGS.optimizer == "em":
                one_hot_labels = -np.inf * np.ones((len(cur_idx), num_classes))
                one_hot_labels[range(len(cur_idx)),
                               [int(x) for x in train_labels[cur_idx]]] = 0.0
                feed_dict[rat_spn.EM_deriv_input_pl] = one_hot_labels

                start_time = time.time()
                sess.run(rat_spn.em_update_accums, feed_dict=feed_dict)
                elapsed_wall_time_epoch += (time.time() - start_time)
            else:
                _, CEM_value, cur_lr, loss_val, ll_mean_val, margin_val = \
                    sess.run([
                        rat_spn.train_op,
                        rat_spn.cross_entropy_mean,
                        rat_spn.learning_rate,
                        rat_spn.objective,
                        rat_spn.neg_norm_ll,
                        rat_spn.neg_margin_objective], feed_dict=feed_dict)
                elapsed_wall_time_epoch += (time.time() - start_time)

                if batch_n % 10 == 1:
                    print(
                        "epoch: {}[{}, {:.5f}]   CE: {:.5f}   nll: {:.5f}   negmargin: {:.5f}   loss: {:.5f}   time: {:.5f}"
                        .format(epoch_n, batch_n, cur_lr, CEM_value,
                                ll_mean_val, margin_val, loss_val,
                                elapsed_wall_time_epoch))

        if ARGS.optimizer == "em":
            sess.run(rat_spn.em_update_params)
            sess.run(rat_spn.em_reset_accums)
        else:
            sess.run(rat_spn.decrease_lr_op)

        ################
        ### Evaluate ###
        ################
        print('')
        print('epoch {}'.format(epoch_n))

        num_correct_train, CE_total, train_LL, train_MARG, train_loss = compute_performance(
            sess, train_x, train_labels, 100, rat_spn)
        train_ACC = 100. * float(num_correct_train) / float(train_x.shape[0])
        train_CE = CE_total / float(train_x.shape[0])
        print('   ###')
        print(
            '   ### accuracy on train set = {}   CE = {}   LL: {}   negmargin: {}'
            .format(train_ACC, train_CE, train_LL, train_MARG))

        if test_x is not None:
            num_correct_test, CE_total, test_LL, test_MARG, test_loss = compute_performance(
                sess, test_x, test_labels, 100, rat_spn)
            test_ACC = 100. * float(num_correct_test) / float(test_x.shape[0])
            test_CE = CE_total / float(test_x.shape[0])
            print('   ###')
            print(
                '   ### accuracy on test set = {}   CE = {}   LL: {}   negmargin: {}'
                .format(test_ACC, test_CE, test_LL, test_MARG))
        else:
            test_ACC = None
            test_CE = None
            test_LL = None
            test_MARG = None

        if valid_x is not None:
            num_correct_valid, CE_total, valid_LL, valid_MARG, valid_loss = compute_performance(
                sess, valid_x, valid_labels, 100, rat_spn)
            valid_ACC = 100. * float(num_correct_valid) / float(
                valid_x.shape[0])
            valid_CE = CE_total / float(valid_x.shape[0])
            print('   ###')
            print(
                '   ### accuracy on valid set = {}   CE = {}   LL: {}   margin: {}'
                .format(valid_ACC, valid_CE, valid_LL, valid_MARG))
        else:
            valid_ACC = None
            valid_CE = None
            valid_LL = None
            valid_MARG = None

        print('   ###')
        print('')

        ##############
        ### timing ###
        ##############
        epoch_elapsed_times.append(time.time() - epoch_start_time)
        estimated_next_epoch_time = np.mean(
            epoch_elapsed_times) + 3 * np.std(epoch_elapsed_times)
        remaining_time = ARGS.timeout_seconds - (time.time() -
                                                 training_start_time)
        if estimated_next_epoch_time + ARGS.timeout_safety_seconds > remaining_time:
            print("Next epoch might exceed time limit, stop.")
            timeout_flag = True

        if not ARGS.no_save:
            results['train_ACC'].append(train_ACC)
            results['train_CE'].append(train_CE)
            results['train_LL'].append(train_LL)
            results['train_MARG'].append(train_MARG)
            results['test_ACC'].append(test_ACC)
            results['test_CE'].append(test_CE)
            results['test_LL'].append(test_LL)
            results['test_MARG'].append(test_MARG)
            results['valid_ACC'].append(valid_ACC)
            results['valid_CE'].append(valid_CE)
            results['valid_LL'].append(valid_LL)
            results['valid_MARG'].append(valid_MARG)
            results['elapsed_wall_time_epoch'].append(elapsed_wall_time_epoch)

            if ARGS.store_best_valid_acc and valid_x is not None:
                if results['best_valid_acc'] is None or valid_ACC > results[
                        'best_valid_acc']:
                    print('Better validation accuracy -> save model')
                    print('')

                    best_valid_acc_saver.save(sess,
                                              ARGS.result_path +
                                              "/best_valid_acc/model.ckpt",
                                              global_step=epoch_n,
                                              write_meta_graph=False)

                    results['best_valid_acc'] = valid_ACC
                    results['epoch_best_valid_acc'] = epoch_n

            if ARGS.store_best_valid_loss and valid_x is not None:
                if results['best_valid_loss'] is None or valid_loss < results[
                        'best_valid_loss']:
                    print('Better validation loss -> save model')
                    print('')

                    best_valid_loss_saver.save(sess,
                                               ARGS.result_path +
                                               "/best_valid_loss/model.ckpt",
                                               global_step=epoch_n,
                                               write_meta_graph=False)

                    results['best_valid_loss'] = valid_loss
                    results['epoch_best_valid_loss'] = epoch_n

            if epoch_n % ARGS.store_model_every_epochs == 0 \
                    or epoch_n + 1 == ARGS.num_epochs \
                    or timeout_flag:
                pickle.dump(results,
                            open(ARGS.result_path + '/results.pkl', "wb"))
                saver.save(sess,
                           ARGS.result_path + "/checkpoints/model.ckpt",
                           global_step=epoch_n,
                           write_meta_graph=False)

        if timeout_flag:
            sys.exit(7)
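The preprocessing block near the top of run_training first drops features whose variance is below a fraction of the mean feature variance, then optionally standardizes with training-set statistics. A small self-contained NumPy illustration of those two steps (the threshold and zmuv_min_sigma values are stand-ins for the corresponding ARGS fields):

import numpy as np

rng = np.random.default_rng(0)
train_x = rng.normal(size=(1000, 5))
train_x[:, 2] *= 1e-4                       # a nearly constant feature

low_variance_threshold = 0.1                # stand-in for ARGS.low_variance_threshold
zmuv_min_sigma = 1e-6                       # stand-in for ARGS.zmuv_min_sigma

# keep only features whose variance exceeds a fraction of the mean variance
v = np.var(train_x, 0)
idx = v > low_variance_threshold * np.mean(v)
train_x = train_x[:, idx]                   # the near-constant feature is dropped

# zero-mean, unit-variance using the training statistics only
mean, std = np.mean(train_x, 0), np.std(train_x, 0)
train_x = (train_x - mean) / (std + zmuv_min_sigma)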
Example #19
0
def run_experiment(settings):
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn

    exponential_family = settings.exponential_family

    classes = settings.classes

    K = settings.K

    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions_mixture = settings.num_repetitions_mixture

    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split
    valid_x = train_x[-10000:, :]
    online_x = train_x[-11000:-10000, :]
    train_x = train_x[-56000:-11000, :]
    # init_x = train_x[-13000:-10000, :]

    valid_labels = train_labels[-10000:]
    online_labels = train_labels[-11000:-10000]
    train_labels = train_labels[-56000:-11000]
    # init_labels = train_labels[-13000:-10000]

    # full set of training
    # valid_x = train_x[-10000:, :]
    # online_x = train_x[-11000:-10000, :]
    # train_x = train_x[:-10000, :]

    # valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-11000:-10000]
    # train_labels = train_labels[:-10000]

    # print('train_x:')
    # print(train_x.shape)
    # print(train_labels.shape)
    # print('online_x:')
    # print(online_x.shape)
    # print(online_labels.shape)
    # exit()

    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        online_x = online_x[
            np.any(np.stack([online_labels == c for c in classes], 1), 1), :]
        # init_x = init_x[np.any(np.stack([init_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

        train_labels = [l for l in train_labels if l in classes]
        train_labels_backup = train_labels
        online_labels = [l for l in online_labels if l in classes]
        # init_labels = [l for l in init_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        classes = np.unique(train_labels).tolist()

        train_labels = [l for l in train_labels if l in classes]
        train_labels_backup = train_labels
        online_labels = [l for l in online_labels if l in classes]
        # init_labels = [l for l in init_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    train_x_backup = train_x
    online_x = torch.from_numpy(online_x).to(torch.device(device))
    # init_x = torch.from_numpy(init_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    einets = []
    ps = []
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[height / d, width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(height, width),
                                                  delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(
                num_var=train_x.shape[1],
                depth=depth,
                num_repetitions=num_repetitions_mixture)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=3 if svhn else 1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            use_em=False)

        einet = EinsumNetwork.EinsumNetwork(graph, args)

        # init_dict = get_init_dict(einet, init_x, train_labels=init_labels, einet_class=c)
        init_dict = get_init_dict(einet,
                                  train_x,
                                  train_labels=train_labels,
                                  einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einets.append(einet)

        # Count the number of training samples per class
        ps.append(train_labels.count(c))

        print(f'Einsum network for class {c}:')
        print(einet)

    # normalize ps, construct mixture component
    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)

    num_params = mixture.eval_size()

    # data_dir = '../src/experiments/round5/data/weights_analysis/'
    # utils.mkdir_p(data_dir)
    # for (einet, c) in zip(einets, classes):
    #     data_file = os.path.join(data_dir, f"weights_before_{c}.json")
    #     weights = einet.einet_layers[-1].params.data.cpu()
    #     np.savetxt(data_file, einet.einet_layers[-1].reparam(weights)[0])

    ##################
    # Training phase #
    ##################

    sub_net_parameters = None
    for einet in mixture.einets:
        if sub_net_parameters is None:
            sub_net_parameters = list(einet.parameters())
        else:
            sub_net_parameters += list(einet.parameters())
    sub_net_parameters += list(mixture.parameters())

    optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)

    start_time = time.time()
    """ Learning each sub Network Generatively """
    for (einet, c) in zip(einets, classes):
        train_x_c = train_x[[l == c for l in train_labels]]

        train_N = train_x_c.shape[0]

        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N,
                                         device=device).split(batch_size)

            total_loss = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :]
                optimizer.zero_grad()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                nll = log_likelihood * -1
                nll.backward()
                optimizer.step()
                total_loss += nll.detach().item()

            print(f'[{epoch_count}]   total negative log-likelihood: {total_loss}')

    # data_dir = '../src/experiments/round5/data/weights_analysis/'
    # utils.mkdir_p(data_dir)
    # for (einet, c) in zip(einets, classes):
    #     data_file = os.path.join(data_dir, f"weights_after_{c}.json")
    #     weights = einet.einet_layers[-1].params.data.cpu()
    #     np.savetxt(data_file, einet.einet_layers[-1].reparam(weights)[0])
    # exit()

    ##################################
    # Evaluate after initialization #
    ##################################

    train_lls = []
    valid_lls = []
    test_lls = []
    train_accs = []
    valid_accs = []
    test_accs = []
    train_lls_ref = []
    valid_lls_ref = []
    test_lls_ref = []
    train_accs_ref = []
    valid_accs_ref = []
    test_accs_ref = []
    added_samples = [0]

    def eval_network(do_print=False, no_OA=False):
        if no_OA:
            train_N = train_x_backup.shape[0]
        else:
            train_N = train_x.shape[0]
        valid_N = valid_x.shape[0]
        test_N = test_x.shape[0]
        mixture.eval()
        if no_OA:
            train_ll_before = mixture.eval_loglikelihood_batched(
                train_x_backup, batch_size=batch_size)
        else:
            train_ll_before = mixture.eval_loglikelihood_batched(
                train_x, batch_size=batch_size)
        valid_ll_before = mixture.eval_loglikelihood_batched(
            valid_x, batch_size=batch_size)
        test_ll_before = mixture.eval_loglikelihood_batched(
            test_x, batch_size=batch_size)
        if do_print:
            print()
            print(
                "Experiment 3: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}"
                .format(train_ll_before / train_N, valid_ll_before / valid_N,
                        test_ll_before / test_N))
        if no_OA:
            train_lls_ref.append(train_ll_before / train_N)
            valid_lls_ref.append(valid_ll_before / valid_N)
            test_lls_ref.append(test_ll_before / test_N)
        else:
            train_lls.append(train_ll_before / train_N)
            valid_lls.append(valid_ll_before / valid_N)
            test_lls.append(test_ll_before / test_N)

        ################
        # Experiment 4 #
        ################
        if no_OA:
            train_labelsz = torch.tensor(train_labels_backup).to(
                torch.device(device))
        else:
            train_labelsz = torch.tensor(train_labels).to(torch.device(device))
        valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
        test_labelsz = torch.tensor(test_labels).to(torch.device(device))

        if no_OA:
            acc_train_before = mixture.eval_accuracy_batched(
                classes, train_x_backup, train_labelsz, batch_size=batch_size)
        else:
            acc_train_before = mixture.eval_accuracy_batched(
                classes, train_x, train_labelsz, batch_size=batch_size)
        acc_valid_before = mixture.eval_accuracy_batched(classes,
                                                         valid_x,
                                                         valid_labelsz,
                                                         batch_size=batch_size)
        acc_test_before = mixture.eval_accuracy_batched(classes,
                                                        test_x,
                                                        test_labelsz,
                                                        batch_size=batch_size)
        if do_print:
            print()
            print(
                "Experiment 4: Classification accuracies  --- train acc {}   valid acc {}   test acc {}"
                .format(acc_train_before, acc_valid_before, acc_test_before))
        if no_OA:
            train_accs_ref.append(acc_train_before)
            valid_accs_ref.append(acc_valid_before)
            test_accs_ref.append(acc_test_before)
        else:
            train_accs.append(acc_train_before)
            valid_accs.append(acc_valid_before)
            test_accs.append(acc_test_before)
        mixture.train()

    eval_network(do_print=True, no_OA=False)
    eval_network(do_print=False, no_OA=True)

    #####################################################
    # Evaluate the network with different training sets #
    #####################################################

    idx_batches = torch.randperm(online_x.shape[0], device=device).split(20)

    for idx in tqdm(idx_batches):
        online_x_idx = online_x[idx]
        online_labels_idx = [online_labels[i] for i in idx]

        for (einet, c) in zip(einets, classes):
            batch_x = online_x_idx[[l == c for l in online_labels_idx]]
            train_x_backup = torch.cat((train_x_backup, batch_x))
            train_labels_backup += [c for i in batch_x]

        added_samples.append(added_samples[-1] + len(idx))
        eval_network(do_print=False, no_OA=True)

    #####################
    # Online adaptation #
    #####################

    for idx in tqdm(idx_batches):
        online_x_idx = online_x[idx]
        online_labels_idx = [online_labels[i] for i in idx]

        for (einet, c) in zip(einets, classes):
            batch_x = online_x_idx[[l == c for l in online_labels_idx]]
            online_update(einet, batch_x)
            train_x = torch.cat((train_x, batch_x))
            train_labels += [c for i in batch_x]

        eval_network(do_print=False, no_OA=False)

    print()
    print(f'Network size: {num_params} parameters')

    return {
        'train_lls': train_lls,
        'valid_lls': valid_lls,
        'test_lls': test_lls,
        'train_accs': train_accs,
        'valid_accs': valid_accs,
        'test_accs': test_accs,
        'train_lls_ref': train_lls_ref,
        'valid_lls_ref': valid_lls_ref,
        'test_lls_ref': test_lls_ref,
        'train_accs_ref': train_accs_ref,
        'valid_accs_ref': valid_accs_ref,
        'test_accs_ref': test_accs_ref,
        'network_size': num_params,
        'online_samples': added_samples,
    }
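run_experiment only reads plain attributes from settings, so it can be driven with a simple namespace. The values below are illustrative assumptions, not the experiment's actual defaults:

from types import SimpleNamespace

settings = SimpleNamespace(
    fashion_mnist=False, svhn=False,
    exponential_family=EinsumNetwork.NormalArray,
    classes=[0, 1],                      # or None to use every class
    K=10,
    structure='binary-trees',
    pd_num_pieces=[4],                   # only used for 'poon-domingos'
    depth=3, num_repetitions_mixture=5,
    width=28, height=28,
    num_epochs=3, batch_size=100, SGD_learning_rate=0.1,
)
results = run_experiment(settings)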
Example #20
0
def run_testing():

    #############
    # Load data #
    #############
    if ARGS.data_set in ['mnist', 'fashion_mnist']:
        train_x, train_labels, valid_x, valid_labels, test_x, test_labels = datasets.load_mnist(
            ARGS.data_path)
    elif ARGS.data_set in DEBD:
        train_x, test_x, valid_x = datasets.load_debd(ARGS.data_path,
                                                      ARGS.data_set)
        train_labels = np.zeros(train_x.shape[0], dtype=np.int32)
        test_labels = np.zeros(test_x.shape[0], dtype=np.int32)
        valid_labels = np.zeros(valid_x.shape[0], dtype=np.int32)
    else:
        if ARGS.data_set == '20ng_classify':
            unpickled = pickle.load(
                open(ARGS.data_path + '/20ng-50-lda.pkl', "rb"))
        elif ARGS.data_set == 'higgs':
            unpickled = pickle.load(open(ARGS.data_path + '/higgs.pkl', "rb"))
        elif ARGS.data_set == 'wine':
            unpickled = pickle.load(open(ARGS.data_path + '/wine.pkl', "rb"))
        elif ARGS.data_set == 'wine_multiclass':
            unpickled = pickle.load(
                open(ARGS.data_path + '/wine_multiclass.pkl', "rb"))
        elif ARGS.data_set == 'theorem':
            unpickled = pickle.load(open(ARGS.data_path + '/theorem.pkl',
                                         "rb"))
        elif ARGS.data_set == 'imdb':
            unpickled = pickle.load(
                open(ARGS.data_path + '/imdb-dense-nmf-200.pkl', "rb"))
        train_x = unpickled[0]
        train_labels = unpickled[1]
        valid_x = unpickled[2]
        valid_labels = unpickled[3]
        test_x = unpickled[4]
        test_labels = unpickled[5]

    ######################
    # Data preprocessing #
    ######################
    if ARGS.low_variance_threshold >= 0.0:
        v = np.var(train_x, 0)
        mu = np.mean(v)
        idx = v > ARGS.low_variance_threshold * mu
        train_x = train_x[:, idx]
        test_x = test_x[:, idx]
        if valid_x is not None:
            valid_x = valid_x[:, idx]

    # zero-mean, unit-variance
    if ARGS.normalization == "zmuv":
        train_x_mean = np.mean(train_x, 0)
        train_x_std = np.std(train_x, 0)

        train_x = (train_x - train_x_mean) / (train_x_std +
                                              ARGS.zmuv_min_sigma)
        test_x = (test_x - train_x_mean) / (train_x_std + ARGS.zmuv_min_sigma)
        if valid_x is not None:
            valid_x = (valid_x - train_x_mean) / (train_x_std +
                                                  ARGS.zmuv_min_sigma)

    num_classes = len(np.unique(train_labels))
    num_dims = int(train_x.shape[1])

    ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
        open(ARGS.model_description_file, 'rb'))
    if ndo != num_dims or nco != num_classes:
        raise RuntimeError(
            'Inconsistent number of dimensions/classes when trying to retrieve model.'
        )

    # Make Tensorflow model
    rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS_orig)

    # session
    if ARGS.GPU_fraction <= 0.95:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=ARGS.GPU_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    else:
        sess = tf.Session()

    # init/load model
    print("Loading model")
    init = tf.global_variables_initializer()
    sess.run(init)
    init_saver = tf.train.Saver(rat_spn.all_params)
    init_saver.restore(sess, ARGS.model_init_file)

    ###########
    # Testing #
    ###########
    print("Run testing")

    num_correct_train, CE_total, train_LL, train_MARG, train_loss = compute_performance(
        sess, train_x, train_labels, 100, rat_spn)
    train_ACC = 100. * float(num_correct_train) / float(train_x.shape[0])
    train_CE = CE_total / float(train_x.shape[0])
    print('   ###')
    print(
        '   ### accuracy on train set = {}   CE = {}   LL: {}   negmargin: {}'.
        format(train_ACC, train_CE, train_LL, train_MARG))

    num_correct_test, CE_total, test_LL, test_MARG, test_loss = compute_performance(
        sess, test_x, test_labels, 100, rat_spn)
    test_ACC = 100. * float(num_correct_test) / float(test_x.shape[0])
    test_CE = CE_total / float(test_x.shape[0])
    print('   ###')
    print(
        '   ### accuracy on test set = {}   CE = {}   LL: {}   negmargin: {}'.
        format(test_ACC, test_CE, test_LL, test_MARG))

    num_correct_valid, CE_total, valid_LL, valid_MARG, valid_loss = compute_performance(
        sess, valid_x, valid_labels, 100, rat_spn)
    valid_ACC = 100. * float(num_correct_valid) / float(valid_x.shape[0])
    valid_CE = CE_total / float(valid_x.shape[0])
    print('   ###')
    print('   ### accuracy on valid set = {}   CE = {}   LL: {}   margin: {}'.
          format(valid_ACC, valid_CE, valid_LL, valid_MARG))
Example #21
0
import theano
import theano.tensor as T

from neuralmind import NeuralNetwork
from layers import HiddenLayer
from layers import DropoutLayer
import activations

from trainers import SGDTrainer
from trainers import ExponentialDecay

import datasets

# Load MNIST
datasets = datasets.load_mnist("mnist.pkl.gz")

model = NeuralNetwork(
	n_inputs=28*28,
	layers = [
		(DropoutLayer, {'probability': 0.2}),
		(HiddenLayer,
		{
			'n_units': 800, 
			'non_linearity': activations.rectify
		}),
		(DropoutLayer, {'probability': 0.5}),
		(HiddenLayer,
		{
			'n_units': 800, 
			'non_linearity': activations.rectify
Example #22
0
def run_experiment(settings):
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn

    exponential_family = settings.exponential_family

    classes = settings.classes

    K = settings.K

    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions

    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]
    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        classes = np.unique(train_labels).tolist()

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[height / d, width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(height, width),
                                              delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1],
                                          depth=depth,
                                          num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    args = EinsumNetwork.Args(num_var=train_x.shape[1],
                              num_dims=3 if svhn else 1,
                              num_classes=len(classes),
                              num_sums=K,
                              num_input_distributions=K,
                              exponential_family=exponential_family,
                              exponential_family_args=exponential_family_args,
                              use_em=False)

    einet = EinsumNetwork.EinsumNetwork(graph, args)

    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)
    print(einet)

    num_params = EinsumNetwork.eval_size(einet)

    #################################
    # Discriminative training phase #
    #################################

    optimizer = torch.optim.SGD(einet.parameters(), lr=SGD_learning_rate)
    loss_function = torch.nn.CrossEntropyLoss()

    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    start_time = time.time()

    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)

        total_loss = 0
        for idx in idx_batches:
            batch_x = train_x[idx, :]
            optimizer.zero_grad()
            outputs = einet.forward(batch_x)
            target = torch.tensor([
                classes.index(train_labels[i]) for i in idx
            ]).to(torch.device(device))
            loss = loss_function(outputs, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.detach().item()

        print(f'[{epoch_count}]   total loss: {total_loss}')

    end_time = time.time()

    ################
    # Experiment 5 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        train_x,
                                                        batch_size=batch_size)
    valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        valid_x,
                                                        batch_size=batch_size)
    test_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                       test_x,
                                                       batch_size=batch_size)
    print()
    print(
        "Experiment 5: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}"
        .format(train_ll / train_N, valid_ll / valid_N, test_ll / test_N))

    ################
    # Experiment 6 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    valid_labels = torch.tensor(valid_labels).to(torch.device(device))
    test_labels = torch.tensor(test_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    train_x,
                                                    train_labels,
                                                    batch_size=batch_size)
    acc_valid = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    valid_x,
                                                    valid_labels,
                                                    batch_size=batch_size)
    acc_test = EinsumNetwork.eval_accuracy_batched(einet,
                                                   classes,
                                                   test_x,
                                                   test_labels,
                                                   batch_size=batch_size)
    print()
    print(
        "Experiment 6: Classification accuracies  --- train acc {}   valid acc {}   test acc {}"
        .format(acc_train, acc_valid, acc_test))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'valid_ll': valid_ll / valid_N,
        'test_ll': test_ll / test_N,
        'train_acc': acc_train,
        'valid_acc': acc_valid,
        'test_acc': acc_test,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
Example #23
0
 augmentation = {
     "rotation_range": {
         "minval": -0.3,
         "maxval": 0.3
     },
     "width_shift_range": {
         "minval": -2,
         "maxval": 2
     },
     "height_shift_range": {
         "minval": -2,
         "maxval": 2
     },
 }
 ds_aug, ds_cluster, X, y = datasets.load_mnist(args.train_batch,
                                                args.test_batch,
                                                augmentation)
 # Defining hyperparameters
 n_clusters = 10
 latent_dim = 10
 input_shape = (28**2, )
 # Define optimizers
 pretrain_optimizer = {
     "type": tf.optimizers.SGD,
     "params": {
         "lr": 1,
         "momentum": 0.9
     }
 }
 cluster_optimizer = {
     "type": tf.optimizers.Adam,
Example #24
0
    t2 = time.time()
    print("    Spent time for training :  {}".format(t2-t1))

    X, y_true = test
    y_pred = model.predict(X)
    accuracy = accuracy_score(y_pred, y_true)
    print("    Accuracy :  {}\n".format(accuracy))


def run_profile(model, model_name, X, y):
    filename = model_name+".def"
    profile.runctx("for i in range(100): model.fit(X, y)",
                   globals(), locals(), filename)

    #p = pstats.Stats(filename)
    #p.print_stats()



training, test = datasets.load_mnist()
#X, y = datasets.make_classification()
#training, test = utils.train_test_split(X, y)
accuracy_and_time(SCW1(), "SCW1", training, test)
accuracy_and_time(SCW2(), "SCW2", training, test)
accuracy_and_time(LinearSVC(), "LinearSVC", training, test)

#training, test = datasets.load_mnist()
X, y = training
run_profile(SCW1(), "SCW1", X, y)
run_profile(SCW2(), "SCW2", X, y)
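The snippet above opens partway through accuracy_and_time. A plausible reconstruction of the whole helper, inferred from the visible tail and the calls to it (an assumption, not the original source):

import time
from sklearn.metrics import accuracy_score

def accuracy_and_time(model, model_name, training, test):
    print("Training {}".format(model_name))
    X, y = training
    t1 = time.time()
    model.fit(X, y)
    t2 = time.time()
    print("    Spent time for training :  {}".format(t2 - t1))

    X, y_true = test
    y_pred = model.predict(X)
    accuracy = accuracy_score(y_pred, y_true)
    print("    Accuracy :  {}\n".format(accuracy))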
Example #25
0
    parser.add_argument('--tol', default=0.001, type=float)
    parser.add_argument('--cae_weights',
                        default=None,
                        help='This argument must be given')
    parser.add_argument('--save_dir', default='results/temp')
    args = parser.parse_args()
    print(args)

    import os
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # load dataset
    from datasets import load_mnist, load_usps
    if args.dataset == 'mnist':
        x, y = load_mnist()
    elif args.dataset == 'usps':
        x, y = load_usps('data/usps')
    elif args.dataset == 'mnist-test':
        x, y = load_mnist()
        x, y = x[60000:], y[60000:]

    # prepare the DCEC model
    dcec = DCEC(input_shape=x.shape[1:],
                filters=[32, 64, 128, 10],
                n_clusters=args.n_clusters)
    plot_model(dcec.model,
               to_file=args.save_dir + '/dcec_model.png',
               show_shapes=True)
    dcec.model.summary()
Example #26
0
# ---------------------------------------------------------------------------- #

# Parameters
f                  = 20
batch_size         = 50
learning_rate      = 0.05
activation_func    = tf.nn.relu
max_train_epoch    = 100000
max_train_accur    = 0.97
load_parameters    = True
parameters_path    = pathlib.Path("model.npy")

# ---------------------------------------------------------------------------- #

# Dataset instantiation
dataset   = datasets.load_mnist() # handwritten digit database
train_set = dataset.cut(0, 50000, 50000).shuffle().cut(0, 50000, batch_size) # 1000 batches of size 50
test_set  = dataset.cut(50000, 60000, 10000)                                 # 1 batch of size 10 000

# Model instantiator
builder_opt  = tf.train.AdagradOptimizer(learning_rate)
builder_dims = [784, 100, 10] # 3 layer neural network:
                              # input layer  : 784 neurons (1 image = 28*28 pixels)
                              # hidden layer : 100 neurons
                              # output layer :  10 neurons (digits 0-9)
def builder(inputs=None):
    return models.dense_classifier(builder_dims, inputs=inputs, act_fn=activation_func, optimizer=builder_opt, epoch=True)

# Model instantiation
graph = tf.Graph()
with graph.as_default():
Example #27
0
def train(snapshotroot, device, forestType, numTrees, depth):
    xtrain, ytrain, xtest, ytest = datasets.load_mnist()

    # XXX: Other papers use val = test for this data set
    xval = xtest
    yval = ytest

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    optimizer = optim.Adam(net.parameters(), lr=0.001)
    #optimizer = optim.Adam(net.parameters(), lr = 1e-3)

    # Count parameters
    numParams = sum(params.numel() for params in net.parameters())
    numTrainable = sum(params.numel() for params in net.parameters()
                       if params.requires_grad)
    print(
        f"There are {numParams} parameters total in this model ({numTrainable} are trainable)"
    )

    numEpochs = 50
    batchSize = 200

    indices = [i for i in range(xtrain.shape[0])]

    bestEpoch = numEpochs - 1
    bestLoss = 1000.0

    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()

            optimizer.step()

            runningLoss += loss.item()
            count += 1

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        runningLoss = 0.0
        count = 0

        with torch.no_grad():
            net.train(False)
            #for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)

                runningLoss += loss.item()
                count += 1

            net.train(True)

            valLoss = runningLoss / count

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        valLosses[epoch] = valLoss

        #print(f"Info: epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")

    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0

    with torch.no_grad():
        net.train(False)
        #for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)

            tmpCorrect = torch.sum(outputs == ybatch)

            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}"
    )

    return accuracy, valLosses
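The training and evaluation loops rely on a batches(x, y, size) helper that is not shown. A minimal generator consistent with how it is called (an assumed stand-in, not the project's own helper):

def batches(x, y, batch_size):
    # Yield successive (x, y) minibatches in order; shuffling is handled by
    # the caller, which permutes xtrain/ytrain before each epoch.
    for start in range(0, x.shape[0], batch_size):
        yield x[start:start + batch_size], y[start:start + batch_size]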
Example #28
0
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras_custom import PlasticReLU, ParameterizedLayer
from datasets import load_mnist
from visualizer import Visualizer
import sys

if __name__ == '__main__':

    np.random.seed(int(sys.argv[1]))
    epochs = int(sys.argv[2])

    datasets = load_mnist(42)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    model = Sequential()
    #visualizer = Visualizer([196,196,10], model, 1.)

    model.add(
        ParameterizedLayer(
            input_dim=196,
            output_dim=100,
            init_scale=(-.5, .5),
            integer_bits=1,
            fractional_bits=2,
            ifactor=1.,
            params_0=np.ones(196, dtype='float32'),
Example #29
0
import numpy as np
import tensorflow as tf
import scipy.io as sio
import datasets

LR = 1e-4
MBsize = 24
dim_var = [784, 200]
TestInterval = 5000
max_iters = 1000000

NonLinerNN = True
PreProcess = True
dataset = "mnist"
# dataset = "omni"

if dataset == "mnist":
    X_tr, X_va, X_te = datasets.load_mnist()
elif dataset == "omni":
    X_tr, X_va, X_te = datasets.load_omniglot()
else:
    assert False

num_train = X_tr.shape[0]
num_valid = X_va.shape[0]
num_test = X_te.shape[0]
train_mean = np.mean(X_tr, axis=0, keepdims=True)

tf.reset_default_graph()


def GOBernoulli(Prob):
    zsamp = tf.cast(tf.less_equal(tf.random_uniform(Prob.shape), Prob),
Example #30
0
def run_experiment(settings):
    ############################################################################

    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn

    exponential_family = settings.exponential_family

    classes = settings.classes

    K = settings.K

    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions

    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    online_em_frequency = settings.online_em_frequency
    online_em_stepsize = settings.online_em_stepsize
    SGD_learning_rate = settings.SGD_learning_rate

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]

    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        classes = np.unique(train_labels).tolist()

        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[height / d, width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(height, width),
                                              delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1],
                                          depth=depth,
                                          num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    args = EinsumNetwork.Args(num_var=train_x.shape[1],
                              num_dims=3 if svhn else 1,
                              num_classes=1,
                              num_sums=K,
                              num_input_distributions=K,
                              exponential_family=exponential_family,
                              exponential_family_args=exponential_family_args,
                              online_em_frequency=online_em_frequency,
                              online_em_stepsize=online_em_stepsize,
                              use_em=True)

    einet = EinsumNetwork.EinsumNetwork(graph, args)
    print(einet)

    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)

    num_params = EinsumNetwork.eval_size(einet)

    # dump the parameters of the network's last layer before training, for comparison
    # with the "weights_after" dump written below
    data_dir = '../src/experiments/round5/data/weights_analysis/'
    data_file = os.path.join(data_dir, "weights_before.json")
    weights = einet.einet_layers[-1].params.data.cpu()
    np.savetxt(data_file, weights[0])

    # Train
    ######################################

    optimizer = torch.optim.SGD(einet.parameters(), lr=SGD_learning_rate)

    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    start_time = time.time()

    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)

        total_loss = 0.0
        for idx in idx_batches:
            batch_x = train_x[idx, :]
            outputs = einet.forward(batch_x)
            ll_sample = EinsumNetwork.log_likelihoods(outputs)
            log_likelihood = ll_sample.sum()
            log_likelihood.backward()
            # accumulate the negative log-likelihood so the epoch summary below is informative
            total_loss -= log_likelihood.detach().item()
            # SGD alternative (unused; parameters are updated by the EM calls instead):
            # optimizer.zero_grad(); nll = -log_likelihood; nll.backward(); optimizer.step()

            einet.em_process_batch()
        einet.em_update()

        print(f'[{epoch_count}]   total loss: {total_loss}')

    end_time = time.time()

    data_dir = '../src/experiments/round5/data/weights_analysis/'
    data_file = os.path.join(data_dir, "weights_after.json")
    weights = einet.einet_layers[-1].params.data.cpu()
    np.savetxt(data_file, weights[0])
    # exit()

    ################
    # Experiment 1 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        train_x,
                                                        batch_size=batch_size)
    valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        valid_x,
                                                        batch_size=batch_size)
    test_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                       test_x,
                                                       batch_size=batch_size)
    print()
    print(
        "Experiment 1: Log-likelihoods  --- train LL {}   valid LL {}   test LL {}"
        .format(train_ll / train_N, valid_ll / valid_N, test_ll / test_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    valid_labels = torch.tensor(valid_labels).to(torch.device(device))
    test_labels = torch.tensor(test_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    train_x,
                                                    train_labels,
                                                    batch_size=batch_size)
    acc_valid = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    valid_x,
                                                    valid_labels,
                                                    batch_size=batch_size)
    acc_test = EinsumNetwork.eval_accuracy_batched(einet,
                                                   classes,
                                                   test_x,
                                                   test_labels,
                                                   batch_size=batch_size)
    print()
    print(
        "Experiment 2: Classification accuracies  --- train acc {}   valid acc {}   test acc {}"
        .format(acc_train, acc_valid, acc_test))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'valid_ll': valid_ll / valid_N,
        'test_ll': test_ll / test_N,
        'train_acc': acc_train,
        'valid_acc': acc_valid,
        'test_acc': acc_test,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
def main(relaxation=None,
         learn_prior=True,
         max_iters=None,
         batch_size=24,
         num_latents=200,
         model_type=None,
         lr=None,
         test_bias=False,
         train_dir=None,
         iwae_samples=100,
         dataset="mnist",
         logf=None,
         var_lr_scale=10.,
         Q_wd=.0001,
         Q_depth=-1,
         checkpoint_path=None):
    valid_batch_size = 100

    if model_type == "L1":
        num_layers = 1
        layer_type = linear_layer
    elif model_type == "L2":
        num_layers = 2
        layer_type = linear_layer
    elif model_type == "NL1":
        num_layers = 1
        layer_type = nonlinear_layer
    else:
        assert False, "bad model type {}".format(model_type)

    sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True)))
    if dataset == "mnist":
        X_tr, X_va, X_te = datasets.load_mnist()
    elif dataset == "omni":
        X_tr, X_va, X_te = datasets.load_omniglot()
    else:
        raise ValueError("unknown dataset {}".format(dataset))

    num_train = X_tr.shape[0]
    num_valid = X_va.shape[0]
    num_test = X_te.shape[0]
    train_mean = np.mean(X_tr, axis=0, keepdims=True)
    train_output_bias = -np.log(1. / np.clip(train_mean, 0.001, 0.999) -
                                1.).astype(np.float32)
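    # train_output_bias is the logit of the per-pixel training mean, used to initialize
    # the decoder's output bias (a common initialization for Bernoulli image decoders).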

    x = tf.placeholder(tf.float32, [None, 784])
    # x_im = tf.reshape(x, [-1, 28, 28, 1])
    # tf.summary.image("x_true", x_im)

    # make prior for top b
    p_prior = tf.Variable(
        tf.zeros([num_latents], dtype=tf.float32),
        trainable=learn_prior,
        name='p_prior',
    )
    # create rebar specific variables temperature and eta
    log_temperatures = [create_log_temp(1) for l in range(num_layers)]
    temperatures = [tf.exp(log_temp) for log_temp in log_temperatures]
    batch_temperatures = [tf.reshape(temp, [1, -1]) for temp in temperatures]
    etas = [create_eta(1) for l in range(num_layers)]
    batch_etas = [tf.reshape(eta, [1, -1]) for eta in etas]
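    # temperature (of the sigmoid relaxation) and eta (control-variate scale) are the
    # REBAR tuning parameters; both are learned below by minimizing the estimator's
    # gradient variance.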

    # random uniform samples
    u = [
        tf.random_uniform([tf.shape(x)[0], num_latents], dtype=tf.float32)
        for l in range(num_layers)
    ]
    # create binary sampler
    b_sampler = BSampler(u, "b_sampler")
    gen_b_sampler = BSampler(u, "gen_b_sampler")
    # generate hard forward pass
    encoder_name = "encoder"
    decoder_name = "decoder"
    inf_la_b, samples_b = inference_network(x, train_mean, layer_type,
                                            num_layers, num_latents,
                                            encoder_name, False, b_sampler)
    gen_la_b = generator_network(samples_b, train_output_bias, layer_type,
                                 num_layers, num_latents, decoder_name, False)
    log_image(gen_la_b[-1], "x_pred")
    # produce samples
    _samples_la_b = generator_network(None,
                                      train_output_bias,
                                      layer_type,
                                      num_layers,
                                      num_latents,
                                      decoder_name,
                                      True,
                                      sampler=gen_b_sampler,
                                      prior=p_prior)
    log_image(_samples_la_b[-1], "x_sample")

    # hard loss evaluation and log probs
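    # f_b holds the per-example negative ELBO of the hard (discrete) pass; its batch
    # mean is the training objective, and the per-example values feed the REBAR and
    # REINFORCE estimators below.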
    f_b, log_q_bs = neg_elbo(x,
                             samples_b,
                             inf_la_b,
                             gen_la_b,
                             p_prior,
                             log=True)
    batch_f_b = tf.expand_dims(f_b, 1)
    total_loss = tf.reduce_mean(f_b)
    # tf.summary.scalar("fb", total_loss)
    # optimizer for model parameters
    model_opt = tf.train.AdamOptimizer(lr, beta2=.99999)
    # optimizer for variance reducing parameters
    variance_opt = tf.train.AdamOptimizer(var_lr_scale * lr, beta2=.99999)
    # get encoder and decoder variables
    encoder_params = get_variables(encoder_name)
    decoder_params = get_variables(decoder_name)
    if learn_prior:
        decoder_params.append(p_prior)
    # compute and store gradients of hard loss with respect to encoder_parameters
    encoder_loss_grads = {}
    for g, v in model_opt.compute_gradients(total_loss,
                                            var_list=encoder_params):
        encoder_loss_grads[v.name] = g
    # get gradients for decoder parameters
    decoder_gradvars = model_opt.compute_gradients(total_loss,
                                                   var_list=decoder_params)
    # will hold all gradvars for the model (non-variance adjusting variables)
    model_gradvars = [gv for gv in decoder_gradvars]

    # conditional samples
    v = [v_from_u(_u, log_alpha) for _u, log_alpha in zip(u, inf_la_b)]
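    # v is derived from u conditioned on the realized hard sample b, so the relaxed
    # sample z_tilde built from v follows the relaxation given b; this coupling is what
    # makes the REBAR control variate work.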
    # need to create soft samplers
    sig_z_sampler = SIGZSampler(u, batch_temperatures, "sig_z_sampler")
    sig_zt_sampler = SIGZSampler(v, batch_temperatures, "sig_zt_sampler")

    z_sampler = ZSampler(u, "z_sampler")
    zt_sampler = ZSampler(v, "zt_sampler")

    rebars = []
    reinforces = []
    variance_objectives = []
    # have to produce 2 forward passes for each layer for z and zt samples
    for l in range(num_layers):
        cur_la_b = inf_la_b[l]

        # if standard rebar or additive relaxation
        if relaxation == "rebar" or relaxation == "add":
            # compute soft samples and soft passes through model and soft elbos
            cur_z_sample = sig_z_sampler.sample(cur_la_b, l)
            prev_samples_z = samples_b[:l] + [cur_z_sample]

            cur_zt_sample = sig_zt_sampler.sample(cur_la_b, l)
            prev_samples_zt = samples_b[:l] + [cur_zt_sample]

            prev_log_alphas = inf_la_b[:l] + [cur_la_b]

            # soft forward passes
            inf_la_z, samples_z = inference_network(x,
                                                    train_mean,
                                                    layer_type,
                                                    num_layers,
                                                    num_latents,
                                                    encoder_name,
                                                    True,
                                                    sig_z_sampler,
                                                    samples=prev_samples_z,
                                                    log_alphas=prev_log_alphas)
            gen_la_z = generator_network(samples_z, train_output_bias,
                                         layer_type, num_layers, num_latents,
                                         decoder_name, True)
            inf_la_zt, samples_zt = inference_network(
                x,
                train_mean,
                layer_type,
                num_layers,
                num_latents,
                encoder_name,
                True,
                sig_zt_sampler,
                samples=prev_samples_zt,
                log_alphas=prev_log_alphas)
            gen_la_zt = generator_network(samples_zt, train_output_bias,
                                          layer_type, num_layers, num_latents,
                                          decoder_name, True)
            # soft loss evaluations
            f_z, _ = neg_elbo(x, samples_z, inf_la_z, gen_la_z, p_prior)
            f_zt, _ = neg_elbo(x, samples_zt, inf_la_zt, gen_la_zt, p_prior)

        if relaxation == "add" or relaxation == "all":
            # sample z and zt
            prev_bs = samples_b[:l]
            cur_z_sample = z_sampler.sample(cur_la_b, l)
            cur_zt_sample = zt_sampler.sample(cur_la_b, l)

            q_z = Q_func(x,
                         train_mean,
                         cur_z_sample,
                         prev_bs,
                         Q_name(l),
                         False,
                         depth=Q_depth)
            q_zt = Q_func(x,
                          train_mean,
                          cur_zt_sample,
                          prev_bs,
                          Q_name(l),
                          True,
                          depth=Q_depth)
            # tf.summary.scalar("q_z_{}".format(l), tf.reduce_mean(q_z))
            # tf.summary.scalar("q_zt_{}".format(l), tf.reduce_mean(q_zt))
            if relaxation == "add":
                f_z = f_z + q_z
                f_zt = f_zt + q_zt
            elif relaxation == "all":
                f_z = q_z
                f_zt = q_zt
            else:
                assert False
        # tf.summary.scalar("f_z_{}".format(l), tf.reduce_mean(f_z))
        # tf.summary.scalar("f_zt_{}".format(l), tf.reduce_mean(f_zt))
        cur_samples_b = samples_b[l]
        # get gradient of sample log-likelihood wrt current parameter
        d_log_q_d_la = bernoulli_loglikelihood_derivitive(
            cur_samples_b, cur_la_b)
        # get gradient of soft-losses wrt current parameter
        d_f_z_d_la = tf.gradients(f_z, cur_la_b)[0]
        d_f_zt_d_la = tf.gradients(f_zt, cur_la_b)[0]
        batch_f_zt = tf.expand_dims(f_zt, 1)
        eta = batch_etas[l]
        # compute rebar and reinforce
        # tf.summary.histogram("der_diff_{}".format(l), d_f_z_d_la - d_f_zt_d_la)
        # tf.summary.histogram("d_log_q_d_la_{}".format(l), d_log_q_d_la)
        rebar = ((batch_f_b - eta * batch_f_zt) * d_log_q_d_la + eta *
                 (d_f_z_d_la - d_f_zt_d_la)) / batch_size
        reinforce = batch_f_b * d_log_q_d_la / batch_size
        rebars.append(rebar)
        reinforces.append(reinforce)
        # tf.summary.histogram("rebar_{}".format(l), rebar)
        # tf.summary.histogram("reinforce_{}".format(l), reinforce)
        # backpropagate rebar to individual layer parameters
        layer_params = get_variables(layer_name(l), arr=encoder_params)
        layer_rebar_grads = tf.gradients(cur_la_b, layer_params, grad_ys=rebar)
        # get direct loss grads for each parameter
        layer_loss_grads = [encoder_loss_grads[v.name] for v in layer_params]
        # each param's gradient should be rebar + the direct loss gradient
        layer_grads = [
            rg + lg for rg, lg in zip(layer_rebar_grads, layer_loss_grads)
        ]
        # for rg, lg, v in zip(layer_rebar_grads, layer_loss_grads, layer_params):
        #     tf.summary.histogram(v.name + "_grad_rebar", rg)
        #     tf.summary.histogram(v.name + "_grad_loss", lg)
        layer_gradvars = list(zip(layer_grads, layer_params))
        model_gradvars.extend(layer_gradvars)
        variance_objective = tf.reduce_mean(tf.square(rebar))
        variance_objectives.append(variance_objective)

    variance_objective = tf.add_n(variance_objectives)
    variance_vars = log_temperatures + etas
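    # The REBAR estimator stays unbiased for any temperature/eta, so minimizing the
    # mean squared gradient with respect to them minimizes its variance.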
    if relaxation != "rebar":
        q_vars = get_variables("Q_")
        wd = tf.add_n([Q_wd * tf.nn.l2_loss(v) for v in q_vars])
        # tf.summary.scalar("Q_weight_decay", wd)
        # variance_vars = variance_vars + q_vars
    else:
        wd = 0.0
    variance_gradvars = variance_opt.compute_gradients(variance_objective + wd,
                                                       var_list=variance_vars)
    variance_train_op = variance_opt.apply_gradients(variance_gradvars)
    model_train_op = model_opt.apply_gradients(model_gradvars)
    with tf.control_dependencies([model_train_op, variance_train_op]):
        train_op = tf.no_op()

    # for g, v in model_gradvars + variance_gradvars:
    #     print(g, v.name)
    #     if g is not None:
    #         tf.summary.histogram(v.name, v)
    #         tf.summary.histogram(v.name + "_grad", g)

    val_loss = tf.Variable(1000,
                           trainable=False,
                           name="val_loss",
                           dtype=tf.float32)
    train_loss = tf.Variable(1000,
                             trainable=False,
                             name="train_loss",
                             dtype=tf.float32)
    # tf.summary.scalar("val_loss", val_loss)
    # tf.summary.scalar("train_loss", train_loss)
    # summ_op = tf.summary.merge_all()
    # summary_writer = tf.summary.FileWriter(train_dir)
    sess.run(tf.global_variables_initializer())

    # create savers
    train_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    val_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))

    if checkpoint_path is None:
        iters_per_epoch = X_tr.shape[0] // batch_size
        print("Train set has {} examples".format(X_tr.shape[0]))
        if relaxation != "rebar":
            print("Pretraining Q network")
            for i in range(1000):
                if i % 100 == 0:
                    print(i)
                idx = np.random.randint(0, iters_per_epoch - 1)
                batch_xs = X_tr[idx * batch_size:(idx + 1) * batch_size]
                sess.run(variance_train_op, feed_dict={x: batch_xs})
        # t = time.time()
        best_val_loss = np.inf

        # results saving
        if relaxation == 'rebar':
            mode_out = relaxation
        else:
            mode_out = 'RELAX' + relaxation
        result_dir = './Results_MNIST_SBN'
        if not os.path.isdir(result_dir):
            os.mkdir(result_dir)
        shutil.copyfile(
            sys.argv[0], result_dir + '/training_script_' + dataset + '_' +
            mode_out + '_' + model_type + '.py')
        pathsave = result_dir + '/TF_SBN_' + dataset + '_' + mode_out + '_MB[%d]_' % batch_size + model_type + '_LR[%.2e].mat' % lr

        tr_loss_mb_set = []
        tr_timerun_mb_set = []
        tr_iter_mb_set = []

        tr_loss_set = []
        tr_timerun_set = []
        tr_iter_set = []

        val_loss_set = []
        val_timerun_set = []
        val_iter_set = []

        te_loss_set = []
        te_timerun_set = []
        te_iter_set = []

        for epoch in range(10000000):
            # train_losses = []
            for i in range(iters_per_epoch):
                cur_iter = epoch * iters_per_epoch + i

                if cur_iter == 0:
                    time_start = time.time()

                if cur_iter > max_iters:
                    print("Training Completed")
                    return

                batch_xs = X_tr[i * batch_size:(i + 1) * batch_size]
                loss, _ = sess.run([total_loss, train_op],
                                   feed_dict={x: batch_xs})

                time_run = time.time() - time_start

                tr_loss_mb_set.append(loss)
                tr_timerun_mb_set.append(time_run)
                tr_iter_mb_set.append(cur_iter + 1)

                if (cur_iter + 1) % 100 == 0:
                    print(
                        'Step: [{:6d}], Loss_mb: [{:10.4f}], time_run: [{:10.4f}]'
                        .format(cur_iter + 1, loss, time_run))

                TestInterval = 5000
                Train_num_mbs = num_train // batch_size
                Valid_num_mbs = num_valid // batch_size
                Test_num_mbs = num_test // batch_size

                # Testing
                if (cur_iter + 1) % TestInterval == 0:

                    # Training
                    loss_train1 = 0
                    for step_train in range(Train_num_mbs):
                        x_train = X_tr[step_train *
                                       batch_size:(step_train + 1) *
                                       batch_size]

                        feed_dict_train = {x: x_train}
                        loss_train_mb1 = sess.run(total_loss,
                                                  feed_dict=feed_dict_train)
                        loss_train1 += loss_train_mb1 * batch_size

                    loss_train1 = loss_train1 / (Train_num_mbs * batch_size)

                    tr_loss_set.append(loss_train1)
                    tr_timerun_set.append(time_run)
                    tr_iter_set.append(cur_iter + 1)

                    # Validation
                    loss_val1 = 0
                    for step_val in range(Valid_num_mbs):
                        x_valid = X_va[step_val * batch_size:(step_val + 1) *
                                       batch_size]

                        feed_dict_val = {x: x_valid}
                        loss_val_mb1 = sess.run(total_loss,
                                                feed_dict=feed_dict_val)
                        loss_val1 += loss_val_mb1 * batch_size

                    loss_val1 = loss_val1 / (Valid_num_mbs * batch_size)

                    val_loss_set.append(loss_val1)
                    val_timerun_set.append(time_run)
                    val_iter_set.append(cur_iter + 1)

                    # Test
                    loss_test1 = 0
                    for step_test in range(Test_num_mbs):
                        x_test = X_te[step_test * batch_size:(step_test + 1) *
                                      batch_size]

                        feed_dict_test = {x: x_test}
                        loss_test_mb1 = sess.run(total_loss,
                                                 feed_dict=feed_dict_test)
                        loss_test1 += loss_test_mb1 * batch_size

                    loss_test1 = loss_test1 / (Test_num_mbs * batch_size)

                    te_loss_set.append(loss_test1)
                    te_timerun_set.append(time_run)
                    te_iter_set.append(cur_iter + 1)

                    print(
                        '============TestInterval: [{:6d}], Loss_train: [{:10.4f}], Loss_val: [{:10.4f}], Loss_test: [{:10.4f}]'
                        .format(TestInterval, loss_train1, loss_val1,
                                loss_test1))

                # Saving
                if (cur_iter + 1) % TestInterval == 0:
                    sio.savemat(
                        pathsave, {
                            'tr_loss_mb_set': tr_loss_mb_set,
                            'tr_timerun_mb_set': tr_timerun_mb_set,
                            'tr_iter_mb_set': tr_iter_mb_set,
                            'tr_loss_set': tr_loss_set,
                            'tr_timerun_set': tr_timerun_set,
                            'tr_iter_set': tr_iter_set,
                            'val_loss_set': val_loss_set,
                            'val_timerun_set': val_timerun_set,
                            'val_iter_set': val_iter_set,
                            'te_loss_set': te_loss_set,
                            'te_timerun_set': te_timerun_set,
                            'te_iter_set': te_iter_set,
                        })
Example #32
0
    # NOTE: this example is truncated and begins mid-class; the enclosing signature
    # below is assumed (hypothetical) for readability.
    def cost(self, out, expected):
        s = 0
        for i in range(0, len(out)):
            s += np.sum(np.square(expected[i] - out[i]))
        return (0.5 / len(out)) * s

    def accuracy(self, imgs, labels):
        correct = 0
        for i in range(0, len(imgs)):
            in_layer = img2input(imgs[i])
            out_layer = self.forward_pass(in_layer)
            if np.argmax(out_layer) + 1 == labels[i][0]:
                correct += 1
        return float(correct) / len(imgs)


X, Y = datasets.load_mnist("training")

# preprocess
for i in range(0, len(X)):
    X[i] /= 255.0

print("Preprocessing finished")

mlp = MLP([28 * 28, 30, 10])
mlp.init_weights()

#print("Before training accuracy: ", mlp.accuracy(X, Y))
out = mlp.forward_pass(img2input(X[0]))
expected = digit2vec(Y[0])

# expecteds = map(digit2vec, Y)