def main():
    """Load MNIST, carve out train/dev/valid/test splits, and run the
    clickable analysis on the held-out test set."""
    features, targets = load_mnist()
    # The last 20000 points are reserved for validation + test, so the
    # sampling pool is everything before them.
    pool_size = features.shape[0] - 20000
    sample_frac = 0.01  # sampling 1% of the points
    split = int(sample_frac * pool_size)
    print(split)
    # The training set acts as the sample of data for which we query
    # volunteer classifications. It is drawn uniformly at random from the
    # whole pool, targeting the most densely populated regions of feature
    # space.
    x_train, y_train = features[:split], targets[:split]
    x_train_dev, y_train_dev = features[split:2 * split], targets[split:2 * split]
    x_valid, y_valid = features[50000:60000], targets[50000:60000]
    x_test, y_test = features[60000:], targets[60000:]
    print(x_train.shape)
    clickable_analysis(x_test, y_test)
def model_selection():
    """Fit several scaled classifier pipelines on MNIST and print each
    model's name together with its test-set accuracy.

    Side effects: trains every pipeline (slow on full MNIST) and prints
    one line per model.
    """
    random_state = 8888  # NOTE(review): defined but unused by the pipelines below
    x_train, y_train, x_test, y_test = load_mnist()
    # Each candidate is wrapped in a Pipeline so scaling is fit on the
    # training split only.
    pipelines = [
        ('MLP', Pipeline([('Scaler', StandardScaler()),
                          ('MLP', MLPClassifier())])),
        ('Fisher discriminant analysis',
         Pipeline([('Scaler', StandardScaler()),
                   ('Fisher discriminant analysis', LinearDiscriminantAnalysis())])),
        ('Perceptron', Pipeline([('Scaler', StandardScaler()),
                                 ('Perceptron', Perceptron())])),
        ('LogisticRegression', Pipeline([('Scaler', StandardScaler()),
                                         ('LogisticRegression', LogisticRegression())])),
        ('Linear SVM', Pipeline([('Scaler', StandardScaler()),
                                 ('Linear SVM', SVC(kernel="linear", C=0.025))])),
        ('SVM RBF', Pipeline([('Scaler', StandardScaler()),
                              ('SVM RBF', SVC(gamma=2, C=1))])),
    ]
    for name, model in pipelines:
        model.fit(x_train, y_train)
        y_pred_class = model.predict(x_test)
        # Bug fix: print the model's name, not the literal string 'name'.
        print(name, metrics.accuracy_score(y_test, y_pred_class))
def idec(dataset="mnist", gamma=0.1, maxiter=2e4, update_interval=20,
         tol=0.00001, batch_size=256):
    """Train and evaluate an IDEC clustering model on the chosen dataset.

    Parameters
    ----------
    dataset : str
        One of 'mnist', 'usps', 'stl' or 'cifar_10'.
    gamma : float
        Weight of the clustering loss relative to the reconstruction loss.
    maxiter : float
        Maximum number of clustering iterations.
    update_interval : int
        Target-distribution refresh period (overridden per dataset below).
    tol : float
        Stop when fewer than this fraction of points change cluster.
    batch_size : int
        Mini-batch size (overridden for cifar_10).
    """
    # (Removed no-op self-assignments like ``maxiter = maxiter``.)
    ae_weights = ("ae_weights/" + dataset + "_ae_weights/"
                  + dataset + "_ae_weights.h5")
    optimizer = SGD(lr=0.01, momentum=0.9)
    from datasets import load_mnist, load_usps, load_stl, load_cifar
    if dataset == 'mnist':
        # recommends: n_clusters=10, update_interval=140
        x, y = load_mnist('./data/mnist/mnist.npz')
        update_interval = 140
    elif dataset == 'usps':
        # recommends: n_clusters=10, update_interval=30
        x, y = load_usps('data/usps')
        update_interval = 30
    elif dataset == "stl":
        x, y = load_stl()
        update_interval = 20
    elif dataset == "cifar_10":
        x, y = load_cifar()
        update_interval = 140
        batch_size = 120
    # Fix: Python 3 print call (the rest of the file uses Python 3 syntax;
    # the old ``print gamma, dataset`` statement is a py3 syntax error).
    print(gamma, dataset)
    try:
        count = Counter(y)
    except TypeError:
        # y may be 2-D (n, 1): its rows are unhashable, so count the first
        # column instead. (Narrowed from a bare except.)
        count = Counter(y[:, 0])
    n_clusters = len(count)
    save_dir = 'results/idec_dataset:' + dataset + " gamma:" + str(gamma)
    # prepare the IDEC model
    idec = IDEC(dims=[x.shape[-1], 500, 500, 2000, 10],
                n_clusters=n_clusters, batch_size=batch_size)
    idec.initialize_model(ae_weights=ae_weights, gamma=gamma, optimizer=optimizer)
    plot_model(idec.model, to_file='idec_model.png', show_shapes=True)
    idec.model.summary()
    # begin clustering; time does not include the pretraining part.
    t0 = time()
    y_pred = idec.clustering(x, y=y, tol=tol, maxiter=maxiter,
                             update_interval=update_interval, save_dir=save_dir)
    print('acc:', cluster_acc(y, y_pred))
    print('clustering time: ', (time() - t0))
def load_data(folder):
    """Load MNIST from *folder*; return (train_data, train_labels,
    test_data, test_labels) with a trailing channel axis on the images
    and one-hot encoded labels."""
    raw = load_mnist(folder)
    train_data, train_labels = raw.get('train_data'), raw.get('train_labels')
    test_data, test_labels = raw.get('test_data'), raw.get('test_labels')
    # Append a channel dimension to the image arrays.
    train_data = np.expand_dims(train_data, axis=-1)
    test_data = np.expand_dims(test_data, axis=-1)
    # Turn integer labels into one-hot vectors over NUM_CLASSES.
    train_labels = make_one_hot_labels(train_labels, NUM_CLASSES)
    test_labels = make_one_hot_labels(test_labels, NUM_CLASSES)
    return train_data, train_labels, test_data, test_labels
def main():
    """Run DEC clustering on MNIST for a range of training-set sizes,
    reporting wall-clock time and clustering accuracy for each size."""
    n_clusters = 10  # chosen from prior knowledge of classes in the data set
    batch_size = 256
    lr = 0.01        # learning rate
    momentum = 0.9
    # tolerance: clustering stops if less than this fraction of the data
    # changes cluster on an iteration
    tol = 0.001
    maxiter = 2e4
    update_interval = 140
    save_dir = './results/dec'

    x, y = load_mnist()
    training_set_sizes = [500, 1000, 5000, 10000, 50000]

    # A single DEC instance is reinitialized for each training-set size.
    dec = DEC(dims=[x.shape[-1], 500, 500, 2000, 10],
              n_clusters=n_clusters, batch_size=batch_size)

    for size in training_set_sizes:
        x_train, y_train = x[:size], y[:size]
        ae_weights = './ae_weights_m%d.h5' % size
        dec.initialize_model(optimizer=SGD(lr=lr, momentum=momentum),
                             ae_weights=ae_weights, x=x_train)
        t0 = time()
        y_pred = dec.clustering(x_train, y=y_train, tol=tol, maxiter=maxiter,
                                update_interval=update_interval,
                                save_dir=save_dir + '/%d' % size)
        print('clustering time: ', (time() - t0))
        print('acc:', cluster_acc(y_train, y_pred))
# Validate the requested mode before any data is loaded.
if len(sys.argv) < 2:
    terminate()
else:
    mode = sys.argv[1]
    if mode not in func_mode_list:
        terminate()


def show_plot_sample():
    """Plot a 5x5 grid of random test digits, each annotated with its
    true label and the KNN classifier's prediction."""
    fig = plt.figure(figsize=(8, 8))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1,
                        hspace=0.05, wspace=0.05)
    for cell in tqdm(range(25)):
        idx = random.randint(0, len(testX) - 1)
        digit = np.reshape(testX[idx], [28, 28])
        axis = fig.add_subplot(5, 5, cell + 1, xticks=[], yticks=[])
        axis.imshow(digit, cmap=plt.cm.binary, interpolation='nearest')
        axis.text(0, 2, "label:" + str(testY[idx]))
        axis.text(0, 4, "predict:" + str(knn.predict(testX[idx])))
    plt.show()


if __name__ == '__main__':
    trainX, trainY, testX, testY = load_mnist()
    knn = KNNClassifier(train_data=trainX, train_labels=trainY, ord=2)
    if mode == 'run_sample':
        show_plot_sample()
    else:
        knn.test_acc(test_data=testX, test_label=testY, K=1)
def __init__(self, conf):
    """Set up a SuPAIR training run from the configuration *conf*.

    Creates a fresh result directory, builds the model graph and its Adam
    training op on the configured device, restores or initializes the TF
    variables, and loads/preprocesses the configured dataset.
    """
    self.conf = conf
    # determine and create result dir: pick the first free 'runN' suffix
    i = 1
    log_path = conf.result_path + 'run0'
    while os.path.exists(log_path):
        log_path = '{}run{}'.format(conf.result_path, i)
        i += 1
    os.makedirs(log_path)
    self.log_path = log_path
    if not os.path.exists(conf.checkpoint_dir):
        os.makedirs(conf.checkpoint_dir)
    self.checkpoint_file = os.path.join(self.conf.checkpoint_dir, "model.ckpt")
    # placeholder shape: (batch, width, height, channels)
    input_shape = [
        conf.batch_size, conf.scene_width, conf.scene_height, conf.channels
    ]
    # build model
    with tf.device(conf.device):
        self.mdl = model.Supair(conf)
        self.in_ph = tf.placeholder(tf.float32, input_shape)
        self.elbo = self.mdl.elbo(self.in_ph)
        self.mdl.num_parameters()
        self.optimizer = tf.train.AdamOptimizer()
        # maximize the ELBO by minimizing its negation
        self.train_op = self.optimizer.minimize(-1 * self.elbo)
    self.sess = tf.Session()
    self.saver = tf.train.Saver()
    # either restore a previous checkpoint or initialize fresh variables
    if self.conf.load_params:
        self.saver.restore(self.sess, self.checkpoint_file)
    else:
        self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())
    # load data
    bboxes = None
    if conf.dataset == 'MNIST':
        (x, counts, y, bboxes), (x_test, c_test, _, _) = datasets.load_mnist(
            conf.scene_width, max_digits=2, path=conf.data_path)
        visualize.store_images(x[0:10], log_path + '/img_raw')
        if conf.noise:
            x = datasets.add_noise(x)
            x_test = datasets.add_noise(x_test)
            visualize.store_images(x[0:10], log_path + '/img_noisy')
        if conf.structured_noise:
            x = datasets.add_structured_noise(x)
            x_test = datasets.add_structured_noise(x_test)
            visualize.store_images(x[0:10], log_path + '/img_struc_noisy')
        x_color = np.squeeze(x)
    elif conf.dataset == 'sprites':
        (x_color, counts, _), (x_test, c_test, _) = datasets.make_sprites(
            50000, path=conf.data_path)
        if conf.noise:
            x_color = datasets.add_noise(x_color)
        # grayscale copies for the model, clipped back into [0, 1]
        x = visualize.rgb2gray(x_color)
        x = np.clip(x, 0.0, 1.0)
        x_test = visualize.rgb2gray(x_test)
        x_test = np.clip(x_test, 0.0, 1.0)
        if conf.noise:
            # NOTE(review): x_color already had noise added above, so this
            # branch appears to noise it a second time — confirm intended.
            x = datasets.add_noise(x)
            x_test = datasets.add_noise(x_test)
            x_color = datasets.add_noise(x_color)
    elif conf.dataset == 'omniglot':
        # invert intensities (1 - data); omniglot has no count annotations
        x = 1 - datasets.load_omniglot(path=conf.data_path)
        counts = np.ones(x.shape[0], dtype=np.int32)
        x_color = np.squeeze(x)
    elif conf.dataset == 'svhn':
        x, counts, objects, bgs = datasets.load_svhn(path=conf.data_path)
        self.pretrain(x, objects, bgs)
        x_color = np.squeeze(x)
    else:
        raise ValueError('unknown dataset', conf.dataset)
    # NOTE(review): x_test/c_test are only assigned in the MNIST and sprites
    # branches — omniglot/svhn would hit a NameError below; confirm those
    # datasets are used with a different entry path.
    self.x, self.x_color, self.counts = x, x_color, counts
    self.x_test, self.c_test = x_test, c_test
    self.bboxes = bboxes
    print('Built model')
    self.obj_reconstructor = SpnReconstructor(self.mdl.obj_spn)
    self.bg_reconstructor = SpnReconstructor(self.mdl.bg_spn)
    tfgraph = tf.get_default_graph()
    # handles to intermediate tensors inspected during evaluation/visualization
    self.tensors_of_interest = {
        'z_where': tfgraph.get_tensor_by_name('z_where:0'),
        'z_pres': tfgraph.get_tensor_by_name('z_pres:0'),
        'bg_score': tfgraph.get_tensor_by_name('bg_score:0'),
        'y': tfgraph.get_tensor_by_name('y:0'),
        'obj_vis': tfgraph.get_tensor_by_name('obj_vis:0'),
        'bg_maps': tfgraph.get_tensor_by_name('bg_maps:0')
    }
def run_experiment(settings):
    """Run the class-conditional EinsumNetwork mixture experiment.

    Builds one EinsumNetwork per class, combines them into an EinetMixture
    weighted by class frequency, evaluates log-likelihoods and accuracies
    before and after SGD training, and returns all recorded metrics.

    Parameters
    ----------
    settings : object
        Experiment configuration (dataset flags, structure, K, depth,
        width/height, num_epochs, batch_size, SGD_learning_rate, ...).

    Returns
    -------
    dict
        Per-phase train/valid/test log-likelihoods and accuracies, the
        network size, and the training time in seconds.
    """
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn
    exponential_family = settings.exponential_family
    classes = settings.classes
    K = settings.K
    structure = settings.structure
    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces
    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions_mixture
    width = settings.width
    height = settings.height
    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate
    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist()
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn()
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    # Readability fix: 'if not a != b' is just 'if a == b'. Normal leaves
    # need pixels rescaled from [0, 255] to [-0.5, 0.5].
    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split: hold out the last 10000 training samples
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]

    # pick the selected classes
    if classes is not None:
        train_x = train_x[np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[np.any(np.stack([test_labels == c for c in classes], 1), 1), :]
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        # use every class present in the training labels
        classes = np.unique(train_labels).tolist()
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    einets = []
    ps = []  # per-class sample counts, normalized to mixture weights below
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[height / d, width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(height, width), delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth,
                                              num_repetitions=num_repetitions)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=3 if svhn else 1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            use_em=False)

        einet = EinsumNetwork.EinsumNetwork(graph, args)
        init_dict = get_init_dict(einet, train_x, train_labels=train_labels, einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einets.append(einet)

        # Calculate amount of training samples per class
        ps.append(train_labels.count(c))
        print(f'Einsum network for class {c}:')
        print(einet)

    # normalize ps, construct mixture component
    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)
    num_params = mixture.eval_size()

    #################################
    # Evaluate after initialization #
    #################################
    train_lls = []
    valid_lls = []
    test_lls = []
    train_accs = []
    valid_accs = []
    test_accs = []

    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    mixture.eval()
    train_ll_before = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size, skip_reparam=True)
    valid_ll_before = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size, skip_reparam=True)
    test_ll_before = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size, skip_reparam=True)
    print()
    print("Experiment 3: Log-likelihoods --- train LL {} valid LL {} test LL {}".format(
        train_ll_before / train_N,
        valid_ll_before / valid_N,
        test_ll_before / test_N))
    train_lls.append(train_ll_before / train_N)
    valid_lls.append(valid_ll_before / valid_N)
    test_lls.append(test_ll_before / test_N)

    ################
    # Experiment 4 #
    ################
    train_labelsz = torch.tensor(train_labels).to(torch.device(device))
    valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
    test_labelsz = torch.tensor(test_labels).to(torch.device(device))

    acc_train_before = mixture.eval_accuracy_batched(classes, train_x, train_labelsz,
                                                     batch_size=batch_size, skip_reparam=True)
    acc_valid_before = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz,
                                                     batch_size=batch_size, skip_reparam=True)
    acc_test_before = mixture.eval_accuracy_batched(classes, test_x, test_labelsz,
                                                    batch_size=batch_size, skip_reparam=True)
    print()
    print("Experiment 4: Classification accuracies --- train acc {} valid acc {} test acc {}".format(
        acc_train_before, acc_valid_before, acc_test_before))
    train_accs.append(acc_train_before)
    valid_accs.append(acc_valid_before)
    test_accs.append(acc_test_before)
    mixture.train()

    ##################
    # Training phase #
    ##################
    # Learning each sub-network generatively.
    sub_net_parameters = None
    for einet in mixture.einets:
        if sub_net_parameters is None:
            sub_net_parameters = list(einet.parameters())
        else:
            sub_net_parameters += list(einet.parameters())
    sub_net_parameters += list(mixture.parameters())
    optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)

    start_time = time.time()
    for epoch_count in range(num_epochs):
        for (einet, c) in zip(einets, classes):
            # each sub-network only sees samples of its own class
            train_x_c = train_x[[l == c for l in train_labels]]
            valid_x_c = valid_x[[l == c for l in valid_labels]]
            test_x_c = test_x[[l == c for l in test_labels]]

            train_N = train_x_c.shape[0]
            valid_N = valid_x_c.shape[0]
            test_N = test_x_c.shape[0]

            idx_batches = torch.randperm(train_N, device=device).split(batch_size)
            total_loss = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :]
                optimizer.zero_grad()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                nll = log_likelihood * -1
                nll.backward()
                optimizer.step()
                total_loss += nll.detach().item()
            print(f'[{epoch_count}] total loss: {total_loss}')
    # Bug fix: capture the end time AFTER the training loop. Previously
    # end_time was taken immediately after start_time, so the reported
    # training time was always ~0.
    end_time = time.time()

    mixture.eval()
    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]
    train_ll = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size)
    valid_ll = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size)
    test_ll = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size)
    train_lls.append(train_ll / train_N)
    valid_lls.append(valid_ll / valid_N)
    test_lls.append(test_ll / test_N)

    train_labelsz = torch.tensor(train_labels).to(torch.device(device))
    valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
    test_labelsz = torch.tensor(test_labels).to(torch.device(device))
    acc_train = mixture.eval_accuracy_batched(classes, train_x, train_labelsz, batch_size=batch_size)
    acc_valid = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz, batch_size=batch_size)
    acc_test = mixture.eval_accuracy_batched(classes, test_x, test_labelsz, batch_size=batch_size)
    train_accs.append(acc_train)
    valid_accs.append(acc_valid)
    test_accs.append(acc_test)
    mixture.train()

    print()
    print("Experiment 3: Log-likelihoods --- train LL {} valid LL {} test LL {}".format(
        train_ll / train_N, valid_ll / valid_N, test_ll / test_N))
    print()
    print("Experiment 4: Classification accuracies --- train acc {} valid acc {} test acc {}".format(
        acc_train, acc_valid, acc_test))
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_lls': train_lls,
        'valid_lls': valid_lls,
        'test_lls': test_lls,
        'train_accs': train_accs,
        'valid_accs': valid_accs,
        'test_accs': test_accs,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
def check_einets_eq(e1, e2):
    """Assert that two einsum networks have the same layer count and
    element-wise identical parameters on every parameterized layer."""
    assert len(e1.einet_layers) == len(e2.einet_layers)
    for layer_a, layer_b in zip(e1.einet_layers, e2.einet_layers):
        if hasattr(layer_a, "params"):
            assert torch.all(torch.eq(layer_a.params, layer_b.params))


classes = [7]
num_epochs = 5
batch_size = 100

############################################################################

# get data
train_x, train_labels, test_x, test_labels = datasets.load_mnist()

# validation split: hold out the last 10000 training samples
valid_x, train_x = train_x[-10000:, :], train_x[:-10000, :]
valid_labels, train_labels = train_labels[-10000:], train_labels[:-10000]

# pick the selected classes
if classes is not None:
    def _class_mask(labels):
        # boolean row mask: True where the label is one of the selected classes
        return np.any(np.stack([labels == c for c in classes], 1), 1)

    train_x = train_x[_class_mask(train_labels), :]
    valid_x = valid_x[_class_mask(valid_labels), :]
    test_x = test_x[_class_mask(test_labels), :]

train_x = torch.from_numpy(train_x).to(torch.device(device))
valid_x = torch.from_numpy(valid_x).to(torch.device(device))
cfg.read('config.ini')
# Parse the [train] section, coercing digit-strings to ints.
train_cfg = {key: int(val) if val.isdigit() else val
             for key, val in cfg.items('train')}
print('config:', train_cfg)

# parameters from config
context_size = train_cfg['context_size']
x_dim = train_cfg['x_dim']
h_dim = train_cfg['h_dim']
r_dim = train_cfg['r_dim']
z_dim = train_cfg['z_dim']
y_dim = train_cfg['y_dim']
batch_size = train_cfg['batch_size']
n_iter = train_cfg['n_iter']
n_epoch = train_cfg['n_epoch']
n_display = train_cfg['n_display']

# prefer the GPU when one is visible
device = torch.device('cuda') if torch.cuda.device_count() > 0 else torch.device('cpu')

# load dataloader (MNIST; a CelebA loader exists as an alternative)
data_loader = datasets.load_mnist(batch_size=batch_size)
# data_loader = datasets.load_celeba(batch_size=batch_size)

model = NeuralProcess(x_dim=x_dim, h_dim=h_dim, r_dim=r_dim,
                      z_dim=z_dim, y_dim=y_dim, device=device)
optimizer = optim.Adam(model.parameters(), lr=4e-3)

ModelTrainer = trainer.NPTrainer(model=model, context_size=context_size,
                                 optimizer=optimizer, device=device)
ModelTrainer.train(data_loader=data_loader, n_epoch=n_epoch,
                   n_iter=n_iter, test_for_every=n_display)
def new_start(start_train_set, online_offset):
    """Build and evaluate a per-class EinsumNetwork mixture for an online-EM run.

    Loads the configured dataset, holds out a validation split whose size
    depends on *start_train_set* and *online_offset*, builds one online-EM
    EinsumNetwork per class, combines them into a class-frequency-weighted
    EinetMixture, and records the pre-training log-likelihoods and accuracies.

    NOTE(review): reads ``settings``/``device`` and appends to ``train_lls``,
    ``valid_lls``, ``test_lls``, ``train_accs``, ``valid_accs`` and
    ``test_accs`` without defining them locally — presumably module-level
    globals; confirm they are initialized before this function is called.
    """
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn
    exponential_family = settings.exponential_family
    classes = settings.classes
    K = settings.K
    structure = settings.structure
    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces
    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions_mixture
    width = settings.width
    height = settings.height
    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    online_em_frequency = settings.online_em_frequency
    online_em_stepsize = settings.online_em_stepsize
    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist()
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn()
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    # 'not a != b' means a == b: rescale pixels for Normal leaves only
    if not exponential_family != EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split: last 10000 samples; the online pool between the
    # initial training set and the validation split is excluded from training
    valid_x = train_x[-10000:, :]
    # online_x = train_x[-40000:, :]
    train_x = train_x[:-(10000+online_offset-start_train_set), :]
    valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-40000:]
    train_labels = train_labels[:-(10000+online_offset-start_train_set)]

    # # debug setup
    # valid_x = train_x[-10000:, :]
    # online_x = train_x[-45000:, :]
    # train_x = train_x[:-55000, :]
    # valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-45000:]
    # train_labels = train_labels[:-55000]

    # valid_x = train_x[-10000:, :]
    # online_x = train_x[-10000:, :]
    # train_x = train_x[:-20000, :]
    # valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-10000:]
    # train_labels = train_labels[:-20000]

    # valid_x = train_x[-10000:, :]
    # online_x = train_x[-20000:, :]
    # train_x = train_x[:-30000, :]
    # valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-20000:]
    # train_labels = train_labels[:-30000]

    # pick the selected classes
    if classes is not None:
        train_x = train_x[np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        # online_x = online_x[np.any(np.stack([online_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[np.any(np.stack([test_labels == c for c in classes], 1), 1), :]
        train_labels = [l for l in train_labels if l in classes]
        # online_labels = [l for l in online_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        # use every class present in the training labels
        classes = np.unique(train_labels).tolist()
        train_labels = [l for l in train_labels if l in classes]
        # online_labels = [l for l in online_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    # online_x = torch.from_numpy(online_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    einets = []
    ps = []  # per-class sample counts, normalized to mixture weights below
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[height / d, width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(height, width), delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth,
                                              num_repetitions=num_repetitions)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=3 if svhn else 1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            online_em_frequency=online_em_frequency,
            online_em_stepsize=online_em_stepsize)

        einet = EinsumNetwork.EinsumNetwork(graph, args)
        init_dict = get_init_dict(einet, train_x, train_labels=train_labels, einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einets.append(einet)

        # Calculate amount of training samples per class
        ps.append(train_labels.count(c))
        print(f'Einsum network for class {c}:')
        print(einet)

    # normalize ps, construct mixture component
    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)
    num_params = mixture.eval_size()

    ##################################
    # Evalueate after initialization #
    ##################################
    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    mixture.eval()
    train_ll_before = mixture.eval_loglikelihood_batched(train_x, batch_size=batch_size)
    valid_ll_before = mixture.eval_loglikelihood_batched(valid_x, batch_size=batch_size)
    test_ll_before = mixture.eval_loglikelihood_batched(test_x, batch_size=batch_size)
    print()
    print("Experiment 3: Log-likelihoods --- train LL {} valid LL {} test LL {}".format(
        train_ll_before / train_N,
        valid_ll_before / valid_N,
        test_ll_before / test_N))
    train_lls.append(train_ll_before / train_N)
    valid_lls.append(valid_ll_before / valid_N)
    test_lls.append(test_ll_before / test_N)

    ################
    # Experiment 4 #
    ################
    train_labelsz = torch.tensor(train_labels).to(torch.device(device))
    valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
    test_labelsz = torch.tensor(test_labels).to(torch.device(device))

    acc_train_before = mixture.eval_accuracy_batched(classes, train_x, train_labelsz, batch_size=batch_size)
    acc_valid_before = mixture.eval_accuracy_batched(classes, valid_x, valid_labelsz, batch_size=batch_size)
    acc_test_before = mixture.eval_accuracy_batched(classes, test_x, test_labelsz, batch_size=batch_size)
    print()
    # NOTE(review): header says "Experiment 8" while the section comment says
    # "Experiment 4" — possibly a copy-paste slip; confirm before changing.
    print("Experiment 8: Classification accuracies --- train acc {} valid acc {} test acc {}".format(
        acc_train_before, acc_valid_before, acc_test_before))
    train_accs.append(acc_train_before)
    valid_accs.append(acc_valid_before)
    test_accs.append(acc_test_before)
def sdec(dataset="mnist", gamma=0.1, beta=1, maxiter=2e4, update_interval=20,
         tol=0.00001, batch_size=256):
    """Train and evaluate an SDEC clustering model on the chosen dataset.

    Arguments:
        dataset: choice of the dataset to run ('mnist', 'usps', 'stl', 'cifar_10').
        gamma: the Lambda in the lecture (clustering-loss weight).
        beta: the proportion of information we have known about the sample.
        maxiter: maximum number of clustering iterations.
        update_interval: target-distribution refresh period (overridden per dataset).
        tol: stop when fewer than this fraction of points change cluster.
        batch_size: mini-batch size.
    """
    # (Removed no-op self-assignments like ``maxiter = maxiter``.)
    ae_weights = ("ae_weights/" + dataset + "_ae_weights/"
                  + dataset + "_ae_weights.h5")
    # load dataset
    from datasets import load_mnist, load_usps, load_stl, load_cifar
    if dataset == 'mnist':
        # recommends: n_clusters=10, update_interval=140
        x, y = load_mnist('./data/mnist/mnist.npz')
        update_interval = 140
    elif dataset == 'usps':
        # recommends: n_clusters=10, update_interval=30
        x, y = load_usps('data/usps')
        update_interval = 30
    elif dataset == "stl":
        x, y = load_stl()
        update_interval = 20
    elif dataset == "cifar_10":
        x, y = load_cifar()
        update_interval = 40
    print(gamma, dataset, beta)
    # prepare the SDEC model
    try:
        count = Counter(y)
    except TypeError:
        # y may be 2-D (n, 1): its rows are unhashable, so count the first
        # column instead. (Narrowed from a bare except.)
        count = Counter(y[:, 0])
    n_clusters = len(count)
    save_dir = 'results/sdec_dataset:' + dataset + " gamma:" + str(gamma)
    # size of the final, smaller batch when the data set is not an exact
    # multiple of batch_size
    laster_batch_size = x.shape[0] % batch_size
    dec = SDEC(dims=[x.shape[-1], 500, 500, 2000, 10], n_clusters=n_clusters,
               N=x.shape[0], x=x, batch_size=batch_size,
               laster_batch_size=laster_batch_size, gamma=gamma, beta=beta)
    dec.initialize_model(optimizer=SGD(lr=0.01, momentum=0.9), ae_weights=ae_weights)
    dec.model.summary()
    t0 = time()
    y_pred = dec.clustering(x, y=y, tol=tol, maxiter=maxiter,
                            update_interval=update_interval, save_dir=save_dir)
    plot_model(dec.model, to_file='sdecmodel.png', show_shapes=True)
    print('acc:', cluster_acc(y, y_pred))
    print('clustering time: ', (time() - t0))
# Worker endpoints the coordinator connects to, as (host, port) pairs.
worker_HOSTS = [('lpdquad.epfl.ch', 5000), ('lpdquad.epfl.ch', 6000),
                ('lpdquad.epfl.ch', 7000), ('lpdquad.epfl.ch', 8000),
                ('lpdquad.epfl.ch', 9000)]
n = len(worker_HOSTS)  # number of workers
batch_size = 50
learning_rate = 0.05
activation_func = tf.nn.relu
max_train_epoch = 10000  # hard cap on training epochs
max_train_accur = 0.97   # target accuracy at which training may stop
builder_opt = tf.train.AdagradOptimizer(learning_rate)
# dense layer sizes: 784 inputs (28x28 images) -> 100 hidden -> 10 classes
builder_dims = [784, 100, 10]
# ------------------------------------------------------------------------- #
# Dataset instantiation
# NOTE(review): cut() arguments look like (start, end, batch) — confirm
# against the datasets module; first 50000 samples are shuffled and
# re-batched for training, the last 10000 form the test set.
dataset = datasets.load_mnist()
train_set = dataset.cut(0, 50000, 50000).shuffle().cut(0, 50000, batch_size)
test_set = dataset.cut(50000, 60000, 10000)
# Model instantiation
graph = tf.Graph()
with graph.as_default():
    model = models.dense_classifier(builder_dims, inputs=None,
                                    act_fn=activation_func,
                                    optimizer=builder_opt, epoch=True)
# Establish connections with workers
sockets = []
for worker_HOST in worker_HOSTS:
def main():
    """Active-learning experiment on MNIST with a pretrained DEC model.

    Clusters the data with DEC, simulates volunteer labelling of a small
    random sample, refines the embedding with siamese training, then runs
    several rounds of targeted querying of the worst (least pure) cluster.
    """
    # constants
    batch_size = 256
    lr = 0.01
    momentum = 0.9
    tol = 0.001
    maxiter = 2e4
    update_interval = 140
    n_clusters = 10
    n_classes = 10
    # one plot colour per cluster
    lcolours = ['#D6FF79', '#B0FF92', '#A09BE7', '#5F00BA', '#56CBF9', \
                '#F3C969', '#ED254E', '#CAA8F5', '#D9F0FF', '#46351D']
    labels = [str(i) for i in range(n_clusters)]
    ae_weights = '../../../../DEC-keras/results/mnist/ae_weights.h5'
    dec_weights = '../../../../DEC-keras/results/mnist/%d/DEC_model_final.h5' % n_clusters

    # load mnist data set
    x, y = load_mnist()

    # split the data into training, validation and test sets;
    # m excludes the 20000 validation+test points
    m = x.shape[0]
    m = m - 20000
    sample_frac = 0.01  # sample 1% of the points as the labelled pool
    split = int(sample_frac * m)
    print(split)
    x_train = x[:split]
    y_train = y[:split]
    x_valid = x[50000:60000]
    y_valid = y[50000:60000]
    x_test = x[60000:]
    y_test = y[60000:]

    # load pretrained DEC model
    dec = load_mnist_dec(x, ae_weights, dec_weights, n_clusters, \
                         batch_size, lr, momentum)

    # predict training set cluster assignments
    y_pred = dec.predict_clusters(x_train)

    # inspect the clustering and simulate volunteer labelling of random
    # sample (the training set)
    cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
        get_cluster_to_label_mapping(y_train, y_pred, n_classes, n_clusters)
    print(cluster_acc(y_train, y_pred))
    y_valid_pred = dec.predict_clusters(x_valid)
    print(cluster_acc(y_valid, y_valid_pred))

    # extract the cluster centres
    cluster_centres = get_cluster_centres(dec)

    # determine current unlabelled samples (marked -1 for plotting)
    y_plot = np.array(y[:m], dtype='int')
    y_plot[split:] = -1

    # reduce embedding to 2D and plot labelled and unlabelled training set samples
    #pca_plot(dec.encoder, x[:m], cluster_centres, y=y_plot, labels=labels, \
    #         lcolours=lcolours)

    # get siamese training pairs
    im, cc, ls, cluster_to_label_mapping = \
        get_pairs_auto(dec, x_train, y_train, cluster_centres, \
                       cluster_to_label_mapping, majority_class_fractions,
                       n_clusters)
    #im, cc, ls, cluster_to_label_mapping = \
    #  get_pairs_auto_with_noise(dec, x_train, y_train, cluster_centres, \
    #    cluster_to_label_mapping, majority_class_fractions, n_clusters)
    """
    mcheckpointer = ModelCheckpoint(filepath='saved_models/weights.best..hdf5', \
      verbose=1, save_best_only=True)
    base_network = Model(dec.model.input, \
      dec.model.get_layer('encoder_%d' % (dec.n_stacks - 1)).output)
    fcheckpointer = FrameDumpCallback(base_network, x, cluster_centres, \
      './video', y=y_plot, labels=labels, lcolours=lcolours)
    """
    #callbacks = [mcheckpointer, fcheckpointer]
    callbacks = []
    model, base_network = train_siamese(dec, cluster_centres, im, cc, ls, \
                                        epochs=5, split_frac=0.75,
                                        callbacks=callbacks)
    #model, base_network = train_siamese_online(dec, x, cluster_centres, im, cc, ls, \
    #  epochs=1, split_frac=0.75, callbacks=[])

    # re-evaluate on the validation set after siamese training
    y_pred = dec.predict_clusters(x_valid)
    cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
        get_cluster_to_label_mapping(y_valid, y_pred, n_classes, n_clusters)
    print(cluster_acc(y_valid, y_pred))
    #pca_plot(dec.encoder, x_valid, cluster_centres, y=y_valid, labels=labels, \
    #         lcolours=lcolours)

    # query rounds: in each round, add the samples predicted to belong to the
    # least pure cluster to the labelled pool and retrain.
    # NOTE(review): the original indentation was lost; the per-round
    # get_pairs_auto/train_siamese calls are reconstructed as part of the
    # loop body (epochs=1 per round suggests this) — confirm against the
    # original source.
    y_pred = dec.predict_clusters(x[:m])
    print(np.argmin(majority_class_fractions))
    for j in range(1, 6):
        selection = np.where(
            y_pred[j * split:(j + 1) * split] ==
            np.argmin(majority_class_fractions))
        x_train = np.concatenate(
            (x_train, x[:m][j * split:(j + 1) * split][selection]))
        y_train = np.concatenate(
            (y_train, y[:m][j * split:(j + 1) * split][selection]))
        im, cc, ls, cluster_to_label_mapping = \
            get_pairs_auto(dec, x_train, y_train, cluster_centres, \
                           cluster_to_label_mapping,
                           majority_class_fractions, n_clusters)
        callbacks = []
        model, base_network = train_siamese(dec, cluster_centres, im, cc, ls, \
                                            epochs=1, split_frac=0.75,
                                            callbacks=callbacks)
    #x_train = x[:2*split]
    #y_train = y[:2*split]
    #y_pred = dec.predict_clusters(x_train)
    #cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
    #  get_cluster_to_label_mapping(y_train, y_pred, n_classes, n_clusters)

    # final validation-set evaluation
    y_pred = dec.predict_clusters(x_valid)
    cluster_to_label_mapping, n_assigned_list, majority_class_fractions = \
        get_cluster_to_label_mapping(y_valid, y_pred, n_classes, n_clusters)
    print(cluster_acc(y_valid, y_pred))
('conv2', ConvLayer0, {'n_out' : 16, 'image_shape': (13, 13), 'filter_size': (5, 5)}), ('pool2', PoolLayer), ('conv3', ConvLayer0, {'n_out' : 120, 'image_shape': (4, 4), 'filter_size': (4, 4)}), ('reshape1', ReshapeLayer, {'n_out': 120}), ('fc1', WbLayer, {'n_out': 84, 'activation': 'tanh'}), ('fc2', WbLayer, {'n_out': 10, 'activation': 'softmax'}) # actually rbf ] # load datasets datasets = pre_process(load_mnist(), (5 / 6, 1 / 6), shuffle=True) tdatasets = share_datasets(datasets) # build model options = get_options(lenet5, 1, datasets, batch_size=256, dispFreq=30, use_BN=True) tparams, BNparams, network = make_model(options, batch_size=256) # add early stopping condition temp = OrderedDict() temp['valid_error'] = get_error(tdatasets[1], options, batch_size=256) temp['test_error'] = get_error(tdatasets[2], options, batch_size=256) options['model_test'] = temp
def main(relaxation=None, learn_prior=True, max_iters=None,
         batch_size=24, num_latents=200, model_type=None, lr=None,
         test_bias=False, train_dir=None, iwae_samples=100,
         dataset="mnist", logf=None, var_lr_scale=10., Q_wd=.0001,
         Q_depth=-1, checkpoint_path=None):
    """Train (or evaluate) a Bernoulli-latent VAE with low-variance
    discrete gradient estimators.

    relaxation selects the estimator: "rebar" (standard REBAR), "add"
    (REBAR plus a learned Q correction) or "all" (Q network only).
    When checkpoint_path is given, the model is restored and evaluated
    on the test set instead of trained.

    Args:
        relaxation: "rebar", "add" or "all" (see above).
        learn_prior: whether the top-layer prior logits are trainable.
        max_iters: stop training after this many iterations.
        batch_size: training mini-batch size.
        num_latents: number of Bernoulli latents per layer.
        model_type: "L1"/"L2" (1 or 2 linear layers) or "NL1" (1 nonlinear).
        lr: model learning rate (variance params use var_lr_scale * lr).
        test_bias: if True, periodically estimate REBAR/REINFORCE
            gradient variance by repeated sampling.
        train_dir: directory for summaries and checkpoints.
        iwae_samples: unused here — validation uses valid_batch_size copies.
        dataset: "mnist" or "omni" (Omniglot).
        logf: open file handle for per-epoch logging.
        var_lr_scale: learning-rate multiplier for variance parameters.
        Q_wd: weight decay for Q-network variables.
        Q_depth: depth argument passed through to Q_func.
        checkpoint_path: if set, restore and evaluate instead of training.
    """
    valid_batch_size = 100
    if model_type == "L1":
        num_layers = 1
        layer_type = linear_layer
    elif model_type == "L2":
        num_layers = 2
        layer_type = linear_layer
    elif model_type == "NL1":
        num_layers = 1
        layer_type = nonlinear_layer
    else:
        assert False, "bad model type {}".format(model_type)

    sess = tf.Session()
    if dataset == "mnist":
        X_tr, X_va, X_te = datasets.load_mnist()
    elif dataset == "omni":
        X_tr, X_va, X_te = datasets.load_omniglot()
    else:
        assert False
    # initialise the output bias at the logit of the training-set pixel
    # means (clipped away from 0/1 for numerical stability)
    train_mean = np.mean(X_tr, axis=0, keepdims=True)
    train_output_bias = -np.log(
        1. / np.clip(train_mean, 0.001, 0.999) - 1.).astype(np.float32)

    x = tf.placeholder(tf.float32, [None, 784])
    x_im = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image("x_true", x_im)

    # make prior for top b
    p_prior = tf.Variable(
        tf.zeros([num_latents], dtype=tf.float32),
        trainable=learn_prior,
        name='p_prior',
    )
    # create rebar specific variables temperature and eta (one per layer)
    log_temperatures = [create_log_temp(1) for l in range(num_layers)]
    temperatures = [tf.exp(log_temp) for log_temp in log_temperatures]
    batch_temperatures = [tf.reshape(temp, [1, -1]) for temp in temperatures]
    etas = [create_eta(1) for l in range(num_layers)]
    batch_etas = [tf.reshape(eta, [1, -1]) for eta in etas]

    # random uniform samples, shared by the hard and relaxed samplers so
    # the estimators are correlated
    u = [
        tf.random_uniform([tf.shape(x)[0], num_latents], dtype=tf.float32)
        for l in range(num_layers)
    ]
    # create binary sampler
    b_sampler = BSampler(u, "b_sampler")
    gen_b_sampler = BSampler(u, "gen_b_sampler")

    # generate hard forward pass
    encoder_name = "encoder"
    decoder_name = "decoder"
    inf_la_b, samples_b = inference_network(x, train_mean, layer_type,
                                            num_layers, num_latents,
                                            encoder_name, False, b_sampler)
    gen_la_b = generator_network(samples_b, train_output_bias, layer_type,
                                 num_layers, num_latents, decoder_name, False)
    log_image(gen_la_b[-1], "x_pred")

    # produce samples from the prior
    _samples_la_b = generator_network(None, train_output_bias, layer_type,
                                      num_layers, num_latents, decoder_name,
                                      True, sampler=gen_b_sampler,
                                      prior=p_prior)
    log_image(_samples_la_b[-1], "x_sample")

    # hard loss evaluation and log probs
    f_b, log_q_bs = neg_elbo(x, samples_b, inf_la_b, gen_la_b, p_prior,
                             log=True)
    batch_f_b = tf.expand_dims(f_b, 1)
    total_loss = tf.reduce_mean(f_b)
    tf.summary.scalar("fb", total_loss)

    # optimizer for model parameters
    model_opt = tf.train.AdamOptimizer(lr, beta2=.99999)
    # optimizer for variance reducing parameters
    variance_opt = tf.train.AdamOptimizer(var_lr_scale * lr, beta2=.99999)

    # get encoder and decoder variables
    encoder_params = get_variables(encoder_name)
    decoder_params = get_variables(decoder_name)
    if learn_prior:
        decoder_params.append(p_prior)

    # compute and store gradients of hard loss with respect to
    # encoder_parameters (keyed by variable name for lookup per layer)
    encoder_loss_grads = {}
    for g, v in model_opt.compute_gradients(total_loss,
                                            var_list=encoder_params):
        encoder_loss_grads[v.name] = g
    # get gradients for decoder parameters
    decoder_gradvars = model_opt.compute_gradients(total_loss,
                                                   var_list=decoder_params)
    # will hold all gradvars for the model (non-variance adjusting variables)
    model_gradvars = [gv for gv in decoder_gradvars]

    # conditional samples (v | b) for the correlated zt pass
    v = [v_from_u(_u, log_alpha) for _u, log_alpha in zip(u, inf_la_b)]

    # need to create soft samplers
    sig_z_sampler = SIGZSampler(u, batch_temperatures, "sig_z_sampler")
    sig_zt_sampler = SIGZSampler(v, batch_temperatures, "sig_zt_sampler")
    z_sampler = ZSampler(u, "z_sampler")
    zt_sampler = ZSampler(v, "zt_sampler")

    rebars = []
    reinforces = []
    variance_objectives = []
    # have to produce 2 forward passes for each layer for z and zt samples
    for l in range(num_layers):
        cur_la_b = inf_la_b[l]

        # if standard rebar or additive relaxation
        if relaxation == "rebar" or relaxation == "add":
            # compute soft samples and soft passes through model and soft
            # elbos
            cur_z_sample = sig_z_sampler.sample(cur_la_b, l)
            prev_samples_z = samples_b[:l] + [cur_z_sample]
            cur_zt_sample = sig_zt_sampler.sample(cur_la_b, l)
            prev_samples_zt = samples_b[:l] + [cur_zt_sample]
            prev_log_alphas = inf_la_b[:l] + [cur_la_b]

            # soft forward passes
            inf_la_z, samples_z = inference_network(
                x, train_mean, layer_type, num_layers, num_latents,
                encoder_name, True, sig_z_sampler,
                samples=prev_samples_z, log_alphas=prev_log_alphas)
            gen_la_z = generator_network(samples_z, train_output_bias,
                                         layer_type, num_layers, num_latents,
                                         decoder_name, True)
            inf_la_zt, samples_zt = inference_network(
                x, train_mean, layer_type, num_layers, num_latents,
                encoder_name, True, sig_zt_sampler,
                samples=prev_samples_zt, log_alphas=prev_log_alphas)
            gen_la_zt = generator_network(samples_zt, train_output_bias,
                                          layer_type, num_layers, num_latents,
                                          decoder_name, True)
            # soft loss evaluataions
            f_z, _ = neg_elbo(x, samples_z, inf_la_z, gen_la_z, p_prior)
            f_zt, _ = neg_elbo(x, samples_zt, inf_la_zt, gen_la_zt, p_prior)

        if relaxation == "add" or relaxation == "all":
            # sample z and zt
            prev_bs = samples_b[:l]
            cur_z_sample = z_sampler.sample(cur_la_b, l)
            cur_zt_sample = zt_sampler.sample(cur_la_b, l)
            q_z = Q_func(x, train_mean, cur_z_sample, prev_bs, Q_name(l),
                         False, depth=Q_depth)
            q_zt = Q_func(x, train_mean, cur_zt_sample, prev_bs, Q_name(l),
                          True, depth=Q_depth)
            tf.summary.scalar("q_z_{}".format(l), tf.reduce_mean(q_z))
            tf.summary.scalar("q_zt_{}".format(l), tf.reduce_mean(q_zt))
            if relaxation == "add":
                # REBAR relaxation plus learned correction
                f_z = f_z + q_z
                f_zt = f_zt + q_zt
            elif relaxation == "all":
                # learned relaxation only
                f_z = q_z
                f_zt = q_zt
            else:
                assert False
        tf.summary.scalar("f_z_{}".format(l), tf.reduce_mean(f_z))
        tf.summary.scalar("f_zt_{}".format(l), tf.reduce_mean(f_zt))
        cur_samples_b = samples_b[l]
        # get gradient of sample log-likelihood wrt current parameter
        d_log_q_d_la = bernoulli_loglikelihood_derivitive(
            cur_samples_b, cur_la_b)
        # get gradient of soft-losses wrt current parameter
        d_f_z_d_la = tf.gradients(f_z, cur_la_b)[0]
        d_f_zt_d_la = tf.gradients(f_zt, cur_la_b)[0]
        batch_f_zt = tf.expand_dims(f_zt, 1)
        eta = batch_etas[l]

        # compute rebar and reinforce
        tf.summary.histogram("der_diff_{}".format(l),
                             d_f_z_d_la - d_f_zt_d_la)
        tf.summary.histogram("d_log_q_d_la_{}".format(l), d_log_q_d_la)
        rebar = ((batch_f_b - eta * batch_f_zt) * d_log_q_d_la +
                 eta * (d_f_z_d_la - d_f_zt_d_la)) / batch_size
        reinforce = batch_f_b * d_log_q_d_la / batch_size
        rebars.append(rebar)
        reinforces.append(reinforce)
        tf.summary.histogram("rebar_{}".format(l), rebar)
        tf.summary.histogram("reinforce_{}".format(l), reinforce)

        # backpropogate rebar to individual layer parameters
        layer_params = get_variables(layer_name(l), arr=encoder_params)
        layer_rebar_grads = tf.gradients(cur_la_b, layer_params,
                                         grad_ys=rebar)
        # get direct loss grads for each parameter
        layer_loss_grads = [encoder_loss_grads[v.name] for v in layer_params]
        # each param's gradient should be rebar + the direct loss gradient
        layer_grads = [
            rg + lg for rg, lg in zip(layer_rebar_grads, layer_loss_grads)
        ]
        for rg, lg, v in zip(layer_rebar_grads, layer_loss_grads,
                             layer_params):
            tf.summary.histogram(v.name + "_grad_rebar", rg)
            tf.summary.histogram(v.name + "_grad_loss", lg)
        layer_gradvars = list(zip(layer_grads, layer_params))
        model_gradvars.extend(layer_gradvars)
        # variance objective: E[rebar^2], minimised wrt temperature/eta/Q
        variance_objective = tf.reduce_mean(tf.square(rebar))
        variance_objectives.append(variance_objective)

    variance_objective = tf.add_n(variance_objectives)
    variance_vars = log_temperatures + etas
    if relaxation != "rebar":
        # Q-network variables are also variance-reduction parameters
        q_vars = get_variables("Q_")
        wd = tf.add_n([Q_wd * tf.nn.l2_loss(v) for v in q_vars])
        tf.summary.scalar("Q_weight_decay", wd)
        variance_vars = variance_vars + q_vars
    else:
        wd = 0.0
    variance_gradvars = variance_opt.compute_gradients(
        variance_objective + wd, var_list=variance_vars)
    variance_train_op = variance_opt.apply_gradients(variance_gradvars)
    model_train_op = model_opt.apply_gradients(model_gradvars)
    # single train op that runs both updates
    with tf.control_dependencies([model_train_op, variance_train_op]):
        train_op = tf.no_op()

    for g, v in model_gradvars + variance_gradvars:
        print(g, v.name)
        if g is not None:
            tf.summary.histogram(v.name, v)
            tf.summary.histogram(v.name + "_grad", g)

    val_loss = tf.Variable(1000, trainable=False, name="val_loss",
                           dtype=tf.float32)
    train_loss = tf.Variable(1000, trainable=False, name="train_loss",
                             dtype=tf.float32)
    tf.summary.scalar("val_loss", val_loss)
    tf.summary.scalar("train_loss", train_loss)
    # NOTE(review): summ_op is built but never run below (the
    # add_summary call is commented out).
    summ_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(train_dir)
    sess.run(tf.global_variables_initializer())

    # create savers
    train_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    val_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    # IWAE bound over valid_batch_size copies of a single example
    iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))

    if checkpoint_path is None:
        iters_per_epoch = X_tr.shape[0] // batch_size
        print("Train set has {} examples".format(X_tr.shape[0]))
        if relaxation != "rebar":
            # warm up the Q network before joint training
            print("Pretraining Q network")
            for i in range(1000):
                if i % 100 == 0:
                    print(i)
                idx = np.random.randint(0, iters_per_epoch - 1)
                batch_xs = X_tr[idx * batch_size:(idx + 1) * batch_size]
                sess.run(variance_train_op, feed_dict={x: batch_xs})
        t = time.time()
        best_val_loss = np.inf
        for epoch in range(10000000):
            train_losses = []
            for i in range(iters_per_epoch):
                cur_iter = epoch * iters_per_epoch + i
                if cur_iter > max_iters:
                    print("Training Completed")
                    return
                batch_xs = X_tr[i * batch_size:(i + 1) * batch_size]
                if i % 1000 == 0:
                    loss, _, = sess.run([total_loss, train_op],
                                        feed_dict={x: batch_xs})
                    #summary_writer.add_summary(sum_str, cur_iter)
                    time_taken = time.time() - t
                    t = time.time()
                    #print(cur_iter, loss, "{} / batch".format(time_taken / 1000))
                    if test_bias:
                        # empirically estimate estimator variance by
                        # repeated sampling on a fixed batch.
                        # NOTE(review): rebars[3] assumes at least 4
                        # layers, but model_type gives at most 2 — this
                        # path would raise IndexError; confirm intent.
                        rebs = []
                        refs = []
                        for _i in range(100000):
                            if _i % 1000 == 0:
                                print(_i)
                            rb, re = sess.run([rebars[3], reinforces[3]],
                                              feed_dict={x: batch_xs})
                            rebs.append(rb[:5])
                            refs.append(re[:5])
                        rebs = np.array(rebs)
                        refs = np.array(refs)
                        re_var = np.log(refs.var(axis=0))
                        rb_var = np.log(rebs.var(axis=0))
                        print("rebar variance = {}".format(rb_var))
                        print("reinforce variance = {}".format(re_var))
                        print("rebar = {}".format(rebs.mean(axis=0)))
                        print("reinforce = {}\n".format(refs.mean(axis=0)))
                else:
                    loss, _ = sess.run([total_loss, train_op],
                                       feed_dict={x: batch_xs})
                train_losses.append(loss)

            # epoch over, run test data
            iwaes = []
            for x_va in X_va:
                x_va_batch = np.array(
                    [x_va for i in range(valid_batch_size)])
                iwae = sess.run(iwae_elbo, feed_dict={x: x_va_batch})
                iwaes.append(iwae)
            trl = np.mean(train_losses)
            val = np.mean(iwaes)
            print("({}) Epoch = {}, Val loss = {}, Train loss = {}".format(
                train_dir, epoch, val, trl))
            logf.write("{}: {} {}\n".format(epoch, val, trl))
            sess.run([val_loss.assign(val), train_loss.assign(trl)])
            if val < best_val_loss:
                print("saving best model")
                best_val_loss = val
                val_saver.save(sess, '{}/best-model'.format(train_dir),
                               global_step=epoch)
            np.random.shuffle(X_tr)
            if epoch % 10 == 0:
                train_saver.save(sess, '{}/model'.format(train_dir),
                                 global_step=epoch)
    # run iwae elbo on test set
    else:
        val_saver.restore(sess, checkpoint_path)
        iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))
        iwaes = []
        elbos = []
        for x_te in X_te:
            x_te_batch = np.array([x_te for i in range(100)])
            iwae, elbo = sess.run([iwae_elbo, f_b],
                                  feed_dict={x: x_te_batch})
            iwaes.append(iwae)
            elbos.append(elbo)
        print("MEAN IWAE: {}".format(np.mean(iwaes)))
        print("MEAN ELBO: {}".format(np.mean(elbos)))
from torch.utils.data import DataLoader

import datasets
from simpledataset import SimpleDataset

# configuration constants
DS_PATH = '~/.pythondata/mnist'
BATCH_SIZE = 64
IN_SIZE = 28 * 28   # flattened MNIST image
HIDDEN_SIZE = 50
OUT_SIZE = 10       # digit classes
LR = 0.001
NEPOCHS = 10

### Prepare Data ###
X_train, y_train, X_test, y_test = datasets.load_mnist(DS_PATH)
# flatten images and scale pixel values to [0, 1]
X_train, X_test = (X_train.reshape(len(X_train), -1),
                   X_test.reshape(len(X_test), -1))
X_train, X_test = X_train / 255, X_test / 255
# NOTE(review): np.long was removed in NumPy 1.24 — np.int64 is the
# modern equivalent; confirm the pinned NumPy version.
y_train, y_test = y_train.astype(np.long), y_test.astype(np.long)
train_dl = DataLoader(dataset=SimpleDataset(X_train, y_train),
                      batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(dataset=SimpleDataset(X_test, y_test),
                     batch_size=BATCH_SIZE, shuffle=False)

### Prepare Network ###
# Two-layer fully connected classifier: 784 -> 50 -> 10.
# NOTE(review): the class body is truncated at this chunk boundary
# (forward() etc. continue outside this view).
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(IN_SIZE, HIDDEN_SIZE)
        self.l2 = torch.nn.Linear(HIDDEN_SIZE, OUT_SIZE)
def run_training():
    """Train a RAT-SPN on the dataset selected by the global ARGS.

    Loads and preprocesses the data, builds or restores the model,
    then trains for ARGS.num_epochs epochs with per-epoch evaluation on
    the train/valid/test splits. Metrics are accumulated into a results
    dict pickled to ARGS.result_path; checkpoints (including
    best-validation snapshots) are saved there too. Exits with status 7
    if the configured wall-clock timeout would be exceeded.
    """
    training_start_time = time.time()
    timeout_flag = False

    #############
    # Load data #
    #############
    if ARGS.data_set in ['mnist', 'fashion_mnist']:
        train_x, train_labels, valid_x, valid_labels, test_x, test_labels = \
            datasets.load_mnist(ARGS.data_path)
    elif ARGS.data_set in DEBD:
        # DEBD datasets are unlabelled density-estimation benchmarks:
        # use all-zero dummy labels
        train_x, test_x, valid_x = datasets.load_debd(ARGS.data_path,
                                                      ARGS.data_set)
        train_labels = np.zeros(train_x.shape[0], dtype=np.int32)
        test_labels = np.zeros(test_x.shape[0], dtype=np.int32)
        valid_labels = np.zeros(valid_x.shape[0], dtype=np.int32)
    else:
        # remaining datasets are shipped as pre-split pickles
        if ARGS.data_set == '20ng_classify':
            unpickled = pickle.load(
                open(ARGS.data_path + '/20ng-50-lda.pkl', "rb"))
        elif ARGS.data_set == 'higgs':
            unpickled = pickle.load(open(ARGS.data_path + '/higgs.pkl', "rb"))
        elif ARGS.data_set == 'wine':
            unpickled = pickle.load(open(ARGS.data_path + '/wine.pkl', "rb"))
        elif ARGS.data_set == 'wine_multiclass':
            unpickled = pickle.load(
                open(ARGS.data_path + '/wine_multiclass.pkl', "rb"))
        elif ARGS.data_set == 'theorem':
            unpickled = pickle.load(open(ARGS.data_path + '/theorem.pkl', "rb"))
        elif ARGS.data_set == 'imdb':
            unpickled = pickle.load(
                open(ARGS.data_path + '/imdb-dense-nmf-200.pkl', "rb"))
        train_x = unpickled[0]
        train_labels = unpickled[1]
        valid_x = unpickled[2]
        valid_labels = unpickled[3]
        test_x = unpickled[4]
        test_labels = unpickled[5]

    ######################
    # Data preprocessing #
    ######################
    if not ARGS.discrete_leaves:
        # drop near-constant features relative to the mean variance
        if ARGS.low_variance_threshold >= 0.0:
            v = np.var(train_x, 0)
            mu = np.mean(v)
            idx = v > ARGS.low_variance_threshold * mu
            train_x = train_x[:, idx]
            test_x = test_x[:, idx]
            if valid_x is not None:
                valid_x = valid_x[:, idx]

        # zero-mean, unit-variance, using training-set statistics only
        if ARGS.normalization == "zmuv":
            train_x_mean = np.mean(train_x, 0)
            train_x_std = np.std(train_x, 0)
            train_x = (train_x - train_x_mean) / \
                (train_x_std + ARGS.zmuv_min_sigma)
            test_x = (test_x - train_x_mean) / \
                (train_x_std + ARGS.zmuv_min_sigma)
            if valid_x is not None:
                valid_x = (valid_x - train_x_mean) / \
                    (train_x_std + ARGS.zmuv_min_sigma)

    num_classes = len(np.unique(train_labels))
    train_n = int(train_x.shape[0])
    num_dims = int(train_x.shape[1])

    # stores evaluation metrics
    results = {
        'train_ACC': [],
        'train_CE': [],
        'train_LL': [],
        'train_MARG': [],
        'test_ACC': [],
        'test_CE': [],
        'test_LL': [],
        'test_MARG': [],
        'valid_ACC': [],
        'valid_CE': [],
        'valid_LL': [],
        'valid_MARG': [],
        'elapsed_wall_time_epoch': [],
        'best_valid_acc': None,
        'epoch_best_valid_acc': None,
        'best_valid_loss': None,
        'epoch_best_valid_loss': None
    }

    # try to restore model
    latest_model = tf.train.latest_checkpoint(ARGS.result_path +
                                              "/checkpoints/")
    if latest_model is not None:
        # checkpoint names end in "-<epoch>"
        recovered_epoch = int(latest_model[latest_model.rfind('-') + 1:])
        if not os.path.isfile(ARGS.result_path + '/spn_description.pkl'):
            raise RuntimeError('Found checkpoint, but no description file.')
        if not os.path.isfile(ARGS.result_path + '/results.pkl'):
            # FIX: this check is for results.pkl — the original raised with
            # the (copy-pasted) description-file message
            raise RuntimeError('Found checkpoint, but no results file.')
        ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
            open(ARGS.result_path + '/spn_description.pkl', 'rb'))
        if ndo != num_dims or nco != num_classes:
            raise RuntimeError(
                'Inconsistent number of dimensions/classes when trying to retrieve model.'
            )
        results = pickle.load(open(ARGS.result_path + '/results.pkl', "rb"))
        # a restored results dict must contain one entry per finished epoch
        for k in results:
            if isinstance(results[k], list) and len(
                    results[k]) != recovered_epoch + 1:
                raise AssertionError("Results seem corrupted.")
        # Make Tensorflow model
        rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)
        start_epoch_number = recovered_epoch + 1
    else:
        if ARGS.model_description_file:
            ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
                open(ARGS.model_description_file, 'rb'))
            if ndo != num_dims or nco != num_classes:
                raise RuntimeError(
                    'Inconsistent number of dimensions/classes when trying to retrieve model.'
                )
            # Make Tensorflow model
            rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)
        else:
            # Make Region Graph from random recursive splits
            region_graph = RegionGraph(range(0, num_dims),
                                       np.random.randint(0, 1000000000))
            for _ in range(0, ARGS.num_recursive_splits):
                region_graph.random_split(2, ARGS.split_depth)
            region_graph_layers = region_graph.make_layers()
            # Make Tensorflow model
            rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS)
            if not ARGS.no_save:
                pickle.dump((num_dims, num_classes, ARGS, region_graph_layers),
                            open(ARGS.result_path + '/spn_description.pkl',
                                 "wb"))
        start_epoch_number = 0

    # session (optionally with a capped GPU memory fraction)
    if ARGS.GPU_fraction <= 0.95:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=ARGS.GPU_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    else:
        sess = tf.Session()

    # saver
    saver = tf.train.Saver(max_to_keep=ARGS.store_model_max)
    if ARGS.store_best_valid_acc:
        best_valid_acc_saver = tf.train.Saver(max_to_keep=1)
    if ARGS.store_best_valid_loss:
        best_valid_loss_saver = tf.train.Saver(max_to_keep=1)

    # init/load model
    if latest_model is not None:
        saver.restore(sess, latest_model)
        print("")
        print("restored model after epoch {}".format(recovered_epoch))
        print("")
    else:
        init = tf.global_variables_initializer()
        sess.run(init)
        if ARGS.model_init_file:
            init_saver = tf.train.Saver(rat_spn.all_params)
            init_saver.restore(sess, ARGS.model_init_file)
            print("")
            print("used {} to init model".format(ARGS.model_init_file))
            print("")

    print(rat_spn)
    print("num params: {}".format(get_num_params()))
    print("start training")
    # train_writer = tf.summary.FileWriter("/scratch/rp587/tensorflow_work/", sess.graph)

    ############
    # Training #
    ############
    epoch_elapsed_times = []
    batches_per_epoch = int(np.ceil(float(train_n) / float(ARGS.batch_size)))

    for epoch_n in range(start_epoch_number, ARGS.num_epochs):
        epoch_start_time = time.time()
        rp = np.random.permutation(train_n)

        batch_start_idx = 0
        elapsed_wall_time_epoch = 0.0
        for batch_n in range(0, batches_per_epoch):
            # last batch takes whatever remains of the permutation
            if batch_n + 1 < batches_per_epoch:
                cur_idx = rp[batch_start_idx:batch_start_idx +
                             ARGS.batch_size]
            else:
                cur_idx = rp[batch_start_idx:]
            batch_start_idx += ARGS.batch_size

            feed_dict = {
                rat_spn.inputs: train_x[cur_idx, :],
                rat_spn.labels: train_labels[cur_idx]
            }
            if ARGS.dropout_rate_input is not None:
                feed_dict[rat_spn.
                          dropout_input_placeholder] = ARGS.dropout_rate_input
            if ARGS.dropout_rate_sums is not None:
                feed_dict[
                    rat_spn.dropout_sums_placeholder] = ARGS.dropout_rate_sums

            start_time = time.time()
            if ARGS.optimizer == "em":
                # EM expects log-one-hot labels: 0 for the true class,
                # -inf elsewhere
                one_hot_labels = -np.inf * np.ones((len(cur_idx), num_classes))
                one_hot_labels[range(len(cur_idx)),
                               [int(x) for x in train_labels[cur_idx]]] = 0.0
                feed_dict[rat_spn.EM_deriv_input_pl] = one_hot_labels
                start_time = time.time()
                sess.run(rat_spn.em_update_accums, feed_dict=feed_dict)
                elapsed_wall_time_epoch += (time.time() - start_time)
            else:
                _, CEM_value, cur_lr, loss_val, ll_mean_val, margin_val = \
                    sess.run([
                        rat_spn.train_op,
                        rat_spn.cross_entropy_mean,
                        rat_spn.learning_rate,
                        rat_spn.objective,
                        rat_spn.neg_norm_ll,
                        rat_spn.neg_margin_objective], feed_dict=feed_dict)
                elapsed_wall_time_epoch += (time.time() - start_time)

                if batch_n % 10 == 1:
                    print(
                        "epoch: {}[{}, {:.5f}] CE: {:.5f} nll: {:.5f} negmargin: {:.5f} loss: {:.5f} time: {:.5f}"
                        .format(epoch_n, batch_n, cur_lr, CEM_value,
                                ll_mean_val, margin_val, loss_val,
                                elapsed_wall_time_epoch))

        # end-of-epoch parameter update (EM) or LR decay (gradient-based)
        if ARGS.optimizer == "em":
            sess.run(rat_spn.em_update_params)
            sess.run(rat_spn.em_reset_accums)
        else:
            sess.run(rat_spn.decrease_lr_op)

        ################
        ### Evaluate ###
        ################
        print('')
        print('epoch {}'.format(epoch_n))

        num_correct_train, CE_total, train_LL, train_MARG, train_loss = \
            compute_performance(sess, train_x, train_labels, 100, rat_spn)
        train_ACC = 100. * float(num_correct_train) / float(train_x.shape[0])
        train_CE = CE_total / float(train_x.shape[0])
        print('   ###')
        print('   ### accuracy on train set = {}   CE = {}   LL: {}   negmargin: {}'
              .format(train_ACC, train_CE, train_LL, train_MARG))

        if test_x is not None:
            num_correct_test, CE_total, test_LL, test_MARG, test_loss = \
                compute_performance(sess, test_x, test_labels, 100, rat_spn)
            test_ACC = 100. * float(num_correct_test) / float(test_x.shape[0])
            test_CE = CE_total / float(test_x.shape[0])
            print('   ###')
            print('   ### accuracy on test set = {}   CE = {}   LL: {}   negmargin: {}'
                  .format(test_ACC, test_CE, test_LL, test_MARG))
        else:
            test_ACC = None
            test_CE = None
            test_LL = None
            # FIX: also define test_MARG so the results append below
            # never hits an undefined name
            test_MARG = None

        if valid_x is not None:
            num_correct_valid, CE_total, valid_LL, valid_MARG, valid_loss = \
                compute_performance(sess, valid_x, valid_labels, 100, rat_spn)
            valid_ACC = 100. * float(num_correct_valid) / float(
                valid_x.shape[0])
            valid_CE = CE_total / float(valid_x.shape[0])
            print('   ###')
            print('   ### accuracy on valid set = {}   CE = {}   LL: {}   margin: {}'
                  .format(valid_ACC, valid_CE, valid_LL, valid_MARG))
        else:
            valid_ACC = None
            valid_CE = None
            valid_LL = None
            # FIX: see test_MARG above
            valid_MARG = None

        print('   ###')
        print('')

        ##############
        ### timing ###
        ##############
        epoch_elapsed_times.append(time.time() - epoch_start_time)
        # conservative estimate: mean plus three standard deviations
        estimated_next_epoch_time = np.mean(
            epoch_elapsed_times) + 3 * np.std(epoch_elapsed_times)
        remaining_time = ARGS.timeout_seconds - (time.time() -
                                                 training_start_time)
        if estimated_next_epoch_time + ARGS.timeout_safety_seconds > remaining_time:
            print("Next epoch might exceed time limit, stop.")
            timeout_flag = True

        if not ARGS.no_save:
            results['train_ACC'].append(train_ACC)
            results['train_CE'].append(train_CE)
            results['train_LL'].append(train_LL)
            # FIX: the three *_MARG lists previously all appended train_LL
            # (copy-paste bug); append the matching margin values instead
            results['train_MARG'].append(train_MARG)
            results['test_ACC'].append(test_ACC)
            results['test_CE'].append(test_CE)
            results['test_LL'].append(test_LL)
            results['test_MARG'].append(test_MARG)
            results['valid_ACC'].append(valid_ACC)
            results['valid_CE'].append(valid_CE)
            results['valid_LL'].append(valid_LL)
            results['valid_MARG'].append(valid_MARG)
            results['elapsed_wall_time_epoch'].append(elapsed_wall_time_epoch)

            if ARGS.store_best_valid_acc and valid_x is not None:
                if results['best_valid_acc'] is None or valid_ACC > results[
                        'best_valid_acc']:
                    print('Better validation accuracy -> save model')
                    print('')
                    best_valid_acc_saver.save(
                        sess,
                        ARGS.result_path + "/best_valid_acc/model.ckpt",
                        global_step=epoch_n,
                        write_meta_graph=False)
                    results['best_valid_acc'] = valid_ACC
                    results['epoch_best_valid_acc'] = epoch_n

            if ARGS.store_best_valid_loss and valid_x is not None:
                if results['best_valid_loss'] is None or valid_loss < results[
                        'best_valid_loss']:
                    print('Better validation loss -> save model')
                    print('')
                    best_valid_loss_saver.save(
                        sess,
                        ARGS.result_path + "/best_valid_loss/model.ckpt",
                        global_step=epoch_n,
                        write_meta_graph=False)
                    results['best_valid_loss'] = valid_loss
                    results['epoch_best_valid_loss'] = epoch_n

            # periodic / final / timeout checkpointing
            if epoch_n % ARGS.store_model_every_epochs == 0 \
                    or epoch_n + 1 == ARGS.num_epochs \
                    or timeout_flag:
                pickle.dump(results,
                            open(ARGS.result_path + '/results.pkl', "wb"))
                saver.save(sess,
                           ARGS.result_path + "/checkpoints/model.ckpt",
                           global_step=epoch_n,
                           write_meta_graph=False)

        # exit code 7 signals "stopped due to timeout" to the caller
        if timeout_flag:
            sys.exit(7)
def run_experiment(settings):
    """Train one generative EinsumNetwork per class, combine them into a
    class-conditional mixture, and evaluate the mixture before and during an
    online-adaptation phase.

    Parameters
    ----------
    settings : namespace with attributes fashion_mnist, svhn,
        exponential_family, classes, K, structure, pd_num_pieces, depth,
        num_repetitions_mixture, width, height, num_epochs, batch_size,
        SGD_learning_rate.

    Returns
    -------
    dict
        Per-evaluation-step log-likelihood and accuracy traces (the ``*_ref``
        entries are the reference run without online adaptation), plus the
        parameter count and the number of online samples added per step.
    """
    ############################################################################
    # Unpack experiment settings into locals.
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn
    exponential_family = settings.exponential_family
    classes = settings.classes
    K = settings.K
    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions_mixture = settings.num_repetitions_mixture
    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate
    ############################################################################

    # Arguments for the leaf exponential-family distributions.
    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    # NOTE(review): double negative -- this branch runs exactly when
    # exponential_family == EinsumNetwork.NormalArray; pixels are rescaled
    # from [0, 255] to [-0.5, 0.5] for the Normal leaves.
    if not exponential_family != EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split: last 10k samples -> validation, the 1k before that ->
    # online stream, and indices -56000:-11000 -> training set.
    valid_x = train_x[-10000:, :]
    online_x = train_x[-11000:-10000, :]
    train_x = train_x[-56000:-11000, :]
    # init_x = train_x[-13000:-10000, :]
    valid_labels = train_labels[-10000:]
    online_labels = train_labels[-11000:-10000]
    train_labels = train_labels[-56000:-11000]
    # init_labels = train_labels[-13000:-10000]

    # full set of training
    # valid_x = train_x[-10000:, :]
    # online_x = train_x[-11000:-10000, :]
    # train_x = train_x[:-10000, :]
    # valid_labels = train_labels[-10000:]
    # online_labels = train_labels[-11000:-10000]
    # train_labels = train_labels[:-10000]

    # print('train_x:')
    # print(train_x.shape)
    # print(train_labels.shape)
    # print('online_x:')
    # print(online_x.shape)
    # print(online_labels.shape)
    # exit()

    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        online_x = online_x[
            np.any(np.stack([online_labels == c for c in classes], 1), 1), :]
        # init_x = init_x[np.any(np.stack([init_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]
        train_labels = [l for l in train_labels if l in classes]
        # NOTE(review): alias, not a copy -- the in-place
        # ``train_labels_backup += ...`` and ``train_labels += ...`` updates
        # in the loops below extend the SAME list object; confirm this
        # sharing is intended.
        train_labels_backup = train_labels
        online_labels = [l for l in online_labels if l in classes]
        # init_labels = [l for l in init_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        classes = np.unique(train_labels).tolist()
        train_labels = [l for l in train_labels if l in classes]
        # NOTE(review): same aliasing as in the branch above.
        train_labels_backup = train_labels
        online_labels = [l for l in online_labels if l in classes]
        # init_labels = [l for l in init_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    # train_x_backup grows during the reference (no-adaptation) loop below.
    train_x_backup = train_x
    online_x = torch.from_numpy(online_x).to(torch.device(device))
    # init_x = torch.from_numpy(init_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    einets = []
    ps = []
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[height / d, width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(height, width),
                                                  delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(
                num_var=train_x.shape[1],
                depth=depth,
                num_repetitions=num_repetitions_mixture)
        else:
            raise AssertionError("Unknown Structure")

        # One single-output (num_classes=1) generative model per class.
        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=3 if svhn else 1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            use_em=False)

        einet = EinsumNetwork.EinsumNetwork(graph, args)
        # init_dict = get_init_dict(einet, init_x, train_labels=init_labels, einet_class=c)
        init_dict = get_init_dict(einet,
                                  train_x,
                                  train_labels=train_labels,
                                  einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einets.append(einet)

        # Calculate amount of training samples per class
        ps.append(train_labels.count(c))
        print(f'Einsum network for class {c}:')
        print(einet)

    # normalize ps, construct mixture component
    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)
    num_params = mixture.eval_size()

    # data_dir = '../src/experiments/round5/data/weights_analysis/'
    # utils.mkdir_p(data_dir)
    # for (einet, c) in zip(einets, classes):
    #     data_file = os.path.join(data_dir, f"weights_before_{c}.json")
    #     weights = einet.einet_layers[-1].params.data.cpu()
    #     np.savetxt(data_file, einet.einet_layers[-1].reparam(weights)[0])

    ##################
    # Training phase #
    ##################
    # Collect the parameters of every component plus the mixture itself into
    # one optimizer.
    sub_net_parameters = None
    for einet in mixture.einets:
        if sub_net_parameters is None:
            sub_net_parameters = list(einet.parameters())
        else:
            sub_net_parameters += list(einet.parameters())
    sub_net_parameters += list(mixture.parameters())
    optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)

    start_time = time.time()
    """ Learning each sub Network Generatively """
    for (einet, c) in zip(einets, classes):
        # Each component sees only the samples of its own class.
        train_x_c = train_x[[l == c for l in train_labels]]
        train_N = train_x_c.shape[0]
        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N,
                                         device=device).split(batch_size)
            total_loss = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :]
                optimizer.zero_grad()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                # Minimize the negative log-likelihood by SGD.
                nll = log_likelihood * -1
                nll.backward()
                optimizer.step()
                total_loss += nll.detach().item()
            print(f'[{epoch_count}] total log-likelihood: {total_loss}')

    # data_dir = '../src/experiments/round5/data/weights_analysis/'
    # utils.mkdir_p(data_dir)
    # for (einet, c) in zip(einets, classes):
    #     data_file = os.path.join(data_dir, f"weights_after_{c}.json")
    #     weights = einet.einet_layers[-1].params.data.cpu()
    #     np.savetxt(data_file, einet.einet_layers[-1].reparam(weights)[0])
    # exit()

    ##################################
    # Evalueate after initialization #
    ##################################
    # Metric traces filled by eval_network(); ``*_ref`` lists belong to the
    # reference run (growing training set, no online adaptation).
    train_lls = []
    valid_lls = []
    test_lls = []
    train_accs = []
    valid_accs = []
    test_accs = []
    train_lls_ref = []
    valid_lls_ref = []
    test_lls_ref = []
    train_accs_ref = []
    valid_accs_ref = []
    test_accs_ref = []
    added_samples = [0]

    def eval_network(do_print=False, no_OA=False):
        """Evaluate the mixture's log-likelihoods and accuracies.

        With ``no_OA=True`` the backup (reference) training data/labels are
        used and results are appended to the ``*_ref`` lists; otherwise the
        adapted training data are used. Reads the enclosing scope's tensors
        via closure, so it always sees their current values.
        """
        if no_OA:
            train_N = train_x_backup.shape[0]
        else:
            train_N = train_x.shape[0]
        valid_N = valid_x.shape[0]
        test_N = test_x.shape[0]

        mixture.eval()
        if no_OA:
            train_ll_before = mixture.eval_loglikelihood_batched(
                train_x_backup, batch_size=batch_size)
        else:
            train_ll_before = mixture.eval_loglikelihood_batched(
                train_x, batch_size=batch_size)
        valid_ll_before = mixture.eval_loglikelihood_batched(
            valid_x, batch_size=batch_size)
        test_ll_before = mixture.eval_loglikelihood_batched(
            test_x, batch_size=batch_size)
        if do_print:
            print()
            print(
                "Experiment 3: Log-likelihoods --- train LL {} valid LL {} test LL {}"
                .format(train_ll_before / train_N, valid_ll_before / valid_N,
                        test_ll_before / test_N))
        if no_OA:
            train_lls_ref.append(train_ll_before / train_N)
            valid_lls_ref.append(valid_ll_before / valid_N)
            test_lls_ref.append(test_ll_before / test_N)
        else:
            train_lls.append(train_ll_before / train_N)
            valid_lls.append(valid_ll_before / valid_N)
            test_lls.append(test_ll_before / test_N)

        ################
        # Experiment 4 #
        ################
        if no_OA:
            train_labelsz = torch.tensor(train_labels_backup).to(
                torch.device(device))
        else:
            train_labelsz = torch.tensor(train_labels).to(torch.device(device))
        valid_labelsz = torch.tensor(valid_labels).to(torch.device(device))
        test_labelsz = torch.tensor(test_labels).to(torch.device(device))

        if no_OA:
            acc_train_before = mixture.eval_accuracy_batched(
                classes, train_x_backup, train_labelsz, batch_size=batch_size)
        else:
            acc_train_before = mixture.eval_accuracy_batched(
                classes, train_x, train_labelsz, batch_size=batch_size)
        acc_valid_before = mixture.eval_accuracy_batched(classes,
                                                         valid_x,
                                                         valid_labelsz,
                                                         batch_size=batch_size)
        acc_test_before = mixture.eval_accuracy_batched(classes,
                                                        test_x,
                                                        test_labelsz,
                                                        batch_size=batch_size)
        if do_print:
            print()
            print(
                "Experiment 4: Classification accuracies --- train acc {} valid acc {} test acc {}"
                .format(acc_train_before, acc_valid_before, acc_test_before))
        if no_OA:
            train_accs_ref.append(acc_train_before)
            valid_accs_ref.append(acc_valid_before)
            test_accs_ref.append(acc_test_before)
        else:
            train_accs.append(acc_train_before)
            valid_accs.append(acc_valid_before)
            test_accs.append(acc_test_before)
        mixture.train()

    eval_network(do_print=True, no_OA=False)
    eval_network(do_print=False, no_OA=True)

    #####################################################
    # Evaluate the network with different training sets #
    #####################################################
    # Reference run: stream online samples (batches of 20) into the backup
    # training set WITHOUT updating any model, re-evaluating after each step.
    idx_batches = torch.randperm(online_x.shape[0], device=device).split(20)
    for idx in tqdm(idx_batches):
        online_x_idx = online_x[idx]
        online_labels_idx = [online_labels[i] for i in idx]
        for (einet, c) in zip(einets, classes):
            batch_x = online_x_idx[[l == c for l in online_labels_idx]]
            train_x_backup = torch.cat((train_x_backup, batch_x))
            train_labels_backup += [c for i in batch_x]
        added_samples.append(added_samples[-1] + len(idx))
        eval_network(do_print=False, no_OA=True)

    #####################
    # Online adaptation #
    #####################
    # Same stream again, this time updating each class model via
    # online_update() and growing the adapted training set.
    for idx in tqdm(idx_batches):
        online_x_idx = online_x[idx]
        online_labels_idx = [online_labels[i] for i in idx]
        for (einet, c) in zip(einets, classes):
            batch_x = online_x_idx[[l == c for l in online_labels_idx]]
            online_update(einet, batch_x)
            train_x = torch.cat((train_x, batch_x))
            train_labels += [c for i in batch_x]
        eval_network(do_print=False, no_OA=False)

    print()
    print(f'Network size: {num_params} parameters')

    return {
        'train_lls': train_lls,
        'valid_lls': valid_lls,
        'test_lls': test_lls,
        'train_accs': train_accs,
        'valid_accs': valid_accs,
        'test_accs': test_accs,
        'train_lls_ref': train_lls_ref,
        'valid_lls_ref': valid_lls_ref,
        'test_lls_ref': test_lls_ref,
        'train_accs_ref': train_accs_ref,
        'valid_accs_ref': valid_accs_ref,
        'test_accs_ref': test_accs_ref,
        'network_size': num_params,
        'online_samples': added_samples,
    }
def run_testing():
    """Restore a trained RAT-SPN from disk and report accuracy / cross-entropy
    / log-likelihood on the train, test and validation splits.

    All configuration (data set name, paths, preprocessing flags, GPU memory
    fraction, model files) is read from the module-level ``ARGS`` namespace;
    results are written to stdout only.
    """
    #############
    # Load data #
    #############
    if ARGS.data_set in ['mnist', 'fashion_mnist']:
        train_x, train_labels, valid_x, valid_labels, test_x, test_labels = datasets.load_mnist(
            ARGS.data_path)
    elif ARGS.data_set in DEBD:
        # DEBD benchmarks are unlabeled; dummy all-zero labels keep the
        # accuracy/CE code paths below runnable.
        train_x, test_x, valid_x = datasets.load_debd(ARGS.data_path,
                                                      ARGS.data_set)
        train_labels = np.zeros(train_x.shape[0], dtype=np.int32)
        test_labels = np.zeros(test_x.shape[0], dtype=np.int32)
        valid_labels = np.zeros(valid_x.shape[0], dtype=np.int32)
    else:
        # NOTE(review): if ARGS.data_set matches none of these names,
        # ``unpickled`` stays unbound and the code below raises NameError.
        # Also, the open(...) handles here are never closed explicitly.
        if ARGS.data_set == '20ng_classify':
            unpickled = pickle.load(
                open(ARGS.data_path + '/20ng-50-lda.pkl', "rb"))
        elif ARGS.data_set == 'higgs':
            unpickled = pickle.load(open(ARGS.data_path + '/higgs.pkl', "rb"))
        elif ARGS.data_set == 'wine':
            unpickled = pickle.load(open(ARGS.data_path + '/wine.pkl', "rb"))
        elif ARGS.data_set == 'wine_multiclass':
            unpickled = pickle.load(
                open(ARGS.data_path + '/wine_multiclass.pkl', "rb"))
        elif ARGS.data_set == 'theorem':
            unpickled = pickle.load(open(ARGS.data_path + '/theorem.pkl',
                                         "rb"))
        elif ARGS.data_set == 'imdb':
            unpickled = pickle.load(
                open(ARGS.data_path + '/imdb-dense-nmf-200.pkl', "rb"))
        train_x = unpickled[0]
        train_labels = unpickled[1]
        valid_x = unpickled[2]
        valid_labels = unpickled[3]
        test_x = unpickled[4]
        test_labels = unpickled[5]

    ######################
    # Data preprocessing #
    ######################
    # Drop features whose variance is below a fraction of the mean variance
    # (fit on the training split, applied to all splits).
    if ARGS.low_variance_threshold >= 0.0:
        v = np.var(train_x, 0)
        mu = np.mean(v)
        idx = v > ARGS.low_variance_threshold * mu
        train_x = train_x[:, idx]
        test_x = test_x[:, idx]
        if valid_x is not None:
            valid_x = valid_x[:, idx]

    # zero-mean, unit-variance
    if ARGS.normalization == "zmuv":
        train_x_mean = np.mean(train_x, 0)
        train_x_std = np.std(train_x, 0)
        # zmuv_min_sigma guards against division by ~0 for constant features.
        train_x = (train_x - train_x_mean) / (train_x_std +
                                              ARGS.zmuv_min_sigma)
        test_x = (test_x - train_x_mean) / (train_x_std + ARGS.zmuv_min_sigma)
        if valid_x is not None:
            valid_x = (valid_x - train_x_mean) / (train_x_std +
                                                  ARGS.zmuv_min_sigma)

    num_classes = len(np.unique(train_labels))
    num_dims = int(train_x.shape[1])

    # Sanity-check the stored model description against the loaded data.
    ndo, nco, ARGS_orig, region_graph_layers = pickle.load(
        open(ARGS.model_description_file, 'rb'))
    if ndo != num_dims or nco != num_classes:
        raise RuntimeError(
            'Inconsistent number of dimensions/classes when trying to retrieve model.'
        )

    # Make Tensorflow model
    rat_spn = RatSpn(region_graph_layers, num_classes, ARGS=ARGS_orig)

    # session -- cap GPU memory only when a fraction below ~1.0 is requested
    if ARGS.GPU_fraction <= 0.95:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=ARGS.GPU_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    else:
        sess = tf.Session()

    # init/load model
    print("Loading model")
    init = tf.global_variables_initializer()
    sess.run(init)
    init_saver = tf.train.Saver(rat_spn.all_params)
    init_saver.restore(sess, ARGS.model_init_file)

    ###########
    # Testing #
    ###########
    print("Run testing")
    num_correct_train, CE_total, train_LL, train_MARG, train_loss = compute_performance(
        sess, train_x, train_labels, 100, rat_spn)
    train_ACC = 100. * float(num_correct_train) / float(train_x.shape[0])
    train_CE = CE_total / float(train_x.shape[0])
    print(' ###')
    print(' ### accuracy on train set = {} CE = {} LL: {} negmargin: {}'.
          format(train_ACC, train_CE, train_LL, train_MARG))

    num_correct_test, CE_total, test_LL, test_MARG, test_loss = compute_performance(
        sess, test_x, test_labels, 100, rat_spn)
    test_ACC = 100. * float(num_correct_test) / float(test_x.shape[0])
    test_CE = CE_total / float(test_x.shape[0])
    print(' ###')
    print(' ### accuracy on test set = {} CE = {} LL: {} negmargin: {}'.
          format(test_ACC, test_CE, test_LL, test_MARG))

    num_correct_valid, CE_total, valid_LL, valid_MARG, valid_loss = compute_performance(
        sess, valid_x, valid_labels, 100, rat_spn)
    valid_ACC = 100. * float(num_correct_valid) / float(valid_x.shape[0])
    valid_CE = CE_total / float(valid_x.shape[0])
    print(' ###')
    print(' ### accuracy on valid set = {} CE = {} LL: {} margin: {}'.
          format(valid_ACC, valid_CE, valid_LL, valid_MARG))
import theano import theano.tensor as T from neuralmind import NeuralNetwork from layers import HiddenLayer from layers import DropoutLayer import activations from trainers import SGDTrainer from trainers import ExponentialDecay import datasets # Load MNIST datasets = datasets.load_mnist("mnist.pkl.gz") model = NeuralNetwork( n_inputs=28*28, layers = [ (DropoutLayer, {'probability': 0.2}), (HiddenLayer, { 'n_units': 800, 'non_linearity': activations.rectify }), (DropoutLayer, {'probability': 0.5}), (HiddenLayer, { 'n_units': 800, 'non_linearity': activations.rectify
def run_experiment(settings):
    """Discriminatively train a single multi-class EinsumNetwork and report
    log-likelihoods and classification accuracies.

    Parameters
    ----------
    settings : namespace with attributes fashion_mnist, svhn,
        exponential_family, classes, K, structure, pd_num_pieces, depth,
        num_repetitions, width, height, num_epochs, batch_size,
        SGD_learning_rate.

    Returns
    -------
    dict
        Normalized train/valid/test log-likelihoods, accuracies, the
        parameter count and the wall-clock training time.
    """
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn
    exponential_family = settings.exponential_family
    classes = settings.classes
    K = settings.K
    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions
    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    SGD_learning_rate = settings.SGD_learning_rate
    ############################################################################

    # Arguments for the leaf exponential-family distributions.
    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    # Normal leaves expect inputs rescaled from [0, 255] to [-0.5, 0.5].
    # (Was the confusing double negative `not ... != ...` before.)
    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split: last 10k training samples become the validation set
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]

    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        # No restriction requested: keep every class present in the data.
        classes = np.unique(train_labels).tolist()
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    ######################################
    # Make EinsumNetworks for each class #
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[height / d, width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(height, width),
                                              delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1],
                                          depth=depth,
                                          num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    # A single network with one output head per class, trained with
    # cross-entropy below (use_em=False -> plain gradient training).
    args = EinsumNetwork.Args(num_var=train_x.shape[1],
                              num_dims=3 if svhn else 1,
                              num_classes=len(classes),
                              num_sums=K,
                              num_input_distributions=K,
                              exponential_family=exponential_family,
                              exponential_family_args=exponential_family_args,
                              use_em=False)

    einet = EinsumNetwork.EinsumNetwork(graph, args)
    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)
    print(einet)

    num_params = EinsumNetwork.eval_size(einet)

    #################################
    # Discriminative training phase #
    #################################
    optimizer = torch.optim.SGD(einet.parameters(), lr=SGD_learning_rate)
    loss_function = torch.nn.CrossEntropyLoss()

    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    start_time = time.time()
    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)
        total_loss = 0
        for idx in idx_batches:
            batch_x = train_x[idx, :]
            optimizer.zero_grad()
            outputs = einet.forward(batch_x)
            # Map raw labels to contiguous class indices for CrossEntropyLoss.
            target = torch.tensor([
                classes.index(train_labels[i]) for i in idx
            ]).to(torch.device(device))
            loss = loss_function(outputs, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.detach().item()
        print(f'[{epoch_count}] total loss: {total_loss}')
    end_time = time.time()

    ################
    # Experiment 5 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        train_x,
                                                        batch_size=batch_size)
    valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        valid_x,
                                                        batch_size=batch_size)
    test_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                       test_x,
                                                       batch_size=batch_size)
    print()
    print(
        "Experiment 5: Log-likelihoods --- train LL {} valid LL {} test LL {}"
        .format(train_ll / train_N, valid_ll / valid_N, test_ll / test_N))

    ################
    # Experiment 6 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    valid_labels = torch.tensor(valid_labels).to(torch.device(device))
    test_labels = torch.tensor(test_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    train_x,
                                                    train_labels,
                                                    batch_size=batch_size)
    acc_valid = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    valid_x,
                                                    valid_labels,
                                                    batch_size=batch_size)
    acc_test = EinsumNetwork.eval_accuracy_batched(einet,
                                                   classes,
                                                   test_x,
                                                   test_labels,
                                                   batch_size=batch_size)
    print()
    print(
        "Experiment 6: Classification accuracies --- train acc {} valid acc {} test acc {}"
        .format(acc_train, acc_valid, acc_test))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'valid_ll': valid_ll / valid_N,
        'test_ll': test_ll / test_N,
        'train_acc': acc_train,
        'valid_acc': acc_valid,
        'test_acc': acc_test,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
augmentation = { "rotation_range": { "minval": -0.3, "maxval": 0.3 }, "width_shift_range": { "minval": -2, "maxval": 2 }, "height_shift_range": { "minval": -2, "maxval": 2 }, } ds_aug, ds_cluster, X, y = datasets.load_mnist(args.train_batch, args.test_batch, augmentation) # Defining hyperparameters n_clusters = 10 latent_dim = 10 input_shape = (28**2, ) # Define optimizers pretrain_optimizer = { "type": tf.optimizers.SGD, "params": { "lr": 1, "momentum": 0.9 } } cluster_optimizer = { "type": tf.optimizers.Adam,
t2 = time.time() print(" Spent time for training : {}".format(t2-t1)) X, y_true = test y_pred = model.predict(X) accuracy = accuracy_score(y_pred, y_true) print(" Accuracy : {}\n".format(accuracy)) def run_profile(model, model_name, X, y): filename = model_name+".def" profile.runctx("for i in range(100): model.fit(X, y)", globals(), locals(), filename) #p = pstats.Stats(filename) #p.print_stats() training, test = datasets.load_mnist() #X, y = datasets.make_classification() #training, test = utils.train_test_split(X, y) accuracy_and_time(SCW1(), "SCW1", training, test) accuracy_and_time(SCW2(), "SCW2", training, test) accuracy_and_time(LinearSVC(), "LinearSVC", training, test) #training, test = datasets.load_mnist() X, y = training run_profile(SCW1(), "SCW1", X, y) run_profile(SCW2(), "SCW2", X, y)
parser.add_argument('--tol', default=0.001, type=float) parser.add_argument('--cae_weights', default=None, help='This argument must be given') parser.add_argument('--save_dir', default='results/temp') args = parser.parse_args() print(args) import os if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # load dataset from datasets import load_mnist, load_usps if args.dataset == 'mnist': x, y = load_mnist() elif args.dataset == 'usps': x, y = load_usps('data/usps') elif args.dataset == 'mnist-test': x, y = load_mnist() x, y = x[60000:], y[60000:] # prepare the DCEC model dcec = DCEC(input_shape=x.shape[1:], filters=[32, 64, 128, 10], n_clusters=args.n_clusters) plot_model(dcec.model, to_file=args.save_dir + '/dcec_model.png', show_shapes=True) dcec.model.summary()
# ---------------------------------------------------------------------------- # # Parameters f = 20 batch_size = 50 learning_rate = 0.05 activation_func = tf.nn.relu max_train_epoch = 100000 max_train_accur = 0.97 load_parameters = True parameters_path = pathlib.Path("model.npy") # ---------------------------------------------------------------------------- # # Dataset instantiation dataset = datasets.load_mnist() # handwritten digit database train_set = dataset.cut(0, 50000, 50000).shuffle().cut(0, 50000, batch_size) # 1000 batches of size 50 test_set = dataset.cut(50000, 60000, 10000) # 1 batch of size 10 000 # Model instantiator builder_opt = tf.train.AdagradOptimizer(learning_rate) builder_dims = [784, 100, 10] # 3 layer neural network: # input layer : 784 neurons (1 image = 28*28 pixels) # hidden layer : 100 neurons # output layer : 10 neurons (digits 0-9) def builder(inputs=None): return models.dense_classifier(builder_dims, inputs=inputs, act_fn=activation_func, optimizer=builder_opt, epoch=True) # Model instantiation graph = tf.Graph() with graph.as_default():
def train(snapshotroot, device, forestType, numTrees, depth):
    """Train a ``Net(forestType, numTrees, depth)`` on MNIST, snapshotting
    every epoch and restoring the epoch with the lowest validation loss for
    final testing.

    Parameters
    ----------
    snapshotroot : directory receiving one ``epoch_<n>`` state-dict per epoch.
    device : torch device the model and data are moved to.
    forestType, numTrees, depth : forwarded to the ``Net`` constructor.

    Returns
    -------
    (accuracy, valLosses) : test-set accuracy of the best epoch and the
        per-epoch validation losses as a numpy array.
    """
    xtrain, ytrain, xtest, ytest = datasets.load_mnist()

    # XXX: Other papers use val = test for this data set
    xval = xtest
    yval = ytest

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # Count parameters
    numParams = sum(params.numel() for params in net.parameters())
    numTrainable = sum(params.numel() for params in net.parameters()
                       if params.requires_grad)
    print(
        f"There are {numParams} parameters total in this model ({numTrainable} are trainable)"
    )

    numEpochs = 50
    batchSize = 200

    indices = list(range(xtrain.shape[0]))

    bestEpoch = numEpochs - 1
    bestLoss = 1000.0
    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        # Reshuffle the training set in place each epoch.
        random.shuffle(indices)
        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()
            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)
            loss.backward()
            optimizer.step()
            # BUG FIX: accumulate the detached scalar -- summing the loss
            # tensor itself kept every batch's autograd graph alive for the
            # whole epoch.
            runningLoss += loss.item()
            count += 1

        meanLoss = runningLoss / count
        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        # Validation pass (single full-batch evaluation, gradients disabled).
        runningLoss = 0.0
        count = 0
        with torch.no_grad():
            net.train(False)
            #for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)
                runningLoss += loss.item()
                count += 1
            net.train(True)

        valLoss = runningLoss / count
        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch
        valLosses[epoch] = valLoss
        #print(f"Info: epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")

    # Reload the snapshot of the best-validation epoch for final testing.
    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")
    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0
    with torch.no_grad():
        net.train(False)
        #for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)
            tmpCorrect = torch.sum(outputs == ybatch)
            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}"
    )

    return accuracy, valLosses
import numpy as np from keras.models import Sequential from keras.layers import Dense, Dropout, Activation from keras.optimizers import SGD from keras_custom import PlasticReLU, ParameterizedLayer from datasets import load_mnist from visualizer import Visualizer import sys if __name__ == '__main__': np.random.seed(int(sys.argv[1])) epochs = int(sys.argv[2]) datasets = load_mnist(42) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] model = Sequential() #visualizer = Visualizer([196,196,10], model, 1.) model.add( ParameterizedLayer( input_dim=196, output_dim=100, init_scale=(-.5, .5), integer_bits=1, fractional_bits=2, ifactor=1., params_0=np.ones(196, dtype='float32'),
import scipy.io as sio import datasets LR = 1e-4 MBsize = 24 dim_var = [784, 200] TestInterval = 5000 max_iters = 1000000 NonLinerNN = True PreProcess = True dataset = "mnist" # dataset = "omni" if dataset == "mnist": X_tr, X_va, X_te = datasets.load_mnist() elif dataset == "omni": X_tr, X_va, X_te = datasets.load_omniglot() else: assert False num_train = X_tr.shape[0] num_valid = X_va.shape[0] num_test = X_te.shape[0] train_mean = np.mean(X_tr, axis=0, keepdims=True) tf.reset_default_graph() def GOBernoulli(Prob): zsamp = tf.cast(tf.less_equal(tf.random_uniform(Prob.shape), Prob),
def run_experiment(settings):
    """Generatively train a single EinsumNetwork with (online) EM and report
    log-likelihoods and class accuracies.

    Settings used: fashion_mnist, svhn, exponential_family, classes, K,
    structure, pd_num_pieces, depth, num_repetitions, width, height,
    num_epochs, batch_size, online_em_frequency, online_em_stepsize.
    ``SGD_learning_rate`` is read for interface parity but unused here:
    training is pure EM, no gradient optimizer steps are taken.

    Side effects: dumps the last layer's weights before and after training
    into '../src/experiments/round5/data/weights_analysis/'.

    Returns
    -------
    dict
        Normalized train/valid/test log-likelihoods, accuracies, the
        parameter count and the wall-clock training time.
    """
    ############################################################################
    fashion_mnist = settings.fashion_mnist
    svhn = settings.svhn
    exponential_family = settings.exponential_family
    classes = settings.classes
    K = settings.K
    structure = settings.structure

    # 'poon-domingos'
    pd_num_pieces = settings.pd_num_pieces

    # 'binary-trees'
    depth = settings.depth
    num_repetitions = settings.num_repetitions
    width = settings.width
    height = settings.height

    num_epochs = settings.num_epochs
    batch_size = settings.batch_size
    online_em_frequency = settings.online_em_frequency
    online_em_stepsize = settings.online_em_stepsize
    SGD_learning_rate = settings.SGD_learning_rate
    ############################################################################

    # Arguments for the leaf exponential-family distributions.
    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 255}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    # get data
    if fashion_mnist:
        train_x, train_labels, test_x, test_labels = datasets.load_fashion_mnist(
        )
    elif svhn:
        train_x, train_labels, test_x, test_labels, extra_x, extra_labels = datasets.load_svhn(
        )
    else:
        train_x, train_labels, test_x, test_labels = datasets.load_mnist()

    # Normal leaves expect inputs rescaled from [0, 255] to [-0.5, 0.5].
    # (Was the confusing double negative `not ... != ...` before.)
    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        test_x /= 255.
        train_x -= .5
        test_x -= .5

    # validation split: last 10k training samples become the validation set
    valid_x = train_x[-10000:, :]
    train_x = train_x[:-10000, :]
    valid_labels = train_labels[-10000:]
    train_labels = train_labels[:-10000]

    # pick the selected classes
    if classes is not None:
        train_x = train_x[
            np.any(np.stack([train_labels == c for c in classes], 1), 1), :]
        valid_x = valid_x[
            np.any(np.stack([valid_labels == c for c in classes], 1), 1), :]
        test_x = test_x[
            np.any(np.stack([test_labels == c for c in classes], 1), 1), :]
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]
    else:
        # No restriction requested: keep every class present in the data.
        classes = np.unique(train_labels).tolist()
        train_labels = [l for l in train_labels if l in classes]
        valid_labels = [l for l in valid_labels if l in classes]
        test_labels = [l for l in test_labels if l in classes]

    train_x = torch.from_numpy(train_x).to(torch.device(device))
    valid_x = torch.from_numpy(valid_x).to(torch.device(device))
    test_x = torch.from_numpy(test_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[height / d, width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(height, width),
                                              delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1],
                                          depth=depth,
                                          num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    args = EinsumNetwork.Args(num_var=train_x.shape[1],
                              num_dims=3 if svhn else 1,
                              num_classes=1,
                              num_sums=K,
                              num_input_distributions=K,
                              exponential_family=exponential_family,
                              exponential_family_args=exponential_family_args,
                              online_em_frequency=online_em_frequency,
                              online_em_stepsize=online_em_stepsize,
                              use_em=True)

    einet = EinsumNetwork.EinsumNetwork(graph, args)
    print(einet)

    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)

    num_params = EinsumNetwork.eval_size(einet)

    # Dump the last layer's weights before training for later analysis.
    data_dir = '../src/experiments/round5/data/weights_analysis/'
    data_file = os.path.join(data_dir, f"weights_before.json")
    weights = einet.einet_layers[-1].params.data.cpu()
    np.savetxt(data_file, weights[0])

    # Train
    ######################################
    # EM training: backward() only accumulates the expected statistics that
    # em_process_batch()/em_update() consume -- no SGD optimizer is used
    # (the previous unused torch.optim.SGD instance has been removed).
    train_N = train_x.shape[0]
    valid_N = valid_x.shape[0]
    test_N = test_x.shape[0]

    start_time = time.time()
    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)
        total_loss = 0.0
        for idx in idx_batches:
            batch_x = train_x[idx, :]
            outputs = einet.forward(batch_x)
            ll_sample = EinsumNetwork.log_likelihoods(outputs)
            log_likelihood = ll_sample.sum()
            # BUG FIX: total_loss was never accumulated, so the progress line
            # always printed 0.0; track the batch NLL here.
            total_loss += -log_likelihood.detach().item()
            log_likelihood.backward()
            einet.em_process_batch()
        einet.em_update()
        print(f'[{epoch_count}] total loss: {total_loss}')
    end_time = time.time()

    # Dump the last layer's weights after training for later analysis.
    data_dir = '../src/experiments/round5/data/weights_analysis/'
    data_file = os.path.join(data_dir, f"weights_after.json")
    weights = einet.einet_layers[-1].params.data.cpu()
    np.savetxt(data_file, weights[0])

    ################
    # Experiment 1 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        train_x,
                                                        batch_size=batch_size)
    valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                        valid_x,
                                                        batch_size=batch_size)
    test_ll = EinsumNetwork.eval_loglikelihood_batched(einet,
                                                       test_x,
                                                       batch_size=batch_size)
    print()
    print(
        "Experiment 1: Log-likelihoods --- train LL {} valid LL {} test LL {}"
        .format(train_ll / train_N, valid_ll / valid_N, test_ll / test_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    valid_labels = torch.tensor(valid_labels).to(torch.device(device))
    test_labels = torch.tensor(test_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    train_x,
                                                    train_labels,
                                                    batch_size=batch_size)
    acc_valid = EinsumNetwork.eval_accuracy_batched(einet,
                                                    classes,
                                                    valid_x,
                                                    valid_labels,
                                                    batch_size=batch_size)
    acc_test = EinsumNetwork.eval_accuracy_batched(einet,
                                                   classes,
                                                   test_x,
                                                   test_labels,
                                                   batch_size=batch_size)
    print()
    print(
        "Experiment 2: Classification accuracies --- train acc {} valid acc {} test acc {}"
        .format(acc_train, acc_valid, acc_test))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'valid_ll': valid_ll / valid_N,
        'test_ll': test_ll / test_N,
        'train_acc': acc_train,
        'valid_acc': acc_valid,
        'test_acc': acc_test,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
def main(relaxation=None, learn_prior=True, max_iters=None,
         batch_size=24, num_latents=200, model_type=None, lr=None,
         test_bias=False, train_dir=None, iwae_samples=100,
         dataset="mnist", logf=None, var_lr_scale=10., Q_wd=.0001,
         Q_depth=-1, checkpoint_path=None):
    """Train a sigmoid belief network with REBAR/RELAX-style gradient estimators.

    Builds a TF1 graph: an inference (encoder) and generator (decoder) network
    over binary latents, a hard-sample negative-ELBO loss, and per-layer
    low-variance gradient estimators (REBAR, or RELAX variants "add"/"all"
    that include a learned Q network).  Two Adam optimizers are used: one for
    model parameters and one (at ``var_lr_scale * lr``) for the
    variance-reduction parameters (temperatures/etas and, for RELAX, Q).
    Training loss curves are periodically evaluated on the train/valid/test
    splits and saved to a .mat file.

    Parameters (as used in this visible portion):
      relaxation     -- "rebar", "add", or "all"; selects the estimator.
                        NOTE(review): ``mode_out = 'RELAX' + relaxation``
                        raises TypeError for the default None — callers must
                        pass a string.
      learn_prior    -- whether the prior logits variable is trainable.
      max_iters      -- stop after this many iterations.
                        NOTE(review): compared with ``cur_iter > max_iters``;
                        the default None would raise on Python 3 — confirm
                        callers always pass an int.
      batch_size, num_latents, model_type ("L1"/"L2"/"NL1"), lr,
      dataset ("mnist"/"omni"), var_lr_scale, Q_wd, Q_depth, checkpoint_path
                     -- as named.
      test_bias, train_dir, iwae_samples, logf
                     -- accepted but not used in this visible portion.
    """
    valid_batch_size = 100
    # Select network depth/layer constructor from the model-type code.
    if model_type == "L1":
        num_layers = 1
        layer_type = linear_layer
    elif model_type == "L2":
        num_layers = 2
        layer_type = linear_layer
    elif model_type == "NL1":
        num_layers = 1
        layer_type = nonlinear_layer
    else:
        # NOTE(review): assert is stripped under ``python -O``; a raise would
        # be more robust for argument validation.
        assert False, "bad model type {}".format(model_type)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True)))
    if dataset == "mnist":
        X_tr, X_va, X_te = datasets.load_mnist()
    elif dataset == "omni":
        X_tr, X_va, X_te = datasets.load_omniglot()
    else:
        assert False
    num_train = X_tr.shape[0]
    num_valid = X_va.shape[0]
    num_test = X_te.shape[0]
    # Mean image, and the logit of the (clipped) mean used as the output bias
    # of the generator so its initial Bernoulli means match the data.
    train_mean = np.mean(X_tr, axis=0, keepdims=True)
    train_output_bias = -np.log(
        1. / np.clip(train_mean, 0.001, 0.999) - 1.).astype(np.float32)
    # Flattened 28x28 input placeholder.
    x = tf.placeholder(tf.float32, [None, 784])
    # x_im = tf.reshape(x, [-1, 28, 28, 1])
    # tf.summary.image("x_true", x_im)

    # make prior for top b
    p_prior = tf.Variable(
        tf.zeros([num_latents], dtype=tf.float32),
        trainable=learn_prior,
        name='p_prior',
    )
    # create rebar specific variables temperature and eta (one per layer)
    log_temperatures = [create_log_temp(1) for l in range(num_layers)]
    temperatures = [tf.exp(log_temp) for log_temp in log_temperatures]
    batch_temperatures = [tf.reshape(temp, [1, -1]) for temp in temperatures]
    etas = [create_eta(1) for l in range(num_layers)]
    batch_etas = [tf.reshape(eta, [1, -1]) for eta in etas]

    # random uniform samples, shared by the hard and soft samplers below so
    # that b, z and z~ are coupled (common random numbers).
    u = [
        tf.random_uniform([tf.shape(x)[0], num_latents], dtype=tf.float32)
        for l in range(num_layers)
    ]
    # create binary sampler
    b_sampler = BSampler(u, "b_sampler")
    gen_b_sampler = BSampler(u, "gen_b_sampler")

    # generate hard forward pass
    encoder_name = "encoder"
    decoder_name = "decoder"
    inf_la_b, samples_b = inference_network(x, train_mean, layer_type,
                                            num_layers, num_latents,
                                            encoder_name, False, b_sampler)
    gen_la_b = generator_network(samples_b, train_output_bias, layer_type,
                                 num_layers, num_latents, decoder_name, False)
    log_image(gen_la_b[-1], "x_pred")
    # produce samples from the prior through the (reused) decoder
    _samples_la_b = generator_network(None, train_output_bias, layer_type,
                                      num_layers, num_latents, decoder_name,
                                      True, sampler=gen_b_sampler,
                                      prior=p_prior)
    log_image(_samples_la_b[-1], "x_sample")

    # hard loss evaluation and log probs
    f_b, log_q_bs = neg_elbo(x, samples_b, inf_la_b, gen_la_b, p_prior,
                             log=True)
    batch_f_b = tf.expand_dims(f_b, 1)
    total_loss = tf.reduce_mean(f_b)
    # tf.summary.scalar("fb", total_loss)

    # optimizer for model parameters
    model_opt = tf.train.AdamOptimizer(lr, beta2=.99999)
    # optimizer for variance reducing parameters (faster learning rate)
    variance_opt = tf.train.AdamOptimizer(var_lr_scale * lr, beta2=.99999)

    # get encoder and decoder variables
    encoder_params = get_variables(encoder_name)
    decoder_params = get_variables(decoder_name)
    if learn_prior:
        decoder_params.append(p_prior)

    # compute and store gradients of hard loss with respect to encoder_parameters
    encoder_loss_grads = {}
    for g, v in model_opt.compute_gradients(total_loss,
                                            var_list=encoder_params):
        encoder_loss_grads[v.name] = g
    # get gradients for decoder parameters
    decoder_gradvars = model_opt.compute_gradients(total_loss,
                                                   var_list=decoder_params)
    # will hold all gradvars for the model (non-variance adjusting variables)
    model_gradvars = [gv for gv in decoder_gradvars]

    # conditional samples: v ~ u | b, used for the z~ (tilde) pass
    v = [v_from_u(_u, log_alpha) for _u, log_alpha in zip(u, inf_la_b)]

    # need to create soft samplers
    sig_z_sampler = SIGZSampler(u, batch_temperatures, "sig_z_sampler")
    sig_zt_sampler = SIGZSampler(v, batch_temperatures, "sig_zt_sampler")
    z_sampler = ZSampler(u, "z_sampler")
    zt_sampler = ZSampler(v, "zt_sampler")

    rebars = []
    reinforces = []
    variance_objectives = []
    # have to produce 2 forward passes for each layer for z and zt samples
    for l in range(num_layers):
        cur_la_b = inf_la_b[l]

        # if standard rebar or additive relaxation
        if relaxation == "rebar" or relaxation == "add":
            # compute soft samples and soft passes through model and soft elbos
            cur_z_sample = sig_z_sampler.sample(cur_la_b, l)
            prev_samples_z = samples_b[:l] + [cur_z_sample]
            cur_zt_sample = sig_zt_sampler.sample(cur_la_b, l)
            prev_samples_zt = samples_b[:l] + [cur_zt_sample]
            prev_log_alphas = inf_la_b[:l] + [cur_la_b]
            # soft forward passes (reuse=True: same weights as the hard pass)
            inf_la_z, samples_z = inference_network(x, train_mean, layer_type,
                                                    num_layers, num_latents,
                                                    encoder_name, True,
                                                    sig_z_sampler,
                                                    samples=prev_samples_z,
                                                    log_alphas=prev_log_alphas)
            gen_la_z = generator_network(samples_z, train_output_bias,
                                         layer_type, num_layers, num_latents,
                                         decoder_name, True)
            inf_la_zt, samples_zt = inference_network(
                x, train_mean, layer_type, num_layers, num_latents,
                encoder_name, True, sig_zt_sampler, samples=prev_samples_zt,
                log_alphas=prev_log_alphas)
            gen_la_zt = generator_network(samples_zt, train_output_bias,
                                          layer_type, num_layers, num_latents,
                                          decoder_name, True)
            # soft loss evaluataions
            f_z, _ = neg_elbo(x, samples_z, inf_la_z, gen_la_z, p_prior)
            f_zt, _ = neg_elbo(x, samples_zt, inf_la_zt, gen_la_zt, p_prior)

        if relaxation == "add" or relaxation == "all":
            # sample z and zt and evaluate the learned Q surrogate on them
            prev_bs = samples_b[:l]
            cur_z_sample = z_sampler.sample(cur_la_b, l)
            cur_zt_sample = zt_sampler.sample(cur_la_b, l)
            q_z = Q_func(x, train_mean, cur_z_sample, prev_bs, Q_name(l),
                         False, depth=Q_depth)
            q_zt = Q_func(x, train_mean, cur_zt_sample, prev_bs, Q_name(l),
                          True, depth=Q_depth)
            # tf.summary.scalar("q_z_{}".format(l), tf.reduce_mean(q_z))
            # tf.summary.scalar("q_zt_{}".format(l), tf.reduce_mean(q_zt))
            if relaxation == "add":
                # RELAX "add": surrogate = soft ELBO + Q
                f_z = f_z + q_z
                f_zt = f_zt + q_zt
            elif relaxation == "all":
                # RELAX "all": surrogate = Q alone
                f_z = q_z
                f_zt = q_zt
            else:
                # unreachable inside this branch; defensive guard
                assert False
        # tf.summary.scalar("f_z_{}".format(l), tf.reduce_mean(f_z))
        # tf.summary.scalar("f_zt_{}".format(l), tf.reduce_mean(f_zt))

        cur_samples_b = samples_b[l]
        # get gradient of sample log-likelihood wrt current parameter
        d_log_q_d_la = bernoulli_loglikelihood_derivitive(
            cur_samples_b, cur_la_b)
        # get gradient of soft-losses wrt current parameter
        d_f_z_d_la = tf.gradients(f_z, cur_la_b)[0]
        d_f_zt_d_la = tf.gradients(f_zt, cur_la_b)[0]
        batch_f_zt = tf.expand_dims(f_zt, 1)
        eta = batch_etas[l]

        # compute rebar and reinforce estimators (per-example, averaged by
        # dividing by batch_size)
        # tf.summary.histogram("der_diff_{}".format(l), d_f_z_d_la - d_f_zt_d_la)
        # tf.summary.histogram("d_log_q_d_la_{}".format(l), d_log_q_d_la)
        rebar = ((batch_f_b - eta * batch_f_zt) * d_log_q_d_la + eta *
                 (d_f_z_d_la - d_f_zt_d_la)) / batch_size
        reinforce = batch_f_b * d_log_q_d_la / batch_size
        rebars.append(rebar)
        reinforces.append(reinforce)
        # tf.summary.histogram("rebar_{}".format(l), rebar)
        # tf.summary.histogram("reinforce_{}".format(l), reinforce)

        # backpropogate rebar to individual layer parameters
        layer_params = get_variables(layer_name(l), arr=encoder_params)
        layer_rebar_grads = tf.gradients(cur_la_b, layer_params,
                                         grad_ys=rebar)
        # get direct loss grads for each parameter
        layer_loss_grads = [encoder_loss_grads[v.name] for v in layer_params]
        # each param's gradient should be rebar + the direct loss gradient
        layer_grads = [
            rg + lg for rg, lg in zip(layer_rebar_grads, layer_loss_grads)
        ]
        # for rg, lg, v in zip(layer_rebar_grads, layer_loss_grads, layer_params):
        #     tf.summary.histogram(v.name + "_grad_rebar", rg)
        #     tf.summary.histogram(v.name + "_grad_loss", lg)
        layer_gradvars = list(zip(layer_grads, layer_params))
        model_gradvars.extend(layer_gradvars)
        # variance-reduction objective: E[rebar^2] (minimized wrt temp/eta/Q)
        variance_objective = tf.reduce_mean(tf.square(rebar))
        variance_objectives.append(variance_objective)

    variance_objective = tf.add_n(variance_objectives)
    variance_vars = log_temperatures + etas
    if relaxation != "rebar":
        # add weight decay on the Q network to the variance objective
        q_vars = get_variables("Q_")
        wd = tf.add_n([Q_wd * tf.nn.l2_loss(v) for v in q_vars])
        # tf.summary.scalar("Q_weight_decay", wd)
        # variance_vars = variance_vars + q_vars
        # NOTE(review): q_vars are NOT appended to variance_vars here (line is
        # commented out), so Q weights are not updated by variance_train_op —
        # confirm this is intentional.
    else:
        wd = 0.0
    variance_gradvars = variance_opt.compute_gradients(variance_objective + wd,
                                                       var_list=variance_vars)
    variance_train_op = variance_opt.apply_gradients(variance_gradvars)
    model_train_op = model_opt.apply_gradients(model_gradvars)
    # single op that runs both updates
    with tf.control_dependencies([model_train_op, variance_train_op]):
        train_op = tf.no_op()

    # for g, v in model_gradvars + variance_gradvars:
    #     print(g, v.name)
    #     if g is not None:
    #         tf.summary.histogram(v.name, v)
    #         tf.summary.histogram(v.name + "_grad", g)

    # NOTE(review): the variables/savers/iwae_elbo below are created but not
    # used in this visible portion of the function.
    val_loss = tf.Variable(1000, trainable=False, name="val_loss",
                           dtype=tf.float32)
    train_loss = tf.Variable(1000, trainable=False, name="train_loss",
                             dtype=tf.float32)
    # tf.summary.scalar("val_loss", val_loss)
    # tf.summary.scalar("train_loss", train_loss)
    # summ_op = tf.summary.merge_all()
    # summary_writer = tf.summary.FileWriter(train_dir)
    sess.run(tf.global_variables_initializer())
    # create savers
    train_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    val_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    iwae_elbo = -(tf.reduce_logsumexp(-f_b) - np.log(valid_batch_size))

    if checkpoint_path is None:
        iters_per_epoch = X_tr.shape[0] // batch_size
        print("Train set has {} examples".format(X_tr.shape[0]))
        if relaxation != "rebar":
            # warm up the Q network / variance parameters before training
            print("Pretraining Q network")
            for i in range(1000):
                if i % 100 == 0:
                    print(i)
                idx = np.random.randint(0, iters_per_epoch - 1)
                batch_xs = X_tr[idx * batch_size:(idx + 1) * batch_size]
                sess.run(variance_train_op, feed_dict={x: batch_xs})
        # t = time.time()
        best_val_loss = np.inf

        # results saving: copy the running script and build the .mat path
        if relaxation == 'rebar':
            mode_out = relaxation
        else:
            mode_out = 'RELAX' + relaxation
        result_dir = './Results_MNIST_SBN'
        if not os.path.isdir(result_dir):
            os.mkdir(result_dir)
        shutil.copyfile(
            sys.argv[0], result_dir + '/training_script_' + dataset + '_' +
            mode_out + '_' + model_type + '.py')
        pathsave = result_dir + '/TF_SBN_' + dataset + '_' + mode_out + \
            '_MB[%d]_' % batch_size + model_type + '_LR[%.2e].mat' % lr

        # accumulators for per-minibatch and per-interval loss curves
        tr_loss_mb_set = []
        tr_timerun_mb_set = []
        tr_iter_mb_set = []
        tr_loss_set = []
        tr_timerun_set = []
        tr_iter_set = []
        val_loss_set = []
        val_timerun_set = []
        val_iter_set = []
        te_loss_set = []
        te_timerun_set = []
        te_iter_set = []

        for epoch in range(10000000):
            # train_losses = []
            for i in range(iters_per_epoch):
                cur_iter = epoch * iters_per_epoch + i
                if cur_iter == 0:
                    # NOTE(review): time.clock() was deprecated in Python 3.3
                    # and removed in 3.8 — time.perf_counter() is the modern
                    # replacement (would be a behavior change; left as-is).
                    time_start = time.clock()
                if cur_iter > max_iters:
                    print("Training Completed")
                    return
                batch_xs = X_tr[i * batch_size:(i + 1) * batch_size]
                loss, _ = sess.run([total_loss, train_op],
                                   feed_dict={x: batch_xs})
                time_run = time.clock() - time_start
                tr_loss_mb_set.append(loss)
                tr_timerun_mb_set.append(time_run)
                tr_iter_mb_set.append(cur_iter + 1)
                if (cur_iter + 1) % 100 == 0:
                    print(
                        'Step: [{:6d}], Loss_mb: [{:10.4f}], time_run: [{:10.4f}]'
                        .format(cur_iter + 1, loss, time_run))

                TestInterval = 5000
                Train_num_mbs = num_train // batch_size
                Valid_num_mbs = num_valid // batch_size
                Test_num_mbs = num_test // batch_size

                # Testing: every TestInterval iterations evaluate the mean
                # loss over the full train/valid/test splits.
                if (cur_iter + 1) % TestInterval == 0:
                    # Training
                    loss_train1 = 0
                    for step_train in range(Train_num_mbs):
                        x_train = X_tr[step_train * batch_size:(step_train + 1)
                                       * batch_size]
                        feed_dict_train = {x: x_train}
                        loss_train_mb1 = sess.run(total_loss,
                                                  feed_dict=feed_dict_train)
                        loss_train1 += loss_train_mb1 * batch_size
                    loss_train1 = loss_train1 / (Train_num_mbs * batch_size)
                    tr_loss_set.append(loss_train1)
                    tr_timerun_set.append(time_run)
                    tr_iter_set.append(cur_iter + 1)
                    # Validation
                    loss_val1 = 0
                    for step_val in range(Valid_num_mbs):
                        x_valid = X_va[step_val * batch_size:(step_val + 1) *
                                       batch_size]
                        feed_dict_val = {x: x_valid}
                        loss_val_mb1 = sess.run(total_loss,
                                                feed_dict=feed_dict_val)
                        loss_val1 += loss_val_mb1 * batch_size
                    loss_val1 = loss_val1 / (Valid_num_mbs * batch_size)
                    val_loss_set.append(loss_val1)
                    val_timerun_set.append(time_run)
                    val_iter_set.append(cur_iter + 1)
                    # Test
                    loss_test1 = 0
                    for step_test in range(Test_num_mbs):
                        x_test = X_te[step_test * batch_size:(step_test + 1) *
                                      batch_size]
                        feed_dict_test = {x: x_test}
                        loss_test_mb1 = sess.run(total_loss,
                                                 feed_dict=feed_dict_test)
                        loss_test1 += loss_test_mb1 * batch_size
                    loss_test1 = loss_test1 / (Test_num_mbs * batch_size)
                    te_loss_set.append(loss_test1)
                    te_timerun_set.append(time_run)
                    te_iter_set.append(cur_iter + 1)
                    print(
                        '============TestInterval: [{:6d}], Loss_train: [{:10.4f}], Loss_val: [{:10.4f}], Loss_test: [{:10.4f}]'
                        .format(TestInterval, loss_train1, loss_val1,
                                loss_test1))

                # Saving: dump all accumulated curves to the .mat file
                if (cur_iter + 1) % TestInterval == 0:
                    sio.savemat(
                        pathsave, {
                            'tr_loss_mb_set': tr_loss_mb_set,
                            'tr_timerun_mb_set': tr_timerun_mb_set,
                            'tr_iter_mb_set': tr_iter_mb_set,
                            'tr_loss_set': tr_loss_set,
                            'tr_timerun_set': tr_timerun_set,
                            'tr_iter_set': tr_iter_set,
                            'val_loss_set': val_loss_set,
                            'val_timerun_set': val_timerun_set,
                            'val_iter_set': val_iter_set,
                            'te_loss_set': te_loss_set,
                            'te_timerun_set': te_timerun_set,
                            'te_iter_set': te_iter_set,
                        })
# NOTE(review): this chunk begins mid-definition — the `def` line of the
# enclosing method is missing from this view.  From its body (sums squared
# differences between `expected[i]` and `out[i]`), it is presumably the tail
# of a mean-squared-error cost method of the MLP class whose `accuracy`
# method follows; the class header itself is also outside this view.
    s = 0
    for i in range(0, len(out)):
        s += np.sum(np.square(expected[i] - out[i]))
    # 0.5 * mean squared error over the batch
    return (0.5 / len(out)) * s

    def accuracy(self, imgs, labels):
        # Fraction of images whose predicted digit matches the label.
        correct = 0
        for i in range(0, len(imgs)):
            in_layer = img2input(imgs[i])
            out_layer = self.forward_pass(in_layer)
            # NOTE(review): comparing argmax + 1 against labels[i][0] implies
            # 1-based labels stored in a nested sequence — confirm against
            # the dataset loader.
            if np.argmax(out_layer) + 1 == labels[i][0]:
                correct += 1
        return float(correct) / len(imgs)


# --- module-level script: load MNIST, scale pixels, build and probe the MLP
X, Y = datasets.load_mnist("training")
# preprocess: scale pixel values into [0, 1] in place
for i in range(0, len(X)):
    X[i] /= 255.0
print("Preprocessing finished")

# 784-input, one 30-unit hidden layer, 10 outputs
mlp = MLP([28 * 28, 30, 10])
mlp.init_weights()
#print("Before training accuracy: ", mlp.accuracy(X, Y))
# single forward pass on the first example and its one-hot target
out = mlp.forward_pass(img2input(X[0]))
expected = digit2vec(Y[0])
# expecteds = map(digit2vec, Y)