def sgemm(A, B,
          dim_x=16, dim_y=16, blk_m=64, blk_n=64, blk_k=4,
          dim_xa=64, dim_ya=4, dim_xb=4, dim_yb=64):
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert (dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k, n = B.shape

    # Input matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)
    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {'DIM_X': dim_x, 'DIM_Y': dim_y,
              'BLK_M': blk_m, 'BLK_N': blk_n, 'BLK_K': blk_k,
              'DIM_XA': dim_xa, 'DIM_YA': dim_ya,
              'DIM_XB': dim_xb, 'DIM_YB': dim_yb,
              'THR_M': blk_m // dim_x, 'THR_N': blk_n // dim_y}
    code = read_code(sgemm_file, params=config)
    kern = load_kernel('sgemm', code)

    grid = (int(math.ceil(m / blk_m)), int(math.ceil(n / blk_n)), 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C
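# Minimal usage sketch (illustrative, not part of the original code). It assumes CuPy and
# `math` are imported and that `sgemm_file` points to the templated CUDA source consumed by
# read_code(); the result should agree with cuBLAS to float32 tolerance:
#
#   A = cp.random.rand(512, 256).astype(cp.float32)
#   B = cp.random.rand(256, 128).astype(cp.float32)
#   C = sgemm(A, B)
#   cp.testing.assert_allclose(C, A.dot(B), rtol=1e-4, atol=1e-4)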
#%%
X = train_images
Xfull = np.concatenate([train_images, test_images])
ys2 = [[y] for y in ys]
ysfull = ys2 + [[y] for y in test_ys]
Yfull = np.array(ysfull)
Y = np.array(ys2)

#
# from fc_kernel import kernel_matrix
# Kfull = kernel_matrix(Xfull, number_layers=number_layers, sigmaw=sigmaw, sigmab=sigmab)
FLAGS["m"] = 1500
Kfull = load_kernel(FLAGS)
K = Kfull[0:m, 0:m]

#%%
# filename = kernel_folder
# for flag in ["network", "dataset", "m", "confusion", "label_corruption", "binarized",
#              "whitening", "random_labels", "number_layers", "sigmaw", "sigmab"]:
#     filename += str(FLAGS[flag]) + "_"
# filename += "kernel.npy"
# np.save(open(filename, "wb"), Kfull)

# ### trying gpflow now
#%%
import tensorflow as tf
X = flat_train_images
ys2 = [[y] for y in ys]
Y = np.array(ys2)

#%%
print("Loading kernel")
from os import path
# FLAGS["m"] = m + 500
# filename = kernel_folder
# for flag in ["network", "dataset", "m", "confusion", "label_corruption", "binarized",
#              "whitening", "random_labels", "number_layers", "sigmaw", "sigmab"]:
#     filename += str(FLAGS[flag]) + "_"
# filename += "kernel.npy"
# if path.exists(filename):
#     K = load_kernel(FLAGS)
# try:
K = load_kernel(FLAGS)
# except:
#     if rank == 0:
#         from fc_kernel import kernel_matrix
#         K = kernel_matrix(X, number_layers=number_layers, sigmaw=sigmaw, sigmab=sigmab, n_gpus=n_gpus)
#         np.save(open(filename, "wb"), K)
#     K = load_kernel(FLAGS)
print("Loaded kernel")

#%%
Kinv = np.linalg.inv(K)
eigvals = np.linalg.eigh(K)[0]
det = np.prod(eigvals)  # determinant of K from its eigenvalues
n = len(X)
normalization = np.sqrt(np.power(2 * np.pi, n) * det)
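# Hedged sketch (not in the original script): with Kinv and the eigenvalues of K available,
# the zero-mean GP log marginal likelihood of the real-valued labels can be computed directly
# in log-space, which avoids the overflow that the explicit (2*pi)^n normalization above can
# run into for large n:
#
#   y = Y.astype(np.float64).ravel()
#   log_det_K = np.sum(np.log(eigvals))
#   log_marginal_likelihood = -0.5 * (y @ Kinv @ y + log_det_K + n * np.log(2 * np.pi))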
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian are not implemented for computing PAC-Bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    if using_NTK:
        FLAGS["use_empirical_NTK"] = True
        theta = load_kernel(FLAGS)
        print(theta)
        # if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        FLAGS["use_empirical_NTK"] = False
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if theta.shape[0] >= m:  # must have computed kernel for GP_train
            theta = theta[:m, :m]
        if K.shape[0] >= m:  # must have computed kernel for GP_train
            K = K[:m, :m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if K.shape[0] >= m:  # must have computed kernel for GP_train
            K = K[:m, :m]

    # finding log marginal likelihood of data
    if using_EP:
        from GP_prob.GP_prob_gpy2 import GP_prob
        logPU = GP_prob(K, X, Y, method="EP", using_exactPB=using_exactPB)
    elif using_Laplace:
        from GP_prob.GP_prob_gpy2 import GP_prob
        # from GP_prob.GP_prob_numpy import GP_prob
        logPU = GP_prob(K, X, Y, method="Laplace", using_exactPB=using_exactPB)
        # logPU = GP_prob(K, np.squeeze(Y))
    elif using_Laplace2:
        # from GP_prob.GP_prob_gpy import GP_prob
        from GP_prob.GP_prob_numpy import GP_prob
        # this gives different results because it uses a cruder implementation of Laplace,
        # relying on a more naive Newton method to find the maximum of the posterior
        # logPU = GP_prob(K, X, Y, method="Laplace")
        logPU = GP_prob(K, np.squeeze(Y))
    elif using_MC:
        from GP_prob.GP_prob_MC import GP_prob
        logPU = GP_prob(K, X, Y, FLAGS)
    elif using_regression:
        from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K, X, Y, sigma_noise=np.sqrt(total_samples / 2))
        logPU = GP_prob(K, X, Y, sigma_noise=1.0)
    elif using_NTK:
        # from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K, X, Y, sigma_noise=np.sqrt(total_samples / 2))
        # logPU = GP_prob(K, X, Y, sigma_noise=1.0, posterior="ntk")
        from GP_prob.GP_prob_ntk import GP_prob
        logPU = GP_prob(K, theta, X, Y, t=1e2)

    if rank == 0:
        print(logPU)

        # compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 - np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (1 - rho)
        # correct for the confusion changing the training data distribution
        # (present in the training set, but not in the test set)!
print("Bound: ", bound) print("Accuracy bound: ", 1 - bound) useful_flags = [ "dataset", "boolfun_comp", "boolfun", "network", "m", "label_corruption", "confusion", "number_layers", "sigmaw", "sigmab", "binarized", "pooling", "intermediate_pooling", "whitening", "training", "n_gpus", "kernel_mult", "normalize_kernel" ] with open(results_folder + prefix + "bounds.txt", "a") as file: file.write("#") for key in useful_flags: file.write("{}\t".format(key)) file.write("bound") file.write("\t") file.write("logP") file.write("\n") for key in useful_flags: file.write("{}\t".format(FLAGS[key])) file.write("{}".format(bound)) file.write("\t") file.write("{}".format(logPU)) file.write("\n")
def main(_):
    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError("Initialization distributions other than Gaussian are not implemented for computing PAC-Bayes bounds!")

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    # print("ys", ys)

    # process the data into the right format for the GP;
    # test on a smaller sample of the test set because otherwise the GP would run out of memory
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this layout
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])

    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    # if test_fun_override is not None:
    #     ys2test = [[float(x)] for x in test_fun_override]
    # else:
    ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")
    if using_NTK:
        raise NotImplementedError("Haven't implemented logQ estimate for NTK yet")
        # FLAGS["use_empirical_NTK"] = True
        # theta = load_kernel(FLAGS)
        # print(theta)
        # # if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        # FLAGS["use_empirical_NTK"] = False
        # K_pre = load_kernel(FLAGS)
        # print(K_pre)
        # if normalize_kernel:
        #     K_pre = K_pre / K_pre.max()
        # K = kernel_mult * K_pre
        # if theta.shape[0] >= m:  # must have computed kernel for GP_train
        #     theta = theta[:m, :m]
        # if K.shape[0] >= m:  # must have computed kernel for GP_train
        #     K = K[:m, :m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        Kfull = kernel_mult * K_pre

    # finding log marginal likelihood of data
    if loss == "mse":
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_params
        mean, cov = nngp_mse_heaviside_posteror_params(Xtrain, Ytrain, Xtest, Kfull)
    else:
        raise NotImplementedError("Only mse loss implemented")

    if rank == 0:
        from utils import save_posterior_params
        save_posterior_params(mean, cov, FLAGS)
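# Hedged sketch (not part of the original script): inside main() above, once
# nngp_mse_heaviside_posteror_params has returned the posterior mean and covariance over the
# test points, the per-point probability of label 1 under a thresholded readout can be read
# off the Gaussian marginals in closed form, instead of by Monte Carlo sampling. Here t is
# the classification threshold (e.g. 0 or 0.5, depending on the label encoding):
#
#   from scipy.stats import norm
#   p_one = 1.0 - norm.cdf((t - mean.ravel()) / np.sqrt(np.diag(cov)))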
def main(_):
    MAX_TRAIN_EPOCHS = 5000

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if doing_regression:
        assert loss == "mse"
    global threshold

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    num_tasks_per_job = number_inits // size
    tasks = list(range(int(rank * num_tasks_per_job), int((rank + 1) * num_tasks_per_job)))
    if rank < number_inits % size:
        tasks.append(size * num_tasks_per_job + rank)

    import os
    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % n_gpus)

    from tensorflow import keras

    '''LOAD DATA & ARCHITECTURE'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    # print("ys", ys)

    # process the data into the right format for the GP;
    # test on a smaller sample of the test set because otherwise the GP would run out of memory
    test_images = test_images[:1000]
    test_ys = test_ys[:1000]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1], np.arange(1, len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW, because the CNN GP kernels assume this layout
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])

    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    ysfull = ys2 + [[y] for y in test_ys]
    Yfull = np.array(ysfull)
    Y = np.array(ys2)

    FLAGS["use_empirical_NTK"] = True
    theta_full = load_kernel(FLAGS)
    # print(theta_full)
    FLAGS["use_empirical_NTK"] = False
    K_pre = load_kernel(FLAGS)
    print(K_pre)
    if normalize_kernel:
        K_pre = K_pre / K_pre.max()
    Kfull = kernel_mult * K_pre

    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    n = X.shape[0]
    K_train = Kfull[:n, :n]
    K_test = Kfull[n:, n:]
    K_train_test = Kfull[:n, n:]
    theta_train = theta_full[:n, :n]
    theta_test = theta_full[n:, n:]
    theta_train_test = theta_full[:n, n:]
    mu, Sigma = NTK_posterior(K_train, K_test, K_train_test,
                              theta_train, theta_test, theta_train_test,
                              X, Y, t=training_time)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    # things to keep track of
    # functions = []
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0

    funs_filename = results_folder + prefix + "_" + str(rank) + "_nn_train_functions.txt"

    if loss == "mse":
        likelihood = "gaussian"
    elif loss == "ce":
        likelihood = "bernoulli"
    print("Training GP with " + likelihood + " likelihood")

    model.compile("sgd", loss="mse")
    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()
    K_train_train = Kfull[:len(X), :len(X)]
    X_train_test = Kfull[:len(X), len(X):len(Xfull) - len(X)]
    # predictor = nt.predict.gradient_descent_mse(g_dd, y_train, g_td)

    '''MAIN LOOP'''
    local_index = 0
    from math import ceil
    samples_per_chunk_base = min(len(tasks), 10000)
    num_chunks = len(tasks) // samples_per_chunk_base
    remainder = len(tasks) % samples_per_chunk_base
    if remainder > 0:
        num_chunks += 1
    for chunki in range(num_chunks):
        print(chunki)
        if chunki == num_chunks - 1 and remainder > 0:
            samples_per_chunk = remainder
        else:
            samples_per_chunk = samples_per_chunk_base

        funs_file = open(funs_filename, "a")

        ## if the labels are to be generated by a neural network in parallel
        if nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:, 0] > 0
                if training:
                    test_ys = model.predict(test_images)[:, 0] > 0
            else:
                ys = model.predict(train_images)[:, 0]
                if training:
                    test_ys = model.predict(test_images)[:, 0]

        local_index += 1

        # preds = model.predict(flat_test_images)[0]
        # dimensions of the output of posterior_samples are
        # (number of input points) x (dimension of output Y) x (number of samples)
        # preds = model.posterior_samples(flat_test_images, size=samples_per_chunk)[:, 0, :].T
        print(mu.shape)
        preds = np.random.multivariate_normal(mu, Sigma, size=samples_per_chunk)
        print(preds.shape)
        # preds = np.array([pred[0] for pred in preds])

        if not doing_regression:
            th = 0.5
            train_loss, train_acc = 0, 1.0 * samples_per_chunk
            test_loss, test_acc = (np.sum(cross_entropy_loss(test_ys, preds)) / len(test_ys),
                                   np.sum((preds > th) == test_ys) / len(test_ys))
        else:
            train_acc = train_loss = 0
            test_acc = test_loss = np.sum(cross_entropy_loss(test_ys, preds)) / len(test_ys)

        # for th in np.linspace(0, 1, 1000):
        if loss == "mse":
            # NOTE: sensitivity and specificity are not implemented for MSE loss
            test_sensitivity = -1
            test_specificity = -1
        else:
            print("threshold", threshold)
            # TODO: this is ugly; we should just add a flag saying whether we are doing threshold selection or not!
            if threshold != -1:
                for th in np.linspace(0, 1, 1000):
                    test_specificity = np.sum(((sigmoid(preds) > th) == test_ys[:100]) * (test_ys[:100] == 0)) / np.sum(test_ys[:100] == 0)
                    if test_specificity > 0.99:
                        num_0s = np.sum(test_ys == 0)
                        if num_0s > 0:
                            test_specificity = np.sum(((sigmoid(preds) > th) == test_ys) * (test_ys == 0)) / num_0s
                        else:
                            test_specificity = -1
                        if test_specificity > 0.99:
                            num_1s = np.sum(test_ys == 1)
                            if num_1s > 0:
                                test_sensitivity = np.sum(((sigmoid(preds) > th) == test_ys) * (test_ys == 1)) / num_1s
                            else:
                                test_sensitivity = -1
                            break
            else:
                # for th in np.linspace(0, 1, 5):  # low number of thresholds, as we're not exploring unbalanced datasets right now
                #     test_specificity = sum([(sigmoid(preds[i]) > th) == x for i, x in enumerate(test_ys) if x == 0]) / (len([x for x in test_ys if x == 0]))
                #     if test_specificity > 0.99:
                #         test_sensitivity = sum([(sigmoid(preds[i]) > th) == x for i, x in enumerate(test_ys) if x == 1]) / (len([x for x in test_ys if x == 1]))
                #         break
                test_specificity = -1
                test_sensitivity = -1

        print("Training accuracy", train_acc / samples_per_chunk)
        print('Test accuracy:', test_acc / samples_per_chunk)
        if threshold != -1:
            print('Test sensitivity:', test_sensitivity / samples_per_chunk)
            print('Test specificity:', test_specificity / samples_per_chunk)

        if not ignore_non_fit or train_acc == 1.0:
            print("printing function to file", funs_filename)
            functions = preds[:, :test_function_size] > 0.5
            functions = functions.astype(int)
            print(functions.shape)
            functions = [''.join([str(int(x)) for x in function]) + "\r\n" for function in functions]
            funs_file.writelines(functions)
        funs_file.close()
        # functions.append(function)

        test_accs += test_acc
        test_accs_squared += test_acc**2
        test_sensitivities += test_sensitivity
        test_specificities += test_specificity
        train_accs += train_acc
        train_accs_squared += train_acc**2

    test_accs_recv = comm.reduce(test_accs, root=0)
    test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
    test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
    test_specificities_recv = comm.reduce(test_specificities, root=0)
    train_accs_recv = comm.reduce(train_accs, root=0)
    train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        test_acc = test_accs_recv / number_inits
        test_sensitivity = test_sensitivities_recv / number_inits
        test_specificity = test_specificities_recv / number_inits
        train_acc = train_accs_recv / number_inits

        print('Mean train accuracy:', train_acc)
        print('Mean test accuracy:', test_acc)
        if threshold != -1:
            print('Mean test sensitivity:', test_sensitivity)
            print('Mean test specificity:', test_specificity)

        test_acc = test_accs_recv / number_inits
        train_acc = train_accs_recv / number_inits
        train_acc_std = train_accs_squared_recv / number_inits - train_acc**2  # sample variance across inits
        test_acc_std = test_accs_squared_recv / number_inits - test_acc**2

        useful_train_flags = ["dataset", "m", "network", "pooling", "ignore_non_fit",
                              "test_function_size", "number_layers", "sigmaw", "sigmab",
                              "init_dist", "use_shifted_init", "shifted_init_shift",
                              "whitening", "centering", "oversampling", "oversampling2",
                              "channel_normalization", "training", "binarized", "confusion",
                              "filter_sizes", "gamma", "intermediate_pooling",
                              "label_corruption", "threshold", "n_gpus", "n_samples_repeats",
                              "layer_widths", "number_inits", "padding"]
        with open(results_folder + prefix + "nn_training_results.txt", "a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc", "test_sensitivity",
                                  "test_specificity", "train_acc_std", "test_acc_std"]))
file.write("\n") for key in sorted(useful_train_flags): file.write("{}\t".format(FLAGS[key])) file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\n".format(train_acc, 1-test_acc,test_acc,\ test_sensitivity,test_specificity,\ train_acc_std,test_acc_std))
def main(_):
    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    # total_samples = m

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    # num_inits_per_task = 1
    # num_tasks = int(sys.argv[1])
    num_tasks = number_samples

    # from tensorflow.python.client import device_lib
    #
    # def get_available_gpus():
    #     local_device_protos = device_lib.list_local_devices()
    #     return [x.name for x in local_device_protos if x.device_type == 'GPU']
    #
    # num_gpus = len(get_available_gpus())
    num_gpus = n_gpus

    num_tasks_per_job = num_tasks // size
    tasks = list(range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))
    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    # config = tf.ConfigProto(device_count={'GPU': rank % num_gpus})
    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)
    config.gpu_options.allow_growth = True
    tf.enable_eager_execution(config=config)

    from utils import load_data, load_model, load_kernel
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    model = load_model(FLAGS)
    K = load_kernel(FLAGS)

    def lass(model, x, r=0.01):
        # adversarial sample search: take sign-gradient steps within an L-infinity ball of
        # radius r and report whether the model's predicted sign can be flipped
        pred = tf.sign(model(x))
        alpha = 0.5
        # alpha = 0.25
        # beta = 0.2
        deltax = tf.zeros(x.shape)
        xtilde = x + deltax
        max_iters = 20
        iterr = 0
        while iterr < max_iters:
            with tf.GradientTape() as g:
                g.watch(xtilde)
                y = model(xtilde)
            grads = g.gradient(y, xtilde)
            delta = alpha * tf.sign(-pred * grads)  # + beta * tf.random.normal(x.shape)
            deltax += delta
            deltax = tf.clip_by_value(deltax, -r, r)
            # deltax -= tf.to_float(tf.math.abs(deltax) >= r) * tf.clip_by_value(deltax, -r, r)
            xtilde = x + deltax
            # print(grads)
            if tf.sign(model(xtilde)).numpy()[0] != pred.numpy()[0]:
                return True
            iterr += 1
        return False

    def crit_sample_ratio(model, xs, r=0.01):
        # fraction of inputs whose prediction can be flipped by a perturbation of radius r
        # (is 0.3 fine for a 0-1 scaling? when they say 0-255, what do they mean?)
        crit_samples = 0
        for i in range(int(xs.shape[0])):
            # print(i)
            # print(xs[i:i+1, :, :, :])
            if lass(model, xs[i:i + 1, :, :, :], r):
                crit_samples += 1
        return 1.0 * crit_samples / int(xs.shape[0])

    #%%
    print("Beginning job %d of %d" % (rank, size))
    import time
    start_time = time.time()

    crit_sample_ratios = []
    # probs = []
    for index in tasks:
        print(index)
        model.load_weights("./sampled_nets/" + str(index) + "_" + json_string_filename + ".h5")
        csr = crit_sample_ratio(model, data, r=0.03)
        crit_sample_ratios.append((index, csr))
        with open(results_folder + "CSRs_" + FLAGS["prefix"] + "_" + FLAGS["dataset"] + "_"
                  + FLAGS["network"] + "_" + str(FLAGS["number_layers"]) + "_"
                  + FLAGS["pooling"] + "_" + FLAGS["intermediate_pooling"] + ".txt", "a") as f:
            f.write(str(index) + "\t" + str(csr) + "\n")
        # print(csr)

    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finishing job %d of %d" % (rank, size))

    csr_data = comm.gather(crit_sample_ratios, root=0)

    # tf.keras.initializers.glorot_uniform
    if rank == 0:
        csr_data = sum(csr_data, [])
        pickle.dump(csr_data,
                    open(results_folder + "CSRs_" + FLAGS["prefix"] + "_" + FLAGS["dataset"] + "_"
                         + FLAGS["network"] + "_" + str(FLAGS["number_layers"]) + "_"
                         + FLAGS["pooling"] + "_" + FLAGS["intermediate_pooling"] + ".p", "wb"))
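# Hedged usage sketch (illustrative only, hypothetical input shapes): inside main() above,
# crit_sample_ratio is applied to the loaded dataset; for a quick standalone sanity check on
# a single model one could do something like:
#
#   xs = tf.constant(np.random.rand(32, 28, 28, 1).astype(np.float32))
#   ratio = crit_sample_ratio(model, xs, r=0.03)
#   print("critical sample ratio:", ratio)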
def main(_):
    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    os.environ["CUDA_VISIBLE_DEVICES"] = str((rank + 1) % n_gpus)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # tf.enable_eager_execution(config=config)
    set_session = keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    num_tasks = 100
    cupy_samples = 1e5
    num_tasks_per_job = num_tasks // size
    tasks = list(range(int(rank * num_tasks_per_job), int((rank + 1) * num_tasks_per_job)))
    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    print("compute probability and bound", network, dataset)
    K = load_kernel(FLAGS)

    import cupy as cp
    # import numpy as cp
    Y = cp.array(Y)
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Monte Carlo estimate: count how often samples from the GP prior, thresholded at 0,
    # reproduce the training labels exactly.
    freq = 0
    for i in tasks:
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        exact_samples = cp.random.multivariate_normal(
            cp.zeros(m), K, int(cupy_samples), dtype=np.float32) > 0
        fits_data = cp.prod(~(exact_samples[:, :m] ^ (Y.T == 1)), 1)
        indices = cp.where(fits_data)[0]
        freq += len(indices)

    freqs = comm.gather(freq, root=0)
    if rank == 0:
        freq = sum(freqs)
        prob = freq / (num_tasks * cupy_samples)
        logPU = np.log(prob)
        log10PU = np.log10(prob)
        print(log10PU)

        # compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 - np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (1 - rho)
        # correct for the confusion changing the training data distribution
        # (present in the training set, but not in the test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)

        useful_flags = ["dataset", "network", "m", "label_corruption", "confusion",
                        "number_layers", "sigmaw", "sigmab", "binarized", "pooling",
                        "intermediate_pooling", "whitening", "centering",
                        "channel_normalization", "training", "n_gpus"]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("log10PU")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(log10PU))
            file.write("\n")
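# Hedged sketch (not part of the original script): a rough standard error for the Monte Carlo
# estimate of P(U) above, treating each of the num_tasks * cupy_samples draws as an i.i.d.
# Bernoulli(prob) trial. Useful for judging whether enough samples were drawn for log10PU to
# be stable.
def mc_prob_standard_error(freq, n_samples):
    """Standard error of the empirical probability freq / n_samples."""
    p_hat = freq / n_samples
    return np.sqrt(p_hat * (1.0 - p_hat) / n_samples)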