Example #1
def sgemm(A,
          B,
          dim_x=16,
          dim_y=16,
          blk_m=64,
          blk_n=64,
          blk_k=4,
          dim_xa=64,
          dim_ya=4,
          dim_xb=4,
          dim_yb=64):
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert (dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k, n = B.shape

    # Input matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)

    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {
        'DIM_X': dim_x,
        'DIM_Y': dim_y,
        'BLK_M': blk_m,
        'BLK_N': blk_n,
        'BLK_K': blk_k,
        'DIM_XA': dim_xa,
        'DIM_YA': dim_ya,
        'DIM_XB': dim_xb,
        'DIM_YB': dim_yb,
        'THR_M': blk_m // dim_x,
        'THR_N': blk_n // dim_y
    }
    code = read_code(sgemm_file, params=config)
    kern = load_kernel('sgemm', code)

    grid = (int(math.ceil(m / blk_m)), int(math.ceil(n / blk_n)), 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C
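# A minimal usage sketch for the sgemm routine above, assuming the module-level
# helpers it relies on (read_code, load_kernel, sgemm_file, math) are importable
# and a CUDA device is available; the shapes below are arbitrary.
import cupy as cp

m, k, n = 1024, 512, 768
A = cp.random.rand(m, k, dtype=cp.float32)
B = cp.random.rand(k, n, dtype=cp.float32)

C = sgemm(A, B)   # custom tiled kernel
C_ref = A @ B     # cuBLAS reference
assert cp.allclose(C, C_ref, rtol=1e-3, atol=1e-3)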
Example #2
#%%

X = train_images
Xfull = np.concatenate([train_images,test_images])
ys2 = [[y] for y in ys]
ysfull = ys2 + [[y] for y in test_ys]
Yfull = np.array(ysfull)
Y = np.array(ys2)
#
# from fc_kernel import kernel_matrix
# Kfull = kernel_matrix(Xfull,number_layers=number_layers,sigmaw=sigmaw,sigmab=sigmab)


FLAGS["m"] = 1500
Kfull = load_kernel(FLAGS)
K = Kfull[0:m,0:m]

#%%

# filename=kernel_folder
# for flag in ["network","dataset","m","confusion","label_corruption","binarized","whitening","random_labels","number_layers","sigmaw","sigmab"]:
#     filename+=str(FLAGS[flag])+"_"
# filename += "kernel.npy"
# np.save(open(filename,"wb"),Kfull)
#

### trying gpflow now
#%%

import tensorflow as tf
Example #3
X = flat_train_images
ys2 = [[y] for y in ys]
Y = np.array(ys2)

#%%
print("Loading kernel")
from os import path
# FLAGS["m"] = m+500
# filename=kernel_folder
# for flag in ["network","dataset","m","confusion","label_corruption","binarized","whitening","random_labels","number_layers","sigmaw","sigmab"]:
#     filename+=str(FLAGS[flag])+"_"
# filename += "kernel.npy"
# if path.exists(filename):
#     K = load_kernel(FLAGS)
# try:
K = load_kernel(FLAGS)
# except:
#     if rank == 0:
#         from fc_kernel import kernel_matrix
#         K = kernel_matrix(X,number_layers=number_layers,sigmaw=sigmaw,sigmab=sigmab, n_gpus=n_gpus)
#         np.save(open(filename,"wb"),K)
#     K = load_kernel(FLAGS)

print("Loaded kernel")
#%%

Kinv = np.linalg.inv(K)

# Determinant of K as the product of its eigenvalues (K is symmetric PSD).
det = np.prod(np.linalg.eigh(K)[0])
n = len(X)
# Normalization constant of the n-dimensional Gaussian N(0, K): sqrt((2*pi)^n * det(K)).
normalization = np.sqrt(np.power(2 * np.pi, n) * det)
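# The normalization constant above overflows quickly, since both (2*pi)^n and
# det(K) grow with n. A minimal sketch of the same quantity computed in log
# space with np.linalg.slogdet, assuming K and n are as defined above:
sign, logdet = np.linalg.slogdet(K)  # sign should be +1 for a PSD kernel matrix
log_normalization = 0.5 * (n * np.log(2 * np.pi) + logdet)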
Example #4
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError(
            "Initialization distributions other than Gaussian are not implemented for computing pac bayes bounds!"
        )

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    if using_NTK:
        FLAGS["use_empirical_NTK"] = True
        theta = load_kernel(FLAGS)
        print(theta)
        #if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        FLAGS["use_empirical_NTK"] = False
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if theta.shape[0] >= m:  #the saved kernel must cover at least the m training points; keep only the training block
            theta = theta[:m, :m]
        if K.shape[0] >= m:  #keep only the m x m training block
            K = K[:m, :m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        K = kernel_mult * K_pre
        if K.shape[0] >= m:  #the saved kernel must cover at least the m training points; keep only the training block
            K = K[:m, :m]

    #finding log marginal likelihood of data
    if using_EP:
        from GP_prob.GP_prob_gpy2 import GP_prob
        logPU = GP_prob(K, X, Y, method="EP", using_exactPB=using_exactPB)
    elif using_Laplace:
        from GP_prob.GP_prob_gpy2 import GP_prob
        # from GP_prob.GP_prob_numpy import GP_prob
        logPU = GP_prob(K, X, Y, method="Laplace", using_exactPB=using_exactPB)
        # logPU = GP_prob(K,np.squeeze(Y))
    elif using_Laplace2:
        # from GP_prob.GP_prob_gpy import GP_prob
        from GP_prob.GP_prob_numpy import GP_prob  #this gives different results because it uses a cruder Laplace implementation (a naive Newton method to find the posterior maximum)
        # logPU = GP_prob(K,X,Y,method="Laplace")
        logPU = GP_prob(K, np.squeeze(Y))
    elif using_MC:
        from GP_prob.GP_prob_MC import GP_prob
        logPU = GP_prob(K, X, Y, FLAGS)
    elif using_regression:
        from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K,X,Y,sigma_noise=np.sqrt(total_samples/2))
        logPU = GP_prob(K, X, Y, sigma_noise=1.0)
    elif using_NTK:
        # from GP_prob.GP_prob_regression import GP_prob
        # logPU = GP_prob(K,X,Y,sigma_noise=np.sqrt(total_samples/2))
        # logPU = GP_prob(K,X,Y,sigma_noise=1.0, posterior="ntk")
        from GP_prob.GP_prob_ntk import GP_prob
        logPU = GP_prob(K, theta, X, Y, t=1e2)

    if rank == 0:
        print(logPU)
        #compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 -
                 np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (
            1 - rho
        )  #to correct for the confusion changing the training data distribution (in training set, but not in test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)
        useful_flags = [
            "dataset", "boolfun_comp", "boolfun", "network", "m",
            "label_corruption", "confusion", "number_layers", "sigmaw",
            "sigmab", "binarized", "pooling", "intermediate_pooling",
            "whitening", "training", "n_gpus", "kernel_mult",
            "normalize_kernel"
        ]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("logP")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(logPU))
            file.write("\n")
Example #5
def main(_):

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    if init_dist != "gaussian":
        raise NotImplementedError(
            "Initialization distributions other than Gaussian are not implemented for computing pac bayes bounds!"
        )

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    if n_gpus > 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str((rank) % n_gpus)
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = tf.compat.v1.keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    '''GET DATA'''
    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, test_images, test_ys = load_data(
        FLAGS)
    print("max val", train_images.max())
    #print("ys", ys)
    #process the data into the format the GP code expects
    #evaluate on a smaller sample of the test set, because otherwise the GP would run out of memory
    test_images = test_images[:test_function_size]
    test_ys = test_ys[:test_function_size]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0, len(data.shape) - 1],
                               np.arange(1,
                                         len(data.shape) - 1)])
    print(data.shape, tp_order)
    flat_data = np.transpose(
        data, tp_order
    )  # NHWC -> NCHW # this is because the cnn GP kernels assume this
    flat_test_images = np.array(
        [test_image.flatten() for test_image in flat_data])
    Xtrain = flat_train_images
    Xtest = flat_test_images
    Xfull = np.concatenate([flat_train_images, flat_test_images])
    ys2 = [[y] for y in ys]
    # if test_fun_override is not None:
    #     ys2test = [[float(x)] for x in test_fun_override]
    # else:
    ys2test = [[y] for y in test_ys]
    ysfull = ys2 + ys2test
    Yfull = np.array(ysfull)
    Ytrain = np.array(ys2)
    Ytest = np.array(ys2test)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    print("compute probability and bound", network, dataset)

    # if loss is not "mse":
    #     raise NotImplementedError("Haven't implemented logQ estimate for CE loss yet")

    if using_NTK:
        raise NotImplementedError(
            "Haven't implemented logQ estimate for NTK yet")
        # FLAGS["use_empirical_NTK"] = True
        # theta = load_kernel(FLAGS)
        # print(theta)
        # #if using NTK, the above gets the NTK kernel, but we also need the non-NTK one to compute the bound!
        # FLAGS["use_empirical_NTK"] = False
        # K_pre = load_kernel(FLAGS)
        # print(K_pre)
        # if normalize_kernel:
        #     K_pre = K_pre/K_pre.max()
        # K = kernel_mult*K_pre
        # if theta.shape[0] >= m: #must have compute kernel for GP_train
        #     theta = theta[:m,:m]
        # if K.shape[0] >= m: #must have compute kernel for GP_train
        #     K = K[:m,:m]
    else:
        K_pre = load_kernel(FLAGS)
        print(K_pre)
        if normalize_kernel:
            K_pre = K_pre / K_pre.max()
        Kfull = kernel_mult * K_pre

    #finding log marginal likelihood of data
    if loss == "mse":
        from GP_prob.nngp_mse_heaviside_posterior import nngp_mse_heaviside_posteror_params
        mean, cov = nngp_mse_heaviside_posteror_params(Xtrain, Ytrain, Xtest,
                                                       Kfull)
    else:
        raise NotImplementedError("Only mse loss implemented")

    if rank == 0:
        from utils import save_posterior_params
        save_posterior_params(mean, cov, FLAGS)
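# nngp_mse_heaviside_posteror_params is a project-specific helper; under plain
# noiseless GP-regression assumptions its output corresponds to the standard
# conditional Gaussian. A minimal sketch of that computation, assuming the full
# kernel is ordered as [train, test] the way Kfull is built above (the actual
# helper may add jitter or a Heaviside link and differ in detail):
import numpy as np

def gp_posterior_sketch(Kfull, Ytrain, n_train, jitter=1e-8):
    K_tt = Kfull[:n_train, :n_train]             # train/train block
    K_ts = Kfull[:n_train, n_train:]             # train/test block
    K_ss = Kfull[n_train:, n_train:]             # test/test block
    eye = jitter * np.eye(n_train)               # numerical stabilizer
    alpha = np.linalg.solve(K_tt + eye, Ytrain)  # K_tt^{-1} y
    beta = np.linalg.solve(K_tt + eye, K_ts)     # K_tt^{-1} K_ts
    mean = K_ts.T @ alpha                        # posterior mean at the test points
    cov = K_ss - K_ts.T @ beta                   # posterior covariance at the test points
    return mean, cov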
Example #6
def main(_):
    MAX_TRAIN_EPOCHS=5000

    FLAGS = tf.compat.v1.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)
    if doing_regression:
        assert loss == "mse"
    global threshold

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    num_tasks_per_job = number_inits//size
    tasks = list(range(int(rank*num_tasks_per_job),int((rank+1)*num_tasks_per_job)))

    if rank < number_inits%size:
        tasks.append(size*num_tasks_per_job+rank)

    import os
    if n_gpus>0:
        os.environ["CUDA_VISIBLE_DEVICES"]=str(rank%n_gpus)

    from tensorflow import keras

    '''LOAD DATA & ARCHITECTURE'''

    from utils import load_data,load_model,load_kernel
    train_images,flat_train_images,ys,test_images,test_ys = load_data(FLAGS)
    print("max val", train_images.max())
    #print("ys", ys)
    #process the data into the format the GP code expects
    #evaluate on a smaller sample of the test set, because otherwise the GP would run out of memory
    test_images = test_images[:1000]
    test_ys = test_ys[:1000]
    X = flat_train_images
    data = test_images
    tp_order = np.concatenate([[0,len(data.shape)-1], np.arange(1, len(data.shape)-1)])
    print(data.shape,tp_order)
    flat_data = np.transpose(data, tp_order)  # NHWC -> NCHW # this is because the cnn GP kernels assume this
    flat_test_images = np.array([test_image.flatten() for test_image in flat_data])
    Xfull = np.concatenate([flat_train_images,flat_test_images])
    ys2 = [[y] for y in ys]
    ysfull = ys2 + [[y] for y in test_ys]
    Yfull = np.array(ysfull)
    Y = np.array(ys2)


    FLAGS["use_empirical_NTK"] = True
    theta_full = load_kernel(FLAGS)
    #print(theta_full)
    FLAGS["use_empirical_NTK"] = False
    K_pre = load_kernel(FLAGS)
    print(K_pre)
    if normalize_kernel:
        K_pre = K_pre/K_pre.max()
    Kfull = kernel_mult*K_pre

    input_dim = train_images.shape[1]
    num_channels = train_images.shape[-1]
    print(train_images.shape, ys.shape)

    n=X.shape[0]
    K_train = Kfull[:n,:n]
    K_test = Kfull[n:,n:]
    K_train_test = Kfull[:n,n:]
    theta_train = theta_full[:n,:n]
    theta_test = theta_full[n:,n:]
    theta_train_test = theta_full[:n,n:]
    mu,Sigma = NTK_posterior(K_train,K_test,K_train_test,theta_train,theta_test,theta_train_test,X,Y,t=training_time)

    sample_weights = None
    if gamma != 1.0:
        sample_weights = np.ones(len(ys))
        if not oversampling2:
            sample_weights[m:] = gamma
        else:
            raise NotImplementedError("Gamma not equal to 1.0 with oversampling2 not implemented")

    model = load_model(FLAGS)

    set_session = tf.compat.v1.keras.backend.set_session

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)  # set this TensorFlow session as the default session for Keras

    '''TRAINING LOOP'''
    #things to keep track of
    #functions = []
    test_accs = 0
    test_accs_squared = 0
    test_sensitivities = 0
    test_specificities = 0
    train_accs = 0
    train_accs_squared = 0
    funs_filename = results_folder+prefix+"_"+str(rank)+"_nn_train_functions.txt"

    if loss=="mse":
        likelihood = "gaussian"
    elif loss=="ce":
        likelihood = "bernoulli"
    print("Training GP with "+likelihood+" likelihood")

    model.compile("sgd", loss="mse")

    from initialization import get_all_layers, is_normalization_layer, reset_weights, simple_reset_weights
    if network not in ["cnn", "fc"]:
        layers = get_all_layers(model)
        are_norm = [is_normalization_layer(l) for l in layers for w in l.get_weights()]
        initial_weights = model.get_weights()

    K_train_train = Kfull[:len(X),:len(X)]
    X_train_test = Kfull[:len(X),len(X):]  #train/test block of the full kernel (only used by the commented-out predictor below)
    # predictor = nt.predict.gradient_descent_mse(g_dd, y_train, g_td)

    '''MAIN LOOP'''
    local_index = 0

    from math import ceil
    samples_per_chunk_base=min(len(tasks),10000)
    num_chunks = len(tasks)//samples_per_chunk_base
    remainder = len(tasks)%samples_per_chunk_base
    if remainder > 0:
        num_chunks += 1
    for chunki in range(num_chunks):
        print(chunki)
        if chunki == num_chunks-1 and remainder>0:
            samples_per_chunk = remainder
        else:
            samples_per_chunk = samples_per_chunk_base
        funs_file = open(funs_filename,"a")
        #
        ##if the labels are to be generated by a neural network in parallel
        if nn_random_labels or nn_random_regression_outputs:
            if network in ["cnn", "fc"]:
                simple_reset_weights(model, sigmaw, sigmab)
            else:
                reset_weights(model, initial_weights, are_norm, sigmaw, sigmab, truncated_init_dist)
            if nn_random_labels:
                ys = model.predict(train_images)[:,0]>0
                if training:
                    test_ys = model.predict(test_images)[:,0]>0
            else:
                ys = model.predict(train_images)[:,0]
                if training:
                    test_ys = model.predict(test_images)[:,0]
        ##

        local_index+=1

        #preds = model.predict(flat_test_images)[0]
        #dimensions of output of posterior_samples is (number of input points)x(dimension of output Y)x(number of samples)
        #preds = model.posterior_samples(flat_test_images,size=samples_per_chunk)[:,0,:].T
        print(mu.shape)
        preds = np.random.multivariate_normal(mu,Sigma,size=samples_per_chunk)
        print(preds.shape)
        #preds = np.array([pred[0] for pred in preds])
        if not doing_regression:
            th = 0.5
            train_loss, train_acc = 0, 1.0*samples_per_chunk
            test_loss, test_acc = np.sum(cross_entropy_loss(test_ys,preds))/len(test_ys), np.sum((preds>th)==test_ys)/len(test_ys)
        else:
            train_acc = train_loss = 0
            test_acc = test_loss = np.sum(cross_entropy_loss(test_ys,preds))/len(test_ys)

        #for th in np.linspace(0,1,1000):
        if loss=="mse":
            #NOTE: sensitivity and specificity are not implemented for MSE loss
            test_sensitivity = -1
            test_specificity = -1
        else:
            print("threshold", threshold)
            #TODO: this is ugly; add a flag that says whether we are doing threshold selection or not
            if threshold != -1:
                for th in np.linspace(0,1,1000):
                    test_specificity = np.sum(((sigmoid(preds)>th)==test_ys[:100])*(test_ys[:100]==0))/np.sum(test_ys[:100]==0)
                    if test_specificity>0.99:
                        num_0s = np.sum(test_ys==0)
                        if num_0s > 0:
                            test_specificity = np.sum(((sigmoid(preds)>th)==test_ys)*(test_ys==0))/(num_0s)
                        else:
                            test_specificity = -1
                        if test_specificity>0.99:
                            num_1s = np.sum(test_ys==1)
                            if num_1s > 0:
                                test_sensitivity = np.sum(((sigmoid(preds)>th)==test_ys)*(test_ys==1))/(num_1s)
                            else:
                                test_sensitivity = -1
                            break
            else:
                # for th in np.linspace(0,1,5): # low number of thresholds as I'm not exploring unbalanced datasets right now
                #     test_specificity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==0])/(len([x for x in test_ys if x==0]))
                #     if test_specificity>0.99:
                #         test_sensitivity = sum([(sigmoid(preds[i])>th)==x for i,x in enumerate(test_ys) if x==1])/(len([x for x in test_ys if x==1]))
                #         break
                test_specificity = -1
                test_sensitivity = -1

        print("Training accuracy", train_acc/samples_per_chunk)
        print('Test accuracy:', test_acc/samples_per_chunk)
        if threshold != -1:
            print('Test sensitivity:', test_sensitivity/samples_per_chunk)
            print('Test specificity:', test_specificity/samples_per_chunk)
        if not ignore_non_fit or train_acc == 1.0:
            print("printing function to file", funs_filename)
            functions = preds[:,:test_function_size]>0.5
            functions=functions.astype(int)
            print(functions.shape)
            functions = [''.join([str(int(x)) for x in function])+"\r\n" for function in functions]
            funs_file.writelines(functions)
            funs_file.close()
            #functions.append(function)
            test_accs += test_acc
            test_accs_squared += test_acc**2
            test_sensitivities += test_sensitivity
            test_specificities += test_specificity
            train_accs += train_acc
            train_accs_squared += train_acc**2

    test_accs_recv = comm.reduce(test_accs, root=0)
    test_accs_squared_recv = comm.reduce(test_accs_squared, root=0)
    test_sensitivities_recv = comm.reduce(test_sensitivities, root=0)
    test_specificities_recv = comm.reduce(test_specificities, root=0)
    train_accs_recv = comm.reduce(train_accs, root=0)
    train_accs_squared_recv = comm.reduce(train_accs_squared, root=0)

    '''PROCESS COLLECTIVE DATA'''
    if rank == 0:
        test_acc = test_accs_recv/number_inits
        test_sensitivity = test_sensitivities_recv/number_inits
        test_specificity = test_specificities_recv/number_inits
        train_acc = train_accs_recv/number_inits
        print('Mean train accuracy:', train_acc)
        print('Mean test accuracy:', test_acc)
        if threshold != -1:
            print('Mean test sensitivity:', test_sensitivity)
            print('Mean test specificity:', test_specificity)
        test_acc = test_accs_recv/number_inits
        train_acc = train_accs_recv/number_inits
        train_acc_std = np.sqrt(train_accs_squared_recv/number_inits - train_acc**2)  #standard deviation, not variance
        test_acc_std = np.sqrt(test_accs_squared_recv/number_inits - test_acc**2)

        useful_train_flags = ["dataset", "m", "network", "pooling", "ignore_non_fit", "test_function_size", "number_layers", "sigmaw", "sigmab", "init_dist","use_shifted_init","shifted_init_shift","whitening", "centering", "oversampling", "oversampling2", "channel_normalization", "training", "binarized", "confusion","filter_sizes", "gamma", "intermediate_pooling", "label_corruption", "threshold", "n_gpus", "n_samples_repeats", "layer_widths", "number_inits", "padding"]
        with open(results_folder+prefix+"nn_training_results.txt","a") as file:
            file.write("#")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(key))
            file.write("\t".join(["train_acc", "test_error", "test_acc","test_sensitivity","test_specificity","train_acc_std","test_acc_std"]))
            file.write("\n")
            for key in sorted(useful_train_flags):
                file.write("{}\t".format(FLAGS[key]))
            file.write("{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\n".format(train_acc, 1-test_acc,test_acc,\
                test_sensitivity,test_specificity,\
                train_acc_std,test_acc_std))
Example #7
def main(_):

    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    from utils import preprocess_flags
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    # total_samples = m

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    # num_inits_per_task = 1
    #num_tasks = int(sys.argv[1])
    num_tasks = number_samples

    #from tensorflow.python.client import device_lib
    #
    #def get_available_gpus():
    #    local_device_protos = device_lib.list_local_devices()
    #    return [x.name for x in local_device_protos if x.device_type == 'GPU']
    #
    #num_gpus = len(get_available_gpus())
    num_gpus = n_gpus

    num_tasks_per_job = num_tasks // size
    tasks = list(
        range(rank * num_tasks_per_job, (rank + 1) * num_tasks_per_job))

    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    #config = tf.ConfigProto(device_count={'GPU': rank%num_gpus})
    config = tf.ConfigProto()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(rank % num_gpus)
    config.gpu_options.allow_growth = True
    tf.enable_eager_execution(config=config)

    from utils import load_data, load_model, load_kernel
    data, flat_data, _, _, _ = load_data(FLAGS)
    data = tf.constant(data)
    model = load_model(FLAGS)
    K = load_kernel(FLAGS)

    def lass(model, x, r=0.01):
        pred = tf.sign(model(x))
        alpha = 0.5
        #alpha=0.25
        #beta=0.2
        deltax = tf.zeros(x.shape)
        xtilde = x + deltax
        max_iters = 20
        iterr = 0
        while iterr < max_iters:
            with tf.GradientTape() as g:
                g.watch(xtilde)
                y = model(xtilde)
            grads = g.gradient(y, xtilde)
            delta = alpha * tf.sign(
                -pred * grads)  #+ beta*tf.random.normal(x.shape)
            deltax += delta
            deltax = tf.clip_by_value(deltax, -r, r)
            # deltax -= tf.to_float(tf.math.abs(deltax) >= r) * tf.clip_by_value(deltax,-r,r)
            xtilde = x + deltax
            # print(grads)

            if tf.sign(model(xtilde)).numpy()[0] != pred.numpy()[0]:
                return True
            iterr += 1
        return False

    def crit_sample_ratio(
        model,
        xs,
        r=0.01
    ):  # is 0.3 fine for a 0-1 scaling, when they say 0-255 what do they mean? Hmm
        crit_samples = 0
        for i in range(int(xs.shape[0])):
            #print(i)
            # print(xs[i:i+1,:,:,:])
            if lass(model, xs[i:i + 1, :, :, :], r):
                crit_samples += 1
        return 1.0 * crit_samples / int(xs.shape[0])

    #%%

    print("Beginning job %d of %d" % (rank, size))
    import time
    start_time = time.time()
    crit_sample_ratios = []
    #probs = []
    for index in tasks:
        print(index)
        model.load_weights("./sampled_nets/" + str(index) + "_" +
                           json_string_filename + ".h5")
        csr = crit_sample_ratio(model, data, r=0.03)
        crit_sample_ratios.append((index, csr))
        with open(
                results_folder + "CSRs_" + FLAGS["prefix"] + "_" +
                FLAGS["dataset"] + "_" + FLAGS["network"] + "_" +
                str(FLAGS["number_layers"]) + "_" + FLAGS["pooling"] + "_" +
                FLAGS["intermediate_pooling"] + ".txt", "a") as f:
            f.write(str(index) + "\t" + str(csr) + "\n")
        #print(csr)
    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finishing job %d of %d" % (rank, size))

    csr_data = comm.gather(crit_sample_ratios, root=0)

    #tf.keras.initializers.glorot_uniform

    if rank == 0:
        csr_data = sum(csr_data, [])
        pickle.dump(
            csr_data,
            open(
                results_folder + "CSRs_" + FLAGS["prefix"] + "_" +
                FLAGS["dataset"] + "_" + FLAGS["network"] + "_" +
                str(FLAGS["number_layers"]) + "_" + FLAGS["pooling"] + "_" +
                FLAGS["intermediate_pooling"] + ".p", "wb"))
Example #8
def main(_):

    FLAGS = tf.app.flags.FLAGS.flag_values_dict()
    FLAGS = preprocess_flags(FLAGS)
    globals().update(FLAGS)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)

    os.environ["CUDA_VISIBLE_DEVICES"] = str((rank + 1) % n_gpus)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    #tf.enable_eager_execution(config=config)
    set_session = keras.backend.set_session
    config.log_device_placement = False  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    from utils import load_data, load_model, load_kernel
    train_images, flat_train_images, ys, _, _ = load_data(FLAGS)
    X = flat_train_images
    ys2 = [[y] for y in ys]
    Y = np.array(ys2)
    image_size = train_images.shape[1]
    number_channels = train_images.shape[-1]
    input_dim = flat_train_images.shape[1]

    num_tasks = 100
    cupy_samples = 1e5

    num_tasks_per_job = num_tasks // size
    tasks = list(
        range(int(rank * num_tasks_per_job), int(
            (rank + 1) * num_tasks_per_job)))

    if rank < num_tasks % size:
        tasks.append(size * num_tasks_per_job + rank)

    print("compute probability and bound", network, dataset)

    K = load_kernel(FLAGS)
    import cupy as cp
    # import numpy as cp

    Y = cp.array(Y)

    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    freq = 0
    for i in tasks:
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()
        exact_samples = cp.random.multivariate_normal(
            cp.zeros(m), K, int(cupy_samples), dtype=np.float32) > 0

        fits_data = cp.prod(~(exact_samples[:, :m] ^ (Y.T == 1)), 1)

        indices = cp.where(fits_data)[0]
        freq += len(indices)

    freqs = comm.gather(freq, root=0)  #gather the per-rank counts

    if rank == 0:
        freqs = sum(freqs)  #total count across ranks of samples that fit the data
        prob = freqs / (num_tasks * cupy_samples)
        logPU = np.log(prob)
        log10PU = np.log10(prob)
        print(log10PU)
        #compute PAC-Bayes bound
        delta = 2**-10
        bound = (-logPU + 2 * np.log(total_samples) + 1 -
                 np.log(delta)) / total_samples
        bound = 1 - np.exp(-bound)
        print("pre-confusion-correction bound: ", bound)
        rho = confusion / (1.0 + confusion)
        bound = (bound - 0.5 * rho) / (
            1 - rho
        )  #to correct for the confusion changing the training data distribution (in training set, but not in test set)!
        print("Bound: ", bound)
        print("Accuracy bound: ", 1 - bound)
        useful_flags = [
            "dataset", "network", "m", "label_corruption", "confusion",
            "number_layers", "sigmaw", "sigmab", "binarized", "pooling",
            "intermediate_pooling", "whitening", "centering",
            "channel_normalization", "training", "n_gpus"
        ]
        with open(results_folder + prefix + "bounds.txt", "a") as file:
            file.write("#")
            for key in useful_flags:
                file.write("{}\t".format(key))
            file.write("bound")
            file.write("\t")
            file.write("log10PU")
            file.write("\n")
            for key in useful_flags:
                file.write("{}\t".format(FLAGS[key]))
            file.write("{}".format(bound))
            file.write("\t")
            file.write("{}".format(log10PU))
            file.write("\n")