Example No. 1
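This and the following examples rely on the same module-level setup. A minimal sketch of the assumed imports and globals is given below, reconstructed from how the names are used in the code; the hyperparameter values are illustrative placeholders, not the original settings, and the model builders (svhn_model, CIFAR, mnist_model_logistic_reg) and the bilevel_importance class are project-local modules whose exact import paths are assumptions.

import h5py
import numpy as np
import tensorflow as tf
import keras
from keras.datasets import cifar10, mnist
from keras.preprocessing.image import ImageDataGenerator
from cleverhans.utils_keras import KerasModelWrapper

# Project-local (assumed module name): the bilevel solver; the model
# builders svhn_model, CIFAR and mnist_model_logistic_reg are likewise
# defined elsewhere in the repository.
from bilevel_importance import bilevel_importance

# Globals referenced by the examples (illustrative values only).
# height/width/nch and the input shape change per dataset: 32x32x1 for
# the grey SVHN data, 32x32x3 for CIFAR-10, 28*28*1 (flattened) for MNIST.
height, width, nch = 32, 32, 1
nclass = 10
batch_size = 128
nepochs = 50
niter = 1
lr_u, lr_v = 1e-3, 1e-4
rho_0, lamb_0, eps_0, nu_0 = 1e-1, 1e-1, 1e-1, 1e-4
c_rho, c_lamb, c_eps = 1.1, 0.9, 0.9
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
datagen = ImageDataGenerator(width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=True)
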
def main(argv=None):

    tf.set_random_seed(1234)
    sess = tf.Session(config=config)

    for ti in range(1):  # runs once as written; increase the range to repeat trials

        h5f = h5py.File('SVHN_single_grey_without_extra.h5', 'r')

        # Load the training, test and validation set
        X_train = h5f['X_train'][:]
        Y_train = h5f['y_train'][:]
        X_test = h5f['X_test'][:]
        Y_test = h5f['y_test'][:]
        X_val = h5f['X_val'][:]
        Y_val = h5f['y_val'][:]

        h5f.close()

        #Make dataset divisible by batch size
        pts = int(len(X_train) / batch_size)
        X_train = np.array(X_train[:pts * batch_size])
        Y_train = np.array(Y_train[:pts * batch_size])

        Ntrain = len(X_train)
        Nval = len(X_val)

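        ## Simulate label noise: re-label the first 50% of the training set
        ## with a class drawn uniformly from the 9 incorrect classes.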
        corrupt = int(0.5 * len(X_train))
        for i in range(corrupt):
            curr_class = np.argmax(Y_train[i])
            Y_train[i] = np.zeros(10)

            #random label
            j = curr_class
            while j == curr_class:
                j = np.random.randint(0, 10)

            Y_train[i][j] = 1

        ## Define model
        x_train_tf = tf.placeholder(tf.float32,
                                    shape=(None, height, width, nch))
        y_train_tf = tf.placeholder(tf.float32, shape=(None, nclass))

        x_val_tf = tf.placeholder(tf.float32, shape=(None, height, width, nch))
        y_val_tf = tf.placeholder(tf.float32, shape=(None, nclass))

        scope_model = 'svhn_classifier'
        with tf.variable_scope(scope_model, reuse=False):
            model = svhn_model(X_train, nclass)

        cls_train = KerasModelWrapper(model).get_logits(x_train_tf)
        cls_test = KerasModelWrapper(model).get_logits(x_val_tf)
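        # cls_train and cls_test are two views of the same Keras model
        # (shared weights); only the input placeholders differ.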

        var_cls = model.trainable_weights

        #########################################################################################################
        ## Bilevel training
        #########################################################################################################
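        ## bilevel_importance is the project's bilevel solver. From its usage
        ## here: the upper level (step size lr_u) adjusts the per-sample
        ## importance weights against the validation loss, while the lower
        ## level (lr_v) trains the classifier; rho, lamb and eps start at
        ## rho_0/lamb_0/eps_0 and evolve by the c_rho/c_lamb/c_eps factors
        ## (their current values rho_t/lamb_t/eps_t are printed each epoch).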
        bl_imp = bilevel_importance(sess, x_train_tf, x_val_tf, y_train_tf,
                                    y_val_tf, cls_train, cls_test, var_cls,
                                    batch_size, lr_u, lr_v, rho_0, lamb_0,
                                    eps_0, nu_0, c_rho, c_lamb, c_eps)

        sess.run(tf.global_variables_initializer())

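        ## Only the training labels were corrupted above, so the validation
        ## set is clean; pretraining on it yields a reference model whose
        ## predictions on the noisy training set seed the importance weights.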
        ## Pre-Train with val data
        bl_imp.train_simple(X_val, Y_val, 200)
        if False:
            importance_atanh = np.ones((Ntrain)) * 0.8
            importance_atanh = np.arctanh(2. * importance_atanh - 1.)
        else:
            # Predictions from the model pretrained on the validation set
            print("Test Accuracy")
            _, __ = bl_imp.eval_simple(X_test, Y_test)
            print("Train Accuracy")
            _, y_train_init = bl_imp.eval_simple(X_train, Y_train)

            #Initialize importance: default 0.2, raised to 0.8 for samples the
            #pretrained model already classifies correctly
            importance_atanh = np.ones((Ntrain)) * np.arctanh(2. * 0.2 - 1.)
            ind_correct = np.where(np.argmax(Y_train, 1) == y_train_init)[0]
            importance_atanh[ind_correct] = np.arctanh(2. * 0.8 - 1.)

        if True:
            #Start Bilevel Training
            for epoch in range(nepochs):
                nb_batches = int(float(Ntrain) / batch_size)
                if Ntrain % batch_size != 0:
                    nb_batches += 1

                ind_shuf = np.arange(Ntrain)
                np.random.shuffle(ind_shuf)
                for batch in range(nb_batches):
                    ind_batch = range(batch_size * batch,
                                      min(batch_size * (1 + batch), Ntrain))
                    ind_val = np.random.choice(Nval,
                                               size=(batch_size),
                                               replace=False)
                    ind_tr = ind_shuf[ind_batch]

                    f, gvnorm, gv_nu, lamb_g, timp_atanh = bl_imp.train(
                        X_train[ind_tr], Y_train[ind_tr], X_val[ind_val],
                        Y_val[ind_val], importance_atanh[ind_tr], niter)
                    importance_atanh[ind_tr] = timp_atanh

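                ## Each importance weight a_i in (0,1) is stored unconstrained
                ## as v_i = arctanh(2*a_i - 1), i.e. a_i = (tanh(v_i) + 1)/2.
                ## The block below maps back to a_i, rescales to mean 0.5,
                ## clips away from 0 and 1, and returns to arctanh space.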
                ## Renormalize importance_atanh
                if True:
                    importance = 0.5 * (np.tanh(importance_atanh) + 1.)
                    importance = 0.5 * importance / np.mean(importance)
                    importance = np.maximum(.00001,
                                            np.minimum(.99999, importance))
                    importance_atanh = np.arctanh(2. * importance - 1.)

                if epoch % 1 == 0:
                    rho_t, lamb_t, eps_t = sess.run(
                        [bl_imp.bl.rho_t, bl_imp.bl.lamb_t, bl_imp.bl.eps_t])
                    print(
                        corrupt,
                        'epoch %d (rho=%f, lamb=%f, eps=%f): h=%f + %f + %f + %f = %f'
                        % (epoch, rho_t, lamb_t, eps_t, f, gvnorm, gv_nu,
                           lamb_g, f + gvnorm + gv_nu + lamb_g))
                    print("Test Accuracy")
                    bl_imp.eval_simple(X_test, Y_test)
                    print("Train Accuracy")
                    bl_imp.eval_simple(X_train, Y_train)
                    print("Val Accuracy")
                    bl_imp.eval_simple(X_val, Y_val)
                    print('mean ai=%f, mean I(ai>0.1)=%f' %
                          (np.mean(importance),
                           len(np.where(importance > 0.1)[0]) /
                           np.float(X_train.shape[0])))
                    print(corrupt, niter, "\n")

    sess.close()
    return

Example No. 2

def main(argv=None):

    tf.set_random_seed(1234)
    sess = tf.Session(config=config)

    ## Read data
    (X_train_all, Y_train_all), (X_test, Y_test) = cifar10.load_data()
    
    X_train_all = X_train_all.astype('float32')
    X_test = X_test.astype('float32')
    X_train_all /= 255
    X_test /= 255
    
    Y_train_all = keras.utils.to_categorical(Y_train_all, nclass)
    Y_test = keras.utils.to_categorical(Y_test, nclass)
    
    points = 40000
    val_points = 10000
   
    X_train = X_train_all[:points]
    Y_train = Y_train_all[:points]
    X_val = X_train_all[points:points+val_points] 
    Y_val = Y_train_all[points:points+val_points] 
    
    Ntrain = len(X_train)
    Nval = len(X_val)
    
    corrupt = int(0.25 * len(X_train))
    for i in range(corrupt):
        curr_class = np.argmax(Y_train[i])
        Y_train[i] = np.zeros(10)
        
        #random label
        j = curr_class
        while j == curr_class:
            j = np.random.randint(0, 10)
        
        #shift 1 label
        #j = (curr_class + 1)%10
        
        Y_train[i][j] = 1
        
    
    x_train_tf = tf.placeholder(tf.float32, shape=(batch_size,height,width,nch))
    y_train_tf = tf.placeholder(tf.float32, shape=(batch_size,nclass))
    
    x_val_tf = tf.placeholder(tf.float32, shape=(None,height,width,nch))
    y_val_tf = tf.placeholder(tf.float32, shape=(None,nclass))

    scope_model = 'cifar_classifier'
    with tf.variable_scope(scope_model, reuse=False):    
        model = CIFAR(X_train, nclass)
        
    cls_train = KerasModelWrapper(model).get_logits(x_train_tf)
    cls_test = KerasModelWrapper(model).get_logits(x_val_tf)

    var_cls = model.trainable_weights 

    #########################################################################################################
    ## Bilevel training
    #########################################################################################################
    bl_imp = bilevel_importance(sess, x_train_tf, x_val_tf, y_train_tf,
                                y_val_tf, cls_train, cls_test, var_cls,
                                batch_size, lr_u, lr_v, rho_0, lamb_0, eps_0,
                                nu_0, c_rho, c_lamb, c_eps)

    sess.run(tf.global_variables_initializer())
    
    # Pretrain lower level
    bl_imp.train_simple(X_val, Y_val, 101)
    
    if False:
        importance_atanh = np.ones((Ntrain))*0.8
        importance_atanh = np.arctanh(2.*importance_atanh-1.)
    else:
        # Predictions based on pretrained model
        print("Test Accuracy")
        _, __  = bl_imp.eval_simple(X_test, Y_test)
        print("Train Accuracy")
        _,y_train_init = bl_imp.eval_simple(X_train, Y_train)
        
        importance_atanh = np.ones((Ntrain))*np.arctanh(2.*0.2-1.)#0.2
        ind_correct = np.where(np.argmax(Y_train,1)==y_train_init)[0]
        importance_atanh[ind_correct] = np.arctanh(2.*0.8-1.)#0.8
        
    if True:
        for epoch in range(nepochs):
            nb_batches = int(np.floor(float(Ntrain)/batch_size))
            ind_shuf = np.arange(Ntrain)
            np.random.shuffle(ind_shuf)
            
            for batch in range(nb_batches):
                ind_batch = range(batch_size*batch,min(batch_size*(1+batch), Ntrain))
                ind_tr = ind_shuf[ind_batch]
                ind_val = np.random.choice(Nval, size=(batch_size),replace=False)
                
                # Data augmentation: datagen is assumed to be a module-level
                # keras ImageDataGenerator; take one augmented copy of the batch.
                train_X_batch, train_Y_batch = next(
                    datagen.flow(X_train[ind_tr], Y_train[ind_tr],
                                 batch_size=len(ind_tr), shuffle=False))
                val_X_batch, val_Y_batch = next(
                    datagen.flow(X_val[ind_val], Y_val[ind_val],
                                 batch_size=len(ind_val), shuffle=False))
                
                f, gvnorm, gv_nu, lamb_g, timp_atanh = bl_imp.train(train_X_batch, train_Y_batch, val_X_batch, val_Y_batch, importance_atanh[ind_tr], niter)
                #f, gvnorm, gv_nu, lamb_g, timp_atanh = bl_imp.train(X_train[ind_tr], Y_train[ind_tr], X_val[ind_val], Y_val[ind_val], importance_atanh[ind_tr], niter)
                
                importance_atanh[ind_tr] = timp_atanh
                
            ## Renormalize importance_atanh
            if True:
                importance = 0.5*(np.tanh(importance_atanh)+1.)
                importance = 0.5*importance/np.mean(importance)
                importance = np.maximum(.00001,np.minimum(.99999,importance))
                importance_atanh = np.arctanh(2.*importance-1.)
            
            if epoch%1==0:
                rho_t,lamb_t,eps_t = sess.run([bl_imp.bl.rho_t,bl_imp.bl.lamb_t,bl_imp.bl.eps_t])
                print('epoch %d (rho=%f, lamb=%f, eps=%f): h=%f + %f + %f + %f = %f'%
                    (epoch,rho_t,lamb_t,eps_t,f,gvnorm,gv_nu,lamb_g,f+gvnorm+gv_nu+lamb_g))
                print("Test Accuracy")
                bl_imp.eval_simple(X_test,Y_test)
                print("Train Accuracy")
                bl_imp.eval_simple(X_train,Y_train)
                print("Val Accuracy")
                bl_imp.eval_simple(X_val,Y_val)
                print('mean ai=%f, mean I(ai>0.1)=%f'%(np.mean(importance),len(np.where(importance>0.1)[0])/np.float(X_train.shape[0])))
                print(niter, "\n")
            
    sess.close()
    return 

Example No. 3

def main(argv=None):

    tf.set_random_seed(1234)
    sess = tf.Session()

    ## Read data
    (X_train_all, Y_train_all), (X_test, Y_test) = mnist.load_data()

    X_train_all = X_train_all.reshape(len(X_train_all), 28 * 28 * 1)
    X_test = X_test.reshape(len(X_test), 28 * 28 * 1)

    X_train_all = X_train_all.astype('float32')
    X_test = X_test.astype('float32')
    X_train_all /= 255
    X_test /= 255

    Y_train_all = keras.utils.to_categorical(Y_train_all, nclass)
    Y_test = keras.utils.to_categorical(Y_test, nclass)

    points = 5000
    val_points = 5000

    X_train = X_train_all[:points]
    Y_train = Y_train_all[:points]
    X_val = X_train_all[points:points + val_points]
    Y_val = Y_train_all[points:points + val_points]

    Ntrain = len(X_train)
    Nval = len(X_val)

    corrupt = int(0.5 * len(X_train))
    for i in range(corrupt):
        curr_class = np.argmax(Y_train[i])
        Y_train[i] = np.zeros(10)

        #random label
        j = curr_class
        while j == curr_class:
            j = np.random.randint(0, 10)

        Y_train[i][j] = 1

    ## Define model
    x_train_tf = tf.placeholder(tf.float32, shape=(None, height * width * nch))
    y_train_tf = tf.placeholder(tf.float32, shape=(None, nclass))

    x_val_tf = tf.placeholder(tf.float32, shape=(None, height * width * nch))
    y_val_tf = tf.placeholder(tf.float32, shape=(None, nclass))

    scope_model = 'mnist_classifier'
    with tf.variable_scope(scope_model, reuse=False):
        model = mnist_model_logistic_reg(X_train, nclass)

    cls_train = KerasModelWrapper(model).get_logits(x_train_tf)
    cls_test = KerasModelWrapper(model).get_logits(x_val_tf)

    var_cls = model.trainable_weights

    ## Count the trainable parameters of the model
    total_parameters = 0
    for variable in tf.trainable_variables():
        # variable.get_shape() is a tf.TensorShape of tf.Dimension entries
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print('Total trainable parameters: %d' % total_parameters)

    #########################################################################################################
    ## Bilevel training
    #########################################################################################################
    bl_imp = bilevel_importance(sess, x_train_tf, x_val_tf, y_train_tf,
                                y_val_tf, cls_train, cls_test, var_cls,
                                batch_size, lr_u, lr_v, rho_0, lamb_0, eps_0,
                                nu_0, c_rho, c_lamb, c_eps)

    sess.run(tf.global_variables_initializer())

    ## Pre-train with val data
    bl_imp.train_simple(X_val, Y_val, 100)
    bl_imp.eval_simple(X_test, Y_test)

    if False:
        importance_atanh = np.ones((Ntrain)) * 0.8
        importance_atanh = np.arctanh(2. * importance_atanh - 1.)
    else:
        # Predictions from the model pretrained on the validation set
        print("Test Accuracy")
        _, __ = bl_imp.eval_simple(X_test, Y_test)
        print("Train Accuracy")
        _, y_train_init = bl_imp.eval_simple(X_train, Y_train)

        importance_atanh = np.ones((Ntrain)) * np.arctanh(2. * 0.4 - 1.)
        ind_correct = np.where(np.argmax(Y_train, 1) == y_train_init)[0]
        importance_atanh[ind_correct] = np.arctanh(2. * 0.6 - 1.)

    if True:
        for epoch in range(nepochs):
            nb_batches = int(np.floor(float(Ntrain) / batch_size))
            ind_shuf = np.arange(Ntrain)
            np.random.shuffle(ind_shuf)
            for batch in range(nb_batches):
                ind_batch = range(batch_size * batch,
                                  min(batch_size * (1 + batch), Ntrain))
                ind_val = np.random.choice(Nval,
                                           size=(batch_size),
                                           replace=False)
                ind_tr = ind_shuf[ind_batch]

                f, gvnorm, gv_nu, lamb_g, timp_atanh = bl_imp.train(
                    X_train[ind_tr], Y_train[ind_tr], X_val[ind_val],
                    Y_val[ind_val], importance_atanh[ind_tr], niter)
                importance_atanh[ind_tr] = timp_atanh

            ## Renormalize importance_atanh
            if True:
                importance = 0.5 * (np.tanh(importance_atanh) + 1.)
                importance = 0.5 * importance / np.mean(importance)
                importance = np.maximum(.00001, np.minimum(.99999, importance))
                importance_atanh = np.arctanh(2. * importance - 1.)

            if epoch % 1 == 0:
                rho_t, lamb_t, eps_t = sess.run(
                    [bl_imp.bl.rho_t, bl_imp.bl.lamb_t, bl_imp.bl.eps_t])
                print(
                    'epoch %d (rho=%f, lamb=%f, eps=%f): h=%f + %f + %f + %f = %f'
                    % (epoch, rho_t, lamb_t, eps_t, f, gvnorm, gv_nu, lamb_g,
                       f + gvnorm + gv_nu + lamb_g))
                print("Test Accuracy")
                bl_imp.eval_simple(X_test, Y_test)
                print("Train Accuracy")
                bl_imp.eval_simple(X_train, Y_train)
                print("Val Accuracy")
                bl_imp.eval_simple(X_val, Y_val)
                print(
                    'mean ai=%f, mean I(ai>0.1)=%f' %
                    (np.mean(importance), len(np.where(importance > 0.1)[0]) /
                     np.float(X_train.shape[0])))
                print("\n")

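    ## Final step: keep only the samples the bilevel run deems important
    ## (a_i > 0.9), re-initialize, and retrain from scratch on that subset
    ## plus the clean validation data.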
    ind = np.argwhere(importance > 0.9).flatten()
    print(len(ind))
    sess.run(tf.global_variables_initializer())
    bl_imp.train_simple(np.concatenate([X_train[ind], X_val]),
                        np.concatenate([Y_train[ind], Y_val]), 500)
    bl_imp.eval_simple(X_test, Y_test)

    sess.close()
    return