Example #1
def define_errors_default_models(model,
                                 l1=0.,
                                 l2=0.,
                                 synthetic_hypers=None,
                                 augment=0):
    assert isinstance(model, rf.Network)

    res = rf.vectorize_model(model.var_list,
                             model.inp[-1],
                             *model.Ws,
                             augment=augment)
    s, out, ws = res[0], res[1], res[2:]

    # error
    y = tf.placeholder(tf.float32, name='y')
    error = tf.reduce_mean(rf.cross_entropy_loss(y, out),
                           name='error')  # also validation error

    base_training_error = rf.cross_entropy_loss(y, out)

    gamma = None
    if synthetic_hypers is not None:
        gamma = tf.Variable(tf.ones([synthetic_hypers]))
        training_error = tf.reduce_mean([
            gamma[k] * base_training_error[k] for k in range(synthetic_hypers)
        ])
    else:
        training_error = tf.reduce_mean(base_training_error)

    rho_l1s, reg_l1s, rho_l2s, reg_l2s = None, None, None, None

    # layer-wise l1 regularizers
    if isinstance(l1, float):
        rho_l1s = [
            tf.Variable(l1, name='rho_l1_%d' % k) for k in range(len(ws))
        ]
        reg_l1s = [tf.reduce_sum(tf.abs(w)) for w in ws]
        training_error += tf.reduce_sum(
            [rho * rg_l1 for rho, rg_l1 in zip(rho_l1s, reg_l1s)])

    # layer-wise l2 regularizers
    if isinstance(l2, float):
        rho_l2s = [
            tf.Variable(l2, name='rho_l2_%d' % k) for k in range(len(ws))
        ]
        reg_l2s = [tf.reduce_sum(tf.pow(w, 2)) for w in ws]
        training_error += tf.reduce_sum(
            [rho * rg_l2 for rho, rg_l2 in zip(rho_l2s, reg_l2s)])

    correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"),
                              name='accuracy')

    return s, out, ws, y, error, training_error, rho_l1s, reg_l1s, rho_l2s, reg_l2s, accuracy, \
           base_training_error, gamma
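To make the returned handles concrete, here is a hedged sketch that plugs `training_error` and the layer-wise `rho` variables into the hyperparameter machinery used in Examples #4 and #5. It assumes `rf.LinearModel` (from Example #2) satisfies the `rf.Network` check and exposes the `Ws` attribute used above, and that plain gradient descent needs no state augmentation (matching the default `augment=0`); the learning rates are illustrative, not taken from the source.
import tensorflow as tf
import rfho as rf

# Hedged usage sketch.  Assumption: rf.LinearModel passes the rf.Network check
# and exposes the .Ws attribute used by define_errors_default_models.
x = tf.placeholder(tf.float32, name='x')
model = rf.LinearModel(x, 4, 3)

(s, out, ws, y, error, training_error,
 rho_l1s, reg_l1s, rho_l2s, reg_l2s,
 accuracy, base_training_error, gamma) = define_errors_default_models(model,
                                                                      l1=1.e-4,
                                                                      l2=1.e-4)

# Optimize the weights on the regularized training error ...
dynamics = rf.GradientDescentOptimizer.create(s, lr=tf.Variable(.1, name='eta'),
                                              loss=training_error)
# ... and treat the layer-wise regularization weights as hyperparameters,
# tuned against the (unregularized) validation error, as in Examples #4 and #5.
hyper_opt = rf.HyperOptimizer(dynamics, {error: rho_l1s + rho_l2s},
                              rf.ForwardHG, lr=1.e-3)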
Example #2
def iris_logistic_regression(augment=0):
    """
    Simple model for testing purposes
    
    :param augment: 
    :return: 
    """
    iris = load_iris(partitions_proportions=(.3, .3))
    x = tf.placeholder(tf.float32, name='x')
    y = tf.placeholder(tf.float32, name='y')
    model = rf.LinearModel(x, 4, 3)
    model_w, model_y = rf.vectorize_model(model.var_list,
                                          model.inp[-1],
                                          augment=augment)
    error = tf.reduce_mean(rf.cross_entropy_loss(model_y, y))

    correct_prediction = tf.equal(tf.argmax(model_y, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    return iris, x, y, model, model_w, model_y, error, accuracy
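A hedged sketch of how the returned pieces could be evaluated in a session; it assumes the object returned by `load_iris` exposes `create_supplier` on its splits, as the `Datasets` objects in Example #6 do.
import tensorflow as tf

# Hypothetical usage; the supplier methods are assumed to behave as in Example #6.
iris, x, y, model, model_w, model_y, error, accuracy = iris_logistic_regression()

train_supplier = iris.train.create_supplier(x, y)
val_supplier = iris.validation.create_supplier(x, y)

with tf.Session().as_default() as ss:
    tf.global_variables_initializer().run()
    print('initial train error:', ss.run(error, feed_dict=train_supplier()))
    print('initial validation accuracy:', ss.run(accuracy, feed_dict=val_supplier()))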
Example #3
# ss.run(ts_hy)  # perform a hypergradient descent step....
# print(ss.run(far.utils.hyperparameters()))
# print(ss.run(oo))


# # check with rfho

# In[25]:

import rfho as rf


# In[26]:

w, c, co = rf.vectorize_model([v1, v2], cost, oo, augment=2)


# In[27]:

#dyn = rf.MomentumOptimizer.create(w, 0.1, 0.9, loss=c)
dyn = rf.AdamOptimizer.create(w, loss=c)


# In[28]:

hyperg = rf.HyperOptimizer(dyn, {co: lmbd}, rf.ReverseHG)


# In[29]:
Example #4
def experiment(mnist,
               optimizer=rf.AdamOptimizer,
               optimizer_kwargs=None,
               hyper_batch_size=100,
               T=200,
               hyper_learning_rate=1.e-4,
               use_mse=False):
    """
    Modified MNIST for expert (CNN part) tensorflow tutorial experiment to include real time
    hyperparameter optimization. Hyperparameters being optimized are learning rate for
    ADAM optimizer and coefficient of L2 norm of fully connected part of the network.
    Note that this codes requires ~ 3x (gpu) memory and ~ 4x time compared to the original one
    but should yield a final test error of around 99.4 %

    :return:
    """
    # Create the model
    x = tf.placeholder(tf.float32, [None, 784], name='x')

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10], name='y')

    # Build the graph for the deep net
    y_conv, W_fc1, W_fc2 = deepnn(x)

    # RFHO: collect model variables and "vectorize the model"
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # vectorize the model with the augmentation required by the chosen optimizer:
    w, y_conv, W_fc1, W_fc2 = rf.vectorize_model(
        model_variables,
        y_conv,
        W_fc1,
        W_fc2,
        augment=optimizer.get_augmentation_multiplier())
    # w is now a vector that contains all the weights; y_conv, W_fc1 and W_fc2 are the same
    # tensors as before, but in the new graph

    # RFHO: use the cross entropy defined in the package, since the TensorFlow one does not
    # support second derivatives (Hessian); eps is the clipping threshold for the cross entropy.
    if use_mse:
        error = tf.reduce_mean(tf.squared_difference(y_, y_conv), name='error')
    else:
        error = tf.reduce_mean(rf.cross_entropy_loss(labels=y_,
                                                     logits=y_conv,
                                                     eps=1.e-4),
                               name='error')
    # RFHO add an L2 regularizer on the last weight matrix, whose weight will be optimized
    rho = tf.Variable(0., name='rho')
    constraints = [rf.positivity(rho)]  # rho >= 0
    iterations_per_epoch = 1100  # with mini batch size of 50
    training_error = error + 1 / iterations_per_epoch * tf.multiply(
        rho,
        tf.nn.l2_loss(W_fc1) + tf.nn.l2_loss(W_fc2))

    # train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    # RFHO: define the learning rate as a hyperparameter and create the parameter optimization dynamics
    if optimizer_kwargs is None:
        optimizer_kwargs = {'lr': tf.Variable(1.e-4, name='alpha')}
    dynamics = optimizer.create(w, loss=training_error, **optimizer_kwargs)
    # add the 'usual' constraints for the algorithmic hyperparameters
    constraints += dynamics.get_natural_hyperparameter_constraints()
    # if optimizer is rf.AdamOptimizer:
    #     constraints.append(dynamics.learning_rate.assign(tf.minimum(1.e-3, dynamics.learning_rate)))

    # RFHO: we want to optimize the learning rate and the L2 coefficient w.r.t. the cross entropy loss on the validation set
    hyper_dict = {
        error:
        [rho] + dynamics.get_optimization_hyperparameters(only_variables=True)
    }
    # RFHO: define the hyperparameter optimizer; we use the Forward-HG method to compute hyper-gradients (RTHO algorithm)
    hyper_opt = rf.HyperOptimizer(dynamics,
                                  hyper_dict,
                                  rf.ForwardHG,
                                  lr=hyper_learning_rate)

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')

    # RFHO: last thing before running: define the example suppliers
    _train_fd = rf.ExampleVisiting(mnist.train,
                                   batch_size=50).create_feed_dict_supplier(
                                       x, y_)
    _validation_fd = mnist.validation.create_supplier(x, y_)

    with tf.Session(config=rf.CONFIG_GPU_GROWTH).as_default():  # RFHO: use the default session
        # RFHO: this will initialize all the variables, including the hyperparameters
        hyper_opt.initialize()
        for i in range(T):  # RFHO: run for T hyper-iterations (default 200)
            hyper_opt.run(hyper_batch_size,
                          train_feed_dict_supplier=_train_fd,
                          val_feed_dict_suppliers={error: _validation_fd},
                          hyper_constraints_ops=constraints)

        test_accuracy = accuracy.eval(
            feed_dict=mnist.test.create_supplier(x, y_)())
        print('test accuracy %g' % test_accuracy)
        return test_accuracy
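A hedged sketch of an invocation. Note that `experiment` relies on `mnist.validation.create_supplier` and `mnist.test.create_supplier`, so it expects an rfho-style `Datasets` wrapper rather than the raw `input_data` object of Example #5; `load_mnist_datasets` below is a hypothetical stand-in for whatever utility produces such a wrapper.
import rfho as rf

# Hypothetical loader: must return an object whose .train/.validation/.test
# splits expose create_supplier(x, y), as required by the function body above.
mnist = load_mnist_datasets(one_hot=True)  # placeholder, not a documented rfho call

test_acc = experiment(mnist,
                      optimizer=rf.AdamOptimizer,
                      hyper_batch_size=100,
                      T=200,
                      hyper_learning_rate=1.e-4)
print('final test accuracy: %g' % test_acc)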
Example #5
def main(_):
    """
    Modified MNIST for expert (CNN part) tensorflow tutorial experiment to include real time
    hyperparameter optimization. Hyperparameters being optimized are learning rate for
    ADAM optimizer and coefficient of L2 norm of fully connected part of the network.
    Note that this codes requires ~ 3x (gpu) memory and ~ 4x time compared to the original one
    but should yield a final test error of around 99.4 %

    :param _:
    :return:
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, W_fc1, W_fc2 = deepnn(x)

    # RFHO: collect model variables and "vectorize the model"
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # use the adam optimizer:
    optimizer = rf.AdamOptimizer
    w, y_conv, W_fc1, W_fc2 = rf.vectorize_model(
        model_variables,
        y_conv,
        W_fc1,
        W_fc2,
        augment=optimizer.get_augmentation_multiplier(),
        suppress_err_out=False)
    # w is now a vector that contains all the weights; y_conv, W_fc1 and W_fc2 are the same
    # tensors as before, but in the new graph

    # RFHO: use the cross entropy defined in the package, since the TensorFlow one does not
    # support second derivatives (Hessian); eps is the clipping threshold for the cross entropy.
    cross_entropy = tf.reduce_mean(
        rf.cross_entropy_loss(labels=y_, logits=y_conv, eps=1.e-4))
    # RFHO add an L2 regularizer on the last weight matrix, whose weight will be optimized
    rho = tf.Variable(0., name='rho')
    constraints = [rf.positivity(rho)]  # rho >= 0
    iterations_per_epoch = 1100  # with mini batch size of 50
    training_error = cross_entropy + 1 / iterations_per_epoch * tf.multiply(
        rho,
        tf.nn.l2_loss(W_fc1) + tf.nn.l2_loss(W_fc2))

    # train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    # RFHO: define the learning rate as a hyperparameter and create the parameter optimization dynamics
    alpha = tf.Variable(1.e-4, name='alpha')
    constraints.append(rf.positivity(alpha))
    dynamics = optimizer.create(w, lr=alpha, loss=training_error)

    # RFHO: we want to optimize the learning rate and the L2 coefficient w.r.t. the cross entropy loss on the validation set
    hyper_dict = {cross_entropy: [alpha, rho]}
    # RFHO: define the hyperparameter optimizer; we use the Forward-HG method to compute hyper-gradients (RTHO algorithm)
    hyper_opt = rf.HyperOptimizer(dynamics, hyper_dict, rf.ForwardHG, lr=1.e-5)

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')

    # RFHO: last thing before running: define the example suppliers
    def _train_fd():
        batch = mnist.train.next_batch(50)  # batch size of 50
        return {x: batch[0], y_: batch[1]}

    def _validation_fd():
        return {x: mnist.validation.images, y_: mnist.validation.labels}

    with tf.Session(config=rf.CONFIG_GPU_GROWTH).as_default() as ss:  # RFHO: use the default session
        # RFHO: this will initialize all the variables, including the hyperparameters
        hyper_opt.initialize()
        for i in range(200):  # RFHO: run for 200 hyper-iterations
            hyper_opt.run(
                100,
                train_feed_dict_supplier=_train_fd,
                val_feed_dict_suppliers={cross_entropy: _validation_fd},
                hyper_constraints_ops=constraints)

            # if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict=_train_fd())
            val_accuracy, val_error = ss.run([accuracy, cross_entropy],
                                             feed_dict=_validation_fd())

            print(
                'step %d, training accuracy %.2f; validation accuracy: %.4f, validation error: %.5f; '
                'alpha: %.6f, %.5f, rho: %.6f, %.5f' %
                (i * 100, train_accuracy, val_accuracy, val_error,
                 alpha.eval(),
                 hyper_opt.hyper_gradients.hyper_gradients_dict[alpha].eval(),
                 rho.eval(),
                 hyper_opt.hyper_gradients.hyper_gradients_dict[rho].eval()))
            # train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        test_accuracy = accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        })
        print('test accuracy %g' % test_accuracy)
        return test_accuracy
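For completeness, a sketch of the flag and entry-point boilerplate that typically surrounds a `main(_)` of this shape in TF1 tutorials; it is an assumption consistent with the `FLAGS.data_dir` reference above, and the default data directory is a placeholder.
import argparse
import sys

import tensorflow as tf

FLAGS = None  # populated below; main() reads FLAGS.data_dir

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='/tmp/mnist_data',
                        help='directory for storing the MNIST data (placeholder default)')
    FLAGS, unparsed = parser.parse_known_args()
    # tf.app.run dispatches to main(_) with the remaining command-line arguments
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)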
Example #6
def data_hyper_cleaner(saver,
                       model,
                       y,
                       data,
                       T,
                       lr,
                       R,
                       toy_problem=True,
                       append_string='',
                       optimizer=rf.GradientDescentOptimizer,
                       optimizer_kwargs=None,
                       hyper_iterations=500,
                       hyper_grad_kwargs=None,
                       hyper_optimizer_class=rf.AdamOptimizer,
                       hyper_optimizer_kwargs=None):
    """

    :param append_string: string to append to file names when saving.
    :param toy_problem: if True, computes _precision, _recall and _f1 (in a real problem this would not be feasible)
    :param saver: `Saver` object (can be None)
    :param data: `Datasets` object
    :param T: number of iterations
    :param model: a model (should comply with `rf.Network`)
    :param y: placeholder for output
    :param lr: learning rate
    :param R: radius of L1 ball
    :param optimizer: parameter optimizer
    :param optimizer_kwargs: optional arguments for parameter optimizer
    :param hyper_iterations: number of hyper-iterations
    :param hyper_grad_kwargs: optional arguments for `ReverseHG` (such as weight history)
    :param hyper_optimizer_class: optimizer class for hyperparameters
    :param hyper_optimizer_kwargs: optional arguments for hyperparameter optimizer
    :return: the final value of `lmd` (the per-example weights)
    """
    if saver: saver.save_setting(vars())

    if hyper_optimizer_kwargs is None:
        hyper_optimizer_kwargs = {}

    x = model.inp[0]
    w, out = rf.vectorize_model(
        model.var_list,
        model.out,
        augment=optimizer.get_augmentation_multiplier())

    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y, 1)), "float"))

    sample_error = rf.cross_entropy_loss(y, out)
    error = tf.reduce_mean(sample_error)

    lmd = tf.Variable(R * tf.ones([data.train.num_examples]) /
                      data.train.num_examples,
                      dtype=tf.float32,
                      name='lambda')

    weighted_error = tf.reduce_mean(tf.multiply(lmd, sample_error),
                                    name='train_weighted_error')
    # ADD REGULARIZATION??  (shouldn't be necessary)
    dynamics = optimizer.create(w,
                                lr=lr,
                                loss=weighted_error,
                                **optimizer_kwargs or {})

    hyper_opt = rf.HyperOptimizer(dynamics, {error: lmd},
                                  method=rf.ReverseHG,
                                  hyper_grad_kwargs=hyper_grad_kwargs or {},
                                  hyper_optimizer_class=hyper_optimizer_class,
                                  **hyper_optimizer_kwargs)

    # projection
    grad_hyper = tf.placeholder(tf.float32)
    lmd_assign = lmd.assign(grad_hyper)

    _project = _get_projector(R=R, N_ex=data.train.num_examples)

    def project():
        pt = lmd.eval()
        _resx = _project(pt)
        lmd_assign.eval(feed_dict={grad_hyper: _resx})

    # suppliers
    tr_s = data.train.create_supplier(x, y)
    val_s = data.validation.create_supplier(x, y)
    tst_s = data.test.create_supplier(x, y)

    if saver:
        saver.clear_items()  # just to be sure!
        saver.add_items(
            'validation accuracy',
            accuracy,
            val_s,
            'lambda',
            lmd,
        )
        if toy_problem:
            saver.add_items(
                'tp, fp, fn, tn',
                lambda stp: _calc_pos_neg(lmd.eval(), data),
                'Precision',
                lambda stp: _precision(lmd.eval(), data),
                'Recall',
                lambda stp: _recall(lmd.eval(), data),
                'F1',
                lambda stp: _f1(lmd.eval(), data),
            )

    with tf.Session(config=rf.CONFIG_GPU_GROWTH).as_default():
        for hyt in range(hyper_iterations):
            hyper_opt.initialize()
            hyper_opt.run(T,
                          train_feed_dict_supplier=tr_s,
                          val_feed_dict_suppliers={error: val_s},
                          hyper_constraints_ops=project)
            if saver: saver.save(hyt, append_string=append_string)
        if saver: saver.pack_save_dictionaries(append_string=append_string)  # zips all saved dictionaries
        return lmd.eval()
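A hedged sketch of a call, reusing the iris pieces from Example #2. It assumes `rf.LinearModel` exposes the `inp`, `out` and `var_list` attributes the function requires and that the module-level helpers referenced above (`_get_projector`, etc.) are importable alongside it; `T`, `lr` and `R` are illustrative, and `saver=None` skips every saving branch.
import tensorflow as tf
import rfho as rf

# Illustrative call; load_iris and rf.LinearModel are used as in Example #2.
data = load_iris(partitions_proportions=(.3, .3))
y = tf.placeholder(tf.float32, name='y')
model = rf.LinearModel(tf.placeholder(tf.float32, name='x'), 4, 3)

final_lmd = data_hyper_cleaner(saver=None,
                               model=model,
                               y=y,
                               data=data,
                               T=500,   # inner optimization steps (illustrative)
                               lr=.1,   # learning rate (illustrative)
                               R=5.,    # radius of the L1 ball (illustrative)
                               optimizer=rf.GradientDescentOptimizer,
                               hyper_iterations=100)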
Example #7
# In[24]:

# ss.run(ts_hy)  # perform a hypergradient descent step....
# print(ss.run(far.utils.hyperparameters()))
# print(ss.run(oo))

# # check with rfho

# In[25]:

import rfho as rf

# In[26]:

w, c, co = rf.vectorize_model([v1, v2], cost, oo, augment=2)

# In[27]:

#dyn = rf.MomentumOptimizer.create(w, 0.1, 0.9, loss=c)
dyn = rf.AdamOptimizer.create(w, loss=c)

# In[28]:

hyperg = rf.HyperOptimizer(dyn, {co: lmbd}, rf.ReverseHG)

# In[29]:

hyperg.initialize()

# In[30]:
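The fragment stops at cell 30 without showing the optimization loop. Based on the `run` calls in Examples #4 to #6, a hedged guess at how this `hyperg` object would typically be driven looks like the following; the suppliers and the step count are placeholders, not taken from the notebook.
# Placeholder zero-argument feed-dict suppliers; real problems would use
# something like data.train.create_supplier(x, y) as in Example #6.  Empty
# dicts are assumed to suffice for a toy cost defined only on the variables v1 and v2.
train_supplier = lambda *args: {}
val_supplier = lambda *args: {}

hyperg.run(100,  # illustrative number of inner optimization steps
           train_feed_dict_supplier=train_supplier,
           val_feed_dict_suppliers={co: val_supplier})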