Example 1
 def _call(*args, **_kwargs):
     if tensors:
         _tensors = [
             tf.get_default_graph().get_tensor_by_name(tns + ':0')
             if isinstance(tns, str) else tns for tns in tensors
         ]
     elif key:
         _tensors = tf.get_collection(key, scope=scope)
     elif name_contains:
         _names = rf.flatten_list([[
             n.name for n in tf.get_default_graph().as_graph_def().node
             if nc in n.name
         ] for nc in as_list(name_contains)])
         return Records.tensors(*_names,
                                rec_name=rec_name,
                                op=op,
                                fd=fd,
                                condition=True)(*args, **_kwargs)
     else:
         raise NotImplementedError(
             'One of tensors, key or name_contains should be given')
     # try with a dictionary of the form {simple name of placeholder (str): data}
     _rs2 = rf.flatten_list([
         rec_name + rf.simple_name(tns.name),
         op(tns),
         Records._process_feed_dicts_for_rec(fd, *args, **_kwargs),
         condition
     ] for tns in _tensors)
     return _rs2
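The closure above resolves tensors (by explicit reference or name, by graph collection, or by name substring) and then emits, for every resolved tensor, a [label, op(tensor), feed_dict, condition] quadruple that rf.flatten_list merges into a single flat record. A minimal, rf-free sketch of that flattening, assuming rf.flatten_list flattens exactly one level (the labels and op names below are placeholders):

def flatten_one_level(list_of_lists):
    # the same one-level flattening that rf.flatten_list is assumed to perform
    return [item for sub in list_of_lists for item in sub]

records = flatten_one_level(
    ['val::' + name, name + '_op', {}, True] for name in ('accuracy', 'error'))
print(records)
# ['val::accuracy', 'accuracy_op', {}, True, 'val::error', 'error_op', {}, True]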
Example 2
 def _call(*args, **kwargs):
     hyper_optimizer = args[0]
     assert isinstance(hyper_optimizer, rf.HyperOptimizer)
     return rf.flatten_list([
         'grad::' + rf.simple_name(hyp),
         hyper_optimizer.hyper_gradients.hyper_gradients_dict[hyp]
     ] for hyp in hyper_optimizer.hyper_list)
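For context, a hedged, rf-free illustration of the record this callable produces: labels of the form 'grad::<hyperparameter name>' interleaved with the corresponding hypergradient tensors, in the same splice-friendly layout used by the Saver calls in Examples 3 and 6 (the names and gradient strings below are placeholders, not real TensorFlow tensors):

from itertools import chain

hyper_list = ['eta', 'mu']
hyper_gradients_dict = {'eta': 'd_val_error/d_eta', 'mu': 'd_val_error/d_mu'}

record = list(chain.from_iterable(
    ['grad::' + hyp, hyper_gradients_dict[hyp]] for hyp in hyper_list))
print(record)
# ['grad::eta', 'd_val_error/d_eta', 'grad::mu', 'd_val_error/d_mu']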
Example 3
def experiment(name_of_experiment,
               collect_data=True,
               datasets=None,
               model='log_reg',
               model_kwargs=None,
               l1=0.,
               l2=0.,
               synthetic_hypers=None,
               set_T=None,
               optimizer=rf.MomentumOptimizer,
               optimizer_kwargs=None,
               batch_size=200,
               algo_hyper_wrt_tr_error=False,
               mode='reverse',
               hyper_optimizer=rf.AdamOptimizer,
               hyper_optimizer_kwargs=None,
               hyper_iterations=100,
               hyper_batch_size=100,
               epochs=20,
               do_print=True):

    assert mode in HO_MODES

    # set random seeds!!!!
    np.random.seed(1)
    tf.set_random_seed(1)

    if synthetic_hypers is not None:
        batch_size = synthetic_hypers  # otherwise I go crazy!

    from rfho.examples.common import save_setting, Saver
    if name_of_experiment is not None:
        rf.settings['NOTEBOOK_TITLE'] = name_of_experiment
    if collect_data:
        save_setting(vars(), excluded=datasets, append_string='_%s' % mode)

    if datasets is None: datasets = load_dataset()

    x, model = create_model(datasets, model, **model_kwargs or {})
    s, out, ws, y, error, training_error, rho_l1s, reg_l1s, rho_l2s, reg_l2s, accuracy, base_tr_error, gamma = \
        define_errors_default_models(model, l1, l2, synthetic_hypers=synthetic_hypers,
                                     augment=optimizer.get_augmentation_multiplier())

    if optimizer_kwargs is None:
        optimizer_kwargs = {
            'lr': tf.Variable(.01, name='eta'),
            'mu': tf.Variable(.5, name='mu')
        }

    tr_dynamics = optimizer.create(s,
                                   loss=training_error,
                                   w_is_state=True,
                                   **optimizer_kwargs)

    # hyperparameters part!
    algorithmic_hyperparameters = []
    eta = tr_dynamics.learning_rate
    if isinstance(eta, tf.Variable):
        if mode != 'reverse':
            algorithmic_hyperparameters.append(
                (eta, tr_dynamics.d_dynamics_d_learning_rate()))
        else:
            algorithmic_hyperparameters.append(eta)
    if hasattr(tr_dynamics, 'momentum_factor'):
        mu = tr_dynamics.momentum_factor
        if isinstance(mu, tf.Variable):
            if mode != 'reverse':
                algorithmic_hyperparameters.append(
                    (mu, tr_dynamics.d_dynamics_d_momentum_factor()))
            else:
                algorithmic_hyperparameters.append(mu)

    regularization_hyperparameters = []
    # vectorized representation of the model weights (always the first element)
    vec_w = s.var_list(rf.Vl_Mode.TENSOR)[0]
    if rho_l1s is not None:
        if mode != 'reverse':
            regularization_hyperparameters += [
                (r1,
                 tr_dynamics.d_dynamics_d_linear_loss_term(
                     tf.gradients(er1, vec_w)[0]))
                for r1, er1 in zip(rho_l1s, reg_l1s)
            ]
        else:
            regularization_hyperparameters += rho_l1s
    if rho_l2s is not None:
        if mode != 'reverse':
            regularization_hyperparameters += [
                (r2,
                 tr_dynamics.d_dynamics_d_linear_loss_term(
                     tf.gradients(er2, vec_w)[0]))
                for r2, er2 in zip(rho_l2s, reg_l2s)
            ]
        else:
            regularization_hyperparameters += rho_l2s

    synthetic_hyperparameters = []
    if synthetic_hypers:
        if mode != 'reverse':
            da_grad = tf.transpose(
                tf.stack([
                    tf.gradients(base_tr_error[k], vec_w)[0]
                    for k in range(synthetic_hypers)
                ]))

            d_phi_d_gamma = rf.utils.ZMergedMatrix([-eta * da_grad, da_grad])
            synthetic_hyperparameters.append((gamma, d_phi_d_gamma))
        else:
            synthetic_hyperparameters.append(gamma)

    # end of hyperparameters

    # create hyper_dict
    hyper_dict = {
        error: regularization_hyperparameters + synthetic_hyperparameters
    }
    if algo_hyper_wrt_tr_error:  # it is possible to optimize different hyperparameters wrt different validation errors
        hyper_dict[training_error] = algorithmic_hyperparameters
    else:
        hyper_dict[error] += algorithmic_hyperparameters

    print(hyper_dict)

    hyper_gradients = rf.ReverseHyperGradient(tr_dynamics, hyper_dict) if mode == 'reverse' else \
        rf.ForwardHyperGradient(tr_dynamics, hyper_dict)

    hyper_optimizers = rf.create_hyperparameter_optimizers(
        hyper_gradients, hyper_optimizer, **hyper_optimizer_kwargs or {})
    positivity = rf.positivity(hyper_gradients.hyper_list)

    # builds an instance of Real-Time Hyperparameter Optimization if mode is 'rtho'.
    # RealTimeHO exploits partial hypergradients computed with forward-mode to perform hyperparameter updates
    # while the model is training...
    rtho = rf.RealTimeHO(hyper_gradients, hyper_optimizers,
                         positivity) if mode == 'rtho' else None

    # stochastic descent
    import rfho.datasets as dt
    ev_data = dt.ExampleVisiting(datasets,
                                 batch_size=batch_size,
                                 epochs=epochs)
    ev_data.generate_visiting_scheme()
    tr_supplier = ev_data.create_train_feed_dict_supplier(x, y)
    val_supplier = ev_data.create_all_valid_feed_dict_supplier(x, y)
    test_supplier = ev_data.create_all_test_feed_dict_supplier(x, y)

    def all_training_supplier(step=None):
        return {x: datasets.train.data, y: datasets.train.target}

    # feed_dict supplier for validation errors
    val_feed_dict_suppliers = {error: val_supplier}
    if algo_hyper_wrt_tr_error:
        val_feed_dict_suppliers[training_error] = all_training_supplier

    def calculate_memory_usage():
        memory_usage = rf.simple_size_of_with_pickle([
            hyper_gradients.w.eval(),
            [h.eval() for h in hyper_gradients.hyper_list]
        ])
        if mode == 'reverse':
            return memory_usage + rf.simple_size_of_with_pickle([
                hyper_gradients.w_hist,
                [p.eval() for p in hyper_gradients.p_dict.values()]
            ])
        else:
            return memory_usage + rf.simple_size_of_with_pickle(
                [[z.eval() for z in hyper_gradients.zs]])

    # number of iterations
    T = set_T or ev_data.T

    hyper_grads = hyper_gradients.hyper_gradients_dict
    # create a Saver object
    saver = Saver('step',
                  lambda step: step,
                  'test accuracy',
                  accuracy,
                  test_supplier,
                  'validation accuracy',
                  accuracy,
                  val_supplier,
                  'training accuracy',
                  accuracy,
                  tr_supplier,
                  'validation error',
                  error,
                  val_supplier,
                  'memory usage (mb)',
                  lambda step: calculate_memory_usage() * 9.5367e-7,
                  'weights',
                  vec_w,
                  '# weights',
                  lambda step: vec_w.get_shape().as_list()[0],
                  '# hyperparameters',
                  lambda step: len(hyper_gradients.hyper_list),
                  '# iterations',
                  lambda step: T,
                  *rf.flatten_list(
                      [rf.simple_name(hyp), [hyp, hyper_grads[hyp]]]
                      for hyp in hyper_gradients.hyper_list),
                  do_print=do_print,
                  collect_data=collect_data)

    with tf.Session(config=config).as_default() as ss:
        saver.timer.start()

        if mode == 'rtho':  # here we do not have hyper-iterations
            rtho.initialize()  # helper for initializing all variables...
            for k in range(hyper_iterations):
                rtho.hyper_batch(
                    hyper_batch_size,
                    train_feed_dict_supplier=tr_supplier,
                    val_feed_dict_suppliers=val_feed_dict_suppliers)

                saver.save(k, append_string='_%s' % mode)

        else:  # here we do complete hyper-iterations..
            #  initialize hyperparameters and support variables of hyperparameter optimizers
            tf.variables_initializer(hyper_gradients.hyper_list).run()
            for hod in hyper_optimizers:
                hod.support_variables_initializer().run()

            for k in range(hyper_iterations):  # start hyper-iterations
                hyper_gradients.run_all(
                    T,
                    train_feed_dict_supplier=tr_supplier,
                    val_feed_dict_suppliers=val_feed_dict_suppliers)

                # update hyperparameters
                for hod in hyper_optimizers:
                    ss.run(hod.assign_ops)
                for prj in positivity:
                    ss.run(prj)

                saver.save(k, append_string='_%s' % mode)
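A hedged invocation sketch for the experiment above (the experiment name is a placeholder; the function also relies on module-level helpers from the original script such as HO_MODES, load_dataset, create_model, define_errors_default_models and config):

if __name__ == '__main__':
    experiment('mnist_l1l2_reverse',   # placeholder experiment name
               collect_data=False,     # skip saving settings and records
               mode='reverse',         # full reverse-mode hypergradient computation
               l1=1.e-4, l2=1.e-4,     # initial values of the regularization weights
               hyper_iterations=10,
               epochs=5)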
Example 4
 def _call(*args, **kwargs):
     hyper_optimizer = args[0]
     assert isinstance(hyper_optimizer, rf.HyperOptimizer)
     return rf.flatten_list([rf.simple_name(hyp), hyp]
                            for hyp in hyper_optimizer.hyper_list)
Example 5
def track_tensors(*tensors):
    # print(tensors)
    with tf.name_scope(TRACK):
        for t in tensors:
            tf.identity(t, name=rf.simple_name(t))
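A hedged usage sketch (TRACK is the module-level name-scope constant assumed by track_tensors, and rf.simple_name is assumed to strip the ':0' suffix from a tensor name):

import tensorflow as tf  # TensorFlow 1.x graph-mode API, as in the rest of these examples

# assuming a fresh default graph
loss = tf.constant(0.3, name='loss')
accuracy = tf.constant(0.9, name='accuracy')
track_tensors(loss, accuracy)

# under those assumptions the tracked copies can later be looked up by name, e.g.
tracked_loss = tf.get_default_graph().get_tensor_by_name(TRACK + '/loss:0')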
Example 6
def experiment(name_of_experiment,
               collect_data=False,
               datasets=None,
               model='log_reg',
               model_kwargs=None,
               l1=0.,
               l2=0.,
               synthetic_hypers=None,
               set_T=None,
               optimizer=rf.MomentumOptimizer,
               optimizer_kwargs=None,
               batch_size=200,
               algo_hyper_wrt_tr_error=False,
               mode='reverse',
               hyper_optimizer=rf.AdamOptimizer,
               hyper_optimizer_kwargs=None,
               hyper_iterations=100,
               hyper_batch_size=100,
               epochs=None,
               do_print=True):
    """
    General method for running various simple experiments (on the MNIST dataset) with the RFHO package.

    :param name_of_experiment: a name for the experiment; it is used as the root folder for the saver (this is the
                                only positional parameter)
    :param collect_data: (default False) whether to save data
    :param datasets: the datasets to use (usually MNIST; loaded with `load_dataset` if None)
    :param model: (default 'log_reg', i.e. logistic regression) model type
    :param model_kwargs:
    :param l1: initial value for the l1 regularizer weight (if None, it is not used)
    :param l2: initial value for the l2 regularizer weight (if None, it is not used)
    :param synthetic_hypers: (default None, for benchmarking purposes) if an integer, adds that many synthetic
                                hyperparameters to the task
    :param set_T:
    :param optimizer: (default `MomentumOptimizer`) optimizer for the parameters
    :param optimizer_kwargs:
    :param batch_size:
    :param epochs: number of epochs
    :param algo_hyper_wrt_tr_error: (default False) if True, optimizes the algorithmic hyperparameters (learning rate, ...)
                                    w.r.t. the training error instead of the validation error
    :param mode: one of 'forward', 'reverse' or 'rtho'
    :param hyper_optimizer: optimizer for the hyperparameters
    :param hyper_optimizer_kwargs:
    :param hyper_iterations: number of hyper-iterations
    :param hyper_batch_size: hyper-batch size when RTHO is used
    :param do_print: if True (default) prints intermediate results
    :return: a list with the dictionary returned by the saver at each hyper-iteration (empty if no saver is created)
    """
    assert mode in HO_MODES

    if synthetic_hypers:
        batch_size = synthetic_hypers  # otherwise I go crazy!

    if datasets is None: datasets = load_dataset()

    x, model = create_model(datasets, model, **model_kwargs or {})
    s, out, ws, y, error, training_error, rho_l1s, reg_l1s, rho_l2s, reg_l2s, accuracy, base_tr_error, gamma = \
        define_errors_default_models(model, l1, l2, synthetic_hypers=synthetic_hypers,
                                     augment=optimizer.get_augmentation_multiplier())

    if optimizer_kwargs is None:
        optimizer_kwargs = {
            'lr': tf.Variable(.01, name='eta'),
            'mu': tf.Variable(.5, name='mu')
        }

    tr_dynamics = optimizer.create(s,
                                   loss=training_error,
                                   w_is_state=True,
                                   **optimizer_kwargs)

    # hyperparameters part!
    algorithmic_hyperparameters = []
    eta = tr_dynamics.learning_rate
    if isinstance(eta, tf.Variable):
        algorithmic_hyperparameters.append(eta)
    if hasattr(tr_dynamics, 'momentum_factor'):
        mu = tr_dynamics.momentum_factor
        if isinstance(mu, tf.Variable):
            algorithmic_hyperparameters.append(mu)

    regularization_hyperparameters = []
    # vectorized representation of the model weights (always the first element)
    vec_w = s.var_list(rf.VlMode.TENSOR)[0]
    if rho_l1s is not None:
        regularization_hyperparameters += rho_l1s
    if rho_l2s is not None:
        regularization_hyperparameters += rho_l2s

    synthetic_hyperparameters = []
    if synthetic_hypers:
        synthetic_hyperparameters.append(gamma)

    hyper_dict = {
        error: regularization_hyperparameters + synthetic_hyperparameters
    }  # create hyper_dict
    # end of hyperparameters

    if algo_hyper_wrt_tr_error:  # it is possible to optimize different hyperparameters wrt different validation errors
        hyper_dict[training_error] = algorithmic_hyperparameters
    else:
        hyper_dict[error] += algorithmic_hyperparameters
    # print(hyper_dict)

    hyper_opt = rf.HyperOptimizer(
        tr_dynamics,
        hyper_dict,
        method=rf.ReverseHG if mode == 'reverse' else rf.ForwardHG,
        hyper_optimizer_class=hyper_optimizer,
        **hyper_optimizer_kwargs or {})

    positivity = rf.positivity(hyper_opt.hyper_list)

    # stochastic descent
    ev_data = ExampleVisiting(datasets.train,
                              batch_size=batch_size,
                              epochs=epochs)
    if epochs: ev_data.generate_visiting_scheme()
    tr_supplier = ev_data.create_feed_dict_supplier(x, y)
    val_supplier = datasets.validation.create_supplier(x, y)
    test_supplier = datasets.test.create_supplier(x, y)

    def _all_training_supplier():
        return {x: datasets.train.data, y: datasets.train.target}

    # feed_dict supplier for validation errors
    val_feed_dict_suppliers = {error: val_supplier}
    if algo_hyper_wrt_tr_error:
        val_feed_dict_suppliers[training_error] = _all_training_supplier

    def _calculate_memory_usage():
        memory_usage = rf.simple_size_of_with_pickle([
            hyper_opt.hyper_gradients.w.eval(),
            [h.eval() for h in hyper_opt.hyper_gradients.hyper_list]
        ])
        if mode == 'reverse':
            return memory_usage + rf.simple_size_of_with_pickle([
                hyper_opt.hyper_gradients.w_hist,
                [p.eval() for p in hyper_opt.hyper_gradients.p_dict.values()]
            ])
        else:
            return memory_usage + rf.simple_size_of_with_pickle(
                [[z.eval() for z in hyper_opt.hyper_gradients.zs]])

    # number of iterations
    T = (set_T or ev_data.T) if mode != 'rtho' else hyper_batch_size

    hyper_grads = hyper_opt.hyper_gradients.hyper_gradients_dict
    # create a Saver object
    if name_of_experiment:
        saver = rf.Saver(name_of_experiment,
                         'step',
                         lambda step: step,
                         'mode',
                         lambda step: mode,
                         'test accuracy',
                         accuracy,
                         test_supplier,
                         'validation accuracy',
                         accuracy,
                         val_supplier,
                         'training accuracy',
                         accuracy,
                         tr_supplier,
                         'validation error',
                         error,
                         val_supplier,
                         'memory usage (mb)',
                         lambda step: _calculate_memory_usage() * 9.5367e-7,
                         'weights',
                         vec_w,
                         '# weights',
                         lambda step: vec_w.get_shape().as_list()[0],
                         '# hyperparameters',
                         lambda step: len(hyper_opt.hyper_list),
                         '# iterations',
                         lambda step: T,
                         *rf.flatten_list(
                             [rf.simple_name(hyp), [hyp, hyper_grads[hyp]]]
                             for hyp in hyper_opt.hyper_list),
                         do_print=do_print,
                         collect_data=collect_data)
    else:
        saver = None

    save_dict_history = []

    with tf.Session(config=rf.CONFIG_GPU_GROWTH).as_default():
        if saver: saver.timer.start()
        hyper_opt.initialize()
        for k in range(hyper_iterations):
            hyper_opt.run(T,
                          train_feed_dict_supplier=tr_supplier,
                          val_feed_dict_suppliers=val_feed_dict_suppliers,
                          hyper_constraints_ops=positivity)

            if saver:
                save_dict_history.append(
                    saver.save(k, append_string='_%s' % mode))

            if mode != 'rtho':
                hyper_opt.initialize()

    return save_dict_history
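A hedged invocation sketch for this refactored experiment (the experiment name is a placeholder; with mode='rtho' the number of inner iterations T falls back to hyper_batch_size, and the returned list collects whatever saver.save produced at each hyper-iteration):

if __name__ == '__main__':
    history = experiment('mnist_rtho_demo',   # placeholder experiment name
                         collect_data=False,  # print results without persisting them
                         mode='rtho',         # real-time hyperparameter optimization
                         hyper_iterations=50,
                         hyper_batch_size=100)
    print(len(history))  # one saved record per hyper-iteration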