def _call(*args, **_kwargs):
    if tensors:
        _tensors = [
            tf.get_default_graph().get_tensor_by_name(tns + ':0')
            if isinstance(tns, str) else tns for tns in tensors
        ]
    elif key:
        _tensors = tf.get_collection(key, scope=scope)
    elif name_contains:
        _names = rf.flatten_list([[
            n.name for n in tf.get_default_graph().as_graph_def().node
            if nc in n.name
        ] for nc in as_list(name_contains)])
        return Records.tensors(*_names, rec_name=rec_name, op=op, fd=fd,
                               condition=True)(*args, **_kwargs)
    else:
        raise NotImplementedError(
            'One of tensors, key or name_contains should be given')

    # try with a dictionary of the form {simple name of placeholder (string): data}
    _rs2 = rf.flatten_list([
        rec_name + rf.simple_name(tns.name),
        op(tns),
        Records._process_feed_dicts_for_rec(fd, *args, **_kwargs),
        condition
    ] for tns in _tensors)
    return _rs2
def _call(*args, **kwargs):
    hyper_optimizer = args[0]
    assert isinstance(hyper_optimizer, rf.HyperOptimizer)
    return rf.flatten_list([
        'grad::' + rf.simple_name(hyp),
        hyper_optimizer.hyper_gradients.hyper_gradients_dict[hyp]
    ] for hyp in hyper_optimizer.hyper_list)
def experiment(name_of_experiment, collect_data=True, datasets=None, model='log_reg', model_kwargs=None,
               l1=0., l2=0., synthetic_hypers=None, set_T=None,
               optimizer=rf.MomentumOptimizer, optimizer_kwargs=None, batch_size=200,
               algo_hyper_wrt_tr_error=False, mode='reverse',
               hyper_optimizer=rf.AdamOptimizer, hyper_optimizer_kwargs=None,
               hyper_iterations=100, hyper_batch_size=100, epochs=20, do_print=True):
    assert mode in HO_MODES

    # set random seeds!!!!
    np.random.seed(1)
    tf.set_random_seed(1)

    if synthetic_hypers is not None:
        batch_size = synthetic_hypers  # otherwise I go crazy!

    from rfho.examples.common import save_setting, Saver
    if name_of_experiment is not None:
        rf.settings['NOTEBOOK_TITLE'] = name_of_experiment
    if collect_data:
        save_setting(vars(), excluded=datasets, append_string='_%s' % mode)

    if datasets is None:
        datasets = load_dataset()

    x, model = create_model(datasets, model, **model_kwargs or {})

    s, out, ws, y, error, training_error, rho_l1s, reg_l1s, rho_l2s, reg_l2s, accuracy, base_tr_error, gamma = \
        define_errors_default_models(model, l1, l2, synthetic_hypers=synthetic_hypers,
                                     augment=optimizer.get_augmentation_multiplier())

    if optimizer_kwargs is None:
        optimizer_kwargs = {
            'lr': tf.Variable(.01, name='eta'),
            'mu': tf.Variable(.5, name='mu')
        }

    tr_dynamics = optimizer.create(s, loss=training_error, w_is_state=True, **optimizer_kwargs)

    # hyperparameters part!
    algorithmic_hyperparameters = []
    eta = tr_dynamics.learning_rate
    if isinstance(eta, tf.Variable):
        if mode != 'reverse':
            algorithmic_hyperparameters.append(
                (eta, tr_dynamics.d_dynamics_d_learning_rate()))
        else:
            algorithmic_hyperparameters.append(eta)

    if hasattr(tr_dynamics, 'momentum_factor'):
        mu = tr_dynamics.momentum_factor
        if isinstance(mu, tf.Variable):
            if mode != 'reverse':
                algorithmic_hyperparameters.append(
                    (mu, tr_dynamics.d_dynamics_d_momentum_factor()))
            else:
                algorithmic_hyperparameters.append(mu)

    regularization_hyperparameters = []
    vec_w = s.var_list(rf.Vl_Mode.TENSOR)[0]  # vectorized representation of _model weights (always the first!)
    if rho_l1s is not None:
        if mode != 'reverse':
            regularization_hyperparameters += [
                (r1, tr_dynamics.d_dynamics_d_linear_loss_term(
                    tf.gradients(er1, vec_w)[0]))
                for r1, er1 in zip(rho_l1s, reg_l1s)
            ]
        else:
            regularization_hyperparameters += rho_l1s

    if rho_l2s is not None:
        if mode != 'reverse':
            regularization_hyperparameters += [
                (r2, tr_dynamics.d_dynamics_d_linear_loss_term(
                    tf.gradients(er2, vec_w)[0]))
                for r2, er2 in zip(rho_l2s, reg_l2s)
            ]
        else:
            regularization_hyperparameters += rho_l2s

    synthetic_hyperparameters = []
    if synthetic_hypers:
        if mode != 'reverse':
            da_grad = tf.transpose(
                tf.stack([
                    tf.gradients(base_tr_error[k], vec_w)[0]
                    for k in range(synthetic_hypers)
                ]))
            d_phi_d_gamma = rf.utils.ZMergedMatrix([-eta * da_grad, da_grad])
            synthetic_hyperparameters.append((gamma, d_phi_d_gamma))
        else:
            synthetic_hyperparameters.append(gamma)
    # end of hyperparameters

    # create hyper_dict
    hyper_dict = {
        error: regularization_hyperparameters + synthetic_hyperparameters
    }
    if algo_hyper_wrt_tr_error:
        # it is possible to optimize different hyperparameters wrt different validation errors
        hyper_dict[training_error] = algorithmic_hyperparameters
    else:
        hyper_dict[error] += algorithmic_hyperparameters

    print(hyper_dict)

    hyper_gradients = rf.ReverseHyperGradient(tr_dynamics, hyper_dict) if mode == 'reverse' else \
        rf.ForwardHyperGradient(tr_dynamics, hyper_dict)

    hyper_optimizers = rf.create_hyperparameter_optimizers(
        hyper_gradients, hyper_optimizer, **hyper_optimizer_kwargs or {})
    positivity = rf.positivity(hyper_gradients.hyper_list)

    # builds an instance of Real Time Hyperparameter optimization if mode is rtho:
    # RealTimeHO exploits partial hypergradients calculated with forward-mode to perform hyperparameter updates
    # while the _model is training...
    rtho = rf.RealTimeHO(hyper_gradients, hyper_optimizers, positivity) if mode == 'rtho' else None

    # stochastic descent
    import rfho.datasets as dt
    ev_data = dt.ExampleVisiting(datasets, batch_size=batch_size, epochs=epochs)
    ev_data.generate_visiting_scheme()
    tr_supplier = ev_data.create_train_feed_dict_supplier(x, y)
    val_supplier = ev_data.create_all_valid_feed_dict_supplier(x, y)
    test_supplier = ev_data.create_all_test_feed_dict_supplier(x, y)

    def all_training_supplier(step=None):
        return {x: datasets.train.data, y: datasets.train.target}

    # feed_dict supplier for validation errors
    val_feed_dict_suppliers = {error: val_supplier}
    if algo_hyper_wrt_tr_error:
        val_feed_dict_suppliers[training_error] = all_training_supplier

    def calculate_memory_usage():
        memory_usage = rf.simple_size_of_with_pickle([
            hyper_gradients.w.eval(),
            [h.eval() for h in hyper_gradients.hyper_list]
        ])
        if mode == 'reverse':
            return memory_usage + rf.simple_size_of_with_pickle([
                hyper_gradients.w_hist,
                [p.eval() for p in hyper_gradients.p_dict.values()]
            ])
        else:
            return memory_usage + rf.simple_size_of_with_pickle(
                [[z.eval() for z in hyper_gradients.zs]])

    # number of iterations
    T = set_T or ev_data.T

    hyper_grads = hyper_gradients.hyper_gradients_dict
    # create a Saver object
    saver = Saver(
        'step', lambda step: step,
        'test accuracy', accuracy, test_supplier,
        'validation accuracy', accuracy, val_supplier,
        'training accuracy', accuracy, tr_supplier,
        'validation error', error, val_supplier,
        'memory usage (mb)', lambda step: calculate_memory_usage() * 9.5367e-7,
        'weights', vec_w,
        '# weights', lambda step: vec_w.get_shape().as_list()[0],
        '# hyperparameters', lambda step: len(hyper_gradients.hyper_list),
        '# iterations', lambda step: T,
        *rf.flatten_list(
            [rf.simple_name(hyp), [hyp, hyper_grads[hyp]]] for hyp
            in hyper_gradients.hyper_list),
        do_print=do_print, collect_data=collect_data)

    with tf.Session(config=config).as_default() as ss:
        saver.timer.start()

        if mode == 'rtho':  # here we do not have hyper-iterations
            rtho.initialize()  # helper for initializing all variables...
            for k in range(hyper_iterations):
                rtho.hyper_batch(
                    hyper_batch_size,
                    train_feed_dict_supplier=tr_supplier,
                    val_feed_dict_suppliers=val_feed_dict_suppliers)

                saver.save(k, append_string='_%s' % mode)
        else:  # here we do complete hyper-iterations..
            # initialize hyperparameters and support variables of hyperparameter optimizers
            tf.variables_initializer(hyper_gradients.hyper_list).run()
            [hod.support_variables_initializer().run() for hod in hyper_optimizers]

            for k in range(hyper_iterations):  # start hyper-iterations
                hyper_gradients.run_all(
                    T,
                    train_feed_dict_supplier=tr_supplier,
                    val_feed_dict_suppliers=val_feed_dict_suppliers)

                # update hyperparameters
                [ss.run(hod.assign_ops) for hod in hyper_optimizers]
                [ss.run(prj) for prj in positivity]

                saver.save(k, append_string='_%s' % mode)
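# Usage sketch (the experiment name and the small settings below are hypothetical, not part of the
# original script): a short real-time hyperparameter optimization (RTHO) run of the experiment
# defined above; all keyword arguments come from its signature.
def _demo_rtho_run():
    experiment('mnist_rtho_demo', collect_data=False, mode='rtho',
               hyper_iterations=10, hyper_batch_size=100, epochs=5)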
def _call(*args, **kwargs):
    hyper_optimizer = args[0]
    assert isinstance(hyper_optimizer, rf.HyperOptimizer)
    return rf.flatten_list([rf.simple_name(hyp), hyp]
                           for hyp in hyper_optimizer.hyper_list)
def track_tensors(*tensors):
    # print(tensors)
    with tf.name_scope(TRACK):
        [tf.identity(t, name=rf.simple_name(t)) for t in tensors]
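# Usage sketch (the toy tensors below are hypothetical, not part of the original module): build a
# scalar tensor and register it under the TRACK name scope, so that it can later be retrieved by
# filtering default-graph node names (e.g. with a name_contains filter as in the record helpers above).
def _track_tensors_demo():
    toy_input = tf.placeholder(tf.float32, name='toy_input')
    toy_loss = tf.reduce_mean(tf.square(toy_input), name='toy_loss')
    track_tensors(toy_loss)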
def experiment(name_of_experiment, collect_data=False, datasets=None, model='log_reg', model_kwargs=None,
               l1=0., l2=0., synthetic_hypers=None, set_T=None,
               optimizer=rf.MomentumOptimizer, optimizer_kwargs=None, batch_size=200,
               algo_hyper_wrt_tr_error=False, mode='reverse',
               hyper_optimizer=rf.AdamOptimizer, hyper_optimizer_kwargs=None,
               hyper_iterations=100, hyper_batch_size=100, epochs=None, do_print=True):
    """
    General method for conducting various simple experiments (on the MNIST dataset) with the RFHO package.

    :param name_of_experiment: a name for the experiment, used as root folder for the saver
                               (this is the only positional parameter)
    :param collect_data: (default False) whether to save data
    :param datasets: the datasets to use (usually MNIST)
    :param model: (default logistic regression) model type
    :param model_kwargs: keyword arguments passed to `create_model`
    :param l1: initial value for the l1 regularizer weight (if None, it is not used)
    :param l2: initial value for the l2 regularizer weight (if None, it is not used)
    :param synthetic_hypers: (default None, for benchmarking purposes) if an integer, adds that number of
                             synthetic hyperparameters to the task
    :param set_T: if given, overrides the number of training iterations per hyper-iteration
    :param optimizer: (default `MomentumOptimizer`) optimizer for the parameters
    :param optimizer_kwargs: keyword arguments for the optimizer (default: lr=0.01 and mu=0.5 variables)
    :param batch_size: mini-batch size for training
    :param epochs: number of epochs
    :param algo_hyper_wrt_tr_error: (default False) if True, optimizes the algorithmic hyperparameters
                                    (learning rate, ...) w.r.t. the training error instead of the validation error
    :param mode: one of 'forward', 'reverse' or 'rtho'
    :param hyper_optimizer: optimizer for the hyperparameters
    :param hyper_optimizer_kwargs: keyword arguments for the hyper-optimizer
    :param hyper_iterations: number of hyper-iterations
    :param hyper_batch_size: hyper-batch size when RTHO is used
    :param do_print: if True (default) prints intermediate results
    :return: the history of dictionaries saved at each hyper-iteration (empty if no saver is created);
             a usage sketch follows the function definition
    """
    assert mode in HO_MODES

    if synthetic_hypers:
        batch_size = synthetic_hypers  # otherwise I go crazy!

    if datasets is None:
        datasets = load_dataset()

    x, model = create_model(datasets, model, **model_kwargs or {})

    s, out, ws, y, error, training_error, rho_l1s, reg_l1s, rho_l2s, reg_l2s, accuracy, base_tr_error, gamma = \
        define_errors_default_models(model, l1, l2, synthetic_hypers=synthetic_hypers,
                                     augment=optimizer.get_augmentation_multiplier())

    if optimizer_kwargs is None:
        optimizer_kwargs = {
            'lr': tf.Variable(.01, name='eta'),
            'mu': tf.Variable(.5, name='mu')
        }

    tr_dynamics = optimizer.create(s, loss=training_error, w_is_state=True, **optimizer_kwargs)

    # hyperparameters part!
    algorithmic_hyperparameters = []
    eta = tr_dynamics.learning_rate
    if isinstance(eta, tf.Variable):
        algorithmic_hyperparameters.append(eta)

    if hasattr(tr_dynamics, 'momentum_factor'):
        mu = tr_dynamics.momentum_factor
        if isinstance(mu, tf.Variable):
            algorithmic_hyperparameters.append(mu)

    regularization_hyperparameters = []
    vec_w = s.var_list(rf.VlMode.TENSOR)[0]  # vectorized representation of _model weights (always the first!)
    if rho_l1s is not None:
        regularization_hyperparameters += rho_l1s
    if rho_l2s is not None:
        regularization_hyperparameters += rho_l2s

    synthetic_hyperparameters = []
    if synthetic_hypers:
        synthetic_hyperparameters.append(gamma)

    # create hyper_dict
    hyper_dict = {
        error: regularization_hyperparameters + synthetic_hyperparameters
    }
    # end of hyperparameters

    if algo_hyper_wrt_tr_error:
        # it is possible to optimize different hyperparameters wrt different validation errors
        hyper_dict[training_error] = algorithmic_hyperparameters
    else:
        hyper_dict[error] += algorithmic_hyperparameters
    # print(hyper_dict)

    hyper_opt = rf.HyperOptimizer(
        tr_dynamics, hyper_dict,
        method=rf.ReverseHG if mode == 'reverse' else rf.ForwardHG,
        hyper_optimizer_class=hyper_optimizer, **hyper_optimizer_kwargs or {})

    positivity = rf.positivity(hyper_opt.hyper_list)

    # stochastic descent
    ev_data = ExampleVisiting(datasets.train, batch_size=batch_size, epochs=epochs)
    if epochs:
        ev_data.generate_visiting_scheme()
    tr_supplier = ev_data.create_feed_dict_supplier(x, y)
    val_supplier = datasets.validation.create_supplier(x, y)
    test_supplier = datasets.test.create_supplier(x, y)

    def _all_training_supplier():
        return {x: datasets.train.data, y: datasets.train.target}

    # feed_dict supplier for validation errors
    val_feed_dict_suppliers = {error: val_supplier}
    if algo_hyper_wrt_tr_error:
        val_feed_dict_suppliers[training_error] = _all_training_supplier

    def _calculate_memory_usage():
        memory_usage = rf.simple_size_of_with_pickle([
            hyper_opt.hyper_gradients.w.eval(),
            [h.eval() for h in hyper_opt.hyper_gradients.hyper_list]
        ])
        if mode == 'reverse':
            return memory_usage + rf.simple_size_of_with_pickle([
                hyper_opt.hyper_gradients.w_hist,
                [p.eval() for p in hyper_opt.hyper_gradients.p_dict.values()]
            ])
        else:
            return memory_usage + rf.simple_size_of_with_pickle(
                [[z.eval() for z in hyper_opt.hyper_gradients.zs]])

    # number of iterations
    T = set_T or ev_data.T if mode != 'rtho' else hyper_batch_size

    hyper_grads = hyper_opt.hyper_gradients.hyper_gradients_dict
    # create a Saver object
    if name_of_experiment:
        saver = rf.Saver(
            name_of_experiment,
            'step', lambda step: step,
            'mode', lambda step: mode,
            'test accuracy', accuracy, test_supplier,
            'validation accuracy', accuracy, val_supplier,
            'training accuracy', accuracy, tr_supplier,
            'validation error', error, val_supplier,
            'memory usage (mb)', lambda step: _calculate_memory_usage() * 9.5367e-7,
            'weights', vec_w,
            '# weights', lambda step: vec_w.get_shape().as_list()[0],
            '# hyperparameters', lambda step: len(hyper_opt.hyper_list),
            '# iterations', lambda step: T,
            *rf.flatten_list(
                [rf.simple_name(hyp), [hyp, hyper_grads[hyp]]]
                for hyp in hyper_opt.hyper_list),
            do_print=do_print, collect_data=collect_data)
    else:
        saver = None

    save_dict_history = []

    with tf.Session(config=rf.CONFIG_GPU_GROWTH).as_default():
        if saver:
            saver.timer.start()

        hyper_opt.initialize()
        for k in range(hyper_iterations):
            hyper_opt.run(T, train_feed_dict_supplier=tr_supplier,
                          val_feed_dict_suppliers=val_feed_dict_suppliers,
                          hyper_constraints_ops=positivity)

            if saver:
                save_dict_history.append(saver.save(k, append_string='_%s' % mode))

            if mode != 'rtho':
                hyper_opt.initialize()

    return save_dict_history
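# Usage sketch (the experiment name and the reduced settings below are hypothetical, not part of
# the original script): run a short reverse-mode experiment on the default dataset and keep the
# history of result dictionaries returned by `experiment`.
if __name__ == '__main__':
    demo_history = experiment('mnist_reverse_demo', collect_data=False, mode='reverse',
                              hyper_iterations=5, epochs=2, do_print=True)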