def train_and_test(hparams):
    """Train for `hparams.epochs` epochs, evaluating on the test split each epoch.

    Per-mode scalars/images go through `logger`; after the final epoch an
    hparams session summary (start + end protos) is written with the last
    epoch's elapsed time.
    """
    data = get_dataset(hparams)
    net = Model(hparams)
    opt = get_optimizer(hparams)
    criterion = keras.losses.SparseCategoricalCrossentropy()
    logger = Logger(hparams, opt)
    # The session-start proto is built up front but only written at the very
    # end, together with the session-end proto.
    start_pb = hparams_summary.session_start_pb(hparams=hparams.__dict__)
    for epoch in range(hparams.epochs):
        tic = time.time()
        for images, labels in data['train']:
            loss, predictions = train_step(images, labels, net, opt, criterion)
            logger.log_progress(loss, labels, predictions, mode='train')
        epoch_secs = time.time() - tic
        logger.write_scalars(mode='train')
        for images, labels in data['test']:
            logger.write_images(images, mode='test')
            loss, predictions = test_step(images, labels, net, criterion)
            logger.log_progress(loss, labels, predictions, mode='test')
        logger.write_scalars(mode='test', elapse=epoch_secs)
        logger.print_progress(epoch, epoch_secs)
    end_pb = hparams_summary.session_end_pb(api_pb2.STATUS_SUCCESS)
    logger.write_hparams_summary(start_pb, end_pb, epoch_secs)
def test_session_start_pb(self):
    """Checks `summary.session_start_pb` against a hand-built proto.

    Builds the expected `SessionStartInfo` directly, then asserts that
    `session_start_pb` wraps the same proto into a `tf.Summary` under the
    hparams plugin tag.
    """
    start_time_secs = 314160
    session_start_info = plugin_data_pb2.SessionStartInfo(
        model_uri="//model/uri",
        group_name="session_group",
        start_time_secs=start_time_secs)
    session_start_info.hparams["param1"].string_value = "string"
    # TODO: Fix nondeterminism.
    # NOTE(review): the extra hparams below were disabled, presumably because
    # proto-map serialization order made the expected bytes unstable across
    # runs -- confirm before re-enabling.
    # session_start_info.hparams["param2"].number_value = 5.0
    # session_start_info.hparams["param3"].bool_value = False
    self.assertEqual(
        summary.session_start_pb(
            hparams={
                "param1": "string",
                # "param2":5,
                # "param3":False,
            },
            model_uri="//model/uri",
            group_name="session_group",
            start_time_secs=start_time_secs),
        tf.Summary(value=[
            tf.Summary.Value(
                tag="_hparams_/session_start_info",
                metadata=tf.SummaryMetadata(
                    plugin_data=tf.SummaryMetadata.PluginData(
                        plugin_name="hparams",
                        content=(plugin_data_pb2.HParamsPluginData(
                            version=0,
                            session_start_info=session_start_info).
                                 SerializeToString()))))
        ]))
def run_squad(sess, params):
    """Train and evaluate the SQuAD model for one hparams session.

    Args:
      sess: session id string; used as the suffix of the train log directory.
      params: parameter object providing `optimizer`, `dir_save`, `log_dir`,
        `hparams`, and `train_epochs`.
    """
    # with tf.distribute.MirroredStrategy().scope():
    model = model_for(params)
    model.compile(
        optimizer=params.optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    ds_train = dataset_for('train', params)
    ds_test = dataset_for('test', params)
    save_p = pth.Path(params.dir_save)
    if save_p.exists():
        # One batch builds the variables so load_weights has shapes to
        # restore into.  NOTE(review): assumes ds_train is sliceable — confirm.
        model.train_on_batch(ds_train[:1])
        model.load_weights(save_p)
    model.summary()
    p = params.log_dir + '/train/' + sess
    writer = tf.summary.create_file_writer(p)
    sum_s = hparams.session_start_pb(hparams=params.hparams)
    cbacks = [
        kcb.TensorBoard(
            log_dir=p,
            histogram_freq=1,
            embeddings_freq=0,
            update_freq='epoch'),
        # kcb.EarlyStopping(
        #     monitor='val_loss', min_delta=1e-2, patience=2, verbose=True),
    ]
    if save_p.exists():
        cbacks.append(
            kcb.ModelCheckpoint(
                # Fixed: the keyword is `filepath`; `model_save_path` is not
                # a valid ModelCheckpoint argument and raised a TypeError.
                filepath=save_p,
                save_best_only=True,
                monitor='val_loss',
                verbose=True))
    hist = model.fit(
        ds_train,
        callbacks=cbacks,
        epochs=params.train_epochs,
        validation_data=ds_test)
    print(f'History: {hist.history}')
    if save_p.exists():
        model.save_weights(save_p, save_format='tf')
    loss, acc = model.evaluate(ds_test)
    print(f'\nTest loss, acc: {loss}, {acc}')
    with writer.as_default():
        e = tf.compat.v1.Event(summary=sum_s).SerializeToString()
        tf.summary.import_event(e)
        tf.summary.scalar('accuracy', acc, step=1, description='Accuracy')
        sum_e = hparams.session_end_pb(api_pb2.STATUS_SUCCESS)
        e = tf.compat.v1.Event(summary=sum_e).SerializeToString()
        tf.summary.import_event(e)
def _experiment_impl(args):
    """Run one hyperopt trial; returns the negated average sum reward."""
    alpha, alpha_decay, gamma, init_epsilon, n_exploration_episodes = args
    hparams = {
        'alpha': alpha,
        'alpha_decay': alpha_decay,
        'gamma': gamma,
        'init_epsilon': init_epsilon,
        'n_exploration_episodes': n_exploration_episodes,
    }
    # Record every sampled value on the enclosing object for later inspection.
    parent_self._alpha_list.append(alpha)
    parent_self._alpha_decay_list.append(alpha_decay)
    parent_self._gamma_list.append(gamma)
    parent_self._init_epsilon_list.append(init_epsilon)
    parent_self._n_exploration_episodes.append(n_exploration_episodes)
    run_dir = (
        log_dir +
        "/alpha_{}_alpha_decay_{}_gamma_{:.3f}_init_eps{}_n_explor_{}".format(
            alpha, alpha_decay, gamma, init_epsilon, n_exploration_episodes))
    writer = tf.summary.create_file_writer(run_dir)
    with writer.as_default():
        start_pb = hparams_summary.session_start_pb(hparams=hparams)
        reinforce = ActorCriticNetworkTD(
            env,
            alpha=alpha,
            alpha_decay=alpha_decay,
            gamma=gamma,
            init_epsilon=init_epsilon,
            min_epsilon=0.0,
            batch_normalization=False,
            writer=writer)
        total_episodes = 1000
        steps_per_episode = 200
        for episode in range(total_episodes):
            reinforce.training_episode(
                episode_lenght=steps_per_episode,
                num_exploration_episodes=n_exploration_episodes,
                debug=False)
            average_sum_reward = reinforce.evaluate_average_sum_reward(5)
            print(
                'Average sum reward after episode:{} for alpha: {}, alpha_decay: {}, gamma: {}, init_eps: {}, n_exploration_episodes: {}, reward: {}'
                .format(episode, alpha, alpha_decay, gamma, init_epsilon,
                        n_exploration_episodes, average_sum_reward))
        tf.summary.scalar('sum_reward', average_sum_reward, step=1,
                          description="Average sum reward")
        end_pb = hparams_summary.session_end_pb(api_pb2.STATUS_SUCCESS)
        tf.summary.import_event(
            tf.compat.v1.Event(summary=start_pb).SerializeToString())
        tf.summary.import_event(
            tf.compat.v1.Event(summary=end_pb).SerializeToString())
    # hyperopt only minimizes (fmin; there is no fmax), so negate the reward.
    return -average_sum_reward
def on_train_begin(self, logs=None):
    """Write the hparams session-start summary when training starts."""
    # `logs` is part of the Keras callback signature but carries nothing here.
    del logs
    start_pb = summary.session_start_pb(
        self._hparams, group_name=self._group_name)
    self._write_summary(start_pb, step=0)
def run_quess(sess, params):
    """Compile, train, and evaluate the model for one hparams session.

    Args:
      sess: session id string; appended to the train log directory.
      params: parameter object providing dirs, `hparams`, `train_steps`,
        `eval_frequency`, and `eval_steps`.
    """
    PS = params
    ds_train = dataset_for('train', PS)
    ds_test = dataset_for('test', PS)
    # with tf.distribute.MirroredStrategy().scope():
    model = model_for(PS)
    model.compile(**compile_args_for(PS))
    # One batch builds the variables before any weight restore below.
    # NOTE(review): assumes ds_train supports slicing — confirm.
    model.train_on_batch(ds_train[:1])
    save_p = pth.Path(PS.dir_save)
    if save_p.exists():
        model.load_weights(save_p)
    model.summary()
    p = PS.log_dir + '/train/' + sess
    writer = tf.summary.create_file_writer(p)
    sum_s = hparams.session_start_pb(hparams=PS.hparams)
    cbacks = [
        # kcb.LambdaCallback(on_epoch_end=log_confusion_matrix),
        kcb.History(),
        kcb.BaseLogger(),
        kcb.TensorBoard(log_dir=p,
                        histogram_freq=1,
                        embeddings_freq=0,
                        update_freq='epoch'),
        # kcb.EarlyStopping(
        #     monitor='val_loss', min_delta=1e-2, patience=2, verbose=True),
    ]
    if save_p.exists():
        cbacks.append(
            # Fixed: ModelCheckpoint's keyword is `filepath`;
            # `model_save_path` is not accepted and raised a TypeError.
            kcb.ModelCheckpoint(filepath=save_p,
                                save_best_only=True,
                                monitor='val_loss',
                                verbose=True))
    hist = model.fit(
        ds_train,
        epochs=PS.train_steps // PS.eval_frequency,
        steps_per_epoch=PS.eval_frequency,
        validation_data=ds_test,
        validation_steps=PS.eval_steps,
        callbacks=cbacks,
    )
    print(f'History: {hist.history}')
    if save_p.exists():
        model.save_weights(save_p, save_format='tf')
    loss, acc = model.evaluate(ds_test)
    print(f'\nTest loss, acc: {loss}, {acc}')
    with writer.as_default():
        e = tf.compat.v1.Event(summary=sum_s).SerializeToString()
        tf.summary.import_event(e)
        tf.summary.scalar('accuracy', acc, step=1, description='Accuracy')
        sum_e = hparams.session_end_pb(api_pb2.STATUS_SUCCESS)
        e = tf.compat.v1.Event(summary=sum_e).SerializeToString()
        tf.summary.import_event(e)
def test_session_start_pb(self):
    """`session_start_pb` should wrap a SessionStartInfo proto in a tf.Summary.

    Builds the expected proto directly (string, number, and bool hparams)
    and compares it against the plugin-tagged summary the helper produces.
    """
    start_time_secs = 314160
    session_start_info = plugin_data_pb2.SessionStartInfo(
        model_uri="//model/uri",
        group_name="session_group",
        start_time_secs=start_time_secs)
    session_start_info.hparams["param1"].string_value = "string"
    session_start_info.hparams["param2"].number_value = 5.0
    session_start_info.hparams["param3"].bool_value = False
    self.assertEqual(
        summary.session_start_pb(
            hparams={"param1":"string", "param2":5, "param3":False},
            model_uri="//model/uri",
            group_name="session_group",
            start_time_secs=start_time_secs),
        tf.Summary(
            value=[
                tf.Summary.Value(
                    tag="_hparams_/session_start_info",
                    metadata=tf.SummaryMetadata(
                        plugin_data=tf.SummaryMetadata.PluginData(
                            plugin_name="hparams",
                            content=(plugin_data_pb2.HParamsPluginData(
                                version=0,
                                session_start_info=session_start_info
                            ).SerializeToString()))))
            ]))
def start_session():
    """Parse FLAGS.hparams and write a session-start summary for them."""
    hparams = hparams_util_pb2.HParams()
    text_format.Merge(FLAGS.hparams, hparams)
    # Rebind `hparams` to a plain dict: each google.protobuf.Value entry is
    # converted to its Python-native equivalent.
    native = {}
    for (key, value) in six.iteritems(hparams.hparams):
        native[key] = value_to_python(value)
    hparams = native
    pb = summary.session_start_pb(hparams, FLAGS.model_uri, FLAGS.monitor_url,
                                  FLAGS.group_name, FLAGS.start_time_secs)
    write_summary(pb)
def on_train_begin(self, logs):
    """Emit hparams experiment and session summaries when training starts.

    Writes the experiment summary plus a session start/end pair into the
    TensorBoard callback's train writer.  `logs` is unused.
    """
    # TODO: v2 of the callback has a "writers" object
    # NOTE(review): `cb` is a free variable here (not `self`) — presumably
    # the TensorBoard callback instance created elsewhere; confirm it is in
    # scope wherever this method is attached.
    with cb._writers["train"].as_default():
        exp = create_experiment_summary(
            [16, 32], [0.1, 0.5], ['adam', 'sgd'])
        tf.summary.import_event(tf.compat.v1.Event(
            summary=exp).SerializeToString())
        summary_start = hparams_summary.session_start_pb(
            hparams={'num_units': 16, 'dropout_rate': 0.5,
                     'optimizer': 'adam'})
        summary_end = hparams_summary.session_end_pb(
            api_pb2.STATUS_SUCCESS)
        tf.summary.import_event(tf.compat.v1.Event(
            summary=summary_start).SerializeToString())
        tf.summary.import_event(tf.compat.v1.Event(
            summary=summary_end).SerializeToString())
def run(data, base_logdir, session_id, group_id, hparams):
    """Run a single training/validation session.

    Flags must already have been parsed before calling this.

    Args:
      data: The data as loaded by `prepare_data()`.
      base_logdir: Top-level directory receiving all summary data.
      session_id: Unique string ID for this session (also used as seed).
      group_id: String ID of the session group containing this session.
      hparams: Dict mapping hyperparameters in `HPARAMS` to values.
    """
    model = model_fn(hparams=hparams, seed=session_id)
    logdir = os.path.join(base_logdir, session_id)

    # Hparams metadata goes through a manual writer; the Keras TensorBoard
    # callback only covers the per-step metrics.
    summary_writer = tf.summary.create_file_writer(logdir)
    hparam_values = {h.name: hparams[h] for h in hparams}
    with summary_writer.as_default():
        start_pb = hparams_summary.session_start_pb(
            hparam_values,
            group_name=group_id,
        )
        tf.summary.experimental.write_raw_pb(start_pb.SerializeToString(),
                                             step=0)
    summary_writer.flush()

    tb_callback = tf.keras.callbacks.TensorBoard(
        logdir,
        update_freq=flags.FLAGS.summary_freq,
        profile_batch=0,  # workaround for issue #2084
    )
    (x_train, y_train), (x_test, y_test) = data
    result = model.fit(
        x=x_train,
        y=y_train,
        epochs=flags.FLAGS.num_epochs,
        shuffle=False,
        validation_data=(x_test, y_test),
        callbacks=[tb_callback],
    )

    with summary_writer.as_default():
        end_pb = hparams_summary.session_end_pb(api_pb2.STATUS_SUCCESS)
        tf.summary.experimental.write_raw_pb(end_pb.SerializeToString(),
                                             step=0)
    summary_writer.flush()
    summary_writer.close()
def train(sid, ps, dset_fn, model_fn, cbacks=None): ds = dset_fn(ps, TRAIN) # with T.distribute.MirroredStrategy().scope(): mdl = model_fn(ps, compiled=True) mp = pth.Path.cwd() / ps.dir_model / ps.model if mp.exists() and tf.get_checkpoint_state(str(mp)): mdl.train_on_batch(ds) mdl.load_weights(str(mp / TRAIN)) lp = pth.Path.cwd() / ps.dir_log / ps.model if lp.exists(): sumy = tf.create_file_writer(str(lp / TRAIN / sid)) sum_s = tb_summary.session_start_pb(hparams=ps.hparams) cbs = cbacks or [] if lp.exists(): cbs.append( tf.TensorBoard( log_dir=str(lp / TRAIN / sid), histogram_freq=1, embeddings_freq=0, update_freq="epoch", )) cbs.append( tf.EarlyStopping(monitor="val_loss", min_delta=1e-2, patience=2, verbose=True)) if mp.exists(): cbs.append( tf.ModelCheckpoint( str(mp / TRAIN), save_weights_only=True, # save_best_only=True, verbose=True, )) ds_test = dset_fn(ps, "test") hist = mdl.fit(ds, callbacks=cbs, epochs=ps.train_epochs, validation_data=ds_test) print(f"History: {hist.history}") sp = pth.Path.cwd() / ps.dir_save / ps.model if sp.exists(): tf.export_saved_model(mdl, str(sp)) loss, acc = mdl.evaluate(ds_test) print(f"\nEval loss, acc: {loss}, {acc}") """
def _experiment(self, args):
    """Run one hyperopt trial over (alpha, gamma); returns negated reward."""
    alpha, gamma = args
    hparams = {'alpha': alpha, 'gamma': gamma}
    self._alpha_list.append(alpha)
    self._gamma_list.append(gamma)
    run_dir = self._log_dir + "/alpha_{}_gamma_{:.3f}".format(alpha, gamma)
    writer = tf.summary.create_file_writer(run_dir)
    with writer.as_default():
        start_pb = hparams_summary.session_start_pb(hparams=hparams)
        env = CartPoleRewardWrapper(gym.make('CartPole-v1'))
        q_learning = DeepQNetwork(env, alpha=alpha, gamma=gamma, writer=writer)
        num_episodes = 10
        episode_lenth = 50
        for episode in range(num_episodes):
            q_learning.training_episode(
                num_exploration_episodes=int(num_episodes * 2 / 3),
                episode_lenght=episode_lenth)
            average_cmulative_reward = (
                q_learning.evaluate_average_cumulative_reward(100))
            print(
                'Average cumulative reward after episode:{} for alpha: {} and gamma: {} is: {}'
                .format(episode, alpha, gamma, average_cmulative_reward))
        end_pb = hparams_summary.session_end_pb(api_pb2.STATUS_SUCCESS)
        tf.summary.scalar('cummulative_reward', average_cmulative_reward,
                          step=1, description="Average cummulative reward")
        tf.summary.import_event(
            tf.compat.v1.Event(summary=start_pb).SerializeToString())
        tf.summary.import_event(
            tf.compat.v1.Event(summary=end_pb).SerializeToString())
    # hyperopt only minimizes (fmin; there is no fmax), so negate the reward.
    return -average_cmulative_reward
def run(logdir, session_id, hparams, group_name):
    """Runs a temperature simulation.

    This will simulate an object at temperature `initial_temperature`
    sitting at rest in a large room at temperature `ambient_temperature`.
    The object has some intrinsic `heat_coefficient`, which indicates
    how much thermal conductivity it has: for instance, metals have high
    thermal conductivity, while the thermal conductivity of water is low.

    Over time, the object's temperature will adjust to match the
    temperature of its environment. We'll track the object's temperature,
    how far it is from the room's temperature, and how much it changes at
    each time step.

    Arguments:
      logdir: the top-level directory into which to write summary data
      session_id: an id for the session.
      hparams: A dictionary mapping a hyperparameter name to its value.
      group_name: an id for the session group this session belongs to.
    """
    tf.reset_default_graph()
    tf.set_random_seed(0)

    initial_temperature = hparams["initial_temperature"]
    ambient_temperature = hparams["ambient_temperature"]
    heat_coefficient = HEAT_COEFFICIENTS[hparams["material"]]
    session_dir = os.path.join(logdir, session_id)
    writer = tf.summary.FileWriter(session_dir)
    # Write the hparams session-start record before any scalar data.
    writer.add_summary(
        summary.session_start_pb(hparams=hparams, group_name=group_name))
    writer.flush()
    with tf.name_scope("temperature"):
        # Create a mutable variable to hold the object's temperature, and
        # create a scalar summary to track its value over time. The name of
        # the summary will appear as 'temperature/current' due to the
        # name-scope above.
        temperature = tf.Variable(tf.constant(initial_temperature),
                                  name="temperature")
        scalar_summary.op(
            "current",
            temperature,
            display_name="Temperature",
            description="The temperature of the object under "
            "simulation, in Kelvins.",
        )

        # Compute how much the object's temperature differs from that of its
        # environment, and track this, too: likewise, as
        # 'temperature/difference_to_ambient'.
        ambient_difference = temperature - ambient_temperature
        scalar_summary.op(
            "difference_to_ambient",
            ambient_difference,
            display_name="Difference to ambient temperature",
            description=("The difference between the ambient "
                         "temperature and the temperature of the "
                         "object under simulation, in Kelvins."),
        )

        # Newton suggested that the rate of change of the temperature of an
        # object is directly proportional to this `ambient_difference` above,
        # where the proportionality constant is what we called the heat
        # coefficient. But in real life, not everything is quite so clean, so
        # we'll add in some noise. (The value of 50 is arbitrary, chosen to
        # make the data look somewhat interesting. :-) )
        noise = 50 * tf.random.normal([])
        delta = -heat_coefficient * (ambient_difference + noise)
        scalar_summary.op(
            "delta",
            delta,
            description="The change in temperature from the previous "
            "step, in Kelvins.",
        )

    # Collect all the scalars that we want to keep track of.
    summ = tf.summary.merge_all()

    # Now, augment the current temperature by this delta that we computed,
    # blocking the assignment on summary collection to avoid race conditions
    # and ensure that the summary always reports the pre-update value.
    with tf.control_dependencies([summ]):
        update_step = temperature.assign_add(delta)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for step in range(FLAGS.num_steps):
        # By asking TensorFlow to compute the update step, we force it to
        # change the value of the temperature variable. We don't actually
        # care about this value, so we discard it; instead, we grab the
        # summary data computed along the way.
        (s, _) = sess.run([summ, update_step])
        # Only emit scalar data every `summary_freq` steps.
        if (step % FLAGS.summary_freq) == 0:
            writer.add_summary(s, global_step=step)
    writer.add_summary(summary.session_end_pb(api_pb2.STATUS_SUCCESS))
    writer.close()
def run(logdir, session_id, hparams, group_name):
    """Runs a temperature simulation.

    This will simulate an object at temperature `initial_temperature`
    sitting at rest in a large room at temperature `ambient_temperature`.
    The object has some intrinsic `heat_coefficient`, which indicates
    how much thermal conductivity it has: for instance, metals have high
    thermal conductivity, while the thermal conductivity of water is low.

    Over time, the object's temperature will adjust to match the
    temperature of its environment. We'll track the object's temperature,
    how far it is from the room's temperature, and how much it changes at
    each time step.

    Arguments:
      logdir: the top-level directory into which to write summary data
      session_id: an id for the session.
      hparams: A dictionary mapping an hyperparameter name to its value.
      group_name: an id for the session group this session belongs to.
    """
    tf.reset_default_graph()
    tf.set_random_seed(0)

    initial_temperature = hparams['initial_temperature']
    ambient_temperature = hparams['ambient_temperature']
    heat_coefficient = hparams['heat_coefficient']
    session_dir = os.path.join(logdir, session_id)
    writer = tf.summary.FileWriter(session_dir)
    # Write the hparams session-start record before any scalar data.
    writer.add_summary(summary.session_start_pb(hparams=hparams,
                                                group_name=group_name))
    writer.flush()
    with tf.name_scope('temperature'):
        # Create a mutable variable to hold the object's temperature, and
        # create a scalar summary to track its value over time. The name of
        # the summary will appear as "temperature/current" due to the
        # name-scope above.
        temperature = tf.Variable(tf.constant(initial_temperature),
                                  name='temperature')
        scalar_summary.op('current', temperature,
                          display_name='Temperature',
                          description='The temperature of the object under '
                          'simulation, in Kelvins.')

        # Compute how much the object's temperature differs from that of its
        # environment, and track this, too: likewise, as
        # "temperature/difference_to_ambient".
        ambient_difference = temperature - ambient_temperature
        scalar_summary.op('difference_to_ambient', ambient_difference,
                          display_name='Difference to ambient temperature',
                          description=('The difference between the ambient '
                                       'temperature and the temperature of the '
                                       'object under simulation, in Kelvins.'))

        # Newton suggested that the rate of change of the temperature of an
        # object is directly proportional to this `ambient_difference` above,
        # where the proportionality constant is what we called the heat
        # coefficient. But in real life, not everything is quite so clean, so
        # we'll add in some noise. (The value of 50 is arbitrary, chosen to
        # make the data look somewhat interesting. :-) )
        noise = 50 * tf.random_normal([])
        delta = -heat_coefficient * (ambient_difference + noise)
        scalar_summary.op('delta', delta,
                          description='The change in temperature from the previous '
                          'step, in Kelvins.')

    # Collect all the scalars that we want to keep track of.
    summ = tf.summary.merge_all()

    # Now, augment the current temperature by this delta that we computed,
    # blocking the assignment on summary collection to avoid race conditions
    # and ensure that the summary always reports the pre-update value.
    with tf.control_dependencies([summ]):
        update_step = temperature.assign_add(delta)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for step in xrange(STEPS):
        # By asking TensorFlow to compute the update step, we force it to
        # change the value of the temperature variable. We don't actually
        # care about this value, so we discard it; instead, we grab the
        # summary data computed along the way.
        (s, _) = sess.run([summ, update_step])
        writer.add_summary(s, global_step=step)
    writer.add_summary(summary.session_end_pb(api_pb2.STATUS_SUCCESS))
    writer.close()
def eager_quess(sess, params):
    """Custom-loop (eager) variant of `run_quess`: manual GradientTape training.

    Args:
      sess: session id string; appended to the train log directory.
      params: parameter object providing dirs, `hparams`, etc.
    """
    PS = params
    ds_train = dataset_for('train', PS)
    ds_test = dataset_for('test', PS)
    # with tf.distribute.MirroredStrategy().scope():
    model = model_for(PS)
    model.compile(**compile_args_for(PS))
    # model.train_on_batch(ds_train[:1])
    save_p = pth.Path(PS.dir_save)
    if save_p.exists():
        model.load_weights(save_p)
    model.summary()
    p = PS.log_dir + '/train/' + sess
    writer = tf.summary.create_file_writer(p)
    sum_s = hparams.session_start_pb(hparams=PS.hparams)
    # NOTE(review): these callbacks are built but never passed to a fit()
    # call in this manual loop — kept for parity with run_quess.
    cbacks = [
        # kcb.LambdaCallback(on_epoch_end=log_confusion_matrix),
        kcb.History(),
        kcb.BaseLogger(),
        kcb.TensorBoard(log_dir=p,
                        histogram_freq=1,
                        embeddings_freq=0,
                        update_freq='epoch'),
        # kcb.EarlyStopping(
        #     monitor='val_loss', min_delta=1e-2, patience=2, verbose=True),
    ]
    if save_p.exists():
        cbacks.append(
            # Fixed: ModelCheckpoint's keyword is `filepath`;
            # `model_save_path` is not accepted and raised a TypeError.
            kcb.ModelCheckpoint(filepath=save_p,
                                save_best_only=True,
                                monitor='val_loss',
                                verbose=True))
    opt = opt_fn(PS)
    # Loop variable renamed from `e`: it was later shadowed by the
    # serialized-event bytes, which were also named `e`.
    for epoch in range(3):
        print(f'Start of epoch {epoch}')
        for s, (src, tgt, agree) in enumerate(ds_train):
            with tf.GradientTape() as tape:
                r = model([src, tgt])
                loss = loss_fn(agree, r)
                # loss += sum(model.losses)
            gs = tape.gradient(loss, model.trainable_variables)
            opt.apply_gradients(zip(gs, model.trainable_variables))
            # acc_metric(fit, f)
            if s % 200 == 0:
                print(f'Loss at step {s}: {loss}')
        # a = acc_metric.result()
        # acc_metric.reset_states()
        # print(f'Train acc over epoch: {float(a)}')
        for src, tgt, agree in ds_test:
            r = model([src, tgt])
            # acc_metric(fit, f)
        # a = acc_metric.result()
        # acc_metric.reset_states()
        # print(f'Test acc: {float(a)}')
    if save_p.exists():
        model.save_weights(save_p, save_format='tf')
    with writer.as_default():
        e = tf.compat.v1.Event(summary=sum_s).SerializeToString()
        tf.summary.import_event(e)
        # tf.summary.scalar('accuracy', acc, step=1, description='Accuracy')
        sum_e = hparams.session_end_pb(api_pb2.STATUS_SUCCESS)
        e = tf.compat.v1.Event(summary=sum_e).SerializeToString()
        tf.summary.import_event(e)