def __init__(self, synthetic_prefs, max_segs, log_dir, zoom=4, channels=3,
             min_segments_to_test=10, n_pause_frames=4, user_response_timeout=3):
    if not synthetic_prefs:
        self.vid_q = mp.get_context('spawn').Queue()
        self.renderer = VideoRenderer(vid_queue=self.vid_q,
                                      mode=VideoRenderer.play_through_mode,
                                      zoom=zoom,
                                      channels=channels)
    else:
        self.renderer = None
    self.min_segments_to_test = min_segments_to_test
    self.synthetic_prefs = synthetic_prefs
    self.zoom = zoom
    self.seg_idx = 0
    self.segments = []
    self.channels = channels
    self.max_segs = max_segs
    self.tested_pairs = set()
    self.n_pause_frames = n_pause_frames
    self.user_response_timeout = user_response_timeout
    easy_tf_log.set_dir(log_dir)
def run(env_defs, kargs=None):
    args, lr_args, log_dir, preprocess_wrapper = parse_args(env_defs, kargs)
    easy_tf_log.set_dir(log_dir)
    utils_tensorflow.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir, env_defs)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    extra_inputs_shape = None
    if 'extra_inputs_shape' in env_defs:
        extra_inputs_shape = env_defs['extra_inputs_shape']

    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug,
                             env_defs=env_defs,
                             extra_inputs_shape=extra_inputs_shape)

    global_vars = tf.trainable_variables('global')
    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # so that we can restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='', flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)
    worker_threads = start_worker_threads(workers, args.n_steps,
                                          args.steps_per_update,
                                          step_counter, update_counter)
    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir,
                saver, args.manager_wake_interval_seconds,
                args.ckpt_interval_seconds)

    for env in envs:
        env.close()
def __init__(self, cluster_job_name, core_network, lr=1e-4, cluster_dict=None,
             batchnorm=False, dropout=0.0, n_preds=1, log_dir=None):
    self.n_preds = n_preds
    graph, self.sess = self.init_sess(cluster_dict, cluster_job_name)
    # Why not just use soft device placement? With soft placement,
    # if we have a bug which prevents an operation being placed on the GPU
    # (e.g. we're using uint8s for operations that the GPU can't do),
    # then TensorFlow will be silent and just place the operation on a CPU.
    # Instead, we want to say: if there's a GPU present, definitely try and
    # put things on the GPU. If it fails, tell us!
    if tf.test.gpu_device_name():
        worker_device = "/job:{}/task:0/gpu:0".format(cluster_job_name)
    else:
        worker_device = "/job:{}/task:0".format(cluster_job_name)
    device_setter = tf.train.replica_device_setter(
        cluster=cluster_dict,
        ps_device="/job:ps/task:0",
        worker_device=worker_device)
    self.rps = []
    with graph.as_default():
        for pred_n in range(n_preds):
            with tf.device(device_setter):
                with tf.variable_scope("pred_{}".format(pred_n)):
                    rp = RewardPredictorNetwork(core_network=core_network,
                                                dropout=dropout,
                                                batchnorm=batchnorm,
                                                lr=lr)
                    self.rps.append(rp)
        self.init_op = tf.global_variables_initializer()
        # Why save_relative_paths=True?
        # So that the plain-text 'checkpoint' file written uses relative paths,
        # which seems to be needed in order to avoid confusing saver.restore()
        # when restoring from FloydHub runs.
        self.saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
        self.summaries = self.add_summary_ops()

    self.checkpoint_file = osp.join(log_dir,
                                    'reward_predictor_checkpoints',
                                    'reward_predictor.ckpt')
    self.train_writer = tf.summary.FileWriter(
        osp.join(log_dir, 'reward_predictor', 'train'), flush_secs=5)
    self.test_writer = tf.summary.FileWriter(
        osp.join(log_dir, 'reward_predictor', 'test'), flush_secs=5)

    self.n_steps = 0
    self.r_norm = RunningStat(shape=n_preds)

    misc_logs_dir = osp.join(log_dir, 'reward_predictor', 'misc')
    easy_tf_log.set_dir(misc_logs_dir)
def __init__(self, core_network, obs_shape, logger, lr=1e-4, batchnorm=False,
             dropout=0.0, n_preds=1, log_dir=None):
    self.n_preds = n_preds
    self.obs_shape = obs_shape
    self.sess = tf.Session()
    self.logger = logger
    graph = tf.get_default_graph()

    self.rps = []
    with graph.as_default():
        # Create n_preds reward predictors, each in its own variable scope
        for pred_n in range(n_preds):
            with tf.variable_scope("pred_{}".format(pred_n)):
                rp = RewardPredictorNetwork(core_network=core_network,
                                            dropout=dropout,
                                            batchnorm=batchnorm,
                                            lr=lr,
                                            obs_shape=self.obs_shape)
                self.rps.append(rp)
        self.init_op = tf.global_variables_initializer()
        # Why save_relative_paths=True?
        # So that the plain-text 'checkpoint' file written uses relative paths,
        # which seems to be needed in order to avoid confusing saver.restore()
        # when restoring from FloydHub runs.
        self.saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
        self.summaries = self.add_summary_ops()

    # Run the initializer up front to avoid an 'attempting to use
    # uninitialized variable' FailedPreconditionError; see
    # https://stackoverflow.com/questions/34001922/failedpreconditionerror-attempting-to-use-uninitialized-in-tensorflow
    try:
        self.sess.run(self.init_op)
    except Exception as e:
        print(e)

    self.checkpoint_dir = osp.join(log_dir, 'reward_predictor_checkpoints')
    self.checkpoint_file = osp.join(self.checkpoint_dir,
                                    'reward_predictor.ckpt')
    self.train_writer = tf.summary.FileWriter(
        osp.join(log_dir, 'reward_predictor', 'train'), flush_secs=5)
    self.test_writer = tf.summary.FileWriter(
        osp.join(log_dir, 'reward_predictor', 'test'), flush_secs=5)

    self.n_steps = 0
    self.r_norm = RunningStat(shape=n_preds)

    misc_logs_dir = osp.join(log_dir, 'reward_predictor', 'misc')
    easy_tf_log.set_dir(misc_logs_dir)
def test_set_dir(self):
    """
    Confirm that set_dir works.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        os.chdir(temp_dir)
        easy_tf_log.set_dir('logs2')
        easy_tf_log.tflog('var', 0)
        self.assertEqual(os.listdir(), ['logs2'])
        self.assertIn('events.out.tfevents', os.listdir('logs2')[0])
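For reference, a minimal sketch (not part of the original test suite) of how the scalars written by tflog() could be read back for a stronger assertion, assuming TF 1.x's tf.train.summary_iterator; the helper name read_logged_scalars is hypothetical:

def read_logged_scalars(events_path):
    # Walk the event file and collect (step, tag, value) tuples
    import tensorflow as tf
    scalars = []
    for event in tf.train.summary_iterator(events_path):
        for value in event.summary.value:
            scalars.append((event.step, value.tag, value.simple_value))
    return scalars

# e.g. read_logged_scalars(os.path.join('logs2', os.listdir('logs2')[0]))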
def main():
    # parse_args() is imported from params
    args, lr_args, log_dir, preprocess_wrapper = parse_args()
    # Set the log directory for easy_tf_log
    easy_tf_log.set_dir(log_dir)
    # Seed the random number generators
    utils_tensorflow.set_random_seeds(args.seed)
    # A class for running TensorFlow operations. A Session object encapsulates
    # the environment in which Operation objects are executed and Tensor
    # objects are evaluated.
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    # Create the set of per-thread networks
    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug)

    # Returns all variables created with trainable=True.
    # scope: (Optional.) A string. If supplied, the resulting list is filtered
    # to include only items whose name attribute matches scope using re.match.
    global_vars = tf.trainable_variables('global')

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # so that we can restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    # If there is a checkpoint to load, restore it and continue from where we
    # left off; otherwise, initialize from scratch.
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='', flush=True)
        # Restore (load) the session from the given checkpoint
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    # Create the workers
    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)
    # Start a thread for each worker
    worker_threads = start_worker_threads(workers, args.n_steps,
                                          args.steps_per_update,
                                          step_counter, update_counter)
    # Manager loop for the worker threads
    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir,
                saver, args.manager_wake_interval_seconds,
                args.ckpt_interval_seconds)

    for env in envs:
        env.close()
def test_fork(self):
    with tempfile.TemporaryDirectory() as temp_dir:
        easy_tf_log.set_dir(temp_dir)

        def f(queue):
            easy_tf_log.tflog('foo', 0)
            queue.put(True)

        q = Queue()
        Process(target=f, args=[q], daemon=True).start()
        try:
            q.get(timeout=1.0)
        except queue.Empty:
            self.fail("Process did not return")
def __init__(self, synthetic_prefs, max_segs, log_dir):
    self.vid_q = Queue()
    if not synthetic_prefs:
        self.renderer = VideoRenderer(vid_queue=self.vid_q,
                                      mode=VideoRenderer.restart_on_get_mode,
                                      zoom=4)
    else:
        self.renderer = None
    self.synthetic_prefs = synthetic_prefs
    self.seg_idx = 0
    self.segments = []
    self.tested_pairs = set()  # For O(1) lookup
    self.max_segs = max_segs
    easy_tf_log.set_dir(log_dir)
def __init__(self, env, prefix="", log_dir=None):
    Wrapper.__init__(self, env)
    if prefix:
        self.log_prefix = prefix + ": "
    else:
        self.log_prefix = ""
    if log_dir is not None:
        easy_tf_log.set_dir(log_dir)
    self.episode_rewards = None
    self.episode_length_steps = None
    self.episode_n = -1
    self.episode_done = None
    self.log_dir = log_dir
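A hypothetical sketch (not from the original source) of step()/reset() methods that might accompany the __init__ above, logging per-episode statistics through easy_tf_log; signatures follow the classic Gym step() API:

def reset(self, **kwargs):
    self.episode_n += 1
    self.episode_rewards = []
    self.episode_length_steps = 0
    self.episode_done = False
    return self.env.reset(**kwargs)

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.episode_rewards.append(reward)
    self.episode_length_steps += 1
    if done and not self.episode_done:
        self.episode_done = True
        # Log end-of-episode statistics under the configured prefix
        easy_tf_log.tflog(self.log_prefix + 'episode_reward',
                          sum(self.episode_rewards))
        easy_tf_log.tflog(self.log_prefix + 'episode_length_steps',
                          self.episode_length_steps)
    return obs, reward, done, info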
def f():
    if make_reward_predictor:
        reward_predictor = make_reward_predictor('a2c', cluster_dict)
    else:
        reward_predictor = None
    misc_logs_dir = osp.join(log_dir, 'a2c_misc')
    easy_tf_log.set_dir(misc_logs_dir)
    learn(policy=policy_fn,
          env=env,
          seg_pipe=seg_pipe,
          start_policy_training_pipe=start_policy_training_pipe,
          episode_vid_queue=episode_vid_queue,
          reward_predictor=reward_predictor,
          ckpt_save_dir=ckpt_dir,
          gen_segments=gen_segments,
          **a2c_params)
def main():
    args, lr_args, log_dir, preprocess_wrapper, ckpt_timer = parse_args()
    easy_tf_log.set_dir(log_dir)
    utils.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.GraphCounter(sess)
    update_counter = utils.GraphCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             n_actions=envs[0].action_space.n,
                             weight_inits=args.weight_inits,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             debug=args.debug)

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # which seems to be needed in order to avoid confusing saver.restore()
    # when restoring from FloydHub runs.
    global_vars = tf.trainable_variables('global')
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)
    checkpoint_dir = osp.join(log_dir, 'checkpoints')
    os.makedirs(checkpoint_dir)
    checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if args.load_ckpt:
        print("Restoring from checkpoint '%s'..." % args.load_ckpt,
              end='', flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess=sess,
                           envs=envs,
                           networks=networks,
                           n_workers=args.n_workers,
                           log_dir=log_dir)
    worker_threads = start_workers(n_steps=args.n_steps,
                                   steps_per_update=args.steps_per_update,
                                   step_counter=step_counter,
                                   update_counter=update_counter,
                                   workers=workers)
    ckpt_timer.reset()
    step_rate = utils.RateMeasure()
    step_rate.reset(int(step_counter))
    while True:
        time.sleep(args.wake_interval_seconds)

        steps_per_second = step_rate.measure(int(step_counter))
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', int(step_counter))
        easy_tf_log.tflog('misc/updates', int(update_counter))
        easy_tf_log.tflog('misc/lr', sess.run(lr))

        alive = [t.is_alive() for t in worker_threads]

        if ckpt_timer.done() or not any(alive):
            saver.save(sess, checkpoint_file, int(step_counter))
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

        if not any(alive):
            break

    for env in envs:
        env.close()
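utils.RateMeasure isn't shown in this listing; a minimal sketch of what such a helper might look like (an assumption, not the original implementation) reports the per-second rate of change of a counter between measure() calls, matching the reset()/measure() usage above:

import time

class RateMeasure:
    def reset(self, value):
        # Record the current counter value and timestamp
        self.value = value
        self.t = time.time()

    def measure(self, value):
        # Rate of change since the last reset()/measure() call
        now = time.time()
        rate = (value - self.value) / (now - self.t)
        self.value = value
        self.t = now
        return rate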
#!/usr/bin/env python

import time

import easy_tf_log

# Logging using the global logger
# Will log to automatically-created 'logs' directory
for i in range(10):
    easy_tf_log.tflog('foo', i)
for j in range(10, 20):
    easy_tf_log.tflog('bar', j)

easy_tf_log.set_dir('logs2')
for k in range(20, 30):
    easy_tf_log.tflog('baz', k)
for l in range(5):
    easy_tf_log.tflog('qux', l, step=(10 * l))

# Logging using a Logger object
logger = easy_tf_log.Logger(log_dir='logs3')
for i in range(10):
    logger.log_key_value('quux', i)
logger.log_list_stats('quuz', [1, 2, 3, 4, 5])
logger.measure_rate('corge', 10)
time.sleep(1)
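The directories written above ('logs', 'logs2', 'logs3') are standard TensorBoard event directories, so pointing TensorBoard at their common parent shows each as a separate run:

    tensorboard --logdir .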
tf.flags.DEFINE_integer('seed', 888, 'random seed for training sampling')
tf.flags.DEFINE_bool('use_lsh', False,
                     'use locality-sensitive hashing '
                     '(NOTE: not fully tested)')
tf.flags.DEFINE_bool('use_resnet', True, 'use resnet instead of inception')
tf.flags.DEFINE_integer('num_parts', 2, 'number of parts to use')

# Set the log directory to logs/prefix
date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
prefix = (date_time + '_rep_dim_' + str(FLAGS.rep_dim) +
          '_episode_length_' + str(FLAGS.episode_length) +
          '_episode_width_' + str(FLAGS.episode_width) +
          '_batch_size_' + str(FLAGS.batch_size) +
          '_num_episodes_' + str(FLAGS.num_episodes) +
          '_num_parts_' + str(FLAGS.num_parts))
tf.flags.DEFINE_string('save_dir',
                       os.path.join(CURRENT_DIR, '../checkpoints/' + prefix),
                       'directory to save model to')
easy_tf_log.set_dir(os.path.join(CURRENT_DIR, '../logs/' + prefix))


class Trainer(object):
    """Class that takes care of training, validating, and checkpointing model."""

    def __init__(self, train_data, valid_data, input_dim=(84, 84, 3),
                 output_dim=None):
        self.train_data = train_data
        self.valid_data = valid_data
        self.input_dim = input_dim
        self.rep_dim = FLAGS.rep_dim
        self.episode_length = FLAGS.episode_length
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 27 20:36:14 2019

@author: starstorms
"""

import easy_tf_log as etl

scale = 1.7

etl.set_dir('logs/1')
for i in range(10):
    etl.tflog('foo', i * scale)
for j in range(10, 20):
    etl.tflog('bar', j * scale)

etl.set_dir('logs/2')
for k in range(20, 30):
    etl.tflog('foo', k * scale)
for l in range(5):
    etl.tflog('bar', l * scale, step=(10 * l))

etl.set_dir('logs/3 extra stuff')
for k in range(20, 40):
    etl.tflog('foo', k * scale)
for l in range(15):
import gc
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle
from datetime import datetime
from collections import deque

import tensorflow as tf
import easy_tf_log
from easy_tf_log import tflog

easy_tf_log.set_dir('tboard/')

from pommerman import agents
from rnn_agent import RNN_Agent
from pomm_dataset import dataset

tf_tag = tf.saved_model.tag_constants


# Generate Data ---------------------------------------------------------------
# ------------------------------------------------------------------------------
def generate_data(EPISODES, save_file_nm, shuffle_agents=False):
    rnn_agent = RNN_Agent()

    # Init dataset
#%% Tensorflow / Keras

# For specifying which device to use
with tf.device('/gpu:0'):
    pass

# Adding a new axis to an array
x_train = train[..., tf.newaxis]

# TensorBoard setup
logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
callbacks = [tensorboard_callback]  # in model.fit()

# easy_tf_log: log scalars to TensorBoard
etl.set_dir('logs2')
for k in range(20, 30):
    etl.tflog('baz', k)

# To start TensorBoard, run this in the terminal:
#   tensorboard --logdir path/to/log/dir


# Plot graphs
def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.plot(history.history['val_' + string])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, 'val_' + string])
    plt.show()


# Class for displaying progress at the end of an epoch
class DisplayCallback(tf.keras.callbacks.Callback):
def run_worker(env_id, preprocess_wrapper, seed, worker_n, n_steps_to_run,
               ckpt_timer, load_ckpt_file, render, log_dir, max_n_noops, debug,
               steps_per_update):
    utils.set_random_seeds(seed)

    mem_log = osp.join(log_dir, "worker_{}_memory.log".format(worker_n))
    memory_profiler = MemoryProfiler(pid=-1, log_path=mem_log)
    memory_profiler.start()

    worker_log_dir = osp.join(log_dir, "worker_{}".format(worker_n))
    easy_tf_log_dir = osp.join(worker_log_dir, 'easy_tf_log')
    os.makedirs(easy_tf_log_dir)
    easy_tf_log.set_dir(easy_tf_log_dir)

    server = tf.train.Server(cluster, job_name="worker", task_index=worker_n)
    sess = tf.Session(server.target)

    with tf.device("/job:worker/task:0"):
        create_network('global')
    with tf.device("/job:worker/task:%d" % worker_n):
        w = Worker(sess=sess,
                   env_id=env_id,
                   preprocess_wrapper=preprocess_wrapper,
                   worker_n=worker_n,
                   seed=seed,
                   log_dir=worker_log_dir,
                   max_n_noops=max_n_noops,
                   debug=debug)
    init_op = tf.global_variables_initializer()

    if render:
        w.render = True

    # Worker 0 initialises the global network as well as the per-worker
    # networks; other workers only initialise their own per-worker networks
    sess.run(init_op)

    if worker_n == 0:
        saver = tf.train.Saver()
        checkpoint_dir = osp.join(log_dir, 'checkpoints')
        os.makedirs(checkpoint_dir)
        checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if load_ckpt_file is not None:
        print("Restoring from checkpoint '%s'..." % load_ckpt_file,
              end='', flush=True)
        saver.restore(sess, load_ckpt_file)
        print("done!")

    updates = 0
    steps = 0
    ckpt_timer.reset()
    while steps < n_steps_to_run:
        start_time = time.time()

        steps_ran = w.run_update(steps_per_update)
        steps += steps_ran
        updates += 1

        end_time = time.time()
        steps_per_second = steps_ran / (end_time - start_time)
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', steps)
        easy_tf_log.tflog('misc/updates', updates)

        if worker_n == 0 and ckpt_timer.done():
            saver.save(sess, checkpoint_file)
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

    memory_profiler.stop()
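ckpt_timer above exposes only reset() and done(); a plausible minimal implementation (an assumption, not the original utils code) is an interval timer:

import time

class Timer:
    def __init__(self, duration_seconds):
        self.duration_seconds = duration_seconds

    def reset(self):
        # Restart the interval from now
        self.t_start = time.time()

    def done(self):
        # True once the configured interval has elapsed since reset()
        return time.time() - self.t_start > self.duration_seconds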