def testMLPSmall(self):
    """Smoke-test that an ``MLPSmall`` network can be built in a session.

    The test only constructs the network; it makes no assertions, so it
    passes as long as construction does not raise.
    """
    session_config = tf.ConfigProto()
    with tf.Session(config=session_config) as sess:
        MLPSmall(
            sess=sess,
            observation_dims=[80, 80],
            history_length=4,
            output_size=18,
            hidden_activation_fn=tf.sigmoid,
            network_output_type='normal',
            name='pred_network',
            trainable=True,
        )
def main(_):
    """Configure the run from ``conf``, build the networks, then train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the step-count flags
         by ``conf.scale``.
      2. Choose the tensor layout from ``conf.use_gpu``.
      3. Open a session, create a ``StageEnvironment`` and a pair of
         prediction/target networks selected by ``conf.network_header_type``.
      4. Call ``agent.train`` when ``conf.is_train`` is set, otherwise
         ``agent.play``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of
            ``'nature'``, ``'nips'`` or ``'mlp'``.
    """
    # --- preprocess ---------------------------------------------------------
    # NOTE(review): eval() executes whatever expression the configuration
    # value contains. Confirm the value is always trusted input.
    conf.observation_dims = eval(conf.observation_dims)

    scaled_flags = [
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    ]
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # The second argument is a list of flag names handed to get_model_dir;
    # presumably these are left out of the directory name -- TODO confirm.
    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale',
    ])

    # --- start --------------------------------------------------------------
    gpu_options = tensor.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))
    session_config = tensor.ConfigProto(gpu_options=gpu_options)

    with tensor.Session(config=session_config) as sess:
        env = StageEnvironment(
            conf.max_random_start,
            conf.observation_dims,
            conf.data_format,
            conf.display,
            conf.use_cumulated_reward,
        )

        # The prediction and target networks share every constructor argument
        # except ``name`` and ``trainable``.
        if conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            shared_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.action_size,
                network_header_type=conf.network_header_type,
            )
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            shared_kwargs = dict(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.action_size,
                hidden_activation_fn=tensor.sigmoid,
                network_output_type=conf.network_output_type,
            )
        else:
            raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type))

        pred_network = network_cls(
            name='pred_network', trainable=True, **shared_kwargs)
        target_network = network_cls(
            name='target_network', trainable=False, **shared_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)
def main(_):
    """Configure the run from ``conf``, build the networks, then train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the step-count flags
         by ``conf.scale``.
      2. Choose the tensor layout from ``conf.use_gpu``.
      3. Open a session, create a ``ToyEnvironment`` (for env names
         containing ``'Corridor'`` or ``'FrozenLake'``) or an
         ``AtariEnvironment``, and build a pair of prediction/target
         networks selected by ``conf.network_header_type``.
      4. Call ``agent.train`` when ``conf.is_train`` is set, otherwise
         ``agent.play``.
      5. Write the session graph to the ``tensorboardLogs`` directory.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of
            ``'nature'``, ``'nips'`` or ``'mlp'``.
    """
    # --- preprocess ---------------------------------------------------------
    # NOTE(review): eval() executes whatever expression the configuration
    # value contains. Confirm the value is always trusted input.
    conf.observation_dims = eval(conf.observation_dims)

    for flag in [
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    ]:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # The second argument is a list of flag names handed to get_model_dir;
    # presumably these are left out of the directory name -- TODO confirm.
    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale',
    ])

    # --- start --------------------------------------------------------------
    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=conf.allow_soft_placement)
    # NOTE(review): allow_growth is driven by the allow_soft_placement flag,
    # not by a dedicated setting -- confirm this coupling is intended.
    sess_config.gpu_options.allow_growth = conf.allow_soft_placement

    with tf.Session(config=sess_config) as sess:
        is_toy_env = any(
            name in conf.env_name for name in ['Corridor', 'FrozenLake'])
        env_cls = ToyEnvironment if is_toy_env else AtariEnvironment
        env = env_cls(
            conf.env_name,
            conf.n_action_repeat,
            conf.max_random_start,
            conf.observation_dims,
            conf.data_format,
            conf.display,
            conf.use_cumulated_reward,
        )

        # The prediction and target networks share every constructor argument
        # except ``name`` and ``trainable``.
        if conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            shared_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=conf.network_header_type,
            )
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            shared_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
            )
        else:
            raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type))

        pred_network = network_cls(
            name='pred_network', trainable=True, **shared_kwargs)
        target_network = network_cls(
            name='target_network', trainable=False, **shared_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

        # The merged-summary result was never used, so it is no longer bound
        # to a name. The call itself is kept so the graph written below is
        # the same as before.
        tf.summary.merge_all()

        # Close the writer explicitly so the graph event is flushed to disk;
        # previously the writer was left open when the function returned.
        file_writer = tf.summary.FileWriter("tensorboardLogs", sess.graph)
        file_writer.close()
def main(_):
    """Configure the run from ``conf``, build the networks, then train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the step-count flags
         by ``conf.scale``.
      2. Choose the tensor layout from ``conf.use_gpu``.
      3. Open a default session, create a ``ToyEnvironment`` (for env names
         containing ``'Corridor'``) or an ``AtariEnvironment``, and build a
         pair of prediction/target networks selected by
         ``conf.network_header_type``.
      4. Call ``agent.train`` when ``conf.is_train`` is set, otherwise
         ``agent.play``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of
            ``'nature'``, ``'nips'`` or ``'mlp'``.
    """
    # --- preprocess ---------------------------------------------------------
    # NOTE(review): eval() executes whatever expression the configuration
    # value contains. Confirm the value is always trusted input.
    conf.observation_dims = eval(conf.observation_dims)

    flags_to_scale = [
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    ]
    for flag_name in flags_to_scale:
        unscaled = getattr(conf, flag_name)
        setattr(conf, flag_name, unscaled * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # The second argument is a list of flag names handed to get_model_dir;
    # presumably these are left out of the directory name -- TODO confirm.
    model_dir_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
        'scale',
    ]
    model_dir = get_model_dir(conf, model_dir_flags)

    # --- start --------------------------------------------------------------
    with tf.Session() as sess:
        # Both environment classes are called with the same positional
        # arguments.
        env_cls = (
            ToyEnvironment if 'Corridor' in conf.env_name else AtariEnvironment
        )
        env = env_cls(
            conf.env_name,
            conf.n_action_repeat,
            conf.max_random_start,
            conf.observation_dims,
            conf.data_format,
            conf.display,
        )

        # The prediction and target networks share every constructor argument
        # except ``name`` and ``trainable``.
        if conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            common = {
                'sess': sess,
                'data_format': conf.data_format,
                'history_length': conf.history_length,
                'observation_dims': conf.observation_dims,
                'output_size': env.env.action_space.n,
                'network_header_type': conf.network_header_type,
            }
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            common = {
                'sess': sess,
                'observation_dims': conf.observation_dims,
                'history_length': conf.history_length,
                'output_size': env.env.action_space.n,
                'hidden_activation_fn': tf.sigmoid,
                'network_output_type': conf.network_output_type,
            }
        else:
            raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type))

        pred_network = network_cls(
            name='pred_network', trainable=True, **common)
        target_network = network_cls(
            name='target_network', trainable=False, **common)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)
def main(_):
    """Configure the run from ``conf``, build the networks, then train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the step-count flags
         by ``conf.scale``.
      2. Choose the tensor layout from ``conf.use_gpu``.
      3. Open a session with a fixed GPU memory fraction, create a
         ``ToyEnvironment`` (for env names containing ``'Corridor'`` or
         ``'FrozenLake'``) or an ``AtariEnvironment``, and build a pair of
         prediction/target networks selected by
         ``conf.network_header_type``.
      4. Call ``agent.train`` when ``conf.is_train`` is set, otherwise
         ``agent.play``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of
            ``'rnn_cnn'``, ``'nature'``, ``'nips'`` or ``'mlp'``.
    """
    # --- preprocess ---------------------------------------------------------
    # NOTE(review): eval() executes whatever expression the configuration
    # value contains. Confirm the value is always trusted input.
    conf.observation_dims = eval(conf.observation_dims)

    for flag_name in (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    ):
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    # The second argument is a list of flag names handed to get_model_dir;
    # presumably these are left out of the directory name -- TODO confirm.
    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale',
    ])

    # --- start --------------------------------------------------------------
    # TODO: the GPU memory fraction is set manually to 0.9 for now instead of
    # being computed with calc_gpu_fraction(conf.gpu_fraction).
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    session_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=session_config) as sess:
        toy_env_names = ['Corridor', 'FrozenLake']
        if any(name in conf.env_name for name in toy_env_names):
            env_cls = ToyEnvironment
        else:
            env_cls = AtariEnvironment
        env = env_cls(
            conf.env_name,
            conf.n_action_repeat,
            conf.max_random_start,
            conf.observation_dims,
            conf.data_format,
            conf.display,
        )

        # Select the network class and the keyword arguments shared by the
        # prediction and target networks; only ``name`` and ``trainable``
        # differ between the two.
        if conf.network_header_type == 'rnn_cnn':
            network_cls = RNNCNN
            shared_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                num_steps=conf.num_steps,
                num_layers=conf.num_layers,
                attention=conf.attention,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=conf.network_header_type,
            )
        elif conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            shared_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=conf.network_header_type,
            )
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            shared_kwargs = dict(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
            )
        else:
            raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type))

        def build_network(name, trainable):
            # Instantiate the selected class with the shared arguments.
            return network_cls(name=name, trainable=trainable, **shared_kwargs)

        pred_network = build_network('pred_network', True)
        target_network = build_network('target_network', False)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)