def setup_experiment(sess, agent_config, env_config, paths, seed=None):
    """
    Initialize an experiment

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)

    returns
        agent (energypy agent)
        env (energypy environment)
    """
    env = energypy.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent_memory = agent_config.pop('load_memory', None)

    if agent_memory:
        agent_config['load_memory_path'] = paths['memory']

    agent = energypy.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    #  TODO copy the dataset into the run folder as well

    #  total_steps lives in the agent config - read it from there for logging
    total_steps = agent_config.get('total_steps')
    logger.info('setup experiment of {} steps'.format(total_steps))

    return agent, env
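
#  Hedged usage sketch for setup_experiment, not part of energypy itself.
#  The config values and paths keys below (env_args, agent_args, tb_act,
#  tb_learn, memory) only mirror how setup_experiment reads them above -
#  they are illustrative assumptions, not a confirmed API.
def _example_setup_experiment():
    import tensorflow as tf

    env_config = {'env_id': 'battery'}
    agent_config = {'agent_id': 'dqn', 'total_steps': 10000}

    paths = {
        'env_args': './run/env_args.txt',
        'agent_args': './run/agent_args.txt',
        'tb_act': './run/tb/act',
        'tb_learn': './run/tb/learn',
        'memory': './run/memory.pkl',
    }

    with tf.Session() as sess:
        agent, env = setup_experiment(
            sess, agent_config, env_config, paths, seed=42)

        #  the training loop would run here using agent and env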
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills its memory with random experience

    args
        sess (tf.Session)
        double_q (bool)

    returns
        agent (energypy DQN agent)
        batch (dict) sampled from the agent's memory
        env (energypy 2048 environment)
    """
    env = energypy.make_env('2048', observation_dims='2D')

    #  use a high learning rate to get visible weight changes
    agent = energypy.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,
        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1),
    )

    #  fill memory with random experience so a batch can be sampled
    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
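
#  Hedged sketch of how setup_agent might be used in a test.  It assumes
#  the agent exposes a learn() method that samples from its own memory and
#  that tf.trainable_variables() covers the online network - assumptions
#  for illustration, not a confirmed energypy interface.
def _example_test_weight_change():
    import tensorflow as tf

    with tf.Session() as sess:
        agent, batch, env = setup_agent(sess, double_q=True)
        sess.run(tf.global_variables_initializer())

        before = sess.run(tf.trainable_variables())
        agent.learn()
        after = sess.run(tf.trainable_variables())

        #  the learning rate of 1.0 should make weight changes visible
        assert any((b != a).any() for b, a in zip(before, after))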
def setup_run(cfg, run, sess):
    """
    Sets up a single run of an experiment

    args
        cfg (dict)
        run - identifies the run within cfg
        sess (tf.Session)

    returns
        run_cfg (dict)
        agent (energypy agent)
        env (energypy environment)
        runner (Runner)
    """
    run_cfg = make_run_config(cfg, run)

    run_logger = make_new_logger('run_setup', run_cfg['run_dir'])
    runner = Runner(sess, run_cfg)
    dump_config(run_cfg, run_logger)

    env_config = run_cfg['env']
    env = make_env(**env_config)

    if hasattr(env.observation_space, 'info') and hasattr(
            env.state_space, 'info'):
        run_logger.debug(json.dumps({'state_info': env.state_space.info}))
        run_logger.debug(
            json.dumps({'observation_info': env.observation_space.info}))

    agent_config = run_cfg['agent']
    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['tensorboard_dir'] = run_cfg['tensorboard_dir']

    agent = make_agent(**agent_config)

    return run_cfg, agent, env, runner
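
#  Hedged sketch of how setup_run might be called once per run of an
#  experiment.  The cfg['runs'] key and the per-run graph reset are
#  illustrative assumptions, not a confirmed energypy interface.
def _example_experiment_loop(cfg):
    import tensorflow as tf

    for run in cfg['runs']:
        #  give each run a fresh graph and session
        tf.reset_default_graph()

        with tf.Session() as sess:
            run_cfg, agent, env, runner = setup_run(cfg, run, sess)

            #  the episode loop would go here - stepping env with actions
            #  from agent while runner tracks rewards per step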