Example #1
def setup_experiment(
        sess,
        agent_config,
        env_config,
        paths,
        seed=None
):
    """
    Initialize an experiment

        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)
    """

    env = energypy.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed is not None:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent_memory = agent_config.pop('load_memory', None)

    if agent_memory:
        agent_config['load_memory_path'] = paths['memory']

    agent = energypy.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    #  TODO copy the dataset into the run folder as well
    logger.info('experiment setup complete')

    return agent, env
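
A minimal usage sketch (not from the source): the env id, agent id and the keys of the paths dict below are illustrative assumptions, shown only to indicate how setup_experiment might be called inside a TensorFlow 1.x session.

import tensorflow as tf

#  all of these values are assumptions for illustration
env_config = {'env_id': 'battery'}
agent_config = {'agent_id': 'dqn'}
paths = {
    'env_args': './run/env_args.txt',
    'agent_args': './run/agent_args.txt',
    'tb_act': './run/tb_act',
    'tb_learn': './run/tb_learn',
    'memory': './run/memory',
}

with tf.Session() as sess:
    agent, env = setup_experiment(
        sess, agent_config, env_config, paths, seed=42
    )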
Example #2
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory

    args
        sess (tf.Session)

    returns
        agent (energypy DQN agent)
        env (energypy Battery environment)
    """

    env = energypy.make_env('2048', observation_dims='2D')

    #  use high learning rate to get weight changes
    agent = energypy.make_agent(agent_id='dqn',
                                sess=sess,
                                env=env,
                                total_steps=10,
                                discount=0.9,
                                memory_type='deque',
                                learning_rate=1.0,
                                double_q=double_q,
                                update_target_net=100,
                                network='conv',
                                filters=(8, 16),
                                kernels=(2, 2),
                                strides=(1, 1))

    for _ in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
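
A hedged usage sketch for setup_agent, assuming only what the snippet above shows: open a TensorFlow 1.x session, build the agent, and unpack the sampled batch.

import tensorflow as tf

with tf.Session() as sess:
    agent, batch, env = setup_agent(sess, double_q=True)
    #  batch was drawn from the agent's deque memory by setup_agent
    print(type(batch))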
Example #3
def setup_run(cfg, run, sess):
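    """
    Sets up a single run of an experiment: run config, logger,
    runner, environment and agent

    args
        cfg - experiment config
        run - identifier of the run within the experiment
        sess (tf.Session)

    returns
        run_cfg, agent, env, runner
    """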
    run_cfg = make_run_config(cfg, run)

    run_logger = make_new_logger('run_setup', run_cfg['run_dir'])
    runner = Runner(sess, run_cfg)
    dump_config(run_cfg, run_logger)

    env_config = run_cfg['env']
    env = make_env(**env_config)

    if hasattr(env.observation_space, 'info') and hasattr(
            env.state_space, 'info'):
        run_logger.debug(json.dumps({'state_info': env.state_space.info}))
        run_logger.debug(
            json.dumps({'observation_info': env.observation_space.info}))

    agent_config = run_cfg['agent']
    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['tensorboard_dir'] = run_cfg['tensorboard_dir']

    agent = make_agent(**agent_config)

    return run_cfg, agent, env, runner
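
An illustrative call of setup_run. The structure of cfg is not shown in the source, so the dict below (and the 'run_0' name) is purely a hypothetical stand-in for however the experiment config is produced upstream.

import tensorflow as tf

#  assumed config layout - make_run_config's real expectations may differ
cfg = {
    'expt': {'name': 'example'},
    'run_0': {
        'env': {'env_id': 'battery'},
        'agent': {'agent_id': 'dqn'},
    },
}

with tf.Session() as sess:
    run_cfg, agent, env, runner = setup_run(cfg, 'run_0', sess)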