def evaluate(environment_config, agent_config, options):
    """
    Evaluate an agent interacting with an environment.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options
    """
    # Verbosity of the gym logger follows the --verbose flag.
    level = gym.logger.DEBUG if options['--verbose'] else gym.logger.INFO
    gym.logger.set_level(level)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally name the run directory after the agent configuration file.
    run_directory = None
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name

    # Option values arrive as strings; normalise the seed to an int when set.
    seed = options['--seed']
    options['--seed'] = int(seed) if seed is not None else None

    show = not options['--no-display']
    evaluation = Evaluation(env,
                            agent,
                            run_directory=run_directory,
                            num_episodes=int(options['--episodes']),
                            sim_seed=options['--seed'],
                            recover=options['--recover'],
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    # Post-run analysis, unless this run is part of a benchmark.
    if options['--analyze'] and not options['<benchmark>']:
        RunAnalyzer([evaluation.monitor.directory])
    return os.path.relpath(evaluation.monitor.directory)
def evaluate(environment_config, agent_config, options):
    """
    Evaluate an agent interacting with an environment.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options
    """
    # Base logging configuration, upgraded to verbose when requested.
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Derive the run directory name from the agent configuration when asked.
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name
    else:
        run_directory = None

    # Option values arrive as strings; normalise the seed to an int when set.
    seed = options['--seed']
    options['--seed'] = int(seed) if seed is not None else None

    show = not options['--no-display']
    evaluation = Evaluation(env,
                            agent,
                            run_directory=run_directory,
                            num_episodes=int(options['--episodes']),
                            sim_seed=options['--seed'],
                            recover=options['--recover'] or options['--recover-from'],
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()
    return os.path.relpath(evaluation.monitor.directory)
def test_evaluation(tmpdir):
    """A short random-agent training run should leave monitor artifacts on disk."""
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env)
    evaluation = Evaluation(env,
                            agent,
                            directory=tmpdir.strpath,
                            num_episodes=3,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    evaluation.train()

    # The monitor should have written manifest, metadata and stats files.
    names = [artifact.basename for artifact in tmpdir.listdir()]
    for tag in ('manifest', 'metadata', 'stats'):
        assert any(tag in name for name in names)
def test_evaluation(tmpdir):
    """A short random-agent training run should leave monitor artifacts on disk."""
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env)
    evaluation = Evaluation(env,
                            agent,
                            directory=tmpdir.strpath,
                            num_episodes=3,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    # TODO: dirty fix until merge of https://github.com/openai/gym/pull/1362
    evaluation.monitor._monitor = True
    evaluation.train()

    # The monitor should have written manifest, metadata and stats files.
    names = [artifact.basename for artifact in tmpdir.listdir()]
    for tag in ('manifest', 'metadata', 'stats'):
        assert any(tag in name for name in names)
def test_evaluation(tmpdir):
    """A short random-agent training run should leave run-directory artifacts."""
    env = gym.make('CartPole-v0')
    agent = RandomUniformAgent(env)
    evaluation = Evaluation(env,
                            agent,
                            directory=tmpdir,
                            num_episodes=3,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    evaluation.train()

    # The run directory should contain manifest, metadata and stats files.
    names = [entry.name for entry in evaluation.run_directory.iterdir()]
    for tag in ('manifest', 'metadata', 'stats'):
        assert any(tag in name for name in names)
def evaluate(environment_config, agent_config, options):
    """
    Evaluate an agent interacting with an environment.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options
    """
    gym.logger.set_level(gym.logger.INFO)
    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally build an output directory out of both configuration names.
    directory = None
    if options['--name-from-config']:
        env_name = os.path.basename(environment_config).split('.')[0]
        agent_name = os.path.basename(agent_config).split('.')[0]
        directory = os.path.join(Evaluation.OUTPUT_FOLDER, env_name, agent_name)

    # Option values arrive as strings; normalise the seed to an int when set.
    seed = options['--seed']
    options['--seed'] = int(seed) if seed is not None else None

    show = not options['--no-display']
    evaluation = Evaluation(env,
                            agent,
                            directory=directory,
                            num_episodes=int(options['--episodes']),
                            sim_seed=options['--seed'],
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    # Post-run analysis, unless this run is part of a benchmark.
    if options['--analyze'] and not options['<benchmark>']:
        RunAnalyzer([evaluation.monitor.directory])
    return os.path.relpath(evaluation.monitor.directory)
from rl_agents.agents.common.factory import load_agent, load_environment # Get the environment and agent configurations from the rl-agents repository # %cd /content/rl-agents/scripts/ env_config = 'configs/IntersectionEnv/env.json' agent_config = 'configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json' env = load_environment(env_config) agent = load_agent(agent_config, env) evaluation = Evaluation(env, agent, num_episodes=3000, display_env=False) print(f"Ready to train {agent} on {env}") """Run tensorboard locally to visualize training.""" # Commented out IPython magic to ensure Python compatibility. # %tensorboard --logdir "{evaluation.directory}" """Start training. This should take about an hour.""" evaluation.train() """Progress can be visualised in the tensorboard cell above, which should update every 30s (or manually). You may need to click the *Fit domain to data* buttons below each graph. ## Testing Run the learned policy for a few episodes. """ env = load_environment(env_config) env.configure({"offscreen_rendering": True}) agent = load_agent(agent_config, env) evaluation = Evaluation(env, agent, num_episodes=3, recover=True) evaluation.test() show_videos(evaluation.run_directory)
def evaluate(environment_config, agent_config, options):
    """
    Evaluate an agent interacting with an environment.

    Runs an optional training phase followed by an optional testing phase;
    when both are requested, the test phase reuses the freshly trained agent.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file, or the
                         string "None" to read the agent configuration from
                         the environment configuration instead
    :param options: the evaluation options
    """
    # Base logging configuration, upgraded to verbose when requested.
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    # Unique run directory name: <agent-config-name>_<timestamp>_<pid>.
    run_directory = None
    if options['--name-from-config']:
        run_directory = "{}_{}_{}".format(
            Path(agent_config).with_suffix('').name,
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S'),
            os.getpid())

    # Option values arrive as strings; normalise the seed to an int when set.
    options['--seed'] = int(options['--seed']) if options['--seed'] is not None else None

    env = load_environment(environment_config)
    if agent_config == "None":
        # Fall back to the agent configuration embedded in the environment config.
        agent_config = env.config["agent_config"]
        if "auto_tau" in agent_config["exploration"] and agent_config["exploration"]["auto_tau"]:
            # Derive the exploration temperature from the episode budget.
            # BUG FIX: the original computed int(options['--episodes'] * controlled_vehicles),
            # which *repeats* the docopt string before parsing it (e.g. '100' * 2 -> '100100'
            # -> 100100). Convert the episode count to int first, then scale; the two forms
            # are identical when the option is already an int, so the fix is backward-safe.
            agent_config["exploration"]["tau"] = (
                env.config["policy_frequency"]
                * env.config["duration"]
                * int(options['--episodes'])
                * env.config["controlled_vehicles"]) / 50
    agent = load_agent(agent_config, env)

    # TODO: different display options for agent, env, rewards
    if options['--offscreen_rendering']:
        env.config['offscreen_rendering'] = True

    display = not options['--no-display']
    evaluation_train = Evaluation(env,
                                  agent,
                                  run_directory=run_directory,
                                  num_episodes=int(options['--episodes']),
                                  sim_seed=options['--seed'],
                                  recover=options['--recover'] or options['--recover-from'],
                                  display_env=display,
                                  display_agent=display,
                                  display_rewards=display,
                                  training=options['--train'],
                                  options=options)
    if options['--train']:
        evaluation_train.train()
    else:
        evaluation_train.close()

    if options['--test']:
        # Reuse the just-trained agent when training happened; otherwise load a
        # fresh one (the original loaded an agent and then discarded it when
        # training had occurred — the redundant load is skipped here).
        agent_test = evaluation_train.agent if options['--train'] else load_agent(agent_config, env)
        evaluation_test = Evaluation(env,
                                     agent_test,
                                     run_directory=run_directory,
                                     num_episodes=int(options['--episodes_test']),
                                     sim_seed=options['--seed'],
                                     recover=options['--recover'] or options['--recover-from'],
                                     display_env=display,
                                     display_agent=display,
                                     display_rewards=display,
                                     training=False,
                                     options=options)
        evaluation_test.test()