def main():
    """Train a SimpleQAgent on the CartPole-v0 gym environment.

    Builds a fixed configuration (constant exploration, single linear
    layer), runs the agent through a Runner, and logs progress every
    10 episodes.
    """
    gym_id = 'CartPole-v0'

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGym(gym_id, monitor=False, monitor_video=False)

    config = Configuration(
        repeat_actions=1,
        actions=env.actions,
        states=env.states,
        exploration='constant',
        exploration_args=[0.1],
        network=[{
            "type": "linear",
            "size": 16
        }]
    )

    agent = create_agent(SimpleQAgent, config)
    runner = Runner(agent, env)

    def episode_finished(r):
        # Report every 10th episode. Log r.episode/r.timestep directly:
        # the original printed r.episode + 1 while testing r.episode % 10,
        # so it reported episodes 11, 21, ... when firing at 10, 20, ...
        # (also inconsistent with the other main() variants in this file).
        if r.episode % 10 == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))
def main():
    """CLI entry point: train an agent on an OpenAI Universe environment.

    Parses command-line arguments, builds the agent configuration
    (optionally from JSON files), wires up a Runner, and trains for the
    requested number of episodes with periodic progress logging and
    optional model saving/loading.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000,
                        help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000 * 60,
                        help="Maximum number of timesteps per episode")
    # Monitoring options are currently disabled:
    # parser.add_argument('-m', '--monitor', help="Save results to this directory")
    # parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIUniverse(args.gym_id)
    env.configure(remotes=1)

    default = dict(
        repeat_actions=1,
        actions=env.actions,
        states=env.states,
        max_episode_length=args.max_timesteps
    )

    if args.agent_config:
        config = Configuration.from_json(args.agent_config)
    else:
        config = Configuration()
    config.default(default)

    if args.network_config:
        network_config = Configuration.from_json(
            args.network_config).network_layers
    else:
        if config.network_layers:
            network_config = config.network_layers
        else:
            raise TensorForceError("Error: No network configuration provided.")

    if args.debug:
        print("Configuration:")
        print(config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config['loglevel']])

    # Preprocessing is currently disabled; re-enable once
    # build_preprocessing_stack is available:
    # preprocessing_config = config['preprocessing']
    # if preprocessing_config:
    #     stack = build_preprocessing_stack(preprocessing_config)
    #     config.states['shape'] = stack.shape(config.states['shape'])
    # else:
    #     stack = None
    stack = None

    agent = create_agent(args.agent, config, network_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(config)

    runner = Runner(agent, env, preprocessor=stack,
                    repeat_actions=config.repeat_actions)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {} ()".format(save_dir))
        runner.save_model(args.save, args.save_episodes)

    # Report roughly 1000 times over the run. Clamp to at least 1:
    # for runs shorter than 1000 episodes the integer division yields 0,
    # which crashed the modulo in episode_finished with ZeroDivisionError.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(
                np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(args.episodes, args.max_timesteps,
               episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.episode + 1))

    # The --monitor option is commented out above, so the parsed args
    # object has no 'monitor' attribute; the bare `if args.monitor:`
    # raised AttributeError after every successful run.
    if getattr(args, 'monitor', None):
        env.gym.monitor.close()
    env.close()
def main():
    """CLI entry point: train an agent on an OpenAI Universe environment.

    Parses command-line arguments, builds the agent configuration
    (optionally from JSON files), wires up a Runner, and trains for the
    requested number of episodes with periodic progress logging and
    optional model saving/loading.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000,
                        help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000 * 60,
                        help="Maximum number of timesteps per episode")
    # Monitoring options are currently disabled:
    # parser.add_argument('-m', '--monitor', help="Save results to this directory")
    # parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIUniverse(args.gym_id)
    env.configure(remotes=1)

    default = dict(
        repeat_actions=1,
        actions=env.actions,
        states=env.states,
        max_episode_length=args.max_timesteps
    )

    if args.agent_config:
        config = Configuration.from_json(args.agent_config)
    else:
        config = Configuration()
    config.default(default)

    if args.network_config:
        network_config = Configuration.from_json(args.network_config).network_layers
    else:
        if config.network_layers:
            network_config = config.network_layers
        else:
            raise TensorForceError("Error: No network configuration provided.")

    if args.debug:
        print("Configuration:")
        print(config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config.loglevel])

    # No preprocessing stack is built in this variant.
    stack = None

    agent = create_agent(args.agent, config, network_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(config)

    runner = Runner(agent, env, preprocessor=stack, repeat_actions=config.repeat_actions)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))
        runner.save_model(args.save, args.save_episodes)

    # Report roughly 1000 times over the run. Clamp to at least 1:
    # for runs shorter than 1000 episodes the integer division yields 0,
    # which crashed the modulo in episode_finished with ZeroDivisionError.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    # The --monitor option is commented out above, so the parsed args
    # object has no 'monitor' attribute; the bare `if args.monitor:`
    # raised AttributeError after every successful run.
    if getattr(args, 'monitor', None):
        env.gym.monitor.close()
    env.close()