def __init__(self, agent_spec, agent_additional_parameters, agents_count):
    self.agents = []
    first_agent = Agent.from_spec(spec=agent_spec, kwargs=agent_additional_parameters)
    self.agents.append(first_agent)
    self.model = first_agent.model
    self.stop = False
    for _ in range(agents_count - 1):
        agent = Agent.from_spec(spec=agent_spec, kwargs=agent_additional_parameters)
        agent.model.close()
        agent.model = self.model
        self.agents.append(agent)
def restore_agent(self, path: str, model_path: str = None):
    """Deserialize the strategy's learning agent from a file.

    Arguments:
        path: The `str` path of the file the agent specification is stored in.
            The `.json` file extension will be automatically appended if not provided.
        model_path (optional): The `str` path of the file or directory the agent
            checkpoint is stored in. If not provided, `model_path` defaults to
            `{path_without_dot_json}/agent`.
    """
    path_with_ext = path if path.endswith('.json') else f'{path}.json'

    with open(path_with_ext) as json_file:
        spec = json.load(json_file)

    # `json.load` returns a plain dict, so index by key rather than attribute.
    self._agent_spec = spec['agent']
    self._network_spec = spec['network']

    self._agent = Agent.from_spec(
        spec=self._agent_spec,
        kwargs=dict(
            network=self._network_spec,
            states=self._environment.states,
            actions=self._environment.actions))

    path_without_ext = path_with_ext.replace('.json', '')
    model_path = model_path or f'{path_without_ext}/agent'

    self._agent.restore_model(file=model_path)
    self._runner = Runner(agent=self._agent, environment=self._environment)
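# Usage sketch (hedged): assumes a strategy object built like the constructor
# below, with a matching save step that wrote `{path}.json` plus a checkpoint;
# the paths and the `strategy` name are illustrative, not from the snippet.
strategy.restore_agent(path='agents/ppo_strategy')  # loads agents/ppo_strategy.json, checkpoint from agents/ppo_strategy/agent
strategy.restore_agent(path='agents/ppo_strategy.json', model_path='checkpoints/ppo/agent')  # explicit checkpoint path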
def __init__(self,
             environment: TradingEnvironment,
             agent_spec: Dict = None,
             network_spec: Dict = None,
             **kwargs):
    """
    Arguments:
        environment: A `TradingEnvironment` instance for the agent to trade within.
        agent_spec: A specification dictionary for the `Tensorforce` agent.
        network_spec: A specification dictionary for the `Tensorforce` agent's model network.
        kwargs (optional): Optional keyword arguments to adjust the strategy.
    """
    self._environment = environment
    self._max_episode_timesteps = kwargs.get('max_episode_timesteps', None)

    if agent_spec and network_spec:
        self._agent_spec = agent_spec
        self._network_spec = network_spec
        self._agent = Agent.from_spec(
            spec=agent_spec,
            kwargs=dict(
                network=network_spec,
                states=environment.states,
                actions=environment.actions))
        self._runner = Runner(agent=self._agent, environment=environment)
def __init__(self, env=None, device=None):
    self.env = env

    # The original try/except here had identical branches, so a plain
    # assignment is equivalent.
    if self.env.saver.model_file_name == "":
        self.env.saver.model_file_name = (
            self.env.model_name + "_" + self.env.dataDirectory.replace("/", ""))

    if not os.path.exists(self.env.saver.model_directory + "/model"):
        os.mkdir(self.env.saver.model_directory + "/model")

    self.env.saver.model_file_path = (
        self.env.saver.model_directory + "/model/" + self.env.saver.model_file_name)

    self.agent = Agents.from_spec(
        self.env.settings['agent'],
        kwargs=dict(
            states=self.env.states,
            actions=dict(type='int', num_actions=self.env.actions),
            network=self.env.settings['network'],
            device=device))

    # Restore a previously saved model if one exists; start fresh otherwise.
    try:
        self.agent.restore_model(self.env.saver.model_directory + "/model")
    except Exception:
        pass
def __init__(self, agent, environments):
    if not util.is_iterable(x=environments):
        raise TensorforceError.type(
            name='parallel-runner', argument='environments', value=environments)
    elif len(environments) == 0:
        raise TensorforceError.value(
            name='parallel-runner', argument='environments', value=environments)

    if not isinstance(agent, Agent):
        agent = Agent.from_spec(
            spec=agent,
            states=environments[0].states(),
            actions=environments[0].actions(),
            parallel_interactions=len(environments))

    if len(environments) > agent.parallel_interactions:
        raise TensorforceError(message="Too many environments.")

    self.agent = agent
    self.environments = tuple(environments)
    self.agent.initialize()
    self.global_episode = self.agent.episode
    self.global_timestep = self.agent.timestep
    self.episode_rewards = list()
    self.episode_timesteps = list()
    self.episode_times = list()
def build(agent_spec, actor, env):
    agent = Agent.from_spec(
        spec=agent_spec,
        kwargs=dict(states=env.states, actions=env.actions, network=actor))
    runner = Runner(agent=agent, environment=env, repeat_actions=1)
    return runner, agent
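# Usage sketch (hedged): `env` is any Tensorforce environment exposing
# `states`/`actions`; the PPO spec and the two-layer network are illustrative
# values, not taken from the surrounding code.
ppo_spec = dict(type='ppo_agent', batch_size=64)
actor_network = [dict(type='dense', size=64), dict(type='dense', size=64)]
runner, agent = build(agent_spec=ppo_spec, actor=actor_network, env=env)
runner.run(episodes=1000)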
def __init__(self, agent_type, network, action_type, preprocessor_type, reward, tag):
    rf = reward_functions.__dict__[reward]
    super().__init__("-".join([agent_type, network, action_type, reward, tag]))
    agent_spec = create_spec(action_type, agent_type, network)
    self._tf_agent = Agent.from_spec(agent_spec, {})
    self.action_translator = get_action_translator(action_type)
    self.preprocessor = get_observation_preprocessor(preprocessor_type)
def generate_tensorforce_agent(self):
    with open('tensorforce_configs/mlp2_128_network.json', 'r') as fp:
        network_spec = json.load(fp=fp)
    with open('tensorforce_configs/ppo.json', 'r') as fp:
        agent_config = json.load(fp=fp)
    tensorforce_agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=self.environment.states,
            actions=self.environment.actions,
            network=network_spec,
        ))
    return tensorforce_agent
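# Hedged sketch of what the two JSON files read above might contain; the
# exact keys depend on the Tensorforce version, so treat this as illustrative.
#
# tensorforce_configs/mlp2_128_network.json:
#   [{"type": "dense", "size": 128}, {"type": "dense", "size": 128}]
#
# tensorforce_configs/ppo.json:
#   {"type": "ppo_agent", "batch_size": 64,
#    "step_optimizer": {"type": "adam", "learning_rate": 1e-3}}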
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--config', help="Configuration file")
    args = parser.parse_args()
    print(args)
    sys.stdout.flush()

    if args.config is not None:
        with open(args.config, 'r') as fp:
            config = json.load(fp=fp)
    else:
        raise TensorforceError("No configuration provided.")

    if 'agent' not in config:
        raise TensorforceError("No agent configuration provided.")
    else:
        agent_config = config['agent']

    if 'network_spec' not in config:
        network_spec = None
        print("No network configuration provided.")
    else:
        network_spec = config['network_spec']

    if 'env' not in config:
        raise TensorforceError("No environment configuration provided.")
    else:
        env_config = config['env']

    environment = RecTableEnv(config)
    environment.set_up()
    agent_config['env'] = environment

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec,
            batch_data=environment.get_input_tensor()))
    environment.set_session(agent.model.get_session())

    print("********** Configuration ************")
    for key, value in agent_config.items():
        print(str(key) + ": {}".format(value))

    agent.run_worker()
    agent.close()
def run_experiment(self, environment, experiment_num=0):
    config = copy(self.config)
    max_episodes = config.pop('max_episodes')
    max_episode_timesteps = config.pop('max_episode_timesteps')
    network_spec = config.pop('network')

    agent = Agent.from_spec(
        spec=config,
        kwargs=dict(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec))

    if experiment_num == 0 and self.history_data:
        logging.info("Attaching history data to runner")
        history_data = self.history_data
    else:
        history_data = None

    if experiment_num == 0 and self.load_model_file:
        logging.info("Loading model data from file: {}".format(self.load_model_file))
        agent.load_model(self.load_model_file)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        history=history_data
        # save_path=args.model,
        # save_episodes=args.save_model
    )

    environment.reset()
    agent.reset()

    runner.run(episodes=max_episodes, max_episode_timesteps=max_episode_timesteps,
               episode_finished=self.episode_finished)

    return dict(
        initial_reset_time=0,
        episode_rewards=runner.episode_rewards,
        episode_timesteps=runner.episode_timesteps,
        episode_end_times=runner.episode_times)
def __init__(self, agent, environment, evaluation_environment=None):
    if not isinstance(agent, Agent):
        agent = Agent.from_spec(
            spec=agent,
            states=environment.states(),
            actions=environment.actions())

    self.agent = agent
    self.environment = environment
    self.evaluation_environment = evaluation_environment
    self.agent.initialize()
    self.global_episode = self.agent.episode
    self.global_timestep = self.agent.timestep
    self.episode_rewards = list()
    self.episode_timesteps = list()
    self.episode_times = list()
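# Usage sketch (hedged): because the constructor accepts either an Agent
# instance or a spec dict, both calls below are equivalent in spirit; the
# spec values ('ppo', 'auto' network, batch size) are illustrative.
runner = Runner(agent=my_agent, environment=environment)  # pre-built Agent
runner = Runner(agent=dict(type='ppo', network='auto', batch_size=10),
                environment=environment)                  # spec dict, built via Agent.from_spec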
def main():
    env = EnvArDrone()
    restore_model_path = "./models"

    # Optional GPU usage configuration
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)

    # Network configuration
    network_spec = [
        dict(type='dense', size=64, activation='tanh'),
        dict(type='dense', size=64, activation='tanh')
    ]

    agent_file = "../configs/ppo.json"  # Agent configuration file
    with open(agent_file, 'r') as fp:
        agent_config = json.load(fp=fp)
    # agent_config['execution']['session_config'] = tf.ConfigProto(gpu_options=gpu_options)

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=env.states,
            actions=env.actions,
            network=network_spec,
        ))

    if os.path.exists(restore_model_path):
        print("Restoring saved model....")
        agent.restore_model(directory=restore_model_path)

    print("Running model trained on {agent} for Environment '{env}'".format(agent=agent, env=env))

    # Running policy
    state = env.reset()
    try:
        while True:
            actions = agent.act(state, deterministic=True)
            state, _, _ = env.execute(actions)
    except KeyboardInterrupt:
        print("Run finished")
def __init__(self, env, save_dir, deterministic=True):
    self.save_dir = save_dir
    self.env = env
    self.deterministic = deterministic

    # training spec
    with open(os.path.join(save_dir, 'training_spec.json'), 'r') as f:
        training_spec = json.load(f)
    self.repeat_actions = training_spec["repeat_actions"]
    self.momentum = training_spec["momentum"]
    self.max_episode_timesteps = training_spec["max_episode_timesteps"]
    self.num_agent_clones = env.gym.dog_count

    # network spec
    with open(os.path.join(save_dir, 'network_spec.json'), 'r') as f:
        network_spec = json.load(f)

    # preprocessing spec (optional)
    if os.path.exists(os.path.join(save_dir, 'preprocessing_spec.json')):
        with open(os.path.join(save_dir, 'preprocessing_spec.json'), 'r') as f:
            preprocessing_spec = json.load(f)
    else:
        preprocessing_spec = None

    # agent spec
    with open(os.path.join(save_dir, 'agent_spec.json'), 'r') as f:
        agent_spec = json.load(f)
    self.agent = Agent.from_spec(
        spec=agent_spec,
        kwargs=dict(
            states=env.states,
            actions=env.actions,
            network=network_spec,
            states_preprocessing=preprocessing_spec))

    self.load_model()
def __init__(self, env=None, device=None):
    '''Wrapper agent class'''
    self.env = env

    if not self.env.settings['agent']['type'] == "DEEP":
        # Hide TF loading logs
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        from tensorforce.agents import Agent as Agents

        # Check if there is any existing model for this config;
        # make the model directory if it doesn't exist.
        # (The original try/except had identical branches, so a plain
        # assignment is equivalent.)
        if self.env.saver.model_file_name == "":
            self.env.saver.model_file_name = self.env.model_name
        if not os.path.exists(self.env.saver.model_directory + "/model"):
            os.mkdir(self.env.saver.model_directory + "/model")
        self.env.saver.model_file_path = self.env.saver.model_directory + "/model/"

        # Load agent from current config
        self.agent = Agents.from_spec(
            self.env.settings['agent'],
            kwargs=dict(
                states=self.env.states,
                actions=dict(type='int', num_actions=self.env.actions),
                network=self.env.settings['network'],
                device=device))

        # Restore the agent's weights if a saved model already exists
        try:
            self.agent.restore_model(self.env.saver.model_file_path)
            print("agent loaded")
        except Exception:
            print("agent not loaded")
    else:
        from .DEEP import DEEP
        # Load deep learning model (Keras model); only works in eval mode
        self.agent = DEEP("deep_model.h5")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-P', '--port', default=6025,
                        help="Port on which the UE4 Game listens on for incoming RL-client connections")
    parser.add_argument('-H', '--host', default=None,
                        help="Hostname of the UE4 Game (default: localhost)")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=None, help="Number of episodes")
    parser.add_argument('-t', '--timesteps', type=int, default=None, help="Number of timesteps")
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d', '--deterministic', action='store_true', default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    parser.add_argument('-R', '--random-test-run', action="store_true",
                        help="Do a quick random test run on the env")
    args = parser.parse_args()

    # logging.basicConfig(filename="logfile.txt", level=logging.INFO)
    logging.basicConfig(stream=sys.stderr)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # We have to connect this remote env to get the specs.
    # We also discretize axis-mappings b/c we will use a deep q-network.
    # Use num_ticks==6 to match Nature paper by Mnih et al.
    # ("human cannot press fire button with more than 10Hz", dt=1/60)
    # TODO: Need to build in capturing and concat'ing last 4 images (plus 8-bit conversion!) into 1 input state signal.
    # TODO: Use pre-processor for that.
    environment = UE4Environment(host=args.host, port=args.port, connect=True,
                                 discretize_actions=True, num_ticks=6)
    environment.seed(200)

    # Do a quick random test-run with image capture of the first n images -> then exit after 1000 steps.
    if args.random_test_run:
        # Reset the env.
        s = environment.reset()
        img = Image.fromarray(s, "RGB")
        # Save first received image as a sanity-check.
        img.save("reset.png")
        for i in range(1000):
            s, is_terminal, r = environment.execute(
                actions=random.choice(range(environment.actions["num_actions"])))
            if i < 10:
                img = Image.fromarray(s, "RGB")
                img.save("{:03d}.png".format(i))
            logging.debug("i={} r={} term={}".format(i, r, is_terminal))
            if is_terminal:
                environment.reset()
        quit()

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {} after {} timesteps. Steps Per Second {}".format(
                r.agent.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    runner.run(timesteps=args.timesteps, episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished)

    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.agent.episode))
import json

from tensorforce.agents import Agent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

# Create an OpenAI Gym environment
env = OpenAIGym('Pendulum-v0', visualize=False)

network_path = './pendulum_ppo_network.json'
agent_path = './pendulum_ppo.json'

with open(network_path, 'r') as fp:
    network_spec = json.load(fp=fp)
with open(agent_path, 'r') as fp:
    agent_config = json.load(fp=fp)

agent = Agent.from_spec(
    spec=agent_config,
    kwargs=dict(states=env.states, actions=env.actions, network=network_spec))

# Create the runner
runner = Runner(agent=agent, environment=env)

# Callback function printing episode statistics
def episode_finished(r):
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode, ts=r.episode_timestep, reward=r.episode_rewards[-1]))
    return True

# Start learning
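# The snippet above stops at "# Start learning"; a minimal continuation under
# the usual Runner API (episode count and timestep cap chosen arbitrarily):
runner.run(episodes=300, max_episode_timesteps=200, episode_finished=episode_finished)
runner.close()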
def main():
    theMarket = Market(
        data_path="data/%s_Candlestick_4_Hour_BID_01.08.2018-30.11.2018.csv"
        % CURRENCY_PAIR)  # , indicators={'ADX': 12})
    MyRecord = Record()
    MyOrderManager = OrderManager(market=theMarket, record=MyRecord)
    MyTrader = SureFireTrader(orderManager=MyOrderManager)

    SLTP_pips = [20, 25, 30]
    start_order_type = ['BUY', 'SELL']
    max_level_limit = [2, 3, 4]
    window_size = 12

    # Create a RL agent
    with open("config/%s.json" % AGENT_METHOD, 'r') as fp:
        agent_config = json.load(fp=fp)
    with open("config/conv2d.json", 'r') as fp:
        network_config = json.load(fp=fp)
    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=dict(type='float', shape=(window_size, window_size, 4)),  # [Open, High, Low, Close]
            actions=dict(
                SLTP_pips=dict(type='int', num_actions=len(SLTP_pips)),  # [20, 25, 30]
                start_order_type=dict(type='int', num_actions=len(start_order_type)),  # ['BUY', 'SELL']
                max_level_limit=dict(type='int', num_actions=len(max_level_limit))  # [2, 3, 4]
            ),
            network=network_config))

    if not os.path.exists("save_model/%s/trades" % AGENT_METHOD):
        os.makedirs("save_model/%s/trades" % AGENT_METHOD)

    reward_history = []
    for episode in trange(100 + 1, ascii=True):
        profit_history = []
        this_reward_history = []
        idle_count = 0
        round_count = 0
        episode_end = False
        max_idle_limit = 12  # future action
        MyRecord.reset()
        MyOrderManager.reset()
        theMarket.reset(start_index=window_size)
        pbar = tqdm()

        while theMarket.next():  # main loop, essential
            pbar.update(1)  # simple GUI

            # ------------------- ROUTINES -------------------
            MyOrderManager.orders_check()  # routine, after market.next
            trade_status, other_detail = MyTrader.status_check()  # routine, after orders_check

            # ------------------- GET STATE ------------------
            ohlc = theMarket.get_ohlc(size=window_size)
            indicators = theMarket.get_indicators(size=window_size)
            O, H, L, C = (gaf_encode(ohlc['Open']), gaf_encode(ohlc['High']),
                          gaf_encode(ohlc['Low']), gaf_encode(ohlc['Close']))
            # ADX = gaf_encode(indicators['ADX'])
            state = np.stack((O, H, L, C), axis=-1)

            # ------------------ TAKE ACTION -----------------
            if trade_status == 'TRADE_OVER':
                # ------------- GET REWARD & TRAIN -------------
                if theMarket.get_current_index() > window_size:
                    # Alternative SQN-style reward (kept for reference):
                    # profit = sum(round(order['profit'], 5) for order in other_detail if order['profit'] > 0)
                    # loss = sum(round(order['profit'], 5) for order in other_detail if order['profit'] < 0)
                    # this_profit_factor = MyRecord.get_profit_factor()
                    # this_trade_length = len(MyRecord.get_history())
                    # reward = this_profit_factor * np.sqrt(this_trade_length)  # SQN
                    raw_reward = (MyRecord.get_net_profit() - profit_history[-1]) / theMarket.get_pip()
                    penalty = 1.0 - 0.1 * len(other_detail)
                    if raw_reward > 0:
                        reward = raw_reward * penalty
                    elif len(other_detail) == 0:
                        reward = 0
                    else:
                        reward = -np.abs(other_detail[0]['TP'] - other_detail[0]['price']) / theMarket.get_pip()
                    if theMarket.get_current_index() >= theMarket.get_data_length() - max_idle_limit * max_level_limit[-1]:
                        episode_end = True
                    # Add experience; the agent automatically updates its model according to batch size.
                    agent.observe(reward=reward, terminal=episode_end)
                    this_reward_history.append(reward)
                    if episode_end:
                        if episode % 100 == 0:
                            this_dir = 'save_model/%s/%04d' % (AGENT_METHOD, episode)
                            if not os.path.exists(this_dir):
                                os.makedirs(this_dir)
                            agent.save_model(this_dir + '/model')
                        pbar.close()
                        reward_history.append(this_reward_history)
                        with open('save_model/%s/trades/episode_%04d.pkl' % (AGENT_METHOD, episode), 'wb') as f:
                            pickle.dump(MyRecord.get_history(), f, protocol=-1)
                        break

                action = agent.act(state)  # Get prediction from agent, execute
                SL_pip = SLTP_pips[action['SLTP_pips']] * 2
                TP_pip = SLTP_pips[action['SLTP_pips']]
                MyTrader.set_max_level(max_level_limit[action['max_level_limit']])
                first_order_type = start_order_type[action['start_order_type']]
                MyTrader.new_trade(SL_pip=SL_pip, TP_pip=TP_pip, start_order_type=first_order_type)
                round_count += 1
                idle_count = 0
                logging.info("NewTradeStarted: current net profit=%f (price@%f)"
                             % (MyRecord.get_net_profit(), theMarket.get_market_price()))

            elif trade_status == 'ADD_ORDER':
                last_order = MyTrader.get_orders_detail()[-1]
                if last_order['order_type'] == 'BUY':
                    price = last_order['price'] - theMarket.get_pip(TP_pip)
                elif last_order['order_type'] == 'SELL':
                    price = last_order['price'] + theMarket.get_pip(TP_pip)
                MyTrader.add_reverse_order(price=price, SL_pip=SL_pip, TP_pip=TP_pip)
                idle_count = 0

            elif trade_status == 'ERROR':
                logging.warning("SureFireError: order issues...")

            elif trade_status == 'NONE':
                idle_count += 1
                if idle_count >= max_idle_limit:
                    # ------------- GET REWARD & TRAIN ------------- (same scheme as above)
                    raw_reward = (MyRecord.get_net_profit() - profit_history[-1]) / theMarket.get_pip()
                    penalty = 1.0 - 0.1 * len(other_detail)
                    if raw_reward > 0:
                        reward = raw_reward * penalty
                    elif len(other_detail) == 0:
                        reward = 0
                    else:
                        reward = -np.abs(other_detail[0]['TP'] - other_detail[0]['price']) / theMarket.get_pip()
                    if theMarket.get_current_index() >= theMarket.get_data_length() - max_idle_limit * max_level_limit[-1]:
                        episode_end = True
                    agent.observe(reward=reward, terminal=episode_end)
                    this_reward_history.append(reward)
                    if episode_end:
                        if episode % 100 == 0:
                            this_dir = 'save_model/%s/%04d' % (AGENT_METHOD, episode)
                            if not os.path.exists(this_dir):
                                os.makedirs(this_dir)
                            agent.save_model(this_dir + '/model')
                        pbar.close()
                        reward_history.append(this_reward_history)
                        with open('save_model/%s/trades/episode_%04d.pkl' % (AGENT_METHOD, episode), 'wb') as f:
                            pickle.dump(MyRecord.get_history(), f, protocol=-1)
                        break

                    action = agent.act(state)  # Get prediction from agent, execute
                    SL_pip = SLTP_pips[action['SLTP_pips']] * 2
                    TP_pip = SLTP_pips[action['SLTP_pips']]
                    MyTrader.set_max_level(max_level_limit[action['max_level_limit']])
                    first_order_type = start_order_type[action['start_order_type']]
                    MyTrader.new_trade(SL_pip=SL_pip, TP_pip=TP_pip, start_order_type=first_order_type)
                    idle_count = 0
                    logging.info("NewTradeStarted: current net profit=%f (price@%f)"
                                 % (MyRecord.get_net_profit(), theMarket.get_market_price()))

            profit_history.append(MyRecord.get_net_profit())  # for plotting

        # MyRecord.show_details()
        # print("Rounds of Tradings: %d\n" % round_count)
        # with open('save_model/%s/trades/profit_history.pkl' % AGENT_METHOD, 'wb') as f:
        #     pickle.dump(profit_history, f, protocol=-1)
        with open('save_model/%s/trades/reward_history.pkl' % AGENT_METHOD, 'wb') as f:
            pickle.dump(reward_history, f, protocol=-1)
def main():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s"))
    logger.addHandler(console_handler)

    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-v', '--vizdoom-config', help="VizDoom configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    args = parser.parse_args()

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    # Guard against a missing network spec before inspecting the first layer.
    if network_spec is not None and network_spec[0]['type'] == 'conv2d':
        agent_config['states_preprocessing'] = [{'type': 'expand_dims', 'axis': -1}]
    else:
        agent_config['states_preprocessing'] = [{'type': 'flatten'}]

    logger.info("Start training")

    environment = ViZDoom(args.vizdoom_config)
    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=environment.states,
            actions=environment.actions,
            network=network_spec,
        ))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    def episode_finished(r):
        if r.episode % 100 == 0:
            sps = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(
                ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Episode timesteps: {}".format(r.episode_timestep))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    runner.run(
        timesteps=6000000,
        episodes=1000,
        max_episode_timesteps=10000,
        deterministic=False,
        episode_finished=episode_finished)

    # One greedy evaluation episode after training.
    terminal = False
    state = environment.reset()
    while not terminal:
        action = agent.act(state)
        state, terminal, reward = environment.execute(action)

    runner.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n', '--network', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=None, help="Number of episodes")
    parser.add_argument('-t', '--timesteps', type=int, default=None, help="Number of timesteps")
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d', '--deterministic', action='store_true',
                        help="Choose actions deterministically")
    parser.add_argument('-M', '--mode', choices=('tmux', 'child'), default='tmux', help="Starter mode")
    parser.add_argument('-W', '--num-workers', type=int, default=1, help="Number of worker agents")
    parser.add_argument('-C', '--child', action='store_true', help="Child process")
    parser.add_argument('-P', '--parameter-server', action='store_true', help="Parameter server")
    parser.add_argument('-I', '--task-index', type=int, default=0, help="Task index")
    parser.add_argument('-K', '--kill', action='store_true', help="Kill runners")
    parser.add_argument('-L', '--logdir', default='logs_async', help="Log directory")
    parser.add_argument('-D', '--debug', action='store_true', help="Show debug outputs")
    args = parser.parse_args()

    session_name = 'OpenAI-' + args.gym_id
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.child:
        # Start up child processes.
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(ps, index):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script, args.gym_id,
                '--agent', os.path.join(os.getcwd(), args.agent),
                '--network', os.path.join(os.getcwd(), args.network),
                '--num-workers', args.num_workers, '--child', '--task-index', index
            ]
            if args.episodes is not None:
                cmd_args.append('--episodes')
                cmd_args.append(args.episodes)
            if args.timesteps is not None:
                cmd_args.append('--timesteps')
                cmd_args.append(args.timesteps)
            if args.max_episode_timesteps is not None:
                cmd_args.append('--max-episode-timesteps')
                cmd_args.append(args.max_episode_timesteps)
            if args.deterministic:
                cmd_args.append('--deterministic')
            if ps:
                cmd_args.append('--parameter-server')
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + ['tmux new-session -d -s {} -n ps'.format(session_name)]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#!/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(ps=True, index=0)))

        for i in range(args.num_workers):
            name = 'worker{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(ps=False, index=i)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))
        os.system("\n".join(cmds))
        return 0

    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = []
    port = 12223
    for _ in range(args.num_workers):
        worker_hosts.append('127.0.0.1:{}'.format(port))
        port += 1
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # log_levels[agent.log_level])

    if args.agent is not None:
        with open(args.agent, 'r') as fp:
            agent = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network is not None:
        with open(args.network, 'r') as fp:
            network = json.load(fp=fp)
    else:
        network = None
        logger.info("No network configuration provided.")

    if args.parameter_server:
        agent['device'] = '/job:ps/task:{}'.format(args.task_index)  # '/cpu:0'
    else:
        agent['device'] = '/job:worker/task:{}'.format(args.task_index)  # '/cpu:0'

    agent['execution'] = dict(
        type='distributed',
        distributed_spec=dict(
            cluster_spec=cluster_spec,
            task_index=args.task_index,
            job='ps' if args.parameter_server else 'worker',
            protocol='grpc'))

    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(states=environment.states, actions=environment.actions, network=network))

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {} after overall {} timesteps. Steps Per Second {}".format(
                r.agent.episode, r.agent.timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    runner.run(timesteps=args.timesteps, episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished)
    runner.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', help="ID of the game mode")
    parser.add_argument('--hide', dest='hide', action='store_const', const=True, default=False,
                        help="Hide output window")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!

    environment = MazeExplorer(mode_id=args.mode, visible=not args.hide)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    # Guard against a report interval of zero when fewer than 1000 episodes are requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(
                ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    runner.close()
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    # (A leftover `if args.monitor:` block referenced an argument this parser
    # never defines and was removed.)
    environment.close()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-fs', '--frame-skip', help="Number of frames to repeat action",
                        type=int, default=1)
    parser.add_argument('-rap', '--repeat-action-probability', help="Repeat action probability",
                        type=float, default=0.0)
    parser.add_argument('-lolt', '--loss-of-life-termination',
                        help="Loss of life counts as terminal state", action='store_true')
    parser.add_argument('-lolr', '--loss-of-life-reward',
                        help="Loss of life reward/penalty. EX: -1 to penalize", type=float, default=0.0)
    parser.add_argument('-ds', '--display-screen', action='store_true', default=False,
                        help="Display emulator screen")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environment = ALE(args.rom,
                      frame_skip=args.frame_skip,
                      repeat_action_probability=args.repeat_action_probability,
                      loss_of_life_termination=args.loss_of_life_termination,
                      loss_of_life_reward=args.loss_of_life_reward,
                      display_screen=args.display_screen)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=environment.states,
            actions=environment.actions,
            network=network_spec))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    # Guard against a report interval of zero when fewer than 1000 episodes are requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(
                ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    runner.close()
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))
    environment.close()
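# Example invocation (hedged; the script name, rom path, and config files are
# illustrative placeholders, not from the snippet above):
#   python ale_train.py roms/breakout.bin -a configs/dqn.json -n configs/cnn.json -e 50000 -s saves/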
    monitor_video=False, visualize=True)

print(os.getcwd())

with open('/root/catkin_ws/src/drone_training/drone_training/configs/dqn_ue4.json', 'r') as fp:
    agent = json.load(fp=fp)
with open('/root/catkin_ws/src/drone_training/drone_training/configs/mynet.json', 'r') as fp:
    network = json.load(fp=fp)

agent = Agent.from_spec(
    spec=agent,
    kwargs=dict(
        states=environment.states,
        actions=environment.actions,
        network=network,
    ))

# if rospy.get_param("/load"):
#     load_dir = os.path.dirname(rospy.get_param("/load"))
#     if not os.path.isdir(load_dir):
#         raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
#     agent.restore_model(rospy.get_param("/load"))

if rospy.get_param("/save"):
    save_dir = os.path.dirname(rospy.get_param("/save"))
    if not os.path.isdir(rospy.get_param("/save")):
        try:
            os.mkdir(rospy.get_param("/save"), 0o755)
        except OSError:
            print(rospy.get_param("/save"))
def __init__(self, arguments):
    self.args = arguments

    if not os.path.isdir("tmp"):
        try:
            os.mkdir("tmp", 0o755)
        except OSError:
            raise OSError("Cannot create directory `tmp`")

    self.logger = get_logger(filename='tmp/train.log', logger_name='TrainLogger')
    self.logger.debug(self.args)

    self.train, self.scaler = self._get_data()

    # build environment
    self.environment = PortfolioEnv(
        self.train,
        nb_assets=config.NB_ASSETS,
        horizon=self.args.horizon,
        window_size=config.WINDOW_SIZE,
        portfolio_value=config.PORTFOLIO_VALUE,
        assets=config.ASSETS,
        risk_aversion=config.RISK_AVERSION,
        scaler=self.scaler,
        predictor=config.PREDICTION_MODEL,
        cost_buying=config.COST_BUYING,
        cost_selling=config.COST_SELLING,
        action_type=self.args.action_type,
        action_space=self.args.action_space,
        optimized=True,
        num_actions=self.args.num_actions,
        discrete_states=self.args.discrete_states,
        standardize=self.args.standardize_state,
        episodes=self.args.episodes,
        epochs=self.args.epochs,
        random_starts=self.args.random_starts)

    # load agent config
    with open(self.args.agent_config, 'r') as fp:
        self.agent_config = json.load(fp=fp)

    # load network config; default to None so the attribute always exists
    self.network_spec = None
    if self.args.net_config:
        with open(self.args.net_config, 'r') as fp:
            self.network_spec = json.load(fp=fp)

    try:
        print(f'Agent spec {self.agent_config}'
              f'\nNetwork spec {self.network_spec}'
              f'\nEnvironment spec: {self.environment.env_spec()}\n')
        self.logger.info(
            f'\nAgent spec: {self.agent_config}'
            f'\nNetwork spec: {self.network_spec}'
            f'\nEnvironment spec: {self.environment.env_spec()}\n')
    except Exception:
        # in case of using one of the basic agents
        pass

    # check if the agent can be saved
    if self.args.model_path:
        save_dir = os.path.dirname(self.args.model_path)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    # check if training evaluation files can be saved
    if self.args.eval_path:
        save_dir = os.path.dirname(self.args.eval_path)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save evaluation to dir {} ()".format(save_dir))

    if self.agent_config['type'] == 'trpo_agent':
        # for some reason trpo + tensorboard does not work
        self.logger.warning('TensorBoard will not be supported')
        # build agent
        self.agent = Agent.from_spec(
            spec=self.agent_config,
            kwargs=dict(
                states_spec=self.environment.states,
                actions_spec=self.environment.actions,
                network_spec=self.network_spec))
    else:
        # summary spec for TensorBoard
        self.summary_spec = dict(
            directory="./board/",
            steps=50,
            labels=['regularization', 'losses', 'variables', 'actions',
                    'states', 'rewards', 'gradients', 'configuration'])
        # build agent
        self.agent = Agent.from_spec(
            spec=self.agent_config,
            kwargs=dict(
                states_spec=self.environment.states,
                actions_spec=self.environment.actions,
                network_spec=self.network_spec,
                summary_spec=self.summary_spec))

    # if there is a pre-trained agent -> continue training
    if self.args.load_agent:
        load_dir = os.path.dirname(self.args.load_agent)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        self.agent.restore_model(self.args.load_agent)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000 * 60,
                        help="Maximum number of timesteps per episode")
    # parser.add_argument('-m', '--monitor', help="Save results to this directory")
    # parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    environment = OpenAIUniverse(args.gym_id)
    environment.configure(remotes=1)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    # Guard against a report interval of zero when fewer than 1000 episodes are requested.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {} after {} timesteps. Steps Per Second {}".format(
                r.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    runner.close()
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    # if args.monitor:
    #     environment.gym.monitor.close()
    environment.close()
def load_agent(agent_folder):
    with open(os.path.join(agent_folder, "agent.json"), 'r') as fp:
        agent_spec = json.load(fp=fp)
    return Agent.from_spec(agent_spec)
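# Usage sketch (hedged): `agent_folder` is any directory holding an
# "agent.json" spec; for this bare call to work, the spec itself must already
# embed states/actions/network, since no kwargs are passed. The path below is
# an illustrative placeholder.
agent = load_agent("saved_agents/ppo_run_01")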
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-w', '--workers', help="Number of threads to run where the model is shared",
                        type=int, default=16)
    parser.add_argument('-fs', '--frame-skip', help="Number of frames to repeat action",
                        type=int, default=1)
    parser.add_argument('-rap', '--repeat-action-probability', help="Repeat action probability",
                        type=float, default=0.0)
    parser.add_argument('-lolt', '--loss-of-life-termination',
                        help="Loss of life counts as terminal state", action='store_true')
    parser.add_argument('-lolr', '--loss-of-life-reward',
                        help="Loss of life reward/penalty. EX: -1 to penalize", type=float, default=0.0)
    parser.add_argument('-ea', '--epsilon-annealing',
                        help="Create separate epsilon annealing schedules per thread",
                        action='store_true')
    parser.add_argument('-ds', '--display-screen', action='store_true', default=False,
                        help="Display emulator screen")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environments = [
        ALE(args.rom,
            frame_skip=args.frame_skip,
            repeat_action_probability=args.repeat_action_probability,
            loss_of_life_termination=args.loss_of_life_termination,
            loss_of_life_reward=args.loss_of_life_reward,
            display_screen=args.display_screen) for _ in range(args.workers)
    ]

    if args.network_spec:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    # `--agent-config` is a file path, so read it once and copy per worker
    # (the original called json.loads directly on the path).
    with open(args.agent_config, 'r') as fp:
        base_agent_config = json.load(fp=fp)

    agent_configs = []
    for i in range(args.workers):
        agent_config = dict(base_agent_config)
        # Optionally overwrite epsilon final values
        if "exploration" in agent_config and agent_config["exploration"]["type"] == "epsilon_anneal":
            if args.epsilon_annealing:
                # epsilon final values are [0.5, 0.1, 0.01] with probabilities [0.3, 0.4, 0.3]
                epsilon_final = np.random.choice([0.5, 0.1, 0.01], p=[0.3, 0.4, 0.3])
                agent_config["exploration"]["epsilon_final"] = epsilon_final
        agent_configs.append(agent_config)

    # Let the first agent create the model; manually assign it to the workers.
    logger.info(agent_configs[0])
    agent = Agent.from_spec(
        spec=agent_configs[0],
        kwargs=dict(
            states_spec=environments[0].states,
            actions_spec=environments[0].actions,
            network_spec=network_spec))

    agents = [agent]
    for i in range(args.workers - 1):
        config = agent_configs[i]
        # Use default config values from the first agent for any missing keys.
        for key, value in agent_configs[0].items():
            config.setdefault(key, value)
        worker = WorkerAgentGenerator(AgentsDictionary[agent_configs[0]['type']])(
            states_spec=environments[0].states,
            actions_spec=environments[0].actions,
            network_spec=network_spec,
            model=agent.model,
            kwargs=config)
        agents.append(worker)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_configs[0])

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    def episode_finished(stats):
        if args.debug:
            logger.info("Thread {t}. Finished episode {ep} after {ts} timesteps. Reward {r}".format(
                t=stats['thread_id'], ep=stats['episode'], ts=stats['timestep'],
                r=stats['episode_reward']))
        return True

    def summary_report(r):
        et = time.time()
        logger.info('=' * 40)
        logger.info('Current Step/Episode: {}/{}'.format(r.global_step, r.global_episode))
        logger.info('SPS: {}'.format(r.global_step / (et - r.start_time)))
        reward_list = r.episode_rewards
        if len(reward_list) > 0:
            logger.info('Max Reward: {}'.format(np.max(reward_list)))
            logger.info("Average of last 500 rewards: {}".format(
                sum(reward_list[-500:]) / min(500, len(reward_list))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(reward_list[-100:]) / min(100, len(reward_list))))
        logger.info('=' * 40)

    # Create runners
    threaded_runner = ThreadedRunner(
        agents,
        environments,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes)

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environments[0]))
    threaded_runner.run(summary_interval=100, episode_finished=episode_finished,
                        summary_report=summary_report)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=threaded_runner.global_episode))

    [environments[t].close() for t in range(args.workers)]
app = App()
app.init_screen()
app.render()

config = json.load(open(getenv('CONFIG', 'ppo.json')))
max_episodes = config.pop('max_episodes', None)
max_timesteps = config.pop('max_timesteps', None)
max_episode_timesteps = config.pop('max_episode_timesteps')
network_spec = config.pop('network')

agent = Agent.from_spec(
    spec=config,
    kwargs=dict(
        states=dict(type='float', shape=(len(app.get_state()),)),
        actions={
            'accel': dict(type='int', num_actions=3),
            'turn': dict(type='int', num_actions=3),
        },
        network=network_spec))

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

episode = 0
MAX_FRAMES_WITHOUT_REWARD = 200.
while True:
    episode += 1
    # XXX: make random switch
    app.checkpoints = app.checkpoints[::-1]
# Training entry point for the Box2dEnv experiments. Paths below are
# machine-specific and left as in the original script. Imports assume the
# Tensorforce 0.x layout.
import argparse
import json
import logging

from tensorforce.agents import Agent
from tensorforce.contrib.openai_gym import OpenAIGym
from tensorforce.execution import Runner


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('run_number', help="Consecutive number of this run")
    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n', '--network', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=None, help="Number of episodes")
    parser.add_argument('-t', '--timesteps', type=int, default=None, help="Number of timesteps")
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d', '--deterministic', action='store_true', default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-rl', '--reward-level', type=int, default=3, help="Reward level")
    parser.add_argument('-rn', '--random-level', type=int, default=3, help="Randomization level")
    parser.add_argument('--visualize', action='store_true', default=False,
                        help="Enable OpenAI Gym's visualization")
    parser.add_argument('-D', '--debug', action='store_true', default=False,
                        help="Show debug outputs")
    parser.add_argument('-te', '--test', action='store_true', default=False,
                        help="Test agent without learning.")
    parser.add_argument('-sd', '--seed', type=int, default=None, help="Random seed for this trial")
    parser.add_argument('-rp', '--repeat', type=int, default=6,
                        help="How many times to repeat an action")
    args = parser.parse_args()

    # Basic run parameters
    random_seed = args.seed
    agent_save_period = args.save_episodes
    visualize_period = 10
    run_number = args.run_number

    load_agent = args.load is not None
    agent_filename = args.load
    to_visualize = args.visualize

    # Set logging level
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    environment = OpenAIGym(
        gym_id=args.gym_id,
        monitor='C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\savemonitor\\',
        monitor_safe=False,
        monitor_video=0,
        # True to visualize the first run; afterwards visualization is
        # toggled in the episode_finished() callback below.
        visualize=False)

    # Seed and configure the environment
    environment.gym.seed(random_seed)
    environment.gym.unwrapped.set_reward(args.reward_level)
    environment.gym.unwrapped.set_random(args.random_level)

    # Initialize agent-network-model objects from their JSON specifications
    with open('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\{}'.format(args.agent), 'r') as fp:
        agent = json.load(fp=fp)
    with open('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\{}'.format(args.network), 'r') as fp:
        network = json.load(fp=fp)

    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(states=environment.states,
                    actions=environment.actions,
                    network=network,
                    random_seed=random_seed))

    if load_agent:
        agent.restore_model(
            directory='C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\modelSave',
            file=agent_filename)

    runner = Runner(agent=agent, environment=environment, repeat_actions=args.repeat)

    report_frequently = True
    report_episodes = 1 if report_frequently else 100

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))

    # Build the run name: number, task, reward/random levels, seed, algorithm
    nNum = str(run_number).zfill(3)
    task = environment.gym.unwrapped.task
    nTask = 'L' if task == 'LIFT' else 'P'
    nReward = environment.gym.unwrapped.reward_level
    nRandom = environment.gym.unwrapped.rand_level
    nSeed = str(random_seed).zfill(2)
    nAlg = 'PPO'
    nName = "{}-{}{}{}-{}-{}".format(nNum, nTask, nReward, nRandom, nSeed, nAlg)

    def episode_finished(r, id_):
        # Toggle visualization every `visualize_period` episodes
        if r.episode % visualize_period == 0:
            environment.visualize = to_visualize

        # Append the last batch of episode rewards to the run's CSV log
        save_period = 20
        if r.episode % save_period == 0:
            with open('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\{}.csv'.format(nName), 'a+') as csv:
                for reward in r.episode_rewards[-save_period:]:
                    csv.write("{:2.2f}\n".format(reward))

        # Compact reporting: episode, last reward, mean of last 100 rewards
        if r.episode % report_episodes == 0:
            logger.info("{:6d}, {:+6.2f}, {:+6.2f}".format(
                r.agent.episode,
                r.episode_rewards[-1],
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))

        # Checkpoint the agent periodically
        if r.episode == 1 or (r.episode % agent_save_period == 0):
            logger.info("Saving agent to {} at episode {}".format(
                'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\{}'.format(nName),
                r.episode))
            r.agent.save_model(
                directory='C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\modelSave\\{}-{}'.format(nName, r.episode),
                append_timestep=False)
        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished,
               testing=args.test,
               sleep=None)
    runner.close()

    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.agent.episode))
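# For reference, a minimal sketch of what the --agent and --network JSON
# config files loaded above might contain. This assumes the Tensorforce 0.x
# spec format; the exact keys (e.g. "ppo_agent", "step_optimizer") vary
# between versions, so treat it as illustrative rather than authoritative.
import json

agent_spec = {
    "type": "ppo_agent",                                        # dispatched by Agent.from_spec
    "step_optimizer": {"type": "adam", "learning_rate": 1e-3}
}
network_spec = [
    {"type": "dense", "size": 32},                              # a two-layer MLP,
    {"type": "dense", "size": 32}                               # one dict per layer
]

with open('ppo.json', 'w') as fp:
    json.dump(agent_spec, fp, indent=4)
with open('mlp2_network.json', 'w') as fp:
    json.dump(network_spec, fp, indent=4)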
# Generic OpenAI Gym runner (cf. Tensorforce 0.x examples/openai_gym.py).
# Imports assume the Tensorforce 0.x layout.
import argparse
import importlib
import json
import logging
import os
import time

from tensorforce import TensorForceError
from tensorforce.agents import Agent
from tensorforce.contrib.openai_gym import OpenAIGym
from tensorforce.execution import Runner


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-i', '--import-modules', help="Import module(s) required for environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n', '--network', default=None, help="Network specification file")
    parser.add_argument('-e', '--episodes', type=int, default=None, help="Number of episodes")
    parser.add_argument('-t', '--timesteps', type=int, default=None, help="Number of timesteps")
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d', '--deterministic', action='store_true', default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('--monitor', help="Save results to this directory")
    parser.add_argument('--monitor-safe', action='store_true', default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video', type=int, default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize', action='store_true', default=False,
                        help="Enable OpenAI Gym's visualization")
    parser.add_argument('-D', '--debug', action='store_true', default=False,
                        help="Show debug outputs")
    parser.add_argument('-te', '--test', action='store_true', default=False,
                        help="Test agent without learning.")
    parser.add_argument('-sl', '--sleep', type=float, default=None,
                        help="Slow down simulation by sleeping for x seconds (fractions allowed).")
    parser.add_argument('--job', type=str, default=None,
                        help="For distributed mode: The job type of this agent.")
    parser.add_argument('--task', type=int, default=0,
                        help="For distributed mode: The task index of this agent.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.setLevel(logging.INFO)

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = OpenAIGym(gym_id=args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    if args.agent is not None:
        with open(args.agent, 'r') as fp:
            agent = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network is not None:
        with open(args.network, 'r') as fp:
            network = json.load(fp=fp)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(states=environment.states,
                    actions=environment.actions,
                    network=network))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.restore_model(args.load)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {}: could not create directory.".format(save_dir))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))

    def episode_finished(r, id_):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {:d} after {:d} timesteps. Steps Per Second {:0.2f}".format(
                r.agent.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
        if args.save and args.save_episodes is not None and not r.episode % args.save_episodes:
            logger.info("Saving agent to {}".format(args.save))
            r.agent.save_model(args.save)
        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished,
               testing=args.test,
               sleep=args.sleep)
    runner.close()

    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.agent.episode))
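# The boolean returned from episode_finished() acts as the run's continue
# flag: returning False makes Runner.run() stop early. A hypothetical
# early-stopping variant of the callback above (the 195.0 threshold is
# CartPole's classic "solved" score, used purely for illustration):
def episode_finished_early_stopping(r, id_):
    if len(r.episode_rewards) >= 100:
        mean_100 = sum(r.episode_rewards[-100:]) / 100.0
        if mean_100 >= 195.0:
            print("Solved after {} episodes (mean-100 reward {:0.2f}).".format(r.episode, mean_100))
            return False  # stop the runner
    return True  # keep training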
# Runner for the newer Tensorforce API: states()/actions() are methods and
# the per-episode hook is a generic `callback`. Import locations below
# assume that newer layout.
import argparse
import importlib

from tensorforce.agents import Agent
from tensorforce.environments import OpenAIGym
from tensorforce.execution import Runner


def main():
    parser = argparse.ArgumentParser()
    # Gym arguments
    parser.add_argument('-g', '--gym', help="Gym environment id")
    parser.add_argument('-i', '--import-modules', help="Import module(s) required for gym environment")
    parser.add_argument('--monitor', type=str, default=None, help="Save results to this directory")
    parser.add_argument('--monitor-safe', action='store_true', default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video', type=int, default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize', action='store_true', default=False,
                        help="Enable OpenAI Gym's visualization")
    # Agent arguments
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n', '--network', default=None, help="Network specification file")
    # Runner arguments
    parser.add_argument('-e', '--episodes', type=int, default=None, help="Number of episodes")
    parser.add_argument('-t', '--timesteps', type=int, default=None, help="Number of timesteps")
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d', '--deterministic', action='store_true', default=False,
                        help="Choose actions deterministically")
    args = parser.parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = OpenAIGym(gym_id=args.gym,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    agent = Agent.from_spec(spec=args.agent,
                            states=environment.states(),
                            actions=environment.actions(),
                            network=args.network)

    runner = Runner(agent=agent, environment=environment)

    def callback(r):
        # Report rolling statistics every 100 episodes
        if r.episode % 100 == 0:
            print("================================================\n"
                  "Average secs/episode over 100 episodes: {time:0.2f}\n"
                  "Average steps/sec over 100 episodes: {timestep:0.2f}\n"
                  "Average reward over 100 episodes: {reward100:0.2f}\n"
                  "Average reward over 500 episodes: {reward500:0.2f}".format(
                      time=(sum(r.episode_times[-100:]) / 100.0),
                      timestep=(sum(r.episode_timesteps[-100:]) / sum(r.episode_times[-100:])),
                      reward100=(sum(r.episode_rewards[-100:]) / min(100.0, r.episode)),
                      reward500=(sum(r.episode_rewards[-500:]) / min(500.0, r.episode))))
        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               callback=callback)
    runner.close()
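# A minimal sketch of driving this newer-style Runner programmatically,
# without argparse. It mirrors the calls in main() above; the gym id, spec
# paths, and episode budget are placeholder assumptions, not values from the
# original script.
from tensorforce.agents import Agent
from tensorforce.environments import OpenAIGym
from tensorforce.execution import Runner

environment = OpenAIGym(gym_id='CartPole-v1', visualize=False)
agent = Agent.from_spec(spec='configs/ppo.json',
                        states=environment.states(),
                        actions=environment.actions(),
                        network='configs/mlp2_network.json')
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=300,
           max_episode_timesteps=200,
           deterministic=False,
           callback=lambda r: True)  # no-op callback: always continue
runner.close()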