def main():
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)
    live_ish = args.live or args.test_live
    if not live_ish:
        try: shutil.rmtree(directory)
        except: pass
        os.mkdir(directory)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, cli_args=args)
    agent = agents_dict['ppo_agent'](
        states=env.states,
        actions=env.actions,
        network=network,
        **hydrated
    )

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
        agent.save_model(filestar)

    agent.close()
    env.close()

def main():
    directory = f'./saves/{args.id}{"_early" if args.early_stop else ""}'
    if not args.live and not args.test_live:
        try: shutil.rmtree(directory)
        except: pass

    hs = HSearchEnv(gpu_split=args.gpu_split, net_type=args.net_type)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        saver_spec=dict(
            directory=directory,
            # saves this model every 6000 time-steps. I'd rather manually save it at the end, that way we could save
            # a winning combo in hypersearch.py and remove this redundant training step - but TForce doesn't have
            # working manual-save code yet, only automatic.
            steps=6000
        ),
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if args.live or args.test_live:
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, early_stop=args.early_stop, n_tests=args.runs)

    agent.close()
    env.close()

def main():
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)
    live_ish = args.live or args.test_live
    if not live_ish:
        try: shutil.rmtree(directory)
        except: pass
        os.mkdir(directory)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
        agent.save_model(filestar)

    agent.close()
    env.close()

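# The main() variants above read their configuration from a module-level `args`
# object. The flags they reference are --name, --id, --live, --test_live,
# --n_steps, --n_tests, --early_stop, --gpu_split, --net_type and --runs. The
# parser below is only a hedged sketch of how that object might be built; the
# defaults and help strings are assumptions, not taken from the original source.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--name', type=str, help='sub-directory under ./saves to write/restore the model')
parser.add_argument('--id', type=int, help='id of the winning hypersearch run to hydrate')
parser.add_argument('--live', action='store_true', help='trade live with the restored model')
parser.add_argument('--test_live', action='store_true', help='run the live loop in paper-trading mode')
parser.add_argument('--n_steps', type=int, help='training steps per train/test round')
parser.add_argument('--n_tests', type=int, help='number of test evaluations')
parser.add_argument('--early_stop', type=int, default=-1, help='early-stopping patience (assumed: -1 disables)')
parser.add_argument('--gpu_split', type=float, default=1, help='GPU share per process')
parser.add_argument('--net_type', type=str, default='conv2d', help='network architecture flag')
parser.add_argument('--runs', type=int, help='number of test runs (older main() variant)')
args = parser.parse_args()
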
def execute(self, actions):
    flat, hydrated, network = self.get_hypers(actions)

    env = BitcoinEnv(flat, name=self.agent)
    agent = agents_dict[self.agent](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    env.train_and_test(agent, self.cli_args.n_steps, self.cli_args.n_tests, -1)

    step_acc, ep_acc = env.acc.step, env.acc.episode
    adv_avg = utils.calculate_score(ep_acc.advantages)
    print(flat, f"\nAdvantage={adv_avg}\n\n")

    sql = """
    insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag)
    values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
    returning id;
    """
    row = self.conn_runs.execute(
        text(sql),
        hypers=json.dumps(flat),
        advantage_avg=adv_avg,
        advantages=list(ep_acc.advantages),
        uniques=list(ep_acc.uniques),
        prices=list(env.prices),
        actions=list(step_acc.signals),
        agent=self.agent,
        flag=self.cli_args.net_type
    ).fetchone()

    if ep_acc.advantages[-1] > 0:
        _id = str(row[0])
        directory = os.path.join(os.getcwd(), "saves", _id)
        filestar = os.path.join(directory, _id)
        os.mkdir(directory)
        agent.save_model(filestar)

    agent.close()
    env.close()
    return adv_avg

def execute(self, actions):
    flat, hydrated, network = self.get_hypers(actions)

    env = BitcoinEnv(flat, name=self.agent)
    agent = agents_dict[self.agent](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )
    env.train_and_test(agent)

    step_acc, ep_acc = env.acc.step, env.acc.episode
    adv_avg = ep_acc.advantages[-1]
    print(flat, f"\nAdvantage={adv_avg}\n\n")

    sql = """
    insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag)
    values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
    """
    self.conn_runs.execute(
        text(sql),
        hypers=json.dumps(flat),
        advantage_avg=adv_avg,
        advantages=list(ep_acc.advantages),
        uniques=list(ep_acc.uniques),
        prices=list(env.prices),
        actions=list(step_acc.signals),
        agent=self.agent,
        flag=self.net_type
    )

    agent.close()
    env.close()
    return adv_avg

def loss_fn(hypers):
    processed = post_process(hypers)
    network = network_spec(processed['custom'])
    agent = processed['ppo_agent']

    ## GPU split
    gpu_split = args.gpu_split
    if gpu_split != 1:
        fraction = .9 / gpu_split if gpu_split > 1 else gpu_split
        session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=fraction))
        agent['execution'] = {'type': 'single', 'session_config': session_config, 'distributed_spec': None}

    pprint(processed)
    pprint(network)

    env = BitcoinEnv(processed, args)
    agent = agents_dict['ppo_agent'](
        states=env.states,
        actions=env.actions,
        network=network,
        **agent
    )

    env.train_and_test(agent)

    acc = env.acc.test
    adv_avg = utils.calculate_score(acc.ep.returns)
    print(hypers, f"\nScore={adv_avg}\n\n")

    df = pd.DataFrame([dict(
        id=uuid.uuid4(),
        hypers=json.dumps(hypers),
        returns=list(acc.ep.returns),
        uniques=list(acc.ep.uniques),
        prices=list(env.data.get_prices(acc.ep.i, 0)),
        signals=list(acc.step.signals),
    )]).set_index('id')
    dtype = {
        'hypers': psql.JSONB,
        **{k: psql.ARRAY(psql.DOUBLE_PRECISION) for k in ['returns', 'signals', 'prices', 'uniques']},
    }
    with data.engine_runs.connect() as conn:
        df.to_sql('runs', conn, if_exists='append', index_label='id', dtype=dtype)

    # TODO restore save_model() from git
    agent.close()
    env.close()
    return -adv_avg  # maximize

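# loss_fn() returns the negated score, which implies it is handed to a black-box
# minimizer. The actual driver isn't shown in this excerpt; the snippet below is
# only a sketch assuming hyperopt's TPE optimizer, and the search space is a
# placeholder -- the real space (the keys post_process() expects, such as the
# 'custom' network hypers and the 'ppo_agent' kwargs) lives elsewhere.
from hyperopt import fmin, tpe, hp

space = {
    # placeholder dimensions; not the project's real hyperparameter space
    'learning_rate': hp.loguniform('learning_rate', -12, -6),
    'discount': hp.uniform('discount', 0.9, 0.999),
}

best = fmin(
    fn=loss_fn,       # returns -score, so minimizing it maximizes the score
    space=space,
    algo=tpe.suggest,
    max_evals=50,
)
print(best)
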
def execute(self, actions):
    flat, hydrated, network = self.get_hypers(actions)

    env = BitcoinEnv(flat, self.cli_args)
    agent = agents_dict[self.agent](
        states=env.states,
        actions=env.actions,
        network=network,
        **hydrated
    )

    env.train_and_test(agent, self.cli_args.n_steps, self.cli_args.n_tests, -1)

    step_acc, ep_acc = env.acc.step, env.acc.episode
    adv_avg = utils.calculate_score(ep_acc.returns)
    print(flat, f"\nScore={adv_avg}\n\n")

    sql = """
    insert into runs (hypers, sharpes, returns, uniques, prices, signals, agent, flag)
    values (:hypers, :sharpes, :returns, :uniques, :prices, :signals, :agent, :flag)
    returning id;
    """
    row = self.conn_runs.execute(
        text(sql),
        hypers=json.dumps(flat),
        sharpes=list(ep_acc.sharpes),
        returns=list(ep_acc.returns),
        uniques=list(ep_acc.uniques),
        prices=list(env.prices),
        signals=list(step_acc.signals),
        agent=self.agent,
        flag=self.cli_args.net_type
    ).fetchone()

    if ep_acc.returns[-1] > 0:
        _id = str(row[0])
        directory = os.path.join(os.getcwd(), "saves", _id)
        filestar = os.path.join(directory, _id)
        os.makedirs(directory, exist_ok=True)
        agent.save_model(filestar)

    agent.close()
    env.close()
    return adv_avg

def execute(self, actions):
    flat, hydrated, network = self.get_hypers(actions)

    env = BitcoinEnv(flat, name=self.agent)
    agent = agents_dict[self.agent](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )
    env.train_and_test(agent)

    step_acc, ep_acc = env.acc.step, env.acc.episode
    adv_avg = ep_acc.advantages[-1]
    print(flat, f"\nAdvantage={adv_avg}\n\n")

    sql = """
    insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag)
    values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
    returning id;
    """
    row = self.conn_runs.execute(
        text(sql),
        hypers=json.dumps(flat),
        advantage_avg=adv_avg,
        advantages=list(ep_acc.advantages),
        uniques=list(ep_acc.uniques),
        prices=list(env.prices),
        actions=list(step_acc.signals),
        agent=self.agent,
        flag=self.net_type
    ).fetchone()

    if ep_acc.advantages[-1] > 0:
        _id = str(row[0])
        directory = os.path.join(os.getcwd(), "saves", _id)
        filestar = os.path.join(directory, _id)
        os.mkdir(directory)
        agent.save_model(filestar)

    agent.close()
    env.close()
    return adv_avg

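# The execute() variants above assume a Postgres `runs` table already exists.
# Its DDL isn't part of this excerpt; below is a hedged sketch inferred from the
# insert statements and from the JSONB / DOUBLE PRECISION[] dtypes used in
# loss_fn(). Column names follow the advantage-based variants; the newer variant
# would swap in sharpes/returns/signals columns instead. The engine URL is a
# placeholder -- the project reads its own DB configuration elsewhere.
from sqlalchemy import create_engine, text

ddl = """
create table if not exists runs (
    id serial primary key,
    hypers jsonb not null,
    advantage_avg double precision,
    advantages double precision[],
    uniques double precision[],
    prices double precision[],
    actions double precision[],
    agent varchar(64),
    flag varchar(64)
);
"""
engine_runs = create_engine('postgresql://user:pass@localhost/runs_db')
with engine_runs.begin() as conn:
    conn.execute(text(ddl))
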
def main():
    hs = HSearchEnv(net_type='conv2d')
    flat, hydrated, network = hs.get_winner()
    flat['unimodal'] = True
    flat['arbitrage'] = False
    flat['indicators'] = False
    flat['step_window'] = 10

    data.tables = [dict(name='a', ts='ts', cols=dict(o=F, h=F, l=F, c=F, v=Z))]
    data.target = 'a_c'
    data.count_rows = count_rows
    data.db_to_dataframe = db_to_dataframe_wrapper(1)

    env = BitcoinEnv(flat, name='ppo_agent')

    # Hold
    reset(env)
    for i in range(90):  # step_window - start_timestep
        next_state, terminal, reward = env.execute(0)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] == 0

    # > 1
    reset(env)
    for i in range(90):
        next_state, terminal, reward = env.execute(1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] > 0

    # < 1
    reset(env)
    for i in range(90):
        next_state, terminal, reward = env.execute(-1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] < 0

    # Try just one
    reset(env)
    env.execute(0)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] == 0

    reset(env)
    env.execute(1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] > 0

    reset(env)
    env.execute(-1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] < 0

    # Now for a bear market
    data.db_to_dataframe = db_to_dataframe_wrapper(-1)

    # Hold
    reset(env)
    for i in range(90):
        env.execute(0)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] == 0

    # > 1
    reset(env)
    for i in range(90):
        env.execute(1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] < 0

    # < 1
    reset(env)
    for i in range(90):
        env.execute(-1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] > 0

    # Try just one
    reset(env)
    env.execute(0)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] == 0

    reset(env)
    env.execute(1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] < 0

    reset(env)
    env.execute(-1)
    env.episode_finished(None)
    assert env.acc.episode.advantages[-1] > 0