Пример #1
0
def main():
    """Train and save a PPO agent, or restore a saved one and run it live.

    Reads the module-level ``args``. When neither ``args.live`` nor
    ``args.test_live`` is set, the save directory ``<cwd>/saves/<args.name>``
    is recreated empty, the agent is trained and tested, and the model is
    written under the prefix ``<directory>/<args.name>``. Otherwise the model
    is restored from that directory and passed to ``env.run_live``.
    """
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)

    live_ish = args.live or args.test_live
    if not live_ish:
        # Start from an empty save directory. A missing directory is the only
        # expected failure; anything else (e.g. permissions) should surface
        # instead of being silently swallowed.
        try:
            shutil.rmtree(directory)
        except FileNotFoundError:
            pass
        # makedirs also creates the parent "saves" directory on a first run.
        os.makedirs(directory, exist_ok=True)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, cli_args=args)

    agent = agents_dict['ppo_agent'](states=env.states,
                                     actions=env.actions,
                                     network=network,
                                     **hydrated)

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        # Release the agent and environment even if training or saving fails.
        try:
            env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
            agent.save_model(filestar)
        finally:
            agent.close()
            env.close()
Пример #2
0
    def execute(self, actions):
        """Run one trial for ``actions`` and record it in the ``runs`` table.

        Hyperparameters are obtained from ``self.get_hypers(actions)``; an
        environment and an agent of type ``self.agent`` are built from them,
        trained and tested, and the run's results are inserted through
        ``self.conn_runs``.

        Returns:
            The last entry of the episode accumulator's ``advantages``.
        """
        flat, hydrated, network = self.get_hypers(actions)

        sql = """
          insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag) 
          values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
        """

        env = BitcoinEnv(flat, name=self.agent)
        agent = agents_dict[self.agent](states_spec=env.states,
                                        actions_spec=env.actions,
                                        network_spec=network,
                                        **hydrated)

        # Release the agent and environment even when training or the insert
        # fails, so a failing trial does not leak them.
        try:
            env.train_and_test(agent)

            step_acc, ep_acc = env.acc.step, env.acc.episode
            adv_avg = ep_acc.advantages[-1]
            print(flat, f"\nAdvantage={adv_avg}\n\n")

            self.conn_runs.execute(text(sql),
                                   hypers=json.dumps(flat),
                                   advantage_avg=adv_avg,
                                   advantages=list(ep_acc.advantages),
                                   uniques=list(ep_acc.uniques),
                                   prices=list(env.prices),
                                   actions=list(step_acc.signals),
                                   agent=self.agent,
                                   flag=self.net_type)
        finally:
            agent.close()
            env.close()
        return adv_avg
Пример #3
0
def main():
    """Train a PPO agent with automatic checkpointing, or run it live.

    Reads the module-level ``args``. The checkpoint directory is
    ``./saves/<args.id>`` with an ``_early`` suffix when ``args.early_stop``
    is truthy. When neither ``args.live`` nor ``args.test_live`` is set, any
    existing checkpoint directory is removed before the agent is trained and
    tested; otherwise the agent is passed to ``env.run_live``.
    """
    directory = f'./saves/{args.id}{"_early" if args.early_stop else ""}'
    live_ish = args.live or args.test_live
    if not live_ish:
        # Discard stale checkpoints. A missing directory is the only expected
        # failure; other errors should surface rather than be swallowed.
        try:
            shutil.rmtree(directory)
        except FileNotFoundError:
            pass

    hs = HSearchEnv(gpu_split=args.gpu_split, net_type=args.net_type)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        saver_spec=dict(
            directory=directory,
            # saves this model every 6000 time-steps. I'd rather manually save it at the end, that way we could save
            # a winning combo in hypersearch.py and remove this redundant training step - but TForce doesn't have
            # working manual-save code yet, only automatic.
            steps=6000
        ),
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if live_ish:
        env.run_live(agent, test=args.test_live)
    else:
        # Release the agent and environment even if training fails.
        try:
            env.train_and_test(agent, early_stop=args.early_stop, n_tests=args.runs)
        finally:
            agent.close()
            env.close()
Пример #4
0
def main():
    """Train and save a PPO agent, or restore a saved one and run it live.

    Reads the module-level ``args``. When neither ``args.live`` nor
    ``args.test_live`` is set, the save directory ``<cwd>/saves/<args.name>``
    is recreated empty, the agent is trained and tested, and the model is
    written under the prefix ``<directory>/<args.name>``. Otherwise the model
    is restored from that directory and passed to ``env.run_live``.
    """
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)

    live_ish = args.live or args.test_live
    if not live_ish:
        # Start from an empty save directory. A missing directory is the only
        # expected failure; other errors should surface rather than be hidden.
        try:
            shutil.rmtree(directory)
        except FileNotFoundError:
            pass
        # makedirs also creates the parent "saves" directory on a first run.
        os.makedirs(directory, exist_ok=True)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        # Release the agent and environment even if training or saving fails.
        try:
            env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
            agent.save_model(filestar)
        finally:
            agent.close()
            env.close()
Пример #5
0
    def loss_fn(hypers):
        """Train and test one PPO agent for ``hypers`` and return its negated score.

        The hyperparameters are passed through ``post_process`` and a network
        is built from their ``'custom'`` entry. After the train/test run, the
        test accumulator's returns, uniques, signals and the matching prices
        are appended as one row to the ``runs`` table.

        Returns:
            ``-utils.calculate_score(returns)``; negated so that a minimizer
            calling this function maximizes the score.
        """
        processed = post_process(hypers)
        network = network_spec(processed['custom'])

        # Kept as a separate name from the agent object built below. This is
        # the same dict object as processed['ppo_agent'], so the 'execution'
        # entry added here is also visible through `processed`.
        agent_kwargs = processed['ppo_agent']
        ## GPU split
        gpu_split = args.gpu_split
        if gpu_split != 1:
            # gpu_split > 1 divides 0.9 by it; otherwise gpu_split itself is
            # used as the per-process GPU memory fraction.
            fraction = .9 / gpu_split if gpu_split > 1 else gpu_split
            session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=fraction))
            agent_kwargs['execution'] = {'type': 'single', 'session_config': session_config, 'distributed_spec': None}

        pprint(processed)
        pprint(network)

        env = BitcoinEnv(processed, args)
        agent = agents_dict['ppo_agent'](
            states=env.states,
            actions=env.actions,
            network=network,
            **agent_kwargs
        )

        # Release the agent and environment even when training or the
        # database write fails, so a failing trial does not leak them.
        try:
            env.train_and_test(agent)

            acc = env.acc.test
            adv_avg = utils.calculate_score(acc.ep.returns)
            print(hypers, f"\nScore={adv_avg}\n\n")

            df = pd.DataFrame([dict(
                id=uuid.uuid4(),
                hypers=json.dumps(hypers),
                returns=list(acc.ep.returns),
                uniques=list(acc.ep.uniques),
                prices=list(env.data.get_prices(acc.ep.i, 0)),
                signals=list(acc.step.signals),
            )]).set_index('id')
            dtype = {
                'hypers': psql.JSONB,
                **{k: psql.ARRAY(psql.DOUBLE_PRECISION) for k in ['returns', 'signals', 'prices', 'uniques']},
            }
            with data.engine_runs.connect() as conn:
                df.to_sql('runs', conn, if_exists='append', index_label='id', dtype=dtype)

            # TODO restore save_model() from git
        finally:
            agent.close()
            env.close()
        return -adv_avg  # maximize
Пример #6
0
    def execute(self, actions):
        """Run one trial for ``actions``, record it, and save profitable models.

        Hyperparameters are obtained from ``self.get_hypers(actions)``; an
        environment and an agent of type ``self.agent`` are built from them,
        trained and tested, and the run is inserted into the ``runs`` table.
        If the last episode return is positive, the model is saved under
        ``<cwd>/saves/<run id>/<run id>``.

        Returns:
            ``utils.calculate_score`` applied to the episode returns.
        """
        flat, hydrated, network = self.get_hypers(actions)

        sql = """
          insert into runs (hypers, sharpes, returns, uniques, prices, signals, agent, flag)
          values (:hypers, :sharpes, :returns, :uniques, :prices, :signals, :agent, :flag)
          returning id;
        """

        env = BitcoinEnv(flat, self.cli_args)
        agent = agents_dict[self.agent](
            states=env.states,
            actions=env.actions,
            network=network,
            **hydrated
        )

        # Release the agent and environment even when training, the insert
        # or the save fails, so a failing trial does not leak them.
        try:
            env.train_and_test(agent, self.cli_args.n_steps, self.cli_args.n_tests, -1)

            step_acc, ep_acc = env.acc.step, env.acc.episode
            adv_avg = utils.calculate_score(ep_acc.returns)
            print(flat, f"\nScore={adv_avg}\n\n")

            row = self.conn_runs.execute(
                text(sql),
                hypers=json.dumps(flat),
                sharpes=list(ep_acc.sharpes),
                returns=list(ep_acc.returns),
                uniques=list(ep_acc.uniques),
                prices=list(env.prices),
                signals=list(step_acc.signals),
                agent=self.agent,
                flag=self.cli_args.net_type
            ).fetchone()

            if ep_acc.returns[-1] > 0:
                # The id returned by the insert names the save directory.
                _id = str(row[0])
                directory = os.path.join(os.getcwd(), "saves", _id)
                filestar = os.path.join(directory, _id)
                os.makedirs(directory, exist_ok=True)
                agent.save_model(filestar)
        finally:
            agent.close()
            env.close()
        return adv_avg
Пример #7
0
    def execute(self, actions):
        """Run one trial for ``actions``, record it, and save profitable models.

        Hyperparameters are obtained from ``self.get_hypers(actions)``; an
        environment and an agent of type ``self.agent`` are built from them,
        trained and tested, and the run is inserted into the ``runs`` table.
        If the last episode advantage is positive, the model is saved under
        ``<cwd>/saves/<run id>/<run id>``.

        Returns:
            ``utils.calculate_score`` applied to the episode advantages.
        """
        flat, hydrated, network = self.get_hypers(actions)

        sql = """
          insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag) 
          values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
          returning id;
        """

        env = BitcoinEnv(flat, name=self.agent)
        agent = agents_dict[self.agent](
            states_spec=env.states,
            actions_spec=env.actions,
            network_spec=network,
            **hydrated
        )

        # Release the agent and environment even when training, the insert
        # or the save fails, so a failing trial does not leak them.
        try:
            env.train_and_test(agent, self.cli_args.n_steps, self.cli_args.n_tests, -1)

            step_acc, ep_acc = env.acc.step, env.acc.episode
            adv_avg = utils.calculate_score(ep_acc.advantages)
            print(flat, f"\nAdvantage={adv_avg}\n\n")

            row = self.conn_runs.execute(
                text(sql),
                hypers=json.dumps(flat),
                advantage_avg=adv_avg,
                advantages=list(ep_acc.advantages),
                uniques=list(ep_acc.uniques),
                prices=list(env.prices),
                actions=list(step_acc.signals),
                agent=self.agent,
                flag=self.cli_args.net_type
            ).fetchone()

            if ep_acc.advantages[-1] > 0:
                # The id returned by the insert names the save directory.
                _id = str(row[0])
                directory = os.path.join(os.getcwd(), "saves", _id)
                filestar = os.path.join(directory, _id)
                # makedirs also creates the parent "saves" directory when it
                # does not exist yet; plain mkdir would raise in that case.
                os.makedirs(directory, exist_ok=True)
                agent.save_model(filestar)
        finally:
            agent.close()
            env.close()
        return adv_avg
Пример #8
0
    def execute(self, actions):
        """Run one trial for ``actions``, record it, and save profitable models.

        Hyperparameters are obtained from ``self.get_hypers(actions)``; an
        environment and an agent of type ``self.agent`` are built from them,
        trained and tested, and the run is inserted into the ``runs`` table.
        If the last episode advantage is positive, the model is saved under
        ``<cwd>/saves/<run id>/<run id>``.

        Returns:
            The last entry of the episode accumulator's ``advantages``.
        """
        flat, hydrated, network = self.get_hypers(actions)

        sql = """
          insert into runs (hypers, advantage_avg, advantages, uniques, prices, actions, agent, flag) 
          values (:hypers, :advantage_avg, :advantages, :uniques, :prices, :actions, :agent, :flag)
          returning id;
        """

        env = BitcoinEnv(flat, name=self.agent)
        agent = agents_dict[self.agent](states_spec=env.states,
                                        actions_spec=env.actions,
                                        network_spec=network,
                                        **hydrated)

        # Release the agent and environment even when training, the insert
        # or the save fails, so a failing trial does not leak them.
        try:
            env.train_and_test(agent)

            step_acc, ep_acc = env.acc.step, env.acc.episode
            adv_avg = ep_acc.advantages[-1]
            print(flat, f"\nAdvantage={adv_avg}\n\n")

            row = self.conn_runs.execute(text(sql),
                                         hypers=json.dumps(flat),
                                         advantage_avg=adv_avg,
                                         advantages=list(ep_acc.advantages),
                                         uniques=list(ep_acc.uniques),
                                         prices=list(env.prices),
                                         actions=list(step_acc.signals),
                                         agent=self.agent,
                                         flag=self.net_type).fetchone()

            if adv_avg > 0:
                # The id returned by the insert names the save directory.
                _id = str(row[0])
                directory = os.path.join(os.getcwd(), "saves", _id)
                filestar = os.path.join(directory, _id)
                # makedirs also creates the parent "saves" directory when it
                # does not exist yet; plain mkdir would raise in that case.
                os.makedirs(directory, exist_ok=True)
                agent.save_model(filestar)
        finally:
            agent.close()
            env.close()
        return adv_avg