Exemplo n.º 1
0
def main():
    """Train and test a PPO agent for the hypersearch winner, or run it live.

    Reads the module-level ``args`` (``id``, ``early_stop``, ``live``,
    ``test_live``, ``gpu_split``, ``net_type``, ``runs``).

    Unless ``args.live`` or ``args.test_live`` is set, any existing save
    directory for this run is removed first, the agent is trained and tested
    via ``env.train_and_test``, and both the agent and the environment are
    closed. Otherwise the agent is handed to ``env.run_live``.
    """
    directory = f'./saves/{args.id}{"_early" if args.early_stop else ""}'
    live_ish = args.live or args.test_live
    if not live_ish:
        # Best-effort removal of a previous run's saves. ignore_errors covers
        # the "directory does not exist yet" case without a bare ``except``
        # that would also swallow KeyboardInterrupt / SystemExit.
        shutil.rmtree(directory, ignore_errors=True)

    hs = HSearchEnv(gpu_split=args.gpu_split, net_type=args.net_type)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        saver_spec=dict(
            directory=directory,
            # Saves this model every 6000 time-steps. Saving manually at the
            # end would be preferable (a winning combo could then be saved in
            # hypersearch.py and this redundant training step removed), but
            # TForce doesn't have working manual-save code yet, only automatic.
            steps=6000
        ),
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if live_ish:
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, early_stop=args.early_stop, n_tests=args.runs)
        agent.close()
        env.close()
Exemplo n.º 2
0
def main():
    """Train, test and save a PPO agent, or restore a saved one and run it live.

    Reads the module-level ``args`` (``name``, ``id``, ``live``,
    ``test_live``, ``n_steps``, ``n_tests``, ``early_stop``).

    The model lives under ``<cwd>/saves/<args.name>``. In training mode that
    directory is recreated empty, the agent is trained and tested, saved to
    ``<directory>/<args.name>``, and the agent and environment are closed.
    In live / test-live mode the model is restored from the directory and
    passed to ``env.run_live``.
    """
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)

    live_ish = args.live or args.test_live
    if not live_ish:
        # Best-effort removal of a previous run's saves; avoids a bare
        # ``except`` that would also swallow KeyboardInterrupt / SystemExit.
        shutil.rmtree(directory, ignore_errors=True)
        # makedirs also creates a missing "saves" parent (os.mkdir would
        # fail on a first run). exist_ok is deliberately left off: if the
        # stale directory could not be removed this still raises
        # FileExistsError, as the original os.mkdir did.
        os.makedirs(directory)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, cli_args=args)

    agent = agents_dict['ppo_agent'](states=env.states,
                                     actions=env.actions,
                                     network=network,
                                     **hydrated)

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
        agent.save_model(filestar)
        agent.close()
        env.close()
Exemplo n.º 3
0
def main():
    """Train, test and save a PPO agent, or restore a saved one and run it live.

    Reads the module-level ``args`` (``name``, ``id``, ``live``,
    ``test_live``, ``n_steps``, ``n_tests``, ``early_stop``).

    The model lives under ``<cwd>/saves/<args.name>``. In training mode that
    directory is recreated empty, the agent is trained and tested, saved to
    ``<directory>/<args.name>``, and the agent and environment are closed.
    In live / test-live mode the model is restored from the directory and
    passed to ``env.run_live``.
    """
    directory = os.path.join(os.getcwd(), "saves", args.name)
    filestar = os.path.join(directory, args.name)

    live_ish = args.live or args.test_live
    if not live_ish:
        # Best-effort removal of a previous run's saves; avoids a bare
        # ``except`` that would also swallow KeyboardInterrupt / SystemExit.
        shutil.rmtree(directory, ignore_errors=True)
        # makedirs also creates a missing "saves" parent (os.mkdir would
        # fail on a first run). exist_ok is deliberately left off: if the
        # stale directory could not be removed this still raises
        # FileExistsError, as the original os.mkdir did.
        os.makedirs(directory)

    hs = HSearchEnv(cli_args=args)
    flat, hydrated, network = hs.get_winner(id=args.id)
    env = BitcoinEnv(flat, name='ppo_agent')
    agent = agents_dict['ppo_agent'](
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network,
        **hydrated
    )

    if live_ish:
        agent.restore_model(directory)
        env.run_live(agent, test=args.test_live)
    else:
        env.train_and_test(agent, args.n_steps, args.n_tests, args.early_stop)
        agent.save_model(filestar)
        agent.close()
        env.close()