Example 1
    parser.add_argument("-r", "--run", required=True, help="Run name")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    saves_path = SAVE_DIR / f"simple-{args.run}"
    saves_path.mkdir(parents=True, exist_ok=True)

    data_path = pathlib.Path(args.data)
    val_path = pathlib.Path(args.val)

    if args.year is not None or data_path.is_file():
        if args.year is not None:
            stock_data = data.load_year_data(args.year)
        else:
            stock_data = {"YNDX": data.load_relative(data_path)}
        env = environ.StockEnv(stock_data, bars_count=BARS_COUNT)
        env_tst = environ.StockEnv(stock_data, bars_count=BARS_COUNT)
    elif data_path.is_dir():
        env = environ.StockEnv.from_dir(data_path, bars_count=BARS_COUNT)
        env_tst = environ.StockEnv.from_dir(data_path, bars_count=BARS_COUNT)
    else:
        raise RuntimeError("No data to train on")

    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
    val_data = {"YNDX": data.load_relative(val_path)}

    net = models.SimpleFFDQN(env.observation_space.shape[0], env.action_space.n).to(device)
    tgt_net = drl.agent.TargetNet(net)

    selector = drl.actions.EpsilonGreedySelector(epsilon=EPS_START)
    eps_tracker = drl.tracker.EpsilonTracker(selector, EPS_START, EPS_END, EPS_STEPS)
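The training loop that consumes selector and eps_tracker is cut off here. As a reference point, the linear annealing an EpsilonTracker usually performs can be sketched in plain Python; this is an assumption about the usual EPS_START/EPS_END/EPS_STEPS semantics, not the drl package's actual code, and the constants below are example values.

# Minimal sketch of linear epsilon annealing (assumed behaviour, not drl's code).
EPS_START, EPS_END, EPS_STEPS = 1.0, 0.1, 1_000_000

def epsilon_at(frame_idx):
    """Decay epsilon linearly from EPS_START to EPS_END over EPS_STEPS frames."""
    frac = min(frame_idx / EPS_STEPS, 1.0)
    return EPS_START + frac * (EPS_END - EPS_START)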
Example 2
                    action='store_true',
                    help='enable cuda')
args = parser.parse_args()

device = 'cuda' if args.cuda else 'cpu'

try:
    from lib import data
    play_data = data.read_csv(file_name='data/000001_%d.csv' % args.year)
except ModuleNotFoundError:
    play_data = (pd.read_csv('data/prices_%d.csv' % args.year, index_col=0),
                 pd.read_csv('data/factors_%d.csv' % args.year, index_col=0))

env = environ.StockEnv(play_data,
                       bars_count=BARS_COUNT,
                       commission=args.commission,
                       reset_on_sell=False,
                       random_ofs_on_reset=False)
net = models.DQNConv1d(env.observation_space.shape, env.action_space.n)
datestr = date(2019, 2, 2).strftime('%Y-%m-%d')
save_path = os.path.join('saves', datestr)
state_dict = torch.load(os.path.join(save_path, 'best_mean_val.pth'),
                        map_location=lambda storage, loc: storage)
net.load_state_dict(state_dict)

obs = env.reset()
start_price = env.state._close()
total_reward = 0.0
frame_idx = 0
rewards = []
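The play loop itself is not included in this snippet. A plausible greedy continuation, using only the variables already defined above and the classic Gym step API these examples rely on, might look like this (a sketch, not the original script's code):

# Hypothetical continuation: play one episode greedily with the loaded DQN.
while True:
    obs_v = torch.tensor([obs], dtype=torch.float32)
    with torch.no_grad():
        q_vals_v = net(obs_v)               # Q-values for the current observation
    action = int(q_vals_v.argmax(dim=1).item())   # greedy action, no exploration
    obs, reward, done, _ = env.step(action)
    frame_idx += 1
    total_reward += reward
    rewards.append(total_reward)
    if done:
        break
print('Total reward: %.4f' % total_reward)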
Example 3
    args = parser.parse_args()

    device = torch.device('cuda' if args.cuda else 'cpu')

    try:
        from lib import data
        train_data = data.load_data(year=args.year)
    except ModuleNotFoundError:
        # workaround because TA-Lib cannot be installed on Colab
        train_data = (pd.read_csv('data/000001_prices_%d.csv' % args.year,
                                  index_col=0),
                      pd.read_csv('data/000001_factors_%d.csv' % args.year,
                                  index_col=0))

    env = environ.StockEnv(train_data,
                           bars_count=BARS_COUNT,
                           commission=0.0,
                           reset_on_sell=False)
    # env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

    net = models.A2CConv1d(env.observation_space.shape,
                           env.action_space.n).to(device)

    # use a distinct name so the `agent` module is not shadowed by the instance
    agt = agent.ProbabilityAgent(lambda x: net(x)[0],
                                 apply_softmax=True,
                                 device=device)
    exp_source = experience.ExperienceSource(env,
                                             agt,
                                             GAMMA,
                                             steps_count=REWARD_STEPS)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)
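The agent above samples actions from the softmax of the policy head (net(x)[0]). The sketch below illustrates that selection step in isolation; it is an illustration of the idea only, not the library's ProbabilityAgent implementation.

# Sketch of softmax action sampling from a policy head's logits (illustration only).
import numpy as np
import torch
import torch.nn.functional as F

def select_action(net, obs, device='cpu'):
    """Sample an action from the softmax over the policy head's logits."""
    obs_v = torch.tensor([obs], dtype=torch.float32).to(device)
    with torch.no_grad():
        logits_v = net(obs_v)[0]            # index 0 = policy head, as in net(x)[0] above
    probs = F.softmax(logits_v, dim=1).cpu().numpy()[0]
    return int(np.random.choice(len(probs), p=probs))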
Example 4
    args = parser.parse_args()

    device = torch.device('cuda' if args.cuda else 'cpu')

    try:
        from lib import data
        train_data = data.read_csv(file_name='data/000001_2017.csv')
        val_data = data.read_csv(file_name='data/000001_2018.csv')
    except ModuleNotFoundError:
        train_data = (pd.read_csv('data/prices_2017.csv', index_col=0),
                      pd.read_csv('data/factors_2017.csv', index_col=0))
        val_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                    pd.read_csv('data/factors_2018.csv', index_col=0))

    env = environ.StockEnv(train_data,
                           bars_count=BARS_COUNT,
                           reset_on_sell=True)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
    env_test = environ.StockEnv(train_data,
                                bars_count=BARS_COUNT,
                                reset_on_sell=True)
    env_test = gym.wrappers.TimeLimit(env_test, max_episode_steps=1000)
    env_val = environ.StockEnv(val_data,
                               bars_count=BARS_COUNT,
                               reset_on_sell=True)
    env_val = gym.wrappers.TimeLimit(env_val, max_episode_steps=1000)

    net = models.DQNConv1d(env.observation_space.shape,
                           env.action_space.n).to(device)
    tgt_net = models.DQNConv1d(env.observation_space.shape,
                               env.action_space.n).to(device)
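The snippet ends right after the online and target networks are created; the loop that keeps them in sync is omitted. A hypothetical fragment of that loop is sketched below; SYNC_STEPS and TRAIN_STEPS are assumed hyperparameters, not values taken from the snippet.

# Hypothetical training-loop fragment: periodically copy the online weights
# into the frozen target network.
SYNC_STEPS = 1_000        # assumed hyperparameter
TRAIN_STEPS = 100_000     # assumed hyperparameter

for frame_idx in range(1, TRAIN_STEPS + 1):
    ...                   # sample a batch, compute the DQN loss, optimizer step
    if frame_idx % SYNC_STEPS == 0:
        tgt_net.load_state_dict(net.state_dict())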
Example 5
def worker(net, device, train_queue, proc_idx, save_path):
    try:
        from lib import data
        train_data = data.read_csv(file_name='data/000001_2018.csv')
    except ModuleNotFoundError:
        train_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                      pd.read_csv('data/factors_2018.csv', index_col=0))
    env = environ.StockEnv(train_data,
                           bars_count=BARS_COUNT,
                           reset_on_sell=True)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
    agt = agent.ProbabilityAgent(lambda x: net(x)[0],
                                 apply_softmax=True,
                                 device=device)
    exp_source = experience.ExperienceSource(env,
                                             agt,
                                             GAMMA,
                                             steps_count=REWARD_STEPS)

    batch = []
    frame_idx = 0
    total_reward = []
    total_steps = []
    reward_buf = []
    steps_buf = []
    frame_prev = 0
    ts = time.time()
    best_mean_reward = None

    stats = collections.defaultdict(list)

    file_name = os.path.splitext(os.path.basename(__file__))[0]
    file_name = file_name.split('_')[-1]
    proc_name = 'worker_%d' % proc_idx
    writer = SummaryWriter(os.path.join('runs', file_name, proc_name))

    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s:%(message)s',
                        handlers=[
                            logging.FileHandler(
                                os.path.join(save_path, 'console.log')),
                            logging.StreamHandler()
                        ])

    for exp in exp_source:
        frame_idx += 1
        batch.append(exp)
        if len(batch) < GRAD_BATCH:
            continue

        net.zero_grad()
        loss_val_v, loss_policy_v, loss_entropy_v = helper.a2c_loss(
            batch, net, GAMMA**REWARD_STEPS, ENTROPY_BETA, device)
        batch.clear()
        loss_v = loss_entropy_v + loss_val_v + loss_policy_v
        loss_v.backward()
        nn_utils.clip_grad_norm_(net.parameters(), CLIP_GRAD)
        grads = [
            param.grad.data.cpu().numpy() if param.grad is not None else None
            for param in net.parameters()
        ]
        train_queue.put(grads)

        # keep detached copies so the stats buffers do not hold onto the graph
        stats['loss_value'].append(loss_val_v.detach())
        stats['loss_policy'].append(loss_policy_v.detach())
        stats['loss_entropy'].append(loss_entropy_v.detach())
        stats['loss_total'].append(loss_v.detach())
        for stat in stats:
            if len(stats[stat]) >= STATS_GROUPS:
                writer.add_scalar(stat,
                                  torch.mean(torch.stack(stats[stat])).item(),
                                  frame_idx)
                stats[stat].clear()

        ep_reward, ep_steps = exp_source.pop_episode_result()
        if ep_reward:
            print(
                'Worker_%d: %d done, Episode reward: %.4f, Episode step: %d' %
                (proc_idx, frame_idx, ep_reward, ep_steps))
            reward_buf.append(ep_reward)
            steps_buf.append(ep_steps)
            if len(reward_buf) == REWARD_GROUPS:
                reward = np.mean(reward_buf)
                steps = np.mean(steps_buf)
                reward_buf.clear()
                steps_buf.clear()
                total_reward.append(reward)
                total_steps.append(steps)
                speed = (frame_idx - frame_prev) / (time.time() - ts)
                frame_prev = frame_idx
                ts = time.time()
                mean_reward = np.mean(total_reward[-100:])
                mean_step = np.mean(total_steps[-100:])
                logging.info(
                    '%d done, mean reward %.3f, mean step %d, speed %d f/s' %
                    (frame_idx, mean_reward, mean_step, speed))
                writer.add_scalar('speed', speed, frame_idx)
                writer.add_scalar('reward', reward, frame_idx)
                writer.add_scalar('reward_100', mean_reward, frame_idx)
                writer.add_scalar('steps', steps, frame_idx)
                writer.add_scalar('steps_100', mean_step, frame_idx)
                if best_mean_reward is None or best_mean_reward < mean_reward:
                    torch.save(
                        net.state_dict(),
                        os.path.join(save_path,
                                     'best_mean_reward-%.3f.pth' % mean_reward))
                    if best_mean_reward is not None:
                        logging.info(
                            'Worker_%d: Best mean value updated %.3f -> %.3f' %
                            (proc_idx, best_mean_reward, mean_reward))
                    best_mean_reward = mean_reward
    writer.close()
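worker() only pushes gradient arrays into train_queue; the consuming side is not shown in this example. A plausible parent-process helper (hypothetical names, not taken from the original script) that pulls one gradient package and applies it to the shared network could look like this:

# Hypothetical parent-process side of the gradient queue.
import torch

def apply_worker_grads(net, optimizer, train_queue):
    """Pull one gradient package from a worker and apply it to the shared net."""
    grads = train_queue.get()                 # blocks until some worker reports
    if grads is None:                         # assumed shutdown sentinel
        return False
    for param, grad in zip(net.parameters(), grads):
        if grad is not None:
            param.grad = torch.from_numpy(grad).to(param.device)
    optimizer.step()
    optimizer.zero_grad()
    return True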
Example 6
                        default=False,
                        action='store_true',
                        help='enable colab hosted runtime')
    args = parser.parse_args()

    device = torch.device('cuda' if args.cuda else 'cpu')

    try:
        from lib import data
        train_data = data.read_csv(file_name='data/000001_2018.csv')
    except ModuleNotFoundError:
        train_data = (pd.read_csv('data/prices_2018.csv', index_col=0),
                      pd.read_csv('data/factors_2018.csv', index_col=0))

    env = environ.StockEnv(train_data,
                           bars_count=BARS_COUNT,
                           reset_on_sell=True)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
    net = models.A2CConv1d(env.observation_space.shape,
                           env.action_space.n).to(device)
    net.share_memory()
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    file_name = os.path.splitext(os.path.basename(__file__))[0]
    file_name = file_name.split('_')[-1]
    save_path = os.path.join('saves', file_name)
    os.makedirs(save_path, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)s:%(message)s',
                        handlers=[