Пример #1
0
def train():
    """Train a CoinbaseModel on the head of the dataset and evaluate on the tail.

    One hyperparameter dict is shared by the model, the ``tb.Crossentropy``
    loss, the ``tb.CatAcc`` metric, the ``tb.Evaluator``, the
    ``tb.RMSPropOptim`` optimizer and the ``tb.Trainer``.

    The arrays stored in ``data/coinbase_n1.npz`` are cut at row ``split``:
    rows before it are passed to the trainer as training data, the remaining
    rows as validation data.  After training, the evaluator is run once more
    on the validation rows.  Nothing is returned.
    """
    hyperparams = {'batch_size': 50,
                   'learning_rate': 0.001,
                   'grad_decay': 0.99,
                   'grad_epsilon': 0.01,
                   'num_updates': 20000,
                   'grad_norm_clip': 5}
    model = CoinbaseModel(hyperparams)
    loss = tb.Crossentropy(hyperparams)
    acc = tb.CatAcc(hyperparams)
    evaluator = tb.Evaluator(hyperparams, loss, acc)
    optim = tb.RMSPropOptim(hyperparams)
    trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    split = 10000
    input_keys = ('first_ticks', 'last_ticks', 'features')
    # An .npz archive is loaded lazily: every ``data[key]`` lookup reads the
    # array from disk again and the underlying file stays open until closed.
    # Read each array exactly once and release the file right away.
    with np.load('data/coinbase_n1.npz') as data:
        inputs = {key: data[key] for key in input_keys}
        targets = data['targets']

    train_xs = {key: values[:split] for key, values in inputs.items()}
    train_y = targets[:split]

    val_xs = {key: values[split:] for key, values in inputs.items()}
    val_y = targets[split:]

    trainer.train(train_xs, train_y,
                  val_xs, val_y,
                  val_cmp=True)
    evaluator.eval(model, val_xs, val_y)
Пример #2
0
def train():
    """Train a CoinbaseModel on raw ticks and evaluate on the held-out rows.

    One hyperparameter dict is shared by the model, the ``tb.Crossentropy``
    loss, the ``tb.CatAcc`` metric, the ``tb.Evaluator``, the
    ``tb.RMSPropOptim`` optimizer and the ``tb.Trainer``.  Both the model
    construction and the training call run inside ``tf.device('/cpu:0')``.

    The ``ticks`` and ``targets`` arrays from
    ``data/coinbase-ticks-100000.npz`` are cut at row ``split``: rows before
    it are the training data, the remaining rows the validation data.  The
    shape of ``ticks`` is printed before training, and the evaluator is run
    once more on the validation rows afterwards.  Nothing is returned.
    """
    hyperparams = {'batch_size': 512,
                   'learning_rate': 0.0001,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   'num_updates': 100000,
                   'grad_norm_clip': 5}
    with tf.device('/cpu:0'):
        model = CoinbaseModel(hyperparams)
    loss = tb.Crossentropy(hyperparams)
    acc = tb.CatAcc(hyperparams)
    evaluator = tb.Evaluator(hyperparams, loss, acc)
    optim = tb.RMSPropOptim(hyperparams)
    trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    split = 90000
    # An .npz archive is loaded lazily: every ``data[key]`` lookup reads the
    # array from disk again and the underlying file stays open until closed.
    # Read each array exactly once and release the file right away.
    with np.load('data/coinbase-ticks-100000.npz') as data:
        ticks = data['ticks']
        print(ticks.shape)
        targets = data['targets']

    train_xs = {'ticks': ticks[:split]}
    train_y = targets[:split]

    val_xs = {'ticks': ticks[split:]}
    val_y = targets[split:]

    with tf.device('/cpu:0'):
        trainer.train(train_xs, train_y,
                      val_xs, val_y,
                      val_cmp=True)
    evaluator.eval(model, val_xs, val_y)
Пример #3
0
def train_dqn():
    """Wire up a ``tb.DQNAgent`` for ``CookingTask`` and train it by epoch.

    A single settings dict is handed to every component: the ``CookModel``
    network, the ``tb.MSE`` loss, the ``tb.RMSPropOptim`` optimizer, the
    agent, the task and the ``tb.RLTrainer``.  The agent receives the path
    ``params/cook_dqn.json`` and the trainer is created with
    ``load_first=True``.  Nothing is returned.
    """
    settings = dict(
        batch_size=32,
        init_explore_len=50000,
        learning_rate=0.00025,
        grad_decay=0.95,
        grad_epsilon=0.01,
        epsilon=(1.0, 0.1, 1000000),
        frame_skip=10,
        num_recent_feats=25,
        steps_per_episode=150,
        reward_discount=0.99,
        show_screen=True,
        target_update_freq=10000,
        display_freq=25,
        updates_per_iter=1,
        update_freq=4,
        frames_per_epoch=100000,
        frames_per_eval=50000,
        experience_replay_len=1000000,
        state_len=4,
        joint_vel=0.5,
        num_epochs=200,
        eval_epsilon=0.05,
        num_recent_episodes=100,
        num_recent_steps=10000,
    )

    # Components are built in a fixed order: network, loss, optimizer.
    network = CookModel(settings)
    criterion = tb.MSE(settings)
    optimizer = tb.RMSPropOptim(settings)

    params_path = 'params/cook_dqn.json'
    dqn_agent = tb.DQNAgent(settings, network, optimizer, criterion,
                            params_path)
    cooking = CookingTask(settings)

    rl_trainer = tb.RLTrainer(settings, dqn_agent, cooking, load_first=True)
    rl_trainer.train_by_epoch()
Пример #4
0
def train_dqn():
    """Wire up a ``tb.SNDQNAgent`` for Breakout and train it by epoch.

    A single settings dict is handed to every component: the
    ``BreakoutModel`` network, the ``tb.MSE`` loss, the ``tb.RMSPropOptim``
    optimizer, the agent, the ``AtariTask`` and the ``tb.RLTrainer``.  The
    agent receives the path ``params/breakout_sndqn_l0005.json``, the task
    the ROM path ``data/roms/breakout.bin``, and the trainer is created with
    ``load_first=False``.  Nothing is returned.
    """
    settings = dict(
        batch_size=32,
        learning_rate=0.0005,
        grad_decay=0.95,
        grad_epsilon=0.01,
        epsilon=(1.0, 0.1, 4000000),
        frame_skip=4,
        reward_discount=0.99,
        show_screen=False,
        display_freq=100,
        updates_per_iter=50000,
        update_freq=4,
        frames_per_epoch=100000,
        frames_per_eval=25000,
        state_len=4,
        num_epochs=400,
        eval_epsilon=0.05,
        num_recent_episodes=100,
        tmax=5,
        num_recent_steps=10000,
    )

    # Components are built in a fixed order: network, loss, optimizer.
    network = BreakoutModel(settings)
    criterion = tb.MSE(settings)
    optimizer = tb.RMSPropOptim(settings)

    params_path = 'params/breakout_sndqn_l0005.json'
    rom_path = 'data/roms/breakout.bin'
    sndqn_agent = tb.SNDQNAgent(settings, network, optimizer, criterion,
                                params_path)
    breakout = AtariTask(settings, rom_path)

    rl_trainer = tb.RLTrainer(settings, sndqn_agent, breakout,
                              load_first=False)
    rl_trainer.train_by_epoch()
Пример #5
0
def train_dqn():
    """Wire up a ``tb.AsyncSNDQNFastAgent`` for Breakout and train it by epoch.

    A single settings dict is handed to every component: the
    ``BreakoutModel`` network, the ``tb.MSE`` loss, the ``tb.RMSPropOptim``
    optimizer, the agent, the ``AsyncAtariTask`` and the
    ``tb.AsyncSleepTrainer``.  The agent receives the path
    ``params/breakout_async_sndqn_fast.json``, the task the ROM path
    ``data/roms/breakout.bin``, and the trainer is created with
    ``load_first=False``.  Nothing is returned.
    """
    settings = dict(
        batch_size=32,
        learning_rate=0.001,
        grad_decay=0.95,
        grad_epsilon=0.01,
        epsilon=(1, 0.1, 4000000),
        frame_skip=4,
        reward_discount=0.99,
        show_screen=False,
        display_freq=100,
        updates_per_iter=20000,
        num_threads=16,
        frames_per_epoch=100000,
        episodes_per_eval=32,
        state_len=4,
        num_epochs=400,
        eval_epsilon=0.05,
        num_recent_episodes=100,
        tmax=5,
        num_recent_steps=10000,
    )

    # Components are built in a fixed order: network, loss, optimizer.
    network = BreakoutModel(settings)
    criterion = tb.MSE(settings)
    optimizer = tb.RMSPropOptim(settings)

    params_path = 'params/breakout_async_sndqn_fast.json'
    rom_path = 'data/roms/breakout.bin'
    async_agent = tb.AsyncSNDQNFastAgent(settings, network, optimizer,
                                         criterion, params_path)
    breakout = AsyncAtariTask(settings, rom_path)

    sleep_trainer = tb.AsyncSleepTrainer(settings, async_agent, breakout,
                                         load_first=False)
    sleep_trainer.train_by_epoch()
Пример #6
0
def train_rdrl():
    """Wire up a ``tb.RDRLAgent`` for Breakout and train it by epoch.

    Four networks are created from one shared settings dict --
    ``ActionModel``, ``StateModel``, ``RewardModel`` and ``ValueModel`` --
    each with its own ``tb.RMSPropOptim``; the state, reward and value
    networks additionally get their own ``tb.MSE`` loss.  All of them are
    passed to the agent together with the path ``params/breakout_rdrl.json``.
    The agent is then trained on an ``AtariTask`` built from
    ``data/roms/breakout.bin`` through a ``tb.RLTrainer``.  Nothing is
    returned.
    """
    settings = dict(
        batch_size=32,
        init_explore_len=500000,
        num_mega_updates=100000,
        learning_rate=0.05,
        grad_decay=0.95,
        grad_epsilon=0.01,
        epsilon=(1.0, 0.1, 1000000),
        frame_skip=4,
        reward_discount=0.99,
        display_freq=100,
        updates_per_model_iter=1,
        updates_per_iter=1,
        train_freq=16,
        action_train_freq=16,
        frames_per_epoch=100000,
        frames_per_eval=50000,
        experience_replay_len=4000000,
        update_target_freq=20000,
        state_len=1,
        num_epochs=200,
        show_screen=False,
        rollout_length=4,
        value_rollout_length=4,
        eval_epsilon=0.05,
        action_train_scale=5,
        num_recent_episodes=100,
        num_recent_steps=10000,
    )

    # Action network: optimizer only, no loss object.
    act_net = ActionModel(settings)
    act_opt = tb.RMSPropOptim(settings)

    # State network.
    st_net = StateModel(settings)
    st_opt = tb.RMSPropOptim(settings)
    st_loss = tb.MSE(settings)

    # Reward network.
    rew_net = RewardModel(settings)
    rew_opt = tb.RMSPropOptim(settings)
    rew_loss = tb.MSE(settings)

    # Value network.
    val_net = ValueModel(settings)
    val_opt = tb.RMSPropOptim(settings)
    val_loss = tb.MSE(settings)

    # The agent takes (model, optim) for the action network and
    # (model, loss, optim) for each of the other three.
    rdrl_agent = tb.RDRLAgent(
        settings,
        act_net, act_opt,
        st_net, st_loss, st_opt,
        rew_net, rew_loss, rew_opt,
        val_net, val_loss, val_opt,
        'params/breakout_rdrl.json',
    )
    breakout = AtariTask(settings, 'data/roms/breakout.bin')

    rl_trainer = tb.RLTrainer(settings, rdrl_agent, breakout)
    rl_trainer.train_by_epoch()