Example #1
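Asynchronous DQN training on Atari Breakout: a BreakoutModel Q-network is optimized with tb.RMSPropVarOptim and an MSE loss, wrapped in a tb.AsyncSIDQNAgent running 16 worker threads, and driven by tb.AsyncSleepTrainer; parameters are saved to params/breakout_async_sidqn_fix.json.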
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'learning_rate': (0.5, 0.001, 4000000),
        'grad_decay': 0.99,
        'grad_epsilon': 0.01,
        'epsilon': [(1, 0.1, 4000000, 0.4), (1, 0.01, 4000000, 0.3),
                    (1, 0.5, 4000000, 0.3)],
        # 'epsilon': (1, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        'display_freq': 100,
        # 'updates_per_iter': 1000,
        'updates_per_iter': 40000,
        # 'init_frames': 20000,
        # 'init_frames': 200000,
        # 'init_updates': 20000,
        # 'init_updates': 100000,
        'num_threads': 16,
        # 'update_freq': 4,
        # 'frames_per_epoch': 5000,
        'frames_per_epoch': 100000,
        'episodes_per_eval': 32,
        # 'episodes_per_eval': 16,
        'state_len': 4,
        'num_epochs': 400,
        'eval_freq': 1,
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        # 'tmax': 5,
        'num_recent_steps': 10000
    }
    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropVarOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.AsyncSIDQNAgent(hyperparams, q_model, optim, loss,
                               'params/breakout_async_sidqn_fix.json')
    task = AsyncAtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.AsyncSleepTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
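The list form of 'epsilon' above attaches a fourth value (0.4, 0.3, 0.3) to each (start, end, anneal_frames) schedule, which reads like a sampling weight: each worker thread presumably draws one schedule, mirroring the asynchronous-DQN practice of annealing toward a final epsilon of 0.1, 0.01, or 0.5 with probabilities 0.4, 0.3, 0.3. A minimal sketch of that interpretation (this helper is illustrative and not part of the library):

import random

def sample_epsilon_schedule(schedules):
    # Draw one (start, end, anneal_frames, weight) entry in proportion to its weight.
    weights = [s[3] for s in schedules]
    start, end, anneal_frames, _weight = random.choices(schedules, weights=weights, k=1)[0]
    return start, end, anneal_frames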
Example #2
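Standard DQN training on a cooking task: a CookModel Q-network with RMSProp and an MSE loss, a one-million-transition experience replay, a target network refreshed every 10000 steps, and a tb.RLTrainer that resumes from params/cook_dqn.json (load_first=True).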
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 50000,
        # 'init_explore_len': 50,
        'learning_rate': 0.00025,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 10,
        'num_recent_feats': 25,
        'steps_per_episode': 150,
        'reward_discount': 0.99,
        'show_screen': True,
        'target_update_freq': 10000,
        'display_freq': 25,
        'updates_per_iter': 1,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        'joint_vel': 0.5,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }
    q_model = CookModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.DQNAgent(hyperparams, q_model, optim, loss,
                        'params/cook_dqn.json')
    task = CookingTask(hyperparams)
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=True)
    trainer.train_by_epoch()
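Here 'epsilon' is a single (start, end, anneal_frames) tuple, presumably the usual DQN linear annealing of the exploration rate over the first million frames. A sketch of that reading (the function below is an assumption, not the library's API):

def linear_epsilon(frame, start=1.0, end=0.1, anneal_frames=1000000):
    # Linearly anneal epsilon from start to end over anneal_frames frames, then hold at end.
    fraction = min(frame / float(anneal_frames), 1.0)
    return start + fraction * (end - start)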
Example #3
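DQN variant (tb.SNDQNAgent) on Atari Breakout: a BreakoutModel Q-network with RMSProp and an MSE loss, epsilon annealed from 1.0 to 0.1 over 4 million frames, and parameters saved to params/breakout_sndqn_l0005.json.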
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        # 'init_explore_len': 500,
        # 'init_explore_len': 50,
        'learning_rate': 0.0005,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        # 'target_update_freq': 10000,
        'display_freq': 100,
        'updates_per_iter': 50000,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 25000,
        # 'screen_resize': (110, 84),
        # 'experience_replay_len': 1000000
        # 'cache_size': int(2e4),
        'state_len': 4,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 400,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'tmax': 5,
        'num_recent_steps': 10000
    }
    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.SNDQNAgent(hyperparams, q_model, optim, loss,
                          'params/breakout_sndqn_l0005.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
Example #4
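Model-based variant (tb.RDRLAgent) on Atari Breakout: separate action, state, reward, and value models, each with its own RMSProp optimizer (and an MSE loss for the state, reward, and value models), rollouts of length 4, and parameters saved to params/breakout_rdrl.json.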
def train_rdrl():
    hyperparams = {'batch_size': 32,
                   'init_explore_len': 500000,
                   'num_mega_updates': 100000,
                   # 'init_model_train': 500000,
                   # 'init_explore_len': 50,
                   'learning_rate': 0.05,
                   # 'grad_momentum': 0.0,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   # 'grad_norm_clip': 5,
                   'epsilon': (1.0, 0.1, 1000000),
                   'frame_skip': 4,
                   'reward_discount': 0.99,
                   'display_freq': 100,
                   'updates_per_model_iter': 1,
                   'updates_per_iter': 1,
                   # 'trains_per_action_train': 500,
                   'train_freq': 16,
                   'action_train_freq': 16,
                   # 'action_train_freq': 10000,
                   'frames_per_epoch': 100000,
                   # 'frames_per_epoch': 250,
                   'frames_per_eval': 50000,
                   # 'screen_resize': (110, 84),
                   'experience_replay_len': 4000000,
                   'update_target_freq': 20000,
                   # 'cache_size': int(2e4),
                   'state_len': 1,
                   # 'num_frames': 10000000,
                   # 'save_freq': 100000,
                   # 'eval_freq': 10,
                   'num_epochs': 200,  # 1e7 frames
                   'show_screen': False,
                   'rollout_length': 4,
                   'value_rollout_length': 4,
                   'eval_epsilon': 0.05,
                   'action_train_scale': 5,
                   'num_recent_episodes': 100,
                   'num_recent_steps': 10000}
    action_model = ActionModel(hyperparams)
    action_optim = tb.RMSPropOptim(hyperparams)

    state_model = StateModel(hyperparams)
    state_optim = tb.RMSPropOptim(hyperparams)
    state_loss = tb.MSE(hyperparams)

    reward_model = RewardModel(hyperparams)
    reward_optim = tb.RMSPropOptim(hyperparams)
    reward_loss = tb.MSE(hyperparams)

    value_model = ValueModel(hyperparams)
    value_optim = tb.RMSPropOptim(hyperparams)
    value_loss = tb.MSE(hyperparams)

    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.RDRLAgent(hyperparams,
                         action_model, action_optim,
                         state_model, state_loss, state_optim,
                         reward_model, reward_loss, reward_optim,
                         value_model, value_loss, value_optim,
                         'params/breakout_rdrl.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task)
    trainer.train_by_epoch()