def train():
    # Training configuration for the supervised Coinbase model.
    hyperparams = {'batch_size': 50,
                   'learning_rate': 0.001,
                   'grad_decay': 0.99,
                   'grad_epsilon': 0.01,
                   'num_updates': 20000,
                   'grad_norm_clip': 5}

    # Model, loss, accuracy metric, evaluator, optimizer, and trainer.
    model = CoinbaseModel(hyperparams)
    loss = tb.Crossentropy(hyperparams)
    acc = tb.CatAcc(hyperparams)
    evaluator = tb.Evaluator(hyperparams, loss, acc)
    optim = tb.RMSPropOptim(hyperparams)
    trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    # First `split` samples are used for training, the rest for validation.
    split = 10000
    data = np.load('data/coinbase_n1.npz')
    train_xs = {'first_ticks': data['first_ticks'][:split],
                'last_ticks': data['last_ticks'][:split],
                'features': data['features'][:split]}
    train_y = data['targets'][:split]
    val_xs = {'first_ticks': data['first_ticks'][split:],
              'last_ticks': data['last_ticks'][split:],
              'features': data['features'][split:]}
    val_y = data['targets'][split:]

    # Train with validation comparison enabled, then evaluate on the held-out set.
    trainer.train(train_xs, train_y, val_xs, val_y, val_cmp=True)
    evaluator.eval(model, val_xs, val_y)
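# Illustrative sketch only: a quick sanity check on the layout that train()
# above expects from data/coinbase_n1.npz. The helper name and the assertions
# are assumptions; the only facts taken from the training code are the array
# keys ('first_ticks', 'last_ticks', 'features', 'targets') and the
# 10000-sample train/validation split.
def inspect_coinbase_n1(path='data/coinbase_n1.npz', split=10000):
    data = np.load(path)
    for key in ('first_ticks', 'last_ticks', 'features', 'targets'):
        print(key, data[key].shape, data[key].dtype)
    num_samples = data['targets'].shape[0]
    assert 0 < split < num_samples, 'split must leave samples for both sets'
    print('train samples:', split, 'validation samples:', num_samples - split)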
def train():
    # Training configuration for the ticks-only Coinbase model.
    hyperparams = {'batch_size': 512,
                   'learning_rate': 0.0001,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   'num_updates': 100000,
                   'grad_norm_clip': 5}

    # Pin graph construction to the CPU.
    with tf.device('/cpu:0'):
        model = CoinbaseModel(hyperparams)
        loss = tb.Crossentropy(hyperparams)
        acc = tb.CatAcc(hyperparams)
        evaluator = tb.Evaluator(hyperparams, loss, acc)
        optim = tb.RMSPropOptim(hyperparams)
        trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    # First 90000 tick windows for training, the remainder for validation.
    split = 90000
    data = np.load('data/coinbase-ticks-100000.npz')
    print(data['ticks'].shape)
    train_xs = {'ticks': data['ticks'][:split]}
    train_y = data['targets'][:split]
    val_xs = {'ticks': data['ticks'][split:]}
    val_y = data['targets'][split:]

    # Run training and evaluation on the CPU as well.
    with tf.device('/cpu:0'):
        trainer.train(train_xs, train_y, val_xs, val_y, val_cmp=True)
        evaluator.eval(model, val_xs, val_y)
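# Purely illustrative sketch (not taken from this repository) of one way a
# ticks/targets file like data/coinbase-ticks-100000.npz could be assembled:
# fixed-length windows of consecutive ticks as inputs and a binary up/down
# label for the next tick as the target. The window length and labeling rule
# are assumptions made for the example only.
def build_tick_dataset(prices, window=100, out_path='data/ticks_example.npz'):
    prices = np.asarray(prices, dtype=np.float32)
    ticks, targets = [], []
    for i in range(len(prices) - window):
        ticks.append(prices[i:i + window])
        # Label 1 if the tick after the window is higher than the last tick in it.
        targets.append(int(prices[i + window] > prices[i + window - 1]))
    np.savez(out_path,
             ticks=np.array(ticks, dtype=np.float32),
             targets=np.array(targets, dtype=np.int64))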
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 50000,
        # 'init_explore_len': 50,
        'learning_rate': 0.00025,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 10,
        'num_recent_feats': 25,
        'steps_per_episode': 150,
        'reward_discount': 0.99,
        'show_screen': True,
        'target_update_freq': 10000,
        'display_freq': 25,
        'updates_per_iter': 1,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        'joint_vel': 0.5,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }

    # Q-network, loss, and optimizer for the DQN agent.
    q_model = CookModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)

    # DQN agent checkpointed to params/cook_dqn.json, trained on the cooking task.
    agent = tb.DQNAgent(hyperparams, q_model, optim, loss, 'params/cook_dqn.json')
    task = CookingTask(hyperparams)
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=True)
    trainer.train_by_epoch()
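# Hedged reading of the 'epsilon': (1.0, 0.1, 1000000) entry above: exploration
# annealed linearly from the first value to the second over the given number of
# frames, then held constant. This interpretation of the tuple is an assumption
# about tb.DQNAgent, shown here only as a standalone illustration.
def annealed_epsilon(frame, schedule=(1.0, 0.1, 1000000)):
    start, end, anneal_frames = schedule
    if frame >= anneal_frames:
        return end
    return start + (end - start) * (frame / float(anneal_frames))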
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        # 'init_explore_len': 500,
        # 'init_explore_len': 50,
        'learning_rate': 0.0005,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        # 'target_update_freq': 10000,
        'display_freq': 100,
        'updates_per_iter': 50000,
        'update_freq': 4,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 25000,
        # 'screen_resize': (110, 84),
        # 'experience_replay_len': 1000000,
        # 'cache_size': int(2e4),
        'state_len': 4,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 400,  # 1e7 frames
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'tmax': 5,
        'num_recent_steps': 10000
    }

    # Q-network, loss, and optimizer for the SNDQN agent.
    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)

    # Agent checkpointed to params/breakout_sndqn_l0005.json, trained on Breakout.
    agent = tb.SNDQNAgent(hyperparams, q_model, optim, loss,
                          'params/breakout_sndqn_l0005.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
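# Illustrative sketch of the convention behind 'state_len': 4 in DQN-style
# agents: the network's input state is a stack of the 4 most recent
# (post-frame-skip) observations. This FrameStack helper is an assumption about
# that convention, not the actual state handling inside tb.SNDQNAgent.
from collections import deque

class FrameStack:
    def __init__(self, state_len=4):
        self.frames = deque(maxlen=state_len)

    def reset(self, first_frame):
        # At episode start, fill the stack with copies of the first frame.
        for _ in range(self.frames.maxlen):
            self.frames.append(first_frame)
        return np.stack(self.frames, axis=0)

    def step(self, frame):
        # Push the newest frame; the oldest one falls off automatically.
        self.frames.append(frame)
        return np.stack(self.frames, axis=0)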
def train_dqn():
    hyperparams = {
        'batch_size': 32,
        'learning_rate': 0.001,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        'epsilon': (1, 0.1, 4000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'show_screen': False,
        'display_freq': 100,
        # 'updates_per_iter': 1000,
        'updates_per_iter': 20000,
        'num_threads': 16,
        # 'num_threads': 1,
        # 'update_freq': 4,
        # 'frames_per_epoch': 5000,
        'frames_per_epoch': 100000,
        'episodes_per_eval': 32,
        # 'frames_per_eval': 25000,
        # 'frames_per_eval': 5000,
        'state_len': 4,
        'num_epochs': 400,
        'eval_epsilon': 0.05,
        'num_recent_episodes': 100,
        'tmax': 5,
        'num_recent_steps': 10000
    }

    # Q-network, loss, and optimizer shared by the asynchronous workers.
    q_model = BreakoutModel(hyperparams)
    loss = tb.MSE(hyperparams)
    optim = tb.RMSPropOptim(hyperparams)
    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)

    # Asynchronous agent checkpointed to params/breakout_async_sndqn_fast.json.
    agent = tb.AsyncSNDQNFastAgent(hyperparams, q_model, optim, loss,
                                   'params/breakout_async_sndqn_fast.json')
    task = AsyncAtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.AsyncSleepTrainer(hyperparams, agent, task, load_first=False)
    trainer.train_by_epoch()
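# Hedged sketch of the general pattern behind 'num_threads': 16 in asynchronous
# DQN-style training: several actor-learner threads share one set of parameters
# and each steps its own copy of the environment. The worker body is left to
# the caller; the real loop lives inside tb.AsyncSNDQNFastAgent and
# tb.AsyncSleepTrainer and may differ from this.
import threading

def run_async_workers(worker_fn, num_threads=16):
    threads = [threading.Thread(target=worker_fn, args=(thread_id,), daemon=True)
               for thread_id in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()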
def train_rdrl():
    hyperparams = {
        'batch_size': 32,
        'init_explore_len': 500000,
        'num_mega_updates': 100000,
        # 'init_model_train': 500000,
        # 'init_explore_len': 50,
        'learning_rate': 0.05,
        # 'grad_momentum': 0.0,
        'grad_decay': 0.95,
        'grad_epsilon': 0.01,
        # 'grad_norm_clip': 5,
        'epsilon': (1.0, 0.1, 1000000),
        'frame_skip': 4,
        'reward_discount': 0.99,
        'display_freq': 100,
        'updates_per_model_iter': 1,
        'updates_per_iter': 1,
        # 'trains_per_action_train': 500,
        'train_freq': 16,
        'action_train_freq': 16,
        # 'action_train_freq': 10000,
        'frames_per_epoch': 100000,
        # 'frames_per_epoch': 250,
        'frames_per_eval': 50000,
        # 'screen_resize': (110, 84),
        'experience_replay_len': 4000000,
        'update_target_freq': 20000,
        # 'cache_size': int(2e4),
        'state_len': 1,
        # 'num_frames': 10000000,
        # 'save_freq': 100000,
        # 'eval_freq': 10,
        'num_epochs': 200,  # 1e7 frames
        'show_screen': False,
        'rollout_length': 4,
        'value_rollout_length': 4,
        'eval_epsilon': 0.05,
        'action_train_scale': 5,
        'num_recent_episodes': 100,
        'num_recent_steps': 10000
    }

    # Action, state, reward, and value models, each with its own RMSProp
    # optimizer; the state, reward, and value models use MSE losses.
    action_model = ActionModel(hyperparams)
    action_optim = tb.RMSPropOptim(hyperparams)

    state_model = StateModel(hyperparams)
    state_optim = tb.RMSPropOptim(hyperparams)
    state_loss = tb.MSE(hyperparams)

    reward_model = RewardModel(hyperparams)
    reward_optim = tb.RMSPropOptim(hyperparams)
    reward_loss = tb.MSE(hyperparams)

    value_model = ValueModel(hyperparams)
    value_optim = tb.RMSPropOptim(hyperparams)
    value_loss = tb.MSE(hyperparams)

    # q_trainer = tb.Trainer(q_model, hyperparams, loss, optim, evaluator)
    agent = tb.RDRLAgent(hyperparams, action_model, action_optim,
                         state_model, state_loss, state_optim,
                         reward_model, reward_loss, reward_optim,
                         value_model, value_loss, value_optim,
                         'params/breakout_rdrl.json')
    task = AtariTask(hyperparams, 'data/roms/breakout.bin')
    trainer = tb.RLTrainer(hyperparams, agent, task)
    trainer.train_by_epoch()
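# Hedged illustration of the n-step discounted return suggested by
# 'rollout_length': 4 and 'reward_discount': 0.99: accumulate the rewards of a
# 4-step rollout and bootstrap from a value estimate of the final state. How
# tb.RDRLAgent actually combines its action/state/reward/value models is not
# shown in this file; this is only the standard return computation.
def n_step_return(rewards, bootstrap_value, discount=0.99):
    ret = bootstrap_value
    for r in reversed(rewards):  # rewards from a rollout of length rollout_length
        ret = r + discount * ret
    return ret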