import argparse

import torch

# NOTE: The helpers used below (set_seed, MujocoEnv, DummyVecEnvWrapper,
# AdamFactory, TrainingInfo, EpochInfo, EpisodeRewardMetric, StdoutStreaming,
# get_ddpg, get_ppo) are assumed to be imported from the vel library or
# defined elsewhere in this project.


def pivoting_rl(args):
    device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
    seed = 1002

    # Set random seed in python std lib, numpy and pytorch
    set_seed(seed)

    vec_env = DummyVecEnvWrapper(
        MujocoEnv('HalfCheetah-v2')
    ).instantiate(parallel_envs=1, seed=seed)

    if args.algo == 'ddpg':
        model, reinforcer = get_ddpg(vec_env, device)
    elif args.algo == 'ppo':
        model, reinforcer = get_ppo(vec_env, device)
    else:
        raise ValueError('Unknown algo: {}'.format(args.algo))

    # Optimizer helper - a weird regularization setting I've copied from OpenAI code
    adam_optimizer = AdamFactory(
        lr=[1.0e-4, 1.0e-3, 1.0e-3],
        weight_decay=[0.0, 0.0, 0.001],
        eps=1.0e-4,
        layer_groups=True
    ).instantiate(model)

    # Overall information store for training information
    training_info = TrainingInfo(
        metrics=[
            EpisodeRewardMetric('episode_rewards'),  # Calculate average reward from episode
        ],
        callbacks=[StdoutStreaming()]  # Print live metrics every epoch to standard output
    )

    # A bit of training initialization bookkeeping...
    training_info.initialize()
    reinforcer.initialize_training(training_info)
    training_info.on_train_begin()

    # Train for 1M frames total (assuming the divisor 2 is the rollout length),
    # with 1,000 batches per epoch to average metrics nicely
    num_epochs = int(1.0e6 / 2 / 1000)

    # Normal handrolled training loop
    for i in range(1, num_epochs + 1):
        epoch_info = EpochInfo(
            training_info=training_info,
            global_epoch_idx=i,
            batches_per_epoch=1000,
            optimizer=adam_optimizer
        )

        reinforcer.train_epoch(epoch_info)

    training_info.on_train_end()
if __name__ == '__main__':
    # Minimal CLI covering the two arguments the function actually reads
    # (args.algo and args.gpu); the defaults chosen here are assumptions.
    parser = argparse.ArgumentParser(description='Train HalfCheetah-v2 with DDPG or PPO')
    parser.add_argument('--algo', choices=['ddpg', 'ppo'], default='ddpg')
    parser.add_argument('--gpu', type=int, default=0)

    pivoting_rl(parser.parse_args())
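# Example invocation (the script name is hypothetical; the flags match the
# parser defined above):
#   python pivoting_rl.py --algo ddpg --gpu 0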