            set_s[i].actor.layerNmu.bias))  # (snippet begins mid-statement)
        noise = torch.FloatTensor(noise)
        noise = torch.mul(set_s[i].actor.layerNmu.bias.data, noise)
        set_s[i].actor.layerNmu.bias.data = copy.deepcopy(
            set_s[i].actor.layerNmu.bias.data + noise)
    return set_s


if __name__ == "__main__":
    parse_arguments()
    args = parser.parse_args()
    args.env_name = "Springmass-v0"
    print("Running environment " + str(args.env_name))
    env = NormalizedActions(gym.make(args.env_name))
    # env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(args.env_name), force=True)
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    '''
    DEFINE THE ACTOR RL AGENT
    '''
    if args.algo == "NAF":
        agent = NAF(args.gamma, args.tau, args.hidden_size,
                    env.observation_space.shape[0], env.action_space)
        print("Initialized NAF")
    else:
        agent = DDPG(args.gamma, args.tau, args.hidden_size,
                     env.observation_space.shape[0], env.action_space)
                    help='hidden size (default: 128)')
parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--num-stack', type=int, default=1,
                    help='number of frames to stack')
parser.add_argument('--model-suffix', default="",
                    help='suffix appended to the saved model name')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))
writer = SummaryWriter()
env.seed(args.seed)

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.manual_seed(args.seed)
else:
    device = torch.device("cpu")
torch.manual_seed(args.seed)
np.random.seed(args.seed)

obs_shape = env.observation_space.shape
obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
                    help='random seed (default: 4)')
parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                    help='batch size (default: 128)')
parser.add_argument('--num_steps', type=int, default=1000, metavar='N',
                    help='max episode length (default: 1000)')
parser.add_argument('--num_episodes', type=int, default=1000, metavar='N',
                    help='number of episodes (default: 1000)')
parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
                    help='hidden size (default: 128)')
parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))
writer = SummaryWriter()
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.algo == "NAF":
    agent = NAF(args.gamma, args.tau, args.hidden_size,
                env.observation_space.shape[0], env.action_space)
else:
    agent = DDPG(args.gamma, args.tau, args.hidden_size,
                 env.observation_space.shape[0], env.action_space)

memory = ReplayMemory(args.replay_size)
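The NAF/DDPG scripts above finish by constructing `ReplayMemory(args.replay_size)`, whose definition is not shown. A minimal sketch consistent with that usage follows; the `Transition` fields and the exact `push`/`sample` signatures are assumptions for illustration, not the repos' actual code.

import random
from collections import deque, namedtuple

# Assumed transition layout; real implementations may store masks instead of done flags.
Transition = namedtuple('Transition',
                        ('state', 'action', 'reward', 'next_state', 'done'))

class ReplayMemory:
    def __init__(self, capacity):
        # deque with maxlen evicts the oldest transitions FIFO once full
        self.buffer = deque(maxlen=capacity)

    def push(self, *transition):
        self.buffer.append(Transition(*transition))

    def sample(self, batch_size):
        # uniform random minibatch without replacement
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)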
if args.gpu >= 0:
    print("gpu ok")
    ptu.set_gpu_mode(True, args.gpu)

# set env
if args.env_name == 'Humanoidrllab':
    from rllab.envs.mujoco.humanoid_env import HumanoidEnv
    from rllab.envs.normalized_env import normalize
    env = normalize(HumanoidEnv())
    max_episode_steps = float('inf')
    if args.seed >= 0:
        global seed_
        seed_ = args.seed
else:
    env = gym.make(args.env_name)
    max_episode_steps = env._max_episode_steps
    env = NormalizedActions(env)
    if args.seed >= 0:
        env.seed(args.seed)

if args.seed >= 0:
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# set args
args.num_actions = env.action_space.shape[0]
args.max_action = env.action_space.high
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
                    help='batch size (default: 256)')
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N',
                    help='maximum number of steps (default: 1000001)')
parser.add_argument('--hidden_size', type=int, default=256, metavar='N',
                    help='hidden size (default: 256)')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N',
                    help='model updates per simulator step (default: 1)')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N',
                    help='value target update interval, in number of updates (default: 1)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

# Environment
env = NormalizedActions(gym.make(args.env_name))
env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

writer = SummaryWriter()

# Memory
memory = ReplayMemory(args.replay_size)

# Training Loop
rewards = []
total_numsteps = 0
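The SAC setup above ends just before the training loop. A sketch of the loop that typically follows in scripts of this shape is below; `select_action` and `update_parameters` are assumed agent method names, and the `memory.push` signature is an assumption, neither confirmed by this snippet.

while total_numsteps < args.num_steps:
    state = env.reset()
    episode_reward = 0
    done = False
    while not done:
        action = agent.select_action(state)            # assumed agent API
        next_state, reward, done, _ = env.step(action)
        memory.push(state, action, reward, next_state, done)  # assumed signature
        total_numsteps += 1
        episode_reward += reward
        state = next_state
        # only update once the buffer can supply a full minibatch
        if len(memory) > args.batch_size:
            for _ in range(args.updates_per_step):
                agent.update_parameters(memory, args.batch_size)  # assumed signature
    rewards.append(episode_reward)
    writer.add_scalar('reward/train', episode_reward, total_numsteps)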
                    type=str, default='min', choices=['min', 'max', 'mean'],
                    help='The Q value for each sample is determined by this '
                         'operator over the two Q networks.')
parser.add_argument('--temp', type=float, default=1.0,
                    help='Boltzmann temperature for normalizing actions')
args = parser.parse_args()

assert args.num_outputs > 0

env = NormalizedActions(gym.make(args.env_name))
eval_env = NormalizedActions(gym.make(args.env_name))

if args.policy_type == 'generative':
    agent = Generative(gamma=args.gamma,
                       tau=args.tau,
                       num_inputs=env.observation_space.shape[0],
                       action_space=env.action_space,
                       replay_size=args.replay_size,
                       num_outputs=args.num_outputs,
                       q_normalization=args.q_normalization,
                       target_policy=args.target_policy,
                       target_policy_q=args.target_policy_q,
                       normalize_obs=args.normalize_obs,
                       normalize_returns=args.normalize_rew,
                       autoregressive=not args.not_autoregressive,
parser.add_argument('--iter', type=int, default=10, metavar='N',
                    help='number of iterations for solving the x constraints')
# Save & render
parser.add_argument('--render', action='store_true',
                    help='render the environment')
parser.add_argument('--ckpt_freq', type=int, default=2,
                    help='model saving frequency')
parser.add_argument('--display', type=bool, default=False,
                    help='display or not')
args = parser.parse_args()

'''
Initiate environment
'''
env_name = args.env_name
env = gym.make(env_name)
if type(env.action_space) != gym.spaces.discrete.Discrete:
    from LPO_continuous import LPO
    env = NormalizedActions(gym.make(env_name))
else:
    from LPO_discrete import LPO

if args.display:
    env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

ckpt = 'ckpt_' + env_name
if not os.path.exists(ckpt):
    os.mkdir(ckpt)
'''
        if random.random() < alpha:
            action = noise.sample(action.shape).view(action.shape)
        state, reward, done, _ = _env.step(action.cpu().numpy()[0])
        total_reward += reward
        state = agent.Tensor([state])
        if done:
            break
    return total_reward


test_episodes = 100
for env_name in [args.env]:  # os.listdir(base_dir):
    env = NormalizedActions(gym.make(env_name))
    agent = DDPG(beta=0.9, epsilon=0, learning_rate=1e-4, gamma=0.99, tau=0.01,
                 hidden_size_dim0=args.hidden_size, hidden_size_dim1=args.hidden_size,
                 num_inputs=env.observation_space.shape[0],
                 action_space=env.action_space,
                 train_mode=False, alpha=0, replay_size=0, optimizer=0,
                 two_player=args.two_player, normalize_obs=True)
    noise = uniform.Uniform(agent.Tensor([-1.0]), agent.Tensor([1.0]))
    basic_bm = copy.deepcopy(env.env.env.model.body_mass.copy())
    env_dir = base_dir + env_name + '/'
    for optimizer in [args.optimizer]:  # ['RMSprop', 'SGLD_thermal_0.01', 'SGLD_thermal_0.001', 'SGLD_thermal_0.0001', 'SGLD_thermal_1e-05']:
        for noise_type in [args.action_noise]:
            noise_dir = env_dir + optimizer + '/' + noise_type + '/nr_mdp_' + str(args.alpha) + '_1/'
            if os.path.exists(noise_dir):
                for subdir in sorted(os.listdir(noise_dir)):
                    results = {}
                    run_number = 0
    s = env.reset()
    rew = 0.
    for t in range(T):
        a = policy.get_action(s)
        s, r, done, _ = env.step(a)
        rew += r
        if done:
            break
    return rew


if __name__ == '__main__':
    env_name = args.env
    try:
        env = NormalizedActions(envs.env_list[env_name](render=args.render))
    except TypeError as err:
        print('no render argument; assuming env.render will just work')
        env = NormalizedActions(envs.env_list[env_name]())
    # np.all (not np.any) is required to actually check every bound
    assert np.all(np.abs(env.action_space.low) <= 1.) and np.all(
        np.abs(env.action_space.high) <= 1.), 'Action space not normalized'
    env.reset()
    env.seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(args.seed)

    now = datetime.now()
parser.set_defaults(done_util=True)
parser.add_argument('--render', dest='render', action='store_true')
parser.add_argument('--no_render', dest='render', action='store_false')
parser.set_defaults(render=False)
parser.add_argument('--record', dest='record', action='store_true')
parser.add_argument('--no-record', dest='record', action='store_false')
parser.set_defaults(record=False)
args = parser.parse_args()


if __name__ == '__main__':
    env_name = args.env
    try:
        env = NormalizedActions(envs.env_list[env_name](render=args.render))
    except TypeError as err:
        print('no render argument; assuming env.render will just work')
        env = NormalizedActions(envs.env_list[env_name]())
    # np.all (not np.any) is required to actually check every bound
    assert np.all(np.abs(env.action_space.low) <= 1.) and np.all(
        np.abs(env.action_space.high) <= 1.), 'Action space not normalized'
    if args.record:
        env = gym.wrappers.Monitor(env, './data/vid/mpc/{}-{}'.format(env_name, args.frame), force=True)
    env.reset()
    env.seed(args.seed)
print("=== HYPERPARAMETERS ===") for key in hp: print(f"{key} : {hp[key]}") print("=======================") logger.debug("Initial setup completed.") # Create JSON of Hyper-Parameters for reproducibility with open("./runs/" + folder + "hp.json", 'w') as outfile: json.dump(vars(args), outfile) cnn = args.pics for i_run in range(args.max_num_run): logger.important(f"START TRAINING RUN {i_run}") # Make the environment env = gym.make(args.env_name) env._max_episode_steps = args.max_num_step env = NormalizedActions(env) if cnn: env = ImageWrapper(args.img_size, env) # Set Seed for repeatability torch.manual_seed(args.seed + i_run) np.random.seed(args.seed + i_run) env.seed(args.seed + i_run) env.action_space.np_random.seed(args.seed + i_run) # Setup the agent agent = SAC(args.state_buffer_size, env.action_space, args) # Setup TensorboardX writer_train = SummaryWriter(log_dir='runs/' + folder + 'run_' + str(i_run) + '/train')
parser.add_argument('--num_steps', type=int, default=1000, metavar='N',
                    help='max episode length (default: 1000)')
parser.add_argument('--num_rollouts', type=int, default=2000, metavar='N',
                    help='number of rollouts (default: 2000)')
parser.add_argument('--hidden_size', type=int, default=15, metavar='N',
                    help='number of hidden neurons (default: 15)')
parser.add_argument('--constraint_size', type=int, default=10, metavar='N',
                    help='number of constraints to be solved each time')
parser.add_argument('--layers', type=int, default=2, metavar='N',
                    help='number of layers in the policy NN')
# Save & render
parser.add_argument('--render', action='store_true',
                    help='render the environment')
parser.add_argument('--ckpt_freq', type=int, default=200,
                    help='model saving frequency')
parser.add_argument('--display', type=bool, default=False,
                    help='display or not')
args = parser.parse_args()

env_name = args.env_name
env = gym.make(env_name)
if type(env.action_space) != gym.spaces.discrete.Discrete:
    from LPO_continuous import LPO
    env = NormalizedActions(gym.make(env_name))
else:
    from LPO_discrete import LPO

if args.display:
    env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.layers == 1:
    policy = SingleLayerPolicy(args.hidden_size, env.observation_space.shape[0], env.action_space)
elif args.layers == 2:
    policy = TwoLayerPolicy(args.hidden_size, env.observation_space.shape[0], env.action_space)

agent = LPO(args.hidden_size, env.observation_space.shape[0], env.action_space,
            args.constraint_size, policy)
parser.add_argument('--warmup', type=int, default=10000,
                    help='Number of insertions before updates')
args = parser.parse_args()

try:
    os.makedirs(args.log_dir)
except OSError:
    files = glob.glob(os.path.join(args.log_dir, '*.monitor.csv'))
    for f in files:
        os.remove(f)

if not args.discrete:
    env = NormalizedActions(gym.make(args.env_name))
else:
    env = [
        make_env(args.env_name, args.seed, i, args.log_dir, False)
        for i in range(args.num_processes)
    ]
    env = SubprocVecEnv(env)

# writer = SummaryWriter()
if args.vis:
    from visdom import Visdom
    viz = Visdom(port=8097, server='http://eos11')
    win = None

# env.seed(args.seed)
torch.manual_seed(args.seed)
if __name__ == '__main__':
    env = sys.argv[1]
    args = None
    if env == 'mc':
        args = args_mc
    elif env == 'pd':
        args = args_pd
    elif env == 'll':
        args = args_ll
    else:
        print('Environment not selected. Please choose from: mc, pd, ll')
        exit(-1)

    env = NormalizedActions(gym.make(args['env_name']))
    env.seed(args['seed'])
    torch.manual_seed(args['seed'])
    np.random.seed(args['seed'])

    agent = NAF(args['gamma'], args['tau'], args['hidden_size'],
                env.observation_space.shape[0], env.action_space)
    agent.load_model(f'models/naf_{args["env_name"]}')
    replay_buffer = ReplayBuffer(args['replay_size'])
    ounoise = OUNoise(env.action_space.shape[0]) if args['ou_noise'] else None
    run()
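The script above instantiates `OUNoise(env.action_space.shape[0])` for exploration but its class is not shown. A minimal Ornstein-Uhlenbeck sketch consistent with that constructor follows; the `theta`/`sigma` defaults and the `noise()` method name are common conventions assumed here, not taken from this snippet.

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise
    that mean-reverts toward mu, commonly paired with DDPG/NAF."""
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # restart the process at its mean at the start of each episode
        self.state = self.mu.copy()

    def noise(self):
        # dx = theta * (mu - x) + sigma * N(0, 1)
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state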
import numpy as np

from normalized_actions import NormalizedActions

if __name__ == '__main__':
    args = get_args()

    # initialize environment
    env_name = args.env_name
    env = gym.make(env_name)

    # choose agent according to action space
    if type(env.action_space) != gym.spaces.discrete.Discrete:
        from reinforce_continuous import REINFORCE
        env = NormalizedActions(gym.make(env_name))
    else:
        from reinforce_discrete import REINFORCE

    if args.display:
        env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True)

    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    agent = REINFORCE(args.hidden_size, env.observation_space.shape[0], env.action_space)
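Every snippet in this section wraps continuous-action environments in `NormalizedActions`, whose definition is never shown. The sketch below is the implementation commonly found alongside such scripts: the agent acts in [-1, 1] and the wrapper rescales into the environment's true [low, high] bounds. Exact clipping and edge-case handling may differ per repo; treat this as an assumption for illustration.

import gym

class NormalizedActions(gym.ActionWrapper):
    """Rescale agent actions from [-1, 1] to the env's native action bounds."""
    def action(self, action):
        low = self.action_space.low
        high = self.action_space.high
        # map [-1, 1] -> [low, high] linearly
        return low + (action + 1.0) * 0.5 * (high - low)

    def reverse_action(self, action):
        low = self.action_space.low
        high = self.action_space.high
        # map [low, high] -> [-1, 1], the inverse of action()
        return 2.0 * (action - low) / (high - low) - 1.0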