def onpolicy_inference():
    """Run a trained door-opening policy and report opening statistics.

    Configuration comes from module-level globals: `args`, `env_kwargs`,
    `evaluation`, `render`. Rolls the policy for up to 512 * test_num env
    steps (512-step episodes) and reports results via the module-level
    `eval_print` helper.
    """
    # Single vectorized environment on GPU; env_kwargs is module-level.
    env = make_vec_envs(
        args.env_name,
        args.seed + 1000,
        1,
        None,
        None,
        device='cuda:0',
        allow_early_resets=False,
        env_kwargs=env_kwargs,
    )
    # Unwrap the vec-env wrapper stack to reach the raw environment object.
    env_obj = env.venv.venv.envs[0].env.env
    # .find(...) <= -1 means 'door' does NOT occur in the env name;
    # non-door envs carry no Unity viewer handle.
    if args.env_name.find('door') <= -1:
        env_obj.unity = None
    render_func = get_render_func(env)
    # Headless evaluation: disable rendering entirely.
    if evaluation and not render:
        render_func = None
    if env_kwargs['visionnet_input']:
        # NOTE(review): the first VisionModelXYZ() is immediately overwritten
        # by load_visionmodel's return value — the first construction is dead.
        visionmodel = VisionModelXYZ()
        visionmodel = load_visionmodel(args.load_name, args.visionmodel_path,
                                       VisionModelXYZ())
    # Checkpoint stores (policy, observation-normalization stats).
    actor_critic, ob_rms = torch.load(args.load_name)
    actor_critic = actor_critic.eval()
    if env_kwargs['visionnet_input'] and args.env_name.find('doorenv') > -1:
        actor_critic.visionmodel = visionmodel
        actor_critic.visionnet_input = env_obj.visionnet_input
    actor_critic.to("cuda:0")
    if args.env_name.find('doorenv') > -1:
        actor_critic.nn = env_obj.nn
    # Recurrent state / episode mask for the (possibly recurrent) policy.
    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)
    knob_noisy = args.knob_noisy

    def add_noise(obs, epoch=100):
        """Add ramped Gaussian noise to the last 3 obs dims (in place).

        assumes the final 3 observation entries are the knob XYZ estimate
        that the vision net would produce — TODO confirm against the env.
        """
        satulation = 100.
        sdv = torch.tensor([
            3.440133806003181, 3.192113342496682, 1.727412865751099
        ]) / satulation  #Vision SDV for arm
        noise = torch.distributions.Normal(torch.tensor([0.0, 0.0, 0.0]),
                                           sdv).sample().cuda()
        # Ramp noise in linearly over the first `satulation` epochs.
        noise *= min(1., epoch / satulation)
        obs[:, -3:] += noise  # mutates obs in place and also returns it
        return obs

    full_obs = env.reset()
    # print("init obs", full_obs)
    # assumes obs layout: 2 leading non-joint values, then the robot joint
    # state (action-space-sized) — TODO confirm against the env definition.
    initial_state = full_obs[:, 2:2 + env.action_space.shape[0]]
    if args.env_name.find('doorenv') > -1 and env_obj.visionnet_input:
        # Vision-net path: convert the raw observation into network inputs.
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            obs = add_noise(full_obs)
        else:
            obs = full_obs
    if render_func is not None:
        render_func('human')
    # Pick the qpos index of the door hinge joint from the robot MJCF name.
    # NOTE(review): in the float/gripper-less and baxter-less fallthrough
    # cases doorhinge_idx may stay unset and raise NameError later.
    if args.env_name.find('doorenv') > -1:
        if env_obj.xml_path.find("baxter") > -1:
            doorhinge_idx = 20
        elif env_obj.xml_path.find("float") > -1:
            if env_obj.xml_path.find("hook") > -1:
                doorhinge_idx = 6
            elif env_obj.xml_path.find("gripper") > -1:
                doorhinge_idx = 11
        else:
            if env_obj.xml_path.find("mobile") > -1:
                if env_obj.xml_path.find("hook") > -1:
                    doorhinge_idx = 9
                if env_obj.xml_path.find("gripper") > -1:
                    doorhinge_idx = 14
            else:
                if env_obj.xml_path.find("hook") > -1:
                    doorhinge_idx = 7
                if env_obj.xml_path.find("gripper") > -1:
                    doorhinge_idx = 12
    start_time = int(time.mktime(time.localtime()))
    i = 0                    # global step counter
    epi_step = 0             # steps within current episode
    total_time = 0           # summed opening times across episodes
    epi_counter = 1
    dooropen_counter = 0
    door_opened = False
    test_num = 100           # number of 512-step evaluation episodes
    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)
        next_action = action
        # Re-anchor current_state at each episode boundary (511-step wrap).
        if i % 511 == 0:
            current_state = initial_state
        # Position-control branch is hard-disabled here; kept for parity with
        # the parameterized version of this function.
        pos_control = False
        if pos_control:
            frame_skip = 1
            if i % (512 / frame_skip - 1) == 0:
                current_state = initial_state
            # Interpret the policy output as a delta on the joint state.
            next_action = current_state + next_action
            for kk in range(frame_skip):
                full_obs, reward, done, infos = env.step(next_action)
        else:
            full_obs, reward, done, infos = env.step(next_action)
        current_state = full_obs[:, 2:2 + env.action_space.shape[0]]
        if args.env_name.find('doorenv') > -1 and env_obj.visionnet_input:
            obs = actor_critic.obs2inputs(full_obs, 0)
        else:
            if knob_noisy:
                obs = add_noise(full_obs)
            else:
                obs = full_obs
        # Zero mask on episode end resets the recurrent state inside act().
        masks.fill_(0.0 if done else 1.0)
        if render_func is not None:
            render_func('human')
        i += 1
        epi_step += 1
        if args.env_name.find('doorenv') > -1:
            # Door counts as opened once the hinge angle exceeds 0.2 rad;
            # count at most once per episode.
            if not door_opened and abs(
                    env_obj.sim.data.qpos[doorhinge_idx]) >= 0.2:
                dooropen_counter += 1
                # presumably 50 steps per simulated second — verify timestep.
                opening_time = epi_step / 50
                print("door opened! opening time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True
        if args.env_name.find('Fetch') > -1:
            # Fetch envs report success through the info dict instead.
            if not door_opened and infos[0]['is_success'] == 1:
                dooropen_counter += 1
                opening_time = epi_step / 50
                print("Reached destenation! Time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True
        if evaluation:
            # Every 512 steps: tear the env down and rebuild it for the next
            # evaluation episode (fresh Unity viewer if present).
            if i % 512 == 511:
                if env_obj.unity:
                    env_obj.close()
                env = make_vec_envs(
                    args.env_name,
                    args.seed + 1000,
                    1,
                    None,
                    None,
                    device='cuda:0',
                    allow_early_resets=False,
                    env_kwargs=env_kwargs,
                )
                if render:
                    render_func = get_render_func(env)
                env_obj = env.venv.venv.envs[0].env.env
                # NOTE(review): checks 'doorenv' here but 'door' at the top —
                # confirm whether the mismatch is intentional.
                if args.env_name.find('doorenv') <= -1:
                    env_obj.unity = None
                env.reset()
                print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(epi_counter))
                eval_print(dooropen_counter, epi_counter, start_time,
                           total_time)
                epi_counter += 1
                epi_step = 0
                door_opened = False
        if i >= 512 * test_num:
            eval_print(dooropen_counter, epi_counter - 1, start_time,
                       total_time)
            break
# NOTE(review): fragment — this is the tail of an argparse setup whose
# beginning (including the enclosing add_argument call) is not visible here.
help='obstacle size factor')
parser.add_argument('--rew_factor',
                    type=float,
                    default=1.,
                    help='reward factor')
args = parser.parse_args()

# Apply the obstacle-size override to both feeding-env variants before any
# instances are created (class attribute, affects all future instances).
from assistive_gym.envs import FeedingEnvHomotopyDownAdjust, FeedingEnvHomotopyUpAdjust
FeedingEnvHomotopyDownAdjust.obs_size = args.obs_size
FeedingEnvHomotopyUpAdjust.obs_size = args.obs_size
args.det = not args.non_det
env = make_vec_envs(args.env_name,
                    args.seed + 1000,
                    1,
                    None,
                    None,
                    args.add_timestep,
                    device='cpu',
                    allow_early_resets=False)

# Get a render function
render_func = get_render_func(env)

# We need to use the same statistics for normalization as used in training
# Checkpoint stores (policy, observation-normalization stats); either an
# explicit model path or an epoch-numbered file in the load directory.
if args.load_model is not None:
    actor_critic, ob_rms = torch.load(args.load_model)
else:
    actor_critic, ob_rms = torch.load(
        os.path.join(
            args.load_dir,
            args.env_name + "epoch_{:07d}.pt".format(args.load_epoch)))
vec_norm = get_vec_normalize(env)
if vec_norm is not None:
    # Freeze normalization stats and reuse the training-time running means.
    vec_norm.eval()
    vec_norm.ob_rms = ob_rms
recurrent_hidden_states = torch.zeros(1,
                                      actor_critic.recurrent_hidden_state_size)
masks = torch.zeros(1, 1)
def main():
    """Visualize a trained DeepMimic humanoid policy in a PyBullet env.

    Loads a fixed checkpoint ("<env-name>_39061.pt") from --load-dir, then
    runs the policy forever at ~10 Hz, tracking the humanoid torso with the
    PyBullet debug camera. Never returns.
    """
    sys.path.append('a2c_ppo_acktr')
    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='log interval, one log per n updates (default: 10)')
    parser.add_argument(
        '--env-name',
        default='HumanoidDeepMimicWalkBulletEnv-v1',
        help=
        'environment to train on (default: HumanoidDeepMimicWalkBulletEnv-v1)')
    parser.add_argument(
        '--load-dir',
        default='./trained_models/a2c',
        help='directory to save agent logs (default: ./trained_models/)')
    parser.add_argument('--non-det',
                        action='store_true',
                        default=False,
                        help='whether to use a non-deterministic policy')
    args = parser.parse_args()
    args.det = not args.non_det
    env = make_vec_envs(args.env_name,
                        args.seed + 1000,
                        1,
                        None,
                        None,
                        device='cpu',
                        allow_early_resets=False,
                        test=True)

    # Get a render function
    render_func = get_render_func(env)

    # We need to use the same statistics for normalization as used in training
    actor_critic, ob_rms = \
        torch.load(os.path.join(args.load_dir, args.env_name + "_39061.pt"))
    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms
    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)
    obs = env.reset()
    if render_func is not None:
        render_func('human')
    if args.env_name.find('Bullet') > -1:
        import pybullet as p
        # Locate the humanoid torso body so the debug camera can follow it.
        torsoId = -1
        for i in range(p.getNumBodies()):
            if (p.getBodyInfo(i)[0].decode() == "torso"):
                torsoId = i
    while True:
        time.sleep(0.1)  # throttle playback to roughly real time
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)

        # Obser reward and next obs
        obs, reward, done, _ = env.step(action)
        # Zero mask on episode end resets the recurrent state inside act().
        masks.fill_(0.0 if done else 1.0)
        if args.env_name.find('Bullet') > -1:
            if torsoId > -1:
                distance = 5
                yaw = 0
                humanPos, humanOrn = p.getBasePositionAndOrientation(torsoId)
                p.resetDebugVisualizerCamera(distance, yaw, -20, humanPos)
        if render_func is not None:
            render_func('human')
def traj_1_generator(actor_critic, ob_rms, simple_env_name):
    """Roll out exactly one episode with `actor_critic` and record it.

    Args:
        actor_critic: trained policy exposing an `.act()` method.
        ob_rms: observation-normalization running stats, applied to the env.
        simple_env_name: short env name forwarded to `preprocess` for
            observation post-processing.

    Returns:
        Tuple (eps_states, eps_actions, eps_rewards, eps_return, eps_length)
        where the first three are numpy arrays of per-step data.

    Relies on module-level `args`, `make_vec_envs`, `get_render_func`,
    `get_vec_normalize` and `preprocess`.
    """
    # Fix: torch.cuda.is_available must be CALLED. The bare function object
    # is always truthy, so the original always chose "cuda:0" — crashing on
    # CPU-only machines instead of falling back to "cpu".
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    env = make_vec_envs(
        args.env_name,
        args.seed + 1,
        1,
        None,
        None,
        device=device,
        allow_early_resets=False)

    # Get a render function
    render_func = get_render_func(env)

    # Use the same normalization statistics as during training.
    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms

    if args.render:
        if render_func is not None:
            render_func('human')

    if args.env_name.find('Bullet') > -1:
        import pybullet as p
        # Locate the humanoid torso body (used by Bullet camera tracking).
        torsoId = -1
        for i in range(p.getNumBodies()):
            if (p.getBodyInfo(i)[0].decode() == "torso"):
                torsoId = i

    masks = torch.zeros(1, 1)
    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    done = False
    eps_states = []
    eps_actions = []
    eps_rewards = []
    steps = 0
    reward = 0
    eps_return = 0
    eps_length = 0
    obs = env.reset()
    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)

        # Observe reward and next obs
        obs, reward, done, info = env.step(action)
        eps_states.append(
            preprocess(obs.cpu().numpy(), args.preprocess_type,
                       simple_env_name)[0])
        eps_actions.append(action[0][0].cpu().numpy())
        eps_rewards.append(reward[0][0].cpu().numpy())
        steps += 1
        eps_length += 1
        eps_return += reward[0][0].cpu().numpy()
        # Zero mask on episode end resets the recurrent state inside act().
        masks.fill_(0.0 if done else 1.0)
        if args.render:
            if render_func is not None:
                render_func('human')
        if steps % 1000 == 0:
            print('steps', steps)
        if done:
            print('info: ', info)
            break
    env.close()
    eps_states = np.array(eps_states)
    eps_actions = np.array(eps_actions)
    eps_rewards = np.array(eps_rewards)
    print('eps_return', eps_return)
    print('eps_length', eps_length)
    return eps_states, eps_actions, eps_rewards, eps_return, eps_length
def main():
    """Launch the GUI main window, load a trained policy, and start the UI loop.

    Stores everything the UI callbacks need in the module-level `g_globals`
    tuple, then hands control to `m.startMainLoop()`, which blocks until the
    program exits.
    """
    global g_globals
    # Build and show the main window scaled by a fixed UI factor.
    uiscale = 1.5
    m.createMainWin(int((1024 + 180) * uiscale), int((600 + 100) * uiscale),
                    int(1024 * uiscale), int(600 * uiscale), uiscale)
    m.showMainWin()
    m.getPythonWin().loadEmptyScript()
    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='log interval, one log per n updates (default: 10)')
    parser.add_argument(
        '--env-name',
        default='PongNoFrameskip-v4',
        help='environment to train on (default: PongNoFrameskip-v4)')
    parser.add_argument(
        '--load-dir',
        default='./trained_models/',
        help='directory to save agent logs (default: ./trained_models/)')
    parser.add_argument('--non-det',
                        action='store_true',
                        default=False,
                        help='whether to use a non-deterministic policy')
    args = parser.parse_args()
    args.det = not args.non_det
    env = make_vec_envs(args.env_name,
                        args.seed + 1000,
                        1,
                        None,
                        None,
                        device='cpu',
                        allow_early_resets=False)

    # Get a render function
    render_func = get_render_func(env)

    # We need to use the same statistics for normalization as used in training
    actor_critic, ob_rms = \
        torch.load(os.path.join(args.load_dir, args.env_name + ".pt"))
    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms
    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)
    obs = env.reset()
    if render_func is not None:
        render_func('human')
    # Hand the rollout state to the UI layer; presumably a frame callback
    # elsewhere reads g_globals to step the policy — verify against the UI code.
    g_globals = (actor_critic, obs, recurrent_hidden_states, masks, args, env,
                 render_func)
    m.startMainLoop()  # this finishes when program finishes
def main():
    """Evaluate either an end-to-end model or a scripted/null-action pipeline.

    Loads the checkpoint named after the env, optionally an image-based pose
    estimator, builds the pipeline env, and runs `num_trials` episodes,
    printing the overall success percentage. Relies on module-level `args`,
    `pipelines`, `rack_lower`, `rack_upper`, `make_vec_envs`,
    `get_render_func` and `unnormalise_y`.
    """
    device = torch.device("cuda:0" if args.cuda else "cpu")
    policies = torch.load(os.path.join(args.load_dir, args.env_name + ".pt"),
                          map_location=device)
    if args.e2e:
        # End-to-end mode: the loaded checkpoint IS the e2e model; the env
        # gets no internal policies.
        e2e = policies
        e2e.eval()
        policies = None
    else:
        e2e = None
    # Optional image-based pose estimator, keyed by --image_layer.
    estimator = torch.load(
        os.path.join(args.pe_load_dir, args.image_layer + ".pt")) if \
        args.image_layer else None
    if estimator:
        estimator.eval()
    pose_estimator_info = (estimator, args.state_indices, rack_lower,
                           rack_upper) if \
        args.image_layer else None
    pipeline = pipelines[args.pipeline]
    env = make_vec_envs(pipeline['sparse'],
                        pipeline['task'],
                        args.seed + 1000,
                        args.num_processes,
                        None,
                        None,
                        device,
                        False,
                        policies,
                        show=(args.num_processes == 1),
                        no_norm=True,
                        pose_estimator=pose_estimator_info)
    # Zero action used when the env-internal pipeline drives the robot.
    null_action = torch.zeros((1, env.action_space.shape[0]))

    # Get a render function
    render_func = get_render_func(env)
    if e2e:
        env.get_images(mode='activate')
    obs = env.reset()
    if render_func is not None:
        render_func('human')
    i = 0
    total_successes = 0
    num_trials = 50
    # Action bounds used to un-normalize the e2e model's output (7-DoF).
    low = torch.Tensor([-0.3] * 7)
    high = torch.Tensor([0.3] * 7)
    while i < num_trials:
        with torch.no_grad():
            if e2e:
                # HWC -> CHW for the conv net.
                images = torch.Tensor(
                    np.transpose(env.get_images(), (0, 3, 1, 2))).to(device)
                # assumes obs[:, :7] is the proprioceptive joint state —
                # TODO confirm against the env's observation layout.
                output = e2e.predict(images, obs[:, :7])
                action = unnormalise_y(output, low, high)
            else:
                action = null_action

        # Obser reward and next obs
        obs, rews, dones, _ = env.step(action)
        # Count a batch of trials only when ALL parallel envs finished;
        # positive reward is treated as success.
        if np.all(dones):
            i += args.num_processes
            rew = sum([int(rew > 0) for rew in rews])
            total_successes += rew
        if render_func is not None:
            render_func('human')
    p_succ = 100 * total_successes / i
    print(f"{p_succ}% successful")
def onpolicy_inference(seed,
                       env_name,
                       det,
                       load_name,
                       evaluation,
                       render,
                       knob_noisy,
                       visionnet_input,
                       env_kwargs,
                       actor_critic=None,
                       verbose=True,
                       pos_control=True,
                       step_skip=4):
    """Run a trained door-opening policy and return opening statistics.

    Parameterized successor of the `args`-driven version of this function.

    Args:
        seed: base random seed (env is seeded with seed + 1000).
        env_name: gym environment id; 'doorenv'/'Fetch' substrings select
            the success criterion.
        det: whether the policy acts deterministically.
        load_name: checkpoint path, loaded when `actor_critic` is None.
        evaluation: if True, rebuild the env every episode and tally stats.
        render: whether to render (ignored during headless evaluation).
        knob_noisy: add vision-like noise to the knob observation.
        visionnet_input: unused here directly; env_kwargs carries the flag.
        env_kwargs: kwargs forwarded to the env constructor.
        actor_critic: pre-loaded policy (skips loading from `load_name`).
        verbose: print per-episode progress.
        pos_control: treat policy output as a delta on the joint state.
        step_skip: env steps executed per policy action.

    Returns:
        (opening_rate, opening_timeavg) as produced by module-level
        `eval_print`.
    """
    env = make_vec_envs(
        env_name,
        seed + 1000,
        1,
        None,
        None,
        device='cuda:0',
        allow_early_resets=False,
        env_kwargs=env_kwargs,
    )
    # Unwrap the vec-env wrapper stack to reach the raw environment object.
    env_obj = env.venv.venv.envs[0].env.env
    # .find(...) <= -1 means 'door' does NOT occur in the env name.
    if env_name.find('door') <= -1:
        env_obj.unity = None
    render_func = get_render_func(env)
    if evaluation and not render:
        render_func = None
    if env_kwargs['visionnet_input']:
        # NOTE(review): `args` is not a parameter of this function — this
        # line only works if a module-level `args` exists; confirm.
        visionmodel = VisionModelXYZ()
        visionmodel = load_visionmodel(load_name, args.visionmodel_path,
                                       VisionModelXYZ())
    if not actor_critic:
        actor_critic, ob_rms = torch.load(load_name)
    actor_critic = actor_critic.eval()
    if env_kwargs['visionnet_input'] and env_name.find('doorenv') > -1:
        actor_critic.visionmodel = visionmodel
        actor_critic.visionnet_input = env_obj.visionnet_input
    actor_critic.to("cuda:0")
    if env_name.find('doorenv') > -1:
        actor_critic.nn = env_obj.nn
    # Recurrent state / episode mask for the (possibly recurrent) policy.
    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)
    full_obs = env.reset()
    # assumes the leading action-space-sized slice of the observation is the
    # robot joint state — TODO confirm against the env definition.
    initial_state = full_obs[:, :env.action_space.shape[0]]
    if env_name.find('doorenv') > -1 and env_obj.visionnet_input:
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            # presumably the module-level add_noise helper — not defined in
            # this function; verify it exists at module scope.
            obs = add_noise(full_obs)
        else:
            obs = full_obs
    if render_func is not None:
        render_func('human')
    # Superseded hinge-index table: replaced by env_obj.get_doorangle().
    # if env_name.find('doorenv')>-1:
    #     if env_obj.xml_path.find("baxter")>-1:
    #         doorhinge_idx = 20
    #     elif env_obj.xml_path.find("float")>-1:
    #         if env_obj.xml_path.find("hook")>-1:
    #             doorhinge_idx = 6
    #         elif env_obj.xml_path.find("gripper")>-1:
    #             doorhinge_idx = 11
    #     else:
    #         if env_obj.xml_path.find("mobile")>-1:
    #             if env_obj.xml_path.find("hook")>-1:
    #                 doorhinge_idx = 9
    #             if env_obj.xml_path.find("gripper")>-1:
    #                 doorhinge_idx = 14
    #         else:
    #             if env_obj.xml_path.find("hook")>-1:
    #                 doorhinge_idx = 7
    #             if env_obj.xml_path.find("gripper")>-1:
    #                 doorhinge_idx = 12
    start_time = int(time.mktime(time.localtime()))
    i = 0                    # policy-step counter (each = step_skip env steps)
    epi_step = 0             # policy steps within current episode
    total_time = 0           # summed opening times across episodes
    epi_counter = 1
    dooropen_counter = 0
    door_opened = False
    test_num = 100           # number of evaluation episodes
    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=det)
        next_action = action
        if pos_control:
            # print("enjoy step_skip",step_skip)
            # Re-anchor to the initial joint state at each episode boundary.
            if i % (512 / step_skip - 1) == 0:
                current_state = initial_state
            # Interpret policy output as a delta on the joint state.
            next_action = current_state + next_action
            for kk in range(step_skip):
                full_obs, reward, done, infos = env.step(next_action)
            current_state = full_obs[:, :env.action_space.shape[0]]
        else:
            for kk in range(step_skip):
                full_obs, reward, done, infos = env.step(next_action)
        if env_name.find('doorenv') > -1 and env_obj.visionnet_input:
            obs = actor_critic.obs2inputs(full_obs, 0)
        else:
            if knob_noisy:
                obs = add_noise(full_obs)
            else:
                obs = full_obs
        # Zero mask on episode end resets the recurrent state inside act().
        masks.fill_(0.0 if done else 1.0)
        if render_func is not None:
            render_func('human')
        i += 1
        epi_step += 1
        if env_name.find('doorenv') > -1:
            # if not door_opened and abs(env_obj.sim.data.qpos[doorhinge_idx])>=0.2:
            # Door counts as opened once the hinge angle exceeds 0.2 rad;
            # count at most once per episode.
            if not door_opened and abs(env_obj.get_doorangle()) >= 0.2:
                dooropen_counter += 1
                # Convert policy steps to seconds via the MuJoCo timestep
                # (mujoco_timestep is module-level) and the frame skip.
                opening_time = epi_step / (1.0 / mujoco_timestep) * step_skip
                if verbose:
                    print(
                        "door opened! opening time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True
        if env_name.find('Fetch') > -1:
            # Fetch envs report success through the info dict instead.
            if not door_opened and infos[0]['is_success'] == 1:
                dooropen_counter += 1
                opening_time = epi_step / (1.0 / mujoco_timestep) * step_skip
                if verbose:
                    print(
                        "Reached destenation! Time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True
        if evaluation:
            # Episode boundary: rebuild the env (fresh Unity viewer if any)
            # and log per-episode stats.
            if i % (512 / step_skip - 1) == 0:
                if env_obj.unity:
                    env_obj.close()
                env = make_vec_envs(
                    env_name,
                    seed + 1000,
                    1,
                    None,
                    None,
                    device='cuda:0',
                    allow_early_resets=False,
                    env_kwargs=env_kwargs,
                )
                if render:
                    render_func = get_render_func(env)
                env_obj = env.venv.venv.envs[0].env.env
                # NOTE(review): checks 'doorenv' here but 'door' at the top —
                # confirm whether the mismatch is intentional.
                if env_name.find('doorenv') <= -1:
                    env_obj.unity = None
                env.reset()
                if verbose:
                    print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(
                        epi_counter))
                eval_print(dooropen_counter, epi_counter, start_time,
                           total_time)
                epi_counter += 1
                epi_step = 0
                door_opened = False
        if i >= 512 / step_skip * test_num:
            if verbose:
                print("dooropening counter:", dooropen_counter,
                      " epi counter:", epi_counter)
            eval_print(dooropen_counter, epi_counter - 1, start_time,
                       total_time)
            break
    opening_rate, opening_timeavg = eval_print(dooropen_counter,
                                               epi_counter - 1, start_time,
                                               total_time)
    return opening_rate, opening_timeavg