Example #1
def onpolicy_inference():
    env = make_vec_envs(
        args.env_name,
        args.seed + 1000,
        1,
        None,
        None,
        device='cuda:0',
        allow_early_resets=False,
        env_kwargs=env_kwargs,
    )
    env_obj = env.venv.venv.envs[0].env.env
    if args.env_name.find('door') <= -1:
        env_obj.unity = None

    render_func = get_render_func(env)
    if evaluation and not render:
        render_func = None

    if env_kwargs['visionnet_input']:
        visionmodel = VisionModelXYZ()
        visionmodel = load_visionmodel(args.load_name, args.visionmodel_path,
                                       VisionModelXYZ())

    actor_critic, ob_rms = torch.load(args.load_name)
    actor_critic = actor_critic.eval()
    if env_kwargs['visionnet_input'] and args.env_name.find('doorenv') > -1:
        actor_critic.visionmodel = visionmodel
        actor_critic.visionnet_input = env_obj.visionnet_input
    actor_critic.to("cuda:0")

    if args.env_name.find('doorenv') > -1:
        actor_critic.nn = env_obj.nn

    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)

    knob_noisy = args.knob_noisy

    def add_noise(obs, epoch=100):
        # Add zero-mean Gaussian noise to the last three observation entries
        # (the estimated knob position), ramping it up over the first epochs.
        saturation = 100.
        sdv = torch.tensor([
            3.440133806003181, 3.192113342496682, 1.727412865751099
        ]) / saturation  # vision SDV for the arm
        noise = torch.distributions.Normal(torch.tensor([0.0, 0.0, 0.0]),
                                           sdv).sample().cuda()
        noise *= min(1., epoch / saturation)
        obs[:, -3:] += noise
        return obs

    full_obs = env.reset()
    # print("init obs", full_obs)
    initial_state = full_obs[:, 2:2 + env.action_space.shape[0]]

    if args.env_name.find('doorenv') > -1 and env_obj.visionnet_input:
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            obs = add_noise(full_obs)
        else:
            obs = full_obs

    if render_func is not None:
        render_func('human')

    if args.env_name.find('doorenv') > -1:
        if env_obj.xml_path.find("baxter") > -1:
            doorhinge_idx = 20
        elif env_obj.xml_path.find("float") > -1:
            if env_obj.xml_path.find("hook") > -1:
                doorhinge_idx = 6
            elif env_obj.xml_path.find("gripper") > -1:
                doorhinge_idx = 11
        else:
            if env_obj.xml_path.find("mobile") > -1:
                if env_obj.xml_path.find("hook") > -1:
                    doorhinge_idx = 9
                if env_obj.xml_path.find("gripper") > -1:
                    doorhinge_idx = 14
            else:
                if env_obj.xml_path.find("hook") > -1:
                    doorhinge_idx = 7
                if env_obj.xml_path.find("gripper") > -1:
                    doorhinge_idx = 12

    start_time = int(time.mktime(time.localtime()))

    i = 0
    epi_step = 0
    total_time = 0
    epi_counter = 1
    dooropen_counter = 0
    door_opened = False

    test_num = 100

    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)

        next_action = action

        if i % 511 == 0: current_state = initial_state

        pos_control = False
        if pos_control:
            frame_skip = 1
            if i % (512 / frame_skip - 1) == 0: current_state = initial_state
            next_action = current_state + next_action
            for kk in range(frame_skip):
                full_obs, reward, done, infos = env.step(next_action)
        else:
            full_obs, reward, done, infos = env.step(next_action)

        current_state = full_obs[:, 2:2 + env.action_space.shape[0]]

        if args.env_name.find('doorenv') > -1 and env_obj.visionnet_input:
            obs = actor_critic.obs2inputs(full_obs, 0)
        else:
            if knob_noisy:
                obs = add_noise(full_obs)
            else:
                obs = full_obs

        masks.fill_(0.0 if done else 1.0)

        if render_func is not None:
            render_func('human')

        i += 1
        epi_step += 1

        if args.env_name.find('doorenv') > -1:
            if not door_opened and abs(
                    env_obj.sim.data.qpos[doorhinge_idx]) >= 0.2:
                dooropen_counter += 1
                opening_time = epi_step / 50
                print("door opened! opening time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True

        if args.env_name.find('Fetch') > -1:
            if not door_opened and infos[0]['is_success'] == 1:
                dooropen_counter += 1
                opening_time = epi_step / 50
                print("Reached destenation! Time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True

        if evaluation:
            if i % 512 == 511:
                if env_obj.unity:
                    env_obj.close()
                env = make_vec_envs(
                    args.env_name,
                    args.seed + 1000,
                    1,
                    None,
                    None,
                    device='cuda:0',
                    allow_early_resets=False,
                    env_kwargs=env_kwargs,
                )
                if render:
                    render_func = get_render_func(env)
                env_obj = env.venv.venv.envs[0].env.env
                if args.env_name.find('doorenv') <= -1:
                    env_obj.unity = None
                env.reset()
                print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(epi_counter))
                eval_print(dooropen_counter, epi_counter, start_time,
                           total_time)
                epi_counter += 1
                epi_step = 0
                door_opened = False

        if i >= 512 * test_num:
            eval_print(dooropen_counter, epi_counter - 1, start_time,
                       total_time)
            break
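
# `eval_print` is used above but not defined in these snippets. A minimal sketch,
# inferred from its call sites (hypothetical, not the original implementation);
# it assumes `time` is imported as in the example above.
def eval_print(dooropen_counter, epi_counter, start_time, total_time):
    opening_rate = 100.0 * dooropen_counter / max(epi_counter, 1)
    opening_timeavg = total_time / max(dooropen_counter, 1)
    elapsed = int(time.mktime(time.localtime())) - start_time
    print("opened {} of {} episodes ({:.1f}%), avg opening time {:.2f}s, elapsed {}s".format(
        dooropen_counter, epi_counter, opening_rate, opening_timeavg, elapsed))
    return opening_rate, opening_timeavg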
Example #2
                    help='obstacle size factor')
parser.add_argument('--rew_factor', type=float, default=1.,
                    help='reward factor')
args = parser.parse_args()

from assistive_gym.envs import FeedingEnvHomotopyDownAdjust, FeedingEnvHomotopyUpAdjust
FeedingEnvHomotopyDownAdjust.obs_size = args.obs_size
FeedingEnvHomotopyUpAdjust.obs_size = args.obs_size

args.det = not args.non_det

env = make_vec_envs(args.env_name, args.seed + 1000, 1, None, None,
                    args.add_timestep, device='cpu', allow_early_resets=False)

# Get a render function
render_func = get_render_func(env)

# We need to use the same statistics for normalization as used in training
if args.load_model is not None:
    actor_critic, ob_rms = torch.load(args.load_model) 
else:
    actor_critic, ob_rms = torch.load(os.path.join(args.load_dir, args.env_name + "epoch_{:07d}.pt".format(args.load_epoch)))

vec_norm = get_vec_normalize(env)
if vec_norm is not None:
    vec_norm.eval()
    vec_norm.ob_rms = ob_rms

recurrent_hidden_states = torch.zeros(1, actor_critic.recurrent_hidden_state_size)
masks = torch.zeros(1, 1)
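
# The snippet above stops before the rollout loop. A minimal continuation in the
# style of the other examples below (a sketch, not the original file):
obs = env.reset()

if render_func is not None:
    render_func('human')

while True:
    with torch.no_grad():
        value, action, _, recurrent_hidden_states = actor_critic.act(
            obs, recurrent_hidden_states, masks, deterministic=args.det)

    obs, reward, done, _ = env.step(action)
    masks.fill_(0.0 if done else 1.0)

    if render_func is not None:
        render_func('human')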
Example #3
def main():

    sys.path.append('a2c_ppo_acktr')

    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='log interval, one log per n updates (default: 10)')
    parser.add_argument(
        '--env-name',
        default='HumanoidDeepMimicWalkBulletEnv-v1',
        help=
        'environment to train on (default: HumanoidDeepMimicWalkBulletEnv-v1)')
    parser.add_argument(
        '--load-dir',
        default='./trained_models/a2c',
        help='directory to save agent logs (default: ./trained_models/)')
    parser.add_argument('--non-det',
                        action='store_true',
                        default=False,
                        help='whether to use a non-deterministic policy')
    args = parser.parse_args()

    args.det = not args.non_det

    env = make_vec_envs(args.env_name,
                        args.seed + 1000,
                        1,
                        None,
                        None,
                        device='cpu',
                        allow_early_resets=False,
                        test=True)

    # Get a render function
    render_func = get_render_func(env)

    # We need to use the same statistics for normalization as used in training
    actor_critic, ob_rms = \
                torch.load(os.path.join(args.load_dir, args.env_name + "_39061.pt"))

    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms

    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)

    obs = env.reset()

    if render_func is not None:
        render_func('human')

    if args.env_name.find('Bullet') > -1:
        import pybullet as p

        torsoId = -1
        for i in range(p.getNumBodies()):
            if (p.getBodyInfo(i)[0].decode() == "torso"):
                torsoId = i

    while True:
        time.sleep(0.1)
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)

        # Observe reward and next obs
        obs, reward, done, _ = env.step(action)

        masks.fill_(0.0 if done else 1.0)

        if args.env_name.find('Bullet') > -1:
            if torsoId > -1:
                distance = 5
                yaw = 0
                humanPos, humanOrn = p.getBasePositionAndOrientation(torsoId)
                p.resetDebugVisualizerCamera(distance, yaw, -20, humanPos)

        if render_func is not None:
            render_func('human')
Example #4
def traj_1_generator(actor_critic, ob_rms, simple_env_name):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    env = make_vec_envs(
        args.env_name,
        args.seed + 1,
        1,
        None,
        None,
        device=device,
        allow_early_resets=False)
    
    # Get a render function
    render_func = get_render_func(env)
    
    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms
    
    if args.render:
        if render_func is not None:
            render_func('human')
    
    if args.env_name.find('Bullet') > -1:
        import pybullet as p
    
        torsoId = -1
        for i in range(p.getNumBodies()):
            if (p.getBodyInfo(i)[0].decode() == "torso"):
                torsoId = i
 

    masks = torch.zeros(1, 1)
    recurrent_hidden_states = torch.zeros(1, actor_critic.recurrent_hidden_state_size)
    
    done = False
    eps_states = []
    eps_actions = []
    eps_rewards = []

    steps = 0
    reward = 0
    eps_return = 0
    eps_length = 0

    obs = env.reset()

    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=args.det)

        # Observe reward and next obs
        obs, reward, done, info = env.step(action)

        eps_states.append(preprocess(obs.cpu().numpy(), args.preprocess_type, simple_env_name)[0])
        eps_actions.append(action[0][0].cpu().numpy())
        eps_rewards.append(reward[0][0].cpu().numpy())

        steps += 1
        eps_length += 1
        eps_return += reward[0][0].cpu().numpy()

        masks.fill_(0.0 if done else 1.0)

        if args.render:
            if render_func is not None:
                render_func('human')

        if steps % 1000 == 0:
            print('steps', steps)
        if done:
            print('info: ', info)
            break

    env.close()

    eps_states = np.array(eps_states)
    eps_actions = np.array(eps_actions)
    eps_rewards = np.array(eps_rewards)
   
    print('eps_return', eps_return)
    print('eps_length', eps_length)
   
    return eps_states, eps_actions, eps_rewards, eps_return, eps_length
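
# Usage sketch (hypothetical, not part of the original script): collect several
# trajectories with traj_1_generator, e.g. for imitation learning. Assumes
# `actor_critic`, `ob_rms`, and `simple_env_name` have already been loaded as in
# the other examples.
def collect_trajectories(actor_critic, ob_rms, simple_env_name, n_trajs=10):
    trajs = [traj_1_generator(actor_critic, ob_rms, simple_env_name)
             for _ in range(n_trajs)]
    # Unzip the per-trajectory tuples into parallel lists.
    states, actions, rewards, returns, lengths = map(list, zip(*trajs))
    print('mean return over {} trajectories: {}'.format(n_trajs, np.mean(returns)))
    return states, actions, rewards, returns, lengths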
Example #5
def main():
    global g_globals
    uiscale = 1.5
    m.createMainWin(int((1024 + 180) * uiscale), int((600 + 100) * uiscale),
                    int(1024 * uiscale), int(600 * uiscale), uiscale)
    m.showMainWin()
    m.getPythonWin().loadEmptyScript()

    parser = argparse.ArgumentParser(description='RL')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='log interval, one log per n updates (default: 10)')
    parser.add_argument(
        '--env-name',
        default='PongNoFrameskip-v4',
        help='environment to train on (default: PongNoFrameskip-v4)')
    parser.add_argument(
        '--load-dir',
        default='./trained_models/',
        help='directory to save agent logs (default: ./trained_models/)')
    parser.add_argument('--non-det',
                        action='store_true',
                        default=False,
                        help='whether to use a non-deterministic policy')
    args = parser.parse_args()

    args.det = not args.non_det

    env = make_vec_envs(args.env_name,
                        args.seed + 1000,
                        1,
                        None,
                        None,
                        device='cpu',
                        allow_early_resets=False)

    # Get a render function
    render_func = get_render_func(env)

    # We need to use the same statistics for normalization as used in training
    actor_critic, ob_rms = \
                torch.load(os.path.join(args.load_dir, args.env_name + ".pt"))

    vec_norm = get_vec_normalize(env)
    if vec_norm is not None:
        vec_norm.eval()
        vec_norm.ob_rms = ob_rms

    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)

    obs = env.reset()

    if render_func is not None:
        render_func('human')

    g_globals = (actor_critic, obs, recurrent_hidden_states, masks, args, env,
                 render_func)

    m.startMainLoop()  # this finishes when program finishes
Example #6
def main():
    device = torch.device("cuda:0" if args.cuda else "cpu")
    policies = torch.load(os.path.join(args.load_dir, args.env_name + ".pt"),
                          map_location=device)
    if args.e2e:
        e2e = policies
        e2e.eval()
        policies = None
    else:
        e2e = None

    estimator = torch.load(os.path.join(args.pe_load_dir, args.image_layer + ".pt")) if \
        args.image_layer else None
    if estimator:
        estimator.eval()

    pose_estimator_info = (estimator, args.state_indices, rack_lower, rack_upper) if \
        args.image_layer else None

    pipeline = pipelines[args.pipeline]

    env = make_vec_envs(pipeline['sparse'],
                        pipeline['task'],
                        args.seed + 1000,
                        args.num_processes,
                        None,
                        None,
                        device,
                        False,
                        policies,
                        show=(args.num_processes == 1),
                        no_norm=True,
                        pose_estimator=pose_estimator_info)
    null_action = torch.zeros((1, env.action_space.shape[0]))

    # Get a render function
    render_func = get_render_func(env)

    if e2e:
        env.get_images(mode='activate')
    obs = env.reset()

    if render_func is not None:
        render_func('human')

    i = 0
    total_successes = 0
    num_trials = 50
    low = torch.Tensor([-0.3] * 7)
    high = torch.Tensor([0.3] * 7)
    while i < num_trials:
        with torch.no_grad():
            if e2e:
                images = torch.Tensor(
                    np.transpose(env.get_images(), (0, 3, 1, 2))).to(device)
                output = e2e.predict(images, obs[:, :7])
                action = unnormalise_y(output, low, high)
            else:
                action = null_action

        # Observe reward and next obs
        obs, rews, dones, _ = env.step(action)
        if np.all(dones):
            i += args.num_processes
            rew = sum([int(rew > 0) for rew in rews])
            total_successes += rew

        if render_func is not None:
            render_func('human')

    p_succ = 100 * total_successes / i
    print(f"{p_succ}% successful")
Example #7
def onpolicy_inference(seed,
                       env_name,
                       det,
                       load_name,
                       evaluation,
                       render,
                       knob_noisy,
                       visionnet_input,
                       env_kwargs,
                       actor_critic=None,
                       verbose=True,
                       pos_control=True,
                       step_skip=4):

    env = make_vec_envs(
        env_name,
        seed + 1000,
        1,
        None,
        None,
        device='cuda:0',
        allow_early_resets=False,
        env_kwargs=env_kwargs,
    )

    env_obj = env.venv.venv.envs[0].env.env
    if env_name.find('door') <= -1:
        env_obj.unity = None

    render_func = get_render_func(env)
    if evaluation and not render:
        render_func = None

    if env_kwargs['visionnet_input']:
        visionmodel = VisionModelXYZ()
        visionmodel = load_visionmodel(load_name, args.visionmodel_path,
                                       VisionModelXYZ())

    if not actor_critic:
        actor_critic, ob_rms = torch.load(load_name)
    actor_critic = actor_critic.eval()
    if env_kwargs['visionnet_input'] and env_name.find('doorenv') > -1:
        actor_critic.visionmodel = visionmodel
        actor_critic.visionnet_input = env_obj.visionnet_input
    actor_critic.to("cuda:0")

    if env_name.find('doorenv') > -1:
        actor_critic.nn = env_obj.nn

    recurrent_hidden_states = torch.zeros(
        1, actor_critic.recurrent_hidden_state_size)
    masks = torch.zeros(1, 1)

    full_obs = env.reset()
    initial_state = full_obs[:, :env.action_space.shape[0]]

    if env_name.find('doorenv') > -1 and env_obj.visionnet_input:
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            obs = add_noise(full_obs)
        else:
            obs = full_obs

    if render_func is not None:
        render_func('human')

    # if env_name.find('doorenv')>-1:
    #     if env_obj.xml_path.find("baxter")>-1:
    #         doorhinge_idx = 20
    #     elif env_obj.xml_path.find("float")>-1:
    #         if env_obj.xml_path.find("hook")>-1:
    #             doorhinge_idx = 6
    #         elif env_obj.xml_path.find("gripper")>-1:
    #             doorhinge_idx = 11
    #     else:
    #         if env_obj.xml_path.find("mobile")>-1:
    #             if env_obj.xml_path.find("hook")>-1:
    #                 doorhinge_idx = 9
    #             if env_obj.xml_path.find("gripper")>-1:
    #                 doorhinge_idx = 14
    #         else:
    #             if env_obj.xml_path.find("hook")>-1:
    #                 doorhinge_idx = 7
    #             if env_obj.xml_path.find("gripper")>-1:
    #                 doorhinge_idx = 12

    start_time = int(time.mktime(time.localtime()))

    i = 0
    epi_step = 0
    total_time = 0
    epi_counter = 1
    dooropen_counter = 0
    door_opened = False
    test_num = 100

    while True:
        with torch.no_grad():
            value, action, _, recurrent_hidden_states = actor_critic.act(
                obs, recurrent_hidden_states, masks, deterministic=det)
        next_action = action

        if pos_control:
            # print("enjoy step_skip",step_skip)
            if i % (512 / step_skip - 1) == 0: current_state = initial_state
            next_action = current_state + next_action
            for kk in range(step_skip):
                full_obs, reward, done, infos = env.step(next_action)

            current_state = full_obs[:, :env.action_space.shape[0]]
        else:
            for kk in range(step_skip):
                full_obs, reward, done, infos = env.step(next_action)

        if env_name.find('doorenv') > -1 and env_obj.visionnet_input:
            obs = actor_critic.obs2inputs(full_obs, 0)
        else:
            if knob_noisy:
                obs = add_noise(full_obs)
            else:
                obs = full_obs

        masks.fill_(0.0 if done else 1.0)

        if render_func is not None:
            render_func('human')

        i += 1
        epi_step += 1

        if env_name.find('doorenv') > -1:
            # if not door_opened and abs(env_obj.sim.data.qpos[doorhinge_idx])>=0.2:
            if not door_opened and abs(env_obj.get_doorangle()) >= 0.2:
                dooropen_counter += 1
                opening_time = epi_step / (1.0 / mujoco_timestep) * step_skip
                if verbose:
                    print(
                        "door opened! opening time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True

        if env_name.find('Fetch') > -1:
            if not door_opened and infos[0]['is_success'] == 1:
                dooropen_counter += 1
                opening_time = epi_step / (1.0 / mujoco_timestep) * step_skip
                if verbose:
                    print(
                        "Reached destenation! Time is {}".format(opening_time))
                total_time += opening_time
                door_opened = True

        if evaluation:
            if i % (512 / step_skip - 1) == 0:
                if env_obj.unity:
                    env_obj.close()
                env = make_vec_envs(
                    env_name,
                    seed + 1000,
                    1,
                    None,
                    None,
                    device='cuda:0',
                    allow_early_resets=False,
                    env_kwargs=env_kwargs,
                )

                if render:
                    render_func = get_render_func(env)
                env_obj = env.venv.venv.envs[0].env.env
                if env_name.find('doorenv') <= -1:
                    env_obj.unity = None
                env.reset()
                if verbose:
                    print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(
                        epi_counter))
                    eval_print(dooropen_counter, epi_counter, start_time,
                               total_time)
                epi_counter += 1
                epi_step = 0
                door_opened = False

        if i >= 512 / step_skip * test_num:
            if verbose:
                print("dooropening counter:", dooropen_counter,
                      " epi counter:", epi_counter)
                eval_print(dooropen_counter, epi_counter - 1, start_time,
                           total_time)
            break

    opening_rate, opening_timeavg = eval_print(dooropen_counter,
                                               epi_counter - 1, start_time,
                                               total_time)
    return opening_rate, opening_timeavg
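
# Usage sketch (hypothetical): evaluating a trained policy with the function above.
# The environment id, checkpoint path, and env_kwargs keys are placeholders;
# 'visionnet_input' is the only env_kwargs key referenced in this example.
if __name__ == '__main__':
    env_kwargs = dict(visionnet_input=False)
    opening_rate, opening_timeavg = onpolicy_inference(
        seed=1,
        env_name='doorenv-v0',                 # placeholder environment id
        det=True,
        load_name='trained_models/policy.pt',  # placeholder checkpoint path
        evaluation=True,
        render=False,
        knob_noisy=False,
        visionnet_input=False,
        env_kwargs=env_kwargs)
    print("opening rate: {}%, average time: {}s".format(opening_rate, opening_timeavg))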