Example #1
def main(unused_argv):
    # Environment is a PyBullet-based, Gym-style environment.
    # Initialize environment and task.
    env = Environment(
        FLAGS.assets_root,
        disp=FLAGS.disp,
        shared_memory=FLAGS.shared_memory,
        hz=480)
    task = tasks.names[FLAGS.task]()
    task.mode = FLAGS.mode

    # Each task provides a scripted oracle that performs it using ground-truth state.
    # Initialize scripted oracle agent and dataset.
    agent = task.oracle(env)
    # The dataset follows the RL convention of storing experiences episode by episode.
    dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-{task.mode}'))

    # Train seeds are even and test seeds are odd.
    seed = dataset.max_seed
    if seed < 0:
        seed = -1 if (task.mode == 'test') else -2

    # The collection loop mirrors a standard reinforcement-learning rollout.
    # Collect training data from oracle demonstrations.
    while dataset.n_episodes < FLAGS.n:
        print(f'Oracle demonstration: {dataset.n_episodes + 1}/{FLAGS.n}')
        episode, total_reward = [], 0
        seed += 2
        # Set the random seed so the episode can be reproduced.
        np.random.seed(seed)
        env.set_task(task)
        obs = env.reset()
        obs = env._get_obs(skip_fixed=True)
        info = None
        reward = 0
        for _ in range(task.max_steps):
            act = agent.act(obs, info)
            episode.append((obs, act, reward, info))
            obs, reward, done, info = env.step(act)
            obs = env._get_obs(skip_fixed=True)
            total_reward += reward
            print(f'Total Reward: {total_reward} Done: {done}')
            if done:
                break
        # Append the final observation as the goal.
        obs = env._get_obs(skip_fixed=True)
        episode.append((obs, None, reward, info))

        # Some of the collected episodes may be worth inspecting manually.
        # Only save completed demonstrations.
        # TODO(andyzeng): add back deformable logic.
        if total_reward > 0.99:
            dataset.add(seed, episode)
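All of these main(unused_argv) entry points follow the absl convention. Below is a minimal sketch of the flag definitions and wiring that Example #1 relies on; the flag names mirror the FLAGS references above, but the defaults and help strings are placeholders, not the repository's actual values.

from absl import app, flags

flags.DEFINE_string('assets_root', '.', 'Path to simulation assets.')
flags.DEFINE_string('data_dir', '.', 'Where to store collected demonstrations.')
flags.DEFINE_bool('disp', False, 'Whether to open the PyBullet GUI.')
flags.DEFINE_bool('shared_memory', False, 'Use PyBullet shared-memory mode.')
flags.DEFINE_string('task', 'block-insertion', 'Task name from tasks.names.')
flags.DEFINE_string('mode', 'train', "'train' or 'test' (controls seed parity).")
flags.DEFINE_integer('n', 10, 'Number of demonstrations to collect.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    app.run(main)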
Example #2
def main(unused_argv):
    # Configure which GPU to use.
    cfg = tf.config.experimental
    gpus = cfg.list_physical_devices('GPU')
    if not gpus:
        print('No GPUs detected. Running with CPU.')
    else:
        cfg.set_visible_devices(gpus[FLAGS.gpu], 'GPU')

    # Configure how much GPU to use (in Gigabytes).
    if FLAGS.gpu_limit is not None:
        mem_limit = 1024 * FLAGS.gpu_limit
        dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
        cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

    # Load train and test datasets.
    train_dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-train'))
    test_dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-test'))


    # Run training from scratch multiple times.
    for train_run in range(FLAGS.n_runs):
        name = f'{FLAGS.task}-{FLAGS.agent}-{FLAGS.n_demos}-{train_run}'

        # Set up tensorboard logger.
        curr_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        log_dir = os.path.join(FLAGS.train_dir, 'logs', FLAGS.agent, FLAGS.task,
                               curr_time, 'train')
        writer = tf.summary.create_file_writer(log_dir)

        # Initialize agent.
        np.random.seed(train_run)
        tf.random.set_seed(train_run)

        agent = agents.names[FLAGS.agent](name, FLAGS.task, FLAGS.train_dir)

        # Limit random sampling during training to a fixed dataset.
        max_demos = train_dataset.n_episodes
        episodes = np.random.choice(range(max_demos), FLAGS.n_demos, False)
        train_dataset.set(episodes)

        if FLAGS.continue_logging:
            agent.load(FLAGS.log_ckpt)
            agent.total_steps = FLAGS.log_ckpt + 1

        # Train agent and save snapshots.
        while agent.total_steps < FLAGS.n_steps:
            for _ in range(FLAGS.interval):
                agent.train(train_dataset, writer)
            agent.validate(test_dataset, writer)
            agent.save()
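Example #2 only touches the agent through a handful of methods and attributes. The stub below is purely illustrative (it is not one of the repository's agents); it just names the interface that agents.names[FLAGS.agent] is expected to satisfy, based on the calls above.

class StubAgent:
    """Illustrative interface only: train/validate/save/load/total_steps."""

    def __init__(self, name, task, root_dir):
        self.name, self.task, self.root_dir = name, task, root_dir
        self.total_steps = 0

    def train(self, dataset, writer):
        # One optimization step on a batch sampled from dataset; log to writer.
        self.total_steps += 1

    def validate(self, dataset, writer):
        pass  # Evaluate on held-out episodes and log metrics to writer.

    def save(self):
        pass  # Write a checkpoint under root_dir, tagged with total_steps.

    def load(self, n_steps):
        pass  # Restore the weights from the checkpoint saved at step n_steps.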
Example #3
def main(unused_argv):

  # Initialize environment and task.
  env_cls = ContinuousEnvironment if FLAGS.continuous else Environment
  env = env_cls(
      FLAGS.assets_root,
      disp=FLAGS.disp,
      shared_memory=FLAGS.shared_memory,
      hz=480)
  task = tasks.names[FLAGS.task](continuous=FLAGS.continuous)
  task.mode = FLAGS.mode

  # Initialize scripted oracle agent and dataset.
  agent = task.oracle(env, steps_per_seg=FLAGS.steps_per_seg)
  dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-{task.mode}'))

  # Train seeds are even and test seeds are odd.
  seed = dataset.max_seed
  if seed < 0:
    seed = -1 if (task.mode == 'test') else -2

  # Determine max steps per episode.
  max_steps = task.max_steps
  if FLAGS.continuous:
    max_steps *= (FLAGS.steps_per_seg * agent.num_poses)

  # Collect training data from oracle demonstrations.
  while dataset.n_episodes < FLAGS.n:
    print(f'Oracle demonstration: {dataset.n_episodes + 1}/{FLAGS.n}')
    episode, total_reward = [], 0
    seed += 2
    np.random.seed(seed)
    env.set_task(task)
    obs = env.reset()
    info = None
    reward = 0
    for _ in range(max_steps):
      act = agent.act(obs, info)
      episode.append((obs, act, reward, info))
      obs, reward, done, info = env.step(act)
      total_reward += reward
      print(f'Total Reward: {total_reward} Done: {done}')
      if done:
        break
    episode.append((obs, None, reward, info))

    # Only save completed demonstrations.
    # TODO(andyzeng): add back deformable logic.
    if total_reward > 0.99:
      dataset.add(seed, episode)
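Example #3 additionally depends on a continuous-control flag and a steps-per-segment flag. A minimal sketch of how they might be declared, with placeholder defaults:

from absl import flags

flags.DEFINE_bool('continuous', False, 'Use ContinuousEnvironment and the continuous oracle.')
flags.DEFINE_integer('steps_per_seg', 2, 'Oracle actions per motion segment.')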
Example #4
def main(unused_argv):
    # Initialize environment and task.
    env = Environment(
        FLAGS.assets_root,
        disp=FLAGS.disp,
        shared_memory=FLAGS.shared_memory,
        hz=480)
    task = tasks.names[FLAGS.task]()
    task.mode = FLAGS.mode

    # Initialize scripted oracle agent and dataset.
    agent = task.oracle(env)
    dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-{task.mode}'))

    # Train seeds are even and test seeds are odd.
    seed = dataset.max_seed
    if seed < 0:
        seed = -1 if (task.mode == 'test') else -2

    count = 0

    # Collect training data from oracle demonstrations.
    while dataset.n_episodes < FLAGS.n:
        print(f'Oracle demonstration: {dataset.n_episodes + 1}/{FLAGS.n}')
        episode, total_reward = [], 0
        seed += 2
        np.random.seed(seed)
        env.set_task(task)
        obs = env.reset()
        info = None
        reward = 0
        count += 1
        for _ in range(task.max_steps):
            act = agent.act(obs, info)
            episode.append((obs, act, reward, info))
            obs, reward, done, info = env.step(act)
            total_reward += reward
            print(f'Total Reward: {total_reward} Done: {done} Episode: {dataset.n_episodes}')
            if done:
                break
        episode.append((obs, None, reward, info))

        # Only save completed demonstrations.
        # TODO(andyzeng): add back deformable logic.
        if total_reward > 0.99:
            dataset.add(seed, episode)
            # print(dataset.shape())
    # Disconnect from the PyBullet server once enough demonstrations are saved.
    if dataset.n_episodes == FLAGS.n:
        pybullet.disconnect()
Example #5
def main():

    train_dataset = Dataset(os.path.join(dataset_root,
                                         'ravens_demo-1623923819265029807'),
                            use_aux=True)
    max_demos = train_dataset.n_episodes

    episodes = np.random.choice(range(max_demos), max_demos, False)
    print(episodes)
    train_dataset.set(episodes)

    agent = BC_Agent()
    if hasattr(torch.cuda, 'empty_cache'):
        torch.cuda.empty_cache()
    #agent.train_model(train_dataset)

    agent.load_whole_dataset(train_dataset, n_max_demos=1000)

    agent.aux_pretraining(train_dataset,
                          load_model='pretrain_model_600_epoch',
                          batch_size=100)
    #agent.train_model_with_pretrained_cnn(train_dataset)
    #agent.train_full_model(train_dataset)

    # Draw an initial sample so plt.imshow has an image to display.
    cmap, _, _, _ = agent.get_sample(train_dataset)

    plt.ion()

    img_draw = plt.imshow(cmap, interpolation='nearest')

    while True:
        cmap, hmap, pose, grasp = agent.get_sample(train_dataset)
        img_draw.set_data(cmap)
        if grasp == 1:
            plt.pause(5)
        else:
            plt.pause(0.0001)
        print('pose:{}, grasp:{}'.format(pose, grasp))
        time.sleep(1)
Example #6
def main():
    rospy.init_node('raven_vive_teleop')
    rate = rospy.Rate(60.0)
    train_dataset = Dataset(os.path.join(dataset_root,
                                         'ravens_demo-1623923819265029807'),
                            use_aux=True)
    max_demos = train_dataset.n_episodes

    episodes = np.random.choice(range(max_demos), max_demos, False)
    print(episodes)
    train_dataset.set(episodes)

    env = Environment(assets_root, disp=True, hz=60)

    task = tasks.names[task_name]()
    task.mode = mode

    env.set_task(task)
    obs = env.reset()
    info = None

    seed = 0

    agent = bc_softmax_aux.BC_Agent()
    #agent = bc_softmax2d_aux.BC_Agent()
    agent.load_pretrained_model(model_save_rootdir +
                                "pretrain_model_4400_epoch")
    agent.act(obs)

    episode_steps = 0
    n_episode = 1
    aux = {
        'ee_pose': ((0, 0, 0), (0, 0, 0, 1)),
        'obj_pose': ((0, 0, 0), (0, 0, 0, 1))
    }

    plt.ion()
    im_color = obs['color'][0]
    img_draw = plt.imshow(im_color, interpolation='nearest', cmap='gray')

    while not rospy.is_shutdown():
        (obs, act, _, _, aux_dataset), _ = train_dataset.sample()

        im_color = obs['color'][0]
        img_draw.set_data(im_color)
        plt.pause(0.00000001)

        action, predicted_aux = agent.act(obs, ret_aux=True)
        #if action != None:
        #    print(action)
        #obj_pose = predicted_aux['predicted_obj_pose']
        obj_pose = predicted_aux['predicted_ee_position']
        #obj_pose = aux['ee_pose']
        marker_head_point = [obj_pose[0], obj_pose[1], obj_pose[2]]
        marker_tail_point = [obj_pose[0], obj_pose[1], obj_pose[2] + 0.01]
        p.addUserDebugLine(marker_head_point,
                           marker_tail_point,
                           lineColorRGB=[0, 1, 0],
                           lifeTime=3,
                           lineWidth=5)

        obs, reward, done, info, aux = env.step_simple(action,
                                                       use_aux=True,
                                                       stand_still=True)
        #obs, reward, done, info, aux = env.step_simple(action,use_aux = True)
        plt.pause(3)
        print(aux_dataset)
        print(predicted_aux)
        # Mean squared error between the dataset's ground-truth end-effector
        # position and the agent's predicted position.
        auxg = np.array(aux_dataset['ee_pose'][0], dtype=float)
        auxp = np.array(predicted_aux['predicted_ee_position'], dtype=float)
        d = np.sum(np.square(auxg - auxp)) / 3
        print('mse:%f\n' % d)

        # im_depth = obs['depth'][0]
        # img_draw.set_data(im_depth)
        # plt.pause(0.00000001)
        # plt.show()
        # print( im_color.shape)

        # print(obs['color'])
        s = "Episode:%d, steps:%d" % (n_episode, episode_steps)
        print(s)
        if done:
            n_episode += 1
            episode_steps = 0

            seed += 1

            episode = []

        else:
            episode_steps += 1

        if episode_steps > 100:
            episode = []
            episode_steps = 0
            n_episode += 1

            obs = env.reset()

        rate.sleep()
Example #7
def main(unused_argv):

    rospy.init_node('raven_vive_teleop')

    temp_flag = 0

    pre_position = [0, 0, 0]
    pre_pose = [1, 0, 0, 0]
    temp = [1, 0, 0, 0]
    # Use the same loop rate as the gym environment.
    rate = rospy.Rate(60.0)

    env = Environment(assets_root, disp=True, hz=60)

    task = tasks.names[task_name]()
    task.mode = mode

    env.set_task(task)
    obs = env.reset()
    info = None
    #agent = teleop_agent(env)
    dataset = Dataset(os.path.join(dataset_root, f'ravens-bc-rollout-{time.time_ns()}'))
    seed = 0

    br = tf.TransformBroadcaster()
    rci = RobotControlInterface(env)

    episode = []
    reward = 0
    done = False

    plt.ion()
    im_color = obs['color'][0]
    im_depth = obs['depth'][0]
    img_draw = plt.imshow(im_depth, interpolation='nearest')

    agent = train_bc.BC_Agent()
    agent.load_pretrained_model(model_save_rootdir + "model_500_epoch")
    agent.act(obs)
    f = open(model_save_rootdir + "rollout_log.txt", "w+")
    episode_steps = 0
    n_episode = 1
    while not rospy.is_shutdown():
  
        #p.stepSimulation()
        #p.getCameraImage(480, 320)
        keys = p.getKeyboardEvents()
        if ord("r") in keys and keys[ord("r")] & p.KEY_WAS_RELEASED:
            print("reset env")
            episode = []
            episode_steps = 0
            obs = env.reset()
        
        action = agent.act(obs)
        #if action != None:
        #    print(action)

        obs, reward, done, info = env.step_simple(action)
        im_depth = obs['depth'][0]
        img_draw.set_data(im_depth)
        plt.pause(0.00000001)
        plt.show()
        # print( im_color.shape)

        # print(obs['color'])
        s = "Episode:%d, steps:%d"%(n_episode, episode_steps)
        print(s)
        if done:
            n_episode += 1
            episode_steps = 0
            s += ", succeeded!\n"
            f.write(s)
            f.flush()
            seed += 1
            dataset.add(seed, episode)
            episode = []

        else:
            episode_steps += 1

        if episode_steps > 100:
            episode = []
            episode_steps = 0
            n_episode += 1

            obs = env.reset()
            s += ", failed!\n" 
            f.write(s)
            f.flush()
        


        

        
        rate.sleep()
Example #8
def main(unused_argv):

    rospy.init_node('raven_vive_teleop')

    temp_flag = 0

    pre_position = [0, 0, 0]
    pre_pose = [1, 0, 0, 0]
    temp = [1, 0, 0, 0]
    # same loop rate as gym environment
    rate = rospy.Rate(60.0)

    env = Environment(assets_root, disp=True, hz=60)

    task = tasks.names[task_name]()
    task.mode = mode

    env.set_task(task)
    obs = env.reset()
    info = None
    #agent = teleop_agent(env)
    dataset = Dataset(
        os.path.join(dataset_root, f'ravens-bc-rollout-{time.time_ns()}'))
    seed = 0

    br = tf.TransformBroadcaster()
    rci = RobotControlInterface(env)

    episode = []
    reward = 0
    done = False

    plt.ion()
    im_color = obs['color'][0]
    im_depth = obs['depth'][0]
    img_draw = plt.imshow(im_depth, interpolation='nearest')

    agent = bc_softmax_aux.BC_Agent()
    #agent = bc_softmax2d_aux.BC_Agent()
    agent.load_pretrained_model(model_save_rootdir + "model_8200_epoch")
    agent.act(obs)
    f = open(model_save_rootdir + "rollout_log.txt", "w+")
    episode_steps = 0
    n_episode = 1
    aux = {
        'ee_pose': ((0, 0, 0), (0, 0, 0, 1)),
        'obj_pose': ((0, 0, 0), (0, 0, 0, 1))
    }
    while not rospy.is_shutdown():

        #p.stepSimulation()
        #p.getCameraImage(480, 320)
        keys = p.getKeyboardEvents()
        if ord("r") in keys and keys[ord("r")] & p.KEY_WAS_RELEASED:
            print("reset env")
            episode = []
            episode_steps = 0
            n_episode += 1

            obs = env.reset()

        action, predicted_aux = agent.act(obs, ret_aux=True)
        #if action != None:
        #    print(action)
        obj_pose = predicted_aux['predicted_obj_pose']
        #obj_pose = predicted_aux['predicted_ee_position']
        #obj_pose = aux['ee_pose']
        marker_head_point = [obj_pose[0], obj_pose[1], obj_pose[2]]
        marker_tail_point = [obj_pose[0], obj_pose[1], obj_pose[2] + 0.01]
        p.addUserDebugLine(marker_head_point,
                           marker_tail_point,
                           lineColorRGB=[0, 1, 0],
                           lifeTime=0.2,
                           lineWidth=5)

        #obs, reward, done, info, aux = env.step_simple(action,use_aux = True, stand_still = True)
        obs, reward, done, info, aux = env.step_simple(action, use_aux=True)
        print(aux)
        print(predicted_aux)
        # im_depth = obs['depth'][0]
        # img_draw.set_data(im_depth)
        # plt.pause(0.00000001)
        # plt.show()
        # print( im_color.shape)

        # print(obs['color'])
        s = "Episode:%d, steps:%d" % (n_episode, episode_steps)
        print(s)
        if done:
            n_episode += 1
            episode_steps = 0
            s += ", succeeded!\n"
            f.write(s)
            f.flush()
            seed += 1
            dataset.add(seed, episode)
            episode = []

        else:
            episode_steps += 1

        if episode_steps > 100:
            episode = []
            episode_steps = 0
            n_episode += 1

            obs = env.reset()
            s += ", failed!\n"
            f.write(s)
            f.flush()

        rate.sleep()
Example #9
def main(unused_argv):

    rospy.init_node('raven_vive_teleop')

    temp_flag = 0

    pre_position = [0, 0, 0]
    pre_pose = [1, 0, 0, 0]
    temp = [1, 0, 0, 0]
    # same loop rate as gym environment
    rate = rospy.Rate(60.0)

    env = Environment(assets_root, disp=True, hz=60)

    task = tasks.names[task_name]()
    task.mode = mode

    env.set_task(task)
    obs = env.reset()
    info = None
    #agent = teleop_agent(env)
    dataset = Dataset(
        os.path.join(dataset_root, f'ravens-bc-rollout-{time.time_ns()}'))
    seed = 0

    br = tf.TransformBroadcaster()
    rci = RobotControlInterface(env)

    episode = []
    reward = 0
    done = False

    plt.ion()
    im_color = obs['color'][0]
    im_depth = obs['depth'][0]
    img_draw = plt.imshow(im_depth, interpolation='nearest')

    agent = random_action_agent(env)

    f = open(model_save_rootdir + "rollout_log.txt", "w+")
    episode_steps = 0
    n_episode = 1
    while not rospy.is_shutdown():

        #p.stepSimulation()
        #p.getCameraImage(480, 320)
        keys = p.getKeyboardEvents()
        if ord("r") in keys and keys[ord("r")] & p.KEY_WAS_RELEASED:
            print("reset env")
            episode = []

            obs = env.reset()

        action = agent.act()
        #if action != None:
        #    print(action)
        # Repeat the sampled action for 30 simulation steps.
        for _ in range(30):
            obs, reward, done, info, aux = env.step_simple(action,
                                                           use_aux=True)
        state = get_state_ground_truth(env)
        print(state)
        print(aux)
        env.reset()

        rate.sleep()
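Example #9 calls get_state_ground_truth(env) without defining it. A purely illustrative sketch of such a helper is shown below; it reads object poses straight from PyBullet, and the env.obj_ids attribute (with a 'rigid' key) is an assumption about the Environment class, not something shown above.

import pybullet as p

def get_state_ground_truth(env):
    # Query PyBullet for the ground-truth pose of every rigid object the
    # environment is tracking (env.obj_ids is assumed, as noted above).
    state = {}
    for obj_id in env.obj_ids.get('rigid', []):
        position, orientation = p.getBasePositionAndOrientation(obj_id)
        state[obj_id] = (position, orientation)
    return state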