Example #1
def main():

    # Parse command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--task', default='insertion')
    parser.add_argument('--agent', default='transporter')
    parser.add_argument('--n_demos', default=100, type=int)
    parser.add_argument('--n_steps', default=40000, type=int)
    parser.add_argument('--n_runs', default=1, type=int)
    parser.add_argument('--interval', default=1000, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--gpu_limit', default=None, type=int)
    args = parser.parse_args()

    # Configure which GPU to use.
    cfg = tf.config.experimental
    gpus = cfg.list_physical_devices('GPU')
    if not gpus:
        print('No GPUs detected. Running with CPU.')
    else:
        cfg.set_visible_devices(gpus[args.gpu], 'GPU')

    # Configure how much GPU memory to use (in gigabytes).
    if args.gpu_limit is not None:
        mem_limit = 1024 * args.gpu_limit
        dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
        cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

    # Load train and test datasets.
    train_dataset = Dataset(os.path.join('data', f'{args.task}-train'))
    test_dataset = Dataset(os.path.join('data', f'{args.task}-test'))

    # Run training from scratch multiple times.
    for train_run in range(args.n_runs):
        name = f'{args.task}-{args.agent}-{args.n_demos}-{train_run}'

        # Set up tensorboard logger.
        curr_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        log_dir = os.path.join('logs', args.agent, args.task, curr_time,
                               'train')
        writer = tf.summary.create_file_writer(log_dir)

        # Initialize agent.
        np.random.seed(train_run)
        tf.random.set_seed(train_run)
        agent = agents.names[args.agent](name, args.task)

        # Limit random sampling during training to a fixed dataset.
        max_demos = train_dataset.n_episodes
        episodes = np.random.choice(range(max_demos), args.n_demos, False)
        train_dataset.set(episodes)

        # Train agent and save snapshots.
        while agent.total_steps < args.n_steps:
            for _ in range(args.interval):
                agent.train(train_dataset, writer)
            agent.validate(test_dataset, writer)
            agent.save()
Example #2
def main():

  # Parse command line arguments.
  parser = argparse.ArgumentParser()
  parser.add_argument('--disp', action='store_true')
  parser.add_argument('--task', default='insertion')
  parser.add_argument('--mode', default='train')
  parser.add_argument('--n', default=1000, type=int)
  args = parser.parse_args()

  # Initialize environment and task.
  env = Environment(args.disp, hz=480)
  task = tasks.names[args.task]()
  task.mode = args.mode

  # Initialize scripted oracle agent and dataset.
  agent = task.oracle(env)
  dataset = Dataset(os.path.join('data', f'{args.task}-{task.mode}'))

  # Train seeds are even and test seeds are odd.
  seed = dataset.max_seed
  if seed < 0:
    seed = -1 if (task.mode == 'test') else -2

  # Collect training data from oracle demonstrations.
  while dataset.n_episodes < args.n:
    print(f'Oracle demonstration: {dataset.n_episodes + 1}/{args.n}')
    episode, total_reward = [], 0
    seed += 2
    np.random.seed(seed)
    obs, reward, _, info = env.reset(task)
    for _ in range(task.max_steps):
      act = agent.act(obs, info)
      episode.append((obs, act, reward, info))
      obs, reward, done, info = env.step(act)
      total_reward += reward
      print(f'{done} {total_reward}')
      if done:
        break
    episode.append((obs, None, reward, info))

    # Only save completed demonstrations.
    # TODO(andyzeng): add back deformable logic.
    if total_reward > 0.99:
      dataset.add(seed, episode)
Example #3
def main():

  # Parse command line arguments.
  parser = argparse.ArgumentParser()
  parser.add_argument('--disp', action='store_true')
  parser.add_argument('--task', default='insertion')
  parser.add_argument('--agent', default='transporter')
  parser.add_argument('--n_demos', default=100, type=int)
  parser.add_argument('--n_steps', default=40000, type=int)
  parser.add_argument('--n_runs', default=1, type=int)
  parser.add_argument('--gpu', default=0, type=int)
  parser.add_argument('--gpu_limit', default=None, type=int)
  args = parser.parse_args()

  # Configure which GPU to use.
  cfg = tf.config.experimental
  gpus = cfg.list_physical_devices('GPU')
  if not gpus:
    print('No GPUs detected. Running with CPU.')
  else:
    cfg.set_visible_devices(gpus[args.gpu], 'GPU')

  # Configure how much GPU memory to use (in gigabytes).
  if args.gpu_limit is not None:
    mem_limit = 1024 * args.gpu_limit
    dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
    cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

  # Initialize environment and task.
  env = Environment(args.disp, hz=480)
  task = tasks.names[args.task]()
  task.mode = 'test'

  # Load test dataset.
  dataset = Dataset(os.path.join('data', f'{args.task}-test'))

  # Run testing for each training run.
  for train_run in range(args.n_runs):
    name = f'{args.task}-{args.agent}-{args.n_demos}-{train_run}'

    # Initialize agent.
    np.random.seed(train_run)
    tf.random.set_seed(train_run)
    agent = agents.names[args.agent](name, args.task)

    # # Run testing every interval.
    # for train_step in range(0, args.n_steps + 1, args.interval):

    # Load trained agent.
    if args.n_steps > 0:
      agent.load(args.n_steps)

    # Run testing and save total rewards with last transition info.
    results = []
    for i in range(dataset.n_episodes):
      print(f'Test: {i + 1}/{dataset.n_episodes}')
      episode, seed = dataset.load(i)
      goal = episode[-1]
      total_reward = 0
      np.random.seed(seed)
      obs, reward, _, info = env.reset(task)
      for _ in range(task.max_steps):
        act = agent.act(obs, info, goal)
        obs, reward, done, info = env.step(act)
        total_reward += reward
        print(f'{done} {total_reward}')
        if done:
          break
      results.append((total_reward, info))

      # Save results.
      pickle.dump(results, open(f'{name}-{args.n_steps}.pkl', 'wb'))
Example #4
    gpus = cfg.list_physical_devices('GPU')
    if len(gpus) == 0:
        print('No GPUs detected. Running with CPU.')
    else:
        cfg.set_visible_devices(gpus[int(args.gpu)], 'GPU')

    # Configure how much GPU memory to use (in gigabytes).
    if args.gpu_mem_limit is not None:
        mem_limit = 1024 * int(args.gpu_mem_limit)
        print(args.gpu_mem_limit)
        dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
        cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

    # Initialize task. Later, initialize Environment if necessary.
    task = tasks.names[args.task]()
    dataset = Dataset(os.path.join('data', args.task))
    if args.subsamp_g:
        dataset.subsample_goals = True

    # Collect training data from oracle demonstrations.
    max_demos = 10**MAX_ORDER
    task.mode = 'train'
    seed_to_add = 0  # Daniel: check carefully if resuming the bag-items tasks.

    # If continuing from prior calls, the demo index starts counting based on
    # the number of demos that already exist in `data/{task}`. Make the
    # environment here to avoid issues with cloth rendering + multiple
    # Environment calls.
    make_new_env = (dataset.num_episodes < max_demos)
    if make_new_env:
        env = Environment(args.disp, hz=args.hz)
Example #5
def main():
    # Parse command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--disp', action='store_true')
    parser.add_argument('--task', default='hanoi')
    parser.add_argument('--agent', default='transporter')
    parser.add_argument('--hz', default=240.0, type=float)
    parser.add_argument('--num_demos', default='100')
    parser.add_argument('--num_rots', default=36, type=int)
    parser.add_argument('--gpu_mem_limit', default=None)
    parser.add_argument('--subsamp_g', action='store_true')
    parser.add_argument('--crop_bef_q', default=1, type=int)
    args = parser.parse_args()

    # Configure which GPU to use.
    cfg = tf.config.experimental
    gpus = cfg.list_physical_devices('GPU')
    if not gpus:
        print('No GPUs detected. Running with CPU.')
    else:
        cfg.set_visible_devices(gpus[int(args.gpu)], 'GPU')

    # Configure how much GPU memory to use (in gigabytes).
    if args.gpu_mem_limit is not None:
        mem_limit = 1024 * int(args.gpu_mem_limit)
        print(args.gpu_mem_limit)
        dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)]
        cfg.set_virtual_device_configuration(gpus[0], dev_cfg)

    # Initialize environment and task.
    env = Environment(args.disp, hz=args.hz)
    task = tasks.names[args.task]()
    dataset = Dataset(os.path.join('data', args.task))
    if args.subsamp_g:
        dataset.subsample_goals = True

    # Collect training data from oracle demonstrations.
    max_order = 3
    max_demos = 10**max_order
    task.mode = 'train'
    seed_toadd_train = 0

    while dataset.num_episodes < max_demos:
        seed = dataset.num_episodes + seed_toadd_train
        np.random.seed(seed)
        print(
            f'Demonstration: {dataset.num_episodes + 1}/{max_demos}, seed {seed}'
        )
        total_reward, episode, t, last_obs_info = rollout(
            task.oracle(env), env, task)

        # Check whether the episode should be added; if not, increase the seed offset.
        _, last_info = last_obs_info
        if ignore_this_demo(args, total_reward, t, last_info):
            seed_toadd_train += 1
            li = last_info['extras']
            print(f'Ignoring demo. {li}, seed_toadd: {seed_toadd_train}')
        else:
            dataset.add(episode, last_obs_info)

    # Collect validation dataset with different random seeds.
    validation_dataset = Dataset(os.path.join('validation_data', args.task))
    num_validation = 100
    seed_tosub_valid = 0

    while validation_dataset.num_episodes < num_validation:
        seed = 2**32 - 1 - validation_dataset.num_episodes - seed_tosub_valid
        np.random.seed(seed)
        print(
            f'Validation Demonstration: {validation_dataset.num_episodes + 1}/{num_validation}, seed {seed}'
        )
        total_reward, episode, t, last_obs_info = rollout(
            task.oracle(env), env, task)

        # Check whether the episode should be added; if not, increase the offset subtracted from the seed.
        _, last_info = last_obs_info
        if ignore_this_demo(args, total_reward, t, last_info):
            seed_tosub_valid += 1
            li = last_info['extras']
            print(f'Ignoring demo. {li}, seed_tosub: {seed_tosub_valid}')
        else:
            validation_dataset.add(episode, last_obs_info)

    env.stop()
    del env

    # Evaluate on increasing orders of magnitude of demonstrations.
    num_train_runs = 1  # 3+ to measure variance over random initialization
    num_train_iters = 40000
    test_interval = 2000
    num_test_episodes = 20

    # There are a few seeds that even the oracle can't complete; skip these.
    # TODO(peteflorence): compute this automatically for each task.
    oracle_cant_complete_seed = []
    if args.task == 'insertion-sixdof':
        oracle_cant_complete_seed.append(3)
    num_test_episodes += len(oracle_cant_complete_seed)

    # Do multiple training runs from scratch.
    for train_run in range(num_train_runs):

        # Set up tensorboard logger.
        current_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        train_log_dir = os.path.join('logs', args.agent, args.task,
                                     current_time, 'train')
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)

        # Set the base of the agent name; suffixes may be appended below.
        name = f'{args.task}-{args.agent}-{args.num_demos}-{train_run}'

        # Initialize agent and limit random dataset sampling to a fixed set.
        tf.random.set_seed(train_run)
        if args.agent == 'transporter':
            name = f'{name}-rots-{args.num_rots}-crop_bef_q-{args.crop_bef_q}'
            agent = agents.names[args.agent](name,
                                             args.task,
                                             num_rotations=args.num_rots,
                                             crop_bef_q=(args.crop_bef_q == 1))
        elif 'transporter-goal' in args.agent:
            # For transporter-goal and transporter-goal-naive agents.
            name = f'{name}-rots-{args.num_rots}'
            if args.subsamp_g:
                name += '-sub_g'
            else:
                name += '-fin_g'
            agent = agents.names[args.agent](name,
                                             args.task,
                                             num_rotations=args.num_rots)
        else:
            agent = agents.names[args.agent](name, args.task)
        np.random.seed(train_run)
        num_demos = int(args.num_demos)
        train_episodes = np.random.choice(range(max_demos), num_demos, False)
        dataset.set(train_episodes)
        # agent.load(10000)

        performance = []
        while agent.total_iter < num_train_iters:

            # Train agent.
            tf.keras.backend.set_learning_phase(1)
            agent.train(dataset,
                        num_iter=test_interval,
                        writer=train_summary_writer,
                        validation_dataset=validation_dataset)
            tf.keras.backend.set_learning_phase(0)

            # Skip evaluation for certain tasks or for goal-based agents.
            if (skip_testing_during_training(args.task)
                    or 'transporter-goal' in args.agent):
                continue

            # Evaluate agent.
            task.mode = 'test'
            env = Environment(args.disp, hz=args.hz)
            for episode in range(num_test_episodes):
                if episode in oracle_cant_complete_seed:
                    continue
                np.random.seed(10**max_order + episode)
                total_reward, _, _, _ = rollout(agent, env, task)
                print(f'Test: {episode} Total Reward: {total_reward:.2f}')
                performance.append((agent.total_iter, total_reward))
            env.stop()
            del env

            # Save results.
            pickle.dump(performance, open(f'{name}.pkl', 'wb'))
Example #6
    return False


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--disp', action='store_true')
    parser.add_argument('--task', default='insertion-goal')
    parser.add_argument('--num_goals', default=20, type=int)
    parser.add_argument('--hz', default=240.0, type=float)
    args = parser.parse_args()
    assert is_goal_conditioned(args)

    # Initialize environment and task.
    env = Environment(args.disp, hz=args.hz)
    task = tasks.names[args.task]()
    dataset = Dataset(os.path.join('goals', args.task))
    task.mode = 'train'
    seed_to_add = 0

    # For some tasks, call reset() again with a new seed if init state is 'done'.
    while dataset.num_episodes < args.num_goals:
        seed = 10**MAX_ORDER + dataset.num_episodes + seed_to_add
        print(
            f'\nNEW GOAL: {dataset.num_episodes+1}/{args.num_goals}, seed: {seed}\n'
        )
        np.random.seed(seed)
        demo_reward, t, episode, last_stuff = rollout(task.oracle(env), env,
                                                      task)
        last_extras = last_stuff[1]['extras']
        if ignore_this_demo(args, demo_reward, t, last_extras):
            seed_to_add += 1