Example #1
def collect_episodes(config, output_dir=None, visualize=True):

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    save_file = os.path.join(output_dir,
                             "%s.p" % (get_current_YYYY_MM_DD_hh_mm_ss_ms()))

    # initialize the data collector from the config
    dc = PusherSliderDataCollector(config)
    num_episodes = config['dataset']['num_episodes']
    multi_episode_container = MultiEpisodeContainer()
    for i in range(num_episodes):
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        episode = dc.collect_single_episode(visualize, episode_name=name)
        multi_episode_container.add_episode(episode)

    print("saving data to %s" % save_file)
    multi_episode_container.save_to_file(save_file)
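
The helper functions used above are not shown in these examples. A minimal sketch of what get_current_YYYY_MM_DD_hh_mm_ss_ms and save_yaml presumably look like (assuming PyYAML; the exact timestamp format is an assumption):

import datetime
import yaml

def get_current_YYYY_MM_DD_hh_mm_ss_ms():
    # timestamp string with sub-second resolution, used to build unique filenames
    now = datetime.datetime.now()
    return now.strftime('%Y-%m-%d-%H-%M-%S') + '-%06d' % now.microsecond

def save_yaml(data, filename):
    # thin wrapper around yaml.dump
    with open(filename, 'w') as f:
        yaml.dump(data, f, default_flow_style=False)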
Example #2
def collect_episodes(config,
                     output_dir=None,
                     visualize=True,
                     use_threads=False):

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    # initialize the data collector from the config
    dc = DrakePusherSliderEpisodeCollector(config)
    num_episodes = config['dataset']['num_episodes']

    # record some metadata
    metadata = dict()
    metadata['episodes'] = dict()

    for i in range(num_episodes):

        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        episode = dc.collect_single_episode(visualize, episode_name=name)

        print("saving to disk")
        metadata['episodes'][name] = dict()

        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)

        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][name]['non_image_data_file'] = non_image_data_file
        metadata['episodes'][name]['image_data_file'] = image_data_file

        print("done saving to disk")
        elapsed = time.time() - start_time
        print("single episode took: %.2f seconds" % (elapsed))

    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))
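
A hypothetical usage sketch (load_yaml mirrors the save_yaml helper above; the config path is an assumption), followed by the schematic layout of the metadata.yaml this function writes:

import yaml

def load_yaml(filename):
    with open(filename, 'r') as f:
        return yaml.safe_load(f)

config = load_yaml('config.yaml')  # must provide config['dataset']['num_episodes']
collect_episodes(config, output_dir='data', visualize=False)

# resulting metadata.yaml (schematic):
# episodes:
#   <timestamp>_idx_0:
#     image_data_file: <path to hdf5 file>
#     non_image_data_file: <path to pickle file>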
Example #3
def train_dynamics(
        config,
        train_dir,  # str: directory to save output
):

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    print(config)

    # load the data
    episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'])

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from a pre-trained network
    '''
    define model for dynamics prediction
    '''
    model_dy = None

    if config['train']['resume_epoch'] >= 0:
        # resume from a pretrained checkpoint
        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

        # torch.load only deserializes the saved state dict; load_state_dict
        # then copies those tensors into the model's parameters, so the model
        # must be constructed before loading
        model_dy = DynaNetMLP(config)
        model_dy.load_state_dict(torch.load(state_dict_path))

        # note: the optimizer state is not restored here, so a resumed run
        # re-initializes Adam's moment estimates
    else:
        # not starting from a pre-trained checkpoint: create the network and
        # compute the normalization parameters
        model_dy = DynaNetMLP(config)

        # compute normalization params
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0

    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    global_iteration += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            # states, actions = data
                            assert states.size(1) == n_samples

                            # normalize states and actions once for entire rollout
                            states = model_dy.state_normalizer.normalize(
                                states)
                            actions = model_dy.action_normalizer.normalize(
                                actions)

                            B = states.size(0)
                            loss_mse = 0.

                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            for j in range(n_roll):

                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j +
                                                     n_his] if actions is not None else None

                                # state_cur: B x n_his x state_dim
                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                loss_mse_cur = criterionMSE(
                                    state_pred, state_des)
                                loss_mse += loss_mse_cur / n_roll

                                # update state_cur
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat([
                                    state_cur[:, 1:],
                                    state_pred.unsqueeze(1)
                                ], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (np.sqrt(
                            loss_mse.item()), meter_loss_rmse.avg)

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss/train", loss_mse.item(),
                                              global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg,
                                              global_iteration)

                    if phase == 'train' and i % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
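
DataNormalizer, used above to whiten states and actions, is assumed to store per-dimension statistics; a minimal sketch under that assumption, registering the statistics as buffers so they follow the model onto the GPU and into checkpoints:

import torch
import torch.nn as nn

class DataNormalizer(nn.Module):
    def __init__(self, mean, std, eps=1e-8):
        super(DataNormalizer, self).__init__()
        # buffers (not parameters): saved in the state dict and moved by .cuda()
        self.register_buffer('mean', torch.as_tensor(mean, dtype=torch.float))
        self.register_buffer('std', torch.as_tensor(std, dtype=torch.float))
        self.eps = eps

    def normalize(self, x):
        return (x - self.mean) / (self.std + self.eps)

    def denormalize(self, x):
        return x * (self.std + self.eps) + self.mean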
Example #4
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    spatial_descriptors_idx=None,
    metadata=None,
    spatial_descriptors_data=None,
):

    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    # load the data

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from a pre-trained network
    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    camera_name = config['vision_net']['camera_name']

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    index_map = get_object_and_robot_state_indices(config)
    object_state_indices = torch.LongTensor(index_map['object_indices'])
    robot_state_indices = torch.LongTensor(index_map['robot_indices'])

    object_state_shape = config['dataset']['object_state_shape']

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:

                # only validate at a certain frequency
                if (phase == "valid") and (
                    (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']
                        visual_observations_list = data[
                            'visual_observations_list']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # compile the visual observations
                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing the observation for a
                            # single time step (batched over the batch dimension)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # note: these tensors may still live on the CPU; they are
                            # cast to match `observations` via type_as below
                            # [B, -1, 3]
                            keypoints = visual_obs[camera_name][
                                'descriptor_keypoints_3d_world_frame']

                            # [B, K, 3] where K = len(spatial_descriptors_idx)
                            keypoints = keypoints[:, spatial_descriptors_idx]

                            B, K, _ = keypoints.shape

                            # [B, K*3]
                            keypoints_reshape = keypoints.reshape([B, K * 3])

                            if DEBUG:
                                print("keypoints.shape", keypoints.shape)
                                print("keypoints_reshape.shape",
                                      keypoints_reshape.shape)
                            visual_model_output_list.append(keypoints_reshape)

                        visual_model_output = None
                        if len(visual_model_output_list) > 0:
                            # concatenate this into a tensor
                            # [B, n_samples, vision_model_out_dim]
                            visual_model_output = torch.stack(
                                visual_model_output_list, dim=1)

                        else:
                            visual_model_output = torch.Tensor(
                            )  # empty tensor

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(
                            observations)

                        # states are obtained by concatenating the visual model
                        # output with the low-dimensional observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations),
                                           dim=-1)

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps; rollout_model also
                        # consumes the initial action history, so
                        # action_seq.shape = [B, n_his + n_roll - 1, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # use rollout_model rather than unrolling the dynamics manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss is computed between tensors of shape
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B, n_roll, object_state_dim]
                        object_state_err = state_pred_err[:, :,
                                                          object_state_indices]
                        B, n_roll, object_state_dim = object_state_err.shape

                        # [B, n_roll, *object_state_shape]
                        object_state_err_reshape = object_state_err.reshape(
                            [B, n_roll, *object_state_shape])

                        # J = number of per-point weights in the weight matrix
                        J = object_state_err_reshape.shape[2]
                        weights = model_dy.weight_matrix

                        assert len(
                            weights) == J, "len(weights) = %d, but J = %d" % (
                                len(weights), J)

                        # loss mse object, note the use of broadcasting semantics
                        # [B, n_roll]
                        object_state_loss_mse = weights * torch.pow(
                            object_state_err_reshape, 2).sum(dim=-1)
                        object_state_loss_mse = object_state_loss_mse.mean()

                        l2_object = (weights * torch.norm(
                            object_state_err_reshape, dim=-1)).mean()

                        l2_object_final_step = (weights * torch.norm(
                            object_state_err_reshape[:, -1], dim=-1)).mean()

                        # [B, n_roll, robot_state_dim]
                        robot_state_err = state_pred_err[:, :,
                                                         robot_state_indices]
                        robot_state_loss_mse = torch.pow(robot_state_err,
                                                         2).sum(dim=-1).mean()

                        loss_container[
                            'object_state_loss_mse'] = object_state_loss_mse
                        loss_container[
                            'robot_state_loss_mse'] = robot_state_loss_mse
                        loss_container['l2_object'] = l2_object
                        loss_container[
                            'l2_object_final_step'] = l2_object_final_step

                        # total loss
                        loss = object_state_loss_mse + robot_state_loss_mse
                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if key not in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        # log += ', l2: %.6f' % (loss_container['l2'].item())
                        # log += ', l2_final_step: %.6f' %(loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                            # only plot the weights in the train phase; use a loop
                            # variable that does not shadow the batch index i
                            if phase == "train":
                                for w_idx in range(len(weights)):
                                    plot_name = "Weights/%d" % (w_idx)
                                    writer.add_scalar(plot_name,
                                                      weights[w_idx].item(),
                                                      counters[phase])

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
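
rollout_model, called above, is assumed to factor out the manual rollout loop of Example #3: the network sees an n_his-step history window that slides forward as predictions are fed back in. A sketch under that assumption (the calling convention dynamics_net(state_cur, action_cur) follows Example #3; compute_debug_data is ignored here):

import torch

def rollout_model(state_init, action_seq, dynamics_net, compute_debug_data=False):
    # state_init: [B, n_his, state_dim]
    # action_seq: [B, n_his + n_roll - 1, action_dim]
    # returns {'state_pred': [B, n_roll, state_dim]}
    n_his = state_init.shape[1]
    n_roll = action_seq.shape[1] - n_his + 1
    state_cur = state_init
    state_pred_list = []
    for j in range(n_roll):
        # action window aligned with the current n_his-step state history
        action_cur = action_seq[:, j:j + n_his]
        state_pred = dynamics_net(state_cur, action_cur)  # [B, state_dim]
        state_pred_list.append(state_pred)
        # slide the history window forward by one predicted step
        state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], dim=1)
    return {'state_pred': torch.stack(state_pred_list, dim=1)}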
Example #5
def train_dynamics(config,
                   train_dir, # str: directory to save output
                   multi_episode_dict,  # dict: episode_name -> episode data
                   ):

    use_precomputed_keypoints = config['dataset']['visual_observation']['enabled'] and config['dataset']['visual_observation']['descriptor_keypoints']

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    st_epoch = config['train']['resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))


    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(config,
                                              action_function=action_function,
                                              observation_function=observation_function,
                                              episodes=multi_episode_dict,
                                              phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase], batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'], drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from a pre-trained network


    '''
    define model for dynamics prediction
    '''

    model_dy = build_visual_dynamics_model(config)
    K = config['vision_net']['num_ref_descriptors']

    print("model_dy.vision_net._reference_descriptors.shape", model_dy.vision_net._ref_descriptors.shape)
    print("model_dy.vision_net.descriptor_dim", model_dy.vision_net.descriptor_dim)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    camera_name = config['vision_net']['camera_name']
    W = config['env']['rgbd_sensors']['sensor_list'][camera_name]['width']
    H = config['env']['rgbd_sensors']['sensor_list'][camera_name]['height']
    diag = np.sqrt(W**2 + H**2) # use this to scale the loss

    # sample reference descriptors unless using precomputed keypoints
    if not use_precomputed_keypoints:
        # sample descriptors from the first training episode (sorted by name)
        episode_names = list(datasets["train"].episode_dict.keys())
        episode_names.sort()
        episode_name = episode_names[0]
        episode = datasets["train"].episode_dict[episode_name]
        episode_idx = 0
        camera_name = config["vision_net"]["camera_name"]
        image_data = episode.get_image_data(camera_name, episode_idx)
        des_img = torch.Tensor(image_data['descriptor'])
        mask_img = torch.Tensor(image_data['mask'])
        ref_descriptor_dict = sample_descriptors(des_img,
                                                 mask_img,
                                                 config['vision_net']['num_ref_descriptors'])



        model_dy.vision_net._ref_descriptors.data = ref_descriptor_dict['descriptors']
        model_dy.vision_net.reference_image = image_data['rgb']
        model_dy.vision_net.reference_indices = ref_descriptor_dict['indices']
    else:
        metadata_file = os.path.join(get_data_root(), config['dataset']['descriptor_keypoints_dir'], 'metadata.p')
        descriptor_metadata = load_pickle(metadata_file)

        # [32, 2]
        ref_descriptors = torch.Tensor(descriptor_metadata['ref_descriptors'])

        # [K, 2]
        ref_descriptors = ref_descriptors[:K]
        model_dy.vision_net._ref_descriptors.data = ref_descriptors
        model_dy.vision_net._ref_descriptors_metadata = descriptor_metadata

        # this is just a sanity check
        assert model_dy.vision_net.num_ref_descriptors == K

    print("reference_descriptors", model_dy.vision_net._ref_descriptors)

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params, lr=lr, betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=sc['factor'],
                                  patience=sc['patience'],
                                  threshold_mode=sc['threshold_mode'],
                                  cooldown=sc['cooldown'],
                                  verbose=True)

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))


    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()


                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    step_start_time = time.time()

                    global_iteration += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config['train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" %(global_iteration))


                        # visual_observations = data['visual_observations']
                        visual_observations_list = data['visual_observations_list']
                        observations = data['observations']
                        actions = data['actions']

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.size(1) == n_samples

                        B = actions.size(0)
                        loss_mse = 0.


                        # compute the output of the visual model for all timesteps
                        visual_model_output_list = []
                        for visual_obs in visual_observations_list:
                            # visual_obs is a dict containing the observation for a
                            # single time step (batched over the batch dimension)
                            # visual_obs[<camera_name>]['rgb_tensor'] has shape [B, 3, H, W]

                            # note: these tensors may still live on the CPU; they are
                            # cast to match `observations` via type_as below
                            dynamics_net_input = None
                            if use_precomputed_keypoints:
                                # note precomputed descriptors stored on disk are of size
                                # K = 32. We need to trim it down to the appropriate size
                                # [B, K_disk, 2] where K_disk is num keypoints on disk
                                keypoints = visual_obs[camera_name]['descriptor_keypoints']


                                # [B, K, 2] after trimming to the first K keypoints
                                keypoints = keypoints[:, :K]

                                if DEBUG:
                                    print("keypoints.shape", keypoints.shape)

                                dynamics_net_input = keypoints.flatten(start_dim=1)
                            else:
                                out_dict = model_dy.vision_net.forward(visual_obs)

                                # [B, vision_model_out_dim]
                                dynamics_net_input = out_dict['dynamics_net_input']

                            visual_model_output_list.append(dynamics_net_input)

                        # concatenate this into a tensor
                        # [B, n_samples, vision_model_out_dim]
                        visual_model_output = torch.stack(visual_model_output_list, dim=1)

                        # cast this to float so it can be concatenated below
                        visual_model_output = visual_model_output.type_as(observations)

                        if DEBUG:
                            print('visual_model_output.shape', visual_model_output.shape)
                            print("observations.shape", observations.shape)
                            print("actions.shape", actions.shape)

                        # states are obtained by concatenating the visual model output and observations
                        # [B, n_samples, vision_model_out_dim + obs_dim]
                        states = torch.cat((visual_model_output, observations), dim=-1)

                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        if DEBUG:
                            print("states.shape", states.shape)

                        for j in range(n_roll):

                            if DEBUG:
                                print("n_roll j: %d" %(j))

                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j : j + n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            dynamics_input = {'observation': state_cur,
                                              'action': action_cur,
                                              }

                            if DEBUG:
                                print("state_cur.shape", state_cur.shape)
                                print("action_cur.shape", action_cur.shape)

                            state_pred = model_dy.dynamics_net(dynamics_input)

                            # normalize by diag to ensure the loss is in [0,1] range
                            loss_mse_cur = criterionMSE(state_pred/diag, state_des/diag)
                            loss_mse += loss_mse_cur / n_roll

                            # l1Loss
                            loss_l1 = l1Loss(state_pred, state_des)

                            # update state_cur
                            # state_pred.unsqueeze(1): B x 1 x state_dim
                            # state_cur: B x n_his x state_dim
                            state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                        # record the running RMSE once per batch, after the rollout
                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    step_duration_meter.update(time.time() - step_start_time)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter'] == 0) or (global_iteration % config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i, data_n_batches[phase],
                            get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (
                            np.sqrt(loss_mse.item()), meter_loss_rmse.avg)

                        log += ', step time %.6f' %(step_duration_meter.avg)
                        step_duration_meter.reset()


                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate", get_lr(optimizer), global_iteration)
                            writer.add_scalar("Loss_MSE/%s" %(phase), loss_mse.item(), global_iteration)
                            writer.add_scalar("L1/%s" %(phase), loss_l1.item(), global_iteration)
                            writer.add_scalar("L1_fraction/%s" %(phase), loss_l1.item()/diag, global_iteration)
                            writer.add_scalar("RMSE average loss/%s" %(phase), meter_loss_rmse.avg, global_iteration)

                    if phase == 'train' and i % config['train']['ckp_per_iter'] == 0:
                        save_model(model_dy, '%s/net_dy_epoch_%d_iter_%d' % (train_dir, epoch, i))



                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'], meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    if config['train']['lr_scheduler']['enabled']:
                        scheduler.step(meter_loss_rmse.avg)

                    # print("\nPhase == valid")
                    # print("meter_loss_rmse.avg", meter_loss_rmse.avg)
                    # print("best_valid_loss", best_valid_loss)
                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush() # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' % (train_dir, epoch_counter_external))
        writer.flush() # flush SummaryWriter events to disk
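
AverageMeter and Tee, used in every example, are assumed to be the usual small utilities; minimal sketches:

import sys

class AverageMeter(object):
    # running average weighted by the number of samples per update
    def __init__(self):
        self.reset()

    def reset(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class Tee(object):
    # duplicate stdout to a log file, like the Unix tee(1) utility
    def __init__(self, filename, mode):
        self.file = open(filename, mode)
        self.stdout = sys.stdout
        sys.stdout = self

    def write(self, data):
        self.file.write(data)
        self.stdout.write(data)

    def flush(self):
        self.file.flush()
        self.stdout.flush()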
def train_dynamics(
    config,
    train_dir,  # str: directory to save output
    multi_episode_dict=None,
    visual_observation_function=None,
    metadata=None,
    spatial_descriptors_data=None,
):
    assert multi_episode_dict is not None
    # assert spatial_descriptors_idx is not None

    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    st_epoch = config['train'][
        'resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    if metadata is not None:
        save_pickle(metadata, os.path.join(train_dir, 'metadata.p'))

    if spatial_descriptors_data is not None:
        save_pickle(spatial_descriptors_data,
                    os.path.join(train_dir, 'spatial_descriptors.p'))

    training_stats = dict()
    training_stats_file = os.path.join(train_dir, 'training_stats.yaml')

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase,
            visual_observation_function=visual_observation_function)

        print("len(datasets[phase])", len(datasets[phase]))
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # compute normalization parameters if not starting from a pre-trained network

    # disabled debugging block: inspect a single training sample
    if False:
        dataset = datasets["train"]
        data = dataset[0]
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data.keys()", data.keys())

        print("data['observations_combined']",
              data['observations_combined'][0])
        print("data['observations_combined'].shape",
              data['observations_combined'].shape)
        print("data['actions'].shape", data['actions'].shape)
        print("data['actions']\n", data['actions'])
        quit()
    '''
    Build model for dynamics prediction
    '''
    model_dy = build_dynamics_model(config)
    if config['dynamics_net'] == "mlp_weight_matrix":
        raise ValueError("can't use weight matrix with standard setup")

    # criterion
    criterionMSE = nn.MSELoss()
    l1Loss = nn.L1Loss()
    smoothL1 = nn.SmoothL1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    # print("model_dy.vision_net._ref_descriptors.device", model_dy.vision_net._ref_descriptors.device)
    # print("model_dy.vision_net #params: %d" %(count_trainable_parameters(model_dy.vision_net)))

    best_valid_loss = np.inf
    valid_loss_type = config['train']['valid_loss_type']
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    loss = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)
            for phase in phases:

                # only validate at a certain frequency
                if (phase == "valid") and (
                    (epoch % config['train']['valid_frequency']) != 0):
                    continue

                model_dy.train(phase == 'train')

                average_meter_container = dict()

                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):

                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()

                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config[
                            'train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        states = data['observations_combined']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            states = states.cuda()
                            actions = actions.cuda()

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps; rollout_model also
                        # consumes the initial action history, so
                        # action_seq.shape = [B, n_his + n_roll - 1, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # use rollout_model rather than unrolling the dynamics manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the loss is computed between tensors of shape
                        # [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now, so no scaling is needed;
                        # all the losses are in meters
                        loss_mse = criterionMSE(state_rollout_pred,
                                                state_rollout_gt)
                        loss_l1 = l1Loss(state_rollout_pred, state_rollout_gt)
                        loss_l2 = torch.norm(state_pred_err, dim=-1).mean()
                        loss_smoothl1 = smoothL1(state_rollout_pred,
                                                 state_rollout_gt)
                        loss_smoothl1_final_step = smoothL1(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])

                        # compute losses at final step of the rollout
                        mse_final_step = criterionMSE(
                            state_rollout_pred[:, -1], state_rollout_gt[:, -1])
                        l2_final_step = torch.norm(state_pred_err[:, -1],
                                                   dim=-1).mean()
                        l1_final_step = l1Loss(state_rollout_pred[:, -1],
                                               state_rollout_gt[:, -1])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step
                        loss_container['l2'] = loss_l2
                        loss_container['smooth_l1'] = loss_smoothl1
                        loss_container[
                            'smooth_l1_final_step'] = loss_smoothl1_final_step

                        # compute the loss
                        loss = 0
                        for key, val in config['loss_function'].items():
                            if val['enabled']:
                                loss += loss_container[key] * val['weight']

                        loss_container['loss'] = loss

                        for key, val in loss_container.items():
                            if key not in average_meter_container:
                                average_meter_container[key] = AverageMeter()

                            average_meter_container[key].update(val.item(), B)

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))

                        log += ', l2: %.6f' % (loss_container['l2'].item())
                        log += ', l2_final_step: %.6f' % (
                            loss_container['l2_final_step'].item())

                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()

                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer),
                                              global_iteration)
                            writer.add_scalar("Loss_train/%s" % (phase),
                                              loss.item(), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(),
                                                  counters[phase])

                    if phase == 'train' and global_iteration % config['train'][
                            'ckp_per_iter'] == 0:
                        save_model(
                            model_dy, '%s/net_dy_epoch_%d_iter_%d' %
                            (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    average_meter_container[valid_loss_type].avg,
                    best_valid_loss)
                print(log)

                # record all average_meter losses
                for key, meter in average_meter_container.items():
                    writer.add_scalar("AvgMeter/%s/%s" % (key, phase),
                                      meter.avg, epoch)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type']
                            == "ReduceLROnPlateau"):
                        scheduler.step(
                            average_meter_container[valid_loss_type].avg)

                    if average_meter_container[
                            valid_loss_type].avg < best_valid_loss:
                        best_valid_loss = average_meter_container[
                            valid_loss_type].avg
                        training_stats['epoch'] = epoch
                        training_stats['global_iteration'] = counters['valid']
                        save_yaml(training_stats, training_stats_file)
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

                writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(
            model_dy, '%s/net_dy_epoch_%d_keyboard_interrupt' %
            (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
Example #7
def evaluate_mpc(
    model_dy,  # dynamics model
    env,  # the environment
    episode,  # OnlineEpisodeReader
    mpc_input_builder,  # DynamicsModelInputBuilder
    planner,  # RandomShooting planner
    eval_indices=None,
    goal_func=None,  # function that gets goal from observation
    config=None,
    wait_for_user_input=False,
    save_dir=None,
    model_name="",
    experiment_name="",
    generate_initial_condition_func=None,
    # (optional) function to generate initial condition, takes episode length N as parameter
):
    # save_dir must be provided
    assert save_dir is not None
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # must specify initial condition distribution
    assert generate_initial_condition_func is not None

    save_yaml(config, os.path.join(save_dir, 'config.yaml'))
    writer = SummaryWriter(log_dir=save_dir)

    pandas_data_list = []
    for episode_length in config['eval']['episode_length']:
        counter = 0
        seed = 0
        while counter < config['eval']['num_episodes']:

            start_time = time.time()
            seed += 1
            set_seed(seed)  # make it repeatable
            # initial_cond = generate_initial_condition(config, N=episode_length)
            initial_cond = generate_initial_condition_func(N=episode_length)

            env.set_initial_condition_from_dict(initial_cond)

            action_sequence_np = torch_utils.cast_to_numpy(
                initial_cond['action_sequence'])
            episode_data = mpc_single_episode(
                model_dy=model_dy,
                env=env,
                action_sequence=action_sequence_np,
                action_zero=np.zeros(2),
                episode=episode,
                mpc_input_builder=mpc_input_builder,
                planner=planner,
                eval_indices=eval_indices,
                goal_func=goal_func,
                config=config,
                wait_for_user_input=wait_for_user_input,
            )

            # continue if invalid
            if not episode_data['valid']:
                print("invalid episode, skipping")
                continue

            pose_error = compute_pose_error(
                obs=episode_data['obs_mpc_final'],
                obs_goal=episode_data['obs_goal'],
            )

            object_delta = compute_pose_error(
                obs=episode_data['obs_init'],
                obs_goal=episode_data['obs_goal'])

            print("object_delta\n", object_delta)

            if wait_for_user_input:
                print("pose_error\n", pose_error)

            pandas_data = {
                'episode_length': episode_length,
                'seed': counter,  # note: records the episode counter, not the RNG seed
                'model_name': model_name,
                'experiment_name': experiment_name,
                'object_pos_delta': object_delta['position_error'],
                'object_angle_delta': object_delta['angle_error'],
                'object_angle_delta_degrees':
                object_delta['angle_error_degrees'],
            }

            pandas_data.update(pose_error)
            pandas_data_list.append(pandas_data)

            # log to tensorboard
            for key, val in pose_error.items():
                plot_name = "%s/episode_len_%d" % (key, episode_length)
                writer.add_scalar(plot_name, val, counter)

            writer.flush()

            print("episode [%d/%d], episode_length %d, duration %.2f" %
                  (counter, config['eval']['num_episodes'], episode_length,
                   time.time() - start_time))
            counter += 1

        df_tmp = pd.DataFrame(pandas_data_list)
        keys = ["angle_error_degrees", "position_error"]
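        # write each statistic at 10 consecutive steps so that it renders
        # as a horizontal reference line in TensorBoard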
        for key in keys:
            mean = df_tmp[key][df_tmp.episode_length == episode_length].mean()
            median = df_tmp[key][df_tmp.episode_length ==
                                 episode_length].median()

            plot_name_mean = "mean/%s/episode_len_%d" % (key, episode_length)
            plot_name_median = "median/%s/episode_len_%d" % (key,
                                                             episode_length)

            for i in range(10):
                writer.add_scalar(plot_name_mean, mean, i)
                writer.add_scalar(plot_name_median, median, i)

    # save some data
    df = pd.DataFrame(pandas_data_list)
    df.to_csv(os.path.join(save_dir, "data.csv"))
Example #8
def train_transporter(
    config,
    train_dir,
    ckp_dir=None,
    multi_episode_dict=None,
):

    assert multi_episode_dict is not None

    if ckp_dir is None:
        ckp_dir = train_dir

    if not os.path.exists(ckp_dir):
        os.makedirs(ckp_dir)

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    images_dir = os.path.join(train_dir, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, 'config.yaml'))

    # set random seed for reproduction
    set_seed(config['train_transporter']['random_seed'])

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # only use images from this specific config
    camera_names = [config['perception']['camera_name']]

    ### data
    datasets = {}
    dataloaders = {}
    for phase in ['train', 'valid']:
        datasets[phase] = ImageTupleDataset(config,
                                            phase=phase,
                                            episodes=multi_episode_dict,
                                            tuple_size=2,
                                            camera_names=camera_names)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train_transporter']['batch_size'],
            shuffle=True if phase == 'train' else False,
            # note: the source keys num_workers off batch_size
            num_workers=config['train_transporter']['batch_size'])

    use_gpu = torch.cuda.is_available()

    crop_enabled = datasets['train'].crop_enabled
    if crop_enabled:
        rgb_image_key = "rgb_crop"
        rgb_tensor_key = "rgb_crop_tensor"
    else:
        rgb_image_key = "rgb_masked_scaled"
        rgb_tensor_key = "rgb_masked_scaled_tensor"

    # disabled debug block for inspecting the dataset
    if False:
        dataset = datasets["train"]

        dataset_size = len(dataset)
        print("len(dataset)", len(dataset))
        print("len(dataset._image_dataset)", len(dataset._image_dataset))

        print("len(dataset['valid'])", len(datasets['valid']))
        print("len(dataset['train'])", len(datasets['train']))

        print("dataset.crop_enabled", dataset.crop_enabled)

        data = dataset[0]
        print("data.keys()", data.keys())
        print("data[0].keys()", data[0].keys())

        # rgb_crop_tensor = data[0]['rgb_crop_tensor']
        # print("rgb_crop_tensor.max()", rgb_crop_tensor.max())
        # print("rgb_crop_tensor.min()", rgb_crop_tensor.min())
        #
        # rgb_image = data[0]['rgb_masked_scaled']
        # print("rgb_crop.dtype", rgb_image.dtype)
        # print("rgb_image.shape", rgb_image.shape)

        rgb_image = data[0][rgb_image_key]
        rgb_tensor = data[0][rgb_tensor_key]
        print("rgb_image.shape", rgb_image.shape)
        print("rgb_tensor.shape", rgb_tensor.shape)

        plt.figure()
        # plt.imshow(rgb_image)
        plt.imshow(data[0][rgb_image_key])
        plt.show()
        quit()

    ### model
    model_kp = Transporter(config, use_gpu=use_gpu)
    print("model_kp #params: %d" % count_parameters(model_kp))

    if config['train_transporter']['resume_epoch'] >= 0:
        model_kp_path = os.path.join(
            ckp_dir, 'net_kp_epoch_%d_iter_%d.pth' %
            (config['train_transporter']['resume_epoch'],
             config['train_transporter']['resume_iter']))
        print("Loading saved ckp from %s" % model_kp_path)
        model_kp.load_state_dict(torch.load(model_kp_path))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_kp.parameters()
    optimizer = optim.Adam(params,
                           lr=float(config['train_transporter']['lr']),
                           betas=(config['train_transporter']['adam_beta1'],
                                  0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.6,
                                  patience=2,
                                  verbose=True)

    if use_gpu:
        model_kp = model_kp.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    log_fout = open(os.path.join(ckp_dir, 'log.txt'), 'w')

    n_epoch = config['train_transporter']['n_epoch']
    for epoch in range(n_epoch):
        phases = ['train', 'valid']

        writer.add_scalar("Training Params/epoch", epoch, global_iteration)

        for phase in phases:
            model_kp.train(phase == 'train')

            meter_loss = AverageMeter()

            loader = dataloaders[phase]
            bar = ProgressBar(max_value=len(loader))

            for i, data in bar(enumerate(loader)):

                with torch.set_grad_enabled(phase == 'train'):
                    src = data[0][rgb_tensor_key]
                    des = data[1][rgb_tensor_key]

                    if use_gpu:
                        src = src.cuda()
                        des = des.cuda()

                    des_pred = model_kp(src, des)

                    # reconstruction loss
                    loss = criterionMSE(des_pred, des)
                    meter_loss.update(loss.item(), src.size(0))

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                if global_iteration > 100:
                    writer.add_scalar("Params/learning rate",
                                      get_lr(optimizer), global_iteration)
                    writer.add_scalar("Loss/%s" % (phase), loss.item(),
                                      global_iteration)

                if i % config['train_transporter']['log_per_iter'] == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f, Loss: %.6f (%.6f)' % (
                        phase, epoch, n_epoch, i, len(loader),
                        get_lr(optimizer), loss.item(), meter_loss.avg)

                    print()
                    print(log)

                    log_fout.write(log + '\n')
                    log_fout.flush()

                if phase == 'train' and i % config['train_transporter'][
                        'ckp_per_iter'] == 0:
                    torch.save(
                        model_kp.state_dict(),
                        '%s/net_kp_epoch_%d_iter_%d.pth' % (ckp_dir, epoch, i))

                # compute some images and draw them
                if global_iteration % config['train_transporter'][
                        'image_per_iter'] == 0:
                    with torch.no_grad():
                        kp = model_kp.predict_keypoint(des)
                        heatmap = model_kp.keypoint_to_heatmap(
                            kp, inv_std=config['perception']['inv_std'])

                        images = visualize_transporter_output(
                            des=des, des_pred=des_pred, heatmap=heatmap, kp=kp)

                        print("images[0].shape", images[0].shape)

                        save_img = np.concatenate(images[:4], axis=0)
                        print("save_img.dtype", save_img.dtype)
                        print("save_img.shape", save_img.shape)

                        save_file = os.path.join(
                            images_dir,
                            '%s_epoch_%d_iter_%d.png' % (phase, epoch, i))
                        cv2.imwrite(save_file, save_img)

                    pass

                writer.flush()  # flush SummaryWriter events to disk
                global_iteration += 1

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, n_epoch, meter_loss.avg, best_valid_loss)
            print(log)
            log_fout.write(log + '\n')
            log_fout.flush()

            if phase == 'valid':
                scheduler.step(meter_loss.avg)
                if meter_loss.avg < best_valid_loss:
                    best_valid_loss = meter_loss.avg

                    torch.save(model_kp.state_dict(),
                               '%s/net_best.pth' % ckp_dir)

    log_fout.close()
Example #9
def eval_dynamics(
    config,
    eval_dir,  # str: directory to save output
    multi_episode_dict=None,
    n_rollout_list=None,
    model_dy=None,  # should already be in eval mode
    phase_list=None,  # typically ["valid"]
    num_epochs=10,
):

    assert n_rollout_list is not None
    assert model_dy is not None
    assert multi_episode_dict is not None

    if phase_list is None:
        phase_list = ["valid"]

    # set random seed for reproduction
    set_seed(config['train']['random_seed'])

    tensorboard_dir = os.path.join(eval_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(eval_dir, "config.yaml"))

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(
        config)

    use_gpu = torch.cuda.is_available()

    best_valid_loss = np.inf
    global_iteration = 0
    counters = {'train': 0, 'valid': 0}
    epoch_counter_external = 0
    stats = dict()

    for n_rollout in n_rollout_list:
        stats[n_rollout] = dict()
        config_tmp = copy.copy(config)
        config_tmp['train']['n_rollout'] = n_rollout
        for phase in phase_list:
            stats[n_rollout][phase] = dict()
            print("Loading data for %s" % phase)
            dataset = MultiEpisodeDataset(
                config_tmp,
                action_function=action_function,
                observation_function=observation_function,
                episodes=multi_episode_dict,
                phase=phase)

            dataloader = DataLoader(dataset,
                                    batch_size=config['train']['batch_size'],
                                    shuffle=True,
                                    num_workers=config['train']['num_workers'],
                                    drop_last=True)

            loss_tensor_container = {"l2_avg": [], "l2_final_step": []}

            step_duration_meter = AverageMeter()
            global_iteration = 0

            for epoch in range(num_epochs):
                for i, data in enumerate(dataloader):

                    loss_container = dict()  # store the losses for this step
                    # types of losses ["l2_avg", "l2_final_step"]

                    step_start_time = time.time()
                    global_iteration += 1
                    counters[phase] += 1

                    with torch.no_grad():
                        n_his = config['train']['n_history']
                        n_roll = n_rollout
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % (global_iteration))
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        # states, actions = data
                        assert actions.shape[1] == n_samples

                        # we don't have any visual observations, so states are observations
                        states = observations

                        # state_cur: B x n_his x state_dim
                        # state_cur = states[:, :n_his]

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps
                        # actions = [B, n_his + n_roll, -1]
                        # so we want action_seq.shape = [B, n_roll, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:
                                             action_end_idx, :]
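                        # e.g. (illustrative) with n_his = 2, n_roll = 5:
                        # n_samples = 7 and the slice keeps actions[0:6],
                        # i.e. the n_his - 1 actions aligned with the
                        # history frames plus the n_roll future actions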

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # try using models_dy.rollout_model instead of doing this manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape",
                                  state_rollout_gt.shape)
                            print("state_rollout_pred.shape",
                                  state_rollout_pred.shape)

                        # the prediction error is computed between tensors
                        # of shape [B, n_roll, state_dim]
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # [B]
                        l2_avg_tensor = torch.mean(torch.norm(state_pred_err,
                                                              dim=-1),
                                                   dim=1).detach().cpu()
                        l2_avg = l2_avg_tensor.mean()

                        # [B]
                        l2_final_step_tensor = torch.norm(
                            state_pred_err[:, -1], dim=-1).detach().cpu()
                        l2_final_step = l2_final_step_tensor.mean()

                        loss_tensor_container["l2_avg"].append(l2_avg_tensor)
                        loss_container["l2_avg"] = l2_avg

                        loss_tensor_container["l2_final_step"].append(
                            l2_final_step_tensor)
                        loss_container["l2_final_step"] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if (i % config['train']['log_per_iter']
                            == 0) or (global_iteration %
                                      config['train']['log_per_iter'] == 0):
                        # print some logging information
                        log = 'step time %.6f' % (step_duration_meter.avg)

                        # log data to tensorboard
                        for loss_type, loss_obj in loss_container.items():
                            plot_name = "%s/n_roll_%s/%s" % (loss_type, n_roll,
                                                             phase)
                            writer.add_scalar(plot_name, loss_obj.item(),
                                              global_iteration)

                            log += " %s: %.6f," % (plot_name, loss_obj.item())

                        print(log)

                    writer.flush()  # flush SummaryWriter events to disk

            for loss_type in loss_tensor_container:
                t = torch.cat(loss_tensor_container[loss_type])
                mean = t.mean()
                median = t.median()
                std = t.std()

                stats[n_rollout][phase][loss_type] = {
                    'mean': mean,
                    'median': median,
                    'std': std
                }

                for stat_type, val in stats[n_rollout][phase][loss_type].items(
                ):
                    plot_name = "stats/%s/n_roll_%d/%s/%s" % (
                        loss_type, n_roll, phase, stat_type)

                    for idx_tmp in [0, 10, 100]:
                        writer.add_scalar(plot_name, val, idx_tmp)
Example #10
def train_explore_and_learn(
        config,
        train_dir,  # str: directory to save output
        data_dir,
        visualize=False):

    # set random seed for reproduction
    set_seed(config['train_explore_and_learn']['random_seed'])

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))

    print(config)

    num_exploration_rounds = config['train_explore_and_learn'][
        'num_exploration_rounds']
    num_episodes_per_exploration_round = config['train_explore_and_learn'][
        'num_episodes_per_exploration_round']
    num_timesteps = config['train_explore_and_learn']['num_timesteps']

    # model_folder = os.path.join(train_dir, "../2020-04-05-23-00-30-887903")
    # model_file = os.path.join(model_folder, "net_best_dy_model.pth")
    # model_dy = torch.load(model_file)
    model_dy = None

    global_iteration = 0

    ##### setup to store the dataset
    metadata = dict()
    metadata['episodes'] = dict()

    # data collector
    data_collector = DrakePusherSliderEpisodeCollector(config)

    ##### explore and learn
    for idx_exploration_round in range(num_exploration_rounds):

        print("Exploration round %d / %d" %
              (idx_exploration_round, num_exploration_rounds))

        ### exploration

        if idx_exploration_round == 0:
            # initial exploration
            exploration_type = 'random'
        else:
            exploration_type = 'mppi'

        collect_episodes(
            config,
            metadata,
            data_collector,
            num_episodes_per_exploration_round,
            data_dir,
            visualize,
            exploration_type,
            model_dy=None if exploration_type == 'random' else model_dy)

        save_yaml(metadata, os.path.join(data_dir, 'metadata.yaml'))

        ### optimize the dynamics model
        model_dy, global_iteration = train_dynamics(config, train_dir,
                                                    data_dir, model_dy,
                                                    global_iteration, writer)
Example #11
def multiprocess_main(num_episodes=1000, num_threads=4):
    # note: despite the name, the workers below are multiprocessing
    # processes rather than threads
    set_seed(500)  # just randomly chosen

    start_time = time.time()
    config = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_20_mugs/config.yaml'))

    num_episodes_per_thread = math.ceil(num_episodes / num_threads)
    num_episodes = num_threads * num_episodes_per_thread
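    # e.g. 10 episodes on 4 workers -> ceil(10/4) = 3 per worker, so
    # num_episodes is rounded up to 12 (a multiple of num_threads)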

    # DATASET_NAME = "mugs_random_colors_%d" % (num_episodes)
    # DATASET_NAME = "single_mug_%d"
    # DATASET_NAME = "correlle_mug-small_single_color_%d" %(num_episodes)
    # DATASET_NAME = "single_corelle_mug_%d" %(num_episodes)
    # DATASET_NAME = "correlle_mug-small_many_colors_%d" %(num_episodes)
    DATASET_NAME = "correlle_mug-small_many_colors_random_%d" % (num_episodes)
    # OUTPUT_DIR = os.path.join(get_data_root(), 'sandbox', DATASET_NAME)
    OUTPUT_DIR = os.path.join(get_data_ssd_root(), 'dataset', DATASET_NAME)
    print("OUTPUT_DIR:", OUTPUT_DIR)

    output_dir = OUTPUT_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    def f(q_tmp):
        config = load_yaml(
            os.path.join(get_project_root(),
                         'experiments/exp_20_mugs/config.yaml'))
        config['dataset']['num_episodes'] = num_episodes_per_thread
        out = collect_episodes(config,
                               output_dir=OUTPUT_DIR,
                               visualize=False,
                               debug=False,
                               run_from_thread=True)

        q_tmp.put(out)

    q = Queue()

    process_list = []
    for i in range(num_threads):
        p = Process(target=f, args=(q, ))
        p.start()
        process_list.append(p)

    metadata = {'episodes': {}}
    for p in process_list:
        while p.is_alive():
            p.join(timeout=1)

            # empty out the queue as we go; a child blocked writing to a
            # full queue can never exit, which would deadlock the join
            while not q.empty():
                out = q.get()
                metadata['episodes'].update(out['metadata']['episodes'])

    # double check
    for p in process_list:
        p.join()

    time.sleep(1.0)
    print("All threads joined")
    elapsed = time.time() - start_time

    # drain any results that were still in the queue

    while not q.empty():
        out = q.get()
        metadata['episodes'].update(out['metadata']['episodes'])

    save_yaml(metadata, os.path.join(OUTPUT_DIR, 'metadata.yaml'))
    print("Generating and saving dataset to disk took %d seconds" %
          (int(elapsed)))
Example #12
def collect_episodes(config,
                     output_dir=None,
                     visualize=True,
                     debug=False,
                     run_from_thread=False,
                     seed=None):

    # gets a random seed for each thread/process independently
    if seed is None:
        seed = np.random.RandomState().randint(0, 10000)

    set_seed(seed)

    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    # initialize config for DataCollector
    num_episodes = config['dataset']['num_episodes']

    # record some metadata
    metadata = dict()
    metadata['episodes'] = dict()

    while (len(metadata['episodes']) < num_episodes):

        i = len(metadata['episodes'])

        if debug:
            input("Press Enter to continue...")

        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        n_his = config['train_dynamics']['n_history']
        ic = generate_initial_condition(
            config=config,
            T_aug_enabled=True,
            n_his=n_his,
            randomize_velocity=True,
            randomize_sdf=True,
            randomize_color=True,
        )

        env = DrakeMugsEnv(ic['config'], visualize=visualize)

        if debug:
            print("initial condition\n", ic)

        # set initial condition on environment
        if visualize:
            print("setting target realtime rate 1.0")
            env.simulator.set_target_realtime_rate(1.0)

        env.reset()
        context = env.get_mutable_context()
        env.set_object_position(context, ic['q_slider'])
        env.set_pusher_position(context, ic['q_pusher'])

        print("ic['action_sequence'].shape", ic['action_sequence'].shape)

        # simulate for 10 seconds to let the mug stabilize
        action_zero = env.get_zero_action()
        env.step(action_zero, dt=10.0)

        episode = collect_single_episode(
            env, action_seq=ic['action_sequence'])['episode_container']

        # discard the episode if the trajectory is too short
        if len(episode._data['trajectory']) < 10:
            print("trajectory was too short, skipping")
            continue

        obs_start = episode._data['trajectory'][0]['observation']
        obs_end = episode._data['trajectory'][-1]['observation']

        q_slider_start = obs_start['slider']['position']['translation']
        q_slider_end = obs_end['slider']['position']['translation']

        dq_slider = obs_start['slider']['position']['translation'] - obs_end[
            'slider']['position']['translation']

        if debug:
            print("len(episode._data['trajectory'])",
                  len(episode._data['trajectory']))
            print("q_slider_start", q_slider_start)
            print("q_slider_end", q_slider_end)
            print("dq_slider", dq_slider)
            print("np.linalg.norm(dq_slider)", np.linalg.norm(dq_slider))

        pose_error = compute_pose_error(obs_start, obs_end)

        # discard the episode if the slider moved less than 1 cm
        # and rotated less than 10 degrees
        if (pose_error['position_error'] <
                0.01) and (pose_error['angle_error_degrees'] < 10):
            print(
                "discarding episode since slider didn't move sufficiently far")
            continue

        print("saving to disk")
        metadata['episodes'][name] = dict()

        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)

        print("output_dir:", output_dir)

        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][name]['non_image_data_file'] = non_image_data_file
        metadata['episodes'][name]['image_data_file'] = image_data_file

        print("done saving to disk")
        elapsed = time.time() - start_time
        print("single episode took: %.2f seconds" % (elapsed))

    if not run_from_thread:
        save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))

    print("Finished collecting episodes")

    return {'metadata': metadata}
Example #13
def precompute_transporter_keypoints(
    multi_episode_dict,
    model_kp,
    output_dir,  # str
    batch_size=10,
    num_workers=10,
    camera_names=None,
    model_file=None,
):

    assert model_file is not None
    metadata = dict()
    metadata['model_file'] = model_file

    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))
    start_time = time.time()

    log_freq = 10

    device = next(model_kp.parameters()).device
    model_kp = model_kp.eval()  # make sure model is in eval mode

    image_data_config = {
        'rgb': True,
        'mask': True,
        'depth_int16': True,
    }

    # build all the dataset
    datasets = {}
    dataloaders = {}
    for episode_name, episode in multi_episode_dict.items():
        single_episode_dict = {episode_name: episode}
        config = model_kp.config

        # disable transporter-type data sampling here since it doesn't
        # work with tuple_size = 1
        dataset_config = copy.deepcopy(config)
        dataset_config['dataset']['use_transporter_type_data_sampling'] = False

        datasets[episode_name] = ImageTupleDataset(
            dataset_config,
            single_episode_dict,
            phase="all",
            image_data_config=image_data_config,
            tuple_size=1,
            compute_K_inv=True,
            camera_names=camera_names)

        dataloaders[episode_name] = DataLoader(datasets[episode_name],
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=False)

    episode_counter = 0
    num_episodes = len(multi_episode_dict)

    for episode_name, dataset in datasets.items():
        episode_counter += 1
        print("\n\n")

        episode = multi_episode_dict[episode_name]
        try:
            hdf5_file = os.path.basename(episode.image_data_file)
        except AttributeError:
            hdf5_file = "%s.h5" % (episode.name)

        hdf5_file_fullpath = os.path.join(output_dir, hdf5_file)

        # swap the .h5 extension for .p to get the pickle filename
        pickle_file_fullpath = os.path.splitext(hdf5_file_fullpath)[0] + ".p"

        # print("episode_name", episode_name)
        # print("hdf5_file_fullpath", hdf5_file_fullpath)
        # print("pickle_file_fullpath", pickle_file_fullpath)

        if os.path.isfile(hdf5_file_fullpath):
            os.remove(hdf5_file_fullpath)

        if os.path.isfile(pickle_file_fullpath):
            os.remove(pickle_file_fullpath)

        episode_keypoint_data = dict()

        episode_start_time = time.time()
        with h5py.File(hdf5_file_fullpath, 'w') as hf:
            for i, data in enumerate(dataloaders[episode_name]):
                data = data[0]
                rgb_crop_tensor = data['rgb_crop_tensor'].to(device)
                crop_params = data['crop_param']
                depth_int16 = data['depth_int16']
                key_tree_joined = data['key_tree_joined']

                # print("\n\n i = %d, idx = %d, camera_name = %s" %(i, data['idx'], data['camera_name']))

                depth = depth_int16.float() * 1.0 / DEPTH_IM_SCALE
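                # depth is stored as int16 in units of 1/DEPTH_IM_SCALE
                # meters (e.g. millimeters if DEPTH_IM_SCALE = 1000);
                # this converts it back to float meters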

                if (i % log_freq) == 0:
                    log_msg = "computing [%d/%d][%d/%d]" % (
                        episode_counter, num_episodes, i + 1,
                        len(dataloaders[episode_name]))
                    print(log_msg)

                B = rgb_crop_tensor.shape[0]

                _, H, W, _ = data['rgb'].shape

                with torch.no_grad():
                    kp_pred = model_kp.predict_keypoint(rgb_crop_tensor)

                    # [B, n_kp, 2]
                    kp_pred_full_pixels = transporter_utils.map_cropped_pixels_to_full_pixels_torch(
                        kp_pred, crop_params)

                    xy = kp_pred_full_pixels.clone()
                    xy[:, :, 0] = (xy[:, :, 0]) * 2.0 / W - 1.0
                    xy[:, :, 1] = (xy[:, :, 1]) * 2.0 / H - 1.0
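                    # normalize pixel coords to [-1, 1]:
                    # u = 0 maps to -1, u = W maps to +1 (likewise v with H)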

                    # debug
                    # print("xy[0,0]", xy[0,0])

                    # get depth values
                    kp_pred_full_pixels_int = kp_pred_full_pixels.type(
                        torch.LongTensor)

                    z = pdc_utils.index_into_batch_image_tensor(
                        depth.unsqueeze(1),
                        kp_pred_full_pixels_int.transpose(1, 2))

                    z = z.squeeze(1)
                    K_inv = data['K_inv']
                    pts_camera_frame = pdc_torch_utils.pinhole_unprojection(
                        kp_pred_full_pixels, z, K_inv)

                    # print("pts_camera_frame.shape", pts_camera_frame.shape)

                    pts_world_frame = pdc_torch_utils.transform_points_3D(
                        data['T_W_C'], pts_camera_frame)

                    # print("pts_world_frame.shape", pts_world_frame.shape)

                for j in range(B):

                    keypoint_data = {}

                    # this goes from [-1,1]
                    keypoint_data['xy'] = torch_utils.cast_to_numpy(xy[j])
                    keypoint_data['uv'] = torch_utils.cast_to_numpy(
                        kp_pred_full_pixels[j])
                    keypoint_data['uv_int'] = torch_utils.cast_to_numpy(
                        kp_pred_full_pixels_int[j])
                    keypoint_data['z'] = torch_utils.cast_to_numpy(z[j])
                    keypoint_data[
                        'pos_world_frame'] = torch_utils.cast_to_numpy(
                            pts_world_frame[j])
                    keypoint_data[
                        'pos_camera_frame'] = torch_utils.cast_to_numpy(
                            pts_camera_frame[j])

                    # save out some data in both hdf5 and pickle format
                    for key, val in keypoint_data.items():
                        save_key = key_tree_joined[
                            j] + "/transporter_keypoints/%s" % (key)
                        hf.create_dataset(save_key, data=val)
                        episode_keypoint_data[save_key] = val

            save_pickle(episode_keypoint_data, pickle_file_fullpath)
            print("duration: %.3f seconds" %
                  (time.time() - episode_start_time))
Example #14
def train_autoencoder(config,
                      train_dir,
                      ckp_dir=None,
                      multi_episode_dict=None,
                      type=None,  # one of ["SpatialAutoencoder", "ConvolutionalAutoencoder"]
                      ):
    assert multi_episode_dict is not None

    if ckp_dir is None:
        ckp_dir = os.path.join(train_dir, 'checkpoints')

    if not os.path.exists(ckp_dir):
        os.makedirs(ckp_dir)

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    images_dir = os.path.join(train_dir, 'images')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, 'config.yaml'))

    # set random seed for reproduction
    set_seed(config['train_autoencoder']['random_seed'])
    # only use images from the camera specified in the config
    camera_names = [config['perception']['camera_name']]

    if type == "SpatialAutoencoder":
        model = SpatialAutoencoder.from_global_config(config)
        image_preprocess_func = functools.partial(spatial_autoencoder_image_preprocessing,
                                                  H_in=model.input_image_shape[0],
                                                  W_in=model.input_image_shape[1],
                                                  H_out=model.output_image_shape[0],
                                                  W_out=model.output_image_shape[1])
    elif type == "ConvolutionalAutoencoder":
        model = ConvolutionalAutoencoder.from_global_config(config)
        image_preprocess_func = AutoencoderImagePreprocessFunctionFactory.convolutional_autoencoder(config)
    else:
        raise ValueError("unknown model type: %s" % (type))

    writer = SummaryWriter(log_dir=tensorboard_dir)

    ### data
    datasets = {}
    dataloaders = {}
    for phase in ['train', 'valid']:
        datasets[phase] = AutoencoderImageDataset(config,
                                                  phase=phase,
                                                  episodes=multi_episode_dict,
                                                  camera_names=camera_names,
                                                  image_preprocess_func=image_preprocess_func)

        dataloaders[phase] = DataLoader(
            datasets[phase], batch_size=config['train_autoencoder']['batch_size'],
            shuffle=True if phase == 'train' else False,
            # note: the source keys num_workers off batch_size
            num_workers=config['train_autoencoder']['batch_size'])

    use_gpu = torch.cuda.is_available()

    params = model.parameters()
    optimizer = optim.Adam(
        params, lr=float(config['train_autoencoder']['lr']),
        betas=(config['train_autoencoder']['adam_beta1'], 0.999))

    scheduler = None
    if config['train_autoencoder']['lr_scheduler']['enabled']:
        scheduler = ReduceLROnPlateau(
            optimizer, 'min', factor=0.6, patience=2, verbose=True)

    if use_gpu:
        model = model.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    log_fout = open(os.path.join(ckp_dir, 'log.txt'), 'w')

    # criterion
    criterionMSE = nn.MSELoss()


    # a little test (disabled)
    if False:
        data = datasets['train'][0]
        print(data.keys())

        print("data['target_tensor'].shape", data['target_tensor'].shape)
        print("data['target_mask'].shape", data['target_mask'].shape)
        fig = plt.figure()
        ax = fig.subplots(2)
        target_img = data['target']
        print("target_img.dtype", target_img.dtype)
        ax[0].imshow(data['input'])
        ax[1].imshow(data['target'], cmap='gray', vmin=0, vmax=255)
        plt.show()

        quit()

    # another little test (disabled)
    if False:
        data = datasets['train'][0]
        print(data.keys())

        print("data['target_tensor'].shape", data['target_tensor'].shape)
        print("data['target_mask'].shape", data['target_mask'].shape)
        fig = plt.figure()
        ax = fig.subplots(2)
        target_img = data['target']
        target_tensor = data['target_tensor'].unsqueeze(0)
        target_tensor_np = torch_utils.convert_torch_image_to_numpy(target_tensor).squeeze()
        print("target_img.dtype", target_img.dtype)
        ax[0].imshow(target_img)
        ax[1].imshow(target_tensor_np)
        plt.show()

        quit()

    counters = {'train': 0, 'valid': 0}


    n_epoch = config['train_autoencoder']['n_epoch']
    for epoch in range(n_epoch):
        phases = ['train', 'valid']

        writer.add_scalar("Training Params/epoch", epoch, global_iteration)

        for phase in phases:
            model.train(phase == 'train')

            meter_loss = AverageMeter()

            loader = dataloaders[phase]
            bar = ProgressBar(max_value=len(loader))

            step_duration_meter = AverageMeter()
            epoch_start_time = time.time()
            prev_time = time.time()
            print("\n\n")
            for i, data in bar(enumerate(loader)):
                loss_container = dict() # store the losses for this step
                counters[phase] += 1

                with torch.set_grad_enabled(phase == 'train'):
                    input = data['input_tensor']
                    target = data['target_tensor']

                    if use_gpu:
                        input = input.cuda()
                        target = target.cuda()

                    out = model(input)
                    target_pred = out['output']

                    # print("target.shape", target.shape)
                    # print("target_pred.shape", target_pred.shape)

                    # reconstruction loss
                    l2_recon = criterionMSE(target, target_pred)
                    loss_container['l2_recon'] = l2_recon


                    # loss_masked
                    # [B, H', W']
                    mask = data['target_mask'].to(target.device)
                    mask_idx = mask > 0

                    # convert to BHWC ordering so we can directly index
                    target_masked = target.permute(0, 2, 3, 1)[mask_idx]
                    target_pred_masked = target_pred.permute(0, 2, 3, 1)[mask_idx]
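                    # shape walkthrough: target is [B, C, H', W'] and the
                    # permute gives [B, H', W', C]; indexing with the boolean
                    # mask [B, H', W'] keeps the N foreground pixels -> [N, C]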

                    # print('target_masked.shape', target_masked.shape)
                    # print("target_pred_masked.shape", target_pred_masked.shape)
                    l2_recon_masked = criterionMSE(target_masked, target_pred_masked)

                    loss_container['l2_recon_masked'] = l2_recon_masked

                    # compute the loss
                    loss = 0
                    for key, val in config['train_autoencoder']['loss_function'].items():
                        if val['enabled']:
                            loss += loss_container[key] * val['weight']
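                    # assumed config sketch (keys mirror loss_container,
                    # weights illustrative):
                    #   train_autoencoder:
                    #     loss_function:
                    #       l2_recon: {enabled: True, weight: 1.0}
                    #       l2_recon_masked: {enabled: False, weight: 1.0}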

                    meter_loss.update(loss.item())

                step_duration_meter.update(time.time() - prev_time)
                prev_time = time.time()

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    # re-query parameters(); the `params` generator was
                    # already consumed when constructing the optimizer
                    nn.utils.clip_grad_norm_(model.parameters(), 1)
                    optimizer.step()

                if global_iteration > 100:
                    writer.add_scalar("Params/learning rate", get_lr(optimizer), global_iteration)
                    # writer.add_scalar("Loss/%s" % (phase), loss.item(), global_iteration)

                    writer.add_scalar("Loss_train/%s" % (phase), loss.item(), counters[phase])

                    for loss_type, loss_obj in loss_container.items():
                        plot_name = "Loss/%s/%s" % (loss_type, phase)
                        writer.add_scalar(plot_name, loss_obj.item(), counters[phase])

                if i % config['train_autoencoder']['log_per_iter'] == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f, Loss: %.6f (%.6f)' % (
                        phase, epoch, n_epoch, i, len(loader), get_lr(optimizer),
                        loss.item(), meter_loss.avg)

                    log += ', step time %.6f' % (step_duration_meter.avg)
                    step_duration_meter.reset()

                    print(log)

                    log_fout.write(log + '\n')
                    log_fout.flush()

                if phase == 'train' and i % config['train_autoencoder']['ckp_per_iter'] == 0:
                    torch.save(
                        model.state_dict(),
                        '%s/net_kp_epoch_%d_iter_%d.pth' % (ckp_dir, epoch, i))

                if i % config['train_autoencoder']['img_save_per_iter'] == 0:

                    nrows = 4
                    ncols = 2
                    fig_width = 5

                    B, _, H, W = target.shape
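                    # choose fig_height so the figure matches the aspect
                    # ratio of the image grid, which is (nrows * H) pixels
                    # tall by (ncols * W) pixels wide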
                    fig_height = fig_width * ((nrows * H) / (ncols * W))
                    figsize = (fig_width, fig_height)
                    fig = plt.figure(figsize=figsize)

                    ax = fig.subplots(nrows=nrows, ncols=ncols)

                    target_np = torch_utils.convert_torch_image_to_numpy(target)
                    target_pred_np = torch_utils.convert_torch_image_to_numpy(target_pred)

                    for n in range(nrows):
                        ax[n, 0].imshow(target_np[n])
                        ax[n, 1].imshow(target_pred_np[n])

                    save_file = os.path.join(images_dir,
                                             '%s_epoch_%d_iter_%d.png' %(phase, epoch, i))

                    fig.savefig(save_file)
                    plt.close(fig)


                writer.flush()  # flush SummaryWriter events to disk
                global_iteration += 1

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, n_epoch, meter_loss.avg, best_valid_loss)
            print(log)
            print("Epoch Duration:", time.time() - epoch_start_time)
            log_fout.write(log + '\n')
            log_fout.flush()

            if phase == 'valid':
                if scheduler is not None:
                    scheduler.step(meter_loss.avg)
                if meter_loss.avg < best_valid_loss:
                    best_valid_loss = meter_loss.avg

                    torch.save(model.state_dict(), '%s/net_best.pth' % ckp_dir)

    log_fout.close()
Example #15
def collect_episodes(config,
                     output_dir=None,
                     visualize=True,
                     debug=False):
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), 'data')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # save the config
    config_save_file = os.path.join(output_dir, 'config.yaml')
    save_yaml(config, config_save_file)

    # initialize config for DataCollector
    dc = DrakePusherSliderEpisodeCollector(config, visualize=visualize)
    num_episodes = config['dataset']['num_episodes']

    # record some metadata
    metadata = dict()
    metadata['episodes'] = dict()

    while (len(metadata['episodes']) < num_episodes):

        i = len(metadata['episodes'])

        if debug:
            input("Press Enter to continue...")

        print("\n")
        start_time = time.time()
        print("collecting episode %d of %d" % (i + 1, num_episodes))
        name = "%s_idx_%d" % (get_current_YYYY_MM_DD_hh_mm_ss_ms(), i)

        ic = generate_initial_condition(config=config,
                                        T_aug_enabled=True)

        if debug:
            print("initial condition\n", ic)

        episode = dc.collect_single_episode(visualize=visualize,
                                            episode_name=name,
                                            q_pusher=ic['q_pusher'],
                                            q_slider=ic['q_slider'],
                                            # v_pusher=ic['v_pusher'],
                                            action_seq=ic['action_seq'],
                                            )

        # discard the episode if the trajectory is too short
        if len(episode._data['trajectory']) < 10:
            print("trajectory was too short, skipping")
            continue

        obs_start = episode._data['trajectory'][5]['observation']
        obs_end = episode._data['trajectory'][-1]['observation']

        q_slider_start = obs_start['slider']['position']['translation']
        q_slider_end = obs_end['slider']['position']['translation']

        dq_slider = obs_start['slider']['position']['translation'] - obs_end['slider']['position']['translation']

        if debug:
            print("len(episode._data['trajectory'])", len(episode._data['trajectory']))
            print("q_slider_start", q_slider_start)
            print("q_slider_end", q_slider_end)
            print("dq_slider", dq_slider)
            print("np.linalg.norm(dq_slider)", np.linalg.norm(dq_slider))

        # if slider didn't move by at least 1 mm then discard this episode
        if (np.linalg.norm(dq_slider) < 0.001):  # one mm
            print("discarding episode since slider didn't move")
            continue

        print("saving to disk")
        metadata['episodes'][name] = dict()

        image_data_file = episode.save_images_to_hdf5(output_dir)
        non_image_data_file = episode.save_non_image_data_to_pickle(output_dir)

        print("output_dir:", output_dir)

        print("non_image_data.keys()", episode.non_image_data.keys())

        metadata['episodes'][name]['non_image_data_file'] = non_image_data_file
        metadata['episodes'][name]['image_data_file'] = image_data_file

        print("done saving to disk")
        elapsed = time.time() - start_time
        print("single episode took: %.2f seconds" % (elapsed))

    save_yaml(metadata, os.path.join(output_dir, 'metadata.yaml'))
Example #16
def evaluate_mpc(
        config,  # the global config
        dynamics_net,  # the dynamics model
        vision_net,  # the vision model
        save_dir,  # str: directory to store results
        observation_function,
        env,
        dataset,  # dataset
):
    # save config
    os.makedirs(save_dir)
    save_yaml(config, os.path.join(save_dir, 'config.yaml'))

    n_history = config['train']['n_history']
    start_idx = n_history - 1 + config['eval']['start_idx']
    end_idx = start_idx + config['eval']['episode_length']
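    # e.g. (illustrative) with n_history = 2 and eval.start_idx = 0:
    # start_idx = 1, end_idx = 1 + episode_length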

    camera_name = config['vision_net']['camera_name']

    # build the planner
    planner = planner_from_config(config)

    episode_names = dataset.get_episode_names()
    episode_names.sort()
    num_episodes = min(config['eval']['num_episodes'], len(episode_names))

    mpc_idx = 0

    pandas_data_list = []

    for i in range(num_episodes):
        mpc_idx += 1

        episode_name = episode_names[i]
        episode = dataset.episode_dict[episode_name]

        data_goal = dataset._getitem(episode,
                                     end_idx,
                                     rollout_length=0,
                                     n_history=1)

        # run the vision net on the goal frame to get the goal keypoints
        vision_net_out = vision_net.forward_visual_obs(
            data_goal['visual_observations'])
        goal_keypoints = vision_net_out['dynamics_net_input'].squeeze().cpu(
        ).numpy()

        debug_dict = {
            'goal_data': data_goal,
            'goal_vision_net_out': vision_net_out
        }

        # reset the simulator state
        env.reset()
        observation_full = episode.get_observation(start_idx)
        context = env.get_mutable_context()
        env.set_simulator_state_from_observation_dict(context,
                                                      observation_full)

        folder_name = "episode_%d" % mpc_idx
        save_dir_tmp = os.path.join(save_dir, folder_name)
        os.makedirs(save_dir_tmp)

        # run the simulation for this episode
        mpc_out = mpc_episode_keypoint_observation(
            config=config,
            model_dy=dynamics_net,
            model_vision=vision_net,
            planner=planner,
            obs_goal=goal_keypoints,
            observation_function=observation_function,
            env=env,
            save_dir=save_dir_tmp,
            use_gpu=True,
            wait_for_user_input=False,
            debug_dict=debug_dict,
            visualize=True,
            verbose=False,
            video=True)

        # ground truth slider to world
        obs_goal = episode.get_observation(end_idx)
        T_W_S_goal = transform_utils.transform_from_pose_dict(
            obs_goal['slider']['position'])

        obs_final = mpc_out['debug_data'][-1]['obs']
        # actual T_W_S at end of MPC rollout
        T_W_S = transform_utils.transform_from_pose_dict(
            obs_final['slider']['position'])

        # error between target and actual
        T_goal_S = np.matmul(np.linalg.inv(T_W_S_goal), T_W_S)

        pos_err = np.linalg.norm(T_goal_S[:3, 3])
        axis, angle = transforms3d.axangles.mat2axangle(T_goal_S[:3, :3])
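        # T_goal_S expresses the achieved slider pose in the goal frame:
        # its translation norm is the position error and its rotation,
        # converted to axis-angle, gives the angle error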

        print("T_W_S[:3, 3]", T_W_S[:3, 3])
        print("T_W_S_goal[:3, 3]", T_W_S_goal[:3, 3])

        data = {
            'position_error': pos_err,
            'angle_error': abs(angle),
            'angle_error_degrees': np.rad2deg(abs(angle)),
        }

        print("\ndata\n:", data)

        # parse the pandas data out
        pandas_data = mpc_out['pandas_data']
        pandas_data.update(data)

        # record some additional data
        pandas_data['episode_name'] = episode_name
        pandas_data['mpc_idx'] = mpc_idx
        pandas_data['start_idx'] = start_idx
        pandas_data['end_idx'] = end_idx
        pandas_data['output_dir'] = folder_name
        pandas_data_list.append(pandas_data)

    # create dataframe and save to csv
    df = pd.DataFrame(pandas_data_list)
    df.to_csv(os.path.join(save_dir, "data.csv"))

    # record summary statistics in metadata.yaml

    metadata = dict()
    for key in ['reward', 'position_error', 'angle_error_degrees']:
        vec = df[key]
        metadata[key] = {
            'mean': float(np.mean(vec)),
            'median': float(np.median(vec)),
            'std_dev': float(np.std(vec))
        }

    save_yaml(metadata, os.path.join(save_dir, 'metadata.yaml'))
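The pose-error metric used above can be exercised in isolation. Below is a minimal sketch (the two poses are made up for illustration) that builds 4x4 homogeneous transforms, composes the relative transform, and recovers the expected errors with transforms3d:

import numpy as np
import transforms3d

# two hypothetical slider poses, differing by 0.1 m in x and 30 degrees about z
T_W_S_goal = np.eye(4)
T_W_S = np.eye(4)
T_W_S[:3, :3] = transforms3d.euler.euler2mat(0, 0, np.deg2rad(30))
T_W_S[0, 3] = 0.1

# achieved pose expressed in the goal frame
T_goal_S = np.matmul(np.linalg.inv(T_W_S_goal), T_W_S)

pos_err = np.linalg.norm(T_goal_S[:3, 3])
axis, angle = transforms3d.axangles.mat2axangle(T_goal_S[:3, :3])
print("position error: %.3f m" % pos_err)                    # 0.100
print("angle error: %.1f degrees" % np.rad2deg(abs(angle)))  # 30.0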
Example #17
import numpy as np

from key_dynam.utils.utils import load_pickle, save_pickle, save_yaml
from key_dynam.utils import meshcat_utils
from key_dynam.utils import transform_utils

data_file = "/home/manuelli/data/key_dynam/hardware_experiments/closed_loop_rollouts/stable/2020-07-10-22-16-08_long_push_on_long_side/mpc_rollouts/2020-07-10-22-19-03-591910/data.p"

data = load_pickle(data_file)
pts = data['plan']['plan_data'][-1]['dynamics_model_input_data'][
    'visual_observation']['pts_W']
print("pts\n", pts)

centroid = np.mean(pts, axis=0)
pts_centered = pts - centroid
save_data = {'object_points': pts_centered.tolist()}
save_file = "object_points_master.yaml"
save_yaml(save_data, save_file)

# do some meshcat debug
vis = meshcat_utils.make_default_visualizer_object()
meshcat_utils.visualize_points(vis,
                               "object_points_centered",
                               pts_centered,
                               color=[0, 0, 255],
                               size=0.01)

meshcat_utils.visualize_points(vis,
                               "object_points_world",
                               pts,
                               color=[0, 255, 0],
                               size=0.01)
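As a quick sanity check (assuming save_yaml writes standard YAML), the saved points can be read back with plain PyYAML and the centering verified:

import numpy as np
import yaml

save_file = "object_points_master.yaml"
with open(save_file, 'r') as f:
    loaded = yaml.safe_load(f)

pts_loaded = np.array(loaded['object_points'])
print(np.allclose(pts_loaded.mean(axis=0), 0.0, atol=1e-6))  # True, up to rounding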
Example #18
def precompute_descriptors(multi_episode_dict,
                           model,
                           output_dir,  # str
                           batch_size=10,
                           num_workers=10,
                           model_file=None,
                           ):
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    metadata = {'model_file': model_file}
    metadata_file = os.path.join(output_dir, 'metadata.yaml')
    save_yaml(metadata, metadata_file)

    total_start_time = time.time()  # overall timer; the loop below reuses start_time for per-step timing

    log_freq = 10

    device = next(model.parameters()).device
    model.eval()  # make sure model is in eval mode

    # build one dataset and dataloader per episode; each episode's descriptors are written to its own hdf5 file
    datasets = {}
    dataloaders = {}
    for episode_name, episode in iteritems(multi_episode_dict):
        single_episode_dict = {episode_name: episode}
        config = None  # ImageDataset is constructed without a config in this usage
        datasets[episode_name] = ImageDataset(config, single_episode_dict, phase="all")
        dataloaders[episode_name] = DataLoader(datasets[episode_name],
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=False)

    episode_counter = 0
    num_episodes = len(multi_episode_dict)

    for episode_name, dataset in iteritems(datasets):
        episode_counter += 1
        print("\n\n")

        episode = multi_episode_dict[episode_name]
        hdf5_file = os.path.basename(episode.image_data_file)
        hdf5_file_fullpath = os.path.join(output_dir, hdf5_file)

        if os.path.isfile(hdf5_file_fullpath):
            os.remove(hdf5_file_fullpath)

        dataloader = dataloaders[episode_name]

        episode_start_time = time.time()
        with h5py.File(hdf5_file_fullpath, 'w') as hf:
            for i, data in enumerate(dataloader):
                rgb_tensor = data['rgb_tensor'].to(device)
                key_tree_joined = data['key_tree_joined']

                if (i % log_freq) == 0:
                    log_msg = "computing [%d/%d][%d/%d]" % (episode_counter, num_episodes, i + 1, len(dataloader))
                    print(log_msg)

                # don't use gradients
                with torch.no_grad():
                    start_time = time.time()
                    out = model.forward(rgb_tensor)
                    print("forward took", time.time() - start_time)
                    B, _, H, W = rgb_tensor.shape

                    # iterate over elements in the batch
                    start_time = time.time()
                    for j in range(B):
                        # [D, H, W]
                        des_image = out['descriptor_image'][j].cpu().numpy()
                        key = key_tree_joined[j] + "/descriptor_image"
                        hf.create_dataset(key, data=des_image)

                    print("saving images took", time.time() - start_time)

        print("duration: %.3f seconds" % (time.time() - episode_start_time))

    print("total time to compute descriptors: %.3f seconds" % (time.time() - start_time))