示例#1
0
def transform(zip_file, save_dir=None):
  """Refactor file directories, rename images and partition the train/val/test
  set.

  Args:
    zip_file: Path of the dataset archive; forwarded to `save_images`.
    save_dir: Directory that receives 'train_test_split.pkl' and
      'partitions.pkl'. When None, the directory containing `zip_file` is
      used, which is the same fallback `save_images` applies.

  Returns:
    None. The partition dict is pickled to `<save_dir>/partitions.pkl`.
  """
  # `save_images` accepts `save_dir=None`, but the `osp.join` calls in this
  # function do not, so the default has to be resolved before they run.
  if save_dir is None:
    save_dir = osp.dirname(osp.abspath(zip_file))

  train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
  train_test_split = save_images(zip_file, save_dir, train_test_split_file)
  trainval_im_names = train_test_split['trainval_im_names']

  # Sort ids, so that id-to-label mapping remains the same when running
  # the code on different machines.
  trainval_ids = sorted({parse_new_im_name(n, 'id') for n in trainval_im_names})
  trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

  # partition train/val/test set
  # NOTE(review): num_val_ids=0 presumably means no identity is held out for
  # validation -- confirm against partition_train_val_set.
  train_val_partitions = partition_train_val_set(
    trainval_im_names, parse_new_im_name, num_val_ids=0)
  train_im_names = train_val_partitions['train_im_names']

  # Same deterministic ordering as above, restricted to the train subset.
  train_ids = sorted({parse_new_im_name(n, 'id') for n in train_im_names})
  train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

  # A mark is used to denote whether the image is from
  #   query (mark == 0), or
  #   gallery (mark == 1), or
  #   multi query (mark == 2) set
  # Only marks 0 and 1 are produced by this function.
  val_query_im_names = train_val_partitions['val_query_im_names']
  val_gallery_im_names = train_val_partitions['val_gallery_im_names']
  val_im_names = list(val_query_im_names) + list(val_gallery_im_names)
  val_marks = [0] * len(val_query_im_names) + [1] * len(val_gallery_im_names)

  query_im_names = train_test_split['q_im_names']
  gallery_im_names = train_test_split['gallery_im_names']
  test_im_names = list(query_im_names) + list(gallery_im_names)
  test_marks = [0] * len(query_im_names) + [1] * len(gallery_im_names)

  partitions = {'trainval_im_names': trainval_im_names,
                'trainval_ids2labels': trainval_ids2labels,
                'train_im_names': train_im_names,
                'train_ids2labels': train_ids2labels,
                'val_im_names': val_im_names,
                'val_marks': val_marks,
                'test_im_names': test_im_names,
                'test_marks': test_marks}
  partition_file = osp.join(save_dir, 'partitions.pkl')
  save_pickle(partitions, partition_file)
  print('Partition file saved to {}'.format(partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
  """Pickle a dict that maps original image names to their new names.

  Original names are paths relative to `raw_dir` (sub-directory joined with
  file name), gathered from 'bounding_box_train', 'bounding_box_test',
  'query' and 'gt_bbox' in that order. New names come from `map_im_names`.
  After saving, a few sanity statistics about the name sets are printed.

  Args:
    raw_dir: Directory that contains the four sub-directories listed above.
    ori_to_new_im_name_file: Destination path handed to `save_pickle`.
  """
  ori_im_names = []
  for sub_dir in ['bounding_box_train', 'bounding_box_test', 'query', 'gt_bbox']:
    listed = sorted(
      get_im_names(osp.join(raw_dir, sub_dir), return_path=False, return_np=False))
    # Filter out id -1
    if sub_dir == 'bounding_box_test':
      listed = [n for n in listed if not n.startswith('-1')]
    # Images in different original directories may have same names,
    # so here we use relative paths as original image names.
    ori_im_names.extend(osp.join(sub_dir, n) for n in listed)

  new_im_names = map_im_names(ori_im_names, parse_original_im_name, new_im_name_tmpl)
  ori_to_new_im_name = dict(zip(ori_im_names, new_im_names))
  save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
  print('File saved to {}'.format(ori_to_new_im_name_file))

  ##################
  # Just Some Info #
  ##################

  print('len(im_names)', len(ori_im_names))
  print('len(set(im_names))', len(set(ori_im_names)))
  print('len(set(new_im_names))', len(set(new_im_names)))
  print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

  # Re-list the raw directories; unlike above, these lists are neither
  # filtered for id -1 nor prefixed with the directory name.
  bounding_box_train_im_names = get_im_names(
    osp.join(raw_dir, 'bounding_box_train'), return_path=False, return_np=False)
  bounding_box_test_im_names = get_im_names(
    osp.join(raw_dir, 'bounding_box_test'), return_path=False, return_np=False)
  query_im_names = get_im_names(
    osp.join(raw_dir, 'query'), return_path=False, return_np=False)
  gt_bbox_im_names = get_im_names(
    osp.join(raw_dir, 'gt_bbox'), return_path=False, return_np=False)

  train_set = set(bounding_box_train_im_names)
  test_set = set(bounding_box_test_im_names)
  query_set = set(query_im_names)
  gt_set = set(gt_bbox_im_names)

  # Pairwise checks for file names shared between directories.
  print('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
        train_set.isdisjoint(test_set))
  print('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
        train_set.isdisjoint(query_set))
  print('set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names))',
        train_set.isdisjoint(gt_set))

  print('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
        test_set.isdisjoint(query_set))
  print('set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names))',
        test_set.isdisjoint(gt_set))

  print('set(query_im_names).isdisjoint(set(gt_bbox_im_names))',
        query_set.isdisjoint(gt_set))

  print('len(query_im_names)', len(query_im_names))
  print('len(gt_bbox_im_names)', len(gt_bbox_im_names))
  print('len(set(query_im_names) & set(gt_bbox_im_names))', len(query_set & gt_set))
  print('len(set(query_im_names) | set(gt_bbox_im_names))', len(query_set | gt_set))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    The '*.jpg' files under 'bounding_box_train', 'bounding_box_test' and
    'query' inside `save_dir` are handed to `move_ims`, which places them in
    `<save_dir>/images`. The returned names are then cut back into three
    consecutive groups matching those directories.

    Note: archive extraction is disabled in this variant, so the three raw
    directories must already exist directly under `save_dir`.

    Args:
        zip_file: Path of the dataset archive; only its parent directory is
            used, as the fallback for `save_dir`.
        save_dir: Working directory. Defaults to the directory of `zip_file`.
        train_test_split_file: Path handed to `save_pickle` for the split.

    Returns:
        Dict with keys 'trainval_im_names', 'gallery_im_names' and
        'q_im_names', each holding the corresponding slice of new names.
    """
    print("Extracting zip file")
    archive_dir = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = archive_dir
    may_make_dir(save_dir)

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)

    # Gather the paths directory by directory, remembering how many each one
    # contributed so the flat result can be split again afterwards.
    all_paths = []
    counts = []
    for subset in ['bounding_box_train', 'bounding_box_test', 'query']:
        subset_paths = sorted(get_im_names(osp.join(save_dir, subset),
                                           pattern='*.jpg',
                                           return_path=True,
                                           return_np=False))
        all_paths.extend(subset_paths)
        counts.append(len(subset_paths))

    im_names = move_ims(all_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Consecutive slices of `im_names`, in the same order as the directories.
    split = dict()
    start = 0
    split_keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    for key, count in zip(split_keys, counts):
        split[key] = im_names[start:start + count]
        start += count

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images_rap2(zip_file, annotation_file, query_file, save_dir=None,
                     train_test_split_file=None):
    """Rename and move all the used images to a directory.

    Extracts `zip_file` into `save_dir`, obtains the train / test / query file
    names from `_get_im_names_rap2`, moves them into `<save_dir>/images` via
    `_move_ims`, and pickles the resulting split.

    Args:
        zip_file: Path of the dataset archive to extract.
        annotation_file: Forwarded to `_get_im_names_rap2`.
        query_file: Forwarded to `_get_im_names_rap2`.
        save_dir: Extraction and output directory. Defaults to the directory
            of `zip_file`.
        train_test_split_file: Path handed to `save_pickle` for the split.

    Returns:
        Dict with keys 'trainval_im_names', 'gallery_im_names' and
        'q_im_names', each holding the corresponding slice of new names.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)

    # Get fnames
    # NOTE(review): the meaning of the third argument (1) is not visible
    # here -- confirm against _get_im_names_rap2.
    img_fnames_train, img_fnames_test, img_fnames_query = \
        _get_im_names_rap2(annotation_file, query_file, 1)
    # The results are sorted in place; `.shape` is read below, so they are
    # assumed to be array-like rather than plain lists.
    img_fnames_train.sort()
    img_fnames_test.sort()
    img_fnames_query.sort()
    im_paths = (list(img_fnames_train) + list(img_fnames_test)
                + list(img_fnames_query))
    nums = [img_fnames_train.shape[0], img_fnames_test.shape[0],
            img_fnames_query.shape[0]]

    # Move images
    # NOTE(review): images are read from the archive's own directory (`root`),
    # not from `save_dir` where the archive was just extracted. The two only
    # coincide when `save_dir` resolves to `root` -- confirm this is intended.
    org_img_dir = osp.join(root, 'images-pedestrian')
    im_names = _move_ims(org_img_dir, im_paths, new_im_dir,
                         parse_original_im_name, new_im_name_tmpl)

    # Slice the flat name list back into the three groups using cumulative
    # offsets: [0, n_train, n_train + n_test, total].
    split = dict()
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    inds = np.cumsum([0] + nums)
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one directory.

    For each (image dir, partition file) pair, the 'trainval_im_names' listed
    in the partition file are moved into `<save_dir>/trainval_images` through
    `move_ims`. A combined partition file is then written to
    `<save_dir>/partitions.pkl`.

    Args:
        im_dirs: Image directories, one per dataset.
        partition_files: Pickled partition files, paired with `im_dirs`.
        save_dir: Output directory for the merged images and partition file.
    """
    merged_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(merged_im_dir)

    merged_im_names = []
    # Start id passed to `move_ims`; it advances by the size of the id mapping
    # returned for each dataset (presumably so ids never collide -- confirm
    # against move_ims).
    next_start_id = 0
    for src_im_dir, src_partition_file in zip(im_dirs, partition_files):
        src_partitions = load_pickle(src_partition_file)
        src_im_paths = sorted(
            ospj(src_im_dir, n) for n in src_partitions['trainval_im_names'])
        moved_im_names, id_mapping = move_ims(src_im_paths, merged_im_dir,
                                              parse_im_name, new_im_name_tmpl,
                                              next_start_id)
        next_start_id += len(id_mapping)
        merged_im_names += moved_im_names

    # Ids 0..next_start_id-1 map to themselves as labels.
    partitions = {
        'trainval_im_names': merged_im_names,
        'trainval_ids2labels': {i: i for i in range(next_start_id)},
    }
    partition_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Images are collected from four sub-directories of `save_dir`:
    'bounding_box_train', 'bounding_box_test' (names starting with '-1' are
    dropped), 'query', and 'gt_bbox' (only images whose (id, cam) pair also
    occurs in 'query'). They are handed to `move_ims`, which places them in
    `<save_dir>/images`, and the returned names are split back into groups.

    Note: archive extraction is disabled in this variant, so the raw
    directories must already exist directly under `save_dir`.

    Args:
        zip_file: Path of the dataset archive; only its parent directory is
            used, as the fallback for `save_dir`.
        save_dir: Working directory. Defaults to the directory of `zip_file`.
        train_test_split_file: Path handed to `save_pickle` for the split.

    Returns:
        Dict with keys 'trainval_im_names', 'gallery_im_names', 'q_im_names'
        and 'mq_im_names', each holding the corresponding slice of new names.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(osp.abspath(save_dir))

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    def sorted_paths(sub_dir):
        # Sorted image paths found under `<save_dir>/<sub_dir>`.
        return sorted(get_im_names(osp.join(save_dir, sub_dir),
                                   return_path=True,
                                   return_np=False))

    def id_and_cam(path):
        # (id, cam) pair parsed from the original file name of `path`.
        name = osp.basename(path)
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    trainval_paths = sorted_paths('bounding_box_train')

    gallery_paths = [p for p in sorted_paths('bounding_box_test')
                     if not osp.basename(p).startswith('-1')]

    query_paths = sorted_paths('query')
    q_ids_cams = {id_and_cam(p) for p in query_paths}

    # Only gather images for those ids and cams used in testing.
    multi_query_paths = [p for p in sorted_paths('gt_bbox')
                         if id_and_cam(p) in q_ids_cams]

    groups = [trainval_paths, gallery_paths, query_paths, multi_query_paths]
    im_paths = [p for group in groups for p in group]

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Cumulative group sizes give the slice boundaries into `im_names`.
    bounds = np.cumsum(np.array([0] + [len(group) for group in groups]))
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    split = dict()
    for key, lo, hi in zip(keys, bounds[:-1], bounds[1:]):
        split[key] = im_names[lo:hi]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split