def get_sample(self, ptr):
  im_name = self.im_names[ptr]
  im_path = osp.join(self.im_dir, im_name)
  im = plt.imread(im_path)
  im, _ = self.pre_process_im(im)
  id = parse_im_name(im_name, 'id')
  cam = parse_im_name(im_name, 'cam')
  # mark denotes whether the image comes from the query, gallery,
  # or multi-query set
  mark = self.marks[ptr]
  return im, id, cam, im_name, mark
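
These examples rely on a parse_im_name helper that is never shown. Below is a minimal sketch, assuming image names follow an '<8-digit id>_<4-digit cam>_<index>.jpg' template, consistent with the '{0:08}' and '{0:04}' formatting used in the next example.

def parse_im_name(im_name, parse_type='id'):
  """Parse the person id or camera id from an image file name (sketch)."""
  assert parse_type in ('id', 'cam')
  if parse_type == 'id':
    return int(im_name[:8])    # first 8 characters encode the person id
  return int(im_name[9:13])    # 4 characters after the first '_' encode the cam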
Example No. 2
import os
import random
import numpy as np

# total_id_num, test_id_num and cam_num are assumed to be module-level
# constants in the original project; parse_im_name and save_pickle come
# from its utility module.
def split_train_test_set(img_dir, save_pkl_dir):
    train_im_names = []
    test_im_names = []
    gallery_im_names = []
    query_im_names = []
    query_ids = []

    file_list = os.listdir(img_dir)
    file_list.sort()

    test_id = random.sample(range(total_id_num), test_id_num)
    test_id.sort()

    # get the id of training/testing in the form of 'XXXX'
    test_ids = ['{0:08}'.format(i) for i in test_id]
    train_ids = ['{0:08}'.format(i) for i in range(total_id_num) if i not in test_id]
    cam_ids = ['{0:04}'.format(i) for i in range(cam_num)]  # defined but unused below

    # generate training set and test set
    for tr in train_ids:
        for f in file_list:
            if tr == '{0:08}'.format(parse_im_name(f, 'id')):
                train_im_names.append(f)

    for te in test_ids:
        for f in file_list:
            if te == '{0:08}'.format(parse_im_name(f, 'id')):
                test_im_names.append(f)

    # generate query in test set: for each of test_id_num ids, pick one
    # cross-camera pair of images
    item = 0
    while item < test_id_num:
        i_1, i_2 = random.sample(test_im_names, 2)
        if (parse_im_name(i_1, 'id') == parse_im_name(i_2, 'id')
                and parse_im_name(i_1, 'id') not in query_ids
                and parse_im_name(i_1, 'cam') != parse_im_name(i_2, 'cam')):
            item += 1
            query_im_names.append(i_1)
            query_im_names.append(i_2)
            query_ids.append(parse_im_name(i_1, 'id'))

    # generate gallery in test set
    gallery_im_names = [f for f in test_im_names if f not in query_im_names]

    train_im_names.sort()
    gallery_im_names.sort()
    query_im_names.sort()

    partitions = {'gallery_im_names': np.asarray(gallery_im_names),
                  'query_im_names': np.asarray(query_im_names),
                  'train_im_names': np.asarray(train_im_names)}
    partitions = {'detected': partitions,
                  'labeled': partitions}
    save_pickle(partitions, save_pkl_dir)

    print("Spliting operation has been done!")
  def __init__(
      self,
      im_dir=None,
      im_names=None,
      ids2labels=None,
      ids_per_batch=None,
      ims_per_id=None,
      **kwargs):

    # Directory containing all images
    self.im_dir = im_dir
    self.im_names = im_names
    self.ids2labels = ids2labels
    self.ids_per_batch = ids_per_batch
    self.ims_per_id = ims_per_id

    im_ids = [parse_im_name(name, 'id') for name in im_names]
    self.ids_to_im_inds = defaultdict(list)
    for ind, id in enumerate(im_ids):
      self.ids_to_im_inds[id].append(ind)
    # Materialize as a list so it is indexable (dict.keys() is a view in
    # Python 3).
    self.ids = list(self.ids_to_im_inds.keys())

    super(TrainSet, self).__init__(
      dataset_size=len(self.ids),
      batch_size=ids_per_batch,
      **kwargs)
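
TrainSet groups image indices by person id and sets batch_size to ids_per_batch, which suggests the usual P x K batching used for triplet loss: each batch samples ids_per_batch identities and ims_per_id images per identity. A hedged sketch of the per-identity sampling step; sample_ims_for_id is a hypothetical helper, not part of the original class.

import numpy as np

def sample_ims_for_id(inds, ims_per_id):
  # Draw ims_per_id image indices for one person id, sampling with
  # replacement when the id has fewer than ims_per_id images.
  replace = len(inds) < ims_per_id
  return np.random.choice(inds, size=ims_per_id, replace=replace)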
Example No. 4
import shutil
from collections import defaultdict
import os.path as osp
from os.path import join as ospj


def move_ims(ori_im_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
             new_start_id):
    """Rename and copy images into a new directory."""
    ids = [parse_im_name(osp.basename(p), 'id') for p in ori_im_paths]
    cams = [parse_im_name(osp.basename(p), 'cam') for p in ori_im_paths]

    unique_ids = list(set(ids))
    unique_ids.sort()
    id_mapping = dict(
        zip(unique_ids, range(new_start_id, new_start_id + len(unique_ids))))

    new_im_names = []
    cnt = defaultdict(int)
    for im_path, id, cam in zip(ori_im_paths, ids, cams):
        new_id = id_mapping[id]
        cnt[(new_id, cam)] += 1
        new_im_name = new_im_name_tmpl.format(new_id, cam,
                                              cnt[(new_id, cam)] - 1)
        shutil.copy(im_path, ospj(new_im_dir, new_im_name))
        new_im_names.append(new_im_name)
    return new_im_names, id_mapping
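
A hedged usage example; the glob pattern, directory names and name template below are illustrative, with the template mirroring the 8/4-digit id/cam scheme assumed above.

import glob

new_names, id_mapping = move_ims(
    ori_im_paths=sorted(glob.glob(osp.join('raw_images', '*.jpg'))),
    new_im_dir='images',
    parse_im_name=parse_im_name,
    new_im_name_tmpl='{:08d}_{:04d}_{:08d}.jpg',
    new_start_id=0)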
Example No. 7
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.
  """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")
    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')

    save_images(mat_file, save_dir, new_im_name_tmpl)

    if osp.exists(train_test_partition_file):
        train_test_partition = load_pickle(train_test_partition_file)
    else:
        raise RuntimeError('Train/test partition file should be provided.')

    for im_type in ['detected', 'labeled']:
        trainval_im_names = train_test_partition[im_type]['train_im_names']
        trainval_ids = list(
            set([parse_im_name(n, 'id') for n in trainval_im_names]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids.sort()
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))
        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids = list(
            set([
                parse_im_name(n, 'id')
                for n in train_val_partition['train_im_names']
            ]))
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        train_ids.sort()
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set

        val_marks = [0, ] * len(train_val_partition['val_query_im_names']) \
                    + [1, ] * len(train_val_partition['val_gallery_im_names'])
        val_im_names = list(train_val_partition['val_query_im_names']) \
                       + list(train_val_partition['val_gallery_im_names'])
        test_im_names = list(train_test_partition[im_type]['query_im_names']) \
                        + list(train_test_partition[im_type]['gallery_im_names'])
        test_marks = [0, ] * len(train_test_partition[im_type]['query_im_names']) \
                     + [1, ] * len(train_test_partition[im_type]['gallery_im_names'])
        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
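
A hedged invocation of transform for CUHK03; the zip path and partition-file path are illustrative only.

transform(zip_file=osp.expanduser('~/Dataset/cuhk03/cuhk03_release.zip'),
          train_test_partition_file=osp.expanduser(
              '~/Dataset/cuhk03/train_test_partition.pkl'),
          save_dir=osp.expanduser('~/Dataset/cuhk03'))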
def create_dataset(
    name='market1501',
    part='trainval',
    **kwargs):

  assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
    "Unsupported Dataset {}".format(name)

  assert part in ['trainval', 'train', 'val', 'test'], \
    "Unsupported Dataset Part {}".format(part)

  ########################################
  # Specify Directory and Partition File #
  ########################################

  if name == 'market1501':
    im_dir = ospeu('~/Dataset/market1501/images')
    partition_file = ospeu('~/Dataset/market1501/partitions.pkl')

  elif name == 'cuhk03':
    im_type = ['detected', 'labeled'][0]
    im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
    partition_file = ospeu(ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))

  elif name == 'duke':
    im_dir = ospeu('~/Dataset/duke/images')
    partition_file = ospeu('~/Dataset/duke/partitions.pkl')

  elif name == 'combined':
    assert part in ['trainval'], \
      "Only trainval part of the combined dataset is available now."
    im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
    partition_file = ospeu('~/Dataset/market1501_cuhk03_duke/partitions.pkl')

  ##################
  # Create Dataset #
  ##################

  # Use standard Market1501 CMC settings for all datasets here.
  cmc_kwargs = dict(separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)

  partitions = load_pickle(partition_file)
  im_names = partitions['{}_im_names'.format(part)]

  if part == 'trainval':
    ids2labels = partitions['trainval_ids2labels']

    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'train':
    ids2labels = partitions['train_ids2labels']

    ret_set = TrainSet(
      im_dir=im_dir,
      im_names=im_names,
      ids2labels=ids2labels,
      **kwargs)

  elif part == 'val':
    marks = partitions['val_marks']
    kwargs.update(cmc_kwargs)

    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  elif part == 'test':
    marks = partitions['test_marks']
    kwargs.update(cmc_kwargs)

    ret_set = TestSet(
      im_dir=im_dir,
      im_names=im_names,
      marks=marks,
      **kwargs)

  if part in ['trainval', 'train']:
    num_ids = len(ids2labels)
  elif part in ['val', 'test']:
    ids = [parse_im_name(n, 'id') for n in im_names]
    num_ids = len(list(set(ids)))
    num_query = np.sum(np.array(marks) == 0)
    num_gallery = np.sum(np.array(marks) == 1)
    num_multi_query = np.sum(np.array(marks) == 2)

  # Print dataset information
  print('-' * 40)
  print('{} {} set'.format(name, part))
  print('-' * 40)
  print('NO. Images: {}'.format(len(im_names)))
  print('NO. IDs: {}'.format(num_ids))

  # num_query etc. are only defined for the val/test parts.
  try:
    print('NO. Query Images: {}'.format(num_query))
    print('NO. Gallery Images: {}'.format(num_gallery))
    print('NO. Multi-query Images: {}'.format(num_multi_query))
  except NameError:
    pass

  print('-' * 40)

  return ret_set
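
A hedged usage example; ids_per_batch and ims_per_id feed the P x K sampling in TrainSet, and any remaining keyword arguments are assumed to be consumed by the base classes.

train_set = create_dataset(
    name='market1501',
    part='trainval',
    ids_per_batch=32,   # illustrative P and K values
    ims_per_id=4)

test_set = create_dataset(name='market1501', part='test')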