Пример #1
0
    def _fork(self):
        """Open the ``s``, ``l`` and ``u`` record files and cache their keys.

        Each reader is opened read-only and stored on ``self.rec_*``; the keys
        of its index dictionary are stored as a list on ``self.idx_*``.
        """
        reader_s = recordio.MXIndexedRecordIO(self.idx_fs, self.rec_fs, 'r')
        self.rec_s = reader_s
        self.idx_s = list(reader_s.idx)

        reader_l = recordio.MXIndexedRecordIO(self.idx_fl, self.rec_fl, 'r')
        self.rec_l = reader_l
        self.idx_l = list(reader_l.idx)

        reader_u = recordio.MXIndexedRecordIO(self.idx_fu, self.rec_fu, 'r')
        self.rec_u = reader_u
        self.idx_u = list(reader_u.idx)
Пример #2
0
    def _fork(self):
        """Open both record files and build their class dictionaries and keys.

        For each file a read-only reader is stored on ``self.rec{1,2}``, the
        result of ``load_or_gen_dict`` on ``self.cls_idx_d{1,2}`` and the list
        of index keys on ``self.idx{1,2}``.

        NOTE(review): the second reader takes its index path from
        ``self.idx_2`` while the first uses ``self.idx_f1`` -- confirm the
        attribute name is intentional.
        """
        first = recordio.MXIndexedRecordIO(self.idx_f1, self.rec_f1, 'r')
        self.rec1 = first
        self.cls_idx_d1 = self.load_or_gen_dict(self.rec_f1, first)
        self.idx1 = list(first.idx)

        second = recordio.MXIndexedRecordIO(self.idx_2, self.rec_f2, 'r')
        self.rec2 = second
        self.cls_idx_d2 = self.load_or_gen_dict(self.rec_f2, second)
        self.idx2 = list(second.idx)
Пример #3
0
 def __init__(self, batch_size, data_shape, path_imgrec=None, shuffle=False,
              data_name='data', gender_label_name='label_gender',
              age_lable_name='label_age', **kwargs):
     """Open the record file and set up the iterator's bookkeeping.

     Args:
         batch_size: Number of samples per batch.
         data_shape: Per-sample shape; indices 1 and 2 are used to build
             ``image_size``.
         path_imgrec: Path of the ``.rec`` file. The index file is expected
             next to it with the last four characters replaced by ``.idx``.
         shuffle: When true, ``seq``/``oseq`` reference the list of record
             keys; otherwise ``seq`` is ``None``.
         data_name: Name advertised in ``provide_data``.
         gender_label_name: Accepted but not used here; only the age label
             is advertised in ``provide_label``.
         age_lable_name: Name advertised in ``provide_label``.
         **kwargs: Ignored.

     Raises:
         AssertionError: If ``path_imgrec`` is empty or ``None``.
     """
     super(SSR_ITER, self).__init__()
     assert path_imgrec
     # The original assigned ``self.batch_size`` to itself here, which is
     # either a no-op or an AttributeError; store the argument instead.
     self.batch_size = batch_size
     logging.info('loading recordio %s...', path_imgrec)
     path_imgidx = path_imgrec[0:-4] + ".idx"
     self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
     self.imgidx = list(self.imgrec.keys)
     if shuffle:
         self.seq = self.imgidx
         self.oseq = self.imgidx
     else:
         self.seq = None
     self.provide_data = [(data_name, (batch_size,) + data_shape)]
     self.data_shape = data_shape
     self.shuffle = shuffle
     self.image_size = '%d,%d' % (data_shape[1], data_shape[2])
     self.provide_label = [(age_lable_name, (batch_size,))]
     self.cur = 0
     self.nbatch = 0
     self.is_init = False
Пример #4
0
 def __init__(self, path_imgrec, rand_mirror):
     """Open the record file and collect the image indices it contains.

     Record 0 is read as a header. When its ``flag`` is positive, its two
     label values delimit a range of identity records, each of which in
     turn holds the ``(start, end)`` range of image indices belonging to
     that identity. Otherwise every key of the record file is used.

     Args:
         path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
             derived by replacing the last four characters.
         rand_mirror: Stored on ``self.rand_mirror``.

     Raises:
         AssertionError: If ``path_imgrec`` is empty or ``None``.
     """
     self.rand_mirror = rand_mirror
     assert path_imgrec
     if not path_imgrec:
         # Only reachable when assertions are disabled.
         return

     logging.info('loading recordio %s...', path_imgrec)
     path_imgidx = path_imgrec[0:-4] + ".idx"
     print(path_imgrec, path_imgidx)
     self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')

     header0, _ = recordio.unpack(self.imgrec.read_idx(0))
     if header0.flag > 0:
         print('header0 label', header0.label)
         self.header0 = (int(header0.label[0]), int(header0.label[1]))
         self.imgidx = []
         self.id2range = {}
         self.seq_identity = range(int(header0.label[0]),
                                   int(header0.label[1]))
         for identity in self.seq_identity:
             id_header, _ = recordio.unpack(self.imgrec.read_idx(identity))
             span = (int(id_header.label[0]), int(id_header.label[1]))
             self.id2range[identity] = span
             self.imgidx.extend(range(span[0], span[1]))
         print('id2range', len(self.id2range))
     else:
         self.imgidx = list(self.imgrec.keys)
     self.seq = self.imgidx
Пример #5
0
    def __init__(self, batch_size, data_shape,
                 path_imgrec = None,
                 shuffle=False, aug_list=None, mean = None,
                 rand_mirror = False, cutoff = 0,
                 data_name='data', label_name='softmax_label', **kwargs):
        """Open the record file and set up the iterator's bookkeeping.

        Record 0 is read as a header. When its ``flag`` is positive, image
        indices run from 1 up to the first header label, and the records
        between the two header labels give per-identity ``(start, end)``
        ranges stored in ``id2range``. Otherwise every record key is used.

        Args:
            batch_size: Number of samples per batch.
            data_shape: Per-sample shape; indices 1 and 2 build ``image_size``.
            path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
                derived by replacing the last four characters.
            shuffle: When true, ``seq``/``oseq`` reference ``imgidx``;
                otherwise ``seq`` is ``None``.
            aug_list: Not used by this constructor.
            mean: Optional per-channel mean, reshaped to ``(1, 1, 3)``.
            rand_mirror: Stored on ``self.rand_mirror``.
            cutoff: Stored on ``self.cutoff``.
            data_name: Name advertised in ``provide_data``.
            label_name: Name advertised in ``provide_label``.
            **kwargs: Ignored.

        Raises:
            AssertionError: If ``path_imgrec`` is empty or ``None``.
        """
        super(FaceImageIter, self).__init__()
        assert path_imgrec
        if path_imgrec:
            logging.info('loading recordio %s...',
                         path_imgrec)
            path_imgidx = path_imgrec[0:-4]+".idx"
            self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
            s = self.imgrec.read_idx(0)
            header, _ = recordio.unpack(s)
            if header.flag > 0:
                print('header0 label', header.label)
                self.header0 = (int(header.label[0]), int(header.label[1]))
                # Materialised as a list so both branches produce the same
                # mutable type; ``seq`` aliases it when shuffling and a
                # ``range`` cannot be reordered in place.
                self.imgidx = list(range(1, int(header.label[0])))
                self.id2range = {}
                self.seq_identity = range(int(header.label[0]), int(header.label[1]))
                for identity in self.seq_identity:
                    s = self.imgrec.read_idx(identity)
                    header, _ = recordio.unpack(s)
                    self.id2range[identity] = (int(header.label[0]),
                                               int(header.label[1]))
                print('id2range', len(self.id2range))
            else:
                self.imgidx = list(self.imgrec.keys)
            if shuffle:
                self.seq = self.imgidx
                self.oseq = self.imgidx
                print(len(self.seq))
            else:
                self.seq = None

        self.mean = mean
        self.nd_mean = None
        if self.mean:
            self.mean = np.array(self.mean, dtype=np.float32).reshape(1,1,3)
            self.nd_mean = mx.nd.array(self.mean).reshape((1,1,3))

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size,) + data_shape)]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.shuffle = shuffle
        self.image_size = '%d,%d'%(data_shape[1],data_shape[2])
        self.rand_mirror = rand_mirror
        print('rand_mirror', rand_mirror)
        self.cutoff = cutoff
        self.provide_label = [(label_name, (batch_size,))]
        self.cur = 0
        self.nbatch = 0
        self.is_init = False
Пример #6
0
 def __init__(self, rec_path: Path, augs=None):
     """Open the record file, index its images and read the label count.

     Record 0 is read as a header. When its ``flag`` is positive, its two
     label values delimit identity records whose own labels give the
     ``(start, end)`` image-index range of each identity. Otherwise every
     key of the record file is used. The number of labels is the first
     comma-separated field of the ``property`` file next to ``rec_path``.

     Args:
         rec_path: Path of the ``.rec`` file; the index file is the same
             path with an ``.idx`` suffix.
         augs: Stored on ``self.augs``.
     """
     self.rec_path = rec_path
     self.augs = augs
     idx_path = rec_path.with_suffix('.idx')
     self.imgrec = recordio.MXIndexedRecordIO(str(idx_path), str(rec_path),
                                              'r')

     header0, _ = recordio.unpack(self.imgrec.read_idx(0))
     if header0.flag > 0:
         self.header0 = (int(header0.label[0]), int(header0.label[1]))
         self.imgidx = []
         self.id2range = {}
         self.seq_identity = range(int(header0.label[0]),
                                   int(header0.label[1]))
         for identity in self.seq_identity:
             id_header, _ = recordio.unpack(self.imgrec.read_idx(identity))
             span = (int(id_header.label[0]), int(id_header.label[1]))
             self.id2range[identity] = span
             self.imgidx.extend(range(span[0], span[1]))
     else:
         self.imgidx = list(self.imgrec.keys)
     self.seq = self.imgidx

     with open(str(rec_path.parent / 'property'), 'r') as prop_file:
         first_field = prop_file.readline().split(',')[0]
         self.num_labels = int(first_field.strip())
Пример #7
0
    def _fork(self):
        """Open the source and/or target record files selected on ``self``.

        With ``use_src`` set, the source reader and its keys go to
        ``self.recs`` / ``self.idxs``. With ``use_tgt`` set, the target reader
        and its keys go to ``self.rect`` / ``self.idxt``; when both are set,
        the label of every target record is collected and passed to
        ``generate_cls_dict`` to build ``self.idxt_cls``.
        """
        if self.use_src:
            src_reader = recordio.MXIndexedRecordIO(self.idx_fs, self.rec_fs,
                                                    'r')
            self.recs = src_reader
            self.idxs = list(src_reader.idx)

        if not self.use_tgt:
            return

        tgt_reader = recordio.MXIndexedRecordIO(self.idx_ft, self.rec_ft, 'r')
        self.rect = tgt_reader
        self.idxt = list(tgt_reader.idx)

        if self.use_src:
            labels = [
                recordio.unpack(tgt_reader.read_idx(key))[0].label
                for key in self.idxt
            ]
            self.idxt_cls = self.generate_cls_dict(labels)
Пример #8
0
 def __init__(self, cfg, mode='train'):
     """Read the split's sample count and open its indexed record file.

     Args:
         cfg: Object whose ``path`` attribute is the dataset directory.
         mode: Split name; ``<mode>.meta``, ``<mode>.idx`` and
             ``<mode>.rec`` are looked up inside the dataset directory.
     """
     self.prefix = Path(cfg.path)
     self.mode = mode

     with (self.prefix / f'{mode}.meta').open('r') as meta_file:
         self.num_sample = json.load(meta_file)['num_sample']
     print('num samples:', self.num_sample)

     idx_file = self.prefix / f'{mode}.idx'
     rec_file = self.prefix / f'{mode}.rec'
     self.record_reader = mxrec.MXIndexedRecordIO(str(idx_file),
                                                  str(rec_file), 'r')
Пример #9
0
    def __init__(self,
                 batch_size,
                 data_shape,
                 path_imgrec=None,
                 task='age',
                 shuffle=False,
                 aug_list=None,
                 mean=None,
                 rand_mirror=False,
                 cutoff=0,
                 data_name='data',
                 label_name='softmax_label',
                 **kwargs):
        """Open the record file and set up the iterator's bookkeeping.

        Args:
            batch_size: Number of samples per batch.
            data_shape: Per-sample shape; indices 1 and 2 build ``image_size``.
            path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
                derived by replacing the last four characters.
            task: ``'age'`` advertises a ``(batch_size, 100)`` label shape,
                anything else ``(batch_size,)``.
            shuffle: When true, ``seq``/``oseq`` reference the key list;
                otherwise ``seq`` is ``None``.
            aug_list: Not used by this constructor.
            mean: Optional per-channel mean, reshaped to ``(1, 1, 3)``.
            rand_mirror: Stored on ``self.rand_mirror``.
            cutoff: Stored on ``self.cutoff``.
            data_name: Name advertised in ``provide_data``.
            label_name: Name advertised in ``provide_label``.
            **kwargs: Ignored.

        Raises:
            AssertionError: If ``path_imgrec`` is empty or ``None``.
        """
        super(FaceImageIter, self).__init__()
        assert path_imgrec
        if path_imgrec:
            logging.info('loading recordio %s...', path_imgrec)
            idx_path = path_imgrec[0:-4] + ".idx"
            self.imgrec = recordio.MXIndexedRecordIO(idx_path, path_imgrec,
                                                     'r')  # pylint: disable=redefined-variable-type
            # Record 0 is read and unpacked but its content is not used.
            recordio.unpack(self.imgrec.read_idx(0))
            self.imgidx = list(self.imgrec.keys)
            if not shuffle:
                self.seq = None
            else:
                self.seq = self.imgidx
                self.oseq = self.imgidx
                print(len(self.seq))

        if mean:
            self.mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
            self.nd_mean = mx.nd.array(self.mean).reshape((1, 1, 3))
        else:
            self.mean = mean
            self.nd_mean = None

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size, ) + data_shape)]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.shuffle = shuffle
        self.image_size = '%d,%d' % (data_shape[1], data_shape[2])
        self.rand_mirror = rand_mirror
        print('rand_mirror', rand_mirror)
        self.cutoff = cutoff

        label_shape = (batch_size, 100) if task == 'age' else (batch_size, )
        self.provide_label = [(label_name, label_shape)]

        self.cur = 0
        self.nbatch = 0
        self.is_init = False
Пример #10
0
 def __init__(self,
              batch_size,
              data_shape,
              path_imgrec=None,
              shuffle=False,
              aug_list=None,
              mean=None,
              rand_mirror=False,
              cutoff=0,
              data_name='data',
              label_name='sigmoid_label',
              num_class=21,
              **kwargs):
     """Open the record file (if given) and set up the iterator's state.

     Record 0 is read as a header. Only when its ``flag`` equals 2 are
     ``header0`` and ``imgidx`` (indices 1 up to the first header label)
     populated; otherwise a message is printed and neither is set.

     Args:
         batch_size: Number of samples per batch.
         data_shape: Per-sample shape; indices 1 and 2 build ``image_size``.
         path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
             derived by replacing the last four characters. When falsy, no
             file is opened and ``seq`` is not set.
         shuffle: When true, ``seq`` references ``imgidx``; otherwise
             ``seq`` is ``None``.
         aug_list: Not used by this constructor.
         mean: Optional mean, broadcast to a ``(1, 1, 3)`` float32 array.
         rand_mirror: Stored on ``self.rand_mirror``.
         cutoff: Stored on ``self.cutoff``.
         data_name: Name advertised in ``provide_data``.
         label_name: Name advertised in ``provide_label``.
         num_class: Second dimension of the advertised label shape.
         **kwargs: Ignored.
     """
     super(FaceImageIter, self).__init__()
     if path_imgrec:
         logging.info('loading record %s...', path_imgrec)
         path_imgidx = path_imgrec[0:-4] + ".idx"
         self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec,
                                                  'r')
         s_dataset = self.imgrec.read_idx(0)
         header, _ = recordio.unpack(s_dataset)
         if header.flag == 2:
             self.header0 = (int(header.label[0]), int(header.label[1]))
             # A list rather than a lazy ``range`` so the sequence that
             # ``seq`` aliases when shuffling can be reordered in place.
             self.imgidx = list(range(1, int(header.label[0])))
         else:
             # NOTE(review): ``imgidx`` stays unset on this path, so
             # ``shuffle=True`` fails below with AttributeError -- confirm
             # whether a fallback to all record keys is wanted.
             print("header flag is not 2 for dataset ", header.flag)
         if shuffle:
             self.seq = self.imgidx
         else:
             self.seq = None
     self.mean = mean
     # Always define ``nd_mean`` so later reads do not fail when no mean
     # was supplied.
     self.nd_mean = None
     if self.mean:
         self.mean = np.ones([1, 1, 3], dtype=np.float32) * self.mean
         self.nd_mean = mx.nd.array(self.mean).reshape((1, 1, 3))
     self.check_data_shape(data_shape)
     self.provide_data = [(data_name, (batch_size, ) + data_shape)]
     self.batch_size = batch_size
     self.data_shape = data_shape
     self.shuffle = shuffle
     self.image_size = '%d,%d' % (data_shape[1], data_shape[2])
     self.rand_mirror = rand_mirror
     self.cutoff = cutoff
     self.provide_label = [(label_name, (batch_size, num_class))]
     self.cur = 0
     self.nbatch = 0
     self.is_init = False
Пример #11
0
def extract2Output(prefix, database_output, search_output, samples=5):
    """Dump every image of a record file into per-class folders.

    Record 0 holds the index range of the class records; each class record
    holds the index range of its images. For every class, ``samples``
    randomly chosen images are written below ``search_output/<class no>``
    and all remaining ones below ``database_output/<class no>``.

    Args:
        prefix: Path prefix of the ``.idx`` / ``.rec`` pair.
        database_output: Directory for the non-sampled images. Emptied if it
            already exists, created otherwise.
        search_output: Directory for the sampled images. Emptied if it
            already exists, created otherwise.
        samples: Number of images per class drawn without replacement; no
            sampling is done when it is not positive.
    """
    from pathlib import Path
    import shutil

    import tqdm

    # Always end up with fresh, existing output directories. The original
    # only recreated a directory that already existed, so a first run
    # against a missing directory failed at the per-class ``os.mkdir``.
    for out_dir in (database_output, search_output):
        out_path = Path(out_dir)
        if out_path.exists():
            shutil.rmtree(out_dir)
        out_path.mkdir(parents=True)

    reader = io.MXIndexedRecordIO(prefix + '.idx', prefix + '.rec', 'r')

    # Record 0 describes all classes: its labels give the class index range.
    header, _ = io.unpack(reader.read_idx(0))
    labels = range(int(header.label[0]), int(header.label[1]))

    # For each class, collect the index range of the images it contains.
    imgs = []
    for label in labels:
        header, _ = io.unpack(reader.read_idx(int(label)))
        imgs.append(range(int(header.label[0]), int(header.label[1])))

    for class_no, imgidxs in tqdm.tqdm(enumerate(imgs)):
        sc_path = os.path.join(search_output, str(class_no))
        db_path = os.path.join(database_output, str(class_no))
        os.mkdir(sc_path)
        os.mkdir(db_path)

        # A set gives O(1) membership tests in the loop below.
        held_out = set()
        if samples > 0:
            chosen = np.random.choice(list(imgidxs), samples, False)
            held_out = set(chosen.tolist())

        for img_id in imgidxs:
            _, img = io.unpack_img(reader.read_idx(img_id))
            target_dir = sc_path if img_id in held_out else db_path
            cv2.imwrite(os.path.join(target_dir, str(img_id) + '.jpg'), img)
Пример #12
0
    def __init__(self, root, flag=1, transform=None):
        """Open ``train.rec`` under ``root`` and read the ``property`` file.

        Args:
            root: Directory containing ``train.rec``, ``train.idx`` and a
                ``property`` file with three comma-separated integers.
            flag: Stored on ``self._flag``.
            transform: Stored on ``self._transform``.

        Raises:
            AssertionError: If ``property`` does not have exactly three
                comma-separated fields.
        """
        filename = os.path.join(root, 'train.rec')
        self.filename = filename
        self.idx_file = os.path.splitext(filename)[0] + '.idx'
        self._record = recordio.MXIndexedRecordIO(self.idx_file, self.filename,
                                                  'r')
        # Context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(os.path.join(root, "property"), "r") as prop_file:
            prop = prop_file.read().strip().split(',')
        self._flag = flag
        self._transform = transform

        assert len(prop) == 3
        self.num_classes = int(prop[0])
        self.image_size = [int(prop[1]), int(prop[2])]
def load_train_data(data_dir):
    """Open ``train.rec`` in ``data_dir`` and return it with its image keys.

    Record 0 is read as a header. When its ``flag`` is positive, the image
    keys are ``range(1, first header label)`` and the identity records
    between the two header labels are read as well. Otherwise the reader's
    own key list is returned.

    Args:
        data_dir: Directory containing ``train.rec`` and ``train.idx``.

    Returns:
        A ``(reader, image_keys)`` tuple.
    """
    path_imgrec = os.path.join(data_dir, "train.rec")
    path_imgidx = path_imgrec[0:-4] + ".idx"

    banner = "Loading recordio {}\n  Corresponding record idx is {}"
    print(banner.format(path_imgrec, path_imgidx))

    # TODO: key_type ??
    imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r",
                                        key_type=int)

    # Header record 0 tells whether identity ranges are present.
    header0, _ = recordio.unpack(imgrec.read_idx(0))
    if not header0.flag > 0:
        return imgrec, imgrec.keys

    first_identity = int(header0.label[0])
    end_identity = int(header0.label[1])

    # The per-identity ranges are read but not returned at the moment.
    id2range = {}
    for identity in range(first_identity, end_identity):
        id_header, _ = recordio.unpack(imgrec.read_idx(identity))
        id2range[identity] = (int(id_header.label[0]),
                              int(id_header.label[1]))

    return imgrec, range(1, first_identity)
Пример #14
0
    def __init__(
        self,
        path_ms1m=lz.share_path2 + 'faces_ms1m_112x112/',
    ):
        """Open the dataset's record file and load its precomputed index info.

        Args:
            path_ms1m: Dataset directory containing ``train.rec``,
                ``train.idx`` and ``info.pk``.

        Raises:
            AssertionError: If the ``.idx`` file does not exist.
        """
        self.path_ms1m = path_ms1m
        self.root_path = Path(path_ms1m)

        rec_file = path_ms1m + '/train.rec'
        idx_file = rec_file[0:-4] + ".idx"
        assert os.path.exists(idx_file)
        self.imgrec = recordio.MXIndexedRecordIO(idx_file, rec_file, 'r')

        info = lz.msgpack_load(path_ms1m + '/info.pk')
        self.imgidx, self.ids, self.id2range = info

        self.num_classes = len(self.ids)
        self.cur = 0
        self.lock = mp.Lock()
Пример #15
0
    def _fork(self):
        """Open one reader per file and build the global index tables.

        ``self.orig_idx`` maps a running global index to a
        ``(reader number, record key)`` pair, enumerating the readers in
        order. When ``self.with_landmark`` is set, the ``.landmark`` file
        next to each record file is unpickled into ``self.landmark_dicts``.
        """
        self.records = []
        for fname in self.filenames:
            idx_name = os.path.splitext(fname)[0] + '.idx'
            self.records.append(recordio.MXIndexedRecordIO(idx_name, fname, 'r'))

        located = ((rec_no, key)
                   for rec_no, reader in enumerate(self.records)
                   for key in reader.keys)
        self.orig_idx = dict(enumerate(located))

        self.landmark_dicts = {}
        if not self.with_landmark:
            return
        for rec_no, fname in enumerate(self.filenames):
            landmark_file = os.path.splitext(fname)[0] + '.landmark'
            # pickle.load can execute arbitrary code: trusted files only.
            with open(landmark_file, 'rb') as handle:
                self.landmark_dicts[rec_no] = pickle.load(handle)
Пример #16
0
def extract_asian_celeb_images(args):
    """Write every image of the record file into one folder per label.

    Records 1..2830146 are decoded and saved as
    ``<write_path>/<label>/<n>.jpg`` where ``n`` counts the images already
    written for that label during this call.

    Args:
        args: Object providing ``idx_path``, ``rec_path`` and ``write_path``.
    """
    imgrec = recordio.MXIndexedRecordIO(args.idx_path, args.rec_path, 'r')

    # One running counter per label. The original reset a single counter
    # only when a label folder had to be created, so a rerun over existing
    # folders (or labels that are not stored contiguously) produced wrong
    # or clashing file numbers.
    next_number = {}
    for i in range(2830146):
        header, s = recordio.unpack(imgrec.read_idx(i + 1))
        img = mx.image.imdecode(s).asnumpy()

        label = int(header.label[0])
        dst = os.path.join(args.write_path, str(label))
        os.makedirs(dst, exist_ok=True)

        cnt = next_number.get(label, 0)
        cv2.imwrite(os.path.join(dst, f'{cnt}.jpg'),
                    cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        next_number[label] = cnt + 1
Пример #17
0
def _main(args):
    """Extract the first ``args.image_count`` images from a record file.

    Args:
        args: Object providing ``output_dir``, ``path_imgidx``,
            ``path_imgrec`` and ``image_count``.
    """
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    idx_file, rec_file, total = (args.path_imgidx, args.path_imgrec,
                                 args.image_count)
    imgrec = recordio.MXIndexedRecordIO(idx_file, rec_file, 'r')

    # Images are extracted one after another; a thread-pool variant was
    # sketched here previously but is not enabled.
    for position in tqdm(range(total)):
        extrat_image(recordio, imgrec, output_dir, position)
Пример #18
0
def dump_mxrec(data_splits):
    """Pack the pickle files of each split into an indexed record file.

    For every split, all ``*.pkl`` files below ``source_root/<split>`` are
    loaded, re-serialised with ``pickle.dumps`` and written under
    consecutive integer keys to ``output_root/<split>.rec`` (with a matching
    ``.idx``). The number of written samples is stored as JSON in
    ``output_root/<split>.meta``.

    Args:
        data_splits: Iterable of split names.
    """
    for dsp in data_splits:
        source_path = source_root / dsp
        output_meta = output_root / f'{dsp}.meta'
        write_record = mxrec.MXIndexedRecordIO(str(output_root / f'{dsp}.idx'),
                                               str(output_root / f'{dsp}.rec'),
                                               'w')
        num_sample = 0
        # try/finally so the writer is closed even when a file fails to
        # load; the original left it open on any exception.
        try:
            for pkl_path in tqdm(source_path.glob("*.pkl")):
                with pkl_path.open('rb') as pf:
                    # pickle.load can execute arbitrary code: the source
                    # directory must contain trusted files only.
                    data = pickle.load(pf)
                write_record.write_idx(num_sample, pickle.dumps(data))
                num_sample += 1
            with output_meta.open('w') as f:
                json.dump({'num_sample': num_sample}, f)
        finally:
            write_record.close()
Пример #19
0
    def __init__(self,
                 batch_size,
                 data_shape,
                 path_imgrec=None,
                 shuffle=False,
                 aug_list=None,
                 mean=None,
                 rand_mirror=False,
                 cutoff=0,
                 color_jittering=0,
                 data_name='data',
                 label_name='softmax_label',
                 **kwargs):
        """Open the record file and set up the iterator's bookkeeping.

        Args:
            batch_size: Number of samples per batch.
            data_shape: Per-sample shape; indices 1 and 2 build ``image_size``.
            path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
                derived by replacing the last four characters.
            shuffle: Stored on ``self.shuffle``.
            aug_list: Not used by this constructor.
            mean: Optional per-channel mean, reshaped to ``(1, 1, 3)``.
            rand_mirror: Stored on ``self.rand_mirror``.
            cutoff: Stored on ``self.cutoff``.
            color_jittering: Stored on ``self.color_jittering``.
            data_name: Name advertised in ``provide_data``.
            label_name: Name advertised in ``provide_label`` with shape
                ``(batch_size, 101)``.
            **kwargs: Ignored.

        Raises:
            AssertionError: If ``path_imgrec`` is empty or ``None``.
        """
        super(FaceImageIter, self).__init__()
        assert path_imgrec
        logging.info('loading recordio %s...', path_imgrec)
        idx_path = path_imgrec[0:-4] + ".idx"
        reader = recordio.MXIndexedRecordIO(idx_path, path_imgrec, 'r')
        self.imgrec = reader
        self.seq = list(reader.keys)
        logging.info("%s 数据大小:%d", path_imgrec, len(self.seq))

        if mean:
            self.mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
            self.nd_mean = mx.nd.array(self.mean).reshape((1, 1, 3))
        else:
            self.mean = mean
            self.nd_mean = None

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size, ) + data_shape)]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.shuffle = shuffle
        height, width = data_shape[1], data_shape[2]
        self.image_size = '%d,%d' % (height, width)
        self.rand_mirror = rand_mirror
        logging.info('是否随机翻转图片:%s', rand_mirror)
        self.cutoff = cutoff
        self.color_jittering = color_jittering
        self.CJA = mx.image.ColorJitterAug(0.125, 0.125, 0.125)
        self.provide_label = [(label_name, (batch_size, 101))]
        self.cur = 0
        self.nbatch = 0
        self.is_init = False
Пример #20
0
def load_ms1m_info():
    """Open the MS1M record file and print statistics of its cached index.

    Loads ``info.pk`` from the dataset directory, prints the lengths of its
    three components and then the ``stat_np`` summary of the image-index
    and identity arrays.
    """
    state = edict()
    path_ms1m = lz.share_path2 + 'faces_ms1m_112x112/'
    path_imgrec = lz.share_path2 + 'faces_ms1m_112x112/train.rec'
    path_imgidx = path_imgrec[0:-4] + ".idx"
    state.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')

    imgidx, ids, id2range = lz.msgpack_load(path_ms1m + '/info.pk')
    print(len(imgidx), len(ids), len(id2range))

    imgidx = np.array(imgidx)
    ids = np.array(ids)
    for values in (imgidx, ids):
        print(stat_np(values))
Пример #21
0
def ms1m_gen(batch_size):
    """Yield endless batches of (anchor, positive, negative) image triplets.

    Every identity with more than one image contributes two of its images
    (anchor and positive) and one image of another identity, chosen by a
    random cyclic shift of the identity order (negative). Images are decoded
    and divided by 255.

    Args:
        batch_size: Number of triplets per batch.

    Yields:
        ``(x, y)`` where ``x`` stacks all anchors, then all positives, then
        all negatives of the batch, and ``y`` is a zero array of shape
        ``(batch_size * 3, 1)``.
    """
    path_idx = "./data/faces_emore/train.idx"
    path_rec = "./data/faces_emore/train.rec"
    imgrec = recordio.MXIndexedRecordIO(path_idx, path_rec, 'r')
    # allow_pickle=True unpickles the file: trusted files only.
    ms1m_list = np.load("ms1m_list.npy", allow_pickle=True).item()
    keys = list(ms1m_list.keys())
    sz = len(ms1m_list)
    pp = np.arange(sz)
    k = np.arange(sz)
    random.seed(a=None)

    def load_image(record_id):
        """Decode one record into a float image scaled by 1/255."""
        _, payload = recordio.unpack(imgrec.read_idx(int(record_id)))
        return mx.image.imdecode(payload).asnumpy() / 255

    anc = []
    pos = []
    neg = []
    ind = 0
    while True:
        # Shift by at least 1 so the negative identity differs from the
        # anchor identity.
        t = random.randint(1, sz - 1)
        random.shuffle(k)
        p = (pp + t) % sz
        for h in k:
            members = ms1m_list[keys[h]]
            if len(members) <= 1:
                continue
            i_p = random.sample(members, 2)
            i_n = random.sample(ms1m_list[keys[p[h]]], 1)
            anc.append(load_image(i_p[0]))
            pos.append(load_image(i_p[1]))
            neg.append(load_image(i_n[0]))
            ind = (ind + 1) % batch_size
            if ind == 0:
                x = np.array(anc + pos + neg)
                # Zero-filled placeholder; np.ndarray(shape=...) returned
                # uninitialised memory, making the labels non-deterministic.
                y = np.zeros(shape=(batch_size * 3, 1))
                yield x, y
                anc.clear()
                pos.clear()
                neg.clear()
Пример #22
0
 def __init__(self):
     """Open the training record file and build identity/index lookup tables.

     Record 0 is read as a header whose two label values delimit the
     identity records; each identity record's labels give the
     ``(start, end)`` range of its image indices. After loading, identities
     are renumbered to 0-based class indices in the order they were read.

     Raises:
         AssertionError: If the header record's ``flag`` is not positive.
     """
     from mxnet import recordio
     self.imgrec = recordio.MXIndexedRecordIO(base + "train.idx",
                                              rec_files[0], 'r')
     s = self.imgrec.read_idx(0)
     header, _ = recordio.unpack(s)
     assert header.flag > 0, 'ms1m or glint ...'
     logging.info(f'header0 label {header.label}')
     self.header0 = (int(header.label[0]), int(header.label[1]))
     self.id2range = {}
     self.idx2id = {}
     self.imgidx = []
     self.ids = []
     # Key of the first identity record; used as the offset for ids_map.
     ids_shif = int(header.label[0])
     # The range is materialised before the loop, so rebinding ``header``
     # inside the loop does not affect the iteration bounds.
     for identity in list(range(int(header.label[0]),
                                int(header.label[1]))):
         s = self.imgrec.read_idx(identity)
         header, _ = recordio.unpack(s)
         a, b = int(header.label[0]), int(header.label[1])
         self.id2range[identity] = (a, b)
         self.ids.append(identity)
         self.imgidx += list(range(a, b))
     self.ids = np.asarray(self.ids)
     self.num_classes = len(self.ids)
     # Maps (identity key - first identity key) to its 0-based position.
     self.ids_map = {
         identity - ids_shif: id2
         for identity, id2 in zip(self.ids, range(self.num_classes))
     }  # now cutoff==0, this is identical
     # Maps the raw identity key to its 0-based position.
     ids_map_tmp = {
         identity: id2
         for identity, id2 in zip(self.ids, range(self.num_classes))
     }
     # From here on ids and id2range are keyed by the 0-based positions.
     self.ids = np.asarray([ids_map_tmp[id_] for id_ in self.ids])
     self.id2range = {
         ids_map_tmp[id_]: range_
         for id_, range_ in self.id2range.items()
     }
     # Reverse lookup: image index -> 0-based identity position.
     for id_, range_ in self.id2range.items():
         for idx_ in range(range_[0], range_[1]):
             self.idx2id[idx_] = id_
     # Publish the class count on the external ``conf`` object.
     conf.num_clss = self.num_classes
Пример #23
0
def ms1m_gen_batch(batch_size, sample_size):
    """Yield endless batches of images grouped by randomly chosen identity.

    Each batch draws ``batch_size`` distinct identities and, for each, up to
    ``sample_size`` of its images. Images are decoded and divided by 255.

    Args:
        batch_size: Number of identities per batch.
        sample_size: Maximum number of images taken per identity.

    Yields:
        ``(x, y)`` arrays holding the images and the identity key of each
        image.
    """
    path_idx = "./data/faces_emore/train.idx"
    path_rec = "./data/faces_emore/train.rec"
    imgrec = recordio.MXIndexedRecordIO(path_idx, path_rec, 'r')
    ms1m_list = np.load("ms1m_list.npy", allow_pickle=True).item()
    keys = list(ms1m_list.keys())
    random.seed(a=None)
    while True:
        x, y = [], []
        for person in random.sample(keys, batch_size):
            candidates = ms1m_list[person]
            take = np.min([sample_size, len(candidates)])
            for src in random.sample(candidates, take):
                _, payload = recordio.unpack(imgrec.read_idx(int(src)))
                x.append(mx.image.imdecode(payload).asnumpy() / 255)
                y.append(person)
        yield np.array(x), np.array(y)
Пример #24
0
	def push_record(self, recname):
		"""Append every image of ``recname`` to ``self.recout`` with new labels.

		Record 0 of the input gives the range of identity records; each
		identity record gives the range of its image records. Every image is
		re-packed with the current ``self.idnum`` as label and written under
		the running key ``self.idx``. For each identity, the
		``[first key, end key]`` pair is appended to ``self.ID_idx`` and
		``self.idnum`` is incremented.
		"""
		print('Pushing file: %s ...' % recname)
		reader = recordio.MXIndexedRecordIO(recname+'.idx', recname+'.rec', 'r')
		top_header, _ = recordio.unpack(reader.read_idx(0))
		id_begin, id_end = int(top_header.label[0]), int(top_header.label[1])
		for idd in tqdm(range(id_begin, id_end)):
			first_key = self.idx
			id_header, _ = recordio.unpack(reader.read_idx(idd))
			img_begin = int(id_header.label[0])
			img_end = int(id_header.label[1])
			for imgidx in range(img_begin, img_end):
				_, img = recordio.unpack(reader.read_idx(imgidx))
				# The stored header is replaced by one carrying the new label.
				new_header = mx.recordio.IRHeader(0, float(self.idnum), 0, 0)
				self.recout.write_idx(self.idx, recordio.pack(new_header, img))
				self.idx += 1
			self.ID_idx.append([first_key, self.idx])
			self.idnum += 1
Пример #25
0
def get_msceleb_images(records_dir):
    """Export the images of ``train.rec`` as JPEG files grouped by label.

    Record 0 is read as a header; the number of images is its first label
    minus one. The following records are then read sequentially and saved
    as ``<records_dir>/images/identity_<label>/image_<i>.jpg``.

    Args:
        records_dir: Directory containing ``train.idx`` and ``train.rec``.
    """
    imgidx_path = os.path.join(records_dir, "train.idx")
    imgrec_path = os.path.join(records_dir, "train.rec")
    images_dir = os.path.join(records_dir, "images")

    imgrec = recordio.MXIndexedRecordIO(imgidx_path, imgrec_path, 'r')
    header0, _ = recordio.unpack(imgrec.read_idx(0))
    tot_images = int(header0.label[0]) - 1
    print("Total images", tot_images)

    for i in range(tot_images):
        print("Reading ", i)
        # Sequential read: continues after the record read last.
        header, payload = recordio.unpack(imgrec.read())
        pixels = mx.image.imdecode(payload).asnumpy()
        label = int(header.label)
        picture = Image.fromarray(np.uint8(pixels), "RGB")

        images_subdir = os.path.join(images_dir, "identity_%d" % label)
        if not os.path.exists(images_subdir):
            os.makedirs(images_subdir)
        picture.save(os.path.join(images_subdir, "image_%d.jpg" % i))
Пример #26
0
    def push_mxrecord(self, recname):
        """Copy the image payloads of ``recname`` into ``self.ioout``.

        Record 0 of the input gives the range of identity records; each
        identity record gives the range of its image records. Every image
        payload is written under the running key ``self.pos``, and for each
        identity the ``[first key, end key]`` pair is appended to
        ``self.class_metas``.
        """
        from mxnet import recordio
        print('Pushing mxrec:', recname)
        reader = recordio.MXIndexedRecordIO(recname + '.idx', recname + '.rec',
                                            'r')
        top_header, _ = recordio.unpack(reader.read_idx(0))
        id_begin, id_end = int(top_header.label[0]), int(top_header.label[1])
        print('datalen', id_end - id_begin)

        for idd in tqdm(range(id_begin, id_end)):
            id_header, _ = recordio.unpack(reader.read_idx(idd))
            img_begin = int(id_header.label[0])
            img_end = int(id_header.label[1])

            first_pos = self.pos
            for idx in range(img_begin, img_end):
                _, img = recordio.unpack(reader.read_idx(idx))
                self.ioout.write_idx(self.pos, img)
                self.pos += 1
            self.class_metas.append([first_pos, self.pos])
Пример #27
0
    def __init__(self, data_shape, path_imgrec, transform=None):
        """Open the record file and prepare the image index and transform.

        Record 0 is read as a header. When its ``flag`` is positive, image
        indices run from 1 up to the first header label; otherwise every
        record key is used.

        Args:
            data_shape: Stored on ``self.data_shape``.
            path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
                derived by replacing the last four characters.
            transform: Transform to apply to samples. When ``None``, a random
                left/right flip followed by ``ToTensor`` is used.
        """
        super(FaceDataset, self).__init__()
        logging.info('loading recordio %s...',
                     path_imgrec)
        path_imgidx = path_imgrec[0:-4]+".idx"
        self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
        s = self.imgrec.read_idx(0)
        header, _ = recordio.unpack(s)
        if header.flag > 0:
            print('header0 label', header.label)
            self.header0 = (int(header.label[0]), int(header.label[1]))
            self.imgidx = list(range(1, int(header.label[0])))
        else:
            self.imgidx = list(self.imgrec.keys)
        self.seq = self.imgidx

        self.data_shape = data_shape
        # Honour a caller-supplied transform; the original always overwrote
        # it with the default pipeline, silently ignoring the argument.
        if transform is None:
            transform = transforms.Compose([
                transforms.RandomFlipLeftRight(),
                transforms.ToTensor()
            ])
        self.transform = transform
Пример #28
0
    def __init__(self, batch_size, data_shape,
                 path_imgrec=None, label_width=None,
                 shuffle=False, mean=None,
                 rand_mirror=False, cutoff=0, color_jittering=0,
                 data_name='data', label_name='softmax_label', **kwargs):
        """Open the record file and set up the iterator's bookkeeping.

        Args:
            batch_size: Number of samples per batch.
            data_shape: Per-sample shape; indices 1 and 2 build ``image_size``.
            path_imgrec: Path of the ``.rec`` file; the ``.idx`` path is
                derived by replacing the last four characters.
            label_width: Second dimension of the advertised label shape.
            shuffle: Stored on ``self.shuffle``.
            mean: Optional per-channel mean, reshaped to ``(1, 1, 3)``.
            rand_mirror: Stored on ``self.rand_mirror``.
            cutoff: Stored on ``self.cutoff``.
            color_jittering: Stored on ``self.color_jittering``.
            data_name: Name advertised in ``provide_data``.
            label_name: Name advertised in ``provide_label``.
            **kwargs: Ignored.

        Raises:
            AssertionError: If ``path_imgrec`` or ``label_width`` is falsy.
        """
        super(FaceImageIter, self).__init__()
        assert path_imgrec
        assert label_width
        logging.info('loading recordio %s...', path_imgrec)
        idx_path = path_imgrec[0:-4] + ".idx"
        self.imgrec = recordio.MXIndexedRecordIO(idx_path, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
        # Record 0 is read and unpacked but its content is not used.
        recordio.unpack(self.imgrec.read_idx(0))
        self.imgidx = list(self.imgrec.keys)
        self.seq = self.imgidx
        self.labelWidth = label_width

        if mean:
            self.mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
            self.nd_mean = mx.nd.array(self.mean).reshape((1, 1, 3))
        else:
            self.mean = mean
            self.nd_mean = None

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size,) + data_shape)]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.shuffle = shuffle
        height, width = data_shape[1], data_shape[2]
        self.image_size = '%d,%d' % (height, width)
        self.rand_mirror = rand_mirror
        logging.info('rand_mirror: %d', rand_mirror)
        self.cutoff = cutoff
        self.color_jittering = color_jittering
        self.CJA = mx.image.ColorJitterAug(0.125, 0.125, 0.125)
        self.provide_label = [(label_name, (batch_size, self.labelWidth))]
        self.cur = 0
        self.nbatch = 0
        self.is_init = False
Пример #29
0
def extract_ms1m_info():
    """Scan the MS1M record file and dump its index information.

    Record 0 is read as a header; image indices run from 1 up to its first
    label, and the records between its two labels give each identity's
    ``(start, end)`` image range. Identity keys are shifted down by the
    fixed offset 3804847 before the image indices, shifted identities and
    ranges are written to ``info.pk`` in the dataset directory.
    """
    id_offset = 3804847

    info = edict()
    path_ms1m = lz.share_path2 + 'faces_ms1m_112x112/'
    path_imgrec = lz.share_path2 + 'faces_ms1m_112x112/train.rec'
    path_imgidx = path_imgrec[0:-4] + ".idx"
    info.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')

    header0, _ = recordio.unpack(info.imgrec.read_idx(0))
    info.header0 = (int(header0.label[0]), int(header0.label[1]))
    info.imgidx = list(range(1, int(header0.label[0])))
    info.seq_identity = list(range(int(header0.label[0]),
                                   int(header0.label[1])))

    id2range = dict()
    for identity in info.seq_identity:
        id_header, _ = recordio.unpack(info.imgrec.read_idx(identity))
        id2range[(identity - id_offset)] = (int(id_header.label[0]),
                                            int(id_header.label[1]))

    info.seq = info.imgidx
    info.seq_identity = [int(t) - id_offset for t in info.seq_identity]
    lz.msgpack_dump([info.imgidx, info.seq_identity, id2range],
                    path_ms1m + '/info.pk')
Пример #30
0
#pre_process.py
import os
import pickle

import cv2 as cv
import mxnet as mx
from mxnet import recordio
from tqdm import tqdm

from config import path_imgidx, path_imgrec, IMG_DIR, pickle_file
from utils import ensure_folder

if __name__ == "__main__":
    ensure_folder(IMG_DIR)
    imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
    print(len(imgrec))

    samples = []
    class_ids = set()

    # %% 1 ~ 5179510
    for i in tqdm(range(5179510)):
        print(i)
        try:
            header, s = recordio.unpack(imgrec.read_idx(i + 1))
            img = mx.image.imdecode(s).asnumpy()
            print(img.shape)
            img = cv.cvtColor(img, cv.COLOR_RGB2BGR)
            print(header.label)
            print(type(header.label))