def run(self):
    """Convert the raw ILSVRC2012 tarballs into shuffled macro batches.

    Reads ``ILSVRC2012_img_train.tar`` / ``ILSVRC2012_img_val.tar`` from
    ``self.image_dir``, loads the neon devkit metadata (label_dict,
    label_names, global_mean), then streams every JPEG through
    ``self.write_batches`` for the train and val partitions and finally
    persists the dataset metadata via ``self.save_meta``.

    Raises:
        IOError: if either ImageNet tarball is missing from ``image_dir``.
    """
    load_dir = self.image_dir
    train_tar = os.path.join(load_dir, 'ILSVRC2012_img_train.tar')
    validation_tar = os.path.join(load_dir, 'ILSVRC2012_img_val.tar')
    for infile in (train_tar, validation_tar):
        if not os.path.exists(infile):
            # BUGFIX: original implicit concatenation lacked a separating
            # space, yielding "...downloaded.More info here:".
            raise IOError(
                infile + " not found. Please ensure you have ImageNet downloaded. "
                "More info here: http://www.image-net.org/download-imageurls"
            )
    # download our version of the metadata
    meta_dir = load_i1kmeta(self.out_dir)
    meta_file = os.path.join(meta_dir, 'neon_ILSVRC2012_devmeta.pkl')
    self.meta = load_obj(meta_file)
    # get label_dict, label_names, global_mean from meta
    self.__dict__.update(self.meta)
    # Collapse the per-pixel mean to one value per channel, then reverse
    # the channel order ([::-1] — presumably RGB -> BGR; confirm against
    # the consumer of global_mean).
    self.global_mean = np.mean(self.global_mean.reshape(3, -1), axis=1).reshape(3, 1)[::-1]
    # Fixed seed so the train-set shuffle is reproducible across runs.
    np.random.seed(0)
    with tarfile.open(train_tar) as tf:
        # The train tarball contains one nested tar per synset class.
        s_sets = tf.getmembers()
        s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
        print('Building trainset list from synset tars.')
        t_jpegfiles = []
        totalsz = len(s_tars)
        for i, st in enumerate(s_tars):
            if i % 100 == 0:
                print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
            t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
            st.close()
        print("Done loading")
        np.random.shuffle(t_jpegfiles)
        # First 9 chars of each member name are the synset id used as the
        # label_dict key (e.g. 'n01440764').
        train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
        self.train_nrec = len(t_jpegfiles)
        # Ceiling division: number of macro batches needed.
        self.ntrain = -(-self.train_nrec // self.macro_size)
        self.nclass = {'l_id': 1000}
        self.train_start = 0
        train_labels = {'l_id': np.array(train_labels, dtype=np.int32)}
        self.write_batches('train', self.train_start, train_labels, t_jpegfiles)
    with tarfile.open(validation_tar) as tf:
        # Sort by name so files line up with val_ground_truth ordering.
        jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()],
                           key=lambda x: x.name)
        self.val_nrec = len(jpegfiles)
        self.nval = -(-self.val_nrec // self.macro_size)
        # Start val macro indices at the next power of ten above ntrain so
        # they cannot collide with train macro indices.
        self.val_start = 10 ** int(np.log10(self.ntrain) + 1)
        val_labels = {'l_id': np.array(self.val_ground_truth, dtype=np.int32)}
        self.write_batches('val', self.val_start, val_labels, jpegfiles)
    self.save_meta()
def run(self):
    """Convert the raw ILSVRC2012 tarballs into shuffled macro batches.

    Reads ``ILSVRC2012_img_train.tar`` / ``ILSVRC2012_img_val.tar`` from
    ``self.image_dir``, loads the neon devkit metadata (label_dict,
    label_names, global_mean), then streams every JPEG through
    ``self.write_batches`` for the train and val partitions and finally
    persists the dataset metadata via ``self.save_meta``.

    Raises:
        IOError: if either ImageNet tarball is missing from ``image_dir``.
    """
    load_dir = self.image_dir
    train_tar = os.path.join(load_dir, "ILSVRC2012_img_train.tar")
    validation_tar = os.path.join(load_dir, "ILSVRC2012_img_val.tar")
    for infile in (train_tar, validation_tar):
        if not os.path.exists(infile):
            # BUGFIX: original implicit concatenation lacked a separating
            # space, yielding "...downloaded.More info here:".
            raise IOError(
                infile + " not found. Please ensure you have ImageNet downloaded. "
                "More info here: http://www.image-net.org/download-imageurls"
            )
    # download our version of the metadata
    meta_dir = load_i1kmeta(self.out_dir)
    meta_file = os.path.join(meta_dir, "neon_ILSVRC2012_devmeta.pkl")
    self.meta = load_obj(meta_file)
    # get label_dict, label_names, global_mean from meta
    self.__dict__.update(self.meta)
    # Collapse the per-pixel mean to one value per channel, then reverse
    # the channel order ([::-1] — presumably RGB -> BGR; confirm against
    # the consumer of global_mean).
    self.global_mean = np.mean(self.global_mean.reshape(3, -1), axis=1).reshape(3, 1)[::-1]
    # Fixed seed so the train-set shuffle is reproducible across runs.
    np.random.seed(0)
    with tarfile.open(train_tar) as tf:
        # The train tarball contains one nested tar per synset class.
        s_sets = tf.getmembers()
        s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
        print("Building trainset list from synset tars.")
        t_jpegfiles = []
        totalsz = len(s_tars)
        for i, st in enumerate(s_tars):
            if i % 100 == 0:
                print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
            t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
            st.close()
        print("Done loading")
        np.random.shuffle(t_jpegfiles)
        # First 9 chars of each member name are the synset id used as the
        # label_dict key (e.g. 'n01440764').
        train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
        self.train_nrec = len(t_jpegfiles)
        # Ceiling division: number of macro batches needed.
        self.ntrain = -(-self.train_nrec // self.macro_size)
        self.nclass = {"l_id": 1000}
        self.train_start = 0
        train_labels = {"l_id": np.array(train_labels, dtype=np.int32)}
        self.write_batches("train", self.train_start, train_labels, t_jpegfiles)
    with tarfile.open(validation_tar) as tf:
        # Sort by name so files line up with val_ground_truth ordering.
        jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()],
                           key=lambda x: x.name)
        self.val_nrec = len(jpegfiles)
        self.nval = -(-self.val_nrec // self.macro_size)
        # Start val macro indices at the next power of ten above ntrain so
        # they cannot collide with train macro indices.
        self.val_start = 10 ** int(np.log10(self.ntrain) + 1)
        val_labels = {"l_id": np.array(self.val_ground_truth, dtype=np.int32)}
        self.write_batches("val", self.val_start, val_labels, jpegfiles)
    self.save_meta()