示例#1
0
    def run(self):
        """Build ImageNet macrobatches from the raw ILSVRC2012 tar files.

        Reads the train and validation tars from ``self.image_dir``, loads the
        neon-format metadata (label_dict, label_names, global_mean, ...) into
        instance attributes, shuffles the training JPEGs with a fixed seed,
        and writes 'train' and 'val' macrobatches via ``self.write_batches``,
        finishing with ``self.save_meta()``.

        Raises:
            IOError: if either expected tar file is missing from image_dir.
        """
        load_dir = self.image_dir
        train_tar = os.path.join(load_dir, 'ILSVRC2012_img_train.tar')
        validation_tar = os.path.join(load_dir, 'ILSVRC2012_img_val.tar')

        for infile in (train_tar, validation_tar):
            if not os.path.exists(infile):
                raise IOError(
                    infile +
                    " not found. Please ensure you have ImageNet downloaded. "
                    "More info here: http://www.image-net.org/download-imageurls"
                )
        # download our version of the metadata
        meta_dir = load_i1kmeta(self.out_dir)
        meta_file = os.path.join(meta_dir, 'neon_ILSVRC2012_devmeta.pkl')
        self.meta = load_obj(meta_file)
        self.__dict__.update(
            self.meta)  # get label_dict, label_names, global_mean from meta
        # Collapse the stored mean to one value per channel; [::-1] reverses
        # the channel order (presumably RGB -> BGR -- confirm with consumers).
        self.global_mean = np.mean(self.global_mean.reshape(3, -1),
                                   axis=1).reshape(3, 1)[::-1]

        # Fixed seed so the training shuffle (and thus macrobatch contents)
        # is reproducible across runs.
        np.random.seed(0)
        with tarfile.open(train_tar) as tf:
            # The train tar contains one nested tar per synset.
            s_sets = tf.getmembers()
            s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
            print('Building trainset list from synset tars.')
            t_jpegfiles = []
            totalsz = len(s_tars)
            for i, st in enumerate(s_tars):
                if i % 100 == 0:
                    print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
                # NOTE(review): the extracted file objects are consumed after
                # st.close() below; this appears to rely on them reading from
                # the still-open outer tar stream -- verify before reordering.
                t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
                st.close()
            print("Done loading")
            np.random.shuffle(t_jpegfiles)
            # The first 9 chars of a member name are its synset id, which is
            # the key into label_dict.
            train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
            self.train_nrec = len(t_jpegfiles)
            # Ceiling division: number of macrobatches required.
            self.ntrain = -(-self.train_nrec // self.macro_size)
            self.nclass = {'l_id': 1000}
            self.train_start = 0
            train_labels = {'l_id': np.array(train_labels, dtype=np.int32)}
            self.write_batches('train', self.train_start, train_labels,
                               t_jpegfiles)

        with tarfile.open(validation_tar) as tf:
            # Sort by member name so files line up with val_ground_truth.
            jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()],
                               key=lambda x: x.name)
            self.val_nrec = len(jpegfiles)
            self.nval = -(-self.val_nrec // self.macro_size)
            # Start val batch numbering at the next power of 10 above ntrain
            # so train and val macrobatch indices never collide.
            self.val_start = 10**int(np.log10(self.ntrain) + 1)
            val_labels = {
                'l_id': np.array(self.val_ground_truth, dtype=np.int32)
            }
            self.write_batches('val', self.val_start, val_labels, jpegfiles)
        self.save_meta()
示例#2
0
    def run(self):
        """Build ImageNet macrobatches from the raw ILSVRC2012 tar files.

        Reads the train and validation tars from ``self.image_dir``, loads the
        neon-format metadata (label_dict, label_names, global_mean, ...) into
        instance attributes, shuffles the training JPEGs with a fixed seed,
        and writes 'train' and 'val' macrobatches via ``self.write_batches``,
        finishing with ``self.save_meta()``.

        Raises:
            IOError: if either expected tar file is missing from image_dir.
        """
        load_dir = self.image_dir
        train_tar = os.path.join(load_dir, "ILSVRC2012_img_train.tar")
        validation_tar = os.path.join(load_dir, "ILSVRC2012_img_val.tar")

        for infile in (train_tar, validation_tar):
            if not os.path.exists(infile):
                raise IOError(
                    infile + " not found. Please ensure you have ImageNet downloaded. "
                    "More info here: http://www.image-net.org/download-imageurls"
                )
        # download our version of the metadata
        meta_dir = load_i1kmeta(self.out_dir)
        meta_file = os.path.join(meta_dir, "neon_ILSVRC2012_devmeta.pkl")
        self.meta = load_obj(meta_file)
        self.__dict__.update(self.meta)  # get label_dict, label_names, global_mean from meta
        # Collapse the stored mean to one value per channel; [::-1] reverses the
        # channel order (presumably RGB -> BGR -- confirm with consumers).
        self.global_mean = np.mean(self.global_mean.reshape(3, -1), axis=1).reshape(3, 1)[::-1]

        # Fixed seed so the training shuffle (and thus macrobatch contents) is
        # reproducible across runs.
        np.random.seed(0)
        with tarfile.open(train_tar) as tf:
            # The train tar contains one nested tar per synset.
            s_sets = tf.getmembers()
            s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
            print("Building trainset list from synset tars.")
            t_jpegfiles = []
            totalsz = len(s_tars)
            for i, st in enumerate(s_tars):
                if i % 100 == 0:
                    print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
                # NOTE(review): the extracted file objects are consumed after
                # st.close() below; this appears to rely on them reading from
                # the still-open outer tar stream -- verify before reordering.
                t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
                st.close()
            print("Done loading")
            np.random.shuffle(t_jpegfiles)
            # The first 9 chars of a member name are its synset id, which is
            # the key into label_dict.
            train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
            self.train_nrec = len(t_jpegfiles)
            # Ceiling division: number of macrobatches required.
            self.ntrain = -(-self.train_nrec // self.macro_size)
            self.nclass = {"l_id": 1000}
            self.train_start = 0
            train_labels = {"l_id": np.array(train_labels, dtype=np.int32)}
            self.write_batches("train", self.train_start, train_labels, t_jpegfiles)

        with tarfile.open(validation_tar) as tf:
            # Sort by member name so files line up with val_ground_truth.
            jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()], key=lambda x: x.name)
            self.val_nrec = len(jpegfiles)
            self.nval = -(-self.val_nrec // self.macro_size)
            # Start val batch numbering at the next power of 10 above ntrain so
            # train and val macrobatch indices never collide.
            self.val_start = 10 ** int(np.log10(self.ntrain) + 1)
            val_labels = {"l_id": np.array(self.val_ground_truth, dtype=np.int32)}
            self.write_batches("val", self.val_start, val_labels, jpegfiles)
        self.save_meta()