Example no. 1
0
 def _prepare_synset_to_human(self):
     """Ensure the synset -> human-readable mapping file exists under self.root.

     Downloads ``synset_human.txt`` when it is missing or its size differs
     from the expected byte count (a cheap integrity check).
     """
     EXPECTED_SIZE = 2655750
     URL = "https://heibox.uni-heidelberg.de/f/9f28e956cd304264bb82/?dl=1"
     self.human_dict = os.path.join(self.root, "synset_human.txt")
     # Short-circuits before getsize() when the file is absent.
     is_valid = (os.path.exists(self.human_dict)
                 and os.path.getsize(self.human_dict) == EXPECTED_SIZE)
     if not is_valid:
         download(URL, self.human_dict)
Example no. 2
0
    def _prepare(self):
        """Download and prepare the AwA2 (Animals with Attributes 2) dataset.

        Idempotent: work is skipped when ``edu.is_prepared(self.root)`` holds.
        Produces ``train.txt`` / ``test.txt`` (image file lists, relative to
        ``self.root``) and a sorted ``classes.txt`` under the cache root.

        NOTE(review): the deterministic split below depends on os.walk()
        visiting files in a stable order on the target filesystem — confirm
        before relying on split reproducibility across machines.
        """
        # Cache root: $XDG_CACHE_HOME/autoencoders/data/<NAME>, defaulting
        # to ~/.cache when the environment variable is unset.
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
        SRC = {
            "AwA2-data.zip": "http://cvml.ist.ac.at/AwA2/AwA2-data.zip",
        }
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))
            os.makedirs(self.root, exist_ok=True)

            # Fetch and unpack the archive only when the extracted folder
            # is not already present.
            datadir = os.path.join(self.root, "Animals_with_Attributes2")
            if not os.path.exists(datadir):
                datapath = os.path.join(self.root, "AwA2-data.zip")
                if not os.path.exists(datapath):
                    download(SRC["AwA2-data.zip"], datapath)
                edu.unpack(datapath)

            # make filelist
            # Collect every jpg/png/jpeg (case-insensitive; rfind matches the
            # substring anywhere in the name, not just the extension).
            images = list()
            for path, subdirs, files in os.walk(
                    os.path.join(datadir, "JPEGImages")):
                for name in files:
                    searchname = name.lower()
                    if (searchname.rfind('jpg') != -1
                            or searchname.rfind('png') != -1
                            or searchname.rfind('jpeg') != -1):
                        # Paths are stored relative to self.root so the file
                        # lists stay valid if the cache directory moves.
                        filename = os.path.relpath(os.path.join(path, name),
                                                   start=self.root)
                        images.append(filename)

            # Deterministic 5000-image test split: fixed seed 1 so repeated
            # preparation yields the same partition.
            prng = np.random.RandomState(1)
            test = set(prng.choice(len(images), 5000, replace=False))
            train_images = [
                images[i] for i in range(len(images)) if not i in test
            ]
            test_images = [images[i] for i in range(len(images)) if i in test]

            with open(os.path.join(self.root, "train.txt"), "w") as f:
                f.write("\n".join(train_images) + "\n")

            with open(os.path.join(self.root, "test.txt"), "w") as f:
                f.write("\n".join(test_images) + "\n")

            # classes.txt ships as "<index>\t<name>"; keep only the name
            # (last whitespace-separated token) and sort alphabetically.
            with open(
                    os.path.join(self.root,
                                 "Animals_with_Attributes2/classes.txt"),
                    "r") as f:
                classes = f.read().splitlines()
                classes = [cls.split()[-1] for cls in classes]
                classes = sorted(classes)

            with open(os.path.join(self.root, "classes.txt"), "w") as f:
                f.write("\n".join(classes) + "\n")

            # Mark the root so subsequent calls become no-ops.
            edu.mark_prepared(self.root)
Example no. 3
0
 def _prepare_animal_synsets(self):
     """Ensure the animal synset list is cached locally.

     Sets ``self.root`` and ``self.animal_synsets``, downloading the file
     when it is missing or its byte size does not match the expected value.
     """
     EXPECTED_SIZE = 1490
     URL = "https://heibox.uni-heidelberg.de/f/c18cdf02ea0b4e758729/?dl=1"
     cache_home = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
     self.root = os.path.join(cache_home, "autoencoders/data/ImageNetAnimals")
     self.animal_synsets = os.path.join(self.root, "animal_synsets.txt")
     target = self.animal_synsets
     # exists() is checked first so getsize() never runs on a missing file.
     up_to_date = os.path.exists(target) and os.path.getsize(target) == EXPECTED_SIZE
     if not up_to_date:
         download(URL, target)
Example no. 4
0
    def _prepare(self):
        """Download and lay out the ImageNet validation split.

        Fetches the validation tarball via academictorrents, extracts it,
        reorganizes the flat image files into one folder per synset using a
        downloaded filename->synset mapping, and writes ``filelist.txt``.
        Idempotent via ``edu.is_prepared`` / ``edu.mark_prepared``.
        """
        self.random_crop = retrieve(self.config,
                                    "ImageNetValidation/random_crop",
                                    default=False)
        # Cache root: $XDG_CACHE_HOME/autoencoders/data/<NAME>.
        cachedir = os.environ.get("XDG_CACHE_HOME",
                                  os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
        self.datadir = os.path.join(self.root, "data")
        self.txt_filelist = os.path.join(self.root, "filelist.txt")
        # ImageNet validation set size (number of images).
        self.expected_length = 50000
        if not edu.is_prepared(self.root):
            # prep
            self.logger.info("Preparing dataset {} in {}".format(
                self.NAME, self.root))

            datadir = self.datadir
            if not os.path.exists(datadir):
                # Re-download the tarball when missing or size-mismatched
                # (cheap integrity check against a partial download).
                path = os.path.join(self.root, self.FILES[0])
                if not os.path.exists(path) or not os.path.getsize(
                        path) == self.SIZES[0]:
                    import academictorrents as at
                    atpath = at.get(self.AT_HASH, datastore=self.root)
                    assert atpath == path

                self.logger.info("Extracting {} to {}".format(path, datadir))
                os.makedirs(datadir, exist_ok=True)
                # NOTE(review): extractall() without a member filter trusts
                # the archive's paths; fine for this known tarball, but a
                # malicious archive could escape datadir (tar path traversal).
                with tarfile.open(path, "r:") as tar:
                    tar.extractall(path=datadir)

                # Mapping file: each line is "<filename> <synset>".
                vspath = os.path.join(self.root, self.FILES[1])
                if not os.path.exists(vspath) or not os.path.getsize(
                        vspath) == self.SIZES[1]:
                    download(self.VS_URL, vspath)

                with open(vspath, "r") as f:
                    synset_dict = f.read().splitlines()
                    synset_dict = dict(line.split() for line in synset_dict)

                # Move every extracted image into its synset subfolder.
                self.logger.info("Reorganizing into synset folders")
                synsets = np.unique(list(synset_dict.values()))
                for s in synsets:
                    os.makedirs(os.path.join(datadir, s), exist_ok=True)
                for k, v in synset_dict.items():
                    src = os.path.join(datadir, k)
                    dst = os.path.join(datadir, v)
                    shutil.move(src, dst)

            # Write a sorted, newline-terminated list of all JPEGs,
            # relative to datadir.
            filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG"))
            filelist = [os.path.relpath(p, start=datadir) for p in filelist]
            filelist = sorted(filelist)
            filelist = "\n".join(filelist) + "\n"
            with open(self.txt_filelist, "w") as f:
                f.write(filelist)

            edu.mark_prepared(self.root)
Example no. 5
0
    def _prepare(self):
        """Prepare the AnimalFaces dataset in the local cache.

        Crops face regions out of ImageNet train images according to a
        downloaded coordinates file (one line per crop:
        ``<relpath> <x> <y> <w> <h>``), then fetches the six split-list
        files. Idempotent via ``edu.is_prepared`` / ``edu.mark_prepared``.
        """
        cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
        self.root = os.path.join(cachedir, "autoencoders/data/AnimalFaces")
        self.logger.info("Using data located at {}".format(self.root))

        os.makedirs(self.root, exist_ok=True)
        self.datadir = os.path.join(self.root, "data")

        if not edu.is_prepared(self.root):
            self.logger.info("Preparing dataset {} in {}".format(self.NAME, self.root))

            if not os.path.exists(self.datadir):
                os.makedirs(self.datadir, exist_ok=True)
                imagenet = ImageNetTrain()

                coor_path = os.path.join(self.root, "animalface_coordinates.txt")
                if not os.path.exists(coor_path):
                    download(self.COOR_URL, coor_path)

                with open(coor_path, "r") as f:
                    animalface_coordinates = f.readlines()

                for line in tqdm(animalface_coordinates):
                    ls = line.strip().split(' ')
                    img_name = os.path.join(imagenet.datadir, ls[0])
                    x = int(ls[1])
                    y = int(ls[2])
                    w = int(ls[3])
                    h = int(ls[4])
                    # BUGFIX: the original never closed the source image, so
                    # one file handle leaked per coordinate line; the context
                    # manager closes it after the pixel data is read.
                    # NOTE(review): PIL crop() takes (left, upper, right,
                    # lower); the names suggest (x, y, w, h) — presumably the
                    # coordinates file already stores right/lower. Behavior
                    # kept as-is; confirm against the file's producer.
                    with Image.open(img_name) as img:
                        crop = img.convert('RGB').crop((x, y, w, h))

                    out_name = os.path.join(self.datadir,
                                            '%s_%d_%d_%d_%d.jpg' % (ls[0], x, y, w, h))
                    os.makedirs(os.path.dirname(out_name), exist_ok=True)
                    crop.save(out_name)

            # The six auxiliary split lists all follow the same
            # fetch-if-missing pattern; drive them from data instead of
            # six copy-pasted stanzas (same paths, same URLs, same order).
            split_lists = [
                ("animals_list_train.txt", self.TRAIN_URL),
                ("animals_list_test.txt", self.TEST_URL),
                ("shared_animalfaces_train.txt", self.SHARED_TRAIN_URL),
                ("shared_animalfaces_test.txt", self.SHARED_TEST_URL),
                ("restricted_animalfaces_train.txt", self.RESTRICTED_TRAIN_URL),
                ("restricted_animalfaces_test.txt", self.RESTRICTED_TEST_URL),
            ]
            for fname, url in split_lists:
                path = os.path.join(self.root, fname)
                if not os.path.exists(path):
                    download(url, path)

            edu.mark_prepared(self.root)
Example no. 6
0
 def _prepare_idx_to_synset(self):
     """Download the index -> synset YAML mapping if it is not cached yet."""
     URL = "https://heibox.uni-heidelberg.de/f/d835d5b6ceda4d3aa910/?dl=1"
     self.idx2syn = os.path.join(self.root, "index_synset.yaml")
     # NOTE(review): unlike the sibling helpers there is no size check here,
     # so a truncated download would never be re-fetched.
     if os.path.exists(self.idx2syn):
         return
     download(URL, self.idx2syn)