Example #1
    def load_images(self, img_size):
        if os.path.exists(self.data_path + f"/image_data_{img_size}.plk"):
            with open(self.data_path + f"/image_data_{img_size}.plk",
                      "rb") as fp:
                data = pickle.load(fp)
        else:
            file_name = "CUB_200_2011.tgz"
            url = "http://www.vision.caltech.edu.s3-us-west-2.amazonaws.com/visipedia-data/CUB-200-2011/CUB_200_2011.tgz"
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            # CUB images sit in per-class subdirectories, matching the glob used below.
            if len(glob.glob(self.data_path + "/images/*/*.jpg")) != IMAGE_SIZE:
                print("Info:Extracting image data from tar file")
                import tarfile, shutil
                with tarfile.open(self.data_path + f"/{file_name}",
                                  'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                    shutil.move(self.data_path + "/CUB_200_2011/images",
                                self.data_path)

            data = {}
            files = glob.glob(self.data_path + "/images/*/*.jpg")
            print(f"Info:load {img_size}x{img_size} image data")
            for i, _path in enumerate(files):
                arr_id = int(_path.split("_")[-1].split(".")[0])
                arr = self.path2array(_path, img_size)
                data[arr_id] = arr
                progress(i + 1, IMAGE_SIZE)
            print("")

            with open(self.data_path + f"/image_data_{img_size}.plk",
                      "wb") as fp:
                pickle.dump(data, fp)

        return data
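The loader above follows a cache-or-build pattern: it returns the pickled arrays when the cache file exists, and otherwise downloads the CUB_200_2011 archive, extracts it, converts every image, and writes the cache. A minimal usage sketch, assuming a hypothetical wrapper class (its name and constructor are not in the original code) that provides data_path and path2array:

    # Hypothetical usage; BirdsDataset is an assumed wrapper class.
    dataset = BirdsDataset(data_path="./data/birds")
    images = dataset.load_images(img_size=64)   # dict mapping image id -> ndarray
    print(len(images))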
Example #2
    def load_english_captions(self, sent_len):
        if os.path.exists(self.data_path + "/english_captions.pkl"):
            with open(self.data_path + "/english_captions.pkl", "rb") as fp:
                data = pickle.load(fp)
                return data["train_data"], data["val_data"], data["index2tok"]
        else:
            url = "http://images.cocodataset.org/annotations/annotations_trainval2014.zip"
            file_name = "annotations_trainval2014.zip"
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            annotations_dir = self.data_path + "/annotations"
            if not os.path.exists(annotations_dir): os.mkdir(annotations_dir)
            if len(os.listdir(annotations_dir)) != 6:
                print("Info:extracting zip file")
                import zipfile
                with zipfile.ZipFile(self.data_path + f"/{file_name}",
                                     'r') as zip_fp:
                    zip_fp.extractall(self.data_path)

            print("Info:loading english caption data")

            def load_caption(data_path):
                def preprocess_caption(line):
                    line = line.replace("\ufffd\ufffd", " ")
                    tokenizer = RegexpTokenizer(r"\w+")
                    tokens = tokenizer.tokenize(line.lower())
                    return tokens

                with open(data_path, "r", encoding="utf_8") as fp:
                    data = json.load(fp)
                    anns = data["annotations"]
                    data_dict = defaultdict(list)
                    for ann in anns:
                        data_dict[ann["image_id"]].append(
                            preprocess_caption(ann["caption"]))
                    return data_dict

            files = glob.glob(annotations_dir + "/captions_*2014.json")
            train = None
            val = None
            for data_path in files:
                captions = load_caption(data_path)
                if "train" in data_path: train = captions
                elif "val" in data_path: val = captions

            train_data, val_data, index2tok, tok2index = self.make_vocaburaly(
                train, val, sent_len)

            with open(self.data_path + "/english_captions.pkl", "wb") as fp:
                data = {
                    "train_data": train_data,
                    "val_data": val_data,
                    "index2tok": index2tok,
                    "tok2index": tok2index
                }
                pickle.dump(data, fp)

            return train_data, val_data, index2tok
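The nested preprocess_caption helper lower-cases each caption and keeps only word characters via NLTK's RegexpTokenizer. A small stand-alone illustration of that step (the sample caption is made up):

    from nltk.tokenize import RegexpTokenizer

    # Tokenize a made-up caption the same way load_english_captions does.
    tokenizer = RegexpTokenizer(r"\w+")
    caption = "A small bird perched on a branch."
    print(tokenizer.tokenize(caption.lower()))
    # ['a', 'small', 'bird', 'perched', 'on', 'a', 'branch']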
Example #3
def load_japanese_captions(sent_len, data_path, threshpld):
    if os.path.exists(data_path + "/japanese_caption_data.pkl"):
        with open(data_path + "/japanese_caption_data.pkl", "rb") as fp:
            data = pickle.load(fp)
            return data["train_data"], data["val_data"], data["index2tok"]
    else:
        url = "https://github.com/STAIR-Lab-CIT/STAIR-captions/raw/master/stair_captions_v1.2.tar.gz"
        file_name = "stair_captions_v1.2.tar.gz"
        if not os.path.exists(data_path + f"/{file_name}"):
            downloder(url, data_path + f"/{file_name}")

        path = data_path + "/stair_captions_v1.2"
        if not os.path.exists(path):
            os.mkdir(path)
            print("Info:extracting caption data")
            import tarfile
            with tarfile.open(data_path + f"/{file_name}", "r") as tar_fp:
                tar_fp.extractall(path)
        files = glob.glob(path + "/*_tokenized.json")

        print("Info:loading japanese caption data")

        def load_caption(data_path):
            def preprocess_caption(line):
                prep_line = re.sub("[%s]" % re.escape(string.punctuation), " ",
                                   line.rstrip())
                prep_line = prep_line.replace("-", " ").replace("\n", "")
                return prep_line.lower().split(" ")

            with open(data_path, "r", encoding="utf_8") as fp:
                data = json.load(fp)
                anns = data["annotations"]
                data_dict = defaultdict(list)
                for ann in anns:
                    data_dict[ann["image_id"]].append(
                        preprocess_caption(ann["tokenized_caption"]))
                return data_dict

        train = None
        val = None
        for _path in files:
            captions = load_caption(_path)
            if "train" in _path: train = captions
            elif "val" in _path: val = captions

        train_data, val_data, index2tok, tok2index = make_vocaburaly(
            train, val, sent_len, threshpld)

        with open(data_path + "/japanese_caption_data.pkl", "wb") as fp:
            data = {
                "train_data": train_data,
                "val_data": val_data,
                "index2tok": index2tok,
                "tok2index": tok2index
            }
            pickle.dump(data, fp)

        return train_data, val_data, index2tok
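Because this loader is a module-level function rather than a method, it can be called directly once downloder and make_vocaburaly are importable. A hedged usage sketch; the sentence length and threshold below are assumed values, not ones from the original project:

    # Assumed parameter values, for illustration only.
    train_data, val_data, index2tok = load_japanese_captions(
        sent_len=18, data_path="./data/coco", threshpld=5)
    print(len(index2tok), "vocabulary entries")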
Example #4
    def load_images(self, is_train=True):
        if is_train:
            data_type = "train"
            data_len = train_image_size
        else:
            data_type = "val"
            data_len = val_image_size

        path = self.data_path + "/image"
        if not os.path.exists(path): os.mkdir(path)

        if os.path.exists(path +
                          f"/{data_type}2014_array/{self.image_size[0]}"):
            files = glob.glob(
                path + f"/{data_type}2014_array/{self.image_size[0]}/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
        else:
            url = f"http://images.cocodataset.org/zips/{data_type}2014.zip"
            file_name = url.split("/")[-1]
            if not os.path.exists(path + f"/{file_name}"):
                downloder(url, path + f"/{file_name}")

            if len(glob.glob(path + f"/{data_type}2014/*.jpg")) != data_len:
                print(f"Info:Extracting {data_type} image data from zip file")
                import zipfile
                with zipfile.ZipFile(path + f"/{file_name}") as zip_fp:
                    zip_fp.extractall(path)
            files = glob.glob(path + f"/{data_type}2014/*.jpg")

            path = path + f"/{data_type}2014_array"
            if not os.path.exists(path): os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print(f"Info:Converting {data_type} image data to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i + 1, data_len)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
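This loader (like the next one) recovers the COCO image id by splitting the file name on underscores, which works because the 2014 file names follow the pattern COCO_<split>2014_<zero-padded id>.jpg. A quick stand-alone check of that parsing:

    # Example file name following the COCO 2014 naming scheme.
    path = "./image/train2014_array/256/COCO_train2014_000000391895.npy"
    image_id = int(path.split("_")[-1].split(".")[0])
    print(image_id)   # 391895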
Example #5
    def load_images(self, is_train=True):
        if is_train:
            data_type = "train"
            data_len = train_image_size
        else:
            data_type = "val"
            data_len = val_image_size

        if len(glob.glob(self.data_path +
                         f"/{data_type}2014_array/*.npy")) == data_len:
            files = glob.glob(self.data_path + f"/{data_type}2014_array/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
        else:
            url = f"http://images.cocodataset.org/zips/{data_type}2014.zip"
            file_name = url.split("/")[-1]
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            path = self.data_path + f"/{data_type}2014_array"
            if len(glob.glob(self.data_path +
                             f"/{data_type}2014/*.jpg")) != data_len:
                print(
                    f"Info:Extracting {data_type} images from zip file and converting to ndarray"
                )
                if not os.path.exists(path): os.mkdir(path)
                import zipfile
                with zipfile.ZipFile(self.data_path +
                                     f"/{file_name}") as zip_fp:
                    file_count = len(zip_fp.filelist)
                    for i, item in enumerate(zip_fp.filelist):
                        file_name = item.filename.split("/")[-1].split(".")[0]
                        zip_fp.extract(item, self.data_path)
                        if file_name != "":
                            arr = self.path2array(
                                self.data_path +
                                f"/{data_type}2014/{file_name}.jpg")
                            np.save(path + f"/{file_name}", arr)
                        progress(i + 1, file_count)
                    print("")

            files = glob.glob(path + "/*.npy")
            id_list = [
                int(path.split("_")[-1].split(".")[0]) for path in files
            ]
            data = {id: path for id, path in zip(id_list, files)}

            return data
Example #6
    def load_captions(self):
        path = self.data_path + "/annotations"
        if not os.path.exists(path): os.mkdir(path)
        if os.path.exists(path + "/caption_data.pkl"):
            with open(path + "/caption_data.pkl", "rb") as fp:
                data = pickle.load(fp)
                return data["train_data"], data["val_data"], data[
                    "embed_mat"], data["index2tok"], data["tok2index"]
        else:
            url = "http://images.cocodataset.org/annotations/annotations_trainval2014.zip"
            file_name = url.split("/")[-1]
            if not os.path.exists(path + f"/{file_name}"):
                downloder(url, path + f"/{file_name}")

            import zipfile
            with zipfile.ZipFile(path + f"/{file_name}") as zip_fp:
                zip_fp.extractall(os.path.abspath(path + "/.."))

            print("Info:loading caption data")
            files = glob.glob(path + "/captions*.json")

            def load_caption(data_path):
                with open(data_path, "r", encoding="utf_8") as fp:
                    data = json.load(fp)
                    anns = data["annotations"]
                    data_dict = defaultdict(list)
                    for ann in anns:
                        data_dict[ann["image_id"]].append(ann["caption"])
                    return data_dict

            train = None
            val = None
            for data_path in files:
                captions = load_caption(data_path)
                if "train" in data_path: train = captions
                elif "val" in data_path: val = captions

            train_data, val_data, embed_mat, index2tok, tok2index = self.make_vocaburaly(
                train, val, path)

            return train_data, val_data, embed_mat, index2tok, tok2index
Example #7
    def image2depth(self, img):
        try:
            sess = getattr(self, "sess")
            depth_net = getattr(self, "depth_net")
            input_node = getattr(self, "input_node")
        except AttributeError:
            import sys
            import tensorflow as tf
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
            sys.path.append(os.path.abspath(__file__ + "/../models"))
            from models.fcrn import ResNet50UpProj
            model_params_path = os.path.abspath(__file__ + "/../data/NYU_ResNet-UpProj.npy")

            if not os.path.exists(model_params_path):
                url = "http://campar.in.tum.de/files/rupprecht/depthpred/NYU_ResNet-UpProj.npy"
                downloder(url, model_params_path)

            self.input_node = tf.placeholder(tf.float32, shape=(None, 256, 256, 3))
            self.depth_net = ResNet50UpProj({'data': self.input_node}, 1, 1, False)

            self.sess = tf.Session()
            print('Loading the model')
            self.depth_net.load(model_params_path, self.sess)

            sess = getattr(self, "sess")
            depth_net = getattr(self, "depth_net")
            input_node = getattr(self, "input_node")
                
        def normalization(arr):
            _min, _max = np.min(arr), np.max(arr)
            arr = (arr - _min) / (_max - _min)
            return arr

        img = img.transpose((1, 2, 0))
        img = np.expand_dims(np.asarray(img), axis=0)
        pred = np.asarray(sess.run(depth_net.get_output(), feed_dict={input_node: img}))[0, :, :, 0]

        return normalization(pred)
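image2depth builds the FCRN depth network lazily on the first call and reuses the cached TensorFlow session afterwards. A minimal sketch of calling it, assuming a hypothetical wrapper instance and TensorFlow 1.x (tf.placeholder and tf.Session are not available in TensorFlow 2); the channel-first 3x256x256 input shape is inferred from the transpose above:

    import numpy as np

    converter = DepthEstimator()                           # assumed wrapper class
    rgb = np.random.rand(3, 256, 256).astype(np.float32)   # dummy channel-first image
    depth = converter.image2depth(rgb)                     # depth map normalized to [0, 1]
    print(depth.shape)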
Example #8
    def load_images(self):
        files_count = len(glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy"))
        if files_count == image_size:
            files = glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}
        else:
            url = "http://www.vision.caltech.edu.s3-us-west-2.amazonaws.com/visipedia-data/CUB-200-2011/CUB_200_2011.tgz"
            title = "CUB_200_2011.tgz"
            if not os.path.exists(self.data_path + f"/{title}"):
                downloder(url, self.data_path + f"/{title}")
            
            if len(glob.glob(self.data_path + "/image/*/*.jpg")) != image_size:
                print("Info:Extracting image data from tar file")
                import tarfile, shutil
                with tarfile.open(self.data_path + f"/{title}", 'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                shutil.move(self.data_path + "/CUB_200_2011/images", self.data_path)
                os.rename(self.data_path + "/images", self.data_path + "/image")

            files = glob.glob(self.data_path + "/image/*/*.jpg")
            path = self.data_path + "/array"
            if not os.path.exists(path): os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print("Info:Converting image data to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i+1, image_size)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}

        return data
Example #9
    def load_images(self):
        if os.path.exists(self.data_path + "/image_data.plk"):
            with open(self.data_path + "/image_data.plk", "rb") as fp:
                data = pickle.load(fp)
        else:
            file_name = "102flowers.tgz"
            url = "http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
            if not os.path.exists(self.data_path + f"/{file_name}"):
                downloder(url, self.data_path + f"/{file_name}")

            if len(glob.glob(self.data_path + "/images/*.jpg")) != IMAGE_SIZE:
                print("Info:Extracting image data from tar file")
                import tarfile
                with tarfile.open(self.data_path + f"/{file_name}",
                                  'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                os.rename(self.data_path + "/jpg", self.data_path + "/images")

            data = {}
            files = glob.glob(self.data_path + "/images/*.jpg")
            print("Info:load image data")
            for i, _path in enumerate(files):
                arr_id = int(_path.split("_")[-1].split(".")[0])
                arr_256 = self.path2array(_path, 256)
                arr_128 = self.path2array(_path, 128)
                arr_64 = self.path2array(_path, 64)
                data[arr_id] = {
                    "x_256": arr_256,
                    "x_128": arr_128,
                    "x_64": arr_64
                }
                progress(i + 1, IMAGE_SIZE)
            print("")

            with open(self.data_path + "/image_data.plk", "wb") as fp:
                pickle.dump(data, fp)

        return data
Example #10
    def load_images(self):
        files_count = len(glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy"))
        if files_count == image_size:
            files = glob.glob(self.data_path + f"/array/{self.image_size[0]}/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}
        else:
            url = "http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
            title = "102flowers.tgz"
            if not os.path.exists(self.data_path + f"/{title}"):
                downloder(url, self.data_path + f"/{title}")
            
            if len(glob.glob(self.data_path + "/image/*.jpg")) != image_size:
                print("Info:Extracting image data from tar file")
                import tarfile
                with tarfile.open(self.data_path + f"/{title}", 'r') as tar_fp:
                    tar_fp.extractall(self.data_path)
                os.rename(self.data_path + "/jpg", self.data_path + "/image")

            files = glob.glob(self.data_path + "/image/*.jpg")
            path = self.data_path + "/array"
            if not os.path.exists(path): os.mkdir(path)
            path = path + f"/{self.image_size[0]}"
            if not os.path.exists(path):
                os.mkdir(path)
                print("Info:Converting image data to ndarray")
                for i, _path in enumerate(files):
                    file_name = _path.split("/")[-1].split(".")[0]
                    arr = self.path2array(_path)
                    np.save(path + f"/{file_name}.npy", arr)
                    progress(i+1, image_size)
                print("")

            files = glob.glob(path + "/*.npy")
            id_list = [int(path.split("_")[-1].split(".")[0]) for path in files]
            data = {id:path for id, path in zip(id_list,files)}

        return data
Example #11
    def make_embed_mat(self, vocaburaly):
        embed_mat_path = os.path.abspath(self.data_path +
                                         "/../crawl-300d-2M.vec")
        if not os.path.exists(embed_mat_path):
            url = "https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip"
            if not os.path.exists(embed_mat_path + ".zip"):
                downloder(url, embed_mat_path + ".zip")
            import zipfile
            with zipfile.ZipFile(embed_mat_path + ".zip") as zip_fp:
                zip_fp.extractall(os.path.abspath(self.data_path + "/../"))

        with open(embed_mat_path, "r", encoding="utf_8") as fp:
            n, d = map(int, fp.readline().split())
            embed_data = {}
            for line in fp:
                tokens = line.rstrip().split(' ')
                if tokens[0] in vocaburaly:
                    embed_data[tokens[0]] = [float(v) for v in tokens[1:]]
                    del vocaburaly[vocaburaly.index(tokens[0])]
            index2tok = {i + 3: key for i, key in enumerate(embed_data.keys())}
            tok2index = {key: i + 3 for i, key in enumerate(embed_data.keys())}
            embed_mat = np.array([
                np.random.normal(size=d),
                np.random.normal(size=d),
                np.random.normal(size=d)
            ] + [
                embed_data[index2tok[i + 3]]
                for i in range(len(index2tok.keys()))
            ])
            index2tok[0] = "<S>"
            index2tok[1] = "</S>"
            index2tok[2] = "<UNK>"
            tok2index["<S>"] = 0
            tok2index["</S>"] = 1
            tok2index["<UNK>"] = 2

        return embed_mat, index2tok, tok2index
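Rows 0-2 of the returned embedding matrix are random vectors reserved for the <S>, </S>, and <UNK> tokens, so entries taken from the fastText file start at index 3. A short sketch of encoding a tokenized sentence with the returned tok2index, falling back to <UNK> for out-of-vocabulary words:

    def encode(tokens, tok2index):
        # Wrap the sentence in <S> ... </S> and map unknown tokens to <UNK>.
        return [tok2index["<S>"]] + [
            tok2index.get(tok, tok2index["<UNK>"]) for tok in tokens
        ] + [tok2index["</S>"]]

    # indices = encode(["a", "bird", "on", "a", "branch"], tok2index)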