예제 #1
0
    def pre_process(self):
        """Run OpenNMT-style preprocessing over the train/dev splits.

        Writes the preprocessed dataset into a temporary directory, packs
        that directory into a gzipped tar archive, and returns the archive
        contents as raw bytes so the caller can persist or ship them.

        Returns:
            bytes: contents of the ``.tar.gz`` archive produced by
            ``shutil.make_archive``.
        """
        save_data = temp_dir()

        train_src, train_tgt = self.train_data
        dev_src, dev_tgt = self.dev_data

        # Optionally enrich the source side with extra features before saving.
        if self.features:
            train_src = list(map(add_features, train_src))
            dev_src = list(map(add_features, dev_src))

        run_param('preprocess.py', {
            "train_src": save_temp(train_src),
            "train_tgt": save_temp(train_tgt),
            "valid_src": save_temp(dev_src),
            "valid_tgt": save_temp(dev_tgt),
            "save_data": save_data + "data",
            "dynamic_dict": None  # This will add a dynamic-dict parameter
        })

        data_zip = shutil.make_archive(base_name=temp_name(), format="gztar", root_dir=save_data)

        # Use a context manager so the handle is closed even if read() raises.
        with open(data_zip, "rb") as f:
            return f.read()
예제 #2
0
def download_synthetic(version, images_dir):
    """Download the synthetic hand-labels dataset and yield its examples.

    Fetches the zip at ``version["url"]``, extracts it to a temp directory,
    copies each split's images into ``images_dir``, and yields one record
    per annotated image.

    Args:
        version: mapping with a "url" key pointing at the dataset zip.
        images_dir: destination directory for the per-split image folders.

    Yields:
        (split, dict) pairs where the dict holds the relative image path,
        handedness ("left"/"right"), and the keypoint list from the JSON.
    """
    temp = temp_dir()
    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    labels_dir = path.join(temp, "hand_labels_synth")

    for split in ['synth1', 'synth2', 'synth3', 'synth4']:
        original_split_dir = path.join(labels_dir, split)

        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            # Open via a context manager: json.load(open(...)) leaks the handle.
            with open(path.join(original_split_dir, file), "r") as fh:
                content = json.load(fh)
            fname = file.replace(".json", ".jpg")

            copyfile(path.join(original_split_dir, fname),
                     path.join(split_dir, fname))

            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                "pose": content["hand_pts"]
            }
예제 #3
0
    def pre_process(self):
        """Preprocess planner/delexicalized pairs for OpenNMT-style training.

        Serializes the train/dev plans (with features added) and targets to
        temp files, runs ``preprocess.py`` into a temp directory, archives
        that directory, and returns the archive as raw bytes.

        Returns:
            bytes: contents of the ``.tar.gz`` archive produced by
            ``shutil.make_archive``.
        """
        save_data = temp_dir()

        train_src = save_temp(
            [add_features(d.plan) for d in self.train_reader.data])
        train_tgt = save_temp([d.delex for d in self.train_reader.data])
        valid_src = save_temp(
            [add_features(d.plan) for d in self.dev_reader.data])
        valid_tgt = save_temp([d.delex for d in self.dev_reader.data])

        run_param(
            'preprocess.py',
            {
                "train_src": train_src,
                "train_tgt": train_tgt,
                "valid_src": valid_src,
                "valid_tgt": valid_tgt,
                "save_data": save_data + "data",
                "dynamic_dict": None  # This will add a dynamic-dict parameter
            })

        data_zip = shutil.make_archive(base_name=temp_name(),
                                       format="gztar",
                                       root_dir=save_data)

        # Use a context manager so the handle is closed even if read() raises.
        with open(data_zip, "rb") as f:
            return f.read()
예제 #4
0
    def train(self, save_data, opt):
        """Unpack a preprocessed-data archive and launch training on it.

        Args:
            save_data: raw bytes of the gzipped preprocessed-data archive.
            opt: options dict forwarded to ``train.py``; mutated in place
                with data/model paths (and GPU settings when CUDA is on).

        Returns:
            Path of the temp directory the model checkpoints are saved to.
        """
        archive_path = save_temp_bin(save_data)

        data_dir = temp_dir()
        shutil.unpack_archive(filename=archive_path, extract_dir=data_dir, format="gztar")

        model_dir = temp_dir()

        opt.update({"data": data_dir + "data", "save_model": model_dir})
        if is_cuda:
            # Single-GPU setup. NOTE(review): presumably train.py accepts a
            # scalar gpu_ranks here — confirm against its argument parser.
            opt.update({"world_size": 1, "gpu_ranks": 0})

        run_param('train.py', opt)

        return model_dir
예제 #5
0
def download_manual(version, images_dir):
    """Download the manually-labeled hands dataset and yield its examples.

    Fetches the zip at ``version["url"]``, extracts it, crops each image to
    a square region around the annotated hand, saves the crop under
    ``images_dir``, and yields one record per image with keypoints shifted
    into crop coordinates.

    Args:
        version: mapping with a "url" key pointing at the dataset zip.
        images_dir: destination directory for the per-split image folders.

    Yields:
        (split, dict) pairs with the relative image path, handedness, and
        the crop-relative keypoint list.
    """
    temp = temp_dir()

    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    labels_dir = path.join(temp, "hand_labels")

    for split in ["train", "test"]:
        original_split_dir = path.join(labels_dir, "manual_" + split)

        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            # Open via a context manager: json.load(open(...)) leaks the handle.
            with open(path.join(original_split_dir, file), "r") as fh:
                content = json.load(fh)

            fname = file.replace(".json", ".jpg")

            # Crop image: half the larger keypoint extent is used as padding
            # around the top-left of the bounding box.
            all_x, all_y, _ = zip(*content["hand_pts"])
            size = round(
                max(max(all_x) - min(all_x),
                    max(all_y) - min(all_y)) / 2)

            x = min(all_x) - size
            y = min(all_y) - size

            im = Image.open(path.join(original_split_dir, fname))
            crop = im.crop((x, y, x + 4 * size, y + 4 * size))
            crop.save(path.join(split_dir, fname))

            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                # Translate keypoints into the crop's coordinate frame.
                "pose":
                [(x1 - x, y1 - y, z) for x1, y1, z in content["hand_pts"]]
            }
예제 #6
0
def download_sign_language(version, images_dir):
    """Download the Sign Language MNIST CSVs and yield labeled images.

    Fetches the zip at ``version["url"]``, extracts it, decodes each CSV
    row into a 28x28 grayscale PNG saved under ``images_dir``, and yields
    one (split, record) pair per image.

    Args:
        version: mapping with a "url" key pointing at the dataset zip.
        images_dir: destination directory for the per-split image folders.

    Yields:
        (split, dict) pairs with the letter label and relative image path.
    """
    labels = string.ascii_lowercase

    temp = temp_dir()

    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    for split in ["train", "test"]:
        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        # Read via a context manager (open(...).readlines() leaks the handle);
        # skip the header row. Renamed from `csv` to avoid shadowing the
        # stdlib module.
        with open(path.join(temp, "sign_mnist_" + split + ".csv")) as fh:
            rows = [[int(r) for r in row.split(",")]
                    for row in fh.readlines()[1:]]

        for i, row in enumerate(rows):
            # First column is the class index; the rest are pixel values.
            label = labels[row.pop(0)]
            image = np.array(row, dtype=np.uint8).reshape((28, 28))

            f_name = label + "_" + str(i) + ".png"
            Image.fromarray(image).save(path.join(split_dir, f_name))

            yield split, {"label": label, "image": "/".join([split, f_name])}
예제 #7
0
def download_FingerSpell(version, directory):
    """Download fingerspelling transition images and render them as videos.

    For every ordered pair of letters (plus "rest"), downloads the frame
    strip image for that transition, splits it into 256x256 frames, and
    renders an mp4 with FFmpeg. Yields one metadata record per pair.

    Args:
        version: mapping with a "url" key for the image base URL.
        directory: output directory; videos are written under "videos/".

    Yields:
        dict records with id, text, gloss, video path/url, and metadata.
    """
    FFmpeg.check_installed()

    letters = ['rest'] + list(string.ascii_lowercase)

    # "j" and "z" are signed with motion, so they have dedicated
    # begin/end animation strips that get spliced into their videos.
    animated = {}
    for letter in ("j", "z"):
        strip_file = temp_name(".jpg")
        urlretrieve(version["url"] + letter + "-begin_" + letter + "-end.jpg", strip_file)
        animated[letter] = cv2.imread(strip_file)

    videos_path = path.join(directory, "videos")
    makedir(videos_path)

    for l1 in tqdm(letters):
        for l2 in tqdm(letters):
            is_l1_animated = l1 in animated
            is_l2_animated = l2 in animated

            text = (l1 + l2).replace("rest", "")

            if l1 == l2 == "rest":
                gloss = ""
            elif l1 == "rest":
                gloss = l2 + "#"
            elif l2 == "rest":
                gloss = "#" + l1
            else:
                gloss = "#" + l1 + "# #" + l2 + "#"

            # Animated letters use the suffixed image names in the URL.
            download_l1 = l1 + "-end" if is_l1_animated else l1
            download_l2 = l2 + "-begin" if is_l2_animated else l2

            full_url = version["url"] + download_l1 + "_" + download_l2 + ".jpg"

            video_path = path.join(videos_path, text + ".mp4")
            if not path.exists(video_path):
                strip_file = temp_name(".jpg")
                urlretrieve(full_url, strip_file)
                img = cv2.imread(strip_file)

                # Splice in the animation strip for the animated letter.
                if is_l2_animated and not is_l1_animated:
                    img = np.concatenate((img, animated[l2]))
                if is_l1_animated and not is_l2_animated:
                    img = np.concatenate((animated[l1], img))

                # The strip is a vertical stack of 256x256 RGB frames.
                frames = img.reshape((int(img.shape[0] / 256), 256, 256, 3))

                frames_dir = temp_dir()
                for idx, frame in enumerate(frames):
                    cv2.imwrite(frames_dir + str(idx).zfill(2) + ".jpg", frame)

                FFmpeg.video_from_frames(frames_dir, 2, video_path)

            yield {
                "id": text if text != "" else "rest",
                "texts": [{
                    "text": text
                }],
                "gloss": gloss,
                "video": video_path,
                "video_url": full_url,
                "sign_language": "en.us",
                "text_language": "English",
                "metadata": {
                    "width": 256,
                    "height": 256
                }
            }