예제 #1
0
    def pre_process(self):
        """Run ``preprocess.py`` on the train/dev data and return the output as bytes.

        The source/target lines of ``self.train_data`` and ``self.dev_data`` are
        written to temporary files (source lines are first passed through
        ``add_features`` when ``self.features`` is truthy), handed to
        ``preprocess.py`` via ``run_param``, and the resulting output directory
        is packed into a gzipped tar archive.

        Returns:
            The raw bytes of the ``gztar`` archive of the output directory.
        """
        save_data = temp_dir()

        train_src, train_tgt = self.train_data
        dev_src, dev_tgt = self.dev_data

        if self.features:
            train_src = [add_features(line) for line in train_src]
            dev_src = [add_features(line) for line in dev_src]

        run_param('preprocess.py', {
            "train_src": save_temp(train_src),
            "train_tgt": save_temp(train_tgt),
            "valid_src": save_temp(dev_src),
            "valid_tgt": save_temp(dev_tgt),
            # NOTE(review): plain concatenation assumes temp_dir() returns a
            # path ending with a separator - confirm against its definition.
            "save_data": save_data + "data",
            "dynamic_dict": None  # This will add a dynamic-dict parameter
        })

        data_zip = shutil.make_archive(base_name=temp_name(), format="gztar", root_dir=save_data)

        # Context manager guarantees the handle is released even if read() fails.
        with open(data_zip, "rb") as f:
            return f.read()
예제 #2
0
    def pre_process(self):
        """Run ``preprocess.py`` on the reader data and return the output as bytes.

        For both ``self.train_reader.data`` and ``self.dev_reader.data`` the
        source side is ``add_features(d.plan)`` and the target side is
        ``d.delex``. Each side is written to a temporary file, the files are
        handed to ``preprocess.py`` via ``run_param``, and the resulting output
        directory is packed into a gzipped tar archive.

        Returns:
            The raw bytes of the ``gztar`` archive of the output directory.
        """
        save_data = temp_dir()

        train_src = save_temp(
            [add_features(d.plan) for d in self.train_reader.data])
        train_tgt = save_temp([d.delex for d in self.train_reader.data])
        valid_src = save_temp(
            [add_features(d.plan) for d in self.dev_reader.data])
        valid_tgt = save_temp([d.delex for d in self.dev_reader.data])

        run_param(
            'preprocess.py',
            {
                "train_src": train_src,
                "train_tgt": train_tgt,
                "valid_src": valid_src,
                "valid_tgt": valid_tgt,
                # NOTE(review): plain concatenation assumes temp_dir() returns
                # a path ending with a separator - confirm against its definition.
                "save_data": save_data + "data",
                "dynamic_dict": None  # This will add a dynamic-dict parameter
            })

        data_zip = shutil.make_archive(base_name=temp_name(),
                                       format="gztar",
                                       root_dir=save_data)

        # Context manager guarantees the handle is released even if read() fails.
        with open(data_zip, "rb") as f:
            return f.read()
예제 #3
0
    def __getstate__(self):
        """Build the picklable state of this object.

        Starts from a shallow copy of ``self.__dict__``. When the instance has
        a ``pc`` attribute, ``self.pc.save`` is asked to write to a temporary
        file and the file's bytes are stored under the ``"recovery"`` key.
        Every attribute whose type name contains ``"_dynet."`` is then dropped
        from the state.

        Returns:
            dict: the filtered attribute dictionary, plus ``"recovery"`` when
            a ``pc`` attribute is present.
        """
        d = {**self.__dict__}
        if "pc" in d:
            temp = temp_name()
            self.pc.save(temp)
            # Close the handle deterministically instead of relying on GC.
            with open(temp, "rb") as f:
                d["recovery"] = f.read()

        # Iterate the live __dict__ while deleting from the copy, so the
        # dictionary being iterated never changes size.
        for k, v in self.__dict__.items():
            if "_dynet." in str(type(v)):
                del d[k]

        return d
예제 #4
0
def create_video(shape, frames: List[List], fname=None, fps=25):
    """Render ``frames`` to a video file and return its path.

    Each frame is a list of "person" objects; every person is drawn onto a
    zero-filled ``uint8`` image of the given ``shape`` with ``draw_person``,
    and the resulting image is appended to the video.

    Args:
        shape: Shape of the blank canvas passed to ``np.zeros``.
        frames: One list of persons per video frame.
        fname: Output path; a temporary ``.mp4`` name is used when ``None``.
        fps: Frames per second handed to the imageio writer.

    Returns:
        The path the video was written to.
    """
    blank_image = np.zeros(shape, np.uint8)

    if fname is None:
        fname = temp_name(".mp4")

    writer = imageio.get_writer(fname, fps=fps)
    try:
        for frame in frames:
            # Start every frame from a fresh canvas: if draw_person draws in
            # place, reusing the same array would leak earlier frames'
            # drawings into later ones.
            im = blank_image.copy()
            for person in frame:
                im = draw_person(im, person)
            writer.append_data(im)
    finally:
        # Always release the writer, even if drawing/encoding fails.
        writer.close()

    return fname
예제 #5
0
def error_bar(
    d: Dict[int, List[int]],
    y_label: str,
    x_label: str,
):
    """Plot the mean of each key's samples with std-dev error bars; save as PDF.

    Args:
        d: Maps each x value to the list of samples observed for it.
        y_label: Label for the y axis.
        x_label: Label for the x axis.

    Returns:
        Path of the temporary ``.pdf`` file the figure was saved to.
    """
    xs = sorted(d.keys())
    means = [np.mean(d[key]) for key in xs]
    deviations = [np.std(d[key]) for key in xs]

    plt.errorbar(xs, means, deviations)
    plt.ylabel(y_label, fontsize=18)
    plt.xlabel(x_label, fontsize=18)
    figure = plt.gcf()
    figure.subplots_adjust(left=0.15)

    pdf_path = temp_name(suffix=".pdf")
    plt.savefig(pdf_path)
    plt.close()
    return pdf_path
예제 #6
0
    def translate(self, plans: List[str], opts=None):  # Translate entire reader file using a model
        """Translate each plan sentence by sentence, caching sentence results.

        Every plan is split on ``"."``; each stripped piece is passed through
        ``add_features`` when ``self.features`` is truthy. Pieces that are not
        yet in ``self.sentences_cache`` are written to a temp file and sent to
        ``self.run_traslate``; the best output for each (as chosen by
        ``find_best_out``) is stored in the cache.

        Args:
            plans: Plans to translate; an empty string yields no sentences.
            opts: Optional dict with ``"beam_size"`` and ``"find_best"``.
                Defaults to ``BEAM_SIZE`` and ``True``.

        Returns:
            One string per plan, made of its sentences' translations joined by
            a single space. An empty list is returned when ``plans`` contains
            no sentences at all.
        """
        if not hasattr(self, "features"):  # TODO remove after EMNLP
            self.features = True
        if not hasattr(self, "sentences_cache"):  # TODO remove after EMNLP
            self.sentences_cache = {}

        if not opts:
            opts = {
                "beam_size": BEAM_SIZE,
                "find_best": True
            }

        def featurize(sentence):
            return add_features(sentence) if self.features else sentence

        o_lines = [
            [featurize(s.strip()) for s in plan.split(".")] if plan != "" else []
            for plan in plans
        ]

        unique_lines = set(chain.from_iterable(o_lines))
        if not unique_lines:
            # Nothing to translate at all (no plans, or only empty plans).
            return []

        n_lines = [l for l in unique_lines if l not in self.sentences_cache]

        # Only run the model for sentences that are not cached yet. When all
        # sentences are cached we must still return the cached translations
        # rather than an empty list.
        if n_lines:
            print("Translating", len(n_lines), "sentences")

            source_path = save_temp(n_lines)
            target_path = temp_name()

            n_best = opts["beam_size"] if opts["find_best"] else 1

            self.run_traslate(source_path, target_path, {
                "replace_unk": None,
                "beam_size": opts["beam_size"],
                "n_best": n_best,
                "batch_size": 64
            })

            with open(target_path, "r", encoding="utf-8") as out_lines_f:
                # The output holds n_best candidate lines per source sentence.
                out_lines = chunks(out_lines_f.read().splitlines(), n_best)

            for n, out in zip(n_lines, out_lines):
                self.sentences_cache[n] = find_best_out(n, out)

        return [" ".join([self.sentences_cache[s] for s in lines]) for lines in o_lines]
예제 #7
0
    def translate(self,
                  plans: List[str],
                  opts=None):  # Translate entire reader file using a model
        """Translate each plan sentence by sentence using the stored model.

        Every plan is split on ``"."`` and each stripped piece is passed
        through ``add_features``. The distinct pieces are written to a temp
        file and sent to ``self.run_traslate`` together with a temp copy of
        ``self.model_bin``; ``find_best_out`` picks the output kept for each
        piece.

        Args:
            plans: Plans to translate; an empty string yields no sentences.
            opts: Optional dict with ``"beam_size"`` and ``"find_best"``.
                Defaults to ``BEAM_SIZE`` and ``True``.

        Returns:
            One string per plan, made of its sentences' translations joined by
            a single space. An empty list is returned when ``plans`` contains
            no sentences at all.
        """
        if not opts:
            opts = {"beam_size": BEAM_SIZE, "find_best": True}

        o_lines = [
            [add_features(s.strip()) for s in plan.split(".")]
            if plan != "" else []
            for plan in plans
        ]
        n_lines = list(set(chain.from_iterable(o_lines)))

        if not n_lines:
            return []

        # Materialise the model on disk only once we know there is work to do,
        # so an empty request does not write a needless temp file.
        model_path = save_temp_bin(self.model_bin)

        source_path = save_temp(n_lines)
        target_path = temp_name()

        n_best = opts["beam_size"] if opts["find_best"] else 1

        self.run_traslate(
            model_path, source_path, target_path, {
                "replace_unk": None,
                "beam_size": opts["beam_size"],
                "n_best": n_best,
                "batch_size": 64
            })

        with open(target_path, "r", encoding="utf-8") as out_lines_f:
            # The output holds n_best candidate lines per source sentence.
            out_lines = chunks(out_lines_f.read().splitlines(), n_best)

        map_lines = {
            n: find_best_out(n, out)
            for n, out in zip(n_lines, out_lines)
        }

        return [" ".join([map_lines[s] for s in lines]) for lines in o_lines]
예제 #8
0
def download_FingerSpell(version, directory):
    """Download finger-spelling sprite sheets and yield one record per letter pair.

    For every ordered pair drawn from ``'rest'`` plus the lowercase ASCII
    letters, a JPEG is fetched from ``version["url"]``, cut into 256x256
    frames and turned into an ``.mp4`` under ``<directory>/videos`` (skipped
    when that video file already exists). The letters ``j`` and ``z`` are
    treated as animated: they use ``-begin``/``-end`` image variants and an
    extra transition image that is fetched once up front.

    Args:
        version: Mapping whose ``"url"`` entry is the base URL of the images.
        directory: Directory in which the ``videos`` folder is created.

    Yields:
        dict: metadata for each pair (id, texts, gloss, video path, source
        URL, languages and frame size).
    """
    FFmpeg.check_installed()

    letters = ['rest'] + list(string.ascii_lowercase)

    # Initially maps each animated letter to a temp file name; each value is
    # replaced by the decoded image once it has been downloaded.
    animated = {"j": temp_name(".jpg"), "z": temp_name(".jpg")}
    for letter, local_file in list(animated.items()):
        urlretrieve(version["url"] + letter + "-begin_" + letter + "-end.jpg", local_file)
        animated[letter] = cv2.imread(local_file)

    videos_path = path.join(directory, "videos")
    makedir(videos_path)

    for first in tqdm(letters):
        for second in tqdm(letters):
            second_animated = second in animated
            first_animated = first in animated

            text = (first + second).replace("rest", "")

            if first == second == "rest":
                gloss = ""
            elif first == "rest":
                gloss = second + "#"
            elif second == "rest":
                gloss = "#" + first
            else:
                gloss = "#" + first + "# #" + second + "#"

            first_part = first + "-end" if first_animated else first
            second_part = second + "-begin" if second_animated else second
            full_url = version["url"] + first_part + "_" + second_part + ".jpg"

            video_path = path.join(videos_path, text + ".mp4")
            if not path.exists(video_path):
                sheet_file = temp_name(".jpg")
                urlretrieve(full_url, sheet_file)
                img = cv2.imread(sheet_file)

                # When exactly one letter is animated, attach its transition
                # image on the matching side of the sheet.
                if second_animated and not first_animated:
                    img = np.concatenate((img, animated[second]))
                elif first_animated and not second_animated:
                    img = np.concatenate((animated[first], img))

                frame_count = int(img.shape[0] / 256)
                frames = img.reshape((frame_count, 256, 256, 3))

                frames_dir = temp_dir()
                for index, frame in enumerate(frames):
                    cv2.imwrite(frames_dir + str(index).zfill(2) + ".jpg", frame)

                FFmpeg.video_from_frames(frames_dir, 2, video_path)

            record = {
                "id": text or "rest",
                "texts": [{"text": text}],
                "gloss": gloss,
                "video": video_path,
                "video_url": full_url,
                "sign_language": "en.us",
                "text_language": "English",
                "metadata": {"width": 256, "height": 256},
            }
            yield record