Example #1
# Shared imports assumed by the examples below. Project-specific helpers and
# constants (ProgressPrinter, get_images, get_split_df, get_video_path,
# KRSL_DIR, VIDEOS_DIR, DEVICE, ...) come from the surrounding codebase.
import glob
import os
import pickle

import cv2
import numpy as np
import torch
import torch.nn as nn
import Levenshtein as Lev
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau


def reformat_KRSL():
    np.random.seed(0)
    krsl_video_dir = os.path.join(KRSL_DIR, "videos")

    videos = list(glob.glob(os.sep.join([krsl_video_dir, "**", "*.mp4"])))

    fps_out = 25
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    np.random.shuffle(videos)
    pp = ProgressPrinter(len(videos), 15)

    print("Reformatting KRSL")

    not_images = 0
    for idx, video_path in enumerate(videos):
        out_video_path = os.sep.join([VIDEOS_DIR] +
                                     video_path.split(os.sep)[-2:])
        video_dir = os.path.split(out_video_path)[0]
        if os.path.exists(out_video_path):
            pp.omit()
            continue

        images, fps = get_images(video_path)

        if not images:
            not_images += 1
            pp.omit()
            continue

        images = resize_images(images)

        L = len(images)

        # number of frames after resampling to the 25 fps target
        L_out = round(L * fps_out / fps)

        images = np.array(images)

        # evenly spaced source indices for the resampled clip
        idxs = np.linspace(0, L, L_out, endpoint=False)

        hw = images.shape[1:3]
        assert hw in ((200, 360), (200, 200), (360, 200))
        images = [images[round(i)] for i in idxs]

        if not os.path.exists(video_dir):
            os.makedirs(video_dir)

        # cv2.VideoWriter takes the frame size as (width, height)
        out = cv2.VideoWriter(out_video_path, fourcc, fps_out, hw[::-1])
        for frame in images:
            out.write(frame)

        out.release()
        pp.show(idx)
    pp.end()
    if not_images:
        print("Skipped", not_images, "videos with no readable frames")
    clean_anno_KRSL("train", save=True)
    clean_anno_KRSL("test", save=True)
    clean_anno_KRSL("dev", save=True)
Example #2
def generate_openpose_features_split(pose_estimator, split):
    with torch.no_grad():
        df = get_split_df(split)
        print(SOURCE, "Feature extraction:", STF_MODEL, split, "split")
        L = df.shape[0]

        pp = ProgressPrinter(L, 1)
        for idx in range(L):
            row = df.iloc[idx]
            video_dir, feat_path = get_video_path(row, split, feat_ext=".npy")

            if os.path.exists(feat_path):
                pp.omit()
                continue

            feat_dir = os.path.split(feat_path)[0]

            feats = pose_estimator.estimate_video_pose(video_dir)

            if not os.path.exists(feat_dir):
                os.makedirs(feat_dir)
            np.save(feat_path, feats)

            if SHOW_PROGRESS:
                pp.show(idx)

        if SHOW_PROGRESS:
            pp.end()

        print()
Example #3
def convert_phoenix_to_videos():
    ph_images_dir = os.sep.join([PH_DIR, "features", "fullFrame-210x260px"])

    video_dirs = list(glob.glob(os.sep.join([ph_images_dir, '*', '*', '1'])))

    pp = ProgressPrinter(len(video_dirs), 5)
    print("Converting Images into Videos")
    for idx, video_dir in enumerate(video_dirs):
        image_paths = sorted(list(glob.glob(os.path.join(video_dir, "*.png"))))
        video_path = os.path.split(video_dir)[0] + ".mp4"
        video_path = os.sep.join([VIDEOS_DIR] + video_path.split(os.sep)[-2:])
        if os.path.exists(video_path):
            pp.omit()
            continue
        video_dir = os.path.split(video_path)[0]
        if not os.path.exists(video_dir):
            os.makedirs(video_dir)

        # cv2.VideoWriter expects the frame size as (width, height);
        # the PHOENIX full frames are 210x260 px
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(video_path, fourcc, 25.0, (210, 260))
        for im in image_paths:
            frame = cv2.imread(im)
            out.write(frame)

        out.release()
        pp.show(idx)

    pp.end()

    print()
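
The conversion hinges on two cv2.VideoWriter details: the size argument is (width, height), while each written frame must be a uint8 BGR array of shape (height, width, 3). A minimal sketch writing 25 black PHOENIX-sized frames to a hypothetical output path:

import cv2
import numpy as np

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("black.mp4", fourcc, 25.0, (210, 260))  # (width, height)
for _ in range(25):
    out.write(np.zeros((260, 210, 3), dtype=np.uint8))  # frame is (height, width, 3)
out.release()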
Example #4
def eval_split_by_lev(model, vocab, split):
    df = get_split_df(split)
    pp = ProgressPrinter(df.shape[0], 5)
    hypes = []
    gts = []
    with torch.no_grad():
        for idx in range(df.shape[0]):
            row = df.iloc[idx]
            gt = vocab.encode(row.annotation)
            video_path, feat_path = get_video_path(row, split)
            tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
            pred = model(tensor_video).squeeze(1).log_softmax(dim=1).argmax(
                dim=1).cpu().numpy()

            # CTC-style greedy decoding: drop blanks (id 0) and collapse repeats
            hypo = []
            for i in range(len(pred)):
                if pred[i] == 0 or (i > 0 and pred[i] == pred[i - 1]):
                    continue
                hypo.append(pred[i])

            gts += gt
            hypes += hypo
            pp.show(idx)

        pp.end()

        hypes = "".join([chr(x) for x in hypes])
        gts = "".join([chr(x) for x in gts])
        wer = Lev.distance(hypes, gts) / len(gts) * 100

        print(wer)
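
The chr() trick turns each token id into a single unicode character, so string edit distance counts whole-token substitutions, insertions, and deletions rather than character errors. A self-contained example of the same WER computation with made-up token ids:

import Levenshtein as Lev

hypo = [5, 7, 8, 9]
gt = [5, 7, 9]
wer = Lev.distance("".join(chr(x) for x in hypo),
                   "".join(chr(x) for x in gt)) / len(gt) * 100
print(wer)  # 33.33...: one inserted token against a 3-token reference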
Example #5
    def _build_dataset(self):

        dataset_dir = os.sep.join([END2END_DATASETS_DIR, self._get_ffm()])

        X_path = os.sep.join([dataset_dir, "X_" + self.split + ".pkl"])
        Y_path = os.sep.join([dataset_dir, "Y_" + self.split + ".pkl"])
        X_lens_path = os.sep.join(
            [dataset_dir, "X_lens_" + self.split + ".pkl"])

        if self.load and all(os.path.exists(p)
                             for p in (X_path, Y_path, X_lens_path)):
            with open(X_path, 'rb') as f:
                self.X = pickle.load(f)

            with open(Y_path, 'rb') as f:
                self.Y = pickle.load(f)

            with open(X_lens_path, 'rb') as f:
                self.X_lens = pickle.load(f)

            print(self.split.capitalize(), "dataset loaded")
        else:
            print("Building", self.split, "dataset")
            df = get_split_df(self.split)
            self.X = []
            self.Y = []
            self.X_lens = []

            pp = ProgressPrinter(df.shape[0], 5)
            for idx in range(df.shape[0]):
                row = df.iloc[idx]
                glosses = self.vocab.encode(row.annotation)
                feat_path, feat, feat_len = self._get_feat(row, glosses)
                if feat is None:
                    continue

                self.X.append(feat_path)
                self.Y.append(glosses)
                self.X_lens.append(feat_len)

                if self._show_progress():
                    pp.show(idx)

            if self._show_progress():
                pp.end()

            if not os.path.exists(dataset_dir):
                os.makedirs(dataset_dir)

            with open(X_path, 'wb') as f:
                pickle.dump(self.X, f)

            with open(Y_path, 'wb') as f:
                pickle.dump(self.Y, f)

            with open(X_lens_path, 'wb') as f:
                pickle.dump(self.X_lens, f)

        self.length = len(self.X)
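
_build_dataset follows a cache-or-rebuild pattern: load the three pickles if they all exist, otherwise build the lists and persist them for the next run. A hypothetical helper (load_or_build is not part of the codebase) distilling that pattern:

import os
import pickle

def load_or_build(path, build_fn):
    # return the cached object if present, else build it and cache it
    if os.path.exists(path):
        with open(path, 'rb') as f:
            return pickle.load(f)
    obj = build_fn()
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    return obj

X = load_or_build("datasets/X_train.pkl", lambda: ["feat1", "feat2"])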
Example #6
def gen_img_feat_split(model, preprocess, split):
    if SOURCE == "KRSL" and split == "dev":
        split = "val"

    df = get_split_df(split)

    print(SOURCE, STF_MODEL, "feature extraction:", split, "split")
    L = df.shape[0]

    pp = ProgressPrinter(L, 10)
    for idx in range(L):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, split, stf_feat=False)
        if os.path.exists(feat_path) and not FEAT_OVERRIDE:
            pp.omit()
            continue

        feat_dir = os.path.split(feat_path)[0]

        images = get_images(video_path)
        if len(images) < 4:
            pp.omit()
            continue

        tensor_video = get_tensor_video(images, preprocess, "2D")
        inp = tensor_video.to(DEVICE)
        feat = model(inp).cpu()

        if not os.path.exists(feat_dir):
            os.makedirs(feat_dir)

        torch.save(feat, feat_path)

        if SHOW_PROGRESS:
            pp.show(idx)

    if SHOW_PROGRESS:
        pp.end()
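
gen_img_feat_split pushes every frame of a video through a 2D backbone in one batch; get_tensor_video and model are project helpers. A minimal sketch of the idea, assuming a ResNet-18 with its classifier head stripped (the real STF_MODEL may differ):

import torch
import torch.nn as nn
import torchvision

backbone = torchvision.models.resnet18(weights=None)
backbone = nn.Sequential(*list(backbone.children())[:-1])  # drop the fc head
backbone.eval()

frames = torch.randn(16, 3, 224, 224)  # a 16-frame video as a batch of images
with torch.no_grad():
    feats = backbone(frames).flatten(1)  # (16, 512): one feature vector per frame
print(feats.shape)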
Example #7
def generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT):
    print("Generation of the Gloss-Recognition Dataset")
    model, loaded = get_end2end_model(vocab, True, stf_type, use_feat)

    mode = "3D" if stf_type else "2D"

    if not loaded:
        print("STF or SEQ2SEQ model doesn't exist")
        exit(1)

    model.eval()

    temp_stride = 4

    rerun_out_dir = os.path.join(GR_DATASET_DIR, "STF_RERUN")
    rerun_out_path = os.path.join(rerun_out_dir, STF_MODEL + ".bin")

    stf_rerun = use_feat and os.path.exists(rerun_out_path)

    if stf_rerun:
        with open(rerun_out_path, 'rb') as f:
            feats_rerun_data = pickle.load(f)
    else:
        feats_rerun_data = {"frame_n": [], "gloss_paths": [], "gloss_lens": []}

    df = get_split_df("train")
    Y = []
    X = []
    X_lens = []

    pp = ProgressPrinter(df.shape[0], 5)
    cur_n_gloss = 0
    for idx in range(df.shape[0]):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, "train")

        if stf_rerun:
            # reuse frame counts and gloss-clip metadata cached by a previous run
            frame_n = feats_rerun_data["frame_n"][idx]

            if frame_n < temp_stride:
                pp.omit()
                continue

            gloss_paths = feats_rerun_data["gloss_paths"][idx]
            gloss_lens = feats_rerun_data["gloss_lens"][idx]

            with torch.no_grad():
                tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)

        else:
            images = get_images(video_path)
            frame_n = len(images)
            feats_rerun_data["frame_n"].append(frame_n)

            if frame_n < temp_stride:
                pp.omit()
                feats_rerun_data["gloss_paths"].append("")
                feats_rerun_data["gloss_lens"].append(0)
                continue

            gloss_paths, gloss_lens = get_gloss_paths(images, cur_n_gloss, temp_stride, mode)
            feats_rerun_data["gloss_paths"].append(gloss_paths)
            feats_rerun_data["gloss_lens"].append(gloss_lens)

            with torch.no_grad():
                if use_feat:
                    tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
                else:
                    tensor_video = get_tensor_video(images, preprocess_3d, mode).unsqueeze(0).to(DEVICE)

        X += gloss_paths
        X_lens += gloss_lens
        Y += get_decoded_prediction(model, tensor_video, vocab.encode(row.annotation))

        assert (len(Y) == len(X) == len(X_lens))

        cur_n_gloss = len(X)
        if SHOW_PROGRESS:
            pp.show(idx)

    shuffle_and_save_dataset(X, X_lens, Y)
    if use_feat and not stf_rerun:
        if not os.path.exists(rerun_out_dir):
            os.makedirs(rerun_out_dir)
        with open(rerun_out_path, 'wb') as f:
            pickle.dump(feats_rerun_data, f)

    if SHOW_PROGRESS:
        pp.end()
Example #8
def train_end2end(model, vocab, datasets, use_feat):
    print("END2END model training...")
    print("Features:", STF_MODEL)
    print("Save Model path:", STF_MODEL_PATH)
    print("WER path:", END2END_WER_PATH)

    optimizer = Adam(model.parameters(), lr=END2END_LR)
    loss_fn = nn.CTCLoss(zero_infinity=True)

    lr_scheduler = ReduceLROnPlateau(optimizer, factor=0.2, patience=4)

    best_wer = get_best_wer()
    curve = {"train": [], "val": []}

    current_best_wer = float("inf")
    trained = False
    # n_epochs since wer was updated
    since_wer_update = 0
    try:
        for epoch in range(1, END2END_N_EPOCHS + 1):
            print("Epoch", epoch)
            for phase in ["train", "val"]:
                if phase == "train":
                    model.train()  # Set model to training mode
                else:
                    model.eval()

                dataset = datasets[phase]
                n_batches = dataset.start_epoch()
                losses = []
                hypes = []
                gts = []

                with torch.set_grad_enabled(phase == "train"):
                    pp = ProgressPrinter(n_batches, 25 if USE_ST_FEAT else 1)
                    for i in range(n_batches):
                        if phase == "train":
                            optimizer.zero_grad()
                        X_batch, Y_batch, Y_lens = dataset.get_batch(i)
                        X_batch = X_batch.to(DEVICE)
                        Y_batch = Y_batch.to(DEVICE)

                        preds = model(X_batch).log_softmax(dim=2)
                        T, N, V = preds.shape
                        # every output sequence spans all T time steps
                        X_lens = torch.full(size=(N,), fill_value=T, dtype=torch.int32)
                        loss = loss_fn(preds, Y_batch, X_lens, Y_lens)
                        losses.append(loss.item())

                        if phase == "train":
                            loss.backward()
                            optimizer.step()

                        out_sentences = predict_glosses(preds, decoder=None)
                        gts += [y for y in Y_batch.view(-1).tolist() if y != 0]

                        for sentence in out_sentences:
                            hypes += sentence

                        if i == 0 and SHOW_EXAMPLE:
                            pred = " ".join(vocab.decode(out_sentences[0]))
                            gt = Y_batch[0][:Y_lens[0]].tolist()
                            gt = " ".join(vocab.decode(gt))
                            print("   ", phase, 'Ex. [' + pred + ']', '[' + gt + ']')

                        if SHOW_PROGRESS:
                            pp.show(i, "    ")

                    if SHOW_PROGRESS:
                        pp.end("    ")

                hypes = "".join([chr(x) for x in hypes])
                gts = "".join([chr(x) for x in gts])
                phase_wer = Lev.distance(hypes, gts) / len(gts) * 100

                if phase == "train":
                    lr_scheduler.step(phase_wer)

                curve[phase].append(phase_wer)
                phase_loss = np.mean(losses)
                print("   ", phase.upper(), "WER:", phase_wer, "Loss:", phase_loss)

                if phase_wer < best_wer[phase]:
                    best_wer[phase] = phase_wer
                    save_end2end_model(model, phase, best_wer[phase])

                if phase == "val":
                    if phase_wer < current_best_wer:
                        current_best_wer = phase_wer
                        since_wer_update = 0
                    else:
                        since_wer_update += 1

                    if since_wer_update >= END2END_STOP_LIMIT and not use_feat:
                        trained = True
                        # early stopping: break out of both phase and epoch loops
                        raise KeyboardInterrupt

    except KeyboardInterrupt:
        pass

    if epoch >= END2END_N_EPOCHS:
        trained = True

    with open(os.path.join(VARS_DIR, "curve.pkl"), 'wb') as f:
        pickle.dump(curve, f)

    return best_wer, trained
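
The CTC bookkeeping above is shape-sensitive: log-probabilities are (T, N, V) with blank id 0, and since the model emits a fixed-length output per clip, input_lengths is simply T for every batch element. A toy-shapes sketch of the same loss call:

import torch
import torch.nn as nn

T, N, V = 50, 2, 30  # time steps, batch size, vocab size (0 = CTC blank)
log_probs = torch.randn(T, N, V).log_softmax(dim=2)
targets = torch.randint(1, V, (N, 12), dtype=torch.long)  # padded labels, no blanks
input_lengths = torch.full((N,), T, dtype=torch.int32)
target_lengths = torch.full((N,), 12, dtype=torch.int32)

loss = nn.CTCLoss(zero_infinity=True)(log_probs, targets, input_lengths, target_lengths)
print(loss.item())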
Example #9
def train_gloss_recog(model, datasets):
    print("GR model training...")
    print("Features:", STF_MODEL)
    best_loss = float("inf")
    optimizer = Adam(model.parameters(), lr=GR_LR)

    loss_fn = nn.CrossEntropyLoss()

    best_acc = 0
    trained = False

    for epoch in range(1, GR_N_EPOCHS + 1):
        print("Epoch", epoch)
        for phase in ['Train', 'Val']:
            if phase == 'Train':
                model.train()
            else:
                model.eval()

            dataset = datasets[phase]
            n_batches = dataset.start_epoch()
            losses = []

            correct = []

            with torch.set_grad_enabled(phase == "Train"):
                pp = ProgressPrinter(n_batches, 25)
                for i in range(n_batches):
                    if phase == "Train":
                        optimizer.zero_grad()

                    X_batch, Y_batch = dataset.get_batch(i)
                    if X_batch.size(1) != 8 and STF_TYPE == 0:
                        continue

                    X_batch = X_batch.to(DEVICE)
                    Y_batch = Y_batch.to(DEVICE)

                    preds = model(X_batch)
                    loss = loss_fn(preds, Y_batch)

                    correct.append(torch.sum(preds.argmax(dim=1) == Y_batch).item())

                    losses.append(loss.item())

                    if phase == "Train":
                        loss.backward()
                        optimizer.step()

                    if SHOW_PROGRESS:
                        pp.show(i, "    Loss: %.3f" % np.mean(losses))

                if SHOW_PROGRESS:
                    pp.end("    ")

            phase_loss = np.mean(losses)
            phase_acc = sum(correct) / (len(correct) * GR_BATCH_SIZE) * 100

            print("    ", phase, "loss:", phase_loss, "phase ACC:", phase_acc)

            if phase == "Val" and phase_loss < best_loss:
                best_loss = phase_loss
                save_model(model, best_loss)

            if phase == "Val":
                best_acc = max(best_acc, phase_acc)

        # treat the model as trained once it has run at least 5 epochs
        if epoch >= 5:
            trained = True

    return best_acc, trained
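
Each entry of correct holds the number of right predictions in one fixed-size batch, so the phase accuracy is total correct over batches * GR_BATCH_SIZE. A toy check of that per-batch bookkeeping, with hypothetical sizes:

import torch

batch_size = 4
logits = torch.randn(batch_size, 10)          # (N, n_classes) model outputs
labels = torch.randint(0, 10, (batch_size,))
correct = (logits.argmax(dim=1) == labels).sum().item()
print(correct / batch_size * 100, "% of this batch correct")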