import os
import random


def load_head_pose():
    # read the list of labels and collect per-label head-pose annotations
    labels = read_txt_lines(args.label_path)  # args is a module-level argparse namespace
    head_pose_dict = {}
    for idx_label in range(16):  # iterate over the first 16 labels
        label = labels[idx_label]
        filepath = 'preprocessing/head_pose/{}.txt'.format(label)
        with open(filepath) as myfile:
            head_poses = myfile.read().splitlines()  # one head-pose entry per line
        head_pose_dict[idx_label] = head_poses
    return head_pose_dict
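
# The snippets above and below rely on a read_txt_lines() helper that is not
# shown here. A minimal sketch of the assumed behavior (read a text file and
# return its lines as a list of strings); the exact implementation may differ:
def read_txt_lines(filepath):
    assert os.path.isfile(filepath), "File does not exist: {}".format(filepath)
    with open(filepath) as myfile:
        return myfile.read().splitlines()
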
    def _apply_variable_length_aug(self, filename, raw_data):
        # read the info txt file (gives the word's duration, used for temporal cropping)
        info_txt = os.path.join(self._annonation_direc, *filename.split('/')[self.label_idx:])  # swap base folder
        info_txt = os.path.splitext(info_txt)[0] + '.txt'  # swap extension
        info = read_txt_lines(info_txt)

        utterance_duration = float(info[4].split(' ')[1])
        half_interval = int(utterance_duration / 2.0 * self.fps)  # number of utterance frames / 2

        n_frames = raw_data.shape[0]
        mid_idx = (n_frames - 1) // 2  # with n frames the midpoint is (n-1)//2, since counting starts at 0
        left_idx = random.randint(0, max(0, mid_idx - half_interval - 1))  # random.randint(a, b) samples from [a, b]
        right_idx = random.randint(min(mid_idx + half_interval + 1, n_frames), n_frames)

        return raw_data[left_idx:right_idx]
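
    # Worked example of the crop above (illustrative numbers, assuming
    # self.fps = 25): for a 29-frame clip whose word lasts 0.60 s,
    # half_interval = int(0.30 * 25) = 7 and mid_idx = (29 - 1) // 2 = 14,
    # so left_idx is drawn uniformly from [0, 6] and right_idx from
    # [22, 29]; the returned slice always contains the word, with a
    # randomly sized amount of surrounding context.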
    def load_dataset(self):

        # -- read the labels file
        self._labels = read_txt_lines(self._label_fp)

        # -- add examples to self._data_files
        self._get_files_for_partition()

        # -- build self.list (index -> [filepath, label index]) from self._data_files
        self.list = dict()
        self.instance_ids = dict()
        for i, x in enumerate(self._data_files):
            label = self._get_label_from_path(x)
            self.list[i] = [x, self._labels.index(label)]
            self.instance_ids[i] = self._get_instance_id_from_path(x)

        print('Partition {} loaded'.format(self._data_partition))
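
    # A minimal sketch of how a __getitem__ could consume self.list (this
    # method and the load_data helper are assumptions, not shown in the
    # original snippet): each entry pairs a file path with its label index.
    def __getitem__(self, idx):
        filename, label_idx = self.list[idx]
        raw_data = self.load_data(filename)  # hypothetical loader for the clip
        return raw_data, label_idx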
import argparse
import os
import shutil

import torch
import moviepy.editor as mpe
from gtts import gTTS

# crop_patch, format_and_save, get_model, get_data_loaders, evaluate and
# read_txt_lines are assumed to be project helpers importable from the
# surrounding repository.


def main():

    parser = argparse.ArgumentParser()

    ## Essential parameters

    parser.add_argument("--video_folder",
                        default="flowers",
                        type=str,
                        help="Folder with flower videos")
    parser.add_argument("--video_name",
                        default="4",
                        type=str,
                        help="Which video")

    # -- dataset config
    parser.add_argument('--dataset', default='lrw', help='dataset selection')
    parser.add_argument('--num-classes',
                        type=int,
                        default=500,
                        help='Number of classes')
    # -- directory
    parser.add_argument('--data-dir', default='temp')
    parser.add_argument('--label-path',
                        type=str,
                        default='./labels/500WordsSortedList.txt',
                        help='Path to txt file with labels')
    parser.add_argument('--annonation-direc',
                        default=None,
                        help='Directory with the annotation txt files (used for variable-length augmentation)')
    # -- model config
    parser.add_argument('--backbone-type',
                        type=str,
                        default='resnet',
                        choices=['resnet', 'shufflenet'],
                        help='Architecture used for backbone')
    parser.add_argument('--relu-type',
                        type=str,
                        default='relu',
                        choices=['relu', 'prelu'],
                        help='Which ReLU activation to use')
    parser.add_argument('--width-mult',
                        type=float,
                        default=1.0,
                        help='Width multiplier for mobilenets and shufflenets')
    # -- TCN config
    parser.add_argument('--tcn-kernel-size',
                        type=int,
                        nargs="+",
                        help='Kernel size(s) to be used in the TCN module')
    parser.add_argument('--tcn-num-layers',
                        type=int,
                        default=4,
                        help='Number of layers in the TCN module')
    parser.add_argument('--tcn-dropout',
                        type=float,
                        default=0.2,
                        help='Dropout value for the TCN module')
    parser.add_argument('--tcn-dwpw',
                        default=False,
                        action='store_true',
                        help='If set, use depthwise-separable convolutions in the TCN architecture')
    parser.add_argument('--tcn-width-mult',
                        type=int,
                        default=1,
                        help='TCN width multiplier')
    # -- train
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        help='Mini-batch size')
    # -- test
    parser.add_argument('--model-path',
                        type=str,
                        default='models/model.pth.tar',
                        help='Pretrained model pathname')
    # -- feature extractor
    parser.add_argument('--extract-feats',
                        default=False,
                        action='store_true',
                        help='If set, run the model as a feature extractor')

    parser.add_argument('--config-path',
                        type=str,
                        default='configs/lrw_snv1x_tcn2x.json',
                        help='Model configuration in JSON format')

    args = parser.parse_args()

    if not os.path.exists("temp"):
        os.mkdir("temp")
        os.mkdir("temp/ABOUT")
        os.mkdir("temp/ABOUT/train")
        os.mkdir("temp/ABOUT/test")
        os.mkdir("temp/ABOUT/eval")

    video_path = os.path.join(args.video_folder, args.video_name + '.mp4')
    if not os.path.exists(video_path):
        print("File does not exist: {}".format(video_path))
        return 0

    print("cropping frames...")
    frames = crop_patch(video_path)
    print("saving extraction...")
    format_and_save(frames)

    print("loading model...")
    model = get_model(args)

    assert os.path.isfile(args.model_path), \
        "File path does not exist. Path input: {}".format(args.model_path)
    model.load_state_dict(torch.load(args.model_path)["model_state_dict"],
                          strict=True)

    # -- get dataset iterators
    dset_loaders = get_data_loaders(args)
    print("predicting...")
    predictions = evaluate(model, dset_loaders['test'])
    labels = read_txt_lines("labels/500WordsSortedList.txt")
    sentence = []
    for x in predictions:
        sentence.append(labels[x])
    sentence = ' '.join(sentence)

    # Target language for the synthesized speech (here German)
    language = 'de'

    # Pass the predicted sentence and language to the gTTS engine;
    # slow=True tells the module to render the audio at a slower pace
    myobj = gTTS(text=sentence, lang=language, slow=True)

    # Save the synthesized audio as an mp3 file
    myobj.save("temp/speech.mp3")

    # -- combine the synthesized audio with the original video

    my_clip = mpe.VideoFileClip(video_path)
    audio_background = mpe.AudioFileClip('temp/speech.mp3')

    final_clip = my_clip.set_audio(audio_background)
    final_clip.write_videofile("final.avi", fps=my_clip.fps, codec='mpeg4')

    shutil.rmtree("temp")
    print("Finnished successfully")