Exemplo n.º 1
0
def align(text_path, audio_path, align_out_path, word_align=True):
    """Align a transcript with its audio and save the sync map as JSON.

    The task is configured with ``task_language=hi`` and JSON output.

    Args:
        text_path: Path of the transcript file.
        audio_path: Path of the audio file.
        align_out_path: Path the JSON sync map is written to.
        word_align: When true, the task uses ``is_text_type=mplain`` with
            ``os_task_file_levels=3`` and a runtime configuration that turns
            on both non-speech masking flags; otherwise the text type is
            ``plain`` and no runtime configuration is passed.
    """
    options = [u"task_language=hi", "os_task_file_format=json"]
    if word_align:
        options.extend(["os_task_file_levels=3", "is_text_type=mplain"])
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True
    else:
        options.append("is_text_type=plain")
        rconf = None

    task = Task(config_string="|".join(options))
    task.text_file_path_absolute = text_path
    task.audio_file_path_absolute = audio_path
    task.sync_map_file_path_absolute = align_out_path

    # Compute the alignment, then let aeneas write the sync map.
    executor = ExecuteTask(task, rconf=rconf)
    executor.execute()
    task.output_sync_map_file()

    # Round-trip the JSON so non-ASCII text is stored literally rather than
    # as \uXXXX escapes.
    with open(align_out_path, 'r', encoding='utf8') as src:
        alignment = json.load(src)
    with open(align_out_path, 'w', encoding='utf8') as dst:
        json.dump(alignment, dst, ensure_ascii=False, indent=2)
Exemplo n.º 2
0
def align_files_in_place(data: InputDataFiles):
    """Align the audio and transcript named in ``data`` and save the result.

    The regular leaf fragments of the sync map are written to
    ``data.alignment_filename`` as a JSON list of ``[begin, end, text]``
    entries, with ``begin`` and ``end`` rendered as strings.

    Args:
        data: Supplies ``text_file_format``, ``language``, ``audio_filename``,
            ``transcript_filename`` and ``alignment_filename``.

    Raises:
        HTTPException: Status 500, wrapping any error raised while
            configuring, aligning or writing the output.
    """
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = data.text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = data.language

        # create task
        task = Task()
        task.configuration = aeneas_config
        task.audio_file_path_absolute = Path(data.audio_filename)
        task.text_file_path_absolute = Path(data.transcript_filename)

        # process Task
        ExecuteTask(task).execute()

        fragments = [(str(fragment.begin), str(fragment.end), fragment.text)
                     for fragment in task.sync_map_leaves()
                     if fragment.is_regular]

        # orjson produces UTF-8 bytes; write them unchanged so the file
        # encoding does not depend on the platform's default text encoding.
        with open(data.alignment_filename, "wb") as f:
            f.write(orjson.dumps(fragments))
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
Exemplo n.º 3
0
def align_audio(
        language: LanguageEnum = Form(...),
        text_file_format: TextFileFormatEnum = Form(...),
        transcript: UploadFile = File(...),
        audio: UploadFile = File(...),
):
    """Align an uploaded transcript with an uploaded audio file.

    Args:
        language: Language value stored in the task configuration.
        text_file_format: Text file format stored in the task configuration.
        transcript: Uploaded transcript, copied to a temporary file.
        audio: Uploaded audio, copied to a temporary file.

    Returns:
        A list of ``(begin, end, text)`` tuples, one per regular leaf
        fragment of the sync map, with ``begin`` and ``end`` as strings.

    Raises:
        HTTPException: Status 500, wrapping any error raised while processing.
    """
    from contextlib import closing

    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = language

        # closing() releases both temporary files even when the alignment
        # fails part-way through, instead of only on the success path.
        with closing(convert_to_tempfile(audio)) as tmp_audio, \
                closing(convert_to_tempfile(transcript)) as tmp_transcript:
            # create task
            task = Task()
            task.configuration = aeneas_config
            task.audio_file_path_absolute = Path(tmp_audio.name)
            task.text_file_path_absolute = Path(tmp_transcript.name)

            # process Task
            ExecuteTask(task).execute()

        return [(str(fragment.begin), str(fragment.end), fragment.text)
                for fragment in task.sync_map_leaves() if fragment.is_regular]
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
Exemplo n.º 4
0
def align_aeneas():
    """Align every narration audio file with its transcript as SBV.

    For each entry of ``config.PIDs`` the narrations subfolder is searched
    for ``*.m4a`` files, falling back to ``*.mp3`` when none are found.  The
    first six characters of the audio file name identify the transcript
    (``<id>.txt`` in ``config.aeneas_transcript_dir``) and the output
    (``<id>.sbv`` in ``config.aeneas_output_dir``).  Audio files without a
    transcript are reported and skipped.
    """
    # The aeneas settings are the same for every file.
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"

    for PID in config.PIDs:
        narrations_dir = os.path.join(PID, config.narrations_subfolder)

        audio_file_list = glob(os.path.join(narrations_dir, "*.m4a"))
        if not audio_file_list:
            audio_file_list = glob(os.path.join(narrations_dir, "*.mp3"))

        audio_file_list.sort()

        for a in audio_file_list:
            file_id = os.path.basename(a)[:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")
            # Single-argument print() behaves the same on Python 2 and 3.
            print("aligning " + a + " " + t)

            if not os.path.isfile(t):
                print(t + " not available so will not be processed.  Was it a missed ground truth file?")
                continue

            # create Task object
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
Exemplo n.º 5
0
def createSyncedLyricsFile(lyrics, file):
    """Align ``lyrics`` with the audio in ``file`` and write an ``.lrc`` file.

    The lyrics are first saved to ``tempSync.txt`` in the current directory,
    which is used as the text input of the alignment.  One
    ``[timestamp]text`` line per sync-map leaf is written next to the audio
    file, using the audio path with its extension replaced by ``.lrc``.

    On success the global ``lyricsSynced`` counter is incremented; on any
    error during alignment or writing the global ``errors`` counter is
    incremented instead and the error is not propagated.

    Args:
        lyrics: Lyrics text to align.
        file: Path of the audio file.
    """
    global lyricsSynced, errors
    import os

    with open("tempSync.txt", "w+") as f:
        f.write(lyrics)
    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config
    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()
        # splitext handles extensions of any length (e.g. ".flac"), where
        # slicing off four characters would leave a stray dot in the name.
        lrc_path = os.path.splitext(file)[0] + ".lrc"
        with open(lrc_path, "w+") as syncedLyricsFile:
            for fragment in task.sync_map_leaves():
                # Drop the leading "HH:" and the last millisecond digit.
                timestamp = gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1]
                syncedLyricsFile.write('[' + timestamp + ']' + fragment.text + '\n')
        print("   Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # Best effort: count the failure and keep processing other files.
        errors += 1
        print("   Sync error", sep=' ', end='',flush=True)
Exemplo n.º 6
0
def main():
    """Run one alignment task described by the command-line arguments.

    Expects four arguments: audio file path, text file path, task
    configuration string and output sync map path.  With fewer arguments
    ``usage()`` is called and nothing else happens.  Progress and errors are
    reported on standard output.
    """
    if len(sys.argv) < 5:
        usage()
        return

    audio_file_path = sys.argv[1]
    text_file_path = sys.argv[2]
    config_string = sys.argv[3]
    sync_map_file_path = sys.argv[4]

    # Single-argument print() behaves the same on Python 2 and 3.
    print("[INFO] Creating task...")
    task = Task(config_string)
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = sync_map_file_path
    print("[INFO] Creating task... done")

    print("[INFO] Executing task...")
    logger = Logger(tee=False)
    executor = ExecuteTask(task=task, logger=logger)
    result = executor.execute()
    print("[INFO] Executing task... done")

    if not result:
        print("[ERRO] An error occurred while executing the task")
        return

    print("[INFO] Creating output container...")
    path = task.output_sync_map_file()
    print("[INFO] Creating output container... done")

    if path is not None:
        print("[INFO] Created %s" % path)
    else:
        print("[ERRO] An error occurred while writing the output sync map file")
Exemplo n.º 7
0
def force_align(audio_path,
                text_path,
                output_path,
                min_length=1.0,
                max_length=10.0,
                logging=logging):
    """Align text with audio and save the fragments of acceptable length.

    Args:
        audio_path: Path of the audio file.
        text_path: Path of the plain-text transcript.
        output_path: Path of the JSON file the kept fragments are written to.
        min_length: Fragments must be strictly longer than this.
        max_length: Fragments must be strictly shorter than this.
        logging: Object whose ``info`` method receives progress messages.
    """
    task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    executor = ExecuteTask(task)
    executor.execute()
    logging.info("Aligned audio and text")

    # Keep only non-empty fragments whose length lies strictly between the
    # two bounds.
    sentences = [
        {
            "start": float(fragment.begin),
            "end": float(fragment.end),
            "length": float(fragment.length),
            "text": fragment.text,
        }
        for fragment in task.sync_map_leaves()
        if fragment.length > min_length and fragment.length < max_length
        and fragment.text
    ]

    with open(output_path, "w") as out_file:
        json.dump(sentences, out_file, indent=4)
Exemplo n.º 8
0
 def test_set_text_file_path_absolute_05(self):
     """Setting the path of the parsed sonnet loads a text file of length 15."""
     task = Task()
     task.configuration = TaskConfiguration()
     conf = task.configuration
     conf.language = Language.EN
     conf.is_text_file_format = TextFileFormat.PARSED
     sonnet_path = get_abs_path("res/inputtext/sonnet_parsed.txt")
     # Assigning the path is what populates task.text_file.
     task.text_file_path_absolute = sonnet_path
     text_file = task.text_file
     self.assertNotEqual(text_file, None)
     self.assertEqual(len(text_file), 15)
Exemplo n.º 9
0
 def test_set_text_file_path_absolute_03(self):
     """Setting the path of the unparsed XHTML sonnet loads a text file of length 15.

     The configuration uses the class regex ``"ra"`` and numeric id sorting.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     conf = task.configuration
     conf.language = Language.EN
     conf.is_text_file_format = TextFileFormat.UNPARSED
     conf.is_text_unparsed_class_regex = "ra"
     conf.is_text_unparsed_id_sort = IDSortingAlgorithm.NUMERIC
     sonnet_path = get_abs_path("res/inputtext/sonnet_unparsed_class_id.xhtml")
     # Assigning the path is what populates task.text_file.
     task.text_file_path_absolute = sonnet_path
     text_file = task.text_file
     self.assertNotEqual(text_file, None)
     self.assertEqual(len(text_file), 15)
Exemplo n.º 10
0
def force_align():
    """Align ``data/transcript.txt`` with ``data/audio.wav``.

    Both files are looked up in the ``data`` directory, resolved to an
    absolute path from the current working directory.

    Returns:
        The ``"fragments"`` entry of the sync map's JSON representation.
    """
    work_dir = os.path.abspath("data")

    task = Task(
        config_string="task_language=rus|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = work_dir + "/audio.wav"
    task.text_file_path_absolute = work_dir + "/transcript.txt"

    executor = ExecuteTask(task)
    executor.execute()

    sync_map = json.loads(task.sync_map.json_string)
    return sync_map["fragments"]
Exemplo n.º 11
0
 def test_execute(self):
     """Run a plain-text task with head and process lengths set, then write its sync map."""
     task = Task(
         "task_language=en|os_task_file_format=txt|os_task_file_name=output_head.txt|is_text_type=plain|is_audio_file_head_length=11.960|is_audio_file_process_length=31.640"
     )
     task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     task.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"

     # The execution must report success.
     executor = ExecuteTask(task, logger=Logger(tee=True))
     self.assertTrue(executor.execute())

     # Writing the sync map must return a path.
     task.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
     written_path = task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Exemplo n.º 12
0
 def test_execute(self):
     """Run an unparsed-XHTML task with SMIL output, then write its sync map."""
     task = Task(
         "task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric"
     )
     task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"

     # The execution must report success.
     executor = ExecuteTask(task, logger=Logger(tee=True))
     self.assertTrue(executor.execute())

     # Writing the sync map must return a path.
     task.sync_map_file_path_absolute = "/tmp/p001.smil"
     written_path = task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Exemplo n.º 13
0
 def test_execute(self):
     """Execute a SMIL task on the p001 assets and check that output is produced."""
     smil_config = "task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric"
     task = Task(smil_config)
     task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"

     tee_logger = Logger(tee=True)
     succeeded = ExecuteTask(task, logger=tee_logger).execute()
     self.assertTrue(succeeded)

     task.sync_map_file_path_absolute = "/tmp/p001.smil"
     self.assertNotEqual(task.output_sync_map_file(), None)
 def transcribe(self, audio, transcript, output):
     """Align ``transcript`` with ``audio`` and write a JSON sync map to ``output``.

     Args:
         audio: Path of the audio file.
         transcript: Path of the plain-text transcript.
         output: Path the JSON sync map is written to.
     """
     task = Task(
         config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
     task.audio_file_path_absolute = audio
     task.text_file_path_absolute = transcript
     task.sync_map_file_path_absolute = output

     print("Processing task...\n")
     executor = ExecuteTask(task)
     executor.execute()

     print(f"Taks processed. Writing output to {output}")
     task.output_sync_map_file()
    def align_audio_and_text(self, file_path):
        """Align the instance's text file with its audio file.

        Uses ``self.audio_file_path`` and ``self.txt_file_path`` as inputs and
        writes a JSON sync map to ``file_path``.

        Args:
            file_path: Path the sync map is written to.
        """
        task = Task()
        task.configuration = TaskConfiguration()
        settings = task.configuration
        settings[gc.PPN_TASK_LANGUAGE] = Language.PAN
        settings[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
        settings[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.JSON

        task.audio_file_path_absolute = self.audio_file_path
        task.text_file_path_absolute = self.txt_file_path
        task.sync_map_file_path_absolute = file_path

        executor = ExecuteTask(task)
        executor.execute()
        task.output_sync_map_file()
Exemplo n.º 16
0
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """Align a plain-text file with an audio file and write a CSV sync map.

    Args:
        txt_filename: Path of the text file.
        wav_filename: Path of the audio file.
        csv_filename: Path the CSV sync map is written to.
    """
    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=csv")
    task.audio_file_path_absolute = wav_filename
    task.text_file_path_absolute = txt_filename
    task.sync_map_file_path_absolute = csv_filename

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()
Exemplo n.º 17
0
def process_aeneas_map(filepath_, format_):
    """Align ``<filepath_>.txt`` with ``<filepath_>.<format_>`` into ``<filepath_>.json``.

    Args:
        filepath_: Common path prefix, without extension, of the three files.
        format_: Extension of the audio file, without the leading dot.
    """
    audio_path = filepath_ + '.' + format_
    text_path = filepath_ + ".txt"
    sync_map_path = filepath_ + ".json"

    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = sync_map_path

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()
Exemplo n.º 18
0
 def execute(self, config_string, audio_path, text_path):
     """Run a task and check that its sync map is written to a temporary file.

     Asserts that the path returned by ``output_sync_map_file`` is the
     temporary path and that the file written there is not empty.

     Args:
         config_string: Task configuration string.
         audio_path: Audio path, resolved relative to this file.
         text_path: Text path, resolved relative to this file.
     """
     handler, tmp_path = gf.tmp_file()
     try:
         task = Task(config_string)
         task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
         task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
         executor = ExecuteTask(task)
         executor.execute()
         task.sync_map_file_path_absolute = tmp_path
         result_path = task.output_sync_map_file()
         self.assertIsNotNone(result_path)
         self.assertEqual(result_path, tmp_path)
         self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     finally:
         # Remove the temporary file even when the task or an assertion fails.
         gf.delete_file(handler, tmp_path)
Exemplo n.º 19
0
 def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
     """Load a text file into a task and check its length.

     Args:
         path: Text file path, passed through ``get_abs_path``.
         fmt: Value for the configuration's text file format.
         expected: Expected length of the loaded text file.
         id_regex: Optional id regex for unparsed text.
         class_regex: Optional class regex for unparsed text.
         id_sort: Optional id sorting algorithm for unparsed text.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     conf = task.configuration
     conf.language = Language.EN
     conf.is_text_file_format = fmt

     # Only the options that were actually supplied are applied.
     optional_settings = (
         ("is_text_unparsed_id_regex", id_regex),
         ("is_text_unparsed_class_regex", class_regex),
         ("is_text_unparsed_id_sort", id_sort),
     )
     for attribute, value in optional_settings:
         if value is not None:
             setattr(conf, attribute, value)

     task.text_file_path_absolute = get_abs_path(path)
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), expected)
Exemplo n.º 20
0
 def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
     """Load a text file into a task and check its length.

     Args:
         path: Text file path, resolved relative to this file.
         fmt: Value stored under the ``"i_t_format"`` configuration key.
         expected: Expected length of the loaded text file.
         id_regex: Optional id regex for unparsed text.
         class_regex: Optional class regex for unparsed text.
         id_sort: Optional id sorting algorithm for unparsed text.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     conf = task.configuration
     conf["language"] = Language.ENG
     conf["i_t_format"] = fmt

     # Only the options that were actually supplied are applied.
     optional_settings = (
         ("i_t_unparsed_class_regex", class_regex),
         ("i_t_unparsed_id_regex", id_regex),
         ("i_t_unparsed_id_sort", id_sort),
     )
     for key, value in optional_settings:
         if value is not None:
             conf[key] = value

     task.text_file_path_absolute = gf.absolute_path(path, __file__)
     self.assertIsNotNone(task.text_file)
     self.assertEqual(len(task.text_file), expected)
Exemplo n.º 21
0
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """Align plain-text lyrics with an audio file and write a JSON sync map.

    Args:
        audio_filepath: Path of the audio file.
        lyrics_filepath: Path of the lyrics text file.
        output_filepath: Path the JSON sync map is written to.
    """
    task = Task(
        config_string="task_language=eng|os_task_file_format=json|is_text_type=plain")
    task.audio_file_path_absolute = audio_filepath
    task.text_file_path_absolute = lyrics_filepath
    task.sync_map_file_path_absolute = output_filepath

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()
Exemplo n.º 22
0
def align(audio_file_path, text_file_path, syncmap_file_path):
    """Align a plain-text file with an audio file and write a TXT sync map.

    The task is configured with ``task_language=zh``.

    Args:
        audio_file_path: Path of the audio file.
        text_file_path: Path of the text file.
        syncmap_file_path: Path the sync map is written to.

    Returns:
        Always ``True``; failures surface as exceptions from the calls made.
    """
    task = Task(
        config_string="task_language=zh|is_text_type=plain|os_task_file_format=txt")
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = syncmap_file_path

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()

    return True
def sync_map_generator(
        song_name,
        songs_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/",
        lyrics_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/",
        output_dir=u"output/"):
    """Align a song's lyrics with its audio and write a JSON sync map.

    Args:
        song_name: Base name, without extension, shared by the audio file,
            the lyrics file and the generated sync map.
        songs_dir: Directory holding ``<song_name>.wav``.
        lyrics_dir: Directory holding ``<song_name>.txt``.
        output_dir: Directory ``<song_name>.json`` is written to.

    The directory defaults are the locations that were previously
    hard-coded, so existing single-argument calls behave as before.
    """
    import os

    print('Creating sync map file...')
    # create Task object
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    task = Task(config_string=config_string)
    task.audio_file_path_absolute = os.path.join(songs_dir, song_name + ".wav")
    task.text_file_path_absolute = os.path.join(lyrics_dir, song_name + ".txt")
    task.sync_map_file_path_absolute = os.path.join(output_dir, song_name + ".json")

    # process Task
    ExecuteTask(task).execute()

    # output sync map to file
    task.output_sync_map_file()
    print('Created sync map file')
Exemplo n.º 24
0
def executeAeneas(text_path, audio_path):
    """Align a plain-text file with an audio file and write a JSON sync map.

    The sync map is written next to the audio file, as the audio path with
    its extension replaced by ``_syncmap.json``.  The task is configured
    with ``task_language=tur``.

    Args:
        text_path: Path of the text file.
        audio_path: Path of the audio file.
    """
    sync_map_path = os.path.splitext(audio_path)[0] + "_syncmap.json"

    task = Task(
        config_string=u'task_language=tur|is_text_type=plain|os_task_file_format=json')
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = sync_map_path

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()
Exemplo n.º 25
0
def generate_epub(pub_id):
    """Align the fixed test sonnet with the fixed test audio and return the sync map.

    The inputs are ``./test/sonnet_plain.txt`` and ``./test/audio.mp3``; the
    sync map is written to ``./test/output.json``.

    Args:
        pub_id: Not used by this function.

    Returns:
        The text content of the sync map file that was written.
    """
    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = u"./test/audio.mp3"
    task.text_file_path_absolute = u"./test/sonnet_plain.txt"
    task.sync_map_file_path_absolute = u"./test/output.json"

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()

    # Hand back the file content rather than the path.
    with open(task.sync_map_file_path_absolute, 'r') as sync_map_file:
        return sync_map_file.read()
Exemplo n.º 26
0
def force_align(book_name, chapter_index):
    """Align one chapter of a book and write its sync map.

    The task uses ``properties.aeneas_configuration_string``; the audio,
    text and sync map paths come from the ``fu.build_*`` helpers.

    Args:
        book_name: Book identifier passed to the path helpers.
        chapter_index: Integer chapter number passed to the path helpers.
    """
    print("Aligning chapter {:d}".format(chapter_index))

    task = Task(config_string=properties.aeneas_configuration_string)
    chapter = (book_name, chapter_index)
    task.audio_file_path_absolute = fu.build_audio_path(*chapter)
    task.text_file_path_absolute = fu.build_valid_text_path(*chapter)
    task.sync_map_file_path_absolute = fu.build_syncmap_path(*chapter)

    # Compute the alignment, then write it out.
    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()
Exemplo n.º 27
0
def make_subs(wav_path, txt_path, srt_path, start):
    """Build subtitles timed against the audio and shift them by ``start``.

    Args:
        wav_path: Path of the audio file.
        txt_path: Path of the plain-text transcript.
        srt_path: Path the SRT sync map is written to and read back from.
        start: Number of seconds the subtitles are shifted by.

    Returns:
        The subtitles loaded from ``srt_path`` with the shift applied.
    """
    task = Task(
        config_string="task_language=eng|is_text_type=plain|os_task_file_format=srt")
    task.audio_file_path_absolute = wav_path
    task.text_file_path_absolute = txt_path
    task.sync_map_file_path_absolute = srt_path

    executor = ExecuteTask(task)
    executor.execute()
    task.output_sync_map_file()

    # Reload the file aeneas just wrote and move every cue by the offset.
    shifted = pysrt.open(srt_path)
    shifted.shift(seconds=start)
    return shifted
Exemplo n.º 28
0
def chopsounds():
    """Cut the input WAV file into one WAV file per aligned fragment.

    Reads the module-level ``inputsound``, ``inputtext`` and ``outputdir``.
    Each fragment with a positive length is exported to
    ``<outputdir>/<fragment identifier>.wav``.
    """
    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = inputsound
    task.text_file_path_absolute = inputtext

    executor = ExecuteTask(task)
    executor.execute()

    sound = AudioSegment.from_wav(inputsound)
    for fragment in task.sync_map_leaves():
        if not (fragment.length > 0.0):
            continue
        # Slice bounds are the fragment times multiplied by 1000.
        start_ms = float(fragment.begin) * 1000
        end_ms = float(fragment.end) * 1000
        piece = sound[start_ms:end_ms]
        piece.export(outputdir + "/" + fragment.identifier + ".wav",
                     format="wav")
Exemplo n.º 29
0
 def set_text_file(self,
                   path,
                   fmt,
                   expected,
                   id_regex=None,
                   class_regex=None,
                   id_sort=None):
     """Check that loading ``path`` with format ``fmt`` gives ``expected`` items.

     Args:
         path: Text file path, passed through ``get_abs_path``.
         fmt: Value for the configuration's text file format.
         expected: Expected length of the loaded text file.
         id_regex: Optional id regex for unparsed text.
         class_regex: Optional class regex for unparsed text.
         id_sort: Optional id sorting algorithm for unparsed text.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings.language = Language.EN
     settings.is_text_file_format = fmt

     # Unparsed-text options are applied only when given.
     for name, value in (("is_text_unparsed_id_regex", id_regex),
                         ("is_text_unparsed_class_regex", class_regex),
                         ("is_text_unparsed_id_sort", id_sort)):
         if value is not None:
             setattr(settings, name, value)

     task.text_file_path_absolute = get_abs_path(path)
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), expected)
Exemplo n.º 30
0
 def set_text_file(self,
                   path,
                   fmt,
                   expected,
                   id_regex=None,
                   class_regex=None,
                   id_sort=None):
     """Check that loading ``path`` with format ``fmt`` gives ``expected`` items.

     Args:
         path: Text file path, resolved relative to this file.
         fmt: Value stored under the ``"i_t_format"`` configuration key.
         expected: Expected length of the loaded text file.
         id_regex: Optional id regex for unparsed text.
         class_regex: Optional class regex for unparsed text.
         id_sort: Optional id sorting algorithm for unparsed text.
     """
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings["language"] = Language.ENG
     settings["i_t_format"] = fmt

     # Unparsed-text options are applied only when given.
     for key, value in (("i_t_unparsed_class_regex", class_regex),
                        ("i_t_unparsed_id_regex", id_regex),
                        ("i_t_unparsed_id_sort", id_sort)):
         if value is not None:
             settings[key] = value

     task.text_file_path_absolute = gf.absolute_path(path, __file__)
     self.assertIsNotNone(task.text_file)
     self.assertEqual(len(task.text_file), expected)
Exemplo n.º 31
0
def create_aeneas_json_file(audio_path, text_path, output_path):
    """
    Use the aeneas API to synchronize audio and text.

        Parameters:
        audio_path (str): audio filepath.
        text_path (str): text filepath.
        output_path (str): output json filepath.

        Returns:
        Boolean: True when the sync map was written, False when an error
        was caught (its type, file and line number are printed).

    A KeyboardInterrupt is reported and terminates the program.
    """
    try:
        # create Task object
        config_string = u"task_language=por|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_percent_value=50|mfcc_mask_nonspeech_l2=True"
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = u"{}".format(audio_path)
        task.text_file_path_absolute = u"{}".format(text_path)
        task.sync_map_file_path_absolute = u"{}".format(output_path)

        # process Task
        ExecuteTask(task).execute()

        # output sync map to file
        task.output_sync_map_file()

    except KeyboardInterrupt:
        print("KeyboardInterrupt Detected!")
        # sys.exit() is always available, unlike the site-provided exit().
        sys.exit()

    except Exception:
        # Catch Exception rather than everything, so SystemExit and other
        # interpreter-level exits are not turned into a False return value.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        exc_file = split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, exc_file, exc_tb.tb_lineno)
        return False

    return True
Exemplo n.º 32
0
def get_alignments():
    """Download each audio track and align it with its text.

    For every ``address -> youtube_id`` pair in ``text_audio_pairs`` the
    audio is fetched into ``alignment/audio/<address>.webm`` and aligned
    with ``alignment/text/<address>.txt``; the JSON sync map is written to
    ``alignment/syncmaps/<address>.json``.
    """
    config = u'task_language=eng|is_text_type=plain|os_task_file_format=json'

    # items() and single-argument print() work on both Python 2 and 3,
    # unlike iteritems() and the print statement.
    for address, youtube_id in text_audio_pairs.items():

        # download youtube video
        print(D.audio_from_youtube(
            'https://youtu.be/' + youtube_id,
            output_file_path=u'alignment/audio/' + address + '.webm',
            preferred_format=u'webm'
            ))

        # designate text, audio, and syncmap files
        text = os.path.abspath(u'alignment/text/' + address + '.txt')
        audio = os.path.abspath(u'alignment/audio/' + address + '.webm')
        syncmap = os.path.abspath(u'alignment/syncmaps/' + address + '.json')

        # align text to audio
        task = Task(config_string=config)
        task.text_file_path_absolute = text
        task.audio_file_path_absolute = audio
        task.sync_map_file_path_absolute = syncmap
        ExecuteTask(task).execute()
        task.output_sync_map_file()
Exemplo n.º 33
0
def get_alignment(path_to_audio_file: str,
                  transcript: List[str],
                  force=False,
                  language='fr_FR') -> List[dict]:
    """Return the alignment of ``transcript`` with an audio file, cached on disk.

    The transcript is written to ``CACHE_DIR`` one entry per line, and the
    sync map is stored there under a name built from the transcript hash
    and the audio file hash.  The alignment is recomputed only when that
    file is missing or ``force`` is true.

    Args:
        path_to_audio_file: Path of the audio file.
        transcript: Text fragments to align, in order.
        force: Recompute the alignment even when a cached one exists.
        language: Locale code; ``'fr_FR'`` and ``'en_US'`` are supported.

    Returns:
        The sync map's ``fragments``, each passed through ``cleanup_fragment``.

    Raises:
        KeyError: If ``language`` is not a supported locale code.
    """
    # see https://github.com/readbeyond/aeneas/blob/9d95535ad63eef4a98530cfdff033b8c35315ee1/aeneas/ttswrappers/espeakngttswrapper.py#L45  # noqa
    aeneas_language = {
        'fr_FR': 'fra',
        'en_US': 'eng',
    }[language]
    full_transcript = '\t'.join(transcript)
    full_transcript_hash = sha1(full_transcript.encode()).hexdigest()
    path_to_transcript = os.path.join(CACHE_DIR, f'{full_transcript_hash}.txt')

    with open(path_to_audio_file, 'rb') as f:
        audio_file_hash = hash_file(f)

    # Write UTF-8 explicitly instead of relying on the platform default
    # encoding; aeneas expects UTF-8 text input.
    with open(path_to_transcript, 'w', encoding='utf-8') as f:
        f.write('\n'.join(transcript))

    path_to_alignment_tmp = os.path.join(
        CACHE_DIR, f'{full_transcript_hash}_{audio_file_hash}.json')

    if force or not os.path.isfile(path_to_alignment_tmp):
        # build alignment
        task = Task(
            f'task_language={aeneas_language}|os_task_file_format=json|is_text_type=plain'
        )
        task.audio_file_path_absolute = os.path.abspath(path_to_audio_file)
        task.text_file_path_absolute = path_to_transcript
        task.sync_map_file_path_absolute = path_to_alignment_tmp
        executor = ExecuteTask(task=task)
        executor.execute()
        task.output_sync_map_file()

    with open(path_to_alignment_tmp, encoding='utf-8') as source:
        return [cleanup_fragment(fragment)
                for fragment in json.load(source)['fragments']]
Exemplo n.º 34
0
def main():
    """Align every audio file of a book with its share of the book text.

    The single command-line argument is a YAML file describing the book.
    For each entry under ``audio`` the configured line range of the book
    text is tokenized with gruut, written one sentence per line to
    ``<audio>.txt`` (cleaned words) and ``<audio>.raw.txt`` (raw text), and
    aligned with the audio into ``<audio>.json``.
    """
    parser = argparse.ArgumentParser(prog="librivox_align.py")
    parser.add_argument("book_yml", help="YAML file with book details")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)
    _LOGGER.debug(args)

    args.book_yml = Path(args.book_yml)
    input_dir = args.book_yml.parent

    with open(args.book_yml, "r") as book_file:
        book = yaml.safe_load(book_file)

    # The gruut language is needed for tokenization below.
    gruut_lang = gruut.Language.load(book["gruut"]["language"])
    assert gruut_lang, "Unsupported language"

    language = book["aeneas"]["language"]

    # Read the whole book text once; audio entries select line ranges of it.
    text_path = Path(input_dir / book["text"]["file"])
    _LOGGER.debug("Loading book text from %s", text_path)
    with open(text_path, "r") as text_file:
        text = text_file.readlines()

    for mp3_name, mp3_info in book["audio"].items():
        mp3_path = input_dir / mp3_name
        sync_path = mp3_path.with_suffix(".json")

        # Collect the "|"-separated pieces of the task configuration.
        config_parts = [
            f"task_language={language}|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_no_zero=True"
        ]

        start_time = float(mp3_info.get("start_time", 0))
        if start_time > 0:
            # Skip seconds at the beginning
            config_parts.append(f"is_audio_file_head_length={start_time}")

        end_time = float(mp3_info.get("end_time", 0))
        if end_time < 0:
            # A negative end time means seconds to skip at the end
            config_parts.append(f"is_audio_file_tail_length={abs(end_time)}")
        elif end_time > 0:
            # A positive end time is the length of audio to process
            config_parts.append(f"is_audio_file_process_length={end_time}")

        task = Task(config_string="|".join(config_parts))
        task.audio_file_path_absolute = mp3_path.absolute()
        task.sync_map_file_path_absolute = sync_path.absolute()

        mp3_text_path = mp3_path.with_suffix(".txt")
        with open(mp3_text_path, mode="w+") as mp3_text_file:
            start_line, end_line = mp3_info["start_line"], mp3_info["end_line"]

            # Line numbers are 1-based and inclusive; normalize the
            # whitespace around every selected line.
            mp3_text = "".join(
                text[line_index].strip() + "\n"
                for line_index in range(start_line - 1, end_line))

            # Run through gruut tokenizer to expand abbreviations, numbers, etc.
            raw_text_path = mp3_path.with_suffix(".raw.txt")
            with open(raw_text_path, "w") as raw_text_file:
                for sentence in gruut_lang.tokenizer.tokenize(mp3_text):
                    # One sentence per line in both files
                    print(" ".join(sentence.clean_words), file=mp3_text_file)
                    print(sentence.raw_text, file=raw_text_file)

            # The text file is still open while aeneas reads it by name.
            mp3_text_file.seek(0)
            task.text_file_path_absolute = mp3_text_file.name

            # Generate sync map JSON file
            _LOGGER.debug("Generating %s (%s)", sync_path, mp3_path)
            executor = ExecuteTask(task)
            executor.execute()
            task.output_sync_map_file()
Exemplo n.º 35
0
def retrieve_keyword_audio(vid, keyword):
    """
    Extract short audio clips containing ``keyword`` from a YouTube video.

    The video's 'ko' caption track is scanned for captions whose words include
    ``keyword`` (or ``keyword`` + "s" / "es"). For each such caption, the
    matching slice of the video's audio is force-aligned against the caption
    words, and every regular aligned fragment whose text contains ``keyword``
    and which is shorter than 0.9 seconds is padded/centred with
    ``pad_and_center_align`` and written to
    ``DATA_DIR/<keyword>/<keyword>_<n>.wav``.

    Nothing is extracted (and 0 is returned) when the video is longer than
    2700 seconds, has no 'ko' caption track, or the download did not produce
    the expected ``.mp4`` file.

    All temporary files created under ``TEMP_DIR`` are removed before
    returning, including when an error is raised.

    :param vid: video id substituted into ``URL_TEMPLATE``; also used as the
                base name of the temporary files
    :param keyword: word to search for; it is compared against lowercased
                    caption words
    :returns: number of keyword clips written
    :rtype: int
    :raises Exception: if a caption's time line does not match
                       ``SRT_TIME_PARSER``
    """
    audio_index = 0
    v_url = URL_TEMPLATE.format(vid)
    youtube = YouTube(v_url)
    y_len = youtube.player_config_args['player_response']['videoDetails']['lengthSeconds']
    print("Length :", y_len)
    print("Views  :", youtube.views)

    if int(y_len) > 2700:
        # only consider video < 45 mins
        return audio_index
    print("=" * 40)

    caption = youtube.captions.get_by_language_code('ko')
    if not caption:
        return audio_index

    print("caption==ko")
    # retrieve audio from video
    youtube.streams.first().download(output_path=TEMP_DIR, filename=vid)

    temp_file_name = TEMP_DIR + vid
    video_path = temp_file_name + ".mp4"
    wav_path = temp_file_name + ".wav"
    if not os.path.isfile(video_path):
        return audio_index

    try:
        time.sleep(1)  # need to wait before ffmpeg takes in as input file
        cmd = FFMPEG_TEMPLATE.format(temp_file_name).split()
        subprocess.check_output(cmd)
        audio = librosa.core.load(wav_path, 16000)[0]
    finally:
        # The decoded samples are held in memory from here on; drop the
        # intermediate files even if the conversion or the load failed.
        for path in (video_path, wav_path):
            if os.path.isfile(path):
                os.remove(path)

    # Scratch files handed to the forced aligner; rewritten for every caption.
    label_file = temp_file_name + "_" + keyword + ".txt"
    audio_file = temp_file_name + "_" + keyword + ".wav"
    keyword_forms = (keyword, keyword + "s", keyword + "es")

    try:
        for captions in caption.generate_srt_captions().split('\n\n'):
            cc_split = captions.split('\n')
            if len(cc_split) == 4 and cc_split[0] == '':
                # tolerate one leading empty line in front of the entry
                cc_split = cc_split[1:]
            elif len(cc_split) != 3:
                continue

            _, cc_time, cc_text = cc_split
            cc_text = TAG_CLEANER.sub('', cc_text)

            # clean up punctuation
            cc_text = cc_text.translate(TRANSPLATOR)
            words = cc_text.lower().strip().split()

            # crude stemming: also accept the "s" / "es" forms of the keyword
            if not any(form in words for form in keyword_forms):
                continue

            # Validate the time line before creating any per-caption artefact.
            match_result = SRT_TIME_PARSER.match(cc_time)
            if not match_result:
                print(TEXT_COLOUR['FAIL'] + "failed pasing srt time : "
                      + cc_time + TEXT_COLOUR['ENDC'])
                raise Exception('srt time fail error')

            start_time_ms = srt_time_to_ms(*match_result.group(1, 2, 3, 4))
            stop_time_ms = srt_time_to_ms(*match_result.group(5, 6, 7, 8))

            # *16 since 16 samples are captured per each ms (16000 Hz)
            block = audio[start_time_ms * 16:stop_time_ms * 16]

            # prepare label file for forced aligner: one word per line.
            # Written as UTF-8 explicitly instead of relying on the locale
            # encoding (assumes aeneas reads text files as UTF-8 — confirm).
            with open(label_file, "w", encoding="utf-8") as file:
                file.writelines(word + "\n" for word in words)

            # prepare audio file for forced aligner
            librosa.output.write_wav(audio_file, block, 16000)
            time.sleep(1)  # buffer for writing wav file

            aligner_task = Task(config_string=ALIGNER_CONFIG_STRING)
            aligner_task.text_file_path_absolute = label_file
            aligner_task.audio_file_path_absolute = audio_file

            # process aligning task
            ExecuteTask(aligner_task).execute()

            for fragment in aligner_task.sync_map_leaves():
                if not (fragment.is_regular
                        and keyword in fragment.text
                        and fragment.length < 0.9):
                    continue

                # fragment times are in seconds relative to the caption block
                begin = int(fragment.begin * 16000)
                end = int(fragment.end * 16000)
                keyword_audio = pad_and_center_align(block[begin:end], 16000)

                # NOTE: the DATA_DIR/<keyword> directory is not created here
                file_name = keyword + "_" + str(audio_index) + ".wav"
                librosa.output.write_wav(
                    DATA_DIR + "/" + keyword + "/" + file_name, keyword_audio, 16000)
                audio_index += 1
    finally:
        # Do not leave the aligner scratch files behind in TEMP_DIR.
        for path in (label_file, audio_file):
            if os.path.isfile(path):
                os.remove(path)

    return audio_index
Exemplo n.º 36
0
    def perform_command(self):
        """
        Run the task described by the command line arguments.

        The informational options (help, examples, parameter and value
        listings) are handled first and return immediately.
        Otherwise the audio, text, config string and sync map arguments
        are taken either from one of the built-in demos or
        from the first four positional arguments; the config string
        is validated unless validation is skipped; the audio is
        optionally downloaded from YouTube; the task is created and executed;
        the sync map file (and, if requested, an HTML file) is written;
        and finally the requested fragment reports are printed.

        :returns: the value returned by the invoked ``print_*`` method
                  for the informational options,
                  ``ERROR_EXIT_CODE`` if any step fails,
                  ``NO_ERROR_EXIT_CODE`` otherwise
        :rtype: int
        """
        if len(self.actual_arguments) == 0:
            return self.print_help()

        # informational options: print and leave
        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)
        if self.has_option(u"--examples-all"):
            return self.print_examples(True)
        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        listed_parameter = self.has_option_with_value(u"--list-values")
        if listed_parameter is not None:
            return self.print_values(listed_parameter)
        if self.has_option(u"--list-values"):
            # option given without a value
            return self.print_values(u"?")

        # NOTE list() is needed for Python3, where keys() is not a list!
        is_demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        must_validate = not self.has_option(u"--skip-validator")
        report_faster_rate = self.has_option(u"--faster-rate")
        report_rates = self.has_option(u"--rates")
        report_zero = self.has_option(u"--zero")

        if is_demo:
            # demo config strings are not validated
            must_validate = False

            # TODO allow injecting rconf options directly from DEMOS options field
            # runtime configuration overrides applied by specific demos
            rconf_overrides = {
                u"--example-cewsubprocess": (
                    (RuntimeConfiguration.CEW_SUBPROCESS_ENABLED, True),
                ),
                u"--example-ctw-espeak": (
                    (RuntimeConfiguration.TTS, "custom"),
                    (RuntimeConfiguration.TTS_PATH, self.CTW_ESPEAK),
                ),
                u"--example-ctw-speect": (
                    (RuntimeConfiguration.TTS, "custom"),
                    (RuntimeConfiguration.TTS_PATH, self.CTW_SPEECT),
                ),
                u"--example-festival": (
                    (RuntimeConfiguration.TTS, "festival"),
                    (RuntimeConfiguration.TTS_PATH, "text2wave"),
                ),
                u"--example-mws": (
                    (RuntimeConfiguration.MFCC_WINDOW_LENGTH, "1.500"),
                    (RuntimeConfiguration.MFCC_WINDOW_SHIFT, "0.500"),
                ),
                u"--example-py": (
                    (RuntimeConfiguration.C_EXTENSIONS, False),
                ),
            }

            # only the first demo option found is used
            for key in self.DEMOS:
                if not self.has_option(key):
                    continue
                demo_parameters = self.DEMOS[key]
                audio_file_path = demo_parameters[u"audio"]
                text_file_path = demo_parameters[u"text"]
                config_string = demo_parameters[u"config"]
                sync_map_file_path = demo_parameters[u"syncmap"]

                for rconf_key, rconf_value in rconf_overrides.get(key, ()):
                    self.rconf[rconf_key] = rconf_value

                # demos that switch on a report or the YouTube download
                if key == u"--example-faster-rate":
                    report_faster_rate = True
                if key == u"--example-no-zero":
                    report_zero = True
                if key == u"--example-rates":
                    report_rates = True
                if key == u"--example-youtube":
                    download_from_youtube = True
                break
        else:
            if len(self.actual_arguments) < 4:
                return self.print_help()
            (
                audio_file_path,
                text_file_path,
                config_string,
                sync_map_file_path,
            ) = self.actual_arguments[:4]

        if output_html:
            # the HTML file is written next to the sync map file,
            # and a downloaded audio file is kept
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"
        else:
            html_file_path = None

        if download_from_youtube:
            # in this mode the audio argument is a URL, not a local path
            youtube_url = audio_file_path

        # check the input and output paths, in this order
        if (
            not (download_from_youtube or self.check_input_file(audio_file_path))
            or not self.check_input_file(text_file_path)
            or not self.check_output_file(sync_map_file_path)
            or ((html_file_path is not None) and (not self.check_output_file(html_file_path)))
        ):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        if is_demo:
            if download_from_youtube:
                source_line = u"  YouTube URL:   %s" % youtube_url
            else:
                source_line = u"  Audio file:    %s" % audio_file_path
            lines = [
                u"Running example task with arguments:",
                source_line,
                u"  Text file:     %s" % text_file_path,
                u"  Config string: %s" % config_string,
                u"  Sync map file: %s" % sync_map_file_path,
            ]
            if len(demo_parameters[u"options"]) > 0:
                lines.append(u"  Options:       %s" % demo_parameters[u"options"])
            self.print_info(u"\n".join(lines))

        if must_validate:
            self.print_info(u"Validating config string (specify --skip-validator to bypass)...")
            check = Validator(logger=self.logger).check_configuration_string(
                config_string,
                is_job=False,
                external_name=True
            )
            if not check.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(check.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." % youtube_url)
                # from here on audio_file_path is the downloaded file
                audio_file_path = Downloader(logger=self.logger).audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio
                )
                self.print_info(u"Downloading audio from '%s' ... done" % youtube_url)
            except ImportError:
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as error:
                self.print_error(u"An unexpected error occurred while downloading audio from YouTube:")
                self.print_error(u"%s" % error)
                return self.ERROR_EXIT_CODE

        # create the task
        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as error:
            self.print_error(u"An unexpected error occurred while creating the task:")
            self.print_error(u"%s" % error)
            return self.ERROR_EXIT_CODE

        # execute the task
        try:
            self.print_info(u"Executing task...")
            ExecuteTask(task=task, rconf=self.rconf, logger=self.logger).execute()
            self.print_info(u"Executing task... done")
        except Exception as error:
            self.print_error(u"An unexpected error occurred while executing the task:")
            self.print_error(u"%s" % error)
            return self.ERROR_EXIT_CODE

        # write the sync map file
        try:
            self.print_info(u"Creating output sync map file...")
            written_path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % written_path)
        except Exception as error:
            self.print_error(u"An unexpected error occurred while writing the sync map file:")
            self.print_error(u"%s" % error)
            return self.ERROR_EXIT_CODE

        # write the HTML file, if requested
        if output_html:
            try:
                html_parameters = {
                    gc.PPN_TASK_OS_FILE_FORMAT: task.configuration["o_format"],
                    gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF: task.configuration["o_smil_audio_ref"],
                    gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF: task.configuration["o_smil_page_ref"],
                }
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, html_parameters)
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as error:
                self.print_error(u"An unexpected error occurred while writing the HTML file:")
                self.print_error(u"%s" % error)
                return self.ERROR_EXIT_CODE

        # keep or delete the downloaded audio file
        if download_from_youtube:
            if keep_audio:
                self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path)
            else:
                gf.delete_file(None, audio_file_path)

        # optional reports on the computed fragments
        if report_zero:
            zero_length = [
                leaf for leaf in task.sync_map.fragments_tree.vleaves_not_empty
                if leaf.begin == leaf.end
            ]
            if zero_length:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_length:
                    self.print_generic(u"  %s" % fragment)

        if report_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        if report_faster_rate:
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                # a fragment is reported when its rate reaches max_rate + 0.001
                too_fast = [
                    leaf for leaf in task.sync_map.fragments_tree.vleaves_not_empty
                    if leaf.rate >= max_rate + Decimal("0.001")
                ]
                if too_fast:
                    self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in too_fast:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE
Exemplo n.º 37
0
    aligner.py --audio-file=<a> --text-file=<t> --syncmap-file=<s>

Options:
    --audio-file=<a>        Audio file path
    --text-file=<t>         Text file path
    --syncmap-file=<s>      Syncmap file path
"""
from docopt import docopt
from aeneas.executetask import ExecuteTask
from aeneas.task import Task

if __name__ == "__main__":
    # Parse the command line against the usage text in the module docstring.
    options = docopt(__doc__)

    # Pull out the three paths up front, in the order they are applied below.
    audio_path, text_path, syncmap_path = (
        options[name]
        for name in ("--audio-file", "--text-file", "--syncmap-file")
    )

    # Fixed task settings: language "kan", plain text input, JSON sync map.
    aligner_task = Task(
        config_string=u"task_language=kan|is_text_type=plain|os_task_file_format=json"
    )
    aligner_task.audio_file_path_absolute = audio_path
    aligner_task.text_file_path_absolute = text_path
    aligner_task.sync_map_file_path_absolute = syncmap_path

    # Run the alignment, then write the sync map to the requested path.
    ExecuteTask(aligner_task).execute()
    aligner_task.output_sync_map_file()
Exemplo n.º 38
0
    lang = args.lang

# Stop with exit status 1 unless the requested language is one of the
# accepted codes; this runs before the aeneas imports below.
# NOTE(review): "hin" is accepted here although the message only mentions
# "hi" and "eng" — confirm which is intended.
if lang not in ["eng", "hi", "hin"]:
    print("only hi and eng allowed for language")
    exit(1)

from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from aeneas.runtimeconfiguration import RuntimeConfiguration

# Task settings: the chosen language, "subtitles" text type for the input,
# and SRT as the sync map output format.
config_string = u"task_language=" + lang + u"|is_text_type=subtitles|os_task_file_format=srt"

# The sync map is written to a temporary file rather than a user-given path.
# NOTE(review): mkstemp() also returns an open OS-level file descriptor
# (tempout); nothing in this part of the script closes it or removes the
# file — confirm that happens further down.
tempout, tempfilename = tempfile.mkstemp()
task = Task(config_string=config_string)
task.audio_file_path_absolute = args.audio
task.text_file_path_absolute = args.txt
task.sync_map_file_path_absolute = tempfilename
rconf = RuntimeConfiguration()
# This option ignores the non-word sounds in the audio
rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
# NOTE(review): presumably the log-energy threshold used by the nonspeech
# mask enabled above — confirm the meaning of 2.5 against the aeneas docs.
rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD] = 2.5

# To use a different Text-to-Speech engine
#rconf[RuntimeConfiguration.TTS] = "festival"

# process Task
ExecuteTask(task, rconf=rconf).execute()

# output sync map to file (the temporary file created above)
task.output_sync_map_file()