Пример #1
0
def _run_feature_process(process_id: int,
                         audio_file_pipe: mp.Queue,
                         mfcc_dim: int,
                         target_sample_rate: int,
                         output_folder: str):
    """Worker loop: compute features for queued files until a None arrives.

    Defined at module level (rather than as a closure) so that it can be
    pickled as an ``mp.Process`` target under the "spawn" and "forkserver"
    start methods, not only under "fork".

    Each processed file produces one ``print([audio_file, feature])`` record
    in ``{output_folder}/part.{process_id}.feat``.

    :param process_id: index of this worker; used in the output file name
        and in progress messages.
    :param audio_file_pipe: queue of audio file paths; ``None`` is the
        stop sentinel.
    :param mfcc_dim: forwarded to ``calc_mfcc_delta``.
    :param target_sample_rate: forwarded to ``calc_mfcc_delta``.
    :param output_folder: folder that receives the per-worker part file.
    """
    with open(f"{output_folder}/part.{process_id}.feat", "w") as fou:
        # iter(get, None) keeps pulling items until the None sentinel.
        for count, audio_file in enumerate(iter(audio_file_pipe.get, None),
                                           start=1):
            feature = calc_mfcc_delta(audio_file, mfcc_dim,
                                      target_sample_rate)
            print([audio_file, feature], file=fou)

            if count % 100 == 0:
                nlp.print_flush(
                    f"So far, process[{process_id}] have processed "
                    f"{count} audio files.")

        print(f"run_process[{process_id}] exits!")


def parallel_calc_features(wav_files_16bits: list,
                           mfcc_dim: int,
                           target_sample_rate: int,
                           output_folder: str,
                           process_num: int,
                           queue_capacity: int = 1024):
    """Compute features for many audio files with a pool of worker processes.

    The output folder is removed and recreated, ``process_num`` workers are
    started, every path in ``wav_files_16bits`` is pushed through a bounded
    queue, and one ``None`` sentinel per worker signals shutdown. Worker
    ``i`` writes its results to ``{output_folder}/part.{i}.feat``.

    :param wav_files_16bits: audio file paths to process.
    :param mfcc_dim: forwarded to ``calc_mfcc_delta``.
    :param target_sample_rate: forwarded to ``calc_mfcc_delta``.
    :param output_folder: destination folder; any existing content is
        deleted first.
    :param process_num: number of worker processes, must be >= 1.
    :param queue_capacity: maximum number of pending items in the queue.
    """
    import shlex

    assert 1 <= process_num
    # Quote the path so that spaces or shell metacharacters in the folder
    # name cannot make `rm -r` act on unintended paths.
    nlp.execute_cmd(f"rm -r {shlex.quote(output_folder)}")
    nlp.ensure_folder_exists(output_folder)

    start_time = time.time()
    file_pipe = mp.Queue(queue_capacity)
    process_runners = [
        mp.Process(target=_run_feature_process,
                   args=(idx, file_pipe, mfcc_dim, target_sample_rate,
                         output_folder))
        for idx in range(process_num)
    ]

    for p in process_runners:
        p.start()

    # put() blocks while the queue is full, so workers must already be
    # running before the producer loop starts.
    for audio_file in wav_files_16bits:
        file_pipe.put(audio_file)

    # One sentinel per worker: each worker consumes exactly one and exits.
    for _ in process_runners:
        file_pipe.put(None)

    for p in process_runners:
        p.join()

    duration = nlp.to_readable_time(time.time() - start_time)
    print(
        f"It takes {duration} to process {len(wav_files_16bits)} audio files")
Пример #2
0
    def _convert_sph_to_wav(sph_file: str) -> typing.Union[str, None]:
        """Convert a ``.sph`` file to ``.wav`` next to it.

        If the target file already exists it is returned without running
        any command. Otherwise ``sox`` is tried first and ``sph2pipe -f rif``
        second; the first command that exits with status 0 wins.

        :param sph_file: path ending in ``.sph``.
        :return: path of the ``.wav`` file, or ``None`` if both commands fail.
        """
        import shlex

        assert sph_file.endswith(".sph")
        out_file = nlp.replace_file_name(sph_file, ".sph", ".wav")
        if os.path.exists(out_file):
            return out_file

        # Quote both paths so that spaces or shell metacharacters in file
        # names do not split or alter the command line.
        src, dst = shlex.quote(sph_file), shlex.quote(out_file)
        for cmd in (f"sox {src} {dst}", f"sph2pipe -f rif {src} {dst}"):
            if execute_cmd(cmd) == 0:
                return out_file

        return None
Пример #3
0
    def _convert_flac_to_wav(flac_file: str) -> typing.Union[str, None]:
        """Convert a ``.flac`` file to ``.wav`` next to it using ``sox``.

        If the target file already exists it is returned without running
        ``sox``.

        :param flac_file: path ending in ``.flac``.
        :return: path of the ``.wav`` file, or ``None`` if ``sox`` exits
            with a non-zero status.
        """
        import shlex

        assert flac_file.endswith(".flac")
        out_file = nlp.replace_file_name(flac_file, ".flac", ".wav")
        if os.path.exists(out_file):
            return out_file

        # Quote both paths so that spaces or shell metacharacters in file
        # names do not split or alter the command line.
        cmd = f"sox {shlex.quote(flac_file)} {shlex.quote(out_file)}"
        if nlp.execute_cmd(cmd) == 0:
            return out_file

        return None
Пример #4
0
    def _convert_mp3_to_wav(map3_file: str) -> typing.Union[str, None]:
        """Convert an ``.mp3`` file to ``.wav`` next to it using ``ffmpeg``.

        If the target file already exists it is returned without running
        ``ffmpeg``.

        :param map3_file: path ending in ``.mp3``.
        :return: path of the ``.wav`` file, or ``None`` if ``ffmpeg`` exits
            with a non-zero status.
        """
        import shlex

        assert map3_file.endswith(".mp3")
        out_file = nlp.replace_file_name(map3_file, ".mp3", ".wav")
        if os.path.exists(out_file):
            return out_file

        # Quote both paths so that spaces or shell metacharacters in file
        # names do not split or alter the command line.
        cmd = f"ffmpeg -i {shlex.quote(map3_file)} {shlex.quote(out_file)}"
        if execute_cmd(cmd) == 0:
            return out_file

        return None
Пример #5
0
    def convert_to_16bits(wav_or_flac_file: str) -> typing.Union[str, None]:
        """Re-encode an audio file as ``*.16bits.wav`` via ``sox -b 16``.

        The output name is derived by replacing the input's extension with
        ``.16bits.wav``. If that file already exists it is returned without
        running ``sox``.

        :param wav_or_flac_file: path of the input audio file.
        :return: path of the converted file, or ``None`` if ``sox`` exits
            with a non-zero status.
        """
        import shlex

        file_ext = nlp.get_file_extension(wav_or_flac_file)
        new_file = nlp.replace_file_name(wav_or_flac_file, f".{file_ext}",
                                         ".16bits.wav")
        if os.path.exists(new_file):
            return new_file

        # Quote both paths so that spaces or shell metacharacters in file
        # names do not split or alter the command line.
        cmd = (f"sox {shlex.quote(wav_or_flac_file)} "
               f"-b 16 {shlex.quote(new_file)}")
        if nlp.execute_cmd(cmd) == 0:
            return new_file

        return None
Пример #6
0
def compute(input_file):
    """Run the ``test_bleu`` scripts on ``input_file`` and return the score.

    Steps, all executed through ``nlp.execute_cmd``:

    1. ``newextract.py`` writes ``{input_file}.sgm``.
    2. ``post_process`` writes ``{input_file}.sgm.post``.
    3. ``mteval-v11b.pl`` is run twice (with and without ``-c``), its
       output redirected to ``.bleu.case`` and ``.bleu.uncase`` files.
    4. The ``.bleu.uncase`` report is scanned for the first line containing
       ``'BLEU score ='``.
    5. All ``{input_file}.sgm*`` files are removed.

    :param input_file: path of the file to score.
    :return: the fourth-from-last space-separated token of the matching
        report line (a string), or the integer ``0`` if no line matches.
    """
    import shlex

    # Quote the path once; suffixes are appended outside the quotes, which
    # the shell concatenates into a single word (and still allows the
    # trailing `*` glob in the cleanup command to expand).
    quoted = shlex.quote(str(input_file))

    nlp.execute_cmd(
        f"./test_bleu/newextract.py -s test_bleu/sgm/05/nist_c2e_05_src "
        f"-i {quoted} -o {quoted}.sgm")
    nlp.execute_cmd(
        f"./test_bleu/post_process -o {quoted}.sgm -n {quoted}.sgm.post -l"
    )
    nlp.execute_cmd(
        f"./test_bleu/mteval-v11b.pl -c -r test_bleu/sgm/05/nist_c2e_05_ref -s test_bleu/sgm/05/nist_c2e_05_src "
        f"-t {quoted}.sgm.post  > {quoted}.sgm.post.bleu.case")
    nlp.execute_cmd(
        f"./test_bleu/mteval-v11b.pl -r test_bleu/sgm/05/nist_c2e_05_ref -s test_bleu/sgm/05/nist_c2e_05_src "
        f"-t {quoted}.sgm.post  > {quoted}.sgm.post.bleu.uncase")

    out = 0
    # Use a context manager so the report file handle is always closed.
    with open(f"{input_file}.sgm.post.bleu.uncase", 'r') as report:
        for line in report:
            if 'BLEU score =' in line:
                out = line.split(' ')[-4]
                break

    nlp.execute_cmd(f"rm {quoted}.sgm*")
    return out
Пример #7
0
    def convert_to_standard_wav(
            wav_or_flac_file: str) -> typing.Union[str, None]:
        """Normalize an audio file to 16-bit samples at a 16 kHz sample rate.

        A path already ending in ``.norm.wav`` is returned unchanged.
        Otherwise the output name is derived by replacing the input's
        extension with ``.norm.wav``; if that file already exists it is
        returned without running ``sox``.

        :param wav_or_flac_file: path of the input audio file.
        :return: path of the normalized file (sample width 2 bytes, sample
            rate 16K), or ``None`` if ``sox`` exits with a non-zero status.
        """
        import shlex

        if wav_or_flac_file.endswith(".norm.wav"):
            return wav_or_flac_file

        file_ext = nlp.get_file_extension(wav_or_flac_file)
        new_file = nlp.replace_file_name(wav_or_flac_file, f".{file_ext}",
                                         ".norm.wav")
        if os.path.exists(new_file):
            return new_file

        # Quote both paths so that spaces or shell metacharacters in file
        # names do not split or alter the command line.
        cmd = (f"sox {shlex.quote(wav_or_flac_file)} "
               f"-b 16 -r 16000 {shlex.quote(new_file)}")
        if nlp.execute_cmd(cmd) == 0:
            return new_file

        return None