Exemplo n.º 1
0
def degrade_audio(source_audio_file, target_audio_file=None):
    """
    Degrades audio to typical G711 level.
    Useful if models need to target this audio quality.

    The source is run through sox three times: first to 8 kHz a-law,
    then to 8 kHz u-law, and finally to 16 kHz 16-bit signed, which is
    written to the target.

    Args:
        source_audio_file: path of the audio file to degrade.
        target_audio_file: path the result is written to; when None the
            source file itself is overwritten.

    Returns:
        None.
    """

    # NOTE(review): the result of this check is not acted on here;
    # callers are presumably expected to validate first - confirm.
    valid_input_file(source_audio_file, ["mp3", "sph", "wav", "au", "raw"])

    if target_audio_file is None:
        target_audio_file = source_audio_file

    # splitext only strips an extension from the final path component, so
    # a dotted directory name cannot truncate the temp-file location.
    base_name = os.path.splitext(source_audio_file)[0]
    tmp1 = base_name + "_tmp1.wav"
    tmp2 = base_name + "_tmp2.wav"

    try:
        # Argument lists (no shell) keep paths with spaces or shell
        # metacharacters intact and rule out command injection.

        # degrade to 8k a-law
        subprocess.call([
            "sox", "-V1", source_audio_file,
            "-r", "8000", "-e", "a-law", tmp1,
        ])

        # convert to u-law
        subprocess.call([
            "sox", "-V1", tmp1,
            "--rate", "8000", "-e", "u-law", tmp2,
        ])

        # upgrade to 16k, 16-bit signed
        subprocess.call([
            "sox", "-V1", tmp2,
            "--rate", "16000", "-e", "signed", "-b", "16",
            "--channel", "1", target_audio_file,
        ])
    finally:
        # Always clean up intermediates, even if a step raised; a failed
        # sox run may not have created them, so check before removing.
        for tmp_file in (tmp1, tmp2):
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
Exemplo n.º 2
0
def validate_transcript(transcript):
    """
    Abort the program when the transcript file is rejected.

    Returns None if valid_input_file accepts the transcript; otherwise
    logs an error naming the file and exits with status 1.
    """
    if valid_input_file(transcript):
        return

    error_message = "Invalid transcript file {}".format(transcript)
    LOGGER.error(error_message)
    sys.exit(1)
Exemplo n.º 3
0
def main():
    """
    Run degrade_audio on every file named on the command line.

    Each argument is checked with valid_input_file first; rejected
    arguments are logged as errors and skipped.  No target is passed to
    degrade_audio, so its default output location applies.
    """
    for candidate in sys.argv[1:]:
        accepted = valid_input_file(
            candidate, ["mp3", "sph", "wav", "au", "raw"])
        if not accepted:
            LOGGER.error("Invalid input file %s", candidate)
            continue
        degrade_audio(candidate)
Exemplo n.º 4
0
def clean_text_file(*input_text_files):
    """
    Run clean_one_file on each given *.txt file.

    Paths rejected by the txt-extension check are logged as errors and
    skipped.  For every cleaned file the expected output name (the path
    with ".txt" replaced by "_cleaned.txt") is logged; the file itself
    is presumably written by clean_one_file.
    """
    for text_path in input_text_files:
        is_txt = valid_input_file(text_path, valid_extensions=["txt"])
        if is_txt:
            clean_one_file(text_path)

            cleaned_path = text_path.replace(".txt", "_cleaned.txt")
            LOGGER.info("File output: %s", cleaned_path)
        else:
            LOGGER.error(
                "File %s does not end in .txt - please only use this for cleaning txt files",
                text_path,
            )
def main():
    """
    Command-line entry point: clean every *.txt file given as an argument.

    Parses one or more file names, logs an error for any that fail the
    txt-extension check, and passes the rest to clean_text_file.
    """
    arg_parser = argparse.ArgumentParser(
        description='cleans input *.txt files and outputs *_cleaned.txt')
    arg_parser.add_argument(
        'files', type=str, nargs='+', help='list of input files')

    parsed = arg_parser.parse_args()
    for candidate in parsed.files:
        if valid_input_file(candidate, valid_extensions=['txt']):
            clean_text_file(candidate)
        else:
            LOGGER.error(
                "File %s does not end in .txt - please only use this for cleaning txt files",
                candidate)
Exemplo n.º 6
0
def validate_audio_file(source_audio_file):
    """
    Abort the program when the audio file is rejected.

    Returns None if valid_input_file accepts the file for the listed
    audio extensions; otherwise logs an error and exits with status 1.
    """
    is_valid = valid_input_file(
        source_audio_file, ["mp3", "sph", "wav", "au", "raw"])
    if is_valid:
        return

    LOGGER.error("Invalid audio file {}".format(source_audio_file))
    sys.exit(1)
Exemplo n.º 7
0
def check_audio_file(audio_file_name):
    """
    Return audio_file(audio_file_name) for an accepted audio file.

    If valid_input_file rejects the name for the listed audio
    extensions, an error is logged and the program exits with status 1.
    """
    accepted = valid_input_file(
        audio_file_name, ["mp3", "sph", "wav", "au", "raw"])
    if not accepted:
        LOGGER.error("Invalid audio file {}".format(audio_file_name))
        sys.exit(1)

    return audio_file(audio_file_name)
Exemplo n.º 8
0
def check_transcript(transcript):
    """
    Return time_aligned_text built from an accepted transcript file.

    If valid_input_file rejects the transcript, an error is logged and
    the program exits with status 1.
    """
    if not valid_input_file(transcript):
        LOGGER.error("Invalid transcript file {}".format(transcript))
        sys.exit(1)

    return time_aligned_text(input_data=transcript)