示例#1
0
def convert(input_file, output_file):
    """
    Convert a transcript from one text file format to another.

    Supported formats are stm, json, srt, vtt, txt, and html.

    The input path is handed to check_input_file_validity first, then loaded
    through assign_if_valid; the loaded object writes itself to output_file.
    """
    check_input_file_validity(input_file)
    # presumably a transcript object exposing write(); confirm against assign_if_valid
    transcript = assign_if_valid(input_file)
    transcript.write(output_file)
示例#2
0
def main():
    """
    Command-line entry point that prints an error rate for two transcript files.

    Takes two positional arguments (reference_file, transcript_file) and two
    optional switches (--char-level, --ignore-nsns). Prints a CER line when
    --char-level is given, a WER line otherwise, or an error message when
    either file fails to load.
    """
    parser = argparse.ArgumentParser(
        description="Compares a reference and transcript file and calculates word error rate (WER) between these two files"
    )

    # positional arguments, registered in the order they must appear on the command line
    positional_specs = (
        ("reference_file", 'reference "truth" file'),
        ("transcript_file", "transcript possibly containing errors"),
    )
    for arg_name, help_text in positional_specs:
        parser.add_argument(arg_name, metavar=arg_name, type=str, help=help_text)

    # boolean switches
    switch_specs = (
        ("--char-level", "calculate character error rate instead of word error rate"),
        ("--ignore-nsns", "ignore non silence noises like um, uh, etc."),
    )
    for switch, help_text in switch_specs:
        parser.add_argument(switch, help=help_text, action="store_true")

    args = parser.parse_args()

    # load both files; a None result is treated as a failed load
    reference = assign_if_valid(args.reference_file)
    hypothesis = assign_if_valid(args.transcript_file)

    if reference is None or hypothesis is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    if args.char_level:
        report = "CER: {:5.3f}%".format(
            cer(reference, hypothesis, args.ignore_nsns))
    else:
        report = "WER: {:5.3f}%".format(
            wer(reference, hypothesis, args.ignore_nsns))
    print(report)
示例#3
0
def convert(input_file, output_file):
    """
    Read a transcript in one text format and write it out in another.

    Supported formats: stm, json, srt, vtt, txt, and html.

    Transcript lines are validated before the new file is written.
    STM files are unformatted (eg 10 -> ten).
    """
    check_input_file_validity(input_file)
    # presumably the loaded transcript object; confirm against assign_if_valid
    loaded = assign_if_valid(input_file)
    loaded.write(output_file)
示例#4
0
def compute_wer(reference_file,
                transcript_file,
                char_level=False,
                ignore_nsns=False):
    """
    Print the word error rate (WER) between a reference and a transcript file.

    reference_file: path of the reference file
    transcript_file: path of the transcript file compared against it
    char_level: when true (--char-level), print CER instead of WER
    ignore_nsns: when true (--ignore-nsns), ignore non silence noises

    Nothing is returned; the result, or an error message if either file
    fails to load, is printed.
    """
    reference = assign_if_valid(reference_file)
    hypothesis = assign_if_valid(transcript_file)

    # a None result from assign_if_valid is treated as a failed load
    if reference is None or hypothesis is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    if char_level:
        report = "CER: {:5.3f}%".format(cer(reference, hypothesis, ignore_nsns))
    else:
        report = "WER: {:5.3f}%".format(wer(reference, hypothesis, ignore_nsns))
    print(report)
def convert(input_file, output_file):
    """
    Load input_file and write its contents to output_file.

    The input is loaded through assign_if_valid and the loaded object
    writes itself to output_file.

    Raises:
        ValueError: if assign_if_valid returns None for input_file, so the
            failure is reported clearly instead of surfacing as an
            AttributeError on None.
    """
    transcript = assign_if_valid(input_file)
    # assign_if_valid can yield None (other callers in this file check for it)
    if transcript is None:
        raise ValueError(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
    transcript.write(output_file)