def convert(input_file, output_file):
    """
    Convert between text file formats (supported formats are stm, json, srt, vtt, txt, and html)

    Args:
        input_file: location of the transcript to read; it is passed to
            check_input_file_validity and then to assign_if_valid.
        output_file: destination handed to the loaded transcript's write().

    Returns:
        None. If the input could not be loaded, an error message is printed
        and nothing is written.
    """
    check_input_file_validity(input_file)

    # Keep the loaded object under its own name instead of rebinding the
    # parameter, so the path and the parsed transcript are not confused.
    transcript = assign_if_valid(input_file)

    # Other callers of assign_if_valid in this file treat a None result as
    # "bad input file"; without this guard the failure would surface as an
    # opaque AttributeError on `.write`.
    if transcript is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    transcript.write(output_file)
def main():
    """
    Command-line entry point: parse the reference/transcript arguments, load
    both files with assign_if_valid and print either the word error rate or,
    with --char-level, the character error rate.
    """
    parser = argparse.ArgumentParser(
        description="Compares a reference and transcript file and calculates word error rate (WER) between these two files"
    )

    # Positional arguments, registered in the order they appear on the command line.
    positional_help = (
        ("reference_file", 'reference "truth" file'),
        ("transcript_file", "transcript possibly containing errors"),
    )
    for name, description in positional_help:
        parser.add_argument(name, metavar=name, type=str, help=description)

    # Optional boolean switches.
    switch_help = (
        (
            "--char-level",
            "calculate character error rate instead of word error rate",
        ),
        ("--ignore-nsns", "ignore non silence noises like um, uh, etc."),
    )
    for flag, description in switch_help:
        parser.add_argument(flag, help=description, action="store_true")

    options = parser.parse_args()

    # Load both files before deciding whether anything went wrong.
    reference = assign_if_valid(options.reference_file)
    hypothesis = assign_if_valid(options.transcript_file)

    if reference is None or hypothesis is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    # Pick the report template together with the matching metric function.
    if options.char_level:
        template, metric = "CER: {:5.3f}%", cer
    else:
        template, metric = "WER: {:5.3f}%", wer

    print(template.format(metric(reference, hypothesis, options.ignore_nsns)))
def convert(input_file, output_file):
    """
    Convert between text file formats (supported formats are stm, json, srt, vtt, txt, and html)

    Validates lines of transcript before writing new file.
    STM files are unformatted (eg 10 -> ten)

    Args:
        input_file: location of the transcript to read; it is passed to
            check_input_file_validity and then to assign_if_valid.
        output_file: destination handed to the loaded transcript's write().

    Returns:
        None. If the input could not be loaded, an error message is printed
        and nothing is written.
    """
    check_input_file_validity(input_file)

    # Keep the loaded object under its own name instead of rebinding the
    # parameter, so the path and the parsed transcript are not confused.
    transcript = assign_if_valid(input_file)

    # Other callers of assign_if_valid in this file treat a None result as
    # "bad input file"; without this guard the failure would surface as an
    # opaque AttributeError on `.write`.
    if transcript is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    transcript.write(output_file)
def compute_wer(reference_file, transcript_file, char_level=False, ignore_nsns=False):
    """
    Load a reference file and a transcript file and print the word error
    rate (WER) between them.

    If --char-level is given, the character error rate (CER) is printed instead
    If --ignore-nsns is given, non silence noises are ignored

    Nothing is returned; the result, or an error message when either file
    could not be loaded, is written to standard output.
    """
    # Load both inputs up front; a None result marks a file that could not be used.
    reference = assign_if_valid(reference_file)
    hypothesis = assign_if_valid(transcript_file)

    if reference is None or hypothesis is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    # Pick the report template together with the matching metric function.
    if char_level:
        template, metric = "CER: {:5.3f}%", cer
    else:
        template, metric = "WER: {:5.3f}%", wer

    print(template.format(metric(reference, hypothesis, ignore_nsns)))
def convert(input_file, output_file):
    """
    Load a transcript with assign_if_valid and write it to output_file.

    Args:
        input_file: location of the transcript to read; passed to
            assign_if_valid.
        output_file: destination handed to the loaded transcript's write().

    Returns:
        None. If the input could not be loaded, an error message is printed
        and nothing is written.
    """
    # Keep the loaded object under its own name instead of rebinding the
    # parameter, so the path and the parsed transcript are not confused.
    transcript = assign_if_valid(input_file)

    # Other callers of assign_if_valid in this file treat a None result as
    # "bad input file"; without this guard the failure would surface as an
    # opaque AttributeError on `.write`.
    if transcript is None:
        print(
            "Error with an input file. Please check all files exist and are accepted by ASRToolkit"
        )
        return

    transcript.write(output_file)