def score_folder_SD(fileList, fileDict, diarcollar, write_msg, out_path):
    """Score each listed file for the SD task and summarise the outcome.

    For every name in ``fileList`` the reference and hypothesis RTTM paths
    are looked up in ``fileDict['ref']`` / ``fileDict['hyp']``; the UEM path
    is derived from the reference RTTM path by swapping ``/RTTM/`` for
    ``/UEM/`` and ``.rttm`` for ``.uem``.  The per-file scorer returns a
    score and a list of log lines; a score equal to the string ``'NaN'``
    marks the file as not evaluated.

    All log lines are written (newline-joined) to a ``.log`` file in the
    logs directory, named after ``util.get_bname(out_path)``.

    Args:
        fileList: Sized iterable of file names to score.
        fileDict: Mapping with ``'ref'`` and ``'hyp'`` sub-mappings from
            file name to RTTM path.
        diarcollar: Collar value forwarded unchanged to the per-file scorer.
        write_msg: List of message fragments; summary lines are appended to
            it in place.
        out_path: Result file path; only used to name the log file.

    Returns:
        Tuple ``(derDict, write_msg)`` where ``derDict`` maps each
        successfully scored file name to its score.
    """
    toolkit_root = util.get_fs02sctk_path()
    validator_script = toolkit_root + 'scutils/dscore/validate_rttm.py'
    scorer_script = toolkit_root + 'scutils/dscore/score.py'
    log_file_path = util.get_logs_path() + util.get_bname(out_path) + '.log'

    der_by_file = {}
    log_lines = []
    failed_files = []

    for name in fileList:
        ref_rttm_path = fileDict['ref'][name]
        hyp_rttm_path = fileDict['hyp'][name]
        ref_uem_path = ref_rttm_path.replace('/RTTM/', '/UEM/')
        ref_uem_path = ref_uem_path.replace('.rttm', '.uem')

        # NOTE(review): the helper is named score_file_SAD although this is
        # the SD scorer; it is called with seven arguments and must return
        # (score, log_lines) -- confirm it resolves to the intended function.
        score, file_log = score_file_SAD(
            validator_script, scorer_script, name,
            ref_rttm_path, hyp_rttm_path, ref_uem_path, diarcollar)

        if score == 'NaN':
            failed_files.append(name)
        else:
            der_by_file[name] = score
        log_lines += file_log

    with open(log_file_path, 'w') as log_fh:
        log_fh.write('\n'.join(log_lines))
    print('\n\nLog File for SD Task - DER evaluation', 'written to path:\n\t', log_file_path, '\n\n')

    summary = (
        '\n\n',
        'Number of Files to be Evaluated:' + str(len(fileList)) + '\n\n',
        'Number of Files Successfully Evaluated:' + str(len(der_by_file)) + '\n\n',
    )
    for fragment in summary:
        write_msg.append(fragment)
    if failed_files:
        write_msg.append('Files that could not be evaluated:\n\t' + ' '.join(failed_files))

    return der_by_file, write_msg
def score_file_SAD(gt_fp, hyp_fp, sadcollar):
    """Score one SAD hypothesis file against its ground truth.

    Runs ``scutils/scoreFile_SAD.pl`` (via ``perl``) on the two files and
    parses the text it prints.  After dropping the first two output lines,
    each supported collar occupies a fixed group of rows; the third
    whitespace-separated token of a row is taken as the value.

    Args:
        gt_fp: Path of the reference (ground truth) file.
        hyp_fp: Path of the hypothesis (system output) file.
        sadcollar: Collar as a string key; one of ``'0.0'``, ``'0.25'``,
            ``'0.5'``, ``'1.0'``, ``'2.0'``.

    Returns:
        Tuple ``(dcf, pfp, pfn)`` of strings.  Any value that
        ``util.is_number`` rejects is replaced by ``'NaN'``.

    Raises:
        KeyError: If ``sadcollar`` is not one of the supported keys.
        IndexError: If the script output has fewer rows/columns than
            expected.
    """
    temp_out_fp = util.get_temp_path() + util.getfName(gt_fp) + '.out'
    # Clear any stale output left behind by a previous run.
    util.remove_file(temp_out_fp)

    script_path = util.get_fs02sctk_path() + 'scutils/scoreFile_SAD.pl'
    term_cmd = [
        'perl', script_path,
        '-r', gt_fp,
        '-h', hyp_fp,
        '-s', '2', '-e', '3', '-g', '4', '-t', '5', '-f', '6', '-u', '7',
        '-o', temp_out_fp,
    ]
    raw_output = util.get_term_output(term_cmd)
    rows = raw_output.strip().replace('\n\n', '\n').split('\n')
    rows = [row.strip() for row in rows][2:]
    util.remove_file(temp_out_fp)

    # Row index of the PFN line for each collar; the PFP and DCF lines
    # follow it directly (base + 1 and base + 2).
    pfn_row_by_collar = {'0.0': 1, '0.25': 5, '0.5': 9, '1.0': 13, '2.0': 17}
    base = pfn_row_by_collar[sadcollar]

    def _value_at(row_idx):
        return rows[row_idx].split()[2].strip()

    dcf_desired = _value_at(base + 2)
    pfp_desired = _value_at(base + 1)
    pfn_desired = _value_at(base)

    # Normalise each value on its own.  Previously a non-numeric DCF
    # overwrote PFP (and a non-numeric PFP overwrote PFN), so the invalid
    # DCF itself was returned unchanged and looked like a valid score.
    return tuple(
        value if util.is_number(value) else 'NaN'
        for value in (dcf_desired, pfp_desired, pfn_desired)
    )
def parse_arguments():
    """Parse the command line of the SID Top-N accuracy scoring wrapper.

    Options (all optional):
        -ref/--ref:   reference file path (default: bundled example).
        -hyp/--hyp:   hypothesis file path (default: bundled example).
        -out/--out:   result file path (default: time-stamped file in the
                      results directory).
        -topN/--topN: integer N for Top-N accuracy (default: 5).

    The reference and hypothesis paths are processed as files that must
    exist.  The hypothesis file is passed to ``validate_hyp_file`` and its
    result, together with the requested N, to ``proc_topN_inp``.

    Returns:
        Tuple ``(ref_path, hyp_path, out_path, topN_num)``.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'SID_TopN_Result_'
                    + util.getDateTimeStrStamp() + '.txt')

    # A space now separates the first two sentences in the --help output.
    desc = ('Wrapper File to generate Top-N Accuracy Scores for FS02 Challenge SID Task. '
            'For more information regarding scoring input and hypothesis files, '
            'refer below arguments description.')

    ref_mp = 'egs/ref_gt/SID/FS01_SID_uttID2spkID_Dev.txt'
    hyp_mp = 'egs/sys_results/SID/FS01_SID_uttID2spkID_Dev.txt'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) File Path. '
               'This file must be the SID ground truth file. '
               'Please refer ./' + ref_mp + ' file for example.')
    # The hypothesis is a single SID file (it is processed with
    # inpType='file'); the old text described a directory of SAD outputs.
    hyp_str = ('Hypothesis (system output) File Path. '
               'This file must be the SID system output file. '
               'Please refer ./' + hyp_mp + ' file for example and file format.')
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will stored in ' + util.get_results_path() + ' directory. '
               'Additional log files will be stored in ' + util.get_logs_path())
    clr_str = ('Desired Top-N Accuracy for SID evaluation. '
               'Default: Top-5 Accuracy.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def, help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def, help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path, help=out_str)
    parser.add_argument('-topN', '--topN', type=int, default=5, help=clr_str)
    args = parser.parse_args()

    ref_path = util.processInpPath(args.ref, inpType='file', checkExists=True)
    hyp_path = util.processInpPath(args.hyp, inpType='file', checkExists=True)
    out_path = util.processInpPath(args.out, inpType='file')

    max_TopN = validate_hyp_file(hyp_path)
    topN_num = proc_topN_inp(args.topN, max_TopN)
    return ref_path, hyp_path, out_path, topN_num
def parse_arguments():
    """Parse the command line of the SD (diarization) DER scoring wrapper.

    Options (all optional):
        -ref/--ref:               reference directory (default: bundled
                                  example directory).
        -hyp/--hyp:               hypothesis directory (default: bundled
                                  example directory).
        -out/--out:               result file path (default: time-stamped
                                  file in the results directory).
        -diarcollar/--diarcollar: collar as a float (default: 0.25).

    The reference path is additionally passed through
    ``proc_sd_ref_files`` and the collar through ``proc_sd_collar``.

    Returns:
        Tuple ``(ref_path, hyp_path, out_path, diarcollar)``.
    """
    toolkit_root = util.get_fs02sctk_path()
    default_out = ''.join((
        util.get_results_path(),
        'SD_DER_Result_',
        util.getDateTimeStrStamp(),
        '.txt',
    ))
    allowed_collars = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    description = (
        'Wrapper File to generate DER Scores for FS02 Challenge SD (track1 and track2) Tasks.'
        'Scoring mechanism for both tracks will be the same. For more '
        'information regarding scoring input and hypothesis files, refer '
        'below arguments description. Open-Source Software Credits: '
        'This script uses dscore toolkit developed by Neville Ryant for '
        'generating DER scores. for more info, refer: (https://github.com/nryant/dscore)'
    )

    ref_example = 'egs/ref_gt/SD/'
    hyp_example = 'egs/sys_results/SD/'

    ref_help = (
        'Reference (ground truth) Directory Path. '
        'This directory must include only SD ground truth RTTM '
        'and UEM folders. Please refer ./' + ref_example + ' directory for examples.'
    )
    hyp_help = (
        'Hypothesis (system output) Directory Path. '
        'This directory must include only diarization system output RTTM files. '
        'Please refer ./' + hyp_example + ' directory for examples and file format.'
    )
    out_help = (
        'Output (per file and overall system score) File Path. '
        'Default: Result file will stored in ' + util.get_results_path() + ' directory. '
        'Additional log files if generated will be stored in ' + util.get_logs_path()
    )
    collar_help = (
        'Desired forgiveness Collar for SD evaluation. ' + allowed_collars
        + ' Default collar length: 0.25 secs.'
    )

    # (option name, type, default, help) -- registered in this order.
    option_table = (
        ('ref', str, toolkit_root + ref_example, ref_help),
        ('hyp', str, toolkit_root + hyp_example, hyp_help),
        ('out', str, default_out, out_help),
        ('diarcollar', float, 0.25, collar_help),
    )
    parser = argparse.ArgumentParser(description=description)
    for opt_name, opt_type, opt_default, opt_help in option_table:
        parser.add_argument('-' + opt_name, '--' + opt_name,
                            type=opt_type, default=opt_default, help=opt_help)
    parsed = parser.parse_args()

    ref_dir = util.processInpPath(parsed.ref)
    ref_path = proc_sd_ref_files(ref_dir)
    hyp_path = util.processInpPath(parsed.hyp)
    out_path = util.processInpPath(parsed.out, inpType='file')
    diarcollar = proc_sd_collar(parsed.diarcollar)
    return ref_path, hyp_path, out_path, diarcollar
def parse_arguments():
    """Parse the command line of the ASR WER scoring wrapper.

    Options:
        -track/--track: ASR track as a string, "1" or "2" (default: "1").
        -kaldi/--kaldi: base path of the local Kaldi installation
                        (required).
        -ref/--ref:     reference path (directory for track 1, file for
                        track 2).
        -hyp/--hyp:     hypothesis path (directory for track 1, file for
                        track 2).
        -out/--out:     result file path (default: time-stamped file in the
                        results directory).

    If either ``--ref`` or ``--hyp`` is left at (or equals) its default,
    BOTH are replaced by the bundled example for the selected track.  For
    track 2 the reference and hypothesis are processed as files that must
    exist; otherwise they are processed with ``util.processInpPath``
    defaults.

    Returns:
        Tuple ``(ref_path, hyp_path, out_path, track_num, kaldi_path)``.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'ASR_WER_Result_'
                    + util.getDateTimeStrStamp() + '.txt')

    # Sentences that were previously glued together ("Task.Scoring",
    # "track-2)For") are now separated by a space in the --help output.
    desc = ('Wrapper File to generate WER Scores for FS02 Challenge ASR (track1 and track2) Task. '
            'Scoring mechanism for both tracks is the same, but the system output '
            'hypothesis file/folder expected from the user will be different. '
            '(folder with json files for track-1, and a plain text file for track-2) '
            'For more information regarding scoring input and hypothesis files, '
            'refer below arguments description. Open-Source Software Credits: '
            'This script uses compute-wer tool from the Kaldi Speech Recognition '
            'Toolkit. for more info, refer: (http://kaldi-asr.org/doc/tools.html)')

    # Path prefixes; the track number (and, for track 2, the example file
    # name) is appended further below.
    ref_mp = 'egs/ref_gt/ASR/ASR_track'
    hyp_mp = 'egs/sys_results/ASR/ASR_track'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) Path. '
               '(Directory Path for Track-1, and File Path for Track-2) '
               'Directory Path for Track-1 must include only ASR ground truth files. '
               'For ASR_track1: directory containing json format ground truth files required. '
               'Please refer ./' + ref_mp + '1/ directory for examples. '
               'For ASR_track2: kaldi "text" file. Refer: { https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
               ' file contents of File Path for Track-2 must include only FS02_ASR_track2 '
               'file-names followed by associated transcripts (like in Kaldi "text" format) '
               'Please refer ./' + ref_mp + '2/ directory for examples.')
    # The hypothesis help now points at the hypothesis examples (hyp_mp);
    # it previously referred users to the reference examples (ref_mp).
    hyp_str = ('Hypothesis (system output) Directory/File Path. '
               '(Directory Path for Track-1, and File Path for Track-2) '
               'Directory Path for Track-1 must include only FS02-ASR system output files. '
               'For ASR_track1: directory containing json format system output files required. '
               'Please refer ./' + hyp_mp + '1/ directory for examples and file format. '
               'For ASR_track2: kaldi "text" file. Refer: { https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
               'file contents of File Path for Track-2 must include only FS02_ASR_track2 '
               'file-names followed by associated transcripts (like in Kaldi "text" format) '
               'Please refer ./' + hyp_mp + '2/ directory for examples and file format.')
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will stored in ' + util.get_results_path() + ' directory. '
               'Additional log files if generated will be stored in ' + util.get_logs_path())
    trk_str = ('Track number of the ASR Task to be evaluated. '
               'Input Options: (as string) "1" or "2"')
    # The original sentence was cut off after "required for".
    kld_str = ('base path to the locally installed kaldi directory. '
               'e.g. /home/crss/kaldi. This argument is required.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-track', '--track', type=str, default='1', help=trk_str)
    parser.add_argument('-kaldi', '--kaldi', type=str, required=True, help=kld_str)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def, help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def, help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path, help=out_str)
    args = parser.parse_args()

    track_num = proc_track_num(args.track)
    kaldi_path = util.processInpPath(args.kaldi)

    # Deliberate (see the printed message): as soon as one of the two paths
    # is missing, both fall back to the bundled example for this track, so a
    # single user-supplied path is ignored.
    if args.ref == ref_def or args.hyp == hyp_def:
        ad = str(track_num) + '/'
        t2_name = 'FS01_ASR_track2_transcriptions_Dev'
        print('ref or hyp paths are either not provided, or match default paths.')
        print('Running Script on default example for Track-', str(track_num))
        if track_num == 2:
            args.ref = ref_def + ad + t2_name
            args.hyp = hyp_def + ad + t2_name
        else:
            args.ref = ref_def + ad
            args.hyp = hyp_def + ad

    if track_num == 2:
        ref_path = util.processInpPath(args.ref, inpType='file', checkExists=True)
        hyp_path = util.processInpPath(args.hyp, inpType='file', checkExists=True)
    else:
        ref_path = util.processInpPath(args.ref)
        hyp_path = util.processInpPath(args.hyp)
    out_path = util.processInpPath(args.out, inpType='file')

    return ref_path, hyp_path, out_path, track_num, kaldi_path
def parse_arguments():
    """Parse the command line of the SAD DCF scoring wrapper.

    Options (all optional):
        -ref/--ref:             reference directory (default: bundled
                                example directory).
        -hyp/--hyp:             hypothesis directory (default: bundled
                                example directory).
        -out/--out:             result file path (default: time-stamped
                                file in the results directory).
        -sadcollar/--sadcollar: collar as a float (default: 0.5).

    The collar is passed through ``proc_sad_collar`` before being returned.

    Returns:
        Tuple ``(ref_path, hyp_path, out_path, sadcollar)``.
    """
    toolkit_root = util.get_fs02sctk_path()
    default_out = ''.join((
        util.get_results_path(),
        'SAD_DCF_Result_',
        util.getDateTimeStrStamp(),
        '.txt',
    ))
    allowed_collars = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    description = (
        'Wrapper File to generate DCF Scores for FS02 Challenge SAD Task.'
        'For more information regarding scoring input and hypothesis files, '
        'refer below arguments description. '
        'Open-Source Software Credits: This script uses scoreFile_SAD.pl '
        'developed by NIST. for more info, refer: (https://www.nist.gov/'
        'itl/iad/mig/nist-open-speech-activity-detection-evaluation)'
    )

    ref_example = 'egs/ref_gt/SAD/'
    hyp_example = 'egs/sys_results/SAD/'

    ref_help = (
        'Reference (ground truth) Directory Path. '
        'This directory must include only SAD ground truth files. '
        'Please refer ./' + ref_example + ' directory for examples.'
    )
    hyp_help = (
        'Hypothesis (system output) Directory Path. '
        'This directory must include only SAD system output files. '
        'Please refer ./' + hyp_example + ' directory for examples and file format.'
    )
    out_help = (
        'Output (per file and overall system score) File Path. '
        'Default: Result file will stored in ' + util.get_results_path() + ' directory. '
        'Additional log files if generated will be stored in ' + util.get_logs_path()
    )
    collar_help = (
        'Desired forgiveness Collar for SAD evaluation. ' + allowed_collars
        + ' Default collar length: 0.5 secs.'
    )

    # (option name, type, default, help) -- registered in this order.
    option_table = (
        ('ref', str, toolkit_root + ref_example, ref_help),
        ('hyp', str, toolkit_root + hyp_example, hyp_help),
        ('out', str, default_out, out_help),
        ('sadcollar', float, 0.5, collar_help),
    )
    parser = argparse.ArgumentParser(description=description)
    for opt_name, opt_type, opt_default, opt_help in option_table:
        parser.add_argument('-' + opt_name, '--' + opt_name,
                            type=opt_type, default=opt_default, help=opt_help)
    parsed = parser.parse_args()

    ref_path = util.processInpPath(parsed.ref)
    hyp_path = util.processInpPath(parsed.hyp)
    out_path = util.processInpPath(parsed.out, inpType='file')
    sadcollar = proc_sad_collar(parsed.sadcollar)
    return ref_path, hyp_path, out_path, sadcollar