import csv
import os
import sys

# URI, getMeanAndStDevError and writeCsv_ are module-level helpers defined
# elsewhere in the repo.

def doit(algorithm_name, dataset_name):
    '''
    Read all results of a submission/algorithm on one dataset.
    Change the algorithm name and path to annotations.
    '''
    annotation_URI = os.path.join(URI + dataset_name, algorithm_name, 'results.csv')
    print(annotation_URI)

    WER = []
    ins = []
    dels = []
    subs = []
    words = []
    with open(annotation_URI, 'r') as csvfile:
        score_ = csv.reader(csvfile, delimiter=',')
        for i, row in enumerate(score_):  # each row is a song
            if i == 0:  # skip the header row
                continue
            if algorithm_name == 'DDA2' or algorithm_name == 'DDA3':
                # these submissions report WER as a fraction; scale to percent
                WER.append(100 * float(row[1]))
            else:
                WER.append(float(row[1]))
            ins.append(float(row[2]))
            dels.append(float(row[3]))
            subs.append(float(row[4]))
            words.append(float(row[5]))

    mean_WER, stdevE, medianE = getMeanAndStDevError(WER)
    mean_ins, stdevE, medianE = getMeanAndStDevError(ins)
    mean_dels, stdevP, medianP = getMeanAndStDevError(dels)
    mean_subs, stdevP, medianP = getMeanAndStDevError(subs)
    mean_words, stdevP, medianP = getMeanAndStDevError(words)

    ############ Append to summary for a dataset #########################
    result_summary_file = 'summary_' + dataset_name + '.csv'
    output_URI = os.path.join(URI + dataset_name, result_summary_file)

    results = [[algorithm_name,
                '{:.2f}'.format(mean_WER),
                '{:.2f}'.format(mean_ins),
                '{:.2f}'.format(mean_dels),
                '{:.2f}'.format(mean_subs),
                '{:.2f}'.format(mean_words)]]

    if not os.path.exists(output_URI):
        # first run: write the header row, then the results
        results_prefix = [['Submission', 'WER', 'Insertions', 'Deletions', 'Substitutions', 'TotalWords']]
        writeCsv_(output_URI, results_prefix, append=0)
        writeCsv_(output_URI, results, append=1)
        print('file {} written'.format(output_URI))
    else:
        print('Appended to file {}'.format(output_URI))
        writeCsv_(output_URI, results, append=1)
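# getMeanAndStDevError and writeCsv_ are not shown in this section. A minimal
# sketch consistent with how they are called above (assumed, not the repo's
# actual implementation):

import numpy as np

def getMeanAndStDevError(values):
    # returns (mean, standard deviation, median) of a list of per-song scores
    return float(np.mean(values)), float(np.std(values)), float(np.median(values))

def writeCsv_(output_URI, rows, append=1):
    # append=0 truncates the file; append=1 adds rows at the end
    mode = 'a' if append else 'w'
    with open(output_URI, mode, newline='') as f:
        writer = csv.writer(f, delimiter=',')
        for row in rows:
            writer.writerow(row)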
def doit(algorithm_name, dataset_name):
    '''
    Read all results of a submission/algorithm on one dataset.
    Change the algorithm name and path to annotations.
    '''
    annotation_URI = os.path.join(URI + dataset_name, algorithm_name, 'metrics', 'results.csv')
    print(annotation_URI)

    errors = []
    median_errors = []
    percentages = []
    percentages_e = []
    with open(annotation_URI, 'r') as csvfile:
        score_ = csv.reader(csvfile, delimiter=',')
        for i, row in enumerate(score_):  # each row is a song
            if i == 0:  # skip the header row
                continue
            errors.append(float(row[1]))
            median_errors.append(float(row[2]))
            if dataset_name == 'HansensDataset' or dataset_name == 'HansensDataset_acappella':
                percentages.append(float(row[3]))  # TODO: check if NaN
                percentages_e.append(float(row[4]))

    meanE, stdevE, medianE = getMeanAndStDevError(errors)
    meanMedE, stdevE, medianE = getMeanAndStDevError(median_errors)
    if dataset_name == 'HansensDataset' or dataset_name == 'HansensDataset_acappella':
        meanP, stdevP, medianP = getMeanAndStDevError(percentages)
        meanPE, stdevP, medianP = getMeanAndStDevError(percentages_e)

    ############ Append to summary for a dataset #########################
    result_summary_file = 'summary_' + dataset_name + '.csv'
    output_URI = os.path.join(URI + dataset_name, result_summary_file)

    if dataset_name == 'HansensDataset' or dataset_name == 'HansensDataset_acappella':
        results = [[algorithm_name,
                    '{:.2f}'.format(meanE),
                    '{:.2f}'.format(meanMedE),
                    '{:.2f}'.format(meanP),
                    '{:.2f}'.format(meanPE)]]
    else:
        # percentages are not computed for the other datasets, so both
        # percentage columns are written as 'NA'
        results = [[algorithm_name,
                    '{:.2f}'.format(meanE),
                    '{:.2f}'.format(meanMedE),
                    'NA',
                    'NA']]

    if not os.path.exists(output_URI):
        print('file {} written'.format(output_URI))
        writeCsv_(output_URI, results, append=0)
    else:
        print('Appended to file {}'.format(output_URI))
        writeCsv_(output_URI, results, append=1)
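# Example driver showing how doit() aggregates per-song results into one
# summary CSV per dataset. The submission IDs and dataset names are taken
# from this section; the combination is illustrative, not the repo's actual
# run script:

for dataset_name in ['HansensDataset', 'HansensDataset_acappella']:
    for algorithm_name in ['DDA2', 'DDA3']:
        doit(algorithm_name, dataset_name)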
def eval_error_textgrid_test():
    audio_name = '05_Semahat_Ozdenses_-_Bir_Ihtimal_Daha_Var_0_zemin_from_69_5205_to_84_2'
    annotation_url = os.path.join(PATH_TEST_DATASET, audio_name + ANNOTATION_EXT)
    start_index = 0
    end_index = -1
    detected_token_list = [[0.61, 0.94, u'Bir'],
                           [1.02, 3.41, u'ihtimal'],
                           [3.42, 4.11, u'daha'],
                           [4.12, 5.4, u'var'],
                           [8.03, 8.42, u'o'],
                           [8.46, 8.83, u'da'],
                           [8.86, 10.65, u'\xf6lmek'],
                           [10.66, 11.04, u'mi'],
                           [11.05, 14.39, u'dersin']]

    annotation_token_list, detected_token_list, _, _ = \
        strip_non_lyrics_tokens(annotation_url,
                                detected_token_list,
                                tierAliases.phrases,
                                start_index,
                                end_index)

    alignment_errors = _eval_alignment_error(annotation_token_list,
                                             detected_token_list,
                                             tierAliases.phrases)

    mean, std_dev, median = getMeanAndStDevError(alignment_errors)
    print("mean: ", mean, "st dev: ", std_dev)
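# _eval_alignment_error is the core metric used throughout this section. A
# minimal sketch of the idea (assumed, simplified: tokens are
# [start, end, label] triples and the two lists are already matched
# one-to-one):

def alignment_error_sketch(reference_tokens, detected_tokens):
    # absolute displacement (in seconds) between annotated and detected
    # start times, one value per token
    return [abs(ref[0] - det[0])
            for ref, det in zip(reference_tokens, detected_tokens)]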
def eval_all_metrics_lab(refs_URI, detected_URI):
    """
    Run all eval metrics on one file.
    """
    ref_intervals, ref_labels = load_labeled_intervals(refs_URI)
    detected_intervals, detected_labels = load_labeled_intervals(detected_URI)

    # metric 1: alignment error
    alignmentErrors = _eval_alignment_error(ref_intervals, detected_intervals,
                                            tierAliases.phrases, ref_labels)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    # metric 2: percentage correct
    initialTimeOffset_refs = ref_intervals[0][0]
    finalts_refs = ref_intervals[-1][1]
    durationCorrect, totalLength = _eval_percentage_correct(
        reference_token_list=ref_intervals,
        detected_token_List=detected_intervals,
        final_ts_anno=finalts_refs,
        initial_time_offset_refs=initialTimeOffset_refs,
        reference_labels=ref_labels)
    percentage_correct = durationCorrect / totalLength

    # metric 3: percentage tolerance
    percentage_tolerance = _eval_percentage_tolerance(
        ref_intervals=ref_intervals,
        detected_intervals=detected_intervals,
        reference_labels=ref_labels,
        tolerance=0.3)

    return mean, percentage_correct, percentage_tolerance
def eval_all_metrics_lab(refs_URI, detected_URI, tolerance=0.3):
    """
    Run all eval metrics on one file.
    """
    ref_intervals, ref_labels = load_labeled_intervals(refs_URI)
    detected_intervals, use_end_ts = load_detected_intervals(detected_URI)

    # metric 1: alignment error
    alignmentErrors = _eval_alignment_error(ref_intervals, detected_intervals,
                                            ref_labels, use_end_ts)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)

    # metric 2: percentage correct
    initialTimeOffset_refs = ref_intervals[0][0]
    finalts_refs = ref_intervals[-1][1]
    durationCorrect, totalLength = _eval_percentage_correct(
        reference_token_list=ref_intervals,
        detected_token_List=detected_intervals,
        final_ts_anno=finalts_refs,
        initial_time_offset_refs=initialTimeOffset_refs,
        reference_labels=ref_labels)
    percentage_correct = durationCorrect / totalLength

    # metric 3: percentage tolerance
    percentage_tolerance = _eval_percentage_tolerance(
        ref_intervals=ref_intervals,
        detected_intervals=detected_intervals,
        reference_labels=ref_labels,
        tolerance=tolerance)

    return mean, percentage_correct, percentage_tolerance
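# load_labeled_intervals parses a .lab file, conventionally one
# "start end label" line per token. A minimal sketch of a parser for that
# format, assuming whitespace-separated columns (not necessarily the repo's
# exact implementation):

def load_labeled_intervals_sketch(lab_URI):
    intervals = []
    labels = []
    with open(lab_URI, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) < 3:
                continue  # skip blank or malformed lines
            intervals.append([float(fields[0]), float(fields[1])])
            labels.append(fields[2])
    return intervals, labels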
def test_eval_error_lab_mauch():
    """
    Test mean average error/displacement (in seconds) of alignment
    with loading the .lab files.
    """
    ref_intervals, detected_intervals, ref_labels = load_ref_and_detections(dataset='mauch')
    alignment_errors = _eval_alignment_error(ref_intervals, detected_intervals, ref_labels)
    mean_mauch, std_dev_mauch, median_mauch = getMeanAndStDevError(alignment_errors)
    assert mean_mauch == 0.0 and std_dev_mauch == 0.0
def test_eval_error_lab_hansen():
    """
    Test mean average error/displacement (in seconds) of alignment
    with loading the .lab files.
    """
    ref_intervals, detected_intervals, ref_labels = load_ref_and_detections(dataset='hansen')
    alignment_errors = _eval_alignment_error(ref_intervals, detected_intervals,
                                             tierAliases.phrases, ref_labels)
    mean_hansen, std_dev_hansen, median_hansen = getMeanAndStDevError(alignment_errors)
    assert mean_hansen == 0.0 and std_dev_hansen == 0.0
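# Both tests assert a mean and standard deviation of exactly 0.0, which only
# holds when the detections are the reference annotations themselves: they
# are identity sanity checks for the metric. A minimal sketch of what
# load_ref_and_detections would need to return for that to hold (assumed;
# the fixture path is hypothetical):

def load_ref_and_detections_sketch(dataset):
    lab_URI = os.path.join('test', 'data', dataset + '.lab')  # hypothetical path
    ref_intervals, ref_labels = load_labeled_intervals_sketch(lab_URI)
    detected_intervals = [list(interval) for interval in ref_intervals]  # identity copy
    return ref_intervals, detected_intervals, ref_labels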
def doit(algorithm_name, dataset_name, dataset_path):
    '''
    Read all results of a submission/algorithm on one dataset.
    Change the algorithm name and path to annotations.
    '''
    results_dataset_file = algorithm_name + '_' + dataset_name + '.csv'
    annotation_URI = os.path.join(URI + 'MIREX_2018_ala' + dataset_path, results_dataset_file)
    print(annotation_URI)

    errors = []
    median_errors = []
    percentages = []
    percentages_e = []
    with open(annotation_URI, 'r') as csvfile:
        score_ = csv.reader(csvfile, delimiter=',')
        for i, row in enumerate(score_):  # each row is a song
            if i == 0:  # skip the header row
                continue
            errors.append(float(row[1]))
            median_errors.append(float(row[2]))
            percentages.append(float(row[3]))  # TODO: check if NaN
            percentages_e.append(float(row[4]))

    meanE, stdevE, medianE = getMeanAndStDevError(errors)
    meanMedE, stdevE, medianE = getMeanAndStDevError(median_errors)
    meanP, stdevP, medianP = getMeanAndStDevError(percentages)
    meanPE, stdevP, medianP = getMeanAndStDevError(percentages_e)

    ############ Write summary for a dataset #########################
    result_summary_file = 'summary_' + dataset_name + '.csv'
    output_URI = os.path.join(URI + 'MIREX_2018_ala' + dataset_path, result_summary_file)

    if not os.path.exists(output_URI):
        print('{} does not exist'.format(output_URI))
    else:
        results = [[algorithm_name,
                    '{:.2f}'.format(meanE),
                    '{:.2f}'.format(meanMedE),
                    '{:.2f}'.format(meanP),
                    '{:.2f}'.format(meanPE)]]
        writeCsv_(output_URI, results, append=1)
def evalOneFile(argv):
    '''
    Main utility function.
    '''
    if len(argv) != 5:
        print("usage: {} <URI_annotation> <URI_detected> <evalLevel> <URI_audio>".format(argv[0]))
        sys.exit()

    annoURI = argv[1]
    detectedURI = argv[2]
    evalLevel = int(argv[3])
    audio_URI = argv[4]

    alignmentErrors = evalAlignmentError(annoURI, detectedURI, evalLevel)
    mean, stDev, median = getMeanAndStDevError(alignmentErrors)
    print("mean: ", mean, "st dev: ", stDev)
    return mean, stDev, median, alignmentErrors
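# Example invocation matching the usage string above. The script name, file
# paths and the evalLevel value are hypothetical:
#
#   python evalOneFile.py annotations/song.lab detections/song.lab 3 audio/song.wav

if __name__ == '__main__':
    evalOneFile(sys.argv)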