def calculate_report(results_tuple):
    r'''Compute a WER report from a WER results tuple.

    Derives the corpus-level (`mean`) WER from the accumulated Levenshtein
    distances and reference lengths, and builds ``Sample`` objects for the
    ``FLAGS.report_count`` non-zero-WER items with the lowest loss, finally
    presented in ascending WER order.

    Returns a ``(samples_wer, samples)`` tuple.
    '''
    samples = []
    levenshtein_sum = 0.0
    label_length_sum = 0.0
    for label, decoding, distance, loss in zip(*results_tuple):
        samples.append(Sample(label, decoding, loss, distance, wer(label, decoding)))
        label_words = label.split()
        levenshtein_sum += levenshtein(label_words, decoding.split())
        label_length_sum += float(len(label_words))
    # Corpus-level WER: total edit distance over total reference word count.
    samples_wer = levenshtein_sum / label_length_sum
    # Keep only mis-recognized items (WER != 0), take the FLAGS.report_count
    # with the lowest loss, then order that subset by ascending WER.
    samples = sorted((s for s in samples if s.wer > 0), key=lambda s: s.loss)
    samples = sorted(samples[:FLAGS.report_count], key=lambda s: s.wer)
    return samples_wer, samples
def process_decode_result(item):
    '''Convert one ``(label, decoding, distance, loss)`` tuple into an
    ``AttrDict`` that carries the per-sample WER plus the raw Levenshtein
    distance and reference length needed for corpus-level aggregation.'''
    label, decoding, distance, loss = item
    label_words = label.split()
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': wer(label, decoding),
        'levenshtein': levenshtein(label_words, decoding.split()),
        'label_length': float(len(label_words)),
    })
def get_bestmatch_keywords_using_wer(str):
    '''Return the entry of the global ``allwords`` list that best matches
    ``str`` under character-level WER.

    If ``str`` already appears in ``allwords`` it is returned unchanged.
    Otherwise every candidate is compared character-by-character (characters
    are space-joined so that WER operates at character granularity) and the
    candidate with the minimum WER is returned.

    NOTE(review): the parameter shadows the builtin ``str``; the name is
    kept to preserve the caller-visible interface.
    '''
    global allwords
    if str in allwords:
        return str
    query = ' '.join(list(str))
    scores = [wer(' '.join(list(word)), query) for word in allwords]
    best = np.argmin(np.array(scores), axis=0)
    return allwords[best]