def test_sequence_matcher2(self):
    """Check distance() when the sequences are supplied through the setters.

    The matcher is created without arguments, fed with ``set_seq1`` and
    ``set_seq2``, and then re-fed with the operands swapped via ``set_seqs``.
    The expected distance is 2 in both directions.
    """
    a = ['a', 'b']
    b = ['a', 'b', 'd', 'c']

    sm = SequenceMatcher()
    sm.set_seq1(a)
    sm.set_seq2(b)
    # assertEqual reports both operands on failure; assertTrue(x == y) only
    # reports "False is not true".
    self.assertEqual(sm.distance(), 2)

    # Same pair, operands swapped.
    sm.set_seqs(b, a)
    self.assertEqual(sm.distance(), 2)
def test_sequence_matcher(self):
    """Exercise the main query methods of a matcher built from two lists.

    Checks ``distance``, the three ratio variants, ``get_opcodes`` and
    ``get_matching_blocks`` for ``['a', 'b']`` against
    ``['a', 'b', 'd', 'c']``.
    """
    a = ['a', 'b']
    b = ['a', 'b', 'd', 'c']
    sm = SequenceMatcher(a=a, b=b)
    opcodes = [
        ['equal', 0, 1, 0, 1],
        ['equal', 1, 2, 1, 2],
        ['insert', 1, 1, 2, 3],
        ['insert', 1, 1, 3, 4],
    ]

    self.assertEqual(sm.distance(), 2)
    # Ratios are floats: compare with a tolerance instead of exact equality.
    self.assertAlmostEqual(sm.ratio(), 2 / 3)
    self.assertAlmostEqual(sm.quick_ratio(), 2 / 3)
    self.assertAlmostEqual(sm.real_quick_ratio(), 2 / 3)
    # Queried a second time, after the ratio calls.
    self.assertEqual(sm.distance(), 2)
    # This doesn't return anything, saves the value in the sm cache.
    self.assertFalse(sm._compute_distance_fast())
    self.assertEqual(sm.get_opcodes(), opcodes)
    self.assertEqual(
        list(sm.get_matching_blocks()),
        [[0, 0, 1], [1, 1, 1]],
    )
def test_issue4(self):
    """
    Regression test for the error reported here:
    https://github.com/belambert/edit-distance/issues/4

    Both inputs are 29 tokens long and end in runs of '<EOS>' padding; the
    expected distance is 16 and the full opcode list is compared.
    """
    # NOTE(review): test_issue4 is defined more than once in this file and
    # one variant expects the insert opcodes at a-index 5 instead of 4 -
    # confirm which matches the installed edit_distance version.
    eos = '<EOS>'
    a = [
        'that', 'continuous', 'sanction', ':=', '(', 'flee', 'U', 'complain',
        ')', 'E', 'attendance', 'eye', '^', 'flowery', 'revelation', '^',
        'ridiculous', 'destination',
    ] + [eos] * 11
    b = [
        'continuous', ':=', '(', 'sanction', '^', 'flee', '^', 'attendance',
    ] + [eos] * 21

    # Irregular prefix of the alignment, spelled out explicitly.
    target_opcodes = [
        ['delete', 0, 1, 0, 0],
        ['equal', 1, 2, 0, 1],
        ['delete', 2, 3, 0, 0],
        ['equal', 3, 4, 1, 2],
        ['equal', 4, 5, 2, 3],
        ['insert', 4, 4, 3, 4],
        ['insert', 4, 4, 4, 5],
        ['equal', 5, 6, 5, 6],
    ]
    # Positions 6..17 are one-for-one replacements, 18..28 one-for-one matches.
    target_opcodes += [['replace', i, i + 1, i, i + 1] for i in range(6, 18)]
    target_opcodes += [['equal', i, i + 1, i, i + 1] for i in range(18, 29)]

    sm = SequenceMatcher(a=a, b=b)
    self.assertEqual(sm.distance(), 16)
    self.assertEqual(sm.get_opcodes(), target_opcodes)
def test_issue4_simpler(self):
    """
    Reduced reproduction of the error reported here:
    https://github.com/belambert/edit-distance/issues/4

    Uses only the first five tokens of each sequence; expected distance is 4.
    """
    # NOTE(review): another test_issue4_simpler in this file expects the two
    # insert opcodes at a-index 5 rather than 4 - confirm which one matches
    # the installed edit_distance version.
    expected = [
        [tag, i1, i2, j1, j2]
        for tag, i1, i2, j1, j2 in (
            ('delete', 0, 1, 0, 0),
            ('equal', 1, 2, 0, 1),
            ('delete', 2, 3, 0, 0),
            ('equal', 3, 4, 1, 2),
            ('equal', 4, 5, 2, 3),
            ('insert', 4, 4, 3, 4),
            ('insert', 4, 4, 4, 5),
        )
    ]
    source = ['that', 'continuous', 'sanction', ':=', '(']
    target = ['continuous', ':=', '(', 'sanction', '^']

    matcher = SequenceMatcher(a=source, b=target)
    self.assertEqual(matcher.distance(), 4)
    self.assertEqual(matcher.get_opcodes(), expected)
def test_issue4_simpler(self):
    """Reduced case for https://github.com/belambert/edit-distance/issues/4

    Five tokens per side; expected distance is 4 and the opcode list is
    compared in full.
    """
    # NOTE(review): another test_issue4_simpler in this file expects the two
    # insert opcodes at a-index 4 rather than 5 - confirm which one matches
    # the installed edit_distance version.
    a = "that continuous sanction := (".split()
    b = "continuous := ( sanction ^".split()

    deletes_and_matches = [
        ["delete", 0, 1, 0, 0],
        ["equal", 1, 2, 0, 1],
        ["delete", 2, 3, 0, 0],
        ["equal", 3, 4, 1, 2],
        ["equal", 4, 5, 2, 3],
    ]
    trailing_inserts = [
        ["insert", 5, 5, 3, 4],
        ["insert", 5, 5, 4, 5],
    ]
    target_opcodes = deletes_and_matches + trailing_inserts

    sm = SequenceMatcher(a=a, b=b)
    self.assertEqual(sm.distance(), 4)
    self.assertEqual(sm.get_opcodes(), target_opcodes)
def test_issue4(self):
    """
    Regression check for the error reported here:
    https://github.com/belambert/edit-distance/issues/4

    Compares two 29-token sequences padded with '<EOS>' and verifies both
    the distance (16) and the complete opcode list.
    """
    # NOTE(review): test_issue4 is defined more than once in this file and
    # one variant expects the insert opcodes at a-index 5 instead of 4 -
    # confirm which matches the installed edit_distance version.

    def lockstep(tag, start, stop):
        # One opcode per position, advancing both sequences together.
        return [[tag, k, k + 1, k, k + 1] for k in range(start, stop)]

    padding = '<EOS>'

    reference = [
        'that', 'continuous', 'sanction', ':=', '(', 'flee',
        'U', 'complain', ')', 'E', 'attendance', 'eye',
        '^', 'flowery', 'revelation', '^', 'ridiculous', 'destination',
    ]
    reference.extend([padding] * 11)

    hypothesis = [
        'continuous', ':=', '(', 'sanction',
        '^', 'flee', '^', 'attendance',
    ]
    hypothesis.extend([padding] * 21)

    expected = (
        [
            ['delete', 0, 1, 0, 0],
            ['equal', 1, 2, 0, 1],
            ['delete', 2, 3, 0, 0],
            ['equal', 3, 4, 1, 2],
            ['equal', 4, 5, 2, 3],
            ['insert', 4, 4, 3, 4],
            ['insert', 4, 4, 4, 5],
        ]
        + lockstep('equal', 5, 6)
        + lockstep('replace', 6, 18)
        + lockstep('equal', 18, 29)
    )

    matcher = SequenceMatcher(a=reference, b=hypothesis)
    self.assertEqual(matcher.distance(), 16)
    self.assertEqual(matcher.get_opcodes(), expected)
def _update_state(self, y_true, y_pred, mask=None):
    """Accumulate edit distance and reference length over a batch.

    For every utterance ``i`` the positions selected by ``mask[i]`` are kept,
    both sequences are passed through ``_merge_consequent_states``, and the
    edit distance between the merged sequences is added to
    ``self.edit_distance``. The length of the merged reference is added to
    ``self.length``.

    Args:
        y_true: indexable batch of reference sequences.
        y_pred: indexable batch of predicted sequences; entry ``i`` must
            have the same length as ``y_true[i]``.
        mask: optional per-utterance selector of the non-padded positions.
            When ``None`` (the default) no positions are removed. Assumes
            the sequences support mask indexing (e.g. arrays/tensors) -
            TODO confirm against callers.
    """
    for i in range(len(y_true)):
        # select utterance
        y_true_ = y_true[i]
        y_pred_ = y_pred[i]
        assert len(y_true_) == len(y_pred_)

        # remove padding; the default mask=None used to fail on `mask[i]`,
        # so it is now treated as "nothing to remove"
        if mask is not None:
            keep = mask[i]
            y_true_ = y_true_[keep]
            y_pred_ = y_pred_[keep]

        # merge consecutive states
        y_true_ = _merge_consequent_states(y_true_)
        y_pred_ = _merge_consequent_states(y_pred_)

        # compute edit distance
        sm = SequenceMatcher(a=y_true_, b=y_pred_)
        edit_distance = sm.distance()

        # update state
        self.edit_distance.assign_add(edit_distance)
        self.length.assign_add(len(y_true_))
def test_issue4(self):
    """Regression test for https://github.com/belambert/edit-distance/issues/4

    Two 29-token sequences ending in "<EOS>" padding; expects a distance of
    16 and an exact opcode list.
    """
    # NOTE(review): test_issue4 is defined more than once in this file and
    # the other variants expect the insert opcodes at a-index 4 instead of
    # 5 - confirm which matches the installed edit_distance version.
    a = (
        "that continuous sanction := ( flee U complain ) E "
        "attendance eye ^ flowery revelation ^ ridiculous destination"
    ).split() + ["<EOS>"] * 11
    b = "continuous := ( sanction ^ flee ^ attendance".split() + ["<EOS>"] * 21

    target_opcodes = [
        ["delete", 0, 1, 0, 0],
        ["equal", 1, 2, 0, 1],
        ["delete", 2, 3, 0, 0],
        ["equal", 3, 4, 1, 2],
        ["equal", 4, 5, 2, 3],
        ["insert", 5, 5, 3, 4],
        ["insert", 5, 5, 4, 5],
        ["equal", 5, 6, 5, 6],
    ]
    # From position 6 on the alignment is strictly one-to-one:
    # twelve replacements followed by eleven matches.
    tail_tags = ["replace"] * 12 + ["equal"] * 11
    target_opcodes.extend(
        [tag, i, i + 1, i, i + 1] for i, tag in enumerate(tail_tags, start=6)
    )

    sm = SequenceMatcher(a=a, b=b)
    self.assertEqual(sm.distance(), 16)
    self.assertEqual(sm.get_opcodes(), target_opcodes)
y_pred_phones = states2phones(y_pred, phones, stateList) y_true_phones = states2phones(y_true, phones, stateList) accuracy.reset_states() accuracy.update_state(y_true_phones, y_pred_phones) print('Frame-by-frame accuracy at the phoneme level: {:.2f}%'.format( accuracy.result().numpy() * 100)) plt.figure() plot_confusion_matrix(y_true_phones, y_pred_phones) plt.title('Frame-by-frame confusion matrix at the phoneme level') # PER at the state level N = 10000 # number of frames to consider (distance computation is expensive) y_pred_merged = merge_consequent_states(y_pred[:N]) y_true_merged = merge_consequent_states(y_true[:N]) sm = SequenceMatcher(a=y_true_merged, b=y_pred_merged) edit_distance = sm.distance() print('PER at the state level: {:.2f}%'.format(edit_distance / N * 100)) # PER at the phoneme level y_pred_merged = merge_consequent_states(y_pred_phones[:N]) y_true_merged = merge_consequent_states(y_true_phones[:N]) sm = SequenceMatcher(a=y_true_merged, b=y_pred_merged) edit_distance = sm.distance() print('PER at the phoneme level: {:.2f}%'.format(edit_distance / N * 100)) # posteriors for first utterance utterance = testdata[0] x, y = prepare_matrices([utterance], K, feature_type, dynamic_features=dynamic_features,
# pred_four_layer_trans = transcribe(y_sample, prediction_four_layer) y_transcribed = transcribe(y_sample) pred_one_layer_trans = transcribe(prediction_one_layer) pred_four_layer_trans = transcribe(prediction_four_layer) fig, axs = plt.subplots(3) axs[0].set_title("Correct output, state level merged") axs[0].pcolormesh(y_transcribed.T) axs[1].set_title(name + " 1 layer") axs[1].pcolormesh(pred_one_layer_trans.T) axs[2].set_title(name + " 4 layers") axs[2].pcolormesh(pred_four_layer_trans.T) plt.show() seq1 = SequenceMatcher(y_transcribed, prediction_four_layer_merged) distance = seq1.distance() / 324 * 100 # TODO: Then measure the Phone Error Rate (PER), # that is the length normalised edit distance between the sequence # of states from the DNN and the correct transcription # TODO: Use SequenceMatcher from edit distance to quickly calculate PER # Part 4 Phenome Level edit dist # y_transcribed_merged = transcribe(y_sample_merged, y_sample_merged) # pred_one_layer_trans_merged = transcribe(y_sample_merged, prediction_one_layer_merged) # pred_four_layer_trans_merged = transcribe(y_sample_merged, prediction_four_layer_merged) y_transcribed_merged = transcribe(y_sample_merged) pred_one_layer_trans_merged = transcribe(prediction_one_layer_merged) pred_four_layer_trans_merged = transcribe(prediction_four_layer_merged)