Exemplo n.º 1
0
def test_e2e():
    """End-to-end check of lm_transcribe against the stored golden alignment.

    Loads the golden JSON and the plain-text transcript from tests/data,
    transcribes tests/data/lucier.mp3, scores the resulting 'words' against
    the golden 'words', and requires 'correct' above 0.85 and 'error'
    below 0.35.
    """
    golden_path = "tests/data/lucier_golden.json"
    transcript_path = "tests/data/lucier.txt"

    with open(golden_path) as golden_file:
        expected = json.load(golden_file)
    with open(transcript_path) as transcript_file:
        text = transcript_file.read()

    result = lm_transcribe(
        "tests/data/lucier.mp3",
        text,
        "PROTO_LANGDIR",
        "data/nnet_a_gpu_online",
    )

    # NOTE(review): the golden words are passed as the first argument here;
    # confirm this matches the argument order alignment_score expects.
    metrics = alignment_score(expected['words'], result['words'])
    assert_greater(metrics['correct'], 0.85)
    assert_less(metrics['error'], 0.35)
Exemplo n.º 2
0
def test_golden_master_identity():
	"""Scoring the golden word list against itself must yield a perfect score.

	Expects 'correct' to be 1.0 and every other reported rate to be 0.0.
	"""
	with open("tests/data/lucier_golden.json") as golden_file:
		golden_words = json.load(golden_file)['words']

	perfect = {'inserted': 0.0, 'correct': 1.0, 'deleted': 0.0, 'substituted': 0.0, 'error': 0.0}
	observed = alignment_score(golden_words, golden_words)
	assert_equals(observed, perfect)
Exemplo n.º 3
0
def test_e2e():
	"""Transcribe the sample recording and compare it with the golden alignment.

	The test passes when the score of the transcription's 'words' against the
	golden file's 'words' has 'correct' greater than 0.85 and 'error' less
	than 0.35.
	"""
	with open("tests/data/lucier_golden.json") as golden_fh:
		golden_words = json.load(golden_fh)['words']
	with open("tests/data/lucier.txt") as transcript_fh:
		transcript_text = transcript_fh.read()

	audio_path = "tests/data/lucier.mp3"
	lang_dir = "PROTO_LANGDIR"
	model_dir = "data/nnet_a_gpu_online"
	transcription = lm_transcribe(audio_path, transcript_text, lang_dir, model_dir)

	# NOTE(review): the golden words are passed as the first argument here;
	# confirm this matches the argument order alignment_score expects.
	result = alignment_score(golden_words, transcription['words'])
	assert_greater(result['correct'], 0.85)
	assert_less(result['error'], 0.35)
Exemplo n.º 4
0
def test_no_ref_text():
	"""Smoke test: scoring two empty word lists must not raise.

	There is no assertion on the result; the test passes if the call returns.
	"""
	first_words, second_words = [], []
	alignment_score(first_words, second_words)
Exemplo n.º 5
0
def test_alignment_score():
	"""Table-driven check of alignment_score on small hand-built word lists.

	Every case is a (hyp, ref, want, name) tuple: the two word lists passed to
	alignment_score in that order, the exact score dict expected back, and a
	label used in the failure message.
	"""
	def word(case, text, start, end):
		# Build a fresh word dict on every call so no case shares objects.
		return {'case': case, 'alignedWord': text, 'start': start, 'end': end}

	def ok(text, start, end):
		return word('success', text, start, end)

	def extra(text, start, end):
		return word('not-found-in-transcript', text, start, end)

	def rates(inserted=0.0, correct=0.0, deleted=0.0, substituted=0.0, error=0.0):
		# Any rate not given explicitly defaults to 0.0.
		return {
			'inserted': inserted,
			'correct': correct,
			'deleted': deleted,
			'substituted': substituted,
			'error': error,
		}

	cases = (
		(
			[ok('hello', 0, 1)],
			[ok('hello', 0, 1)],
			rates(correct=1.0),
			'one correct',
		),
		(
			[ok('one', 0, 1), ok('two', 2, 3)],
			[ok('one', 0, 1), ok('two', 2, 3)],
			rates(correct=1.0),
			'two correct',
		),
		(
			[ok('hello', 0, 1)],
			[ok('hello', 0, 1), ok('hello', 0, 1)],
			rates(correct=0.5, deleted=0.5, error=0.5),
			'one deleted, one correct',
		),
		(
			[ok('hello', 0, 1), extra('two', 2, 3)],
			[ok('hello', 0, 1)],
			rates(inserted=1.0, correct=1.0, error=1.0),
			'one inserted',
		),
		(
			[ok('hello', 0, 1), extra('two', 2, 3), extra('three', 4, 5)],
			[ok('hello', 0, 1)],
			rates(inserted=2.0, correct=1.0, error=2.0),
			'two inserted',
		),
		(
			[],
			[ok('hello', 0, 1)],
			rates(deleted=1.0, error=1.0),
			'one deleted',
		),
		(
			[],
			[ok('one', 0, 1), ok('two', 0, 1)],
			rates(deleted=1.0, error=1.0),
			'two deleted',
		),
		(
			[extra('yes', 0, 1)],
			[ok('no', 0, 1)],
			rates(substituted=1.0, error=1.0),
			'one substituted',
		),
		(
			[extra('yes', 0, 1), extra('yes2', 2, 3)],
			[ok('no', 0, 1), ok('no2', 2, 3)],
			rates(substituted=1.0, error=1.0),
			'two substituted',
		),
		(
			# The second word carries no 'start'/'end' keys at all.
			[ok('hello', 0, 1), {'case': 'not-found-in-audio', 'alignedWord': 'hello'}],
			[ok('hello', 0, 1)],
			rates(correct=1.0),
			'not found in audio',
		),
	)

	for hypothesis, reference, expected, label in cases:
		actual = alignment_score(hypothesis, reference)
		assert_equals(actual, expected, "%s: wanted %r, got %r" % (label, expected, actual))