Пример #1
0
def test_new_transcription_using_use_existing(tmpdir):
    """
    Passing use_existing=True is acceptable even when no transcription with
    the requested name exists yet; the call must simply not raise.
    """
    state_dir = f'{tmpdir}/state'
    interface = KaldiInterface(state_dir)
    interface.new_transcription('transcription_w', use_existing=True)
Пример #2
0
def test_override_and_use_existing(tmpdir):
    """
    Requesting override and use_existing at the same time is rejected with a
    ValueError.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    # The two flags are expected to be mutually exclusive.
    conflicting_flags = {'override': True, 'use_existing': True}
    with pytest.raises(ValueError):
        interface.new_transcription('transcription_w', **conflicting_flags)
Пример #3
0
def test_two_new_transcription_with_same_name(tmpdir):
    """
    Creating a second transcription under a name that is already taken, with
    neither override nor use_existing set, must raise a ValueError.
    """
    name = 'transcription_w'
    interface = KaldiInterface(f'{tmpdir}/state')
    # First creation is expected to succeed.
    interface.new_transcription(name)
    # Second creation with the same name and default flags must fail.
    with pytest.raises(ValueError):
        interface.new_transcription(name)
Пример #4
0
def test_existing_transcription_using_override(tmpdir):
    """
    With override=True, creating a transcription under an existing name
    replaces the old one: exactly one transcription is listed afterwards and
    its hash differs from the original's.
    """
    name = 'transcription_w'
    interface = KaldiInterface(f'{tmpdir}/state')

    first = interface.new_transcription(name)
    first_hash = first.hash  # captured before the override happens

    second = interface.new_transcription(name, override=True)
    # NOTE: `first` must not be used any more from this point on.

    listed = interface.list_transcriptions()
    assert len(listed) == 1
    assert first_hash != second.hash
Пример #5
0
def test_error_when_writing_to_protected_property(tmpdir):
    """
    Assigning to any of the protected properties of a transcription must
    raise an AttributeError.
    """
    interface = KaldiInterface(f'{tmpdir}/state')
    transcription = interface.new_transcription('transcription_w')

    # (property name, value we try to assign), checked in this order.
    forbidden_writes = (
        ('has_been_transcribed', True),
        ('exporter', "some obj"),
        ('state', "Not a valid"),
    )
    for attribute, value in forbidden_writes:
        with pytest.raises(AttributeError):
            setattr(transcription, attribute, value)
Пример #6
0
def test_new_transcription(tmpdir):
    """
    Check the state of a new transcription.

    A freshly created transcription is expected to be untranscribed, to have
    no exporter and no model, and to report its own name, hash and date in
    ``state``.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t = kaldi.new_transcription('transcription_w')
    # Identity checks: `== False` / `== None` would also pass for objects that
    # merely compare equal to those singletons (e.g. 0).
    assert t.has_been_transcribed is False
    assert t.exporter is None
    # Build the expected state as a dict instead of interpolating values into
    # JSON text, which breaks on any character that needs JSON escaping.
    # The f-strings keep the same string formatting of hash and date that the
    # JSON-based comparison used.
    assert t.state == {
        "name": "transcription_w",
        "hash": f"{t.hash}",
        "date": f"{t.date}",
        "model": None,
        "has_been_transcribed": False,
        "exporter": None,
    }
Пример #7
0
def test_new_transcription_using_override(tmpdir):
    """
    Using override has no effect when a transcription with the same name does
    not exist.

    The resulting transcription is checked against the same expectations as a
    plain new transcription: untranscribed, no exporter, no model, and a
    ``state`` that reports its own name, hash and date.
    """
    kaldi = KaldiInterface(f'{tmpdir}/state')
    t = kaldi.new_transcription('transcription_w', override=True)
    # Identity checks: `== False` / `== None` would also pass for objects that
    # merely compare equal to those singletons (e.g. 0).
    assert t.has_been_transcribed is False
    assert t.exporter is None
    # Build the expected state as a dict instead of interpolating values into
    # JSON text, which breaks on any character that needs JSON escaping.
    # The f-strings keep the same string formatting of hash and date that the
    # JSON-based comparison used.
    assert t.state == {
        "name": "transcription_w",
        "hash": f"{t.hash}",
        "date": f"{t.date}",
        "model": None,
        "has_been_transcribed": False,
        "exporter": None,
    }
Пример #8
0
# NOTE(review): `ds` and `kaldi` are created before this point, outside the
# visible part of the script (presumably in a "Step 1" section; confirm).
# Register the directory of transcribed recordings with the dataset, passing
# 'eaf' and 'wav' as the filter, then process the dataset.
ds.add_directory('/recordings/transcribed', filter=['eaf', 'wav'])
ds.process()

# Step 2
# ======
# Build pronunciation dictionary
# The pronunciation dictionary is linked to the dataset and pointed at a
# letter-to-sound file before the lexicon is generated.
pd = kaldi.new_pron_dict('pd')
pd.link(ds)
pd.set_l2s_path('/recordings/letter_to_sound.txt')
pd.generate_lexicon()

# Step 3
# ======
# Link dataset and pd to a new model, then train the model.
m = kaldi.new_model('mx')
m.link(ds, pd)
m.build_kaldi_structure()
m.train() # may take a while

# Step 4
# ======
# Make a transcription interface and transcribe unseen audio to elan.
t = kaldi.new_transcription('tx')
t.link(m)
# The audio is handed over as a binary file object; the file is closed again
# as soon as prepare_audio() returns.
with open('/recordings/untranscribed/audio.wav', 'rb') as faudio:
    t.prepare_audio(faudio)
# Alternative left disabled by the author: transcribe_align() together with
# the elan() output below, instead of transcribe() + text().
# t.transcribe_align()
t.transcribe()
# print(t.elan().decode('utf-8'))
# text() is decoded as UTF-8 before printing, so it evidently returns bytes.
print(t.text().decode('utf-8'))