Пример #1
0
def test_normalize_text(inp):
    """Check that ``normalize_text(inp)`` contains only ASCII characters."""
    result = normalize_text(inp)
    # ``isascii`` only exists on Python 3.7+, so probe for it before calling.
    ascii_check = getattr(result, "isascii", None)
    if ascii_check is not None:
        assert ascii_check()
    # Version-independent check: encoding to ASCII raises if any
    # non-ASCII character survived normalization.
    result.encode("ascii")
Пример #2
0
def sample_manifest(sample_data):
    """Write a JSON-lines manifest for the ``LapsBM-F004`` sample files.

    One JSON object per line is written for every file returned by
    ``get_files(sample_data / "LapsBM-F004", ".wav")``, with the keys
    ``audio_filepath`` (resolved path as a string), ``duration`` (result of
    ``audio_len``) and ``text`` (stripped contents of the sibling ``.txt``
    file, passed through ``normalize_text``).

    Args:
        sample_data: Directory holding ``LapsBM-F004``; the manifest is
            created inside it. Presumably a ``pathlib.Path`` since it is
            combined with ``/`` -- confirm against the caller.

    Returns:
        The path of the written ``test_example_manifest.json`` file.
    """
    wav_paths = get_files(sample_data / "LapsBM-F004", ".wav")
    manifest_path = sample_data / "test_example_manifest.json"

    with open(manifest_path, "w", encoding="utf8") as out:
        for wav_path in wav_paths:
            resolved = str(wav_path.resolve())
            duration = audio_len(wav_path)
            # NOTE(review): read_text() uses the platform default encoding,
            # while the manifest itself is written as UTF-8 -- confirm the
            # transcripts' encoding.
            transcript = wav_path.with_suffix(".txt").read_text().strip()
            entry = {
                "audio_filepath": resolved,
                "duration": duration,
                "text": normalize_text(transcript),
            }
            json.dump(entry, out)
            out.write("\n")

    return manifest_path
Пример #3
0
def test_normalize_text_specific_inputs():
    """Check ``normalize_text`` against accented inputs with known results."""
    cases = (
        ("áàâã", "aaaa"),
        ("ç", "c"),
    )
    for raw, expected in cases:
        assert normalize_text(raw) == expected
Пример #4
0
 def preprocess_text(self, text: str) -> str:
     """Return ``text`` passed through ``normalize_text``, then ``lower_text``."""
     return lower_text(normalize_text(text))