Пример #1
0
    def test_groups_entries_with_the_same_word(self):
        """All pronunciations of "with" in the sample file load under one key."""
        samplePath = os.path.join(dataRoot, "isle_sample.txt")
        sut = isle_io.readIsleDict(samplePath)
        entries = lazyLoadValue("with", sut)

        self.assertEqual(2, len(entries))

        # Both expected entries share one POS list; they differ only in
        # the final phone of their single syllable.
        expectedPosList = ["in", "nnp", "rp"]
        for index, finalPhone in enumerate(["ð", "ɵ"]):
            expectedEntry = phonetics.Entry(
                "with",
                [[["w", "ɪ", finalPhone]]],
                expectedPosList,
            )
            self.assertEqual(expectedEntry, entries[index])
Пример #2
0
    def test_opening_isle_files(self):
        """The sample file yields 23 words and a parseable "another" entry."""
        samplePath = os.path.join(dataRoot, "isle_sample.txt")
        sut = isle_io.readIsleDict(samplePath)
        self.assertEqual(23, len(sut.keys()))

        # Only the first pronunciation of "another" is checked here
        firstEntry = lazyLoadValue("another", sut)[0]
        self.assertEqual(
            phonetics.Entry(
                "another",
                [[["ə"], ["n", "ˈʌ"], ["ð", "ɚ"]]],
                ["dt", "nn", "prp"],
            ),
            firstEntry,
        )
Пример #3
0
    def test_lookup_can_return_multiple_results(self):
        """lookup() returns one entry per pronunciation stored for the word."""
        sut = self.isle.lookup("another")
        self.assertEqual(2, len(sut))

        firstExpected = phonetics.Entry(
            "another",
            [[["ə"], ["n", "ˈʌ"], ["ð", "ɚ"]]],
            ["dt", "nn", "prp"],
        )
        self.assertEqual(firstExpected, sut[0])

        # The second pronunciation syllabifies the same word differently
        secondExpected = phonetics.Entry(
            "another",
            [[["ə"], ["n", "ˈʌ", "ð"], ["ə", "ɹ"]]],
            ["dt", "nn", "prp"],
        )
        self.assertEqual(secondExpected, sut[1])
Пример #4
0
    def test_lookup(self):
        """lookup() returns the single entry stored for "cat"."""
        sut = self.isle.lookup("cat")
        self.assertEqual(1, len(sut))

        expectedEntry = phonetics.Entry(
            "cat",
            [[["k", "ˌæ", "t˺"]]],
            ["dt", "nn", "prp"],
        )
        self.assertEqual(expectedEntry, sut[0])
Пример #5
0
def lazyLoadValue(
    word: str, linesByWord: Dict[str, List[str]]
) -> List[phonetics.Entry]:
    """Parse every raw line stored for a word into a phonetics.Entry

    Args:
        word: the key to look up in linesByWord
        linesByWord: maps each word to its unparsed dictionary lines
            (the tests pass the result of isle_io.readIsleDict here)

    Returns:
        one Entry per raw line, in the same order as the lines

    Raises:
        KeyError: if word is not a key of linesByWord (plain dict lookup)
    """
    entries: List[phonetics.Entry] = []
    for line in linesByWord[word]:
        # parseIslePronunciation returns a mapping; only these three
        # fields are needed to build the Entry
        parsed = isle_io.parseIslePronunciation(word, line)
        entries.append(
            phonetics.Entry(
                parsed["word"],
                parsed["syllabificationList"],
                parsed["posList"],
            )
        )

    return entries
Пример #6
0
    def test_can_read_multiword_entries(self):
        """A multiword key loads as one entry with one syllabification per word."""
        samplePath = os.path.join(dataRoot, "isle_sample.txt")
        sut = isle_io.readIsleDict(samplePath)

        entries = lazyLoadValue("pumpkins_parley", sut)
        self.assertEqual(1, len(entries))

        entry = entries[0]
        self.assertEqual(2, len(entry.syllabificationList))

        expectedEntry = phonetics.Entry(
            "pumpkins_parley",
            [
                [["p", "ˈʌ", "m"], ["k", "n̩", "z"]],
                [["p", "ˈɑ", "ɹ"], ["l", "i"]],
            ],
            ["nns"],
        )
        self.assertEqual(expectedEntry, entry)
Пример #7
0
    def test_loading_full_built_in_isle_dictionary(self):
        """The built-in dictionary loads and holds the expected "cat" entry."""
        # Loading the full dictionary is slow (~0.5s), so only this test
        # does it; the other Isle tests open the much smaller VirtualIsle,
        # which should be almost instant.
        sut = isletool.Isle()

        # The expectations below depend on the contents of the built-in
        # dictionary file (pysle/data/ISLEdict.txt). Not ideal, but no
        # alternative is known. Note that its entry for "cat" differs
        # slightly from the one defined in the VirtualIsle.
        catEntries = sut.lookup("cat")
        self.assertEqual(1, len(catEntries))

        expectedCat = phonetics.Entry(
            "cat", [[["k", "ˈæ", "t"]]], ["nn", "nnp"]
        )
        self.assertEqual(expectedCat, catEntries[0])

        # Number of unique words in the built-in dictionary
        uniqueWordCount = len(sut.rawData.keys())
        self.assertEqual(254_430, uniqueWordCount)
Пример #8
0
def findClosestEntryForSyllabification(
    isle: isletool.Isle,
    word: str,
    syllabification: Union[List[List[str]], phonetics.Syllabification],
) -> Tuple[phonetics.Entry, phonetics.Entry]:
    """Pick the dictionary pronunciation nearest to a given syllabification

    The input syllabification is wrapped in a temporary Entry (with no
    part-of-speech information) so it can be compared against the
    entries that the dictionary holds for the same word.

    Args:
        isle: an instance of Isle
        word: the word to look up
        syllabification: the syllabified phonemes for the word, either
            as nested lists or as a phonetics.Syllabification

    Returns:
        whatever Entry.findClosestPronunciation returns for the word's
        entries; intended to be the entry whose syllabification is most
        similar to the input one

    NOTE(review): the return annotation declares a pair of entries while
    the description above speaks of a single entry -- confirm against
    phonetics.Entry.findClosestPronunciation.
    """
    # Look the word up first so an unknown word fails before any
    # conversion work is done
    candidates = isle.lookup(word)

    query = phonetics.Entry(
        word,
        [phonetics._toSyllabification(syllabification)],
        [],
    )
    return query.findClosestPronunciation(candidates)
Пример #9
0
    def _lazyLoad(self, word: str) -> List[phonetics.Entry]:
        """Return the entries for a word, parsing its raw lines on first use

        Args:
            word: the word whose entries are wanted

        Returns:
            the cached entries when a non-empty list is already stored
            in self.data; otherwise the entries freshly parsed from
            self.rawData (which are then stored in self.data)

        Raises:
            errors.WordNotInIsleError: if nothing usable is cached and
                self.rawData has no lines for the word
        """
        cached = self.data.get(word)
        if cached:
            return cached

        rawLines = self.rawData.get(word)
        if rawLines is None:
            raise errors.WordNotInIsleError(word)

        # The generator keeps parsing and Entry construction interleaved
        # line by line
        parsedLines = (
            isle_io.parseIslePronunciation(word, rawLine)
            for rawLine in rawLines
        )
        freshEntries: List[phonetics.Entry] = [
            phonetics.Entry(
                fields["word"],
                fields["syllabificationList"],
                fields["posList"],
            )
            for fields in parsedLines
        ]

        self.data[word] = freshEntries
        return freshEntries
Пример #10
0
def entry(phoneList: List[List[List[str]]]):
    """Build an Entry for the word "foo" from the given syllabification list

    The word and the part-of-speech list are fixed; only the
    syllabification list varies between callers.
    """
    word, posList = "foo", ["n"]
    return phonetics.Entry(word, phoneList, posList)