def test_groups_entries_with_the_same_word(self):
    """Both lines stored for "with" in the sample file load as separate entries."""
    samplePath = os.path.join(dataRoot, "isle_sample.txt")
    sut = isle_io.readIsleDict(samplePath)

    entries = lazyLoadValue("with", sut)
    self.assertEqual(2, len(entries))

    # Both expected entries carry the same part-of-speech list; only the
    # last phone of the single syllable differs between them.
    sharedPosList = ["in", "nnp", "rp"]

    self.assertEqual(
        phonetics.Entry("with", [[["w", "ɪ", "ð"]]], sharedPosList),
        entries[0],
    )
    self.assertEqual(
        phonetics.Entry("with", [[["w", "ɪ", "ɵ"]]], sharedPosList),
        entries[1],
    )
def test_opening_isle_files(self):
    """Reading the sample file yields 23 keys and a parseable "another" entry."""
    samplePath = os.path.join(dataRoot, "isle_sample.txt")
    sut = isle_io.readIsleDict(samplePath)

    self.assertEqual(23, len(sut.keys()))

    # Only the first entry parsed for "another" is checked here
    firstEntry = lazyLoadValue("another", sut)[0]
    self.assertEqual(
        phonetics.Entry(
            "another",
            [[["ə"], ["n", "ˈʌ"], ["ð", "ɚ"]]],
            ["dt", "nn", "prp"],
        ),
        firstEntry,
    )
def test_lookup_can_return_multiple_results(self):
    """lookup("another") returns two entries, in the order listed below."""
    sut = self.isle.lookup("another")
    self.assertEqual(2, len(sut))

    # Expected syllabification of each returned entry, by position
    expectedSyllabifications = [
        [["ə"], ["n", "ˈʌ"], ["ð", "ɚ"]],
        [["ə"], ["n", "ˈʌ", "ð"], ["ə", "ɹ"]],
    ]
    for index, syllabification in enumerate(expectedSyllabifications):
        self.assertEqual(
            phonetics.Entry("another", [syllabification], ["dt", "nn", "prp"]),
            sut[index],
        )
def test_lookup(self):
    """lookup("cat") returns exactly one entry with the expected contents."""
    results = self.isle.lookup("cat")
    self.assertEqual(1, len(results))

    expectedEntry = phonetics.Entry(
        "cat",
        [[["k", "ˌæ", "t˺"]]],
        ["dt", "nn", "prp"],
    )
    self.assertEqual(expectedEntry, results[0])
def lazyLoadValue(
    word: str, linesByWord: Dict[str, List[str]]
) -> List[phonetics.Entry]:
    """Parse every raw line stored for a word into a phonetics.Entry

    Args:
        word: the key to look up in linesByWord
        linesByWord: maps each word to the collection of raw lines
            recorded for it; each line is handed to
            isle_io.parseIslePronunciation

    Returns:
        one Entry per raw line, in the same order as the lines

    Raises:
        KeyError: if word is not a key of linesByWord
    """
    entries: List[phonetics.Entry] = []
    for line in linesByWord[word]:
        parsed = isle_io.parseIslePronunciation(word, line)
        entries.append(
            phonetics.Entry(
                parsed["word"],
                parsed["syllabificationList"],
                parsed["posList"],
            )
        )
    return entries
def test_can_read_multiword_entries(self):
    """A key made of two words loads as one entry with two syllabifications."""
    samplePath = os.path.join(dataRoot, "isle_sample.txt")
    sut = isle_io.readIsleDict(samplePath)

    entries = lazyLoadValue("pumpkins_parley", sut)
    self.assertEqual(1, len(entries))

    entry = entries[0]
    self.assertEqual(2, len(entry.syllabificationList))

    # One syllabification per word of the multiword key
    firstWordSyllables = [["p", "ˈʌ", "m"], ["k", "n̩", "z"]]
    secondWordSyllables = [["p", "ˈɑ", "ɹ"], ["l", "i"]]
    expectedEntry = phonetics.Entry(
        "pumpkins_parley",
        [firstWordSyllables, secondWordSyllables],
        ["nns"],
    )
    self.assertEqual(expectedEntry, entry)
def test_loading_full_built_in_isle_dictionary(self):
    """Isle() with no arguments loads the built-in dictionary."""
    # Loading takes time (~0.5s), so it is done for this one test only.
    # The other Isle tests open the much smaller VirtualIsle instead,
    # which should be almost instant.
    sut = isletool.Isle()

    # The expectations below require knowledge of the built-in dictionary;
    # please reference that file (pysle/data/ISLEdict.txt). Not ideal, but
    # no other way of checking this is known.
    # Notice that the entry for cat is slightly different from the one
    # defined in the VirtualIsle.
    catEntries = sut.lookup("cat")
    self.assertEqual(1, len(catEntries))

    expectedCat = phonetics.Entry("cat", [[["k", "ˈæ", "t"]]], ["nn", "nnp"])
    self.assertEqual(expectedCat, catEntries[0])

    # The number of unique words in the built-in dictionary
    self.assertEqual(254_430, len(sut.rawData.keys()))
def findClosestEntryForSyllabification(
    isle: isletool.Isle,
    word: str,
    syllabification: Union[List[List[str]], phonetics.Syllabification],
) -> Tuple[phonetics.Entry, phonetics.Entry]:
    """Pick the dictionary entry for a word that best matches a syllabification

    The input syllabification is wrapped in a temporary Entry (with an empty
    part-of-speech list), which is then compared against every entry the
    dictionary holds for the word.

    Args:
        isle: an instance of Isle
        word: the word to lookup
        syllabification: the syllabification for the word

    Returns:
        whatever Entry.findClosestPronunciation returns for the looked-up
        entries; described as the Isle entry whose syllabification is most
        similar to the input one, among the entries for this word

    NOTE(review): the return annotation declares a pair of entries while the
    description above speaks of a single entry -- confirm which is correct
    against Entry.findClosestPronunciation.
    """
    # Look the word up first so a lookup failure surfaces before conversion
    candidates = isle.lookup(word)
    query = phonetics.Entry(
        word,
        [phonetics._toSyllabification(syllabification)],
        [],
    )
    return query.findClosestPronunciation(candidates)
def _lazyLoad(self, word: str) -> List[phonetics.Entry]:
    """Return the entries for a word, parsing its raw text on first use

    Entries already stored in self.data are returned directly. Otherwise the
    raw lines in self.rawData are parsed, stored in self.data, and returned.

    Raises:
        errors.WordNotInIsleError: if self.rawData has no lines for the word
    """
    cached = self.data.get(word)
    if cached:
        return cached

    # Nothing usable is cached (missing or empty): parse from the raw lines
    rawLines = self.rawData.get(word)
    if rawLines is None:
        raise errors.WordNotInIsleError(word)

    parsedLines = (
        isle_io.parseIslePronunciation(word, rawLine) for rawLine in rawLines
    )
    loaded: List[phonetics.Entry] = [
        phonetics.Entry(
            parsed["word"],
            parsed["syllabificationList"],
            parsed["posList"],
        )
        for parsed in parsedLines
    ]

    self.data[word] = loaded
    return loaded
def entry(phoneList: List[List[List[str]]]):
    """Build a phonetics.Entry for the fixed word "foo" with POS list ["n"]

    Args:
        phoneList: passed through unchanged as the Entry's second argument
    """
    word = "foo"
    posList = ["n"]
    return phonetics.Entry(word, phoneList, posList)