Example no. 1
0
    def test_combining_in_others(self):
        # Regression test: Setswana's 'bó̝kɔ̝̀ːnì' was mis-parsed because the
        # length mark "ː" was merged onto the preceding vowel, yielding the
        # token 'ɔ̝̀ː'. 'ɔ̝̀' alone IS in the known missings and "ː" IS in the
        # other symbols, but the fused 'ɔ̝̀ː' matched nothing and was flagged
        # as an error. The parser must keep "ː" as its own "other" token.
        reader = FileReader()
        reader.data['consonants'] = reader.parse_inventory("b, k, n", 'consonant')
        reader.data['vowels'] = reader.parse_inventory(
            "o̝(o̝, ò̝, ó̝, ô̝, ǒ̝), i(i, ì, í, î, ǐ, ì̞, í̞)", 'vowel')
        reader.known_missings.update(reader.parse_list(['/ɔ̝̀/']))
        reader.other_symbols.update(reader.parse_inventory('ː', 'other'))
        # Other: ː
        parsed = reader.parse_transcript(reader.standardise('bó̝kɔ̝̀ːnì'))
        expected = [
            Token("b"),
            Token("o̝(o̝, ò̝, ó̝, ô̝, ǒ̝)"),
            Token("k"),
            MissingToken("ɔ̝̀"),
            Token("ː"),
            Token("n"),
            Token("i(i, ì, í, î, ǐ, ì̞, í̞)"),
        ]
        for index, want in enumerate(expected):
            assert parsed[index] == want, parsed
Example no. 2
0
 def test_ellipsis(self):
     # Regression test for a parsing error involving the ellipsis marker
     # "[...]": it must survive as one token when listed as a known missing.
     reader = FileReader()
     reader.data['consonants'] = reader.parse_inventory("l n", 'consonant')
     reader.data['vowels'] = reader.parse_inventory("", 'vowels')
     reader.known_missings.update(reader.parse_list(["/[...]/"]))
     parsed = reader.parse_transcript(reader.standardise("l [...] n"))
     expected = [Token("l"), Token(" "), Token("[...]"), Token(" "), Token("n")]
     for index, want in enumerate(expected):
         assert parsed[index] == want
Example no. 3
0
 def test_upper_xumi(self):
     # Regression test: multi-character "other" symbols were split into
     # repeated single symbols — e.g. "||" came back as two "|" tokens.
     # The longest-match symbol must win.
     reader = FileReader()
     reader.data['consonants'] = reader.parse_inventory("l H", 'consonant')
     reader.data['vowels'] = reader.parse_inventory("i", 'vowels')
     reader.known_missings.update(reader.parse_list(["/|/", "/||/"]))
     parsed = reader.parse_transcript(reader.standardise("li || H"))
     expected = [Token("l"), Token("i"), Token(" "),
                 Token("||"), Token(" "), Token("H")]
     for index, want in enumerate(expected):
         assert parsed[index] == want
Example no. 4
0
class Test_Mambai(unittest.TestCase):
    """Parsing tests for Mambai transcripts.

    Each test gets a fresh FileReader loaded (in setUp) with the Mambai
    consonant and vowel inventories — entries list their allographs in
    parentheses — plus one known-missing "other" symbol (the rising-arrow
    intonation mark).

    NOTE(review): the inventory strings below contain combining diacritics
    and, in places, trailing whitespace inside the triple-quoted literals;
    presumably parse_inventory tolerates/strips that — do not reformat them.
    """

    def setUp(self):
        # Fresh reader per test so inventory state never leaks between tests.
        self.f = FileReader()
        self.f.data['consonants'] = self.f.parse_inventory(
            """
        p, b, t, d, k, g(g, k̚, q̚, ɣ, ʁ), kp(kp, kpŋm), gb, ɓ(ɓ, ʔm̰, ʔɓ, ʔp),
        ɗ(ɗ, ʔn̰, ʔɗ, ʔl̰), m, n, ŋ, ⱱ̟, ɽ(ɽ, ɳ̆, r), f, v, s, z, h, j(j, ɲ), 
        ʔj̰(ʔj̰, ʔɲ̰), w(w, ŋʷ), ʔw̰(ʔw̰, ʔŋ̰ʷ, ʔẁ̰), l(l, n), ʔ
        """, "consonant")
        self.f.data['vowels'] = self.f.parse_inventory(
            """
        i(i, í, ì, î, ĭ̀, ĭ́, íʔḭ̆́),
        ĩ(ĩ, ĩ́, ĩ̀, ĩ̂),
        ḭ̃(ḭ̃, ḭ̃́, ḭ̃̀, ḭ̃̂),
        ḭ(ḭ, ḭ́, ḭ̀, ḭ̂, iʔḭ),
        iː(iː, íː, ìː, îː),
        ĩː(ĩː, ĩ́ː, ĩ̀ː, ĩ̂ː),
        iˤ(iˤ, íˤ, ìˤ, îˤ, eˤ, éˤ, èˤ, êˤ),
        ĩˤ(ĩˤ, ĩ́ˤ, ĩ̀ˤ, ĩ̂ˤ), ẽˤ(ẽˤ, ẽ́ˤ, ẽ̀ˤ, ẽ̂ˤ),
        
        e(e, é, è, ê),
        ḛ(ḛ, ḛ́, ḛ̀, ḛ̂, eʔḛ, èʔḛ̆),
        eː(e:, éː, èː, êː),
        ḛ̃(ḛ̃, ḛ̃́, ḛ̃̀, ḛ̃̂),

        a(a, á, à, â),
        ã(ã, ã́, ã̀, ã̂),
        a̰(a̰, á̰, ắ̰, à̰, â̰, aʔa̰, áʔằ̰, áʔắ̰),
        aː(aː, áː, àː, âː), 
        ãː(ãː, ã́ː, ã̀ː, ã̂ː),
        aˤ(aˤ, áˤ, àˤ, âˤ),
        ãˤ(ãˤ, ã́ˤ, ã̀ˤ, ã̂ˤ), õˤ(õˤ, ṍˤ, õ̀ˤ, õ̂ˤ),
        ã̰(ã̰, ã̰́, ã̰̀, ã̰̂),

        o(o, ó, ò, ô, ŏ̀),
        o̰(o̰, ó̰, ò̰, ô̰, oʔo̰, óʔŏ̰́),
        oː(oː, óː, òː, ôː),
        õ̰(õ̰, ṍ̰, õ̰̀, õ̰̂),

        u(u, ú, ù, û),
        ũ(ũ, ṹ, ũ̀, ũ̂), 
        ṵ(ṵ, ṵ́, ṵ̀, ṵ̂, uʔṵ, úʔṵ̆́, úʔṵ̆̀, ùʔṵ̆̀),
        uː(uː, úː, ùː, ûː),
        ũː(ũː, ṹː, ũ̀ː, ũ̂ː), 
        uˤ(uˤ, úˤ, ùˤ, ûˤ, oˤ, óˤ, òˤ, ôˤ), 
        ũˤ(ũˤ, ṹˤ, ũ̀ˤ, ũ̂ˤ), 
        ṵ̃(ṵ̃, ṵ̃́, ṵ̃̀, ṵ̃̂)
        """, "vowel")
        # The rising-arrow intonation mark is expected in transcripts but is
        # deliberately absent from the inventories, so register it as missing.
        self.f.known_missings.update(self.f.parse_list([
            "/↗/",
        ]))

    def test_get_maximal(self):
        # get_maximal should split the candidate list into the longest
        # match(es) and the remainder, preserving order.
        max_, store = self.f.get_maximal(['ó', 'ʔ', 'ẁ̰'])
        assert max_ == ['ó']
        assert store == ['ʔ', 'ẁ̰']

    def test_one(self):
        # The transcript "jó↗óʔẁ̰" was previously parsed as:
        #130.	j(j, ɲ)                            	consonant
        #131.	o(o, ó, ò, ô, ŏ̀)                  	vowel
        #132.	↗                                  	other
        #133.	óʔẁ̰                               	missing   	*
        #134.	                                   	punctuation
        # i.e. the glottalised consonant 'ʔẁ̰' was fused with the preceding
        # vowel into one unrecognised token; it must resolve to ʔw̰'s entry.
        transcript = self.f.standardise("jó↗óʔẁ̰")
        parsed = self.f.parse_transcript(transcript)
        assert parsed[0] == Token("j(j, ɲ)")
        assert parsed[1] == Token("o(o, ó, ò, ô, ŏ̀)")
        assert parsed[2] == MissingToken("↗")
        assert parsed[3] == Token("o(o, ó, ò, ô, ŏ̀)")
        assert parsed[4] == Token("ʔw̰(ʔw̰, ʔŋ̰ʷ, ʔẁ̰)")