Пример #1
0
def test_find_verbs_in_hkcancor():
    """Search HKCANCOR with ``pos='^V'`` and pin the hit count and first ten hits."""
    hits = HKCANCOR.search(pos='^V')
    assert len(hits) == 29012

    # Only the leading ten results are spelled out; each is compared as a
    # 4-tuple whose last element is an empty string.
    first_ten = [
        ('去', 'V', 'heoi3', ''),
        ('去', 'V', 'heoi3', ''),
        ('旅行', 'VN', 'leoi5hang4', ''),
        ('有冇', 'V1', 'jau5mou5', ''),
        ('要', 'VU', 'jiu3', ''),
        ('有得', 'VU', 'jau5dak1', ''),
        ('冇得', 'VU', 'mou5dak1', ''),
        ('去', 'V', 'heoi3', ''),
        ('係', 'V', 'hai6', ''),
        ('係', 'V', 'hai6', ''),
    ]
    assert hits[:10] == first_ten
Пример #2
0
def test_find_verbs_in_hkcancor():
    """Search HKCANCOR with ``pos="^V"`` and pin the hit count and first ten tokens."""

    def tok(word, pos, jyutping):
        # Every expected token in this test has mor and gra set to None.
        return Token(word=word, pos=pos, jyutping=jyutping, mor=None, gra=None)

    hits = HKCANCOR.search(pos="^V")
    assert len(hits) == 29726

    expected_head = [
        tok("去", "V", "heoi3"),
        tok("去", "V", "heoi3"),
        tok("旅行", "VN", "leoi5hang4"),
        tok("有冇", "V1", "jau5mou5"),
        tok("要", "VU", "jiu3"),
        tok("有得", "VU", "jau5dak1"),
        tok("冇得", "VU", "mou5dak1"),
        tok("去", "V", "heoi3"),
        tok("係", "V", "hai6"),
        tok("係", "V", "hai6"),
    ]
    assert hits[:10] == expected_head
Пример #3
0
def test_find_verbs_in_hkcancor():
    """Verify the size and the leading ten results of ``HKCANCOR.search(pos='^V')``."""
    # The entries for 去 and 係 recur among the first ten hits, so name them once.
    heoi = ('去', 'V', 'heoi3', '')
    hai = ('係', 'V', 'hai6', '')
    head = [
        heoi,
        heoi,
        ('旅行', 'VN', 'leoi5hang4', ''),
        ('有冇', 'V1', 'jau5mou5', ''),
        ('要', 'VU', 'jiu3', ''),
        ('有得', 'VU', 'jau5dak1', ''),
        ('冇得', 'VU', 'mou5dak1', ''),
        heoi,
        hai,
        hai,
    ]

    results = HKCANCOR.search(pos='^V')
    assert len(results) == 29012
    assert results[:10] == head
Пример #4
0
def test_find_verbs_in_hkcancor():
    """Run ``HKCANCOR.search(pos="^V")``; assert the total and the first ten hits."""
    found = HKCANCOR.search(pos="^V")
    assert len(found) == 29012

    # Each expected hit is a 4-tuple ending in an empty string; that constant
    # last element is appended here instead of being repeated on every row.
    leading = [
        (word, tag, jyutping, "")
        for word, tag, jyutping in (
            ("去", "V", "heoi3"),
            ("去", "V", "heoi3"),
            ("旅行", "VN", "leoi5hang4"),
            ("有冇", "V1", "jau5mou5"),
            ("要", "VU", "jiu3"),
            ("有得", "VU", "jau5dak1"),
            ("冇得", "VU", "mou5dak1"),
            ("去", "V", "heoi3"),
            ("係", "V", "hai6"),
            ("係", "V", "hai6"),
        )
    ]
    assert found[:10] == leading
Пример #5
0
def test_word_range():
    """Search with ``pos="^V"`` and ``word_range=(1, 2)``; pin the first two results.

    Each expected result is a list of four ``Token`` objects.
    """
    windows = (
        (("啲", "U", "di1"), ("去", "V", "heoi3"),
         ("唔", "D", "m4"), ("去", "V", "heoi3")),
        (("唔", "D", "m4"), ("去", "V", "heoi3"),
         ("旅行", "VN", "leoi5hang4"), ("啊", "Y", "aa3")),
    )
    # Expand the compact (word, pos, jyutping) triples into Token objects;
    # mor and gra are None for every expected token here.
    expected = [
        [
            Token(word=w, pos=p, jyutping=j, mor=None, gra=None)
            for w, p, j in window
        ]
        for window in windows
    ]

    matches = HKCANCOR.search(pos="^V", word_range=(1, 2))
    assert len(matches) == 29726
    assert matches[:2] == expected
Пример #6
0
def test_by_utterances_true():
    """Search with ``pos="^V"`` and ``by_utterances=True``; pin the first two results.

    Each expected result is a list of ``Token`` objects, and the first two
    results are expected to be equal to each other.
    """
    all_verbs = HKCANCOR.search(pos="^V", by_utterances=True)
    assert len(all_verbs) == 29726

    # NOTE(review): presumably the same utterance is returned once per matching
    # token (it contains two "V" tokens) -- confirm against the search() docs.
    utterance = [
        Token(word="喂", pos="E", jyutping="wai3", mor=None, gra=None),
        Token(word="遲", pos="A", jyutping="ci4", mor=None, gra=None),
        Token(word="啲", pos="U", jyutping="di1", mor=None, gra=None),
        Token(word="去", pos="V", jyutping="heoi3", mor=None, gra=None),
        Token(word="唔", pos="D", jyutping="m4", mor=None, gra=None),
        Token(word="去", pos="V", jyutping="heoi3", mor=None, gra=None),
        Token(word="旅行",
              pos="VN",
              jyutping="leoi5hang4",
              mor=None,
              gra=None),
        Token(word="啊", pos="Y", jyutping="aa3", mor=None, gra=None),
        Token(word="?", pos="?", jyutping=None, mor=None, gra=None),
    ]
    assert all_verbs[:2] == [utterance, utterance]
Пример #7
0
def test_utterance_range():
    """Search with ``pos="^V"`` and ``utterance_range=(0, 1)``; pin the first two results.

    Each expected result is a list of two utterances, and each utterance is a
    list of ``Token`` objects. The first two results are expected to be equal.
    """

    def tok(word, pos, jyutping):
        # mor and gra are None for every expected token in this test.
        return Token(word=word, pos=pos, jyutping=jyutping, mor=None, gra=None)

    matching_utterance = [
        tok("喂", "E", "wai3"),
        tok("遲", "A", "ci4"),
        tok("啲", "U", "di1"),
        tok("去", "V", "heoi3"),
        tok("唔", "D", "m4"),
        tok("去", "V", "heoi3"),
        tok("旅行", "VN", "leoi5hang4"),
        tok("啊", "Y", "aa3"),
        tok("?", "?", None),
    ]
    following_utterance = [
        tok("你", "R", "nei5"),
        tok("老公", "N", "lou5gung1"),
        tok("有冇", "V1", "jau5mou5"),
        tok("平", "A", "peng4"),
        tok("機票", "N", "gei1piu3"),
        tok("啊", "Y", "aa3"),
        tok("?", "?", None),
    ]
    pair = [matching_utterance, following_utterance]

    results = HKCANCOR.search(pos="^V", utterance_range=(0, 1))
    assert len(results) == 29726
    assert results[:2] == [pair, pair]
Пример #8
0
def test_by_tokens_false():
    """Search with ``pos="^V"`` and ``by_tokens=False``; pin the count and first ten results.

    The first ten results are compared against plain strings.
    """
    hits = HKCANCOR.search(pos="^V", by_tokens=False)
    assert len(hits) == 29726
    assert hits[:10] == [
        "去", "去", "旅行", "有冇", "要",
        "有得", "冇得", "去", "係", "係",
    ]