示例#1
0
def test_affixed_entries():
    """Check what has_word() reports after two nouns go through inflect_n_add().

    Every lookup is compared against the exact dict returned by has_word().
    """
    trie = PyBoTrie(BoSyl(), 'empty', config=Config("pybo.yaml"))
    trie.inflect_n_add('གྲུབ་མཐའ་', 'NOUN', ins='data')  # adds all inflected forms
    trie.inflect_n_add('བཀྲ་ཤིས་', 'NOUN', ins='data')  # only adds one entry in the trie

    def found(data):
        # shape of a successful lookup
        return {'data': data, 'exists': True}

    not_found = {'exists': False}

    # (looked-up form, expected has_word() result), checked in this order
    lookups = [
        ('བཀྲ་ཤིས་', found('NOUNᛃᛃᛃ')),
        ('བཀྲ་ཤིསའི་', not_found),
        ('གྲུབ་མཐའ་', found('NOUNᛃᛃᛃ')),
        ('གྲུབ་མཐར་', found('NOUNᛃlaᛃ1ᛃaa')),
        ('གྲུབ་མཐས་', found('NOUNᛃgisᛃ1ᛃaa')),
        ('གྲུབ་མཐའི་', found('NOUNᛃgiᛃ2ᛃaa')),
        ('གྲུབ་མཐའམ་', found('NOUNᛃamᛃ2ᛃaa')),
        ('གྲུབ་མཐའང་', found('NOUNᛃangᛃ2ᛃaa')),
        ('གྲུབ་མཐའོ་', found('NOUNᛃoᛃ2ᛃaa')),
        ('གྲུབ་མཐའིའོ་', found('NOUNᛃgi+oᛃ4ᛃaa')),
    ]
    for form, expected in lookups:
        assert trie.has_word(form) == expected
示例#2
0
def test_config():
    """Exercise Config: default filename and the paths returned for trie data."""
    config = Config()

    # default config filename
    assert config.filename.name == 'pybo.yaml'  # config.filename is a Path object

    # paths for trie content
    main, custom = config.get_tok_data_paths('POS')
    # each profile contains one or more sections
    assert list(main) == ['lexica_bo', 'pos']
    # each element is a Path object leading to a resource file
    assert isinstance(main['pos'][0], Path)

    # custom files to overwrite the existing trie can be added as follows
    assert len(custom) == 0
    main, custom = config.get_tok_data_paths('POS', modifs='trie_data/')
    assert list(custom) == ['lexica_bo', 'lemmas']
    # Path.glob() yields entries in arbitrary, filesystem-dependent order, so
    # the comparison against the directory listing must not rely on it.
    on_disk = sorted(entry.name for entry in Path('trie_data/').glob('*'))
    assert sorted(custom) == on_disk

    # overwriting the main profile
    main, custom = config.get_tok_data_paths('trie_data/', mode='custom')
    assert list(main) == ['lexica_bo', 'lemmas']
示例#3
0
def test_deactivate_trie_entries():
    """Check that deactivated words are no longer reported by has_word()."""
    trie = PyBoTrie(BoSyl(), 'empty', config=Config("pybo.yaml"))

    # single word, deactivated directly
    trie.rebuild_trie()
    word = 'བཀྲ་ཤིས་'
    trie.add(word)
    # has_word() returns a dict, and a non-empty dict is truthy even when it
    # is {'exists': False}; the flag itself has to be checked.
    assert trie.has_word(word)['exists']
    trie.deactivate_word(word)
    assert trie.has_word(word) == {'exists': False}

    # several words, deactivated through a word-list file
    # (presumably the file lists both words below -- verify against the resource)
    trie.rebuild_trie()
    words = ['བཀྲ་ཤིས་', 'བདེ་ལེགས་']
    for entry in words:
        trie.add(entry)
    trie.deactivate_wordlist('resources/remove_vocabs/test.txt')
    for entry in words:
        assert trie.has_word(entry) == {'exists': False}
示例#4
0
def test_pybotrie():
    """Check that add() makes only the given word available, with its data."""
    trie = PyBoTrie(BoSyl(), 'empty', config=Config("pybo.yaml"))
    trie.rebuild_trie()

    added_word = 'གྲུབ་མཐའ་'
    tag = 'pos-tag'
    trie.add(added_word, tag)

    # this other form was never added, so the lookup must fail
    other_form = 'གྲུབ་མཐའི་'
    assert trie.has_word(other_form) == {'exists': False}

    # the added word is found and carries the data passed to add()
    assert trie.has_word(added_word) == {'exists': True, 'data': tag}
示例#5
0
def test_building_trie():
    """Rebuild a trie created with the 'pytib' argument and spot-check two entries."""
    trie = PyBoTrie(BoSyl(), 'pytib', config=Config("pybo.yaml"))
    trie.rebuild_trie()

    # (word, data string expected in the has_word() result)
    expected_entries = (
        ('བཀྲ་ཤིས་', 'NOUNᛃᛃᛃ'),
        ('ཤིས་', 'VERBᛃᛃᛃ'),
    )
    for word, data in expected_entries:
        assert trie.has_word(word) == {'data': data, 'exists': True}