Example #1
def test_mini_1_1():
    assert len(wn.lexicons()) == 4
    assert len(wn.lexicons(lang='en')) == 2
    assert len(wn.lexicons(lang='ja')) == 1
    assert wn.lexicons(lang='ja')[0].logo == 'logo.svg'

    w = wn.Wordnet(lang='en')
    assert len(w.lexicons()) == 2
    assert len(w.expanded_lexicons()) == 0
    assert len(w.word('test-en-exemplify-v').lemma().tags()) == 1

    w = wn.Wordnet(lang='ja')
    assert len(w.lexicons()) == 1
    assert len(w.expanded_lexicons()) == 1
    assert len(w.synsets('例え')[0].hypernyms()) == 1
    assert w.synsets('例え')[0].lexfile() == 'noun.cognition'
    assert len(w.word('test-ja-例え-n').lemma().pronunciations()) == 1
    assert w.word('test-ja-例え-n').forms()[1].id == 'test-ja-例え-n-たとえ'
    p = w.word('test-ja-例え-n').lemma().pronunciations()[0]
    assert p.value == 'tatoe'
    assert p.variety == 'standard'
    assert p.notation == 'ipa'
    assert p.phonemic
    assert p.audio == 'tatoe.wav'

    w = wn.Wordnet(lang='ja', expand='')
    assert len(w.lexicons()) == 1
    assert len(w.expanded_lexicons()) == 0
    assert len(w.synsets('例え')[0].hypernyms()) == 0

    w = wn.Wordnet(lexicon='test-en test-en-ext')
    assert len(w.lexicons()) == 2
    assert len(w.expanded_lexicons()) == 0
    assert len(w.synsets('fire')[0].hyponyms()) == 1
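
The same query pattern works outside the test fixtures; a minimal sketch, assuming a real lexicon such as 'oewn:2021' has already been installed with wn.download('oewn:2021'):

import wn

w = wn.Wordnet(lexicon='oewn:2021')
for ss in w.synsets('example', pos='n'):
    print(ss.id, ss.lexfile(), [hyp.id for hyp in ss.hypernyms()])
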
Example #2
File: db_test.py Project: dpalmasan/wn
def test_remove_extension(mini_lmf_1_0, mini_lmf_1_1):
    with tempfile.TemporaryDirectory('wn_data_1_1_trigger') as dir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = dir
        wn.add(mini_lmf_1_0)
        wn.add(mini_lmf_1_1)
        assert len(wn.lexicons()) == 4
        wn.remove('test-en-ext')
        assert len(wn.lexicons()) == 3
        wn.remove('test-ja')
        assert len(wn.lexicons()) == 2
        wn.add(mini_lmf_1_1)
        assert len(wn.lexicons()) == 4
        wn.remove('test-en')
        assert {lex.id for lex in wn.lexicons()} == {'test-es', 'test-ja'}
        wn.config.data_directory = old_data_dir
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()
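
The same add/remove workflow can be reproduced outside pytest by pointing wn at a scratch data directory; a minimal sketch, where 'mini-lmf.xml' is a placeholder for any WN-LMF file:

import tempfile

import wn

with tempfile.TemporaryDirectory() as scratch:
    wn.config.data_directory = scratch    # keep this database isolated
    wn.add('mini-lmf.xml')                # index the lexicons in the file
    print([lex.specifier() for lex in wn.lexicons()])
    wn.remove('test-en')                  # remove a lexicon by its id
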
Example #3
def test_export(mini_lmf_1_0, tmp_path):
    tmpdir = tmp_path / 'test_export'
    tmpdir.mkdir()
    tmppath = tmpdir / 'mini_lmf_export.xml'
    lexicons = wn.lexicons(lexicon='test-en test-es')
    wn.export(lexicons, tmppath)

    if hasattr(ET, 'canonicalize'):  # available from Python 3.8
        # remove comments, indentation, etc.
        orig = ET.canonicalize(from_file=mini_lmf_1_0, strip_text=True)
        temp = ET.canonicalize(from_file=tmppath, strip_text=True)
        # additional transformation to help with debugging
        orig = orig.replace('<', '\n<')
        temp = temp.replace('<', '\n<')
        assert orig == temp
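
wn.export() takes the sequence of Lexicon objects returned by wn.lexicons() and a destination path; a minimal sketch, assuming the 'test-en' and 'test-es' lexicons are installed and using an arbitrary output file name:

import wn

lexicons = wn.lexicons(lexicon='test-en test-es')
wn.export(lexicons, 'mini-lmf-export.xml')
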
Example #4
def test_lexicons_empty():
    assert len(wn.lexicons()) == 0
Example #5
def test_lexicons_unknown():
    results = wn.lexicons(lang='unk')
    assert len(results) == 0
    results = wn.lexicons(lexicon='test-unk')
    assert len(results) == 0
Example #6
def test_lexicons_mini():
    assert len(wn.lexicons()) == 2
    assert all(isinstance(lex, wn.Lexicon) for lex in wn.lexicons())

    results = wn.lexicons(lang='en')
    assert len(results) == 1 and results[0].language == 'en'
    results = wn.lexicons(lang='es')
    assert len(results) == 1 and results[0].language == 'es'

    results = wn.lexicons(lexicon='*')
    assert len(results) == 2
    results = wn.lexicons(lexicon='*:1')
    assert len(results) == 2
    results = wn.lexicons(lexicon='test-en')
    assert len(results) == 1 and results[0].language == 'en'
    results = wn.lexicons(lexicon='test-en:1')
    assert len(results) == 1 and results[0].language == 'en'
    results = wn.lexicons(lexicon='test-en:*')
    assert len(results) == 1 and results[0].language == 'en'

    assert wn.lexicons(lexicon='test-en')[0].specifier() == 'test-en:1'
    assert wn.lexicons(lexicon='test-es')[0].specifier() == 'test-es:1'

    assert wn.lexicons(lexicon='test-en')[0].requires() == {}
    assert wn.lexicons(lexicon='test-es')[0].requires() == {}
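
The lexicon argument is a space-separated list of id:version specifiers, with '*' as a wildcard for either part; a minimal sketch against the same mini lexicons:

import wn

print(wn.lexicons(lexicon='*:1'))              # any lexicon at version 1
print(wn.lexicons(lexicon='test-en:*'))        # any version of test-en
print(wn.lexicons(lexicon='test-en test-es'))  # several specifiers at once
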
Example #7
    'https://opensource.org/licenses/Apache-2.0': 'Apache-2.0',
    'https://www.unicode.org/license.html': 'unicode'
}

# collect the ILI identifiers of the core synsets
core = []
with open('wn-core-ili.tab') as f:
    for line in f:
        core.append(line.strip())


def link(text, url):
    return f"<a href='{url}'>{text}</a>"


stats = []
for lex in wn.lexicons():
    # FIXME: link for wordnet license
    synsets = wn.synsets(lexicon=lex.id)
    incore = len([s for s in synsets if s.ili and s.ili.id in core])
    data = f"""  <tr>
    <th>{lex.specifier()}</th>
    <td>{lex.language}</td>
    <td>{link(lex.label, lex.url)}</td>
    <td align='right'>{len(synsets):,d}</td>
    <td align='right'>{len(wn.senses(lexicon=lex.id)):,d}</td>
    <td align='right'>{len(wn.words(lexicon=lex.id)):,d}</td>
    <td align='right'>{incore/len(core):.1%}</td>
    <td>{link(licenses[lex.license], lex.license)}</td>
    </tr>"""
    stats.append(data)
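
A hypothetical continuation, only to show how the collected rows might be used; the file name and surrounding markup are illustrative:

with open('stats.html', 'w') as out:
    out.write('<table>\n')
    out.write('\n'.join(stats))
    out.write('\n</table>\n')
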
Example #8
File: __main__.py Project: dpalmasan/wn
def _lexicons(args):
    for lex in wn.lexicons(lang=args.lang, lexicon=args.lexicon):
        print('\t'.join((lex.id, lex.version, f'[{lex.language}]', lex.label)))
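
This function backs the lexicons subcommand of the package's command-line interface; an illustrative invocation, assuming the argparse flags mirror the args.lang and args.lexicon attributes above:

python -m wn lexicons --lang en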