Exemplo n.º 1
0
def get_project(project_name):
    """Build a minimal stand-in project object with French settings.

    Args:
        project_name: Identifier stored as ``project.id``. When it contains
            the substring ``"dewey"`` the 'dewey' vocabulary is selected,
            otherwise 'rameau'.

    Returns:
        A plain attribute container with ``id``, ``language``, ``analyzer``,
        ``vocab`` and ``subjects`` set.
    """
    import os

    class Object(object):
        pass

    project = Object()
    project.id = project_name
    project.language = 'fr'
    project.analyzer = annif.analyzer.get_analyzer('snowball(french)')
    project.vocab = 'dewey' if "dewey" in project_name else 'rameau'
    # The path is assembled from its components instead of being written as
    # a backslash-separated literal: in a normal (non-raw) string '\v' is a
    # vertical tab and '\r' a carriage return, which corrupted both paths.
    # NOTE(review): the top-level directory is kept as '.data' exactly as it
    # was written; confirm it was not meant to be './data'.
    project.subjects = SubjectIndex.load(
        os.path.join('.data', 'vocabs', project.vocab, 'subjects'))
    return project
Exemplo n.º 2
0
def test_analysishits_vector_notfound(document_corpus):
    """Check that a single 'not found' hit produces a vector summing to 0."""
    subjects = SubjectIndex(document_corpus)
    unknown_hit = AnalysisHit(
        uri='http://example.com/notfound',
        label='not found',
        score=1.0,
    )
    hits = ListAnalysisResult([unknown_hit], subjects)
    assert hits.vector.sum() == 0
Exemplo n.º 3
0
def test_list_suggestions_vector_notfound(document_corpus):
    """Check that a single 'not found' suggestion gives a zero-sum vector."""
    subjects = SubjectIndex(document_corpus)
    unknown_suggestion = SubjectSuggestion(
        uri='http://example.com/notfound',
        label='not found',
        score=1.0,
    )
    suggestions = ListSuggestionResult([unknown_suggestion], subjects)
    assert suggestions.vector.sum() == 0
Exemplo n.º 4
0
def test_load_tsv_uri_brackets(tmpdir):
    """Load a two-row TSV whose URIs are wrapped in angle brackets."""
    tsv_path = tmpdir.join('subjects.tsv')
    tsv_path.write(
        "<http://www.yso.fi/onto/yso/p8993>\thylyt\n"
        "<http://www.yso.fi/onto/yso/p9285>\tneoliittinen kausi")

    index = SubjectIndex.load(str(tsv_path))

    # Expected entries, in file order; the brackets are absent from the URIs.
    expected = [
        ('http://www.yso.fi/onto/yso/p8993', 'hylyt', None),
        ('http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', None),
    ]
    assert len(index) == 2
    for position, entry in enumerate(expected):
        assert index[position] == entry
Exemplo n.º 5
0
def test_load_tsv_with_notations(tmpdir):
    """Load a two-row TSV that carries a third (notation) column."""
    tsv_path = tmpdir.join('subjects-with-notations.tsv')
    tsv_path.write(
        "http://www.yso.fi/onto/yso/p8993\thylyt\t42.42\n"
        "http://www.yso.fi/onto/yso/p9285\tneoliittinen kausi\t42.0")

    index = SubjectIndex.load(str(tsv_path))

    # Expected entries, in file order; the third column is kept as a string.
    expected = [
        ('http://www.yso.fi/onto/yso/p8993', 'hylyt', '42.42'),
        ('http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', '42.0'),
    ]
    assert len(index) == 2
    for position, entry in enumerate(expected):
        assert index[position] == entry
Exemplo n.º 6
0
def test_load_tsv_with_deprecated(tmpdir):
    """Load a TSV with one empty-label row and check the ``active`` view."""
    tsv_path = tmpdir.join('subjects.tsv')
    tsv_path.write(
        "<http://www.yso.fi/onto/yso/p8993>\thylyt\n"
        "<http://example.org/deprecated>\t\n"
        "<http://www.yso.fi/onto/yso/p9285>\tneoliittinen kausi")

    index = SubjectIndex.load(str(tsv_path))
    assert len(index) == 3
    assert len(index.active) == 2

    # Rows of the active view are expected to carry their original position
    # in the index (0 and 2), skipping the empty-label row at position 1.
    expected_active = [
        (0, 'http://www.yso.fi/onto/yso/p8993', 'hylyt', None),
        (2, 'http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', None),
    ]
    active_rows = list(index.active)
    for position, row in enumerate(expected_active):
        assert active_rows[position] == row
Exemplo n.º 7
0
def test_analysishits_vector(document_corpus):
    """Check type, length, sum and per-subject scores of the hits vector."""
    subjects = SubjectIndex(document_corpus)
    hit_list = [
        AnalysisHit(
            uri='http://www.yso.fi/onto/yso/p7141',
            label='sinetit',
            score=1.0),
        AnalysisHit(
            uri='http://www.yso.fi/onto/yso/p6479',
            label='viikingit',
            score=0.5),
    ]
    hits = ListAnalysisResult(hit_list, subjects)

    assert isinstance(hits.vector, np.ndarray)
    assert len(hits.vector) == len(subjects)
    assert hits.vector.sum() == 1.5

    # Score expected per subject label; every other label must score 0.0.
    expected_scores = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(hits.vector):
        label = subjects[subject_id][1]
        assert score == expected_scores.get(label, 0.0)
Exemplo n.º 8
0
def test_list_suggestions_vector(document_corpus):
    """Check type, length, sum and per-subject scores of the vector."""
    subjects = SubjectIndex(document_corpus)
    suggestion_list = [
        SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p7141',
            label='sinetit',
            score=1.0),
        SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p6479',
            label='viikingit',
            score=0.5),
    ]
    suggestions = ListSuggestionResult(suggestion_list, subjects)

    assert isinstance(suggestions.vector, np.ndarray)
    assert len(suggestions.vector) == len(subjects)
    assert suggestions.vector.sum() == 1.5

    # Score expected per subject label; every other label must score 0.0.
    expected_scores = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(suggestions.vector):
        label = subjects[subject_id][1]
        assert score == expected_scores.get(label, 0.0)
Exemplo n.º 9
0
    LazySuggestionResult, ListSuggestionResult, SuggestionFilter
from annif.corpus import SubjectIndex


class DotDict(dict):
    """A ``dict`` subclass whose instances also accept arbitrary attributes.

    No methods are overridden: attributes assigned on an instance live in
    the instance ``__dict__`` and do not become dictionary keys.
    """


# Project settings, attached as plain attributes on a DotDict instance.
project = DotDict()
project.name = 'Letters Omikuji Parabel English'
project.language = 'en'
project.backend = 'letters-omikuji-bonsai-en'
project.analyzer = annif.analyzer.get_analyzer('snowball(english)')
project.limit = 10
project.vocab = 'letters-unesco'
project.subjects = SubjectIndex.load('./data/vocabs/letters-unesco/subjects')
project.datadir = './data/projects/letters-omikuji-bonsai-en'

# Look up the "omikuji" backend class and instantiate it under the same id
# that is stored in project.backend.
backend_type = annif.backend.get_backend("omikuji")
backend = backend_type(
    backend_id=project.backend,
    config_params={'limit': 5},
    project=project,
)


def append_suggestions_node(results, txt_xml_path):
    # https://stackoverflow.com/questions/28782864/modify-xml-file-using-elementtree
    tree = ET.ElementTree(file=txt_xml_path)
    root = tree.getroot()

    #subjects = ET.SubElement(root, "subjects")
    subjects = ET.Element('subjects')