def get_project(project_name):
    """Build a lightweight project-like object for a French vocabulary.

    The returned object is a plain attribute container with ``id``,
    ``language``, ``analyzer``, ``vocab`` and ``subjects`` set.

    Args:
        project_name: Identifier stored as ``project.id``. If it contains
            the substring ``"dewey"`` the ``dewey`` vocabulary is used,
            otherwise ``rameau``.

    Returns:
        An object whose ``subjects`` attribute is the result of
        ``SubjectIndex.load`` on the selected vocabulary's subjects path.
    """
    # Local import so this block stays self-contained.
    import os.path

    class Object(object):
        pass

    vocab = 'dewey' if "dewey" in project_name else 'rameau'

    # The path used to be written as a non-raw string with backslash
    # separators. Python interpreted "\v" as a vertical tab and "\r" as a
    # carriage return (and "\d" / "\s" are invalid escapes), so the path
    # handed to SubjectIndex.load was corrupted. Joining the components
    # avoids escape sequences entirely and uses the platform separator.
    # NOTE(review): the leading directory '.data' is kept exactly as in the
    # original literal; confirm it is not meant to be './data'.
    subjects_path = os.path.join('.data', 'vocabs', vocab, 'subjects')

    project = Object()
    project.id = project_name
    project.language = 'fr'
    project.analyzer = annif.analyzer.get_analyzer('snowball(french)')
    project.vocab = vocab
    project.subjects = SubjectIndex.load(subjects_path)
    return project
def test_analysishits_vector_notfound(document_corpus):
    """Check that a single 'not found' hit yields an all-zero-sum vector.

    The hit's URI is 'http://example.com/notfound', presumably absent from
    the ``document_corpus`` fixture (verify against the fixture).
    """
    subject_index = SubjectIndex(document_corpus)
    unknown_hit = AnalysisHit(
        uri='http://example.com/notfound',
        label='not found',
        score=1.0)
    result = ListAnalysisResult([unknown_hit], subject_index)

    # The hit carries score 1.0, yet the vector is expected to sum to zero.
    vector_total = result.vector.sum()
    assert vector_total == 0
def test_list_suggestions_vector_notfound(document_corpus):
    """Check that one 'not found' suggestion gives a vector summing to zero.

    The suggestion's URI is 'http://example.com/notfound', presumably not
    part of the ``document_corpus`` fixture (verify against the fixture).
    """
    index = SubjectIndex(document_corpus)
    missing = SubjectSuggestion(
        uri='http://example.com/notfound',
        label='not found',
        score=1.0)
    result = ListSuggestionResult([missing], index)

    # Despite the 1.0 score, the vector should hold no weight at all.
    assert result.vector.sum() == 0
def test_load_tsv_uri_brackets(tmpdir):
    """Load a two-row TSV whose URIs are wrapped in angle brackets.

    The loaded entries are expected to expose the URIs without the
    brackets and ``None`` in the third position.
    """
    tsv_path = tmpdir.join('subjects.tsv')
    tsv_path.write(
        "<http://www.yso.fi/onto/yso/p8993>\thylyt\n"
        "<http://www.yso.fi/onto/yso/p9285>\tneoliittinen kausi")

    loaded = SubjectIndex.load(str(tsv_path))

    expected_rows = [
        ('http://www.yso.fi/onto/yso/p8993', 'hylyt', None),
        ('http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', None),
    ]
    assert len(loaded) == 2
    for position, expected in enumerate(expected_rows):
        assert loaded[position] == expected
def test_load_tsv_with_notations(tmpdir):
    """Load a two-row TSV that has a third (notation) column.

    Each loaded entry is expected to carry the third column's text as a
    string in its last position.
    """
    source = tmpdir.join('subjects-with-notations.tsv')
    contents = (
        "http://www.yso.fi/onto/yso/p8993\thylyt\t42.42\n"
        "http://www.yso.fi/onto/yso/p9285\tneoliittinen kausi\t42.0")
    source.write(contents)

    subject_index = SubjectIndex.load(str(source))
    assert len(subject_index) == 2

    first_entry = subject_index[0]
    assert first_entry == (
        'http://www.yso.fi/onto/yso/p8993', 'hylyt', '42.42')

    second_entry = subject_index[1]
    assert second_entry == (
        'http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', '42.0')
def test_load_tsv_with_deprecated(tmpdir):
    """Load a TSV in which the middle row has an empty label.

    All three rows are expected to count toward ``len(index)``, while
    ``index.active`` is expected to contain only the first and third rows,
    each prefixed with its position in the full index.
    """
    tsv_file = tmpdir.join('subjects.tsv')
    tsv_file.write(
        "<http://www.yso.fi/onto/yso/p8993>\thylyt\n"
        "<http://example.org/deprecated>\t\n"
        "<http://www.yso.fi/onto/yso/p9285>\tneoliittinen kausi")

    loaded = SubjectIndex.load(str(tsv_file))
    assert len(loaded) == 3
    assert len(loaded.active) == 2

    # Position 1 (the empty-label row) is expected to be skipped.
    expected_active = [
        (0, 'http://www.yso.fi/onto/yso/p8993', 'hylyt', None),
        (2, 'http://www.yso.fi/onto/yso/p9285', 'neoliittinen kausi', None),
    ]
    active_rows = list(loaded.active)
    for position, expected in enumerate(expected_active):
        assert active_rows[position] == expected
def test_analysishits_vector(document_corpus):
    """Check the score vector built from two hits with known labels.

    The vector must be a NumPy array with one slot per subject, sum to 1.5,
    and hold 1.0 for 'sinetit', 0.5 for 'viikingit' and 0.0 elsewhere.
    """
    subject_index = SubjectIndex(document_corpus)
    scored_hits = [
        AnalysisHit(uri='http://www.yso.fi/onto/yso/p7141',
                    label='sinetit',
                    score=1.0),
        AnalysisHit(uri='http://www.yso.fi/onto/yso/p6479',
                    label='viikingit',
                    score=0.5),
    ]
    result = ListAnalysisResult(scored_hits, subject_index)

    assert isinstance(result.vector, np.ndarray)
    assert len(result.vector) == len(subject_index)
    assert result.vector.sum() == 1.5

    # Any label not listed here is expected to score exactly zero.
    expected_by_label = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(result.vector):
        label = subject_index[subject_id][1]
        assert score == expected_by_label.get(label, 0.0)
def test_list_suggestions_vector(document_corpus):
    """Check the score vector built from two suggestions with known labels.

    The vector must be a NumPy array as long as the subject index, sum to
    1.5, and contain 1.0 at the 'sinetit' subject, 0.5 at the 'viikingit'
    subject and 0.0 at every other position.
    """
    index = SubjectIndex(document_corpus)
    sinetit = SubjectSuggestion(
        uri='http://www.yso.fi/onto/yso/p7141',
        label='sinetit',
        score=1.0)
    viikingit = SubjectSuggestion(
        uri='http://www.yso.fi/onto/yso/p6479',
        label='viikingit',
        score=0.5)
    result = ListSuggestionResult([sinetit, viikingit], index)

    assert isinstance(result.vector, np.ndarray)
    assert len(result.vector) == len(index)
    assert result.vector.sum() == 1.5

    for position, value in enumerate(result.vector):
        label = index[position][1]
        if label == 'sinetit':
            wanted = 1.0
        elif label == 'viikingit':
            wanted = 0.5
        else:
            wanted = 0.0
        assert value == wanted
LazySuggestionResult, ListSuggestionResult, SuggestionFilter from annif.corpus import SubjectIndex class DotDict(dict): pass project = DotDict() project.name = 'Letters Omikuji Parabel English' project.language = 'en' project.backend = 'letters-omikuji-bonsai-en' project.analyzer = annif.analyzer.get_analyzer('snowball(english)') project.limit = 10 project.vocab = 'letters-unesco' project.subjects = SubjectIndex.load('./data/vocabs/letters-unesco/subjects') project.datadir = str('./data/projects/letters-omikuji-bonsai-en') backend_type = annif.backend.get_backend("omikuji") backend = backend_type(backend_id='letters-omikuji-bonsai-en', config_params={'limit': 5}, project=project) def append_suggestions_node(results, txt_xml_path): # https://stackoverflow.com/questions/28782864/modify-xml-file-using-elementtree tree = ET.ElementTree(file=txt_xml_path) root = tree.getroot() #subjects = ET.SubElement(root, "subjects") subjects = ET.Element('subjects')