def test_create_words(self):
    """create_words should reuse stored words/lemmata and return one entry per distinct key."""
    from amcat.models.token import TokenValues

    # Result is unused; the call is retained in case it has setup side
    # effects -- TODO confirm.
    amcattest.get_test_language()

    stored_lemma = Lemma.objects.create(lemma="a", pos="b")
    stored_word = Word.objects.create(lemma=stored_lemma, word="b")

    # Two lemmata x four word strings (each word string occurs twice).
    tokens = [
        TokenValues(None, None, word=word_str, lemma=lemma_str, pos="b",
                    major=None, minor=None, namedentity=None)
        for lemma_str in "ab"
        for word_str in "bbcc"
    ]

    # 2 to cache lemmata+words, 1 to create lemmata, 5 to create words
    with self.checkMaxQueries(8):
        words = create_words(tokens)

    expected_keys = {
        ("a", "b", "b"),
        ("a", "b", "c"),
        ("b", "b", "b"),
        ("b", "b", "c"),
    }
    self.assertEqual(set(words.keys()), expected_keys)

    # Keys are (lemma, pos, word); each value must agree with its key.
    for (lemma_str, _pos, word_str), word_obj in words.items():
        self.assertEqual(word_obj.word, word_str)
        self.assertEqual(word_obj.lemma.lemma, lemma_str)

    # The pre-existing word and lemma must be reused, not duplicated.
    self.assertEqual(words["a", "b", "b"].id, stored_word.id)
    self.assertEqual(words["a", "b", "c"].lemma_id, stored_lemma.id)
def test_create_lemmata(self):
    """create_lemmata should reuse a stored lemma and return one entry per (lemma, pos)."""
    from amcat.models.token import TokenValues

    # Result is unused; the call is retained in case it has setup side
    # effects -- TODO confirm.
    amcattest.get_test_language()

    stored_lemma = Lemma.objects.create(lemma="a", pos="b")

    def make_token(lemma_str, pos):
        # Only lemma and pos are filled in; every other field is None.
        return TokenValues(None, None, None, lemma=lemma_str, pos=pos,
                           major=None, minor=None, namedentity=None)

    tokens = [make_token(lemma_str, "b") for lemma_str in "a" * 10]
    tokens.extend(make_token(lemma_str, "c") for lemma_str in "ab" * 5)

    # 1 to cache, 2 to create with different poss
    with self.checkMaxQueries(3):
        lemmata = create_lemmata(tokens)

    # are existing lemmata 'recycled'?
    self.assertEqual(lemmata["a", "b"].id, stored_lemma.id)

    # did we get the correct lemmata?
    expected_keys = {("a", "b"), ("a", "c"), ("b", "c")}
    self.assertEqual(set(lemmata.keys()), expected_keys)
    for (lemma_str, _pos), lemma_obj in lemmata.items():
        self.assertEqual(lemma_obj.lemma, lemma_str)
def test_get_analysis(self):
    """An Analysis whose plugin points at Frog should resolve to the Frog class and script."""
    from amcat.models import Analysis, Plugin
    from amcat.nlp.frog import Frog

    plugin = Plugin.objects.create(
        label='test',
        module='amcat.nlp.frog',
        class_name='Frog',
    )
    analysis = Analysis.objects.create(
        language=amcattest.get_test_language(),
        plugin=plugin,
    )

    # The plugin record must resolve to the Frog class itself ...
    self.assertEqual(analysis.plugin.get_class(), Frog)

    # ... and the analysis script must be exactly a Frog (not a subclass)
    # whose `triples` attribute is falsy.
    script = analysis.get_script()
    self.assertEqual(type(script), Frog)
    self.assertFalse(script.triples)
def test_create_lemmata(self):
    """Verify create_lemmata recycles an existing lemma and yields the expected (lemma, pos) keys."""
    from amcat.models.token import TokenValues

    # Result is unused; the call is retained in case it has setup side
    # effects -- TODO confirm.
    amcattest.get_test_language()

    existing = Lemma.objects.create(lemma="a", pos="b")

    # Fields shared by every token built below.
    blank_fields = dict(major=None, minor=None, namedentity=None)

    tokens = []
    for lemma_str in "a" * 10:
        tokens.append(TokenValues(None, None, None, lemma=lemma_str, pos="b", **blank_fields))
    for lemma_str in "ab" * 5:
        tokens.append(TokenValues(None, None, None, lemma=lemma_str, pos="c", **blank_fields))

    # 1 to cache, 2 to create with different poss
    with self.checkMaxQueries(3):
        lemmata = create_lemmata(tokens)

    # are existing lemmata 'recycled'?
    recycled = lemmata["a", "b"]
    self.assertEqual(recycled.id, existing.id)

    # did we get the correct lemmata?
    self.assertEqual(set(lemmata.keys()), {("a", "b"), ("a", "c"), ("b", "c")})
    for key, lemma_obj in lemmata.items():
        lemma_str, _pos = key
        self.assertEqual(lemma_obj.lemma, lemma_str)
def test_create_words(self):
    """Verify create_words recycles an existing word/lemma and yields the expected keys."""
    from amcat.models.token import TokenValues

    # Result is unused; the call is retained in case it has setup side
    # effects -- TODO confirm.
    amcattest.get_test_language()

    existing_lemma = Lemma.objects.create(lemma="a", pos="b")
    existing_word = Word.objects.create(lemma=existing_lemma, word="b")

    # Fields shared by every token built below.
    shared_fields = dict(pos="b", major=None, minor=None, namedentity=None)

    tokens = []
    for lemma_str in "ab":
        for word_str in "bbcc":
            token = TokenValues(None, None, word=word_str, lemma=lemma_str, **shared_fields)
            tokens.append(token)

    # 2 to cache lemmata+words, 1 to create lemmata, 5 to create words
    with self.checkMaxQueries(8):
        words = create_words(tokens)

    self.assertEqual(
        set(words.keys()),
        {("a", "b", "b"), ("a", "b", "c"), ("b", "b", "b"), ("b", "b", "c")},
    )

    # Each key is (lemma, pos, word); the stored object must match it.
    for key, word_obj in words.items():
        lemma_str, _pos, word_str = key
        self.assertEqual(word_obj.word, word_str)
        self.assertEqual(word_obj.lemma.lemma, lemma_str)

    # Pre-existing rows are expected to be reused rather than re-created.
    self.assertEqual(words["a", "b", "b"].id, existing_word.id)
    self.assertEqual(words["a", "b", "c"].lemma_id, existing_lemma.id)
def test_rules(self):
    """RuleSet.get_rules() should expose each rule's condition/insert, lowest `order` first."""
    cb = amcattest.create_test_codebook()
    lang = amcattest.get_test_language()
    ruleset = RuleSet.objects.create(label="test", lexicon_codebook=cb,
                                     lexicon_language=lang)
    condition = "?x :rel_nsubj ?y"
    insert = "?x :boe ?y"
    Rule.objects.create(ruleset=ruleset, label="x", order=2,
                        where=condition, insert=insert)

    def getrules(rs):
        # Keep only the keys under test so that any extra keys returned by
        # get_rules() do not affect the comparison.  `.items()` (rather than
        # the Python-2-only `.iteritems()`) works on both Python 2 and 3.
        wanted = ("condition", "insert")
        return [{k: v for k, v in rule.items() if k in wanted}
                for rule in rs.get_rules()]

    # The rule's `where` clause is expected back under the "condition" key.
    self.assertEqual(getrules(ruleset),
                     [{"condition": condition, "insert": insert}])

    # A rule added later but with a lower `order` must come first.
    Rule.objects.create(ruleset=ruleset, label="y", order=1,
                        where="w", insert="i")
    self.assertEqual(getrules(ruleset),
                     [{"condition": "w", "insert": "i"},
                      {"condition": condition, "insert": insert}])