示例#1
0
    def test_create_words(self):
        """Exercise create_words on tokens that partly match stored rows.

        One Lemma ("a", "b") and one Word ("b" on that lemma) are stored up
        front.  Tokens are then built for lemmata "a" and "b", each combined
        with the words "b" and "c" (every pair occurs twice).  The test
        asserts the query budget, the keys of the returned mapping, the
        attributes of each returned word, and that the pre-stored word and
        lemma are the ones handed back.
        """
        from amcat.models.token import TokenValues

        # Return value is not used below; the call is retained as-is since
        # whatever setup it performs is not visible from this test.
        amcattest.get_test_language()

        stored_lemma = Lemma.objects.create(lemma="a", pos="b")
        stored_word = Word.objects.create(lemma=stored_lemma, word="b")

        # Iterating "bbcc" yields each of the words "b" and "c" twice per lemma.
        tokens = [
            TokenValues(None, None, word=word_str, lemma=lemma_str, pos="b",
                        major=None, minor=None, namedentity=None)
            for lemma_str in "ab"
            for word_str in "bbcc"
        ]

        # 2 to cache lemmata+words, 1 to create lemmata, 5 to create words
        with self.checkMaxQueries(8):
            result = create_words(tokens)

        # Keys are (lemma, pos, word) triples.
        expected_keys = {
            ("a", "b", "b"),
            ("a", "b", "c"),
            ("b", "b", "b"),
            ("b", "b", "c"),
        }
        self.assertEqual(set(result.keys()), expected_keys)

        for key, word_obj in result.items():
            lemma_str, _pos, word_str = key
            self.assertEqual(word_obj.word, word_str)
            self.assertEqual(word_obj.lemma.lemma, lemma_str)

        # The pre-stored word is returned rather than a new row ...
        self.assertEqual(result["a", "b", "b"].id, stored_word.id)
        # ... and the new word "c" hangs off the pre-stored lemma.
        self.assertEqual(result["a", "b", "c"].lemma_id, stored_lemma.id)
示例#2
0
 def test_create_lemmata(self):
     """Exercise create_lemmata on tokens that partly match a stored lemma.

     A Lemma ("a", "b") is stored first.  Ten tokens with lemma "a" / pos
     "b" and ten tokens alternating lemma "a" and "b" with pos "c" are then
     passed to create_lemmata.  The test asserts the query budget, that the
     stored lemma is handed back for ("a", "b"), and that the mapping is
     keyed by exactly the three distinct (lemma, pos) pairs.
     """
     from amcat.models.token import TokenValues

     # Return value is not used below; the call is retained as-is since
     # whatever setup it performs is not visible from this test.
     amcattest.get_test_language()

     stored = Lemma.objects.create(lemma="a", pos="b")

     def make_token(lemma_str, pos_str):
         # Only lemma and pos vary between tokens; everything else is None.
         return TokenValues(None, None, None,
                            lemma=lemma_str,
                            pos=pos_str,
                            major=None,
                            minor=None,
                            namedentity=None)

     tokens = [make_token(char, "b") for char in "a" * 10]
     tokens.extend([make_token(char, "c") for char in "ab" * 5])

     # 1 to cache, 2 to create with different poss
     with self.checkMaxQueries(3):
         result = create_lemmata(tokens)

     # are existing lemmata 'recycled'?
     self.assertEqual(result["a", "b"].id, stored.id)

     # did we get the correct lemmata?
     expected_keys = {("a", "b"), ("a", "c"), ("b", "c")}
     self.assertEqual(set(result.keys()), expected_keys)
     for key, lemma_obj in result.items():
         lemma_str, _pos = key
         self.assertEqual(lemma_obj.lemma, lemma_str)
示例#3
0
文件: token.py 项目: edisona/amcat
 def test_get_analysis(self):
     """Check that an Analysis resolves its Plugin to the Frog class.

     A Plugin row pointing at module 'amcat.nlp.frog' / class 'Frog' is
     created and attached to a new Analysis.  The test asserts that the
     plugin's get_class() returns Frog, that get_script() returns an object
     whose type is exactly Frog, and that its `triples` attribute is falsy.
     """
     from amcat.nlp.frog import Frog
     from amcat.models import Analysis, Plugin

     plugin = Plugin.objects.create(
         label='test',
         module='amcat.nlp.frog',
         class_name='Frog',
     )
     language = amcattest.get_test_language()
     analysis = Analysis.objects.create(language=language, plugin=plugin)

     resolved_class = analysis.plugin.get_class()
     self.assertEqual(resolved_class, Frog)

     script = analysis.get_script()
     self.assertEqual(type(script), Frog)
     self.assertFalse(script.triples)
示例#4
0
 def test_create_lemmata(self):
     """Verify create_lemmata reuses a stored lemma and adds the missing ones.

     Setup: one stored Lemma ("a", "b"); ten tokens with lemma "a" and pos
     "b"; ten tokens with pos "c" whose lemma alternates between "a" and
     "b".  Assertions: the call stays within three queries, the entry for
     ("a", "b") is the stored row, and the result is keyed by the three
     distinct (lemma, pos) pairs with matching lemma strings.
     """
     from amcat.models.token import TokenValues

     # Return value is not used below; the call is retained as-is since
     # whatever setup it performs is not visible from this test.
     amcattest.get_test_language()

     existing = Lemma.objects.create(lemma="a", pos="b")

     tokens = []
     for lemma_chars, pos_str in (("a" * 10, "b"), ("ab" * 5, "c")):
         for lemma_str in lemma_chars:
             tokens.append(TokenValues(None, None, None, lemma=lemma_str, pos=pos_str,
                                       major=None, minor=None, namedentity=None))

     # 1 to cache, 2 to create with different poss
     with self.checkMaxQueries(3):
         by_key = create_lemmata(tokens)

     # are existing lemmata 'recycled'?
     self.assertEqual(by_key["a", "b"].id, existing.id)

     # did we get the correct lemmata?
     self.assertEqual(set(by_key.keys()), {("a", "b"), ("a", "c"), ("b", "c")})
     for (expected_lemma, _pos), lemma_obj in by_key.items():
         self.assertEqual(lemma_obj.lemma, expected_lemma)
示例#5
0
    def test_create_words(self):
        """Verify create_words reuses stored rows and creates the missing ones.

        Setup: a stored Lemma ("a", "b") with a stored Word "b"; tokens for
        lemmata "a" and "b" (pos "b"), each paired with the words "b", "b",
        "c", "c".  Assertions: the call stays within eight queries, the
        result is keyed by the four distinct (lemma, pos, word) triples,
        each word object matches its key, and the stored word and lemma are
        reused.
        """
        from amcat.models.token import TokenValues

        # Return value is not used below; the call is retained as-is since
        # whatever setup it performs is not visible from this test.
        amcattest.get_test_language()

        known_lemma = Lemma.objects.create(lemma="a", pos="b")
        known_word = Word.objects.create(lemma=known_lemma, word="b")

        def make_token(lemma_str, word_str):
            # Only word and lemma vary; pos is always "b", the rest is None.
            return TokenValues(None, None, word=word_str, lemma=lemma_str, pos="b",
                               major=None, minor=None, namedentity=None)

        tokens = [make_token(lemma_str, word_str)
                  for lemma_str in "ab"
                  for word_str in "bbcc"]

        # 2 to cache lemmata+words, 1 to create lemmata, 5 to create words
        with self.checkMaxQueries(8):
            by_key = create_words(tokens)

        expected_keys = {("a", "b", "b"), ("a", "b", "c"),
                         ("b", "b", "b"), ("b", "b", "c")}
        self.assertEqual(set(by_key.keys()), expected_keys)

        for (expected_lemma, _pos, expected_word), word_obj in by_key.items():
            self.assertEqual(word_obj.word, expected_word)
            self.assertEqual(word_obj.lemma.lemma, expected_lemma)

        # Stored word comes back as-is; the new word "c" uses the stored lemma.
        self.assertEqual(by_key["a", "b", "b"].id, known_word.id)
        self.assertEqual(by_key["a", "b", "c"].lemma_id, known_lemma.id)
示例#6
0
    def test_rules(self):
        """Check the entries and ordering returned by RuleSet.get_rules().

        A RuleSet is created with one Rule (order=2); get_rules() is expected
        to expose that rule's `where` text under "condition" and its `insert`
        text under "insert".  A second Rule with order=1 is then added and is
        expected to be listed before the first one.
        """
        cb = amcattest.create_test_codebook()
        lang = amcattest.get_test_language()
        r = RuleSet.objects.create(label="test", lexicon_codebook=cb,
                                   lexicon_language=lang)
        condition = "?x :rel_nsubj ?y"
        insert = "?x :boe ?y"
        Rule.objects.create(ruleset=r, label="x", order=2,
                            where=condition, insert=insert)

        def getrules(ruleset):
            """Return each rule of *ruleset* cut down to condition/insert."""
            wanted = ("condition", "insert")
            # .items() instead of the Python 2-only .iteritems(), so the
            # helper works on both Python 2 and Python 3 mappings.
            return [{k: v for k, v in rule.items() if k in wanted}
                    for rule in ruleset.get_rules()]

        self.assertEqual(getrules(r),
                         [{"condition": condition, "insert": insert}])

        # Created later but with a lower `order`: expected to come first.
        Rule.objects.create(ruleset=r, label="y", order=1,
                            where="w", insert="i")
        self.assertEqual(getrules(r),
                         [{"condition": "w", "insert": "i"},
                          {"condition": condition, "insert": insert}])
示例#7
0
    def test_rules(self):
        """Check the entries and ordering returned by RuleSet.get_rules().

        With a single Rule (order=2) in the RuleSet, get_rules() is expected
        to yield one mapping carrying the rule's `where` text as "condition"
        and its `insert` text as "insert".  After adding a Rule with order=1,
        that rule is expected to appear ahead of the first.
        """
        cb = amcattest.create_test_codebook()
        lang = amcattest.get_test_language()
        r = RuleSet.objects.create(label="test", lexicon_codebook=cb,
                                   lexicon_language=lang)
        condition = "?x :rel_nsubj ?y"
        insert = "?x :boe ?y"
        Rule.objects.create(ruleset=r, label="x", order=2,
                            where=condition, insert=insert)

        def getrules(ruleset):
            """Project every rule of *ruleset* onto its condition/insert keys."""
            keep = ("condition", "insert")
            projected = []
            for rule in ruleset.get_rules():
                # .items() replaces the Python 2-only .iteritems(); it is
                # available on mappings in both Python 2 and Python 3.
                projected.append({k: v for k, v in rule.items() if k in keep})
            return projected

        self.assertEqual(getrules(r),
                         [{"condition": condition, "insert": insert}])

        # Lower `order` than the first rule, so it is expected first.
        Rule.objects.create(ruleset=r, label="y", order=1,
                            where="w", insert="i")
        self.assertEqual(getrules(r),
                         [{"condition": "w", "insert": "i"},
                          {"condition": condition, "insert": insert}])