Exemplo n.º 1
0
 def eval_ner(self, name, corpus, baseline, lang="nl", model="conll2002"):
     """Tag ``corpus`` with the named NER tagger and require it to beat ``baseline``.

     :param name: tagger name passed to ``NER.ner``.
     :param corpus: object that the tagger is called on and that provides
         ``conlleval``.
     :param baseline: mapping with ``'FB1'``, ``'recall'`` and ``'accuracy'``
         entries; each must be strictly lower than the evaluated score.
     :param lang: language code passed to ``NER.ner``.
     :param model: model name passed to ``NER.ner``; not passed for ``'tnt'``.
     """
     # The 'tnt' tagger is constructed without a model argument.
     if name == 'tnt':
         tagger = NER.ner(name, lang)
     else:
         tagger = NER.ner(name, lang, model)

     tagged = tagger(corpus)
     self.validate_corpus_tokens(corpus, tagged)
     scores = corpus.conlleval(tagged)

     # assertLess reports both operands on failure, unlike comparing a
     # pre-computed boolean against True.
     self.assertLess(baseline['FB1'], scores['FB1'])
     self.assertLess(baseline['recall'], scores['recall'])
     self.assertLess(baseline['accuracy'], scores['accuracy'])
Exemplo n.º 2
0
 def eval_ner(self, name, corpus, baseline, lang="nl", model="conll2002"):
     """Evaluate one NER tagger on ``corpus`` against ``baseline`` scores.

     The tagger obtained from ``NER.ner`` is called on ``corpus``, its output
     is checked with ``validate_corpus_tokens`` and scored with
     ``corpus.conlleval``. The test fails unless the ``'FB1'``, ``'recall'``
     and ``'accuracy'`` scores are all strictly greater than the matching
     ``baseline`` entries.

     :param name: tagger name passed to ``NER.ner``.
     :param corpus: object the tagger is called on; provides ``conlleval``.
     :param baseline: mapping of metric name to the score that must be beaten.
     :param lang: language code passed to ``NER.ner``.
     :param model: model name passed to ``NER.ner`` (omitted for ``'tnt'``).
     """
     # 'tnt' takes no model argument; every other tagger gets one.
     ner_args = (name, lang) if name == 'tnt' else (name, lang, model)
     tagged = NER.ner(*ner_args)(corpus)

     self.validate_corpus_tokens(corpus, tagged)
     scores = corpus.conlleval(tagged)

     # Use assertLess so a failure shows the baseline and the actual score
     # instead of the uninformative "False != True".
     self.assertLess(baseline['FB1'], scores['FB1'])
     self.assertLess(baseline['recall'], scores['recall'])
     self.assertLess(baseline['accuracy'], scores['accuracy'])
Exemplo n.º 3
0
    def test_ner(self):
        """Check NamedEntityGlue, the NER registry and each registered tagger."""
        # --- NamedEntityGlue: adjacent tokens of the same entity type are
        # merged into one entry (see the "Den Haag" / "van der Saar" checks).
        glue = NER.NamedEntityGlue()
        # Tuples look like (text, offset, length, NE type); the glue is fed
        # text, offset and type only.
        tokens = [("Den", 0, 3, Token.NE_LOC), ("Haag", 4, 4, Token.NE_LOC),
                  ("mmm", 9, 3, 0), (u"Amsterdam", 14, 9, Token.NE_LOC),
                  ("mmm", 24, 3, 0), ("van", 28, 3, Token.NE_PER),
                  ("der", 32, 3, Token.NE_PER), ("Saar", 36, 4, Token.NE_PER)]

        res = []
        for text, offset, _length, ne_type in tokens:
            r = glue(text, offset, ne_type)
            if r:
                res.append(r)

        # Before flush() the trailing "van der Saar" run has not been emitted.
        self.assertEqual(len(res), 4)
        self.assertEqual(res[0][0], "Den Haag")
        self.assertGreater(res[0][3] & Token.NE_LOC, 0)
        self.assertEqual(tuple(res[1]), tokens[2])
        self.assertEqual(tuple(res[2]), tokens[3])

        r = glue.flush()
        if r:
            res.append(r)

        # Eight input tokens collapse into five entries: two merged runs
        # remove three tokens in total.
        self.assertEqual(len(res), len(tokens) - 3)
        self.assertEqual(res[0][0], "Den Haag")
        self.assertGreater(res[0][3] & Token.NE_LOC, 0)
        self.assertEqual(tuple(res[1]), tokens[2])
        self.assertEqual(tuple(res[2]), tokens[3])
        self.assertEqual(tuple(res[3]), tokens[4])
        self.assertEqual(res[4], ["van der Saar", 28, 12, Token.NE_PER])

        # --- Registry and Java-based tagger home directory.
        self.assertEqual(JavaBasedNER().home(),
                         os.path.join(GRAVITY_LIB_HOME, "ner"))

        self.assertEqual(len(NER.list()), 5)
        for n in ('lingpipe', 'stanford', 'lbj', 'tnt'):
            self.assertIsInstance(NER.ner(n), NER)

        # --- Individual taggers: each is expected to return two entries.
        txt = "Amsterdam is amsterdam"
        n = NER.ner("lingpipe", 'nl', 'conll2002')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        txt = "Amsterdam is capital of The Netherlands."
        n = NER.ner("stanford", 'nl', 'conll2002')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        txt = "Amsterdam is capital of The Netherlands."
        n = NER.ner("lbj", 'en')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        # --- TnT tagger: only exercised when the platform string mentions Linux.
        import platform
        if "linux" in platform.platform().lower():
            txt = "Amsterdam is capital of The Netherlands."

            # Requesting TnT for 'en' must raise; the concrete exception type
            # is not visible here, hence the broad BaseException.
            self.assertRaises(BaseException, NER.ner, "tnt", 'en')

            n = NER.ner("tnt", 'nl')(txt)
            self.assertEqual(len(n), 2)
            self.validate_tokens(txt, n)
        else:
            # Call form of print emits the same text on Python 2 and 3.
            print("Skip testing TNT tagger. It is possible on Linux platform only.")
Exemplo n.º 4
0
 def f():
     """Request the 'tnt' tagger for language 'en' and discard the result."""
     tagger_name, language = "tnt", 'en'
     NER.ner(tagger_name, language)
Exemplo n.º 5
0
 def f():
     # Zero-argument wrapper so assertRaises can invoke the lookup itself.
     NER.ner("tnt", 'en')

 # Asking for the 'tnt' tagger with language 'en' is expected to raise.
 self.assertRaises(BaseException, f)
Exemplo n.º 6
0
    def test_ner(self):
        """Check NamedEntityGlue, the NER registry and each registered tagger."""
        # --- NamedEntityGlue: adjacent tokens of the same entity type are
        # merged into one entry (see the "Den Haag" / "van der Saar" checks).
        glue = NER.NamedEntityGlue()
        # Tuples look like (text, offset, length, NE type); the glue is fed
        # text, offset and type only.
        tokens = [
            ("Den", 0, 3, Token.NE_LOC),
            ("Haag", 4, 4, Token.NE_LOC),
            ("mmm", 9, 3, 0),
            (u"Amsterdam", 14, 9, Token.NE_LOC),
            ("mmm", 24, 3, 0),
            ("van", 28, 3, Token.NE_PER),
            ("der", 32, 3, Token.NE_PER),
            ("Saar", 36, 4, Token.NE_PER),
        ]

        res = []
        for text, offset, _length, ne_type in tokens:
            r = glue(text, offset, ne_type)
            if r:
                res.append(r)

        # Before flush() the trailing "van der Saar" run has not been emitted.
        self.assertEqual(len(res), 4)
        self.assertEqual(res[0][0], "Den Haag")
        self.assertGreater(res[0][3] & Token.NE_LOC, 0)
        self.assertEqual(tuple(res[1]), tokens[2])
        self.assertEqual(tuple(res[2]), tokens[3])

        r = glue.flush()
        if r:
            res.append(r)

        # Eight input tokens collapse into five entries: two merged runs
        # remove three tokens in total.
        self.assertEqual(len(res), len(tokens) - 3)
        self.assertEqual(res[0][0], "Den Haag")
        self.assertGreater(res[0][3] & Token.NE_LOC, 0)
        self.assertEqual(tuple(res[1]), tokens[2])
        self.assertEqual(tuple(res[2]), tokens[3])
        self.assertEqual(tuple(res[3]), tokens[4])
        self.assertEqual(res[4], ["van der Saar", 28, 12, Token.NE_PER])

        # --- Registry and Java-based tagger home directory.
        self.assertEqual(JavaBasedNER().home(), os.path.join(GRAVITY_LIB_HOME, "ner"))

        self.assertEqual(len(NER.list()), 5)
        for n in ('lingpipe', 'stanford', 'lbj', 'tnt'):
            self.assertIsInstance(NER.ner(n), NER)

        # --- Individual taggers: each is expected to return two entries.
        txt = "Amsterdam is amsterdam"
        n = NER.ner("lingpipe", 'nl', 'conll2002')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        txt = "Amsterdam is capital of The Netherlands."
        n = NER.ner("stanford", 'nl', 'conll2002')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        txt = "Amsterdam is capital of The Netherlands."
        n = NER.ner("lbj", 'en')(txt)
        self.assertEqual(len(n), 2)
        self.validate_tokens(txt, n)

        # --- TnT tagger: only exercised when the platform string mentions Linux.
        import platform
        if "linux" in platform.platform().lower():
            txt = "Amsterdam is capital of The Netherlands."

            # Requesting TnT for 'en' must raise; the concrete exception type
            # is not visible here, hence the broad BaseException.
            self.assertRaises(BaseException, NER.ner, "tnt", 'en')

            n = NER.ner("tnt", 'nl')(txt)
            self.assertEqual(len(n), 2)
            self.validate_tokens(txt, n)
        else:
            # Call form of print emits the same text on Python 2 and 3.
            print("Skip testing TNT tagger. It is possible on Linux platform only.")