示例#1
0
 def testGetPrepositionedNounPhrase(self):
     # Work on a private list so that re-tagging tokens 3 and 4 as a PP chunk
     # (B-PP followed by I-PP) does not rebind entries of TEST_TOKENS itself.
     retagged = list(TEST_TOKENS)
     for index, chunk_tag in ((3, "B-PP"), (4, "I-PP")):
         retagged[index] = retagged[index].replace(chunk=chunk_tag)

     sentence = Sentence(retagged)
     sentence.addAnnotation('sentinel', 0)
     target = sentence.addAnnotation('type', 6)

     # The annotation at token 6 is expected to yield 'sentinel' and 'stem1'.
     found = list(target.getPrepositionedNounPhrase_())
     self.assertListEqual(found, ['sentinel', 'stem1'])
示例#2
0
    def testTokenDistanceIfOverlapping(self):
        # Going by the test name, A, B, D and E are each treated as
        # overlapping C; tokenDistanceTo is expected to report -1 for all.
        s = Sentence(TEST_TOKENS)
        a = s.addAnnotation('A', 2, 4)
        b = s.addAnnotation('B', 2, 3)
        c = s.addAnnotation('C', 1, 3)
        d = s.addAnnotation('D', 0, 2)
        e = s.addAnnotation('E', 1, 2)

        for other in (a, b, d, e):
            # Every case expects the same value, so the default failure text
            # cannot tell them apart; msg names the annotation that failed.
            self.assertEqual(c.tokenDistanceTo(other), -1, msg=repr(other))
示例#3
0
    def testPhraseDistance(self):
        # Register five annotations (in this order) and compare the phrase
        # distance from C to each of them, C itself included.
        sentence = Sentence(TEST_TOKENS)
        spans = [('A', 0, 2), ('B', 3), ('C', 6, 8), ('D', 9), ('E', 3, 5)]
        a, b, c, d, e = [sentence.addAnnotation(*span) for span in spans]

        expected = ((a, 1), (b, 1), (c, -1), (d, 0), (e, 0))
        for other, dist in expected:
            measured = c.phraseDistanceTo(other)
            self.assertEqual(measured, dist, msg=repr(other))
示例#4
0
    def testTokenDistance(self):
        # Checks the token distance from C (6, 8) to annotations placed
        # before it (A, B, E) and directly after it (D).
        s = Sentence(TEST_TOKENS)
        a = s.addAnnotation('A', 0, 2)
        b = s.addAnnotation('B', 3)
        c = s.addAnnotation('C', 6, 8)
        d = s.addAnnotation('D', 8)
        e = s.addAnnotation('E', 4, 6)

        for other, dist in ((a, 4), (b, 2), (d, 0), (e, 0)):
            # D and E share the expected value 0, so the default failure text
            # is ambiguous; msg names the annotation that failed.
            self.assertEqual(c.tokenDistanceTo(other), dist, msg=repr(other))
示例#5
0
 def testPhraseTagsTo(self):
     # Table of (from, to, expected phrase tags between them); the
     # annotations are registered in the same order as they are named here.
     sentence = Sentence(TEST_TOKENS)
     at5 = sentence.addAnnotation('true', 5)
     span0_2 = sentence.addAnnotation('true', 0, 2)
     span3_5 = sentence.addAnnotation('true', 3, 5)
     at0 = sentence.addAnnotation('true', 0)
     at9 = sentence.addAnnotation('true', 9)

     cases = (
         (at5, span0_2, ['NP']),
         (at5, at5, []),
         (span0_2, span3_5, []),
         (at0, at9, ['NP', 'NP', 'NP']),
     )
     for start, end, tags in cases:
         self.assertEqual(list(start.phraseTagsBetween(end)), tags)
示例#6
0
 def testPosTagsTo(self):
     # Table of (from, to, expected PoS tags between them); the annotations
     # are registered in the same order as they are named here.
     sentence = Sentence(TEST_TOKENS)
     at5 = sentence.addAnnotation('true', 5)
     span0_2 = sentence.addAnnotation('true', 0, 2)
     span3_5 = sentence.addAnnotation('true', 3, 5)
     at0 = sentence.addAnnotation('true', 0)
     at9 = sentence.addAnnotation('true', 9)

     cases = (
         (at5, span0_2, ['pos2', 'pos3', 'pos4']),
         (at5, at5, []),
         (span0_2, span3_5, ['pos2']),
         (at0, at9, ['pos%d' % i for i in range(1, 9)]),
     )
     for start, end, tags in cases:
         self.assertEqual(list(start.posTagsBetween(end)), tags)
示例#7
0
    def testComparator(self):
        # Orders the reference annotation (2, 5) against annotations that
        # start at 0, annotations that end at 7, and three fixed spans.
        sentence = Sentence(TEST_TOKENS)
        reference = Annotation(sentence, 2, 5)

        # The reference must sort after every annotation starting at token 0.
        for end in range(1, 7):
            earlier = Annotation(sentence, 0, end)
            self.assertTrue(reference > earlier, msg=end)

        # The reference must sort before every annotation starting at 3..6.
        for begin in range(3, 7):
            later = Annotation(sentence, begin, 7)
            self.assertTrue(reference < later, msg=begin)

        enclosing = Annotation(sentence, 1, 6)
        self.assertTrue(reference > enclosing)
        inner = Annotation(sentence, 3, 4)
        self.assertTrue(reference < inner)
        same = Annotation(sentence, 2, 5)
        self.assertTrue(reference == same)
示例#8
0
 def testGetMaskedWords(self):
     sentence = Sentence(TEST_TOKENS)
     # The (2, 4) span should only fetch one masked token ("type1").
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type2', 6)
     sentence.addAnnotation('type2', 8, 9)

     # Lift unittest's diff size limit for the list comparisons below.
     self.maxDiff = None

     whole_sentence = [
         'word0', 'word1', 'type1', 'word4', 'word5', 'type2', 'word7',
         'type2', 'word9'
     ]
     self.assertListEqual(list(sentence.maskedWords()), whole_sentence)

     # Starting at token 7, only the tail of the sentence is expected.
     from_seven = ['word7', 'type2', 'word9']
     self.assertListEqual(list(sentence.maskedWords(7)), from_seven)
示例#9
0
 def testAddAnnotation(self):
     # Three annotations of two distinct types are added; `annotations` is
     # expected to map each type to a set, holding three entries in total.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type2', 6)
     sentence.addAnnotation('type2', 8, 9)

     self.assertEqual(len(sentence.annotations), 2)

     type_names = set(sentence.annotations.keys())
     self.assertEqual(type_names, {'type1', 'type2'})

     stored_as_sets = [
         isinstance(group, set) for group in sentence.annotations.values()
     ]
     self.assertTrue(all(stored_as_sets))

     total = sum(len(group) for group in sentence.annotations.values())
     self.assertEqual(total, 3)
示例#10
0
 def testGetPhraseNumbers(self):
     sentence = Sentence(TEST_TOKENS)

     # Without arguments, phrase numbers 1 through 4 are expected.
     everything = list(sentence.phraseNumbers())
     self.assertListEqual(everything, [1, 2, 3, 4])

     # With the arguments (1, 6), only phrases 1 and 2 are expected.
     windowed = list(sentence.phraseNumbers(1, 6))
     self.assertListEqual(windowed, [1, 2])
示例#11
0
 def testTokenDistanceOnDifferentSentences(self):
     # Annotations that live on two separate Sentence objects must not be
     # comparable by token distance: a ValueError is expected.
     first_sentence = Sentence(TEST_TOKENS)
     second_sentence = Sentence(TEST_TOKENS)
     on_first = first_sentence.addAnnotation('type', 0, 2)
     on_second = second_sentence.addAnnotation('type', 6, 8)

     with self.assertRaises(ValueError):
         on_first.tokenDistanceTo(on_second)
示例#12
0
 def testComparatorTypeError(self):
     # Ordering an Annotation against a non-Annotation (here: the Sentence
     # itself) is expected to raise TypeError.
     sentence = Sentence(TEST_TOKENS)
     annotation = Annotation(sentence, 2, 5)

     with self.assertRaises(TypeError):
         annotation < sentence
示例#13
0
 def testComparatorValueError(self):
     # Two annotations with identical offsets but on different Sentence
     # objects cannot be ordered: a ValueError is expected.
     first_sentence = Sentence(TEST_TOKENS)
     on_first = Annotation(first_sentence, 2, 5)
     second_sentence = Sentence(TEST_TOKENS)
     on_second = Annotation(second_sentence, 2, 5)

     with self.assertRaises(ValueError):
         on_first < on_second
示例#14
0
 def testPhraseDistanceIfBothInOverlappingPhrase(self):
     # A covers (0, 5) and B covers (6, 10); the phrase distance between
     # them is expected to be 0.
     sentence = Sentence(TEST_TOKENS)
     left = sentence.addAnnotation('A', 0, 5)
     right = sentence.addAnnotation('B', 6, 10)

     distance = left.phraseDistanceTo(right)
     self.assertEqual(distance, 0)
示例#15
0
 def testPhraseDistanceIfBothNotInPhrase(self):
     # Single-token annotations at 2 and 5 (per the test name, neither is
     # inside a phrase) are expected to be one phrase apart.
     sentence = Sentence(TEST_TOKENS)
     left = sentence.addAnnotation('A', 2)
     right = sentence.addAnnotation('B', 5)

     distance = left.phraseDistanceTo(right)
     self.assertEqual(distance, 1)
示例#16
0
 def testGetStems(self):
     # stems(3, 7) is expected to return the plain stems of tokens 3..6,
     # even though 'mask' annotations were added at (2, 4) and 6.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('mask', 2, 4)
     sentence.addAnnotation('mask', 6)

     window = list(sentence.stems(3, 7))
     self.assertListEqual(window, ['stem3', 'stem4', 'stem5', 'stem6'])
示例#17
0
 def testCopyConstructor(self):
     # A Sentence built from another Sentence must report the same "ann"
     # annotations as the one it was built from.
     original = Sentence(TEST_TOKENS)
     original.addAnnotation("ann", 1)
     duplicate = Sentence(original)

     copied = duplicate.getAnnotations("ann")
     source = original.getAnnotations("ann")
     self.assertEqual(copied, source)
示例#18
0
 def testTokenDistanceIfEqual(self):
     # Two annotations with the identical span (0, 2) are expected to have
     # a token distance of -2.
     sentence = Sentence(TEST_TOKENS)
     first = sentence.addAnnotation('A', 0, 2)
     second = sentence.addAnnotation('B', 0, 2)

     distance = first.tokenDistanceTo(second)
     self.assertEqual(distance, -2)
示例#19
0
 def testEquals(self):
     # Separately constructed annotations with the same sentence and the
     # same offsets must compare equal.
     sentence = Sentence(TEST_TOKENS)
     first, second = Annotation(sentence, 1, 2), Annotation(sentence, 1, 2)
     self.assertEqual(first, second)
示例#20
0
 def testGetPhraseWordsOutside(self):
     # For a 'mask' annotation at token 2 (outside any phrase, per the test
     # name) only the mask itself is expected back.
     sentence = Sentence(TEST_TOKENS)
     masked = sentence.addAnnotation('mask', 2)

     words = list(masked.getPhraseWords())
     self.assertListEqual(words, ['mask'])
示例#21
0
 def testGetPhraseTagFailure(self):
     # Both wide annotations, (0, 5) and (2, 9), are expected to make
     # getPhraseTag_ raise ValueError.
     sentence = Sentence(TEST_TOKENS)
     wide_left = sentence.addAnnotation('true', 0, 5)
     wide_right = sentence.addAnnotation('true', 2, 9)

     for annotation in (wide_left, wide_right):
         with self.assertRaises(ValueError):
             annotation.getPhraseTag_()
示例#22
0
 def testGetPhraseStems(self):
     # For a 'mask' annotation at token 3, the phrase stems are expected to
     # be the mask followed by 'stem4'.
     sentence = Sentence(TEST_TOKENS)
     masked = sentence.addAnnotation('mask', 3)

     stems = list(masked.getPhraseStems())
     self.assertListEqual(stems, ['mask', 'stem4'])
示例#23
0
 def testGetPhraseOffsetOutside(self):
     # An annotation at token 2 (outside any phrase, per the test name) is
     # expected to report the offset (2, 3).
     sentence = Sentence(TEST_TOKENS)
     masked = sentence.addAnnotation('mask', 2)

     offset = masked.getPhraseOffset()
     self.assertEqual(offset, (2, 3))
示例#24
0
 def testGetPhraseOffsetMultiPhrase(self):
     # An annotation spanning (4, 7) is expected to report exactly that
     # span as its phrase offset.
     sentence = Sentence(TEST_TOKENS)
     masked = sentence.addAnnotation('mask', 4, 7)

     offset = masked.getPhraseOffset()
     self.assertEqual(offset, (4, 7))
示例#25
0
 def testEquals(self):
     # A sentence equals itself, while a Sentence constructed from it is
     # expected to compare unequal to the original.
     original = Sentence(TEST_TOKENS)
     duplicate = Sentence(original)

     self.assertEqual(original, original)
     self.assertNotEqual(original, duplicate)
示例#26
0
 def testInit(self):
     # A freshly built Sentence must have the length of, and iterate to,
     # the tokens it was constructed from.
     sentence = Sentence(TEST_TOKENS)

     token_count = len(sentence)
     self.assertEqual(token_count, len(TEST_TOKENS))

     iterated = list(sentence)
     self.assertEqual(iterated, TEST_TOKENS)
示例#27
0
    def testGetPhraseNumber(self):
        # tests[i] is the phrase number expected for token index i.
        s = Sentence(TEST_TOKENS)
        tests = [1, 1, 0, 2, 2, 0, 3, 3, 0, 4]

        for i, n in enumerate(tests):
            # Expected values repeat (e.g. several 0s), so the default failure
            # text cannot identify the token; msg reports its index.
            self.assertEqual(s.phraseNumber(i), n, msg='token index %d' % i)
示例#28
0
 def testIsNotInsidePhrase(self):
     # An annotation spanning (1, 3) is expected not to count as being
     # inside a phrase.
     sentence = Sentence(TEST_TOKENS)
     straddling = sentence.addAnnotation('false', 1, 3)

     inside = straddling.isInsidePhrase()
     self.assertFalse(inside)
示例#29
0
    def testGetPhraseOffset(self):
        # Expected offsets for phrase numbers 1..4, listed in phrase order.
        sentence = Sentence(TEST_TOKENS)
        offsets_in_order = [(0, 2), (3, 5), (6, 8), (9, 10)]

        for number, offset in enumerate(offsets_in_order, 1):
            located = sentence.phraseOffsetFor(number)
            self.assertEqual(located, offset)
示例#30
0
 def testGetPoSTags(self):
     # posTags(3, 7) is expected to return the plain PoS tags of tokens
     # 3..6, even though 'mask' annotations were added at (2, 4) and 6.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('mask', 2, 4)
     sentence.addAnnotation('mask', 6)

     window = list(sentence.posTags(3, 7))
     self.assertListEqual(window, ['pos3', 'pos4', 'pos5', 'pos6'])