def testGetPrepositionedNounPhrase(self):
    """Expect getPrepositionedNounPhrase_() to yield 'sentinel' then 'stem1'.

    Tokens 3 and 4 of a copy of TEST_TOKENS are re-tagged with the chunk
    labels "B-PP" and "I-PP" before the sentence is built.
    """
    retagged = list(TEST_TOKENS)
    for index, chunk_tag in ((3, "B-PP"), (4, "I-PP")):
        retagged[index] = retagged[index].replace(chunk=chunk_tag)

    sentence = Sentence(retagged)
    sentence.addAnnotation('sentinel', 0)
    annotation = sentence.addAnnotation('type', 6)

    phrase = list(annotation.getPrepositionedNounPhrase_())
    self.assertListEqual(phrase, ['sentinel', 'stem1'])
def testTokenDistanceIfOverlapping(self):
    """Expect tokenDistanceTo() to return -1 for every compared annotation.

    Annotation C is added as ('C', 1, 3) and compared against A, B, D and E
    from the same sentence, which this test treats as overlapping with C.
    """
    s = Sentence(TEST_TOKENS)
    a = s.addAnnotation('A', 2, 4)
    b = s.addAnnotation('B', 2, 3)
    c = s.addAnnotation('C', 1, 3)
    d = s.addAnnotation('D', 0, 2)
    e = s.addAnnotation('E', 1, 2)

    for other in [a, b, d, e]:
        # Pass the compared annotation as the message so that a failure
        # inside the loop identifies which case broke.
        self.assertEqual(c.tokenDistanceTo(other), -1, msg=repr(other))
def testPhraseDistance(self):
    """Check phraseDistanceTo() from annotation C against A-E.

    Expected values: A -> 1, B -> 1, C itself -> -1, D -> 0, E -> 0.
    """
    sentence = Sentence(TEST_TOKENS)
    ann_a = sentence.addAnnotation('A', 0, 2)
    ann_b = sentence.addAnnotation('B', 3)
    ann_c = sentence.addAnnotation('C', 6, 8)
    ann_d = sentence.addAnnotation('D', 9)
    ann_e = sentence.addAnnotation('E', 3, 5)

    expectations = [
        (ann_a, 1),
        (ann_b, 1),
        (ann_c, -1),
        (ann_d, 0),
        (ann_e, 0),
    ]
    for target, expected in expectations:
        actual = ann_c.phraseDistanceTo(target)
        self.assertEqual(actual, expected, msg=repr(target))
def testTokenDistance(self):
    """Check tokenDistanceTo() from annotation C against A, B, D and E.

    Expected values: A -> 4, B -> 2, D -> 0, E -> 0.
    """
    s = Sentence(TEST_TOKENS)
    a = s.addAnnotation('A', 0, 2)
    b = s.addAnnotation('B', 3)
    c = s.addAnnotation('C', 6, 8)
    d = s.addAnnotation('D', 8)
    e = s.addAnnotation('E', 4, 6)

    for other, dist in [(a, 4), (b, 2), (d, 0), (e, 0)]:
        # Pass the compared annotation as the message so that a failure
        # inside the loop identifies which case broke.
        self.assertEqual(c.tokenDistanceTo(other), dist, msg=repr(other))
def testPhraseTagsTo(self):
    """Check the tags produced by phraseTagsBetween() for several pairs."""
    sentence = Sentence(TEST_TOKENS)
    at_5 = sentence.addAnnotation('true', 5)
    at_0_2 = sentence.addAnnotation('true', 0, 2)
    at_3_5 = sentence.addAnnotation('true', 3, 5)
    at_0 = sentence.addAnnotation('true', 0)
    at_9 = sentence.addAnnotation('true', 9)

    cases = (
        (at_5, at_0_2, ['NP']),
        (at_5, at_5, []),
        (at_0_2, at_3_5, []),
        (at_0, at_9, ['NP', 'NP', 'NP']),
    )
    for source, target, expected in cases:
        tags = list(source.phraseTagsBetween(target))
        self.assertEqual(tags, expected)
def testPosTagsTo(self):
    """Check the tags produced by posTagsBetween() for several pairs."""
    sentence = Sentence(TEST_TOKENS)
    at_5 = sentence.addAnnotation('true', 5)
    at_0_2 = sentence.addAnnotation('true', 0, 2)
    at_3_5 = sentence.addAnnotation('true', 3, 5)
    at_0 = sentence.addAnnotation('true', 0)
    at_9 = sentence.addAnnotation('true', 9)

    # 'pos1' through 'pos8'.
    pos_1_to_8 = ['pos%d' % i for i in range(1, 9)]

    cases = (
        (at_5, at_0_2, ['pos2', 'pos3', 'pos4']),
        (at_5, at_5, []),
        (at_0_2, at_3_5, ['pos2']),
        (at_0, at_9, pos_1_to_8),
    )
    for source, target, expected in cases:
        tags = list(source.posTagsBetween(target))
        self.assertEqual(tags, expected)
def testComparator(self):
    """Check ordering and equality of Annotation(s, 2, 5) against others."""
    sentence = Sentence(TEST_TOKENS)
    reference = Annotation(sentence, 2, 5)

    # Reference compares greater than Annotation(s, 0, 1) .. Annotation(s, 0, 6).
    for end in range(1, 7):
        candidate = Annotation(sentence, 0, end)
        self.assertTrue(reference > candidate, end)

    # Reference compares less than Annotation(s, 3, 7) .. Annotation(s, 6, 7).
    for start in range(3, 7):
        candidate = Annotation(sentence, start, 7)
        self.assertTrue(reference < candidate, start)

    wider = Annotation(sentence, 1, 6)
    inner = Annotation(sentence, 3, 4)
    same = Annotation(sentence, 2, 5)
    self.assertTrue(reference > wider)
    self.assertTrue(reference < inner)
    self.assertTrue(reference == same)
def testGetMaskedWords(self):
    """Check maskedWords() over the whole sentence and from index 7."""
    sentence = Sentence(TEST_TOKENS)
    # should only fetch one masked token ("type1")
    sentence.addAnnotation('type1', 2, 4)
    sentence.addAnnotation('type2', 6)
    sentence.addAnnotation('type2', 8, 9)

    # Show the full list diff if an assertion below fails.
    self.maxDiff = None

    expected_all = [
        'word0',
        'word1',
        'type1',
        'word4',
        'word5',
        'type2',
        'word7',
        'type2',
        'word9',
    ]
    expected_from_7 = ['word7', 'type2', 'word9']

    self.assertListEqual(list(sentence.maskedWords()), expected_all)
    self.assertListEqual(list(sentence.maskedWords(7)), expected_from_7)
def testAddAnnotation(self):
    """Check how three added annotations are stored in `annotations`.

    Expects two keys ('type1', 'type2'), every value to be a set, and
    three stored annotations in total.
    """
    sentence = Sentence(TEST_TOKENS)
    sentence.addAnnotation('type1', 2, 4)
    sentence.addAnnotation('type2', 6)
    sentence.addAnnotation('type2', 8, 9)

    self.assertEqual(len(sentence.annotations), 2)

    type_names = set(sentence.annotations.keys())
    self.assertEqual(type_names, {'type1', 'type2'})

    every_value_is_set = True
    for group in sentence.annotations.values():
        if not isinstance(group, set):
            every_value_is_set = False
            break
    self.assertTrue(every_value_is_set)

    total = 0
    for group in sentence.annotations.values():
        total += len(group)
    self.assertEqual(total, 3)
def testGetPhraseNumbers(self):
    """Check phraseNumbers() with no arguments and with arguments (1, 6)."""
    sentence = Sentence(TEST_TOKENS)

    all_numbers = list(sentence.phraseNumbers())
    self.assertListEqual(all_numbers, [1, 2, 3, 4])

    ranged_numbers = list(sentence.phraseNumbers(1, 6))
    self.assertListEqual(ranged_numbers, [1, 2])
def testTokenDistanceOnDifferentSentences(self):
    """Expect ValueError from tokenDistanceTo() across two Sentence objects."""
    first_sentence = Sentence(TEST_TOKENS)
    second_sentence = Sentence(TEST_TOKENS)
    first = first_sentence.addAnnotation('type', 0, 2)
    second = second_sentence.addAnnotation('type', 6, 8)

    with self.assertRaises(ValueError):
        first.tokenDistanceTo(second)
def testComparatorTypeError(self):
    """Expect TypeError when an Annotation is compared (<) with a Sentence."""
    sentence = Sentence(TEST_TOKENS)
    annotation = Annotation(sentence, 2, 5)

    with self.assertRaises(TypeError):
        _ = annotation < sentence
def testComparatorValueError(self):
    """Expect ValueError when comparing (<) annotations of two sentences."""
    first_sentence = Sentence(TEST_TOKENS)
    first = Annotation(first_sentence, 2, 5)
    second_sentence = Sentence(TEST_TOKENS)
    second = Annotation(second_sentence, 2, 5)

    with self.assertRaises(ValueError):
        _ = first < second
def testPhraseDistanceIfBothInOverlappingPhrase(self):
    """Expect phraseDistanceTo() == 0 between ('A', 0, 5) and ('B', 6, 10)."""
    sentence = Sentence(TEST_TOKENS)
    left = sentence.addAnnotation('A', 0, 5)
    right = sentence.addAnnotation('B', 6, 10)

    distance = left.phraseDistanceTo(right)
    self.assertEqual(distance, 0)
def testPhraseDistanceIfBothNotInPhrase(self):
    """Expect phraseDistanceTo() == 1 between ('A', 2) and ('B', 5)."""
    sentence = Sentence(TEST_TOKENS)
    left = sentence.addAnnotation('A', 2)
    right = sentence.addAnnotation('B', 5)

    distance = left.phraseDistanceTo(right)
    self.assertEqual(distance, 1)
def testGetStems(self):
    """Expect stems(3, 7) to yield 'stem3'..'stem6' with 'mask' annotations present."""
    sentence = Sentence(TEST_TOKENS)
    sentence.addAnnotation('mask', 2, 4)
    sentence.addAnnotation('mask', 6)

    stems = list(sentence.stems(3, 7))
    expected = ['stem3', 'stem4', 'stem5', 'stem6']
    self.assertListEqual(stems, expected)
def testCopyConstructor(self):
    """Expect Sentence(other) to expose the same "ann" annotations as `other`."""
    original = Sentence(TEST_TOKENS)
    original.addAnnotation("ann", 1)

    duplicate = Sentence(original)

    copied_annotations = duplicate.getAnnotations("ann")
    source_annotations = original.getAnnotations("ann")
    self.assertEqual(copied_annotations, source_annotations)
def testTokenDistanceIfEqual(self):
    """Expect tokenDistanceTo() == -2 between ('A', 0, 2) and ('B', 0, 2)."""
    sentence = Sentence(TEST_TOKENS)
    first = sentence.addAnnotation('A', 0, 2)
    second = sentence.addAnnotation('B', 0, 2)

    distance = first.tokenDistanceTo(second)
    self.assertEqual(distance, -2)
def testEquals(self):
    """Expect two Annotation(s, 1, 2) built on the same sentence to be equal."""
    sentence = Sentence(TEST_TOKENS)
    first = Annotation(sentence, 1, 2)
    second = Annotation(sentence, 1, 2)

    self.assertEqual(first, second)
def testGetPhraseWordsOutside(self):
    """Expect getPhraseWords() to yield only 'mask' for the ('mask', 2) annotation."""
    sentence = Sentence(TEST_TOKENS)
    annotation = sentence.addAnnotation('mask', 2)

    words = list(annotation.getPhraseWords())
    self.assertListEqual(words, ['mask'])
def testGetPhraseTagFailure(self):
    """Expect getPhraseTag_() to raise ValueError for ('true', 0, 5) and ('true', 2, 9)."""
    sentence = Sentence(TEST_TOKENS)
    first = sentence.addAnnotation('true', 0, 5)
    second = sentence.addAnnotation('true', 2, 9)

    with self.assertRaises(ValueError):
        first.getPhraseTag_()
    with self.assertRaises(ValueError):
        second.getPhraseTag_()
def testGetPhraseStems(self):
    """Expect getPhraseStems() to yield 'mask' then 'stem4' for ('mask', 3)."""
    sentence = Sentence(TEST_TOKENS)
    annotation = sentence.addAnnotation('mask', 3)

    stems = list(annotation.getPhraseStems())
    self.assertListEqual(stems, ['mask', 'stem4'])
def testGetPhraseOffsetOutside(self):
    """Expect getPhraseOffset() == (2, 3) for the ('mask', 2) annotation."""
    sentence = Sentence(TEST_TOKENS)
    annotation = sentence.addAnnotation('mask', 2)

    offset = annotation.getPhraseOffset()
    self.assertEqual(offset, (2, 3))
def testGetPhraseOffsetMultiPhrase(self):
    """Expect getPhraseOffset() == (4, 7) for the ('mask', 4, 7) annotation."""
    sentence = Sentence(TEST_TOKENS)
    annotation = sentence.addAnnotation('mask', 4, 7)

    offset = annotation.getPhraseOffset()
    self.assertEqual(offset, (4, 7))
def testEquals(self):
    """Expect a Sentence to equal itself but not a Sentence built from it."""
    original = Sentence(TEST_TOKENS)
    duplicate = Sentence(original)

    self.assertEqual(original, original)
    self.assertNotEqual(original, duplicate)
def testInit(self):
    """Expect a new Sentence to match TEST_TOKENS in length and in content."""
    sentence = Sentence(TEST_TOKENS)

    expected_length = len(TEST_TOKENS)
    self.assertEqual(len(sentence), expected_length)

    contents = list(sentence)
    self.assertEqual(contents, TEST_TOKENS)
def testGetPhraseNumber(self):
    """Check phraseNumber(i) for indices 0 through 9."""
    sentence = Sentence(TEST_TOKENS)

    # Expected phraseNumber() result, listed by index.
    expected_numbers = [1, 1, 0, 2, 2, 0, 3, 3, 0, 4]

    for index, expected in enumerate(expected_numbers):
        actual = sentence.phraseNumber(index)
        self.assertEqual(actual, expected)
def testIsNotInsidePhrase(self):
    """Expect isInsidePhrase() to be false for the ('false', 1, 3) annotation."""
    sentence = Sentence(TEST_TOKENS)
    annotation = sentence.addAnnotation('false', 1, 3)

    inside = annotation.isInsidePhrase()
    self.assertFalse(inside)
def testGetPhraseOffset(self):
    """Check phraseOffsetFor() for the numbers 1 through 4."""
    sentence = Sentence(TEST_TOKENS)

    # (argument to phraseOffsetFor, expected offset pair)
    expectations = (
        (1, (0, 2)),
        (2, (3, 5)),
        (3, (6, 8)),
        (4, (9, 10)),
    )

    for phrase_number, expected_offset in expectations:
        actual_offset = sentence.phraseOffsetFor(phrase_number)
        self.assertEqual(actual_offset, expected_offset)
def testGetPoSTags(self):
    """Expect posTags(3, 7) to yield 'pos3'..'pos6' with 'mask' annotations present."""
    sentence = Sentence(TEST_TOKENS)
    sentence.addAnnotation('mask', 2, 4)
    sentence.addAnnotation('mask', 6)

    tags = list(sentence.posTags(3, 7))
    expected = ['pos3', 'pos4', 'pos5', 'pos6']
    self.assertListEqual(tags, expected)