示例#1
0
    def test_leaves_unmatched_punctuation_alone(self):
        # Every sample carries a quote or bracket on one side only, so no
        # matched pair wraps the text; the normalizer is expected to hand
        # each string back exactly as it received it.
        normalize = normalizers.StripMatchedPunctuationNormalizer()

        unmatched_samples = (
            # double-quote character
            '"foo', 'foo"', '"foo bar', 'foo bar"',
            # single-quote character
            "'foo", "foo'", "'foo bar", "foo bar'",
            # round brackets
            '(foo', 'foo)', '(foo bar', 'foo bar)',
            # square brackets
            '[foo', 'foo]', '[foo bar', 'foo bar]',
            # curly brackets
            '{foo', 'foo}', '{foo bar', 'foo bar}',
        )

        for sample in unmatched_samples:
            self.assertEqual(normalize(sample), sample)
示例#2
0
    def test_leaves_leading_and_trailing_punctuation_alone(self):
        # Punctuation at the start and/or end that does not form a matched
        # pair; each input is expected to come back unchanged.
        normalize = normalizers.StripMatchedPunctuationNormalizer()

        leading_only = (':foo', ';foo bar')
        trailing_only = ('foo.', 'foo bar!')
        both_ends = ('.foo,', '- foo bar!')

        for sample in leading_only + trailing_only + both_ends:
            self.assertEqual(normalize(sample), sample)
示例#3
0
    def test_leaves_matched_punctuation_not_wrapping_text_alone(self):
        # The pair is matched but encloses only one of the two words, not
        # the whole string; each input is expected to come back unchanged.
        normalize = normalizers.StripMatchedPunctuationNormalizer()

        partially_wrapped = [
            # quotes: double, then single
            '"foo" bar',
            'foo "bar"',
            "'foo' bar",
            "foo 'bar'",
            # brackets: round, square, curly
            '(foo) bar',
            'foo (bar)',
            '[foo] bar',
            'foo [bar]',
            '{foo} bar',
            'foo {bar}',
        ]

        for sample in partially_wrapped:
            self.assertEqual(normalize(sample), sample)
示例#4
0
    def test_leaves_normal_text_alone(self):
        # Text with no punctuation at all is expected to come back unchanged.
        normalize = normalizers.StripMatchedPunctuationNormalizer()

        for plain_text in ('foo', 'foo bar'):
            self.assertEqual(normalize(plain_text), plain_text)