def test_strips_whitespace_and_punctuation(self):
    # Inputs that carry a mix of whitespace and punctuation at one or both
    # ends must come back as just the inner text.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    cases = (
        # whitespace and punctuation before the text
        ('- foo', 'foo'),
        (': foo bar', 'foo bar'),
        (' _\tfoo', 'foo'),
        (' . foo bar', 'foo bar'),
        # whitespace and punctuation after the text
        ('foo. ', 'foo'),
        ('foo bar !', 'foo bar'),
        ('foo ? ', 'foo'),
        ('foo bar\t@ ', 'foo bar'),
        # whitespace and punctuation on both sides
        ('- foo.', 'foo'),
        (' :foo bar !\t\t', 'foo bar'),
        ('#foo\t&', 'foo'),
        (' ^ foo bar $', 'foo bar'),
    )
    # Checked in order; the first mismatch fails the test.
    for raw, expected in cases:
        self.assertEqual(normalize(raw), expected)
def test_strips_punctuation(self):
    # Punctuation directly adjacent to the text (no whitespace involved)
    # must be removed from the start, the end, or both.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    cases = (
        # punctuation before the text
        ('-foo', 'foo'),
        (':foo bar', 'foo bar'),
        ('_foo', 'foo'),
        ('.foo bar', 'foo bar'),
        # punctuation after the text
        ('foo.', 'foo'),
        ('foo bar!', 'foo bar'),
        ('foo?', 'foo'),
        ('foo bar@', 'foo bar'),
        # punctuation on both sides
        ('-foo.', 'foo'),
        (':foo bar!', 'foo bar'),
        ('#foo&', 'foo'),
        ('^foo bar$', 'foo bar'),
    )
    # Checked in order; the first mismatch fails the test.
    for raw, expected in cases:
        self.assertEqual(normalize(raw), expected)
def test_strips_whitespace(self):
    # Spaces and tabs at the start, the end, or both ends of the input
    # must be removed; the single space inside 'foo bar' is kept.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    cases = (
        # whitespace before the text
        (' foo', 'foo'),
        (' foo bar', 'foo bar'),
        ('\tfoo', 'foo'),
        ('\tfoo bar', 'foo bar'),
        # whitespace after the text
        ('foo ', 'foo'),
        ('foo bar ', 'foo bar'),
        ('foo\t', 'foo'),
        ('foo bar\t', 'foo bar'),
        # whitespace on both sides
        (' foo ', 'foo'),
        (' foo bar ', 'foo bar'),
        (' foo\t', 'foo'),
        (' foo bar\t', 'foo bar'),
    )
    # Checked in order; the first mismatch fails the test.
    for raw, expected in cases:
        self.assertEqual(normalize(raw), expected)
def test_does_not_strip_matched_punctuation(self):
    # Paired quotes and brackets that wrap a word or phrase must survive
    # normalization: every input below is expected back unchanged.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    unchanged = (
        # --- quotes ---
        # pair wraps the whole input: " then '
        '"foo"',
        '"foo bar"',
        "'foo'",
        "'foo bar'",
        # pair wraps only the first word: " then '
        '"foo" bar',
        "'foo' bar",
        # pair wraps only the last word: " then '
        'foo "bar"',
        "foo 'bar'",
        # --- brackets ---
        # pair wraps the whole input: () then []
        '(foo)',
        '(foo bar)',
        "[foo]",
        "[foo bar]",
        # pair wraps only the first word: () then []
        '(foo) bar',
        "[foo] bar",
        # pair wraps only the last word: () then []
        'foo (bar)',
        "foo [bar]",
    )
    # Checked in order; the first mismatch fails the test.
    for text in unchanged:
        self.assertEqual(normalize(text), text)
def test_leaves_internal_whitespace_alone(self):
    # A newline followed by a space in the middle of the text must pass
    # through the normalizer untouched.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    text = 'Some text with odd\n internal whitespace'
    self.assertEqual(normalize(text), text)
def test_leaves_normal_text_alone(self):
    # Text with nothing to strip at either end must be returned as-is.
    normalize = normalizers.StripWhitespaceAndPunctuationNormalizer()
    for text in ('foo', 'foo bar'):
        self.assertEqual(normalize(text), text)