Пример #1
0
 def test_disallowed_replace_with(self):
     """Check that unsupported ``replace_with`` values are rejected.

     Both ``'surrogate'`` and an arbitrary unknown name are expected to
     raise ``InvalidReplaceWith`` when handed to ``Filth.replace_with``.
     """
     filth = Filth()
     for rejected in ('surrogate', 'something_invalid'):
         with self.assertRaises(InvalidReplaceWith):
             filth.replace_with(rejected)
Пример #2
0
 def test_nonoverlapping_filth(self):
     """Check that merging filth with disjoint spans is refused.

     The spans 0-3 and 4-7 do not overlap, so ``merge`` is expected to
     raise ``FilthMergeError`` whichever side initiates the merge.
     """
     leading = Filth(beg=0, end=3, text="the")
     trailing = Filth(beg=4, end=7, text="end")
     for receiver, argument in ((leading, trailing), (trailing, leading)):
         with self.assertRaises(FilthMergeError):
             receiver.merge(argument)
Пример #3
0
    def test_invalid_merge_documents(self):
        """Check that filth from different documents cannot be merged.

        The two spans overlap in position but carry different
        ``document_name`` values, so ``merge`` is expected to raise
        ``FilthMergeError`` in both directions.
        """
        in_one = Filth(0, 2, text='aa', document_name='one')
        in_two = Filth(1, 2, text='a', document_name='two')

        for receiver, argument in ((in_one, in_two), (in_two, in_one)):
            with self.assertRaises(FilthMergeError):
                receiver.merge(argument)
Пример #4
0
    def test_merged_to_string(self):
        """Check the string representation of a ``MergedFilth``."""
        class TestFilth(Filth):
            # The class name appears in the expected output below.
            type = 'test_filth'

        wide = TestFilth(0, 2, 'ab')
        narrow = Filth(1, 2, 'b')
        combined = MergedFilth(wide, narrow)

        expected = (
            "<MergedFilth filths="
            "[<TestFilth text='ab'>, <Filth text='b'>]>"
        )
        self.assertEqual(combined.__repr__(), expected)
Пример #5
0
 def test_disallowed_replace_with(self):
     """Check that ``replace_with`` refuses values it does not support.

     Each of the listed values is expected to raise
     ``InvalidReplaceWith`` when passed to ``Filth.replace_with``.
     """
     filth = Filth()
     unsupported_values = ['surrogate', 'something_invalid']
     for value in unsupported_values:
         with self.assertRaises(InvalidReplaceWith):
             filth.replace_with(value)
Пример #6
0
 def test_nonoverlapping_filth(self):
     """Check that two filths with separate spans cannot be merged.

     ``merge`` is expected to raise ``FilthMergeError`` for the spans
     0-3 and 4-7, no matter which filth the call is made on.
     """
     first = Filth(beg=0, end=3, text="the")
     second = Filth(beg=4, end=7, text="end")
     attempts = [(first, second), (second, first)]
     for target, other in attempts:
         with self.assertRaises(FilthMergeError):
             target.merge(other)
Пример #7
0
    def test_equality(self):
        """Test the filth equality and inequality operators.

        Every comparison starts from the same reference filth (``beg=0``,
        ``end=5``, ``text='hello'``) and varies one or two fields, so each
        assertion shows which attribute is expected to take part in the
        comparison.

        ``assertEqual`` applies ``==`` and ``assertNotEqual`` applies ``!=``
        to its operands, so both operators are still exercised; unlike
        ``assertTrue(a == b)`` they report the two operands on failure.
        """
        def make(**overrides):
            """Build the reference Filth with the given fields replaced."""
            fields = dict(beg=0, end=5, text='hello')
            fields.update(overrides)
            return Filth(**fields)

        # Identical fields compare equal; a differing ``match`` object is
        # expected not to affect the result.
        self.assertEqual(make(), make())
        self.assertEqual(make(), make(match=re.match('123', '1234')))

        # A difference in position or text is expected to break equality.
        self.assertNotEqual(make(), make(beg=1))
        self.assertNotEqual(make(), make(end=6))
        self.assertNotEqual(make(), make(text='hellou'))

        # document_name: same name is equal; a missing or different name
        # is unequal, whichever side of the operator it appears on.
        self.assertEqual(
            make(document_name='test'),
            make(document_name='test'))
        self.assertNotEqual(make(), make(document_name='test'))
        self.assertNotEqual(make(document_name='test'), make())
        self.assertNotEqual(
            make(document_name='test'),
            make(document_name='another_test'))

        # detector_name: same expectations as for document_name.
        self.assertEqual(
            make(detector_name='tester'),
            make(detector_name='tester'))
        self.assertNotEqual(
            make(detector_name='tester'),
            make(detector_name='another_tester'))
        self.assertNotEqual(make(detector_name='tester'), make())
        self.assertNotEqual(make(), make(detector_name='tester'))

        # Both names set: equal only when both of them match.
        self.assertEqual(
            make(document_name='test', detector_name='tester'),
            make(document_name='test', detector_name='tester'))
        self.assertNotEqual(
            make(document_name='test', detector_name='tester'),
            make(document_name='test', detector_name='another_tester'))
        self.assertNotEqual(
            make(document_name='test', detector_name='tester'),
            make(document_name='another_test', detector_name='tester'))
Пример #8
0
    def test_filth_string(self):
        """Check the string forms produced by ``Filth``.

        Covers ``str()``, ``__repr__()`` and ``_to_string()`` on a filth
        without text, ``str()`` with text and with a document name, and
        ``_to_string()`` with explicit attribute lists.
        """
        # Without text, all three entry points are expected to agree.
        no_text = "<Filth text=''>"
        self.assertEqual(str(Filth(beg=0, end=5)), no_text)
        self.assertEqual(Filth(beg=0, end=5).__repr__(), no_text)
        self.assertEqual(Filth(beg=0, end=5)._to_string(), no_text)

        with_text = Filth(beg=0, end=5, text='hello')
        self.assertEqual(str(with_text), "<Filth text='hello'>")

        with_document = Filth(
            beg=0, end=5, text='hello', document_name='hello.txt')
        self.assertEqual(
            str(with_document),
            "<Filth text='hello' document_name='hello.txt'>")

        # The attribute list selects which fields are rendered, in order.
        filth = Filth(beg=0, end=5, text='hello', document_name='hello.txt')
        cases = [
            (['text'],
             "<Filth text='hello'>"),
            (['beg', 'end', 'text'],
             "<Filth beg=0 end=5 text='hello'>"),
            (['text', 'document_name'],
             "<Filth text='hello' document_name='hello.txt'>"),
        ]
        for attributes, expected in cases:
            self.assertEqual(
                filth._to_string(attributes=attributes), expected)
Пример #9
0
    def test_merging(self):
        """Check the filths returned by ``Scrubber._merge_filths``.

        Four inputs are covered: disjoint spans given out of order,
        overlapping spans, spans spread over two named documents, and a
        mix of filths with and without a document name.
        """
        merge = scrubadub.Scrubber._merge_filths

        # Disjoint spans: the same spans are expected back in ascending
        # order.
        merged = merge([
            Filth(beg=6, end=7, text='a'),
            Filth(beg=2, end=3, text='a'),
            Filth(beg=0, end=1, text='a'),
            Filth(beg=4, end=5, text='a'),
        ])
        expected_spans = [(0, 1), (2, 3), (4, 5), (6, 7)]
        self.assertEqual([(m.beg, m.end) for m in merged], expected_spans)

        # Five input spans are expected to collapse into two.
        merged = merge([
            Filth(beg=7, end=8, text='a'),
            Filth(beg=0, end=1, text='a'),
            Filth(beg=4, end=5, text='a'),
            Filth(beg=0, end=3, text='aaa'),
            Filth(beg=5, end=8, text='aaa'),
        ])
        expected_spans = [(0, 3), (4, 8)]
        self.assertEqual([(m.beg, m.end) for m in merged], expected_spans)

        # Two documents: results are expected grouped by document name,
        # with merging applied inside each document only.
        merged = merge([
            Filth(beg=5, end=8, text='aaa', document_name='a'),
            Filth(beg=0, end=3, text='aaa', document_name='b'),
            Filth(beg=4, end=5, text='a', document_name='b'),
            Filth(beg=7, end=8, text='a', document_name='a'),
            Filth(beg=0, end=1, text='a', document_name='a'),
        ])
        expected_rows = [('a', 0, 1), ('a', 5, 8), ('b', 0, 3), ('b', 4, 5)]
        self.assertEqual(
            [(m.document_name, m.beg, m.end) for m in merged],
            expected_rows)

        # Filths without a document name are expected ahead of named ones.
        merged = merge([
            Filth(beg=5, end=8, text='aaa', document_name=None),
            Filth(beg=0, end=3, text='aaa', document_name='b'),
            Filth(beg=4, end=5, text='a', document_name='b'),
            Filth(beg=7, end=8, text='a', document_name=None),
            Filth(beg=0, end=1, text='a', document_name='a'),
        ])
        expected_rows = [(None, 5, 8), ('a', 0, 1), ('b', 0, 3), ('b', 4, 5)]
        self.assertEqual(
            [(m.document_name, m.beg, m.end) for m in merged],
            expected_rows)
Пример #10
0
    def test_sorting(self):
        """Check the ordering produced by ``Scrubber._sort_filths``.

        Three inputs are covered: disjoint spans, spans that share a
        start position or overlap, and spans from two named documents.
        """
        sort = scrubadub.Scrubber._sort_filths

        # Disjoint spans are expected in ascending order of position.
        ordered = sort([
            Filth(beg=6, end=7),
            Filth(beg=2, end=3),
            Filth(beg=0, end=1),
            Filth(beg=4, end=5),
        ])
        expected_spans = [(0, 1), (2, 3), (4, 5), (6, 7)]
        self.assertEqual([(s.beg, s.end) for s in ordered], expected_spans)

        # With equal starts, (0, 3) is expected ahead of (0, 1).
        ordered = sort([
            Filth(beg=7, end=8),
            Filth(beg=0, end=1),
            Filth(beg=4, end=5),
            Filth(beg=0, end=3),
            Filth(beg=5, end=8),
        ])
        expected_spans = [(0, 3), (0, 1), (4, 5), (5, 8), (7, 8)]
        self.assertEqual([(s.beg, s.end) for s in ordered], expected_spans)

        # Filths from document 'a' are expected before those from 'b'.
        ordered = sort([
            Filth(beg=5, end=8, document_name='a'),
            Filth(beg=0, end=3, document_name='b'),
            Filth(beg=4, end=5, document_name='b'),
            Filth(beg=7, end=8, document_name='a'),
            Filth(beg=0, end=1, document_name='a'),
        ])
        expected_rows = [
            ('a', 0, 1), ('a', 5, 8), ('a', 7, 8), ('b', 0, 3), ('b', 4, 5),
        ]
        self.assertEqual(
            [(s.document_name, s.beg, s.end) for s in ordered],
            expected_rows)