Exemplo n.º 1
0
    def test_calculate_links_length_and_clean_chunk(self):
        """Check link-length accounting and tag stripping on nested anchors.

        The markup holds an anchor nested inside another anchor. After the
        cleaning step the chunk is expected to contain only the four ``asd``
        text runs, and ``length_of_links`` is expected to be 9.
        """
        markup = '<p><a href="#">asd<a href="#">asd</a>asd</a>asd</p>'
        chunk = parser.Chunk(markup)

        # Run the raw-length step first, then the link/clean step under test.
        chunk._calculate_length_with_tags()
        chunk._calculate_links_length_and_clean_chunk(self.cleaner)

        expected_text = 'asd' * 4
        self.assertEqual(chunk.chunk, expected_text)
        self.assertEqual(chunk.length_of_links, 9)
Exemplo n.º 2
0
    def test_make_calculations(self):
        """Check that ``make_calculations`` yields the expected chunk weight.

        The expected weight is rebuilt here from the fixtures: the text
        density (useful text length over raw chunk length), plus the
        punctuation count divided by 100, plus 1, plus one minus the ratio
        of the punctuation count to the useful text length. Punctuation
        marks are counted in the raw ``self.chunk`` string.
        """
        chunk = parser.Chunk(self.chunk)
        chunk.make_calculations(self.cleaner, self.punctuation)

        useful_length = len(self.useful_text)
        text_density = useful_length / len(self.chunk)
        count_of_punctuation_marks = sum(
            self.chunk.count(mark) for mark in self.punctuation)

        weight = (text_density + count_of_punctuation_marks / 100) + 1
        weight += (1 - count_of_punctuation_marks / useful_length)

        # The weight is built from float divisions, so compare with a
        # tolerance instead of demanding bit-for-bit equality.
        self.assertAlmostEqual(chunk.weight, weight)
Exemplo n.º 3
0
    def test_calculate_count_of_punctuation_marks(self):
        """Check punctuation counting and its required call order.

        On a fresh chunk, both the punctuation step and the link/clean step
        are expected to raise ``ChunkProcedureException``. Once the length,
        link/clean and punctuation steps have run in that order, the stored
        count must equal the number of punctuation marks found in the raw
        ``self.chunk`` string.
        """
        expected_count = sum(
            self.chunk.count(mark) for mark in self.punctuation)
        chunk = parser.Chunk(self.chunk)

        # Neither step may run on a chunk that has had nothing computed yet.
        with self.assertRaises(parser.ChunkProcedureException):
            chunk._calculate_count_of_punctuation_marks(self.punctuation)

        with self.assertRaises(parser.ChunkProcedureException):
            chunk._calculate_links_length_and_clean_chunk(self.cleaner)

        # Now walk through the steps in the accepted order.
        chunk._calculate_length_with_tags()
        chunk._calculate_links_length_and_clean_chunk(self.cleaner)
        chunk._calculate_count_of_punctuation_marks(self.punctuation)

        self.assertEqual(chunk.count_of_punctuation_marks, expected_count)
Exemplo n.º 4
0
    def test_calculate_length_without_tags(self):
        """Check the tag-free length and the preconditions guarding it.

        ``_calculate_length_without_tags`` is expected to raise
        ``ChunkProcedureException`` both on a fresh chunk and after only the
        raw-length step has run. After the link/clean step it must succeed
        and report the length of ``self.useful_text``.
        """
        chunk = parser.Chunk(self.chunk)

        # Stage 1: nothing computed yet.
        with self.assertRaises(parser.ChunkProcedureException):
            chunk._calculate_length_without_tags()

        # Stage 2: raw length known, but the chunk is still not cleaned.
        chunk._calculate_length_with_tags()
        self.assertFalse(chunk._cleaned)
        with self.assertRaises(parser.ChunkProcedureException):
            chunk._calculate_length_without_tags()

        # Stage 3: after the link/clean step; the fixture is expected to
        # yield a link length of zero.
        chunk._calculate_links_length_and_clean_chunk(self.cleaner)
        self.assertEqual(chunk._links_length, 0)

        chunk._calculate_length_without_tags()
        expected_length = len(self.useful_text)
        self.assertEqual(chunk.length_without_tags, expected_length)
Exemplo n.º 5
0
 def test_calculate_length_with_tags(self):
     """Check that the raw length equals the length of the input string."""
     chunk = parser.Chunk(self.chunk)

     chunk._calculate_length_with_tags()

     expected_length = len(self.chunk)
     self.assertEqual(chunk.length_with_tags, expected_length)