Exemplo n.º 1
0
    def _classify_nodes(self, node):
        """Tag *node* with the 'dlinks' class when it is classified as a link node.

        The decision is delegated to the classifier registered under the
        "link_node_classifier" key of ``self._classifiers``. Nodes the
        classifier rejects are left untouched.

        NOTE(review): the original note listed "link, navigation, spam" as
        classification results; only the link case is handled in the code
        visible here.
        """
        link_classifier = self._classifiers["link_node_classifier"]
        if not link_classifier.classify(node):
            # Not a link node: nothing to mark.
            return
        Utils.add_class(node, 'dlinks')
Exemplo n.º 2
0
 def _mark_link_containers(self, node):
     """Tag *node* with a link-container class based on its link features.

     The feature dict for *node* is read from ``self._all_features`` and two
     ratios are compared against thresholds taken from ``self._config``:

     * ``link_length / text_length`` against ``link_threshold``. Nodes with
       no text, or at/below the threshold, are left untouched.
     * ``short_link_count / link_count`` against ``short_link_threshold``.

     Nodes above both thresholds are passed to ``_shrink_nav_node`` and then
     tagged by their short-link count: exactly 1 -> 'dnav', more than 3 ->
     'dnavb', anything else -> 'dnavg'. Nodes above only the first threshold
     are tagged 'dlst'.

     A node whose ``link_count`` is zero is left untouched, because the
     short-link ratio is undefined for it (dividing would raise
     ZeroDivisionError).
     """
     features = self._all_features[node]

     text_length = features["text_length"]
     if not text_length > 0:
         return
     # float() keeps this a true division on Python 2 as well.
     if not float(features['link_length']) / text_length > self._config["link_threshold"]:
         return

     link_count = features['link_count']
     if link_count == 0:
         # No links counted: the short-link ratio cannot be computed, so the
         # node is not classified as any kind of link container.
         return

     if float(features['short_link_count']) / link_count > self._config["short_link_threshold"]:
         self._shrink_nav_node(node)
         # Read the count only after shrinking, matching the original
         # statement order (the helper is opaque from here and might touch
         # the feature dict).
         short_link_count = features['short_link_count']
         if short_link_count == 1:
             # No replacement name is given here; presumably the helper's
             # default applies -- confirm against _replace_child_class.
             self._replace_child_class(node, 'dnav')
             Utils.add_class(node, 'dnav')
         elif short_link_count > 3:
             # Children tagged 'dnavb' are re-tagged 'dnavg'; the node itself
             # becomes the 'dnavb' container.
             self._replace_child_class(node, 'dnavb', new_class_name='dnavg')
             Utils.add_class(node, 'dnavb')
         else:
             self._replace_child_class(node, 'dnavg')
             Utils.add_class(node, 'dnavg')
     else:
         Utils.add_class(node, 'dlst')
Exemplo n.º 3
0
 def _mark_link_containers(self, node):
     """Tag *node* with a link-container class based on its link features.

     The feature dict for *node* is read from ``self._all_features`` and two
     ratios are compared against thresholds taken from ``self._config``:

     * ``link_length / text_length`` against ``link_threshold``. Nodes with
       no text, or at/below the threshold, are left untouched.
     * ``short_link_count / link_count`` against ``short_link_threshold``.

     Nodes above both thresholds are passed to ``_shrink_nav_node`` and then
     tagged by their short-link count: exactly 1 -> 'dnav', more than 3 ->
     'dnavb', anything else -> 'dnavg'. Nodes above only the first threshold
     are tagged 'dlst'.

     A node whose ``link_count`` is zero is left untouched, because the
     short-link ratio is undefined for it (dividing would raise
     ZeroDivisionError).
     """
     # TODO understand here, different kind of links marked as different
     # class.
     features = self._all_features[node]

     text_length = features["text_length"]
     if not text_length > 0:
         return
     # float() keeps this a true division on Python 2 as well.
     if not float(features['link_length']) / text_length > self._config["link_threshold"]:
         return

     link_count = features['link_count']
     if link_count == 0:
         # No links counted: the short-link ratio cannot be computed, so the
         # node is not classified as any kind of link container.
         return

     if float(features['short_link_count']) / link_count > self._config["short_link_threshold"]:
         self._shrink_nav_node(node)
         # Read the count only after shrinking, matching the original
         # statement order (the helper is opaque from here and might touch
         # the feature dict).
         short_link_count = features['short_link_count']
         if short_link_count == 1:
             # Just one short link: child 'dnav' classes are replaced with
             # the helper's default (an empty string, per the original note)
             # and the node itself is tagged 'dnav'.
             self._replace_child_class(node, 'dnav')
             Utils.add_class(node, 'dnav')
         elif short_link_count > 3:
             # More than three short links: children tagged 'dnavb' are
             # re-tagged 'dnavg' and the node becomes the 'dnavb' container.
             self._replace_child_class(node, 'dnavb', new_class_name='dnavg')
             Utils.add_class(node, 'dnavb')
         else:
             self._replace_child_class(node, 'dnavg')
             Utils.add_class(node, 'dnavg')
     else:
         Utils.add_class(node, 'dlst')