示例#1
0
 def calculate_best_node(self, doc):
     """Pick the node to treat as the main content of ``doc``.

     The first result of ``Parser.css_select`` for the
     ``#sina_keyword_ad_area2`` selector is returned when there is at
     least one; otherwise the choice is delegated to
     ``ContentExtractor.calculate_best_node``.

     NOTE(review): the selector looks site-specific (presumably a Sina
     page layout) -- confirm against the pages this extractor targets.
     """
     matches = Parser.css_select(doc, '#sina_keyword_ad_area2')
     if len(matches) >= 1:
         return matches[0]
     # Nothing matched the selector: hand back the generic extractor's
     # result unchanged (whatever that call returns).
     return ContentExtractor.calculate_best_node(self, doc)
示例#2
0
 def __init__(self, config):
     """Initialise the extractor by delegating to ``ContentExtractor``.

     ``config`` is passed straight through to
     ``ContentExtractor.__init__``; no additional state is set here.
     """
     ContentExtractor.__init__(self, config)