def add_links(self, links, priority, depth=0, source_url=""):
    """Store every new, acceptable link from ``links`` in the link database.

    Each raw link is passed through ``URLProcessor.validate`` together with
    ``source_url``.  The result is stored only if ``self._evaluate_link``
    accepts it, ``self.link_db`` does not already contain it, and its
    computed depth does not exceed ``self.max_url_depth``.

    :param links: sized iterable of raw links (``len()`` is taken for logging).
    :param priority: priority handed to ``link_db.add_link`` for each link.
    :param depth: depth of the source page.  Currently unused, because the
        active depth policy (``IgnoreDepthPolicy``) takes no arguments; kept
        so that callers and alternative depth policies keep working.
    :param source_url: URL of the page the links came from; forwarded to
        ``URLProcessor.validate``.
    """
    self.logger.debug('Trying to add %d links', len(links))
    added = 0
    for link in links:
        _link = URLProcessor.validate(link, source_url)
        # Same short-circuit order as before: the DB lookup only happens
        # for links that pass the evaluation filter.
        if not self._evaluate_link(_link) or self.link_db.is_in_base(_link):
            continue

        # Alternative policies (SimpleCrawlingDepthPolicy,
        # RealDepthCrawlingDepthPolicy) can be swapped in here; they take
        # the link plus source_url/depth or the link DB respectively.
        _depth = IgnoreDepthPolicy.calculate_depth()
        if _depth > self.max_url_depth:
            continue

        self.link_db.add_link(_link, priority, _depth)
        # Log after the insert succeeded, and report the real priority
        # (the message previously printed the depth under that label).
        self.logger.debug("Added:%s with priority %s", _link, priority)
        added += 1
    self.logger.debug("Added %d new links into DB.", added)
def add_links(self, links, priority, depth=0, source_url=""):
    """Store every new, acceptable link from ``links`` in the link database.

    Each raw link is passed through ``URLProcessor.validate`` together with
    ``source_url``.  The result is stored only if ``self._evaluate_link``
    accepts it, ``self.link_db`` does not already contain it, and its
    computed depth does not exceed ``self.max_url_depth``.  When
    ``source_url`` is non-empty, ``link_db.points(source_url, link)`` is
    also called after the insert.

    Storage is best-effort: an exception raised while storing one link is
    logged and printed, and processing continues with the next link.

    :param links: sized iterable of raw links (``len()`` is taken for logging).
    :param priority: priority handed to ``link_db.add_link`` for each link.
    :param depth: depth of the source page.  Currently unused, because the
        active depth policy (``IgnoreDepthPolicy``) takes no arguments; kept
        so that callers and alternative depth policies keep working.
    :param source_url: URL of the page the links came from.
    """
    self.logger.debug('Trying to add %d links', len(links))
    added = 0
    for link in links:
        _link = URLProcessor.validate(link, source_url)
        # The DB lookup only happens for links that pass the filter.
        if not self._evaluate_link(_link) or self.link_db.is_in_base(_link):
            continue

        # Alternative policies (SimpleCrawlingDepthPolicy,
        # RealDepthCrawlingDepthPolicy) can be swapped in here; they take
        # the link plus source_url/depth or the link DB respectively.
        _depth = IgnoreDepthPolicy.calculate_depth()
        if _depth > self.max_url_depth:
            continue

        try:
            self.link_db.add_link(_link, priority, _depth)
            if source_url:
                self.link_db.points(source_url, _link)
        except Exception as e:
            # str(e) instead of e.message: the attribute is deprecated and
            # missing on many exception types, and an AttributeError raised
            # here would abort the whole batch and hide the real error.
            message = "Add links error:" + str(_link) + "Message:" + str(e)
            self.logger.error(message)
            # Single parenthesised argument: same output under the Python 2
            # print statement and the Python 3 print function.
            print(message)
        else:
            self.logger.debug("Added:%s with priority %s", _link, priority)
            added += 1
    self.logger.debug("Added %d new links into DB.", added)