class Getter():
    """Harvest proxies with the crawler and store new ones in MySQL
    until the pool reaches its size limit."""

    def __init__(self):
        self.Mysql = MysqlClient()   # proxy storage backend
        self.crawler = Crawler()     # proxy source

    def is_over_threshold(self):
        """Determine whether the agent pool limit has been reached.

        Returns:
            bool: True when the stored proxy count is at or above
            POOL_UPPER_THRESHOLD.
        """
        # The comparison already yields a bool; no if/else needed.
        return self.Mysql.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Run every registered crawl function and persist any proxy
        not already stored. No-op when the pool is full."""
        print('Get the execution')
        if self.is_over_threshold():
            return  # guard clause: pool already full
        for callback_label in range(self.crawler.__CrawlFuncCount__):
            callback = self.crawler.__CrawlFunc__[callback_label]
            # Get an agent
            proxies = self.crawler.get_proxies(callback)
            sys.stdout.flush()
            for proxy in proxies:
                # Fix: original was `if exists: pass else: add` — invert
                # the condition instead of using an empty branch.
                if not self.Mysql.exists(proxy):
                    print(proxy)
                    self.Mysql.add(proxy)
class Getter():
    """Run all spider functions and store the harvested proxies in MySQL.

    NOTE(review): this class has the same name as the crawler-based
    Getter defined earlier in the file; whichever is defined last wins
    at import time — confirm which one callers actually want.
    """

    def __init__(self):
        self.mysql = MysqlClient()  # proxy storage backend
        self.spider = Spider()      # proxy source

    def is_over_max(self):
        """Return True when the pool holds at least MAX_POOL_COUNT proxies."""
        # Return the comparison directly instead of if/else True/False.
        return self.mysql.count() >= MAX_POOL_COUNT

    def run(self):
        """Collect proxies from every spider function unless the pool is
        full, then close the MySQL connection."""
        print('爬虫程序开始执行')
        if not self.is_over_max():
            # Fixed local-variable typo: callback_lable -> callback_label.
            for callback_label in range(self.spider.__SpiderFuncCount__):
                callback = self.spider.__SpiderFunc__[callback_label]
                proxies = self.spider.get_proxies(callback)
                for proxy in proxies:
                    self.mysql.add(proxy)
        # Close regardless of whether the pool was full so the connection
        # is not leaked. (Original one-line source was ambiguous about
        # whether close() sat inside the `if` — confirm against history.)
        self.mysql.close()
def save_to_mysql(self, query, title, url):
    """Persist one (query, title, url) search result via a fresh MysqlClient."""
    client = MysqlClient()
    client.add(query, title, url)
def save_to_mysql(self, query, title, url):
    """Store a (query, title, url) record in MySQL through a throwaway client."""
    # Client is created, used once, and discarded — same as the original.
    MysqlClient().add(query, title, url)