'Index <{}>, Word <{}>, Page <{}> Done, sleeping {}s!'. format(next_ind, word, page, wt)) self.driver.implicitly_wait(wt) if is_break: break in_client.close() self.close_browser() def close_browser(self): try: self.driver.close() except (NoSuchWindowException, ): pass if __name__ == '__main__': if sys.argv[1:]: params = [unicode(p, chardet).split('=') for p in sys.argv[1:]] WeixinSelenium().crawl(**dict(params)) while True: time.sleep(45) c_word = storage_word.pop() if storage_word else [None, 0] WeixinSelenium.logger('Break word: <{} {}>'.format(*c_word)) WeixinSelenium().crawl(*c_word) if not storage_word: break
self.driver.implicitly_wait(wt) if is_break: break in_client.close() self.close_browser() def close_browser(self): try: self.driver.close() except (NoSuchWindowException,): pass if __name__ == '__main__': if sys.argv[1:]: params = [unicode(p, chardet).split('=') for p in sys.argv[1:]] WeixinSelenium().crawl(**dict(params)) while True: time.sleep(45) c_word = storage_word.pop() if storage_word else [None, 0] WeixinSelenium.logger('Break word: <{} {}>'.format(*c_word)) WeixinSelenium().crawl(*c_word) if not storage_word: break
for bulk_words in total_words: try: pool.map(lambda w: cls().crawl_single(w), bulk_words) except Exception as e: cls.logger.info('Threads crawl error: type <{}>, msg <{}>'.format(e.__class__, e)) pool.close() pool.join() in_client.close() def close_browser(self): try: self.driver.close() except (NoSuchWindowException,): pass if __name__ == '__main__': WeixinPhantomjs.crawl_with_threads() while storage_word: word_page = storage_word.pop() WeixinPhantomjs.logger.info('Remain word: <{} {}>'.format(*word_page)) WeixinPhantomjs().crawl_single(*word_page) in_client.close() # Phantomjs