Exemplo n.º 1
0
                    'Index <{}>, Word <{}>, Page <{}> Done, sleeping {}s!'.
                    format(next_ind, word, page, wt))
                self.driver.implicitly_wait(wt)

            if is_break:
                break

        in_client.close()
        self.close_browser()

    def close_browser(self):
        """Call ``close()`` on ``self.driver`` on a best-effort basis.

        A ``NoSuchWindowException`` raised by the close call is swallowed;
        any other exception propagates to the caller.
        """
        browser = self.driver
        try:
            browser.close()
        except NoSuchWindowException:
            # Nothing left to close, so treat this as success.
            return


if __name__ == '__main__':
    # Optional ``key=value`` command-line arguments are decoded with the
    # ``chardet`` encoding and passed as keyword arguments to a first crawl.
    if sys.argv[1:]:
        # Split on the first '=' only: a value that itself contains '='
        # (e.g. ``word=a=b``) must still produce a 2-item [key, value] pair,
        # otherwise dict() raises ValueError.
        params = [unicode(p, chardet).split('=', 1) for p in sys.argv[1:]]
        WeixinSelenium().crawl(**dict(params))

    # Crawl the entries held in ``storage_word``. The loop body always runs
    # at least once: when nothing is stored, one crawl is made with (None, 0).
    while True:
        time.sleep(45)
        c_word = storage_word.pop() if storage_word else [None, 0]
        # NOTE(review): ``logger`` is called directly here -- confirm it is a
        # plain callable and not an object that needs ``.info(...)``.
        WeixinSelenium.logger('Break word: <{} {}>'.format(*c_word))
        WeixinSelenium().crawl(*c_word)

        # Stop once ``storage_word`` has been drained.
        if not storage_word:
            break
Exemplo n.º 2
0
                self.driver.implicitly_wait(wt)

            if is_break:
                break

        in_client.close()
        self.close_browser()

    def close_browser(self):
        """Close ``self.driver``, ignoring ``NoSuchWindowException``.

        Exceptions of any other type raised by ``close()`` are not handled
        here and reach the caller.
        """
        driver = self.driver
        try:
            driver.close()
        except NoSuchWindowException:
            # Best effort only: a missing window is not an error here.
            return


if __name__ == '__main__':
    # Command-line arguments of the form ``key=value`` (decoded using the
    # ``chardet`` encoding) are forwarded to an initial crawl as keywords.
    if sys.argv[1:]:
        # maxsplit=1 keeps any further '=' characters inside the value, so
        # every argument yields exactly [key, value] and dict() does not
        # fail with ValueError on values such as ``word=a=b``.
        params = [unicode(p, chardet).split('=', 1) for p in sys.argv[1:]]
        WeixinSelenium().crawl(**dict(params))

    # Drain ``storage_word``. This is a do-while: even with nothing stored,
    # one pass runs and crawls with the fallback entry [None, 0].
    while True:
        time.sleep(45)
        c_word = storage_word.pop() if storage_word else [None, 0]
        # NOTE(review): ``logger`` is invoked as a function here -- verify it
        # is callable; a logger object would need ``.info(...)`` instead.
        WeixinSelenium.logger('Break word: <{} {}>'.format(*c_word))
        WeixinSelenium().crawl(*c_word)

        # Leave the loop as soon as no stored entries remain.
        if not storage_word:
            break


Exemplo n.º 3
0
        for bulk_words in total_words:
            try:
                pool.map(lambda w: cls().crawl_single(w), bulk_words)
            except Exception as e:
                cls.logger.info('Threads crawl error: type <{}>, msg <{}>'.format(e.__class__, e))

        pool.close()
        pool.join()
        in_client.close()

    def close_browser(self):
        """Invoke ``self.driver.close()`` and tolerate a missing window.

        Only ``NoSuchWindowException`` is suppressed; every other exception
        raised by the close call propagates unchanged.
        """
        target = self.driver
        try:
            target.close()
        except NoSuchWindowException:
            # No window to close, so there is nothing further to do.
            return


if __name__ == '__main__':
    # Run the threaded crawl first.
    WeixinPhantomjs.crawl_with_threads()

    # Then process whatever is still held in ``storage_word``, one entry at a
    # time, until it is empty.
    while storage_word:
        word_page = storage_word.pop()
        WeixinPhantomjs.logger.info('Remain word: <{} {}>'.format(*word_page))
        # Each entry is unpacked into positional arguments for a fresh crawler.
        WeixinPhantomjs().crawl_single(*word_page)

        # NOTE(review): ``in_client`` is closed on every iteration -- confirm
        # this is intended rather than a single close after the loop.
        in_client.close()

    # Phantomjs