Пример #1
0
 def wait_until_title_contains_keyword(self):
     """Wait up to five seconds for the page title to contain ``self.query``.

     A timeout is not propagated to the caller: it is reported at debug
     level as a ``SeleniumSearchError`` instance that carries the scraper
     name, the query and the title the browser currently shows.
     """
     waiter = WebDriverWait(self.webdriver, 5)
     title_has_query = EC.title_contains(self.query)
     try:
         waiter.until(title_has_query)
     except TimeoutException:
         template = '{}: Keyword "{}" not found in title: {}'
         details = template.format(
             self.name, self.query, self.webdriver.title)
         # The error object is only logged, never raised.
         logger.debug(SeleniumSearchError(details))
Пример #2
0
    def wait_until_serp_loaded(self):
        """
        Wait until the requested results page has finished loading.

        For 'normal' searches the page counts as loaded once
        ``self.page_number`` appears as text inside the engine's "current
        page" pagination element. Engines without a known pagination selector
        (e.g. duckduckgo) and every other search type fall back to
        ``self.wait_until_title_contains_keyword()``.

        Raises:
            SeleniumSearchError: the wait timed out and the pagination
                element cannot be found on the page at all, which is treated
                as a sign of being blocked. ``self.quit()`` is called first.
        """
        # CSS selector of the element holding the current page number,
        # per search engine. duckduckgo is absent: it has no pagination.
        pagination_selectors = {
            'google': '#navcnt td.cur',
            'yandex': '.pager__item_current_yes font font',
            'bing': 'nav li a.sb_pagS',
            'yahoo': '.compPagination strong',
            'baidu': '#page .fk_cur + .pc',
            'ask': '#paging .pgcsel .pg',
        }

        selector = None
        if self.search_type == 'normal':
            selector = pagination_selectors.get(self.search_engine_name)

        if selector is None:
            # No pagination element to watch. Without this guard an engine
            # such as duckduckgo left `selector` unbound and the method died
            # with UnboundLocalError inside its own error handler.
            self.wait_until_title_contains_keyword()
            return

        try:
            # Fixed one-second pause before polling, kept from the original.
            time.sleep(1)
            WebDriverWait(self.webdriver, 5).until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, selector),
                    str(self.page_number)
                )
            )
        except TimeoutException:
            self._save_debug_screenshot()
            try:
                # Only the element's existence matters here: if it is there,
                # the timeout is tolerated and scraping continues.
                self.webdriver.find_element(By.CSS_SELECTOR, selector)
            except NoSuchElementException:
                logger.error('Skip it, no such element - SeleniumSearchError')
                self.quit()
                raise SeleniumSearchError('Stop Scraping, seems we are blocked')
        except Exception:
            # Best effort: any other failure is logged and ignored.
            logger.error('Scrape Exception pass. Selector: %s', selector)
            self._save_debug_screenshot()
Пример #3
0
    def wait_until_serp_loaded(self):
        """
        Wait until the requested results page has finished loading.

        For 'normal' searches the page counts as loaded once an
        engine-specific marker element becomes visible (this variant does not
        compare its text with ``self.page_number``). Engines without a known
        selector (e.g. duckduckgo) and every other search type fall back to
        ``self.wait_until_title_contains_keyword()``.

        Raises:
            SeleniumSearchError: the marker element never showed up and the
                title check raised ``TimeoutException``; ``self.quit()`` is
                called first. NOTE(review): this only triggers if
                ``wait_until_title_contains_keyword`` propagates the timeout,
                which is not visible from here - confirm.
        """
        # CSS selector of the element that signals a loaded results page,
        # per search engine. duckduckgo is absent: it has no pagination.
        marker_selectors = {
            'google': '#resultStats',
            'yandex': '.pager__item_current_yes font font',
            'bing': 'nav li a.sb_pagS',
            'yahoo': '.compPagination strong',
            'baidu': '#page .fk_cur + .pc',
            'ask': '#paging .pgcsel .pg',
        }

        selector = None
        if self.search_type == 'normal':
            selector = marker_selectors.get(self.search_engine_name)

        if selector is None:
            # No marker element to watch. Without this guard an engine such
            # as duckduckgo left `selector` unbound and the method died with
            # UnboundLocalError inside its own error handler.
            self.wait_until_title_contains_keyword()
            return

        try:
            WebDriverWait(self.webdriver, 5).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, selector)))
        except (TimeoutException, NoSuchElementException):
            # WebDriverWait swallows NoSuchElementException by default and
            # reports a missing element as TimeoutException, so both must be
            # handled here for the title fallback to ever run.
            logger.error(
                'No such element. Seeing if title matches before raising SeleniumSearchError'
            )
            self._save_debug_screenshot()
            try:
                self.wait_until_title_contains_keyword()
            except TimeoutException:
                self.quit()
                raise SeleniumSearchError(
                    'Stop Scraping, seems we are blocked')
        except Exception as e:
            # Best effort: any other failure is logged and ignored.
            logger.error('Scrape Exception pass. Selector: %s', selector)
            logger.error('Error: %s', e)
            self._save_debug_screenshot()