Python SearchEngineScrape示例

编程语言: Python

命名空间/包名称: GoogleScraper.scraping

hotexamples.com的示例: 4

Python SearchEngineScrape - 已找到4个示例。这些是从开源项目中提取的最受好评的GoogleScraper.scraping.SearchEngineScrape现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

__init__(2)

blocking_search(1)

示例#1

显示文件

文件： selenium_mode.py 项目： shankrsingh/GoogleScraper

    def __init__(self, *args, captcha_lock=None, browser_num=1, **kwargs):
        """Create a new SelScraper thread Instance.

        Args:
            captcha_lock: To sync captcha solving (stdin)
            proxy: Optional, if set, use the proxy to route all scrapign through it.
            browser_num: A unique, semantic number for each thread.
        """
        self.search_input = None

        threading.Thread.__init__(self)
        SearchEngineScrape.__init__(self, *args, **kwargs)

        self.browser_type = Config['SELENIUM'].get('sel_browser', 'chrome').lower()
        self.browser_num = browser_num
        self.captcha_lock = captcha_lock
        self.scrape_method = 'selenium'

        self.xvfb_display = Config['SELENIUM'].get('xvfb_display', None)

        self.search_param_values = self._get_search_param_values()

        # get the base search url based on the search engine.
        self.base_search_url = get_base_search_url_by_search_engine(self.search_engine_name, self.scrape_method)
        super().instance_creation_info(self.__class__.__name__)

示例#2

显示文件

文件： http_mode.py 项目： DMoneyLin/GoogleScraper

    def __init__(self, config, *args, time_offset=0.0, **kwargs):
        """Initialize an HttScrape object to scrape over blocking http.

        HttpScrape inherits from SearchEngineScrape
        and from threading.Timer.
        """
        threading.Timer.__init__(self, time_offset, self.search)
        SearchEngineScrape.__init__(self, config, *args, **kwargs)

        # Bind the requests module to this instance such that each
        # instance may have an own proxy
        self.requests = __import__('requests')

        # initialize the GET parameters for the search request
        self.search_params = {}

        # initialize the HTTP headers of the search request
        # to some base values that mozilla uses with requests.
        # the Host and User-Agent field need to be set additionally.
        self.headers = headers

        # the mode
        self.scrape_method = 'http'

        # get the base search url based on the search engine.
        self.base_search_url = get_base_search_url_by_search_engine(self.config, self.search_engine_name, self.scrape_method)

        super().instance_creation_info(self.__class__.__name__)

        if self.search_engine_name == 'blekko':
            logger.critical('blekko does not support http mode.')
            self.startable = False

示例#3

显示文件

文件： http_mode.py 项目： viscat/GoogleScraper

    def __init__(self, config, *args, time_offset=0.0, **kwargs):
        """Initialize an HttScrape object to scrape over blocking http.

        HttpScrape inherits from SearchEngineScrape
        and from threading.Timer.
        """
        threading.Timer.__init__(self, time_offset, self.search)
        SearchEngineScrape.__init__(self, config, *args, **kwargs)

        # Bind the requests module to this instance such that each
        # instance may have an own proxy
        self.requests = __import__('requests')

        # initialize the GET parameters for the search request
        self.search_params = {}

        # initialize the HTTP headers of the search request
        # to some base values that mozilla uses with requests.
        # the Host and User-Agent field need to be set additionally.
        self.headers = headers

        # the mode
        self.scrape_method = 'http'

        # get the base search url based on the search engine.
        self.base_search_url = get_base_search_url_by_search_engine(
            self.config, self.search_engine_name, self.scrape_method)

        super().instance_creation_info(self.__class__.__name__)

        if self.search_engine_name == 'blekko':
            logger.critical('blekko doesnt support http mode.')
            self.startable = False

示例#4

显示文件

文件： http_mode.py 项目： dangrafic-data/SearchEngineScraper

    def run(self):
        super().before_search()

        if self.startable:
            args = []
            kwargs = {}
            kwargs['rand'] = False
            SearchEngineScrape.blocking_search(self, self.search, *args,
                                               **kwargs)