def test_do_not_process_robots_works(self):
    """Calling ``_do_not_process_robots`` (even repeatedly) marks the URI's
    extraction as finished via ``optional_vars``."""
    curi = CrawlUri()
    curi.effective_url = "http://127.0.0.1/robots.txt"
    curi.optional_vars = dict()

    default_limiter = limiter.DefaultLimiter(None)
    # Call twice to check the operation is idempotent.
    for _ in range(2):
        default_limiter._do_not_process_robots(curi)

    self.assertEqual(CURI_OPTIONAL_TRUE,
                     curi.optional_vars[CURI_EXTRACTION_FINISHED])
def _crawluri_from_uri(self, uri):
    """
    Convert an URI tuple to a :class:`CrawlUri`.

    Replace the hostname with the real IP in order to cache DNS queries.
    """
    (url, etag, mod_date, _next_date, prio) = uri
    parsed = urlparse(url)

    # Look up the cached DNS resolution for host:port; fall back to the
    # scheme's default port when the URL carries none (``not port`` also
    # covers an explicit port 0, matching the cache key convention).
    port = parsed.port or PROTOCOLS_DEFAULT_PORT[parsed.scheme]
    effective_netloc = self._dns_cache["%s:%s" % (parsed.hostname, port)]

    curi = CrawlUri(url)
    curi.effective_url = url.replace(parsed.netloc,
                                     "%s:%s" % effective_netloc)
    curi.current_priority = prio

    # Conditional-GET headers for URIs we have fetched before.
    curi.req_header = dict()
    if etag:
        curi.req_header["Etag"] = etag
    if mod_date:
        curi.req_header["Last-Modified"] = serialize_date_time(
            datetime.fromtimestamp(mod_date))

    curi.optional_vars = dict()
    if parsed.username and parsed.password:
        curi.optional_vars[CURI_SITE_USERNAME] = parsed.username.encode()
        curi.optional_vars[CURI_SITE_PASSWORD] = parsed.password.encode()

    return curi
def _crawluri_from_uri(self, uri):
    """
    Convert an URI tuple to a :class:`CrawlUri`.

    Replace the hostname with the real IP in order to cache DNS queries.
    """
    url, etag, mod_date, _next_date, prio = uri
    parsed_url = urlparse(url)

    # dns resolution and caching: the cache maps "host:port" to the
    # resolved (ip, port) pair used to build the effective netloc.
    port = parsed_url.port
    if not port:
        port = PROTOCOLS_DEFAULT_PORT[parsed_url.scheme]
    effective_netloc = self._dns_cache["%s:%s" % (parsed_url.hostname,
                                                  port)]

    curi = CrawlUri(url)
    curi.effective_url = url.replace(parsed_url.netloc,
                                     "%s:%s" % effective_netloc)
    curi.current_priority = prio
    curi.req_header = dict()

    # Revalidation headers when we already know this resource.
    if etag:
        curi.req_header["Etag"] = etag
    if mod_date:
        mod_date_time = datetime.fromtimestamp(mod_date)
        curi.req_header["Last-Modified"] = serialize_date_time(
            mod_date_time)

    curi.optional_vars = dict()
    # Only store credentials when both parts are present in the URL.
    if parsed_url.username and parsed_url.password:
        curi.optional_vars[CURI_SITE_USERNAME] = \
            parsed_url.username.encode()
        curi.optional_vars[CURI_SITE_PASSWORD] = \
            parsed_url.password.encode()

    return curi
def test_that_creating_processing_function_works(self):
    """``create_processing_function`` must reject unusable plugin modules
    with ``ValueError`` and otherwise return a callable processing chain."""
    settings = Settings()
    pipeline = settings.SPYDER_EXTRACTOR_PIPELINE
    pipeline.extend(settings.SPYDER_SCOPER_PIPELINE)

    # NOTE(review): these module names presumably lack the expected
    # plugin interface, so creation must fail — confirm against
    # workerprocess internals.
    pipeline.append('test_workerprocess')
    self.assertRaises(ValueError,
                      workerprocess.create_processing_function,
                      settings, pipeline)
    pipeline.pop()

    pipeline.append('test_workerprocess_unspec')
    self.assertRaises(ValueError,
                      workerprocess.create_processing_function,
                      settings, pipeline)
    pipeline.pop()

    # With only valid plugins the chain is built and usable.
    processing = workerprocess.create_processing_function(settings,
                                                          pipeline)
    curi = CrawlUri(optional_vars=dict())
    curi.effective_url = "http://127.0.0.1/robots.txt"
    processed = processing(curi)
    self.assertEqual(CURI_OPTIONAL_TRUE,
                     processed.optional_vars[CURI_EXTRACTION_FINISHED])
def test_that_creating_processing_function_works(self):
    """Bad plugin names raise ``ValueError``; a clean pipeline yields a
    processing callable that finishes extraction for robots.txt URIs."""
    settings = Settings()
    processors = settings.SPYDER_EXTRACTOR_PIPELINE
    processors.extend(settings.SPYDER_SCOPER_PIPELINE)

    for bad_plugin in ('test_workerprocess', 'test_workerprocess_unspec'):
        processors.append(bad_plugin)
        self.assertRaises(ValueError,
                          workerprocess.create_processing_function,
                          settings, processors)
        processors.pop()

    processing = workerprocess.create_processing_function(settings,
                                                          processors)

    curi = CrawlUri(optional_vars=dict())
    curi.effective_url = "http://127.0.0.1/robots.txt"
    curi2 = processing(curi)
    self.assertEqual(CURI_OPTIONAL_TRUE,
                     curi2.optional_vars[CURI_EXTRACTION_FINISHED])