Пример #1
0
    d = dbpool.runOperation(sql, **kwargs)


## for item.py 
import scrapy 

class DmozItem(scrapy.Item):
    """Scrapy item holding the three fields scraped for one entry.

    Each attribute is declared as a ``scrapy.Field`` so it can be set and
    read with dict-style access on an item instance.
    """

    title = scrapy.Field()
    link = scrapy.Field()
    desc = scrapy.Field()

import base64
from proxy import GetIp, counter
import logging 
# Proxy pool, fetched once when this module is loaded.  The middleware below
# indexes it as ips['http'][n] / ips['https'][n] and reads element [0] as the
# host and element [1] as the port of each entry.
ips = GetIp().get_ips()

class ProxyMiddelware(object):
    """Downloader middleware that rotates requests through the ``ips`` pool.

    The module-level ``ips`` mapping is indexed by URL scheme (``'http'`` or
    ``'https'``); each entry is read as ``entry[0]`` = host and ``entry[1]`` =
    port.  A separate class-level counter per scheme remembers which entry to
    hand out next, so consecutive requests walk the pool in order and wrap
    around at the end.
    """

    http_n = 0   # index of the next proxy to use for http:// requests
    https_n = 0  # index of the next proxy to use for https:// requests

    def process_request(self, request, spider):
        """Attach the next proxy for the request's scheme to ``request.meta``.

        Args:
            request: object exposing ``url`` (str) and ``meta`` (dict).
            spider: the spider issuing the request (unused).

        Returns:
            None, so that the request continues through the remaining
            middlewares unchanged apart from ``meta['proxy']``.
        """
        for scheme in ('http', 'https'):
            if request.url.startswith(scheme + '://'):
                self._assign_proxy(request, scheme)
                break

    # Backward-compatible alias for the original (misspelled) method name.
    process_reqeust = process_request

    @staticmethod
    def _assign_proxy(request, scheme):
        """Set ``request.meta['proxy']`` from the pool for *scheme* and advance
        that scheme's counter."""
        pool = ips[scheme]
        if not pool:
            # No proxy available for this scheme: leave the request untouched
            # instead of failing with an IndexError.
            return

        counter_name = scheme + '_n'
        n = getattr(ProxyMiddelware, counter_name)
        if n >= len(pool):
            # Ran past the end of the pool: start over from the first entry.
            n = 0

        host, port = pool[n][0], pool[n][1]
        # Host and port both come from the pool that matches the URL scheme.
        request.meta['proxy'] = '%s://%s:%d' % (scheme, host, int(port))
        logging.info('Squence - %s: %s - %s', scheme, n, pool[n])
        setattr(ProxyMiddelware, counter_name, n + 1)
Пример #2
0
# base64 is imported only because it is needed when the proxy we are going to use requires authentication
#-*- coding:utf-8-*-
import base64
from proxy import GetIp, counter
from scrapy import log

# Proxy pool, fetched once when this module is loaded.  ProxyMiddleware below
# indexes it as ips['http'][n] / ips['https'][n] and reads element [0] as the
# host and element [1] as the port of each entry.
ips = GetIp().get_ipport_list()


class ProxyMiddleware(object):
    """Assign a proxy from the module-level ``ips`` pool to each request.

    Two class-level cursors (one per URL scheme) track which pool entry is
    handed out next; a cursor that has run past the end of its pool restarts
    at the first entry.
    """

    # Cursor into ips['http'] for the next http:// request.
    http_n = 0
    # Cursor into ips['https'] for the next https:// request.
    https_n = 0

    def process_request(self, request, spider):
        """Store the chosen proxy URL in ``request.meta['proxy']``.

        Requests whose URL starts with neither ``http://`` nor ``https://``
        are left untouched.  The ``spider`` argument is not used.
        """
        target = request.url

        if target.startswith("http://"):
            pool = ips['http']
            idx = ProxyMiddleware.http_n
            if idx >= len(pool):
                idx = 0  # wrap around to the first proxy
            entry = pool[idx]
            request.meta['proxy'] = "http://%s:%d" % (entry[0], int(entry[1]))
            log.msg('Squence - http: %s - %s' % (idx, str(entry)))
            ProxyMiddleware.http_n = idx + 1

        if target.startswith("https://"):
            pool = ips['https']
            idx = ProxyMiddleware.https_n
            if idx >= len(pool):
                idx = 0  # wrap around to the first proxy
            entry = pool[idx]
            request.meta['proxy'] = "https://%s:%d" % (entry[0], int(entry[1]))
            log.msg('Squence - https: %s - %s' % (idx, str(entry)))
            ProxyMiddleware.https_n = idx + 1