Exemplo n.º 1
0
 def process_request(self, request, spider):
     """Attach a proxy address obtained from ``Proxy`` to ``request``.

     Two addresses are requested from a fresh ``Proxy`` instance; the first
     is discarded and the second is stored, prefixed with ``http://``, under
     ``request.meta["proxy"]``. Any exception raised while building or
     storing that value is printed and suppressed.

     Args:
         request: Outgoing request whose ``meta`` mapping is updated.
         spider: Unused here.

     Returns:
         None (implicitly).
     """
     source = Proxy()
     # NOTE(review): the first result is thrown away; the call is kept in
     # case Proxy.get() is stateful -- confirm whether it is needed.
     source.get()
     address = source.get()
     try:
         proxy_url = "http://" + address
         request.meta["proxy"] = proxy_url
     except Exception as err:
         print(err)
Exemplo n.º 2
0
    def _retry(self, request, reason, spider):
        """Return the request to schedule after a failed download.

        While the retry budget allows, a copy of ``request`` is returned with
        an incremented ``retry_times`` counter, an adjusted priority and a
        newly drawn proxy, and the retry stats are bumped. Once the budget is
        exhausted the original ``request`` itself is returned with another
        proxy and an adjusted priority instead of being dropped.

        Args:
            request: The failed request. ``request.meta`` may carry
                ``retry_times`` (failures so far) and ``max_retry_times``
                (per-request override of ``self.max_retry_times``).
            reason: Failure description or exception instance. An exception
                is replaced by ``global_object_name`` of its class before
                being used in the stats key.
            spider: Spider whose ``crawler.stats`` receives the counters.

        Returns:
            The request object to schedule next; this method never returns
            ``None``.
        """
        retries = request.meta.get('retry_times', 0) + 1
        retry_times = request.meta.get('max_retry_times',
                                       self.max_retry_times)

        stats = spider.crawler.stats
        if retries <= retry_times:
            logger.debug(
                "Retrying %(request)s (failed %(retries)d times): %(reason)s",
                {
                    'request': request,
                    'retries': retries,
                    'reason': reason
                },
                extra={'spider': spider})
            retryreq = request.copy()
            retryreq.meta['retry_times'] = retries
            retryreq.dont_filter = True
            retryreq.priority = request.priority + self.priority_adjust

            _proxy = Proxy()
            # NOTE(review): the original drew three proxies and used only the
            # last one; the two discarded draws are kept in case Proxy.get()
            # is stateful -- confirm and simplify if it is not.
            _proxy.get()
            _proxy.get()
            # Fix: the proxy must be written to the copy that is returned.
            # Assuming Scrapy's Request.copy(), the copy owns a separate meta
            # dict, so the previous write to request.meta never reached the
            # retried request and it went out through the old proxy.
            retryreq.meta["proxy"] = "http://" + _proxy.get()

            if isinstance(reason, Exception):
                reason = global_object_name(reason.__class__)

            stats.inc_value('retry/count')
            stats.inc_value('retry/reason_count/%s' % reason)
            return retryreq
        else:
            _proxy = Proxy()
            # NOTE(review): as above, three draws are discarded before the
            # fourth is used, mirroring the original call count.
            _proxy.get()
            _proxy.get()
            _proxy.get()
            # Budget exhausted: re-issue the same request object through a
            # different proxy rather than giving up on it.
            request.meta["proxy"] = "http://" + _proxy.get()
            request.dont_filter = True
            request.priority = request.priority + self.priority_adjust
            return request
Exemplo n.º 3
0
from afanti_tiku_lib.dbs import html_archive
from afanti_tiku_lib.dbs.execute import execute

from adsl_server.proxy import Proxy
from user_agent_lib.user_agent import UserAgent

# Log record layout: timestamp (padded to 15 columns), level name, message.
LOGGING_FORMAT = '%(asctime)-15s:%(levelname)s: %(message)s'
# Configure root logging at import time: INFO and above are appended
# (filemode 'a') to a log file under the relative 'working/' directory.
logging.basicConfig(format=LOGGING_FORMAT,
                    level=logging.INFO,
                    filename='working/achihuo_mini_vko.log',
                    filemode='a')

# Module-level database handle, created at import time.
# NOTE(review): presumably 'html_archive2' names a configured database or
# connection profile -- confirm against CommonMysql.
mysql = CommonMysql('html_archive2')
mysql_conn = mysql.connection()

# Shared Proxy instance for this module (imported from adsl_server.proxy).
_proxy = Proxy()

# Shared UserAgent instance (imported from user_agent_lib.user_agent).
user_agent = UserAgent()

# Default HTTP request headers. The values imitate an XMLHttpRequest sent by
# desktop Chrome 54 on macOS with http://tiku.vko.cn/ as the referring page.
HEADERS = {
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36',
    'Accept':
    'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    'Referer': 'http://tiku.vko.cn/',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
}