def __init__(self):
     """Initialise the controller state.

     Sets a zero sleep time, builds the task list from the bound crawl
     methods of a fresh ``ZhiDaoGenerator`` and ``BaiduHotGenerator``
     (in that order), and attaches a logger named ``keywords_controller``.
     """
     self.sleep_time = 0
     # Instantiate the generators in the same order the tasks are listed.
     zhidao_task = ZhiDaoGenerator().crawl_zhidao_words
     hot_words_task = BaiduHotGenerator().crawl_hot_words
     self.task_list = [zhidao_task, hot_words_task]
     self.logger = LogHandler('keywords_controller')
 def __init__(self):
     """Initialise the question generator.

     Creates a ``ProxiesReceiver``, two empty dicts (``essence_dict`` and
     ``activity_dict``), a ``requests`` session, calls
     ``QuestionsGenerator.save_new_topic_id()`` and finally attaches a
     logger named ``question_generator``.
     """
     self.p_receiver = ProxiesReceiver()
     self.essence_dict, self.activity_dict = {}, {}
     self.session = requests.Session()
     # First pick, from the topic ids, those whose questions have not been
     # fetched yet and put them into the pending queue.
     QuestionsGenerator.save_new_topic_id()
     self.logger = LogHandler("question_generator")
Exemplo n.º 3
0
 def __init__(self):
     """Initialise the topics generator.

     Creates a ``ProxiesReceiver`` and a ``requests`` session whose
     ``https://`` prefix is served by an ``HTTPAdapter`` configured with
     ``Retry(connect=3, backoff_factor=0.5)``, attaches a logger named
     ``topics_generator`` and raises the ``urllib3`` logger level to ERROR.
     """
     # Proxy IP source.
     self.p_receiver = ProxiesReceiver()
     # Open a session and configure the retry count and retry back-off
     # used by requests; only the https:// prefix gets this adapter.
     self.session = requests.Session()
     self.session.mount(
         'https://',
         HTTPAdapter(max_retries=Retry(connect=3, backoff_factor=0.5)),
     )
     self.logger = LogHandler('topics_generator')
     urllib3_logger = logging.getLogger("urllib3")
     urllib3_logger.setLevel(logging.ERROR)
Exemplo n.º 4
0
 def __init__(self):
     """Initialise the Zhidao crawler and build its request headers.

     Attaches a logger named ``zhidao_crawl``, then issues a GET request to
     ``https://zhidao.baidu.com/browse/`` (3 second timeout) to read the
     ``BAIDUID`` cookie, which is embedded in ``self.zhidao_headers``.

     Raises:
         Exception: any error raised by the request or by the cookie
             lookup propagates to the caller unchanged.
     """
     self.logger = LogHandler('zhidao_crawl')
     # The Zhidao request headers need the BAIDUID cookie; without it no
     # data can be crawled. Failures are deliberately not caught here.
     baiduid = requests.get('https://zhidao.baidu.com/browse/',
                            timeout=3).cookies['BAIDUID']
     self.zhidao_headers = {
         'User-Agent':
         'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 16 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
         'Cookie': 'BAIDUID=%s' % baiduid,
     }
Exemplo n.º 5
0
 def __init__(self):
     """Initialise the proxy manager.

     Creates a ``ProxyDBClient`` for ``'proxy'``, a logger named
     ``proxy_manager``, and stores the names of the raw and useful proxy
     queues.
     """
     self.db = ProxyDBClient('proxy')
     self.log = LogHandler('proxy_manager')
     # Queue names used for unverified and verified proxies respectively
     # (as the attribute names suggest).
     self.raw_proxy_queue, self.useful_proxy_queue = 'raw_proxy', 'useful_proxy'
Exemplo n.º 6
0
 def __init__(self, queue, item_dict):
     """Initialise the proxy-check thread.

     Runs the ``ProxyManager`` and ``Thread`` initialisers explicitly,
     installs a logger named ``proxy_check`` and stores the given
     arguments.

     Args:
         queue: stored as ``self.queue``.
         item_dict: stored as ``self.item_dict``.
     """
     ProxyManager.__init__(self)
     Thread.__init__(self)
     # Several threads writing to one log file at the same time causes
     # problems, hence file=False.
     self.log = LogHandler('proxy_check', file=False)
     self.queue, self.item_dict = queue, item_dict
Exemplo n.º 7
0
   Description :   used to check getFreeProxy.py
   Author :        JHao
   date:          2018/7/10
-------------------------------------------------
   Change Activity:
                   2018/7/10: CheckProxy
-------------------------------------------------
"""
__author__ = 'JHao'

from proxy.ProxyGetter.getFreeProxy import GetFreeProxy
from proxy.Util.utilFunction import verifyProxyFormat

from util.loghandler import LogHandler

# Module-level logger named 'check_proxy'. file=False is passed through to
# LogHandler (presumably disables writing to a log file; confirm against
# util.loghandler).
log = LogHandler('check_proxy', file=False)


class CheckProxy(object):
    @staticmethod
    def checkAllGetProxyFunc():
        """
        检查getFreeProxy所有代理获取函数运行情况
        Returns:
            None
        """
        import inspect
        member_list = inspect.getmembers(GetFreeProxy,
                                         predicate=inspect.isfunction)
        proxy_count_dict = dict()
        for func_name, func in member_list:
Exemplo n.º 8
0
 def __init__(self):
     """Initialise via ``ProxyManager`` and install the scheduler logger.

     After ``ProxyManager.__init__`` has run, ``self.log`` is set to a
     logger named ``refresh_schedule``.
     """
     ProxyManager.__init__(self)
     schedule_log = LogHandler('refresh_schedule')
     self.log = schedule_log