示例#1
0
def FetchJob():
    """Collect proxies from every ``FetchFreeProxy`` source and persist them.

    The storage backend is chosen from the ``DATA_STORE_TYPE`` application
    config value: ``'mysql'`` stores a ``ProxyRaw`` row through ``sql``'s
    session, any other value stores the proxy through a ``RedisHelper``
    keyed by ``jobs.PROXY_RAW_KEY``.

    Every function member of ``FetchFreeProxy`` is called in turn. Each
    proxy it yields is stripped, format-checked with ``verifyProxyFormat``
    and skipped when it was already seen during this run. A failure inside
    one source is logged and does not stop the remaining sources.

    Returns:
        None. The number of proxies stored in this run is logged.
    """
    # The scheduler holds the application object needed for an app context.
    app = schedule.app
    with app.app_context():
        store_type = current_app.config.get('DATA_STORE_TYPE')
    if store_type == 'mysql':
        db = sql
    else:
        db = RedisHelper(jobs.PROXY_RAW_KEY)
    import inspect
    member_list = inspect.getmembers(FetchFreeProxy,
                                     predicate=inspect.isfunction)
    # Proxy strings already stored during this run, used for de-duplication.
    proxy_set = set()
    for func_name, func in member_list:
        log.debug(u"开始获取代理: {}".format(func_name))
        try:
            for proxy in func():
                proxy = proxy.strip()
                if not proxy or not verifyProxyFormat(proxy):
                    log.error('ProxyFetch - {func}: '
                              '{proxy} illegal'.format(func=func_name,
                                                       proxy=proxy.ljust(20)))
                    continue
                if proxy in proxy_set:
                    log.debug('ProxyFetch - {func}: '
                              '{proxy} exist'.format(func=func_name,
                                                     proxy=proxy.ljust(20)))
                    continue
                log.debug('ProxyFetch - {func}: '
                          '{proxy} success'.format(func=func_name,
                                                   proxy=proxy.ljust(20)))
                # Build the record to store.
                last_time = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
                p = Proxy(name=func_name, proxy=proxy, last_time=last_time)
                # Persist to the configured backend.
                if store_type == 'mysql':
                    record = ProxyRaw(name=func_name,
                                      proxy=proxy,
                                      https=False,
                                      gmt_create=last_time,
                                      gmt_modified=last_time)
                    with app.app_context():
                        db.session.add(record)
                        db.session.commit()
                else:
                    db.add(p.proxy, p.Json)
                # Remember the proxy *string*: the duplicate check above
                # tests strings, so storing the Proxy object here would
                # never match and duplicates would be inserted again.
                proxy_set.add(proxy)
        except Exception as e:
            # Best effort per source: report what went wrong and move on.
            log.error(u"代理获取函数 {} 运行出错! {!r}".format(func_name, e))
    # Report how many proxies were stored in this run.
    log.debug('本次插入代理总数: %s', len(proxy_set))
class CheckProcess(object):
    """Queue consumer that re-validates stored proxies and updates the store.

    The backend is selected from the ``DATA_STORE_TYPE`` application config
    value: ``'mysql'`` uses ``sql``'s session with the ``ProxyValid`` model,
    any other value uses a ``RedisHelper`` keyed by ``jobs.PROXY_VALID_KEY``.
    """

    def __init__(self, queue: Queue):
        """Store the work queue and pick the storage backend.

        Args:
            queue: Queue of proxies to check. Items are passed to
                ``Proxy.fromJson`` for the non-mysql backend and used as-is
                for mysql. A ``None`` item stops ``run``.
        """
        self.queue = queue
        app = schedule.app
        with app.app_context():
            self.store_type = current_app.config.get('DATA_STORE_TYPE')
        if self.store_type == 'mysql':
            self.db = sql
        else:
            self.db = RedisHelper(jobs.PROXY_VALID_KEY)

    def run(self):
        """Drain the queue, validating each proxy and updating the store.

        A proxy is kept (its record refreshed) when validation succeeds or
        while its failure count is still below ``FAIL_COUNT``; otherwise it
        is removed from the store. The loop ends when the queue reports
        empty or a ``None`` item is received.
        """
        if self.store_type != 'mysql':
            self.db.change(jobs.PROXY_VALID_KEY)
        while True:
            if self.queue.empty():
                break
            proxy_data = self.queue.get()
            if proxy_data is None:
                break
            if self.store_type != 'mysql':
                proxyObj = Proxy.fromJson(proxy_data)
            else:
                proxyObj = proxy_data
            proxy, status = proxyObj.validateProxy()
            if status or proxy.fail < FAIL_COUNT:
                # Keep the proxy: write the refreshed statistics back.
                if self.store_type != 'mysql':
                    if self.db.exists(proxy.proxy):
                        log.debug('ValidProxyCheck - {}  : {} validation exists'.format(proxy.name,
                                                                                       proxy.proxy.ljust(20)))
                    self.db.add(proxy.proxy, proxy.Json)
                else:
                    app = schedule.app
                    with app.app_context():
                        proxy_info = self.db.session.query(ProxyValid).filter(
                            ProxyValid.proxy == proxy.proxy).first()
                        if proxy_info is None:
                            # first() returns None when no row matches;
                            # without this guard the attribute assignments
                            # below would raise and end the worker loop.
                            log.error('ValidProxyCheck - {}  : {} not found in store, update skipped'.format(
                                proxy.name, proxy.proxy.ljust(20)))
                        else:
                            proxy_info.success = proxy.success
                            proxy_info.fail = proxy.fail
                            proxy_info.total = proxy.total
                            proxy_info.quality = proxy.quality
                            proxy_info.last_status = proxy.last_status
                            proxy_info.gmt_modified = proxy.last_time
                            self.db.session.commit()
                log.debug('ValidProxyCheck - {}  : {} validation pass'.format(proxy.name, proxy.proxy.ljust(20)))
            else:
                log.debug('ValidProxyCheck - {}  : {} validation fail'.format(proxy.name, proxy.proxy.ljust(20)))
                if self.store_type != 'mysql':
                    self.db.delete(proxy.proxy)
                else:
                    app = schedule.app
                    with app.app_context():
                        self.db.session.query(ProxyValid).filter(ProxyValid.proxy == proxy.proxy).delete()
                        # Commit explicitly so the delete is persisted,
                        # matching the update path above.
                        self.db.session.commit()
示例#3
0
class CheckProcess(object):
    """Queue consumer that validates raw proxies and stores those that pass.

    The backend comes from the ``DATA_STORE_TYPE`` application config value:
    ``'mysql'`` uses ``sql``'s session with the ``ProxyValid`` model, any
    other value uses a ``RedisHelper`` keyed by ``jobs.PROXY_VALID_KEY``.
    """

    def __init__(self, queue: Queue):
        """Remember the work queue and select the storage backend.

        Args:
            queue: Queue of proxies to check. A ``None`` item stops ``run``.
        """
        self.queue = queue
        flask_app = schedule.app
        with flask_app.app_context():
            self.store_type = current_app.config.get('DATA_STORE_TYPE')
        self.db = sql if self.store_type == 'mysql' else RedisHelper(jobs.PROXY_VALID_KEY)

    def run(self):
        """Validate queued proxies until the queue is empty or ``None`` arrives.

        Proxies that pass validation are written to the store; failures are
        only logged.
        """
        if self.store_type != 'mysql':
            self.db.change(jobs.PROXY_VALID_KEY)
        while not self.queue.empty():
            item = self.queue.get()
            if item is None:
                break
            # Non-mysql queue items are passed through Proxy.fromJson first.
            candidate = item if self.store_type == 'mysql' else Proxy.fromJson(item)
            proxy, status = candidate.validateProxy()
            if not status:
                log.error(
                    'RawProxyCheck - {}  : {}, into time: {} validation fail'.format(
                        proxy.name, proxy.proxy.ljust(20), proxy.last_time))
                continue
            # Validation passed: persist the proxy.
            if self.store_type == 'mysql':
                row = ProxyValid(
                    name=proxy.name,
                    proxy=proxy.proxy,
                    https=proxy.https,
                    proxy_type=proxy.type,
                    china=proxy.china,
                    location=proxy.location,
                    success=proxy.success,
                    fail=proxy.fail,
                    total=proxy.total,
                    quality=proxy.quality,
                    last_status=proxy.last_status,
                    gmt_modified=proxy.last_time,
                )
                flask_app = schedule.app
                with flask_app.app_context():
                    self.db.session.add(row)
                    self.db.session.commit()
            else:
                self.db.add(proxy.proxy, proxy.Json)
            log.debug('RawProxyCheck - {}  : {} validation pass'.format(proxy.name, proxy.proxy.ljust(20)))