示例#1
0
def deal_data(url, text, selector):
    """Store a detail-page response body via ``redis_db`` when it qualifies.

    A response qualifies when its body contains the marker ``ArticleTitle``
    and its URL does not contain ``t20131029_1012002`` (that one page is
    deliberately excluded; the reason is not visible in this file).

    :param url: URL the response was fetched from.
    :param text: response body as text.
    :param selector: parsed-document selector; unused here.
    :return: None
    """
    # Guard clause: bail out on anything that is not a wanted article page.
    if 'ArticleTitle' not in text or 't20131029_1012002' in url:
        return

    redis_db.sadd(content_key, text)
    ctx.log('详情内容获取成功, 已缓存至redis!')
示例#2
0
def deal_data(url, text, selector):
    """Store a detail-page response body via ``redis_db`` when it qualifies.

    A response qualifies when its body contains the marker ``ArticleTitle``.

    :param url: URL the response was fetched from; unused here.
    :param text: response body as text.
    :param selector: parsed-document selector; unused here.
    :return: None
    """
    # Guard clause: bodies without the article marker are ignored.
    if 'ArticleTitle' not in text:
        return

    redis_db.sadd(content_key, text)
    ctx.log('详情内容获取成功, 已缓存至redis!')
示例#3
0
def cach_task(url, text, selector):
    """Queue follow-up crawl tasks found on a listing page.

    Two kinds of task are pushed through ``redis_db``:

    * pagination URLs (``index_<n>.shtml`` resolved against ``url``), built
      only when the body contains a ``createPageHTML(<count>`` call and the
      current URL is not itself an ``index_`` page;
    * detail-page links taken from ``.lsj-list li a`` href attributes,
      stored exactly as extracted.

    :param url: URL of the listing page that was fetched.
    :param text: response body as text.
    :param selector: object exposing ``css(...).extract()`` over the page.
    :return: None
    """
    # Pagination tasks
    pagination_hits = re.findall(r"createPageHTML\((\d+)", text, re.S)
    if pagination_hits and 'index_' not in url:
        page_total = int(pagination_hits[0])
        # Pages are numbered from 1 up to page_total - 1.
        page_urls = [
            urljoin(url, 'index_{}.shtml'.format(number))
            for number in range(1, page_total)
        ]
        redis_db.sadd(catlog_key, page_urls)
        ctx.log('缓存 catlog_page_list 成功!')

    # Detail-page tasks
    detail_links = selector.css('.lsj-list li a::attr(href)').extract()
    if detail_links:
        redis_db.sadd(detail_key, detail_links)
        ctx.log('缓存 detail_url_list 成功!')
示例#4
0

def start():
    """Run a mitmproxy ``DumpMaster`` with the ``RuiShuCapture`` addon attached.

    Prints a reminder showing the advertised IP and port, builds the proxy on
    that port, then blocks in the master's run loop. A ``KeyboardInterrupt``
    (Ctrl+C) shuts the master down.

    NOTE(review): the host below is only used in the printed reminder; it is
    not passed to ``options.Options``, so the actual bind address is whatever
    the library defaults to.
    """
    host = '127.0.0.1'
    listen_port = 8080
    print("温馨提示:服务IP {} 端口 {} 请确保代理已配置".format(host, listen_port))

    capture_addon = RuiShuCapture()
    proxy_options = options.Options(listen_port=listen_port)
    proxy_config = proxy.config.ProxyConfig(proxy_options)
    master = DumpMaster(proxy_options)
    master.server = proxy.server.ProxyServer(proxy_config)
    master.addons.add(capture_addon)

    try:
        master.run()
    except KeyboardInterrupt:
        # Ctrl+C: stop the proxy cleanly instead of dumping a traceback.
        master.shutdown()


if __name__ == '__main__':
    # Listing pages the crawl starts from.
    seed_urls = [
        'http://fgw.hubei.gov.cn/gzjj/tzgg/tz/',
        'http://fgw.hubei.gov.cn/gzjj/dtyw/fgyw/',
    ]
    # Push the entry tasks only when nothing is stored under the start key yet.
    already_seeded = redis_db.sget('gov:start_task')
    if not already_seeded:
        redis_db.sadd('gov:start_task', seed_urls)
        print('pushed start_tasks !')
    start()