Exemplo n.º 1
0
        type=str,
        default=
        'https://www.qcsanbao.cn/webqcba/DVMProducerServlet?method=getWhereList&p=1',
        help="要爬取的网站")
    args = parser.parse_args()
    url = args.url
    base_url = configs["basic_url"]
    r = get_redis_connect()
    dl = Download()
    par = Parse()

    # Build the url_list of list pages (base_url plus the total page count
    # parsed from the first page).
    make_url_list(
        base_url,
        par.parse_main_page_get_total_pagenum(
            dl.download_first_page(url, logger), configs["test"]))

    threading_list = []

    # List-page workers: parse the detail-page URLs out of each list page,
    # store them in redis, and download the list-page HTML.
    threading_list.extend([
        Thread(target=download_and_parse_page,
               args=("url_list", r, par.parse_main_page_get_detail_page_url,
                     dl.download_first_page, dl.download_list_page_html, lock,
                     logger)) for _ in range(configs["thread_num"])
    ])

    # Detail-page workers: parse the code and name data URLs out of each
    # detail page, store them in redis, and download the detail-page HTML.
    threading_list.extend([
        Thread(target=download_and_parse_page,
               args=("detail_url_list", r, par.parse_detail_page_get_url,