Пример #1
0
def spider_proxyip(num=100):
    """Scrape proxy addresses from xicidaili.com into ``proxys_src``.

    Requests listing pages 1 through 99 under
    ``http://www.xicidaili.com/nt/``. On each page the first ``<tr>`` is
    skipped (presumably the table header) and every following row is turned
    into a ``{scheme: "scheme://host:port"}`` dict, where the scheme comes
    from the sixth cell, the host from the second and the port from the
    third. Each dict is appended to ``proxys_src``, a list defined outside
    this function.

    A page that cannot be fetched or parsed is reported on stdout and
    skipped. A row that lacks the expected cells is skipped on its own, so
    it no longer discards the remaining rows of that page.

    :param num: Currently unused; the page range is fixed. Kept so that
        existing callers passing it continue to work.
    :return: The shared ``proxys_src`` list (including anything it already
        contained before this call).
    """
    for page in range(1, 100):
        try:
            url = 'http://www.xicidaili.com/nt/%s' % page
            # requests never times out by default; without this a single
            # stalled connection would block the whole crawl indefinitely.
            resp = requests.get(url, headers=create_headers(), timeout=10)
            soup = BeautifulSoup(resp.content, 'lxml')
            rows = soup.findAll('tr')

            for row in rows[1:]:
                cells = row.findAll("td")
                try:
                    scheme = cells[5].contents[0]
                    host = cells[1].contents[0]
                    port = cells[2].contents[0]
                except IndexError:
                    # Row has too few cells or an empty cell: skip only
                    # this row instead of abandoning the rest of the page.
                    continue
                proxy_host = "{0}://{1}:{2}".format(scheme, host, port)
                proxys_src.append({scheme: proxy_host})
        except Exception as e:
            # Deliberately broad: the crawl is best-effort, so any failure
            # on one page is reported and the next page is tried.
            print("spider_proxyip exception:")
            print(e)
    print(len(proxys_src))
    return proxys_src
async def download_images(save_path: str, image_url: str):
    """Download a single image and write its bytes to ``save_path``.

    :param save_path: Path of the file the image is saved to (opened in
        binary write mode, so an existing file is overwritten).
    :param image_url: URL the image is downloaded from.
    :return: None
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(image_url, headers=create_headers()) as req:
            image = await req.read()
            # Use the async context manager so the file is flushed and
            # closed even if the write fails; the handle used to be leaked.
            async with aiofiles.open(save_path, 'wb') as fp:
                await fp.write(image)