import requests
from bs4 import BeautifulSoup

# Module-level list collecting the scraped proxies (assumed to be defined
# elsewhere in the original module; declared here so the function runs standalone).
proxys_src = []


def spider_proxyip(num=100):
    """Scrape free proxy IPs from xicidaili.com and append them to proxys_src."""
    for i in range(1, 100):
        try:
            url = 'http://www.xicidaili.com/nt/%s' % str(i)
            req = requests.get(url, headers=create_headers())
            source_code = req.content
            soup = BeautifulSoup(source_code, 'lxml')
            ips = soup.findAll('tr')
            for x in range(1, len(ips)):
                ip = ips[x]
                tds = ip.findAll("td")
                # Build "PROTOCOL://ip:port" from the table cells:
                # tds[5] holds the protocol, tds[1] the IP, tds[2] the port.
                proxy_host = "{0}://".format(tds[5].contents[0]) \
                    + tds[1].contents[0] + ":" + tds[2].contents[0]
                proxy_temp = {tds[5].contents[0]: proxy_host}
                proxys_src.append(proxy_temp)
        except Exception as e:
            print("spider_proxyip exception:")
            print(e)
    # Note: the num parameter is kept for API compatibility; the original
    # early-exit check (`if x >= num`) was commented out and is not applied.
    print(len(proxys_src))
    return proxys_src
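# A minimal usage sketch (not part of the original module): pick one of the
# scraped proxies at random and pass it to requests. It assumes the proxy dicts
# look like {'HTTP': 'http://1.2.3.4:8080'}, as built by spider_proxyip above,
# and lowercases the scheme key because requests matches proxies by lowercase scheme.
import random


def get_random_proxy():
    proxies = spider_proxyip()
    if not proxies:
        return None
    proxy = random.choice(proxies)
    return {scheme.lower(): host for scheme, host in proxy.items()}

# Example (hypothetical target URL):
# resp = requests.get('http://example.com', proxies=get_random_proxy(), headers=create_headers())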
import aiohttp
import aiofiles


async def download_images(save_path: str, image_url: str):
    """
    :param save_path: path where the downloaded image will be saved
    :param image_url: download URL of the image
    :return:
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(image_url, headers=create_headers()) as req:
            image = await req.read()
            # Open the file with an async context manager so the handle is
            # closed even if the write fails.
            async with aiofiles.open(save_path, 'wb') as fp:
                await fp.write(image)
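# A minimal usage sketch (assumed, not in the original source): download several
# images concurrently, which is the point of making download_images a coroutine.
# download_all and the placeholder URLs/file names below are hypothetical.
import asyncio


async def download_all(pairs):
    # pairs: iterable of (save_path, image_url) tuples
    await asyncio.gather(*(download_images(path, url) for path, url in pairs))

# Example:
# asyncio.run(download_all([('1.jpg', 'http://example.com/1.jpg'),
#                           ('2.jpg', 'http://example.com/2.jpg')]))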