def aliveproxy() -> Set[str]:
    urls = [
        "http://aliveproxy.com/fastest-proxies",
        "http://aliveproxy.com/high-anonymity-proxy-list",
        "http://aliveproxy.com/anonymous-proxy-list",
        "http://aliveproxy.com/transparent-proxy-list",
        "http://aliveproxy.com/us-proxy-list",
        "http://aliveproxy.com/gb-proxy-list",
        "http://aliveproxy.com/de-proxy-list",
        "http://aliveproxy.com/jp-proxy-list",
        "http://aliveproxy.com/ca-proxy-list",
    ]
    proxy_set7 = set()
    logger.info(
        f"Parsing proxies from {short_url(urls[0])}..."
    )  # aliveproxy.com
    for url in urls:
        r = requests.get(url, headers=standard_headers)
        soup = BeautifulSoup(r.content, "lxml")
        plp_s7 = len(proxy_set7)  # previous len proxy_set7
        for proxy in soup.find("table", {"class": "cm or"}).find_all("tr")[1:]:
            proxies = parse_proxies(str(proxy.find("td")))
            proxy_set7.update(proxies)
        link = r.url.split("/")[-2]
        logger.info(
            f"From {link} section were parsed {len(proxy_set7) - plp_s7} proxies"
        )
        time.sleep(1.3)  # crawling-delay
    logger.info(
        f"From {short_url(urls[0])} were parsed {len(proxy_set7)} proxies"
    )
    return proxy_set7


def openproxy() -> Set[str]:
    # Current time, round-tripped through strftime/strptime to second
    # precision, then converted to a millisecond Unix timestamp for the API
    date = dt.now().strftime("%d.%m.%Y %H:%M:%S")
    strp_date = dt.strptime(date, "%d.%m.%Y %H:%M:%S")
    stamp_date = int(time.mktime(strp_date.timetuple()) * 1000)
    proxy_set5 = set()
    links = set()
    url = f"https://api.openproxy.space/list?skip=0&ts={stamp_date}"
    r = requests.get(url, headers=standard_headers)
    data = r.json()
    for _dict in data:
        # Keep only lists that advertise exactly two protocols
        if len(_dict.get("protocols")) == 2:
            links.add(f"https://openproxy.space/list/{_dict.get('code')}")
    logger.info(f"Parsing proxies from {short_url(r.url)}...")
    for link in links:
        r = requests.get(link, headers=standard_headers)
        try:
            soup = BeautifulSoup(r.content, "lxml")
            # Proxies are embedded in one of the page's <script> tags
            # (6th from the end)
            proxies = parse_proxies(str(soup.find_all("script")[-6]))
            proxy_set5.update(proxies)
            logger.info(
                f"From {r.url.split('/')[-1]} section were parsed {len(proxies)} proxies"
            )
        except Exception:
            logger.exception(
                f"Proxies from {link.split('/')[-1]} were not loaded :("
            )
        time.sleep(1.3)  # crawling-delay
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxy_set5)} proxies"
    )
    return proxy_set5


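# Sketch: the strftime/strptime round-trip in openproxy() above boils down to
# the current Unix time in milliseconds, truncated to whole seconds. The
# helper below is a hypothetical equivalent for illustration only; it is not
# used by the scrapers in this module.
def _openproxy_timestamp_ms() -> int:
    # seconds since the epoch, truncated, expressed in milliseconds
    return int(time.time()) * 1000

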
def awmproxy() -> Set[str]:
    url = "http://awmproxy.net"
    r = requests.get(url, headers=standard_headers)
    proxy_set4 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxy_set4)} proxies"
    )
    return proxy_set4


def proxy_ip_list() -> Set[str]:
    url = "http://proxy-ip-list.com/"
    r = requests.get(url, headers=standard_headers)
    proxies_set2 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set2)} proxies"
    )
    return proxies_set2


def proxy50_50() -> Set[str]:
    url = "https://proxy50-50.blogspot.com/"
    r = requests.get(url, headers=standard_headers)
    proxies_set = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set)} proxies"
    )
    return proxies_set


def httptunnel() -> Set[str]:
    url = "http://www.httptunnel.ge/ProxyListForFree.aspx"
    r = requests.get(url, headers=standard_headers)
    proxies_set11 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set11)} proxies"
    )
    return proxies_set11


def community_aliveproxy() -> Set[str]:
    url = "http://community.aliveproxy.com/proxy_list_http_fastest"
    proxy_set8 = set()
    r = requests.get(url, headers=standard_headers)
    soup = BeautifulSoup(r.content, "lxml")
    try:
        for proxy in soup.find("table").find_all("tr")[1:]:
            proxies = parse_proxies(proxy.text)
            proxy_set8.update(proxies)
        logger.info(
            f"From {short_url(r.url)} were parsed {len(proxy_set8)} proxies"
        )
    except Exception:
        logger.exception(f"Proxies from {short_url(r.url)} were not loaded :(")
    return proxy_set8


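# Sketch: a minimal aggregator showing how the scrapers above could be
# combined. The name `collect_all_proxies` is hypothetical and not part of
# the original sources; it only calls the functions defined above and the
# module-level `logger` they already use.
def collect_all_proxies() -> Set[str]:
    sources = (
        aliveproxy,
        openproxy,
        awmproxy,
        proxy_ip_list,
        proxy50_50,
        httptunnel,
        community_aliveproxy,
    )
    all_proxies: Set[str] = set()
    for source in sources:
        try:
            all_proxies.update(source())
        except Exception:
            # A single failing source should not abort the whole run
            logger.exception(f"Source {source.__name__} failed")
    logger.info(f"Collected {len(all_proxies)} unique proxies in total")
    return all_proxies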