def __init__(self, manager_instance, load_from_web=False):
    '''Wire up collaborators and make sure the manager has proxies to use.

    manager_instance -- object kept as ``self.manager``; its ``proxies``
                        collection is measured with ``len()`` below.
    load_from_web    -- when true, the saved proxy file is not read even
                        if it exists.
    '''
    self.settings = Settings.getInstance()
    self.proxyResource = ProxyResource()
    self.manager = manager_instance
    self.html_getter = WebPageDownloader()

    # Both proxy files live in the configured resource directory.
    resource_dir = self.settings.resourcedir
    self.filename = os.path.join(resource_dir, "proxies.txt")
    self.filename_static = os.path.join(resource_dir, "proxies_static.txt")

    self.autosave_interval = (5, 12 * 5)  # (seconds * check times)
    self.autosave_checkcount = 0

    # Prefer the saved file unless the caller asked for a web reload.
    if not load_from_web and os.path.exists(self.filename):
        self.loadFromFile()

    # Fewer than 10 proxies available: fetch from the web and persist them.
    if len(self.manager.proxies) < 10:
        print("Load too less proxies from file.")
        self.loadProxyFromWeb()
        self.saveToFile()
def __init__(self):
    '''Store the object returned by ``Settings.getInstance()`` and a new
    ``WebPageDownloader`` on the instance.'''
    self.settings = Settings.getInstance()
    self.html_getter = WebPageDownloader()
class ProxyResource:
    '''Collect proxies from public listing pages and write them to a file.

    Relies on names defined outside this class: ``Settings``,
    ``WebPageDownloader`` and ``ProxyModel``.
    '''

    # "Next page" link on a listing page; compiled once instead of per page.
    _NEXT_PAGE_PATTERN = re.compile("<a href=[^>]*>下一页</a>")

    # Listing pages of a second site and the row pattern written for them.
    # No method visible in this class reads them yet: the loop that used
    # them was commented out. Kept here so the work is not lost.
    _CNPROXY_URLS = tuple(
        "http://www.cnproxy.com/proxy%d.html" % n for n in range(1, 11))
    _CNPROXY_PATTERN = re.compile("<tr><td>(\\d+\\.\\d+\\.\\d+\\.\\d+)<SCRIPT type=text/javascript>[^<]+?</SCRIPT></td><td>(.+?)</td><td>")

    def __init__(self):
        self.settings = Settings.getInstance()
        self.html_getter = WebPageDownloader()

    def load_proxycn(self):
        '''Return the ProxyModel objects scraped from the proxycn.cn listings.

        For every port in ``psource_ports`` the pages are requested starting
        at page 1, and paging continues for as long as the loaded page
        contains a next-page link.
        '''
        psource_template = "http://www.proxycn.cn/html_proxy/port%s-%s.html"
        psource_ports = (8080, 80, 81, 3128, 8000, 1080, 444)
        # Groups, in order: ip, port, type, location, validate date.
        p_proxy = re.compile("<TR [^>]* onDblClick=\"clip\\('([\\d.]+):(\\d+)'\\);alert\\('已拷贝到剪贴板!'\\)[^\\x00]+?<TD class=\"list\">\\d+</TD><TD class=\"list\">(\\w*)</TD><TD class=\"list\">([^<]*)</TD><TD class=\"list\">([^<]*)</TD>")

        proxies = []
        for port in psource_ports:
            page = 0
            hasNext = True
            while hasNext:
                page += 1
                purl = psource_template % (port, page)
                hasNext = self.__loadProxyFromURL(proxies, purl, p_proxy)
        return proxies

    def load_proxyServer(self):
        '''Return the ProxyModel objects parsed from the proxynova download.'''
        proxies = []
        url = "http://www.proxynova.com/get_proxies.php?proxy_type=2&btn_submit=Download+all+Proxies"
        self.__loadFromProxyServer(proxies, url)
        return proxies

    def __loadFromProxyServer(self, proxies, url):
        '''Append one ProxyModel per ``ip:port`` line downloaded from ``url``.

        The first two and the last two lines of the download are ignored.
        Lines that do not hold both an ip and a port are skipped instead of
        raising IndexError, and surrounding whitespace (including a trailing
        carriage return) is removed before splitting.
        '''
        print("load proxies from %s " % url)
        source = self.html_getter.getHtmlRetry(url, 3)
        if source is None:
            # NOTE(review): getHtmlRetry is defined elsewhere; this assumes
            # it can return None when every retry fails -- confirm.
            print("---proxyLoader---:load proxy from proxyServer get %s " % 0)
            return
        # Python 2 round trip: fails loudly if the payload is not UTF-8.
        source = unicode(source, "UTF-8").encode("UTF-8")

        count = 0
        for line in source.split("\n")[2:-2]:
            fields = line.strip().split(":")
            if len(fields) < 2 or not fields[0] or not fields[1]:
                continue
            proxies.append(ProxyModel(fields[0], fields[1], "proxyServer"))
            count += 1
        print("---proxyLoader---:load proxy from proxyServer get %s " % count)

    def __loadProxyFromURL(self, proxies, url, pattern):
        '''Append the proxies found on the page at ``url`` to ``proxies``.

        pattern -- compiled regex with at least five groups, read in order
                   as ip, port, type, location and validate date.

        Returns True when the page contains a next-page link, else False.
        '''
        print("---proxyloader---:load proxy from %s" % url)
        source = self.html_getter.getHtmlRetry(url, 3)
        if source is None:
            # NOTE(review): assumes getHtmlRetry can return None on failure
            # -- confirm. Treated as an empty last page so paging stops.
            print("---proxyloader---:load proxy from %s (get %s)" % (url, 0))
            return False
        # The page is decoded as gbk; matching is done on UTF-8 bytes (Python 2).
        source = unicode(source, "gbk").encode("UTF-8")
        has_next_page = self._NEXT_PAGE_PATTERN.search(source) is not None

        count = 0
        for row in pattern.findall(source):
            model = ProxyModel(row[0], row[1], row[2].strip().lower())
            model.location = row[3]
            model.validate_date = row[4]
            proxies.append(model)
            count += 1
        print("---proxyloader---:load proxy from %s (get %s)" % (url, count))
        return has_next_page

    def saveToFile(self, file_abspath, proxies):
        '''Write ``to_line()`` of every item in ``proxies`` to ``file_abspath``.

        One line is written per item. An existing file at that path is
        removed first. The handle is closed even when a write or a
        ``to_line()`` call raises.
        '''
        if os.path.exists(file_abspath):
            os.remove(file_abspath)
            print("$proxy/> remove %s." % file_abspath)

        with open(file_abspath, "w") as out:
            for proxyModel in proxies:
                out.write(proxyModel.to_line())
                out.write("\n")
        print("$proxy/> write proxies to %s." % file_abspath)