def getter(self):
    """Yield 'ip:port' proxies from mimvp.com, OCR-decoding port images.

    The site renders each port as an image; every image URL is sent to the
    captcha-recognition service (`self.captcha_recognize_url`) to recover
    the digits.  Yields ``None`` for a row whose image could not be
    recognized, preserving the original best-effort behavior.

    Fixes vs. original: the list-length validation used ``assert`` (which
    vanishes under ``python -O``) and iteration used ``range(len(...))``;
    both replaced with explicit forms.
    """
    for url in self.url_list:
        tree = get_html_tree(url)
        if tree is None:
            continue
        image_tree = tree.xpath(
            '//*[@id="mimvp-body"]/div[2]/div/table[1]/tbody/td/img/@src')
        proxy_tree = tree.xpath(
            '//*[@id="mimvp-body"]/div[2]/div/table[1]/tbody/td')
        image_list = [
            "https://proxy.mimvp.com/" + px for px in image_tree[0::2]
        ]
        ip_list = [px.xpath('./text()')[0] for px in proxy_tree[1::10]]
        if len(image_list) != len(ip_list):
            # Page layout changed: IPs and port images no longer pair up.
            # Explicit check instead of `assert`, which is stripped by -O.
            continue
        cap = Captcha(self.captcha_recognize_url)
        for ip, image_url in zip(ip_list, image_list):
            try:
                port = cap.get_image_result(image_url)
                yield ip + ":" + port
            except Exception:
                # Recognition failed for this image only; keep scraping.
                yield None
def getter(self):
    """Yield 'ip:port' strings scraped from the site's '#freelist' table.

    A cookie is prepared up front via ``self._prepare()``; if that fails
    with IndexError, pages are fetched without a cookie instead.
    """
    try:
        cookie = self._prepare()
        have_cookie = True
    except IndexError:
        # Cookie material could not be extracted; fall back to plain GETs.
        have_cookie = False
    for page_url in self.url_list:
        if have_cookie:
            tree = get_html_tree(page_url, cookie=cookie)
        else:
            tree = get_html_tree(page_url)
        if tree is None:
            continue
        for row in tree.xpath('//*[@id="freelist"]/table/tbody/tr'):
            yield ':'.join(row.xpath('./td/text()')[0:2])
def getter(self):
    """Yield 'ip:port' proxies from a site requiring a POST with a CSRF token.

    Each page embeds a one-time token in a hidden input named
    'fefefsfesf4tzrhtzuh'; it is read from an initial GET and echoed back
    in the POST that returns the actual proxy table.

    Fixes vs. original: the regex is compiled once outside the loop
    (loop-invariant), and a page with no token is skipped instead of
    raising IndexError on ``keysearch[0]``.
    """
    # Compile once; the pattern never changes between pages.
    key_pattern = re.compile(
        '''name="fefefsfesf4tzrhtzuh" value="([^"]+)"''')
    for url in self.url_list:
        response = get_html(url)
        keysearch = key_pattern.findall(response)
        if not keysearch:
            # Token not present -- layout changed or the GET failed; skip.
            continue
        post_data = {
            'filter_port': "",
            'filter_http_gateway': "",
            'filter_http_anon': "",
            'filter_response_time_http': "",
            'fefefsfesf4tzrhtzuh': keysearch[0],
            'filter_country': "",
            'filter_timeouts1': "",
            'liststyle': "info",
            'proxies': "200",
            'type': "httphttps",
            'submit': "Anzeigen"
        }
        tree = get_html_tree(url, data=post_data)
        if tree is None:
            continue
        px_segment = tree.xpath(
            '//table[@class="proxyList center"]/tr')[1:]
        for px in px_segment:
            yield ":".join(
                [px.xpath('./td/a/text()')[0], px.xpath('./td/text()')[0]])
def _gen_url_list():
    """Return the newest article link(s) (at most one URL) from youdaili.net.

    Any failure (network error, unparsable page, ``None`` tree raising
    AttributeError on ``.xpath``) propagates to the caller unchanged.
    The original ``except Exception as e: raise e`` wrapper was a no-op
    that only disturbed the traceback, so it was removed.
    """
    tree = get_html_tree("http://www.youdaili.net/Daili/http/")
    return tree.xpath('.//div[@class="chunlist"]/ul/li/p/a/@href')[0:1]
def getter(self):
    """Yield 'ip:port' entries from the page's proxy table (IP in the
    first cell's anchor, port in the first plain td text node)."""
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath('/html/body/div[2]/div[2]/table/tbody/tr')
        for row in rows:
            host = row.xpath('./td[1]/a/text()')[0]
            port = row.xpath('./td/text()')[0]
            yield host + ':' + port
def getter(self):
    """Yield 'ip:port' entries from the '#ip_list' table rows."""
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        for row in tree.xpath('.//table[@id="ip_list"]//tr'):
            cells = row.xpath('./td/text()')[:2]
            yield ':'.join(cells)
def getter(self):
    """Yield 'ip:port' entries from the '#proxylisttable' body rows.

    Fix vs. original: compare the tree against ``None`` explicitly — an
    lxml element with no children is falsy, so ``if not tree`` could
    wrongly skip a successfully parsed page.  This also matches the
    ``is None`` convention used by the sibling getters.
    """
    for url in self.url_list:
        tree = get_html_tree(url)
        if tree is None:
            continue
        for px in tree.xpath('//*[@id="proxylisttable"]/tbody/tr'):
            yield ':'.join(px.xpath('./td/text()')[0:2])
def getter(self):
    """Yield 'ip:port'; cell texts at positions 1 and 2 hold IP and port."""
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath(
            '/html/body/main/div[2]/div/div/div[1]/div/div/table//tr')
        # First row is the table header.
        for row in rows[1:]:
            cells = row.xpath('./td/text()')
            yield cells[1].strip() + ':' + cells[2].strip()
def getter(self):
    """Yield 'ip:port' pairs; the port is fixed per listing page.

    ``self.url_list`` holds ``(port, url)`` tuples — only the IPs are
    scraped from each page (first column of the '#ipc' table, header
    row skipped).
    """
    for port, page in self.url_list:
        tree = get_html_tree(page, proxy=self.proxy_)
        if tree is None:
            continue
        ip_cells = tree.xpath('//*[@id="ipc"]/tbody/tr/td[1]/text()')[1:]
        for host in ip_cells:
            yield "{}:{}".format(host, port)
def getter(self):
    """Yield 'ip:port' entries built from span/li cell texts.

    Fix vs. original: compare the tree against ``None`` explicitly — an
    lxml element with no children is falsy, so ``if not tree`` could
    wrongly skip a successfully parsed page; this also matches the
    sibling getters' convention.
    """
    for url in self.url_list:
        tree = get_html_tree(url)
        if tree is None:
            continue
        px_segment = tree.xpath("/html/body/div[4]/li[2]/ul")[1:]
        for px in px_segment:
            yield ":".join(px.xpath("./span/li/text()")[0:2])
def getter(self):
    """Yield 'ip:port' from the first two <td> texts of each data row
    (header row skipped)."""
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath('/html/body/div/div/div[2]/div/table/tr')[1:]
        for row in rows:
            yield ":".join(row.xpath("./td/text()")[:2])
def getter(self):
    """Yield 'ip:port' from '#proxylisttable' rows, fetching each listing
    page through ``self.proxy``."""
    for page in self.url_list:
        tree = get_html_tree(page, proxy=self.proxy)
        if tree is None:
            continue
        for row in tree.xpath("//table[@id='proxylisttable']/tbody/tr"):
            host = row.xpath("td[1]/text()")[0].strip()
            port = row.xpath("td[2]/text()")[0].strip()
            yield "{}:{}".format(host, port)
def getter(self):
    """Yield 'ip:port' from the '#freelist' table.

    A cookie obtained once from ``self._prepare()`` is attached to every
    page request; any failure in ``_prepare`` propagates to the caller.
    """
    session_cookie = self._prepare()
    for page in self.url_list:
        tree = get_html_tree(page, cookie=session_cookie)
        if tree is None:
            continue
        for row in tree.xpath('//*[@id="freelist"]/table/tbody/tr'):
            yield ':'.join(row.xpath('./td/text()')[0:2])
def getter(self):
    """Yield proxy strings whose port is hidden behind a CSS class.

    Each row's first cell holds two <span>s: the IP appears as the tail
    text of the first span, and the second span's ``class`` attribute is
    looked up in the module-level ``port_dict`` mapping to recover the
    port text.  The last table row is a non-data row and is dropped.

    NOTE(review): the yielded value is ``ip + port`` with no ':' inserted
    here — presumably the ``port_dict`` values carry the separator;
    confirm against that mapping.
    """
    for page in self.url_list:
        tree = get_html_tree(page, proxy=self.proxy)
        if tree is None:
            continue
        rows = tree.xpath('//*[@id="proxylistt"]/tbody/tr')[:-1]
        for row in rows:
            spans = row.xpath('./td')[0].xpath('./span')
            host = spans[0].tail
            port_class = spans[1].xpath("@class")[0]
            yield host + port_dict[port_class]
def getter(self):
    """Yield proxy strings assembled from the fragments of each row's
    'ip' cell (the site scatters the address across span/div/text nodes;
    joined verbatim, so the separator — if any — comes from the page).

    Fix vs. original: compare the tree against ``None`` explicitly — an
    lxml element with no children is falsy, so ``if not tree`` could
    wrongly skip a successfully parsed page; this also matches the
    sibling getters' convention.
    """
    for url in self.url_list:
        tree = get_html_tree(url)
        if tree is None:
            continue
        px_segment = tree.xpath("//table[@ class='table']/tbody/tr")
        for px in px_segment:
            yield "".join(
                px.xpath(
                    "./td[@class='ip']/span/text() | ./td[@class='ip']/div/text()|./td[@class='ip']/text()"
                ))
def getter(self):
    """Yield 'ip:port' taken from child-text positions 1 and 3 of each
    row under the '#site-app' table."""
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath(
            '//*[@id="site-app"]/div/div/div[1]/div/table/tbody/tr')
        for row in rows:
            texts = row.xpath('*/text()')
            yield texts[1].strip() + ':' + texts[3].strip()
def getter(self):
    """Yield 'ip:port' from rows whose 'ip' cell mixes real fragments with
    decoy children hidden via ``display: none``.

    The XPath predicate drops the hidden decoys; a ':' is spliced in
    before the final fragment (presumably the port — verify against the
    live page layout).
    """
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        for row in tree.xpath('//table/tbody/tr'):
            fragments = row.xpath(
                'td[@class="ip"]/*[not(@style="display: none;" or @style="display:none;")]/text()'
            )
            fragments.insert(-1, ':')
            yield "".join(fragments)
def getter(self):
    """Yield proxies that each row computes in inline JavaScript.

    The per-row <script> pushes entries into a ``proxies`` array; the
    ``document.*`` statements are stripped, the remainder is wrapped into
    a function, evaluated with execjs, and the first computed entry of
    the returned array is yielded.
    """
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath('/html/body/div[2]/table//tr')
        # First row is the header.
        for row in rows[1:]:
            raw_js = ''.join(row.xpath('./td[1]/script/text()'))
            stripped = re.sub("document.*?;", "", raw_js)
            script_string = (
                'function func() {var proxies=[];'
                + (stripped + "; return proxies}").replace("\n", "")
            )
            ctx = execjs.compile(script_string)
            yield ctx.call('func')[0]
def getter(self):
    """Yield 'ip:port'; the IP sits in a per-row <script>, encoded as
    base64 and then ROT13.

    The module-level ``ip_pattern`` pulls the encoded token out of the
    script text; decoding order is ROT13 first, then base64.  Rows with
    no cells or no pattern match are skipped silently.
    """
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        rows = tree.xpath('//*[@id="main"]/table/tr')[1:]
        for row in rows:
            cells = row.xpath("./td")
            if not cells:
                continue
            matches = ip_pattern.findall(
                cells[0].xpath("./script/text()")[0])
            if not matches:
                continue
            port = row.xpath("./td/text()")[0]
            decoded = base64.b64decode(
                codecs.decode(matches[0].strip(), 'rot-13'))
            yield ":".join([decoded.strip().decode('utf-8'), port])
def getter(self):
    """Yield 'ip:port'; ports are obfuscated as a JS concatenation of
    variables declared in a <head> script.

    The head script looks like ``a=1;b=2;...``; it is parsed into a
    name -> digit-string table.  Each row's <td> script concatenates some
    of those names (``...+a+b)``); the names are resolved through the
    table and joined to rebuild the port.
    """
    for page in self.url_list:
        tree = get_html_tree(page)
        if tree is None:
            continue
        # Build {var_name: digits} from the 'a=1;b=2;...' head script.
        assignments = tree.xpath(
            "//head/script/text()")[0].strip().split(";")
        lookup = {
            item.split("=")[0]: item.split("=")[1]
            for item in assignments
            if item != ''
        }
        for row in tree.xpath('//*[@id="proxylist"]/tr'):
            host = row.xpath('./td/text()')[0]
            expr = re.findall(
                r"\+.*", row.xpath('./td/script/text()')[0])[0]
            port = "".join(
                lookup[token]
                for token in expr.replace(")", "").split("+")
                if token != ''
            )
            yield host + ":" + port