def parse(resp: SiteResponse):
    """Yield one ``SiteResponseData`` per table row found in *resp*.

    Every ``<tr>`` matched by ``//tr`` except the first one (presumably a
    header row -- confirm against the target page) is inspected.  The first
    text node under the row's first ``<td>`` is stored as ``ip`` and the
    first text node under its second ``<td>`` is stored as ``port``.

    Rows for which any step raises (for example a missing cell, which makes
    the ``[0]`` lookup fail) are skipped without being reported.
    """
    all_rows = resp.xpath('//tr')
    for row in all_rows[1:]:
        try:
            record = SiteResponseData()
            ip_texts = row.xpath('.//td[1]//text()')
            record.ip = ip_texts[0]
            port_texts = row.xpath('.//td[2]//text()')
            record.port = port_texts[0]
            yield record
        except Exception:
            # Best-effort scrape: a malformed row is dropped, not fatal.
            continue
def parse(resp: SiteResponse):
    """Yield one ``SiteResponseData`` per table row found in *resp*.

    Every ``<tr>`` matched by ``//tr`` except the first one is inspected.
    Inside the row's first ``<td>``, the text nodes that are direct children
    of any descendant element whose tag name is not ``p`` are collected.
    All collected fragments except the last are concatenated to form ``ip``;
    the last fragment is stored as ``port``.

    Rows for which any step raises (for example when no fragment is found,
    so there is no last element) are skipped without being reported.
    """
    all_rows = resp.xpath('//tr')
    for row in all_rows[1:]:
        try:
            fragments = row.xpath('.//td[1]//*[name(.)!="p"]/text()')
            record = SiteResponseData()
            # The address is spread over several nodes; only the final
            # fragment is treated as the port.
            ip_fragments = fragments[:-1]
            record.ip = "".join(ip_fragments)
            record.port = fragments[-1]
            yield record
        except Exception:
            # Best-effort scrape: a malformed row is dropped, not fatal.
            continue
def parse(resp: SiteResponse):
    """Yield a ``SiteResponseData`` for each ``a.b.c.d:port`` match in *resp*.

    ``resp.text`` is scanned for four dot-separated groups of one to three
    digits followed by a colon and one or more digits.  Each match is split
    at the colon: the left part is stored as ``ip`` and the right part as
    ``port`` (both remain strings).  The digit groups are not range-checked.

    A match for which building the result object raises is skipped.
    """
    import re

    endpoint_pattern = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}:\d+')
    for endpoint in endpoint_pattern.findall(resp.text):
        try:
            # Each match contains exactly one ':' by construction of the
            # pattern, so the split always produces two parts.
            host, port = endpoint.split(':')
            record = SiteResponseData()
            record.ip = host
            record.port = port
            yield record
        except Exception:
            continue
def parse(resp: SiteResponse):
    """Yield row results from *resp*, then optionally a next-page request.

    Phase 1: every ``<tr>`` matched by ``//tr`` except the first one is
    inspected.  The first text node under the row's first ``<td>`` is stored
    as ``ip`` and the first text node under its second ``<td>`` as ``port``
    on a fresh ``SiteResponseData``, which is yielded.  Rows for which any
    step raises are skipped.

    Phase 2: while ``resp.site.current_page`` is below
    ``resp.site.page_limit``, the ``href`` of the pagination link labelled
    "Next" is appended to ``resp.site.base_url`` and yielded as a
    ``SiteRequestData`` with ``use_proxy`` set to ``True``.
    ``resp.site.current_page`` is incremented just before that request is
    yielded.  Any failure in this phase (for example no "Next" link) is
    ignored and the generator simply ends.
    """
    all_rows = resp.xpath('//tr')
    for row in all_rows[1:]:
        try:
            record = SiteResponseData()
            ip_texts = row.xpath('.//td[1]//text()')
            record.ip = ip_texts[0]
            port_texts = row.xpath('.//td[2]//text()')
            record.port = port_texts[0]
            yield record
        except Exception:
            # Best-effort scrape: a malformed row is dropped, not fatal.
            continue

    try:
        if not (resp.site.current_page < resp.site.page_limit):
            # Page budget exhausted: do not follow pagination any further.
            return
        next_links = resp.xpath(
            '//ul[@class="pagination"]//li//a[@aria-label="Next"]//@href'
        )
        next_href = next_links[0]
        follow_up = SiteRequestData()
        follow_up.url = resp.site.base_url + next_href
        follow_up.use_proxy = True
        # Count the page before handing the request to the consumer.
        resp.site.current_page += 1
        yield follow_up
    except Exception:
        # A missing "Next" link (or any other failure) just ends pagination.
        pass