class NzbindexSpider(object):
    """Scrape nzbindex.com search results for NZB downloads matching a name.

    The result page is parsed with regular expressions rather than an HTML
    parser. The patterns are raw strings so that the backslashes in them
    reach the regex engine verbatim instead of being read as (invalid)
    string escape sequences.
    """

    # One table row of the result page; group 1 is the row's inner HTML.
    _ROW_PATTERN = r"<tr[^>]*>(.*?)<\/tr>"
    # The <label> element holding a result's title; group 1 is its inner HTML.
    _LABEL_PATTERN = r"<label[^>]*>(.*?)<\/label>"
    # Any HTML tag; used to strip markup from the title.
    _TAG_PATTERN = r"<[^>]*>"
    # Direct link to the .nzb file of a result.
    _DOWNLOAD_URL_PATTERN = r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb'

    def __init__(self, bound_ip):
        """Store ``bound_ip``; it is handed to every ``ModifiedSession``.

        NOTE(review): presumably the local address outgoing connections are
        bound to -- confirm against ``ModifiedSession``.
        """
        self.bound_ip = bound_ip

    def find(self, name):
        """Search nzbindex.com for ``name`` and return the matching results.

        A fresh ``ModifiedSession`` is created on every call and stored on
        ``self.session``; each returned result is given a reference to this
        spider.

        :param name: Search string. A row is only kept when its title
            contains ``name`` (case-insensitively).
        :returns: Non-empty list of ``NzbindexResult`` objects.
        :raises NotFoundException: When no row yields a usable result.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)

        # NOTE(review): verify=False presumably disables TLS certificate
        # verification (requests semantics). Left unchanged because it may
        # be deliberate, but confirm it is still wanted.
        #
        # The "agree" form is posted first on the same session, presumably
        # so the site serves the search page afterwards.
        self.session.post(
            "https://nzbindex.com/agree/",
            data={"agree": "I agree"},
            verify=False,
        )
        response = self.session.get(
            "https://nzbindex.com/search/",
            params={
                "q": name,
                "age": "",
                "max": "50",
                "minage": "",
                "sort": "agedesc",
                "minsize": "100",
                "maxsize": "",
                "dq": "",
                "poster": "",
                "nfo": "",
                "hasnfo": "1",
                "complete": "1",
                "hidespam": "1",
                "more": "1",
            },
            verify=False,
        )

        # Loop-invariant: lower-case the search string once.
        wanted = name.lower()
        search_results = []

        for row in re.findall(self._ROW_PATTERN, response.text, re.DOTALL):
            if 'class="threat"' in row:
                # Password protected or otherwise unsuitable for download
                continue

            label = re.search(self._LABEL_PATTERN, row, re.DOTALL)
            if label is None:
                continue

            # Strip nested markup, then decode HTML entities.
            title = parser.unescape(
                re.sub(self._TAG_PATTERN, "", label.group(1))
            )
            if wanted not in title.lower():
                continue

            link = re.search(self._DOWNLOAD_URL_PATTERN, row)
            if link is not None:
                search_results.append(
                    NzbindexResult(title, link.group(0), self)
                )

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results
class BinsearchSpider(object):
    """Scrape binsearch.info search results for posts matching a name.

    The result page is parsed with regular expressions rather than an HTML
    parser. The patterns are raw strings so that the backslashes in them
    reach the regex engine verbatim instead of being read as (invalid)
    string escape sequences.
    """

    # The page's HTML is malformed (according to the original author,
    # deliberately, to deter scrapers), so a row is taken to run from its
    # opening <tr ...> tag up to the next "browse.php" link instead of up to
    # a closing </tr>. Group 1 is the row's inner HTML.
    _ROW_PATTERN = r'<tr[^>]+>(.*?)<a href="browse\.php'
    # The <span class="s"> element holding a result's title.
    _TITLE_PATTERN = r'<span[^>]*class="s"[^>]*>(.*?)<\/span>'
    # Any non-empty HTML tag; used to strip markup from the title.
    _TAG_PATTERN = r"<[^>]+>"
    # The row's selection checkbox; group 1 is its numeric name attribute.
    _CHECKBOX_PATTERN = r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>'

    def __init__(self, bound_ip):
        """Store ``bound_ip``; it is handed to every ``ModifiedSession``.

        NOTE(review): presumably the local address outgoing connections are
        bound to -- confirm against ``ModifiedSession``.
        """
        self.bound_ip = bound_ip

    def find(self, name):
        """Search binsearch.info for ``name`` and return the matching results.

        A fresh ``ModifiedSession`` is created on every call and stored on
        ``self.session``; each returned result is given a reference to this
        spider and the final URL of the search request.

        :param name: Search string. A row is only kept when its title
            contains ``name`` (case-insensitively).
        :returns: Non-empty list of ``BinsearchResult`` objects.
        :raises NotFoundException: When no row yields a usable result.
        """
        parser = HTMLParser.HTMLParser()
        self.session = ModifiedSession(bound_ip=self.bound_ip)

        # NOTE(review): verify=False presumably disables TLS certificate
        # verification (requests semantics). Left unchanged because it may
        # be deliberate, but confirm it is still wanted.
        response = self.session.get(
            "https://binsearch.info/index.php",
            params={
                "q": name,
                "m": "",
                "adv_age": "600",
                "max": "100",
                "adv_g": "",
                "adv_sort": "date",
                "minsize": "100",
                "maxsize": "",
                "adv_col": "on",
                "adv_nfo": "on",
                "font": "",
                "postdate": "",
                "server": "",
            },
            verify=False,
        )

        # Loop-invariant: lower-case the search string once.
        wanted = name.lower()
        search_results = []

        for row in re.findall(self._ROW_PATTERN, response.text, re.DOTALL):
            if 'requires password' in row:
                # Password protected
                continue

            title_match = re.search(self._TITLE_PATTERN, row, re.DOTALL)
            if title_match is None:
                continue

            # Strip nested markup, then decode HTML entities.
            title = parser.unescape(
                re.sub(self._TAG_PATTERN, "", title_match.group(1))
            )
            if wanted not in title.lower():
                continue

            checkbox = re.search(self._CHECKBOX_PATTERN, row)
            if checkbox is not None:
                search_results.append(
                    BinsearchResult(
                        name, title, checkbox.group(1), self, response.url
                    )
                )

        if not search_results:
            raise NotFoundException("No results were found.")

        return search_results