Exemplo n.º 1
0
class NzbindexSpider(object):
	"""Scrape nzbindex.com search results for releases whose title contains a name.

	A fresh ``ModifiedSession`` is created on every ``find`` call and kept on
	``self.session``; each result object receives this spider instance.
	"""

	def __init__(self, bound_ip):
		"""Store the address that is forwarded to every session this spider opens.

		:param bound_ip: passed unchanged as ``ModifiedSession(bound_ip=...)``;
			presumably the local address outgoing connections bind to
			(confirm against ``ModifiedSession``).
		"""
		self.bound_ip = bound_ip

	def find(self, name):
		"""Search nzbindex.com for ``name`` and return the matching releases.

		:param name: search string; a row is kept only when ``name`` occurs
			(case-insensitively) inside the row's title.
		:returns: non-empty list of ``NzbindexResult(title, download_url, self)``.
		:raises NotFoundException: when no row yields a usable result.
		"""
		# NOTE(review): HTMLParser.unescape() only exists on Python 2 and on
		# Python 3 before 3.9; a port would use html.unescape() instead.
		parser = HTMLParser.HTMLParser()
		self.session = ModifiedSession(bound_ip=self.bound_ip)

		# The site's agreement form is submitted first, on the same session.
		# NOTE(review): verify=False presumably disables TLS certificate
		# verification (requests-style API) -- confirm this is intentional.
		self.session.post("https://nzbindex.com/agree/", data={"agree": "I agree"}, verify=False)

		response = self.session.get("https://nzbindex.com/search/", params={
			"q": name,
			"age": "",
			"max": "50",
			"minage": "",
			"sort": "agedesc",
			"minsize": "100",
			"maxsize": "",
			"dq": "",
			"poster": "",
			"nfo": "",
			"hasnfo": "1",
			"complete": "1",
			"hidespam": "1",
			"more": "1"
		}, verify=False)

		# Lower-case the query once instead of once per table row.
		needle = name.lower()
		search_results = []

		# Every regex below is a raw string: the patterns contain "\/" and "\.",
		# which are invalid escape sequences in ordinary string literals.
		for row in re.findall(r"<tr[^>]*>(.*?)<\/tr>", response.text, re.DOTALL):
			if 'class="threat"' in row:
				# Password protected or otherwise unsuitable for download
				continue

			label = re.search(r"<label[^>]*>(.*?)<\/label>", row, re.DOTALL)
			if label is None:
				continue

			# Strip nested tags from the label, then decode HTML entities.
			title = parser.unescape(re.sub(r"<[^>]*>", "", label.group(1)))
			if needle not in title.lower():
				continue

			link = re.search(r'https?:\/\/nzbindex\.com\/download\/[^"]+\.nzb', row)
			if link is not None:
				search_results.append(NzbindexResult(title, link.group(0), self))

		if not search_results:
			raise NotFoundException("No results were found.")

		return search_results
Exemplo n.º 2
0
class BinsearchSpider(object):
	"""Scrape binsearch.info search results for posts whose title contains a name.

	A fresh ``ModifiedSession`` is created on every ``find`` call and kept on
	``self.session``; each result object receives this spider instance.
	"""

	def __init__(self, bound_ip):
		"""Store the address that is forwarded to every session this spider opens.

		:param bound_ip: passed unchanged as ``ModifiedSession(bound_ip=...)``;
			presumably the local address outgoing connections bind to
			(confirm against ``ModifiedSession``).
		"""
		self.bound_ip = bound_ip

	def find(self, name):
		"""Search binsearch.info for ``name`` and return the matching posts.

		:param name: search string; a row is kept only when ``name`` occurs
			(case-insensitively) inside the row's title.
		:returns: non-empty list of
			``BinsearchResult(name, title, post_id, self, response.url)`` where
			``post_id`` is the numeric ``name`` attribute of the row's checkbox.
		:raises NotFoundException: when no row yields a usable result.
		"""
		# NOTE(review): HTMLParser.unescape() only exists on Python 2 and on
		# Python 3 before 3.9; a port would use html.unescape() instead.
		parser = HTMLParser.HTMLParser()
		self.session = ModifiedSession(bound_ip=self.bound_ip)

		# NOTE(review): verify=False presumably disables TLS certificate
		# verification (requests-style API) -- confirm this is intentional.
		response = self.session.get("https://binsearch.info/index.php", params={
			"q": name,
			"m": "",
			"adv_age": "600",
			"max": "100",
			"adv_g": "",
			"adv_sort": "date",
			"minsize": "100",
			"maxsize": "",
			"adv_col": "on",
			"adv_nfo": "on",
			"font": "",
			"postdate": "",
			"server": ""
		}, verify=False)

		# Lower-case the query once instead of once per table row.
		needle = name.lower()
		search_results = []

		# Per the original author's note, the site's HTML is deliberately
		# malformed, so a row is not delimited by a closing </tr>; each row is
		# captured from its <tr ...> tag up to its '<a href="browse.php' link.
		# Every regex below is a raw string: the patterns contain "\." and "\/",
		# which are invalid escape sequences in ordinary string literals.
		for row in re.findall(r'<tr[^>]+>(.*?)<a href="browse\.php', response.text, re.DOTALL):
			if 'requires password' in row:
				# Password protected
				continue

			subject = re.search(r'<span[^>]*class="s"[^>]*>(.*?)<\/span>', row, re.DOTALL)
			if subject is None:
				continue

			# Strip nested tags from the subject span, then decode HTML entities.
			title = parser.unescape(re.sub(r"<[^>]+>", "", subject.group(1)))
			if needle not in title.lower():
				continue

			checkbox = re.search(r'<input[^>]*type="checkbox"[^>]*name="([0-9]+)"[^>]*>', row)
			if checkbox is not None:
				search_results.append(BinsearchResult(name, title, checkbox.group(1), self, response.url))

		if not search_results:
			raise NotFoundException("No results were found.")

		return search_results