def _get_details(self, torrent):
    """Fetch one torrent's detail page and build a Torrent from it.

    torrent is a sequence: [detail_path, title, _, magnet, size].
    Returns None when the detail page cannot be fetched.
    """
    url = '{}{}'.format(self.base_url, torrent[0])
    try:
        response = requests.get(url)
    except requests.exceptions.ConnectionError:
        # can't connect, go to next url
        return

    page = response.content
    soup = BeautifulSoup(page, 'html.parser')

    # Seeder count is shown in a red "stat" span; default to 0 when absent.
    seed_count = 0
    seeds_tag = soup.find('span', class_='stat_red')
    if seeds_tag:
        seed_count = seeds_tag.get_text(strip=True).replace(',', '')

    # The release date only appears in raw markup, e.g.:
    # <b>Released:</b> 20th Aug 2016<br/>
    released = None
    matched = re.compile('.*<b>Released:</b> (.*?)<.*').match(str(page))
    if matched:
        released = parse(matched.groups()[0])

    result = Torrent()
    result.title = torrent[1]
    result.size = self.to_bytes(torrent[4])
    result.date = released
    result.seeders = int(seed_count)
    result.tracker = self.shortname
    result.magnet = torrent[3]
    return result
def _get_details(self, detail):
    """Fetch the 1337x detail page for a search result and build a Torrent.

    detail is a sequence: [detail_path, title, seeders, size].
    Returns None when the page can't be fetched or no longer matches the
    expected layout (the caller treats None as "skip this result").
    """
    url = '{}{}'.format('http://1337x.to', detail[0])
    try:
        r = requests.get(url)
    except requests.exceptions.ConnectionError:
        # can't connect, go to next url
        return
    html = r.content
    soup = BeautifulSoup(html, 'html.parser')
    section = soup.find('div', class_='category-detail')
    if section is None:
        # Layout changed or we got an error page; skip this result instead
        # of raising AttributeError on None.
        return
    try:
        magnet = section.find_all('a')[1]['href']
        # Positional span lookup is fragile (the original TODO called it
        # "brave"), so guard it: a layout change skips the result rather
        # than crashing the whole search.
        date_string = section.find_all('span')[7].get_text(strip=True)
    except (IndexError, KeyError):
        return
    dt = dateparser.parse(date_string)
    torrent = Torrent()
    torrent.title = detail[1]
    torrent.size = self.to_bytes(detail[3])
    torrent.date = dt
    torrent.seeders = int(detail[2])
    torrent.magnet = magnet
    torrent.tracker = self.shortname
    return torrent
def search(self, search_string, season=False, episode=False):
    """Query each provider mirror's RSS search feed and collect Torrents.

    Returns as soon as one mirror yields results; otherwise returns [].
    """
    if season and episode:
        search_string = '%s' % (self.se_ep(season, episode, search_string))
    encoded_search = urllib.parse.quote(search_string)

    torrents = []
    loop_number = 0
    for try_url in self.provider_urls:
        full_url = ('%s/search/all/{}/c/d/1/?fmt=rss' % try_url).format(
            encoded_search)
        loop_number += 1
        self.logger.info('%s[%s]@%s via "%s"' % (
            self.job_id, self.shortname, loop_number, full_url))

        feed = feedparser.parse(full_url)
        for entry in feed['entries']:
            # TODO: Fetch detail pages for age of torrents; the feed's
            # published dates are not usable, so date stays None.
            result = Torrent()
            result.title = entry['title']
            result.size = int(entry['size'])
            result.date = None
            result.seeders = int(entry['numseeders'])
            result.tracker = self.shortname
            result.magnet = entry['magneturi']
            torrents.append(result)

        self.logger.info('%s[%s]@%s found %s result(s)' % (
            self.job_id, self.shortname, loop_number, len(torrents)))
        if torrents:
            return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents
def search(self, search_string, season=None, episode=None):
    """Search each provider mirror's RSS endpoint and return Torrents.

    Returns as soon as one mirror yields results; otherwise returns [].
    """
    if season and episode:
        search_string = '%s %s' % (search_string, sxxexx(season, episode))
    query = urllib.parse.quote(search_string)

    torrents = []
    loop_number = 0
    for base in self.provider_urls:
        url = '%s/rss/type/search/x/%s/' % (base, query)
        loop_number += 1
        self.logger.info('%s[%s]@%s via "%s"' % (
            self.job_id, self.shortname, loop_number, url))

        for entry in feedparser.parse(url)['entries']:
            if not entry:
                continue
            published = entry['published_parsed']
            stamp = (datetime.fromtimestamp(mktime(published))
                     if published else None)

            result = Torrent()
            result.title = entry['title']
            result.size = self.to_bytes(entry['size'])
            result.date = stamp
            result.seeders = int(entry['seeds'])
            result.tracker = self.shortname
            # The feed only carries the info hash; build the magnet from it.
            result.magnet = hash2magnet(entry['hash'], result.title)
            torrents.append(result)

        self.logger.info('%s[%s]@%s found %s result(s)' % (
            self.job_id, self.shortname, loop_number, len(torrents)))
        if torrents:
            return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents
def search(self, search_string, season=None, episode=None):
    """Search the torrentapi (rarbg) JSON API.

    Requests an API token first, then runs one query per search string,
    throttled to one request every two seconds as the API requires.
    Returns a list of Torrent objects (possibly empty); returns [] early
    on connection failure or a 403 response.
    """
    if season and episode:
        searches = self.se_ep(search_string, season, episode)
    else:
        searches = [search_string]

    # get token for api
    url = '{}?get_token=get_token&app_id=tvoverlord'.format(self.baseurl)
    try:
        r = requests.get(url)
    except requests.exceptions.ConnectionError:
        return []
    if r.status_code == 403:
        self.url = url
        return []
    token = r.json()['token']

    torrents = []
    count = 0
    loop_number = 0
    search_tpl = ('{}?mode=search&search_string={}&token={}'
                  '&format=json_extended&sort=seeders&limit=100'
                  '&app_id=tvoverlord')
    for search in searches:
        # the torrentapi only allows one query every two seconds
        if count > 0:
            time.sleep(2)
        count += 1

        search_string = urllib.parse.quote(search)
        url = search_tpl.format(self.baseurl, search_string, token)
        try:
            loop_number += 1
            self.logger.info('%s[%s]@%s via "%s"' % (
                self.job_id, self.shortname, loop_number, url))
            r = requests.get(url)
        except requests.exceptions.ConnectionError:
            # can't connect, go to next url
            continue

        results = r.json()
        # error_code 20 is the API's "no results found" for this query
        if results.get('error_code') == 20:
            continue
        shows = results.get('torrent_results')
        if shows is None:
            # no results (or an unexpected error payload)
            continue

        for show in shows:
            torrent = Torrent()
            torrent.title = show['title']
            # keep only the date part of pubdate, e.g. "2016-08-20 ..."
            torrent.date = parse(show['pubdate'].split(' ')[0])
            torrent.size = int(show['size'])
            torrent.seeders = int(show['seeders'])
            torrent.magnet = show['download']
            torrent.tracker = self.shortname
            torrents.append(torrent)

        self.logger.info('%s[%s]@%s found %s result(s)' % (
            self.job_id, self.shortname, loop_number, len(torrents)))
        if torrents:
            return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents
def search(self, search_string, season=None, episode=None):
    """Scrape the Pirate Bay search results table for matching torrents.

    Tries each provider mirror in turn; returns as soon as one request
    yields results, otherwise returns [].
    """
    if season and episode:
        searches = self.se_ep(search_string, season, episode)
    else:
        searches = [search_string]

    torrents = []
    loop_number = 0
    for base in self.provider_urls:
        for search in searches:
            quoted = urllib.parse.quote(search)
            url = '%s/search/%s/0/7/0' % (base, quoted)
            loop_number += 1
            self.logger.info('%s[%s]@%s via "%s"' % (
                self.job_id, self.shortname, loop_number, url))

            soup = BeautifulSoup(requests.get(url).content, 'html.parser')
            table = soup.find('table', id='searchResult')
            if table:
                # each result row, skipping the thead row and the first
                # cell of every row
                for row in table.find_all('tr')[1:]:
                    cells = row.find_all('td')[1:]
                    try:
                        torrent = Torrent()
                        torrent.tracker = self.shortname
                        torrent.title = cells[0].find(
                            'a', {'class': 'detLink'}).string.strip()
                        details = cells[0].find('font').contents[0].split(', ')
                        # hackity hack to fix TPB's use of 'Y-day'
                        date_string = details[0].replace('Uploaded ', '')
                        if 'Y-day' in date_string:
                            yesterday = datetime.now() - timedelta(days=1)
                            date_string = date_string.replace(
                                'Y-day', yesterday.strftime('%d %B %Y'))
                        if 'Today' in date_string:
                            date_string = date_string.replace(
                                'Today', datetime.now().strftime('%d %B %Y'))
                        torrent.date = parse(date_string)
                        torrent.size = self.to_bytes(details[1])
                        torrent.seeders = int(cells[1].string)
                        torrent.magnet = cells[0].find(
                            'a', href=re.compile('magnet:.*')).attrs['href']
                        torrents.append(torrent)
                    except IndexError:
                        # sometimes some fields are empty, so trying to
                        # access them throws an IndexError. We can safely
                        # skip them.
                        pass

            self.logger.info('%s[%s]@%s found %s result(s)' % (
                self.job_id, self.shortname, loop_number, len(torrents)))
            if torrents:
                return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents
def search(self, search_string, season=None, episode=None):
    """Search torrentdownloads' RSS search endpoint for torrents.

    Returns as soon as one query yields results; otherwise returns [].
    """
    if season and episode:
        searches = self.se_ep(search_string, season, episode)
    else:
        searches = [search_string]

    # http://www.torrentdownloads.me/rss.xml?type=search&search=doctor+who+s05e01
    base_url = '%s/rss.xml?type=search&search={}' % self.provider_urls[0]

    torrents = []
    loop_number = 0
    for search in searches:
        url = base_url.format(urllib.parse.quote(search))
        loop_number += 1
        self.logger.info('%s[%s]@%s via "%s"' % (
            self.job_id, self.shortname, loop_number, url))

        parsed = feedparser.parse(url)
        if len(parsed) == 0:
            continue

        terms = [word.lower() for word in search.split(' ')]
        for entry in parsed['entries']:
            title = entry['title']
            # torrentdownloads returns results that match any word in the
            # search, so drop anything that doesn't contain every term.
            if any(term not in title.lower() for term in terms):
                continue

            torrent = Torrent()
            torrent.title = title
            torrent.seeders = int(entry['seeders'])
            torrent.date = datetime.fromtimestamp(
                mktime(entry['published_parsed']))
            torrent.size = int(entry['size'])
            # The feed carries only the info hash; build the magnet link.
            torrent.magnet = 'magnet:?xt=urn:btih:{}&dn={}'.format(
                entry['info_hash'], urllib.parse.quote(title))
            torrent.tracker = self.shortname
            torrents.append(torrent)

        self.logger.info('%s[%s]@%s found %s result(s)' % (
            self.job_id, self.shortname, loop_number, len(torrents)))
        if torrents:
            return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents
def search(self, search_string, season=False, episode=False):
    """Search extratorrent's RSS feed across all provider mirrors.

    Returns as soon as one mirror yields results; otherwise returns [].
    """
    if season and episode:
        search_string = '%s' % (self.se_ep(season, episode, search_string))
    encoded_search = urllib.parse.quote(search_string)

    torrents = []
    loop_number = 0
    terms = [word.lower() for word in search_string.split(' ')]
    for try_url in self.provider_urls:
        # cid=0 everything, cid=8 tv shows only
        lookfor = 8 if season and episode else 0
        full_url = ('{}/rss.xml?type=search&cid={}&search=%s'.format(
            try_url, lookfor)) % encoded_search
        loop_number += 1
        self.logger.info('%s[%s]@%s via "%s"' % (
            self.job_id, self.shortname, loop_number, full_url))

        for entry in feedparser.parse(full_url)['entries']:
            published = datetime.fromtimestamp(
                mktime(entry['published_parsed']))
            title = entry['title']
            # extratorrent returns results that match any word in the
            # search, so require that every term appears in the title.
            if any(term not in title.lower() for term in terms):
                continue

            # The ExtraTorrent rss feed doesn't supply the magnet link, or
            # any usable links (they must be downloaded from the site). But
            # the feed has the URN hash, so build a magnet link from that.
            magnet = 'magnet:?xt=urn:btih:{}&dn={}'.format(
                entry['info_hash'], urllib.parse.quote(title))

            seeds = entry['seeders']
            if seeds == '---':
                seeds = '0'

            result = Torrent()
            result.title = title
            result.size = int(entry['size'])
            result.date = published
            result.seeders = int(seeds)
            result.tracker = self.shortname
            result.magnet = magnet
            torrents.append(result)

        self.logger.info('%s[%s]@%s found %s result(s)' % (
            self.job_id, self.shortname, loop_number, len(torrents)))
        if torrents:
            return torrents

    # We got this far with no results
    self.logger.info('%s[%s] exiting without any results' % (
        self.job_id, self.shortname))
    return torrents