def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    # Read the results without mutating the caller's response dict.
    # (Previously `del data['total_results']` was executed here, which both
    # mutated the input and raised KeyError when the key was absent.)
    torrent_rows = data['results']

    for row in torrent_rows:
        try:
            title = row.get('release_name')
            download_url = row.get('download_url')
            if not all([title, download_url]):
                continue

            seeders = row.get('seeders')
            leechers = row.get('leechers')

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            # Honor the freeleech-only setting when configured
            freeleech = row.get('freeleech')
            if self.freeleech and not freeleech:
                continue

            # Size is reported as a bare number of megabytes
            torrent_size = '{0} MB'.format(row.get('size', -1))
            size = convert_size(torrent_size) or -1

            pubdate_raw = row.get('publish_date')
            pubdate = self.parse_pubdate(pubdate_raw, timezone='Europe/Copenhagen')

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        # Each result is an RSS-style <item> element
        torrent_rows = html('item')
        for row in torrent_rows:
            try:
                # Skip non-video categories when a category element is present
                if row.category and 'video' not in row.category.get_text(strip=True).lower():
                    continue

                title_raw = row.title.text
                # Add "-" after codec and add missing "."
                title = re.sub(r'([xh][ .]?264|xvid)( )', r'\1-', title_raw).replace(' ', '.') if title_raw else ''

                # The info hash is the last path segment of the <guid> URL
                info_hash = row.guid.text.rsplit('/', 1)[-1]
                download_url = 'magnet:?xt=urn:btih:' + info_hash + '&dn=' + title + self._custom_trackers
                if not all([title, download_url]):
                    continue

                # Size, seeders and leechers are packed into the <description> text
                torrent_size, seeders, leechers = self._split_description(row.find('description').text)
                size = convert_size(torrent_size) or -1

                pubdate_raw = row.pubdate.get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    results = data.get('torrent_results', {})
    if not results:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    for result in results:
        try:
            name = result.pop('title')
            url = result.pop('download') + self._custom_trackers
            if not all([name, url]):
                continue

            seeders = result.pop('seeders', 0)
            leechers = result.pop('leechers', 0)

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(result.pop('size', None), default=-1)
            pubdate = self.parse_pubdate(result.pop('pubdate', None))

            item = {
                'title': name,
                'link': url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          name, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    results = data['torrentList']
    user_timezone = data.get('userTimeZone', 'UTC')

    # Continue only if at least one release is found
    if not results:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    for result in results:
        try:
            name = result['name']
            url = self.urls['download'].format(id=result['fid'], file=result['filename'])

            seeders = int(result['seeders'])
            leechers = int(result['leechers'])

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(result['size']) or -1

            # Timestamps are reported in the user's configured timezone
            pubdate = self.parse_pubdate(result['addedTimestamp'], timezone=user_timezone)

            item = {
                'title': name,
                'link': url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          name, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    for entry in data.get('data', {}).get('torrents', []):
        try:
            name = entry.pop('name', '')
            # Download links require the user's passkey as a query parameter
            query = urlencode({'id': entry.pop('id', ''), 'passkey': self.passkey})
            url = '{0}?{1}'.format(self.urls['download'], query)
            if not all([name, url]):
                continue

            seeders = try_int(entry.pop('seeders', 0))
            leechers = try_int(entry.pop('leechers', 0))

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(entry.pop('size', -1), -1)

            item = {
                'title': name,
                'link': url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': None,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          name, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    rows = data.pop('torrents', {})
    if not rows:
        log.debug('Provider has no results for this search')
        return items

    for entry in rows:
        try:
            name = entry.get('name')
            url = entry.get('download_link')
            if not all([name, url]):
                continue

            seeders = entry.get('seeders')
            leechers = entry.get('leechers')

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(entry.get('size'), default=-1)

            item = {
                'title': name,
                'link': url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'hash': '',
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          name, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Size units used by this feed
    units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

    items = []

    for result in data:
        try:
            title = result['title']
            download_url = result['link']
            if not all([title, download_url]):
                continue

            seeders = try_int(result['nyaa_seeders'])
            leechers = try_int(result['nyaa_leechers'])

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            size = convert_size(result['nyaa_size'], default=-1, units=units)
            pubdate = self.parse_pubdate(result['published'])

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    rows = data.pop('torrents', {})

    # Skip column headers
    for entry in rows:
        try:
            name = entry.pop('title', '')
            digest = entry.pop('infoHash', '')
            # Results are delivered as bare magnet links built from the hash
            magnet = 'magnet:?xt=urn:btih:' + digest
            if not all([name, magnet, digest]):
                continue

            peers = entry.pop('swarm', {})
            seeders = try_int(peers.pop('seeders', 0))
            leechers = try_int(peers.pop('leechers', 0))

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              name, seeders)
                continue

            size = convert_size(entry.pop('size', -1)) or -1

            item = {
                'title': name,
                'link': magnet,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': None,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          name, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrents = html('tr')

        if not torrents or len(torrents) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrents[1:]:
            # Skip extraneous rows at the end
            if len(row.contents) < 10:
                continue

            try:
                title = row.find(class_='torrent-filename').get_text(strip=True)
                download_url = row.find(class_='torrent-download-icon').get('href')

                # NOTE(review): seeders/leechers stay as raw strings and no
                # minseed filter is applied here, unlike sibling providers —
                # confirm this is intentional for this provider.
                seeders = row.contents[13].get_text()
                leechers = row.contents[15].get_text()

                # Cell positions are fixed by the site's table layout
                size = convert_size(row.contents[11].get_text(strip=True), default=-1)

                pubdate = self.parse_pubdate(row.contents[7].contents[1].get('title'))

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        # RSS-style <item> entries with an <enclosure> carrying the link/size
        entries = html('item')

        for item in entries:
            try:
                title = item.title.get_text(strip=True)
                download_url = item.enclosure.get('url').strip()
                if not (title and download_url):
                    continue

                # description = item.find('description')

                size = convert_size(item.enclosure.get('length'), default=-1)

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                # NOTE(review): the loop variable 'item' is rebound here to the
                # result dict — harmless but easy to misread. No seeders/leechers
                # are available from this feed.
                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'pubdate': pubdate,
                }

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        # Continue only if at least one release is found
        empty = html.find('h2', text='No .torrents fit this filter criteria')
        if empty:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # The results table is identified by its inline style attribute
        torrent_table = html.find('table', attrs={'style': 'border: none; width: 100%;'})
        torrent_rows = torrent_table('tr', class_='browse') if torrent_table else []

        for row in torrent_rows:
            cells = row('td')

            try:
                title = cells[1].find('a').get('title')
                torrent_url = cells[2].find('a').get('href')
                download_url = urljoin(self.url, torrent_url)
                if not all([title, torrent_url]):
                    continue

                # Default to 1 seeder when the cell text can't be parsed
                seeders = try_int(cells[9].get_text(), 1)
                leechers = try_int(cells[10].get_text())

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Size text needs provider-specific normalization first
                torrent_size = self._norm_size(cells[7].get_text(strip=True))
                size = convert_size(torrent_size) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    for row in data:
        try:
            # Check if this is a freeleech torrent and if we've configured to only allow freeleech.
            if self.freeleech and row.get('download-multiplier') != 0:
                continue

            # Strip BBCode-style [tag=value]...[/tag] markup from the name
            title = re.sub(r'\[.*\=.*\].*\[/.*\]', '', row['name']) if row['name'] else None

            download_url = urljoin(self.urls['download'], '{0}/{1}.torrent'.format(
                row['t'], row['name']
            )) if row['t'] and row['name'] else None

            if not all([title, download_url]):
                continue

            seeders = int(row['seeders'])
            leechers = int(row['leechers'])

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            torrent_size = row['size']
            size = convert_size(torrent_size) or -1

            # 'ctime' is a Unix timestamp
            pubdate_raw = row['ctime']
            pubdate = self.parse_pubdate(pubdate_raw, fromtimestamp=True)

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                # BUG FIX: previously this message was logged twice per result
                # (once before pubdate parsing and once here); log it only once.
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    def get_label_title(label):
        """Get table row header labels."""
        # Prefer plain cell text, then anchor text, then an image's title
        if label.get_text():
            return label.get_text(strip=True)
        if label.a and label.a.get_text(strip=True):
            return label.a.get_text(strip=True)
        if label.img:
            return label.img.get('title')

    items = []

    if '<h2>Nothing found!</h2>' in data:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    with BS4Parser(data, 'html.parser') as html:
        torrent_table = html.find('table', width='100%')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 1:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Cat., Active, Name, Download, Added, Size, Uploader, Seeders, Leechers
        labels = [get_label_title(label) for label in torrent_rows[0]('td')]

        for row in torrent_rows[1:]:
            try:
                # Truncate to the labelled columns; skip incomplete rows
                cells = row.findChildren('td')[:len(labels)]
                if len(cells) < len(labels):
                    continue

                title = cells[labels.index('Name')].a
                title = title.get_text(strip=True) if title else None
                link = cells[labels.index('Download')].a
                link = link.get('href') if link else None
                download_url = urljoin(self.url, link) if link else None
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Size cell holds [number, separator, unit] child nodes
                torrent_size, _, unit = cells[labels.index('Size')].contents
                size = convert_size('{0} {1}'.format(torrent_size, unit)) or -1

                pubdate_raw = cells[labels.index('Added')].get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS.

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as soup:
        torrent_table = soup.find('table', class_='listing')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Results come as row pairs (description row + stats row); if the
        # first row is a header (fewer than 2 cells) start pairing at index 1.
        a = 1 if len(torrent_rows[0]('td')) < 2 else 0

        # Skip column headers
        for top, bot in zip(torrent_rows[a::2], torrent_rows[a + 1::2]):
            try:
                desc_top = top.find('td', class_='desc-top')
                title = desc_top.get_text(strip=True) if desc_top else None
                download_url = desc_top.find('a')['href'] if desc_top else None
                if not all([title, download_url]):
                    continue

                # Stats cell text looks like "S: <n> L: <n> C: <n> ID: <n>"
                stats = bot.find('td', class_='stats').get_text(strip=True)
                sl = re.match(r'S:(?P<seeders>\d+)L:(?P<leechers>\d+)C:(?:\d+)ID:(?:\d+)',
                              stats.replace(' ', ''))
                seeders = try_int(sl.group('seeders')) if sl else 0
                leechers = try_int(sl.group('leechers')) if sl else 0

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Size is the second '|'-separated field of the bottom row.
                # NOTE(review): .strip('Size: ') strips a *character set*, not a
                # prefix — works for the current size formats but is fragile.
                desc_bottom = bot.find('td', class_='desc-bot').get_text(strip=True)
                size = convert_size(desc_bottom.split('|')[1].strip('Size: ')) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html('div', class_='panel-body', limit=2)
        # RSS results are in the first panel, search results in the second
        if mode != 'RSS':
            torrent_rows = torrent_table[1]('tr') if torrent_table else []
        else:
            torrent_rows = torrent_table[0]('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            try:
                title = cells[1].find('a').get_text()
                magnet = cells[2].find('a', title='Magnet link')['href']
                download_url = '{magnet}{trackers}'.format(magnet=magnet,
                                                           trackers=self._custom_trackers)
                if not all([title, download_url]):
                    continue

                # Defaults used when no peer tooltip is present
                seeders = 1
                leechers = 0
                if len(cells) > 5:
                    peers = cells[5].find('div')
                    if peers and peers.get('title'):
                        # Tooltip format: "Seeders: N | Leechers: M"
                        peers = peers['title'].replace(',', '').split(' | ', 1)
                        # Removes 'Seeders: '
                        seeders = try_int(peers[0][9:])
                        # Removes 'Leechers: '
                        leechers = try_int(peers[1][10:])

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[3].get_text().replace(',', '')
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[4].get_text().replace('yesterday', '24 hours')
                # "long ago" can't be translated to a date
                if pubdate_raw == 'long ago':
                    pubdate_raw = None
                pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html.parser') as html:
        torrent_table = html.find('div', class_='browse')
        # Result rows are divs whose class starts with 'line'
        torrent_rows = torrent_table(
            'div', class_=re.compile('^line')) if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 1:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        for row in torrent_rows:
            try:
                # Titles are "<local> / <english>" (or backslash-separated);
                # keep the part after the separator. Rows without one are skipped.
                heb_eng_title = row.find('div', class_='bTitle').find(
                    href=re.compile(r'details\.php')).find('b').get_text()
                if '/' in heb_eng_title:
                    title = heb_eng_title.split('/')[1].strip()
                elif '\\' in heb_eng_title:
                    title = heb_eng_title.split('\\')[1].strip()
                else:
                    continue

                download_id = row.find('div', class_='bTitle').find(
                    href=re.compile(r'download\.php'))['href']

                if not all([title, download_id]):
                    continue

                download_url = urljoin(self.url, download_id)

                seeders = try_int(
                    row.find('div', class_='bUping').get_text(strip=True))
                leechers = try_int(
                    row.find('div', class_='bDowning').get_text(strip=True))

                # Filter unseeded torrent (threshold capped at 1 seeder)
                if seeders < min(self.minseed, 1):
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}',
                            title, seeders)
                    continue

                # Size text carries a 5-character label prefix that is dropped
                torrent_size = row.find(
                    'div', class_='bSize').get_text(strip=True)
                size = convert_size(torrent_size[5:], sep='') or -1

                pubdate_raw = row.find('div', class_=re.compile(
                    'bHow')).find_all('span')[1].next_sibling.strip()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    def process_column_header(td):
        # Header label: image title first, then cell text, then anchor title
        result = ''
        if td.a and td.a.img:
            result = td.a.img.get('title', td.a.get_text(strip=True))
        if not result:
            result = td.get_text(strip=True)
        if not result and td.a and td.a.get('title'):
            result = td.a['title']
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', class_='torrent_table')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        # Need to only search one level deep for 'td' tags, as one of the td's also has a td.
        labels = [
            process_column_header(label)
            for label in torrent_rows[0].find_all('td', recursive=False)
        ]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row.find_all('td', recursive=False)
            if len(cells) < len(labels):
                continue

            try:
                # Skip if torrent has been nuked due to poor quality
                if row.find('img', alt='Nuked'):
                    continue

                title = cells[labels.index('Name')].find(
                    'a', class_='overlay_torrent').get_text(strip=True)
                download_url = urljoin(
                    self.url, cells[labels.index('Name')].find('a')['href'])
                if not all([title, download_url]):
                    continue

                seeders = int(cells[labels.index('Seeders')].get_text(
                    strip=True).replace(',', ''))
                leechers = int(cells[labels.index('Leechers')].get_text(
                    strip=True).replace(',', ''))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}',
                            title, seeders)
                    continue

                # NOTE(review): mixes binary (KIB/MIB/GIB) and decimal (TB/PB)
                # unit names — presumably matches the site's labels; confirm.
                units = ['B', 'KIB', 'MIB', 'GIB', 'TB', 'PB']
                torrent_size = cells[labels.index('Size')].get_text(
                    strip=True)
                size = convert_size(torrent_size, units=units) or -1

                # Publication date lives in a tooltip on the 4th cell
                pubdate_raw = cells[3].find('span')['title']
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    def process_column_header(td):
        # Header label: prefer the anchor's title attribute, else cell text
        result = ''
        if td.a:
            result = td.a.get('title')
        if not result:
            result = td.get_text(strip=True)
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', id='torrenttable')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        labels = [
            process_column_header(label)
            for label in torrent_rows[0]('th')
        ]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')

            try:
                name = cells[labels.index('Name')]
                title = name.find('a').get_text(strip=True)
                download_url = row.find('td', class_='quickdownload').find('a')
                if not all([title, download_url]):
                    continue
                download_url = urljoin(self.url, download_url['href'])

                seeders = int(
                    cells[labels.index('Seeders')].get_text(strip=True))
                leechers = int(
                    cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent (threshold capped at 1 seeder)
                if seeders < min(self.minseed, 1):
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            " minimum seeders: {0}. Seeders: {1}",
                            title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text()
                size = convert_size(torrent_size) or -1

                # The added date is the trailing 19 characters of the name cell
                pubdate_raw = name.get_text(strip=True)[-19:]
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    def process_column_header(th):
        return th.span.get_text() if th.span else th.get_text()

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', class_='table2')

        if not torrent_table:
            log.debug('Data returned from provider does not contain any {0}torrents',
                      'confirmed ' if self.confirmed else '')
            return items

        torrent_rows = torrent_table.find_all('tr')
        labels = [process_column_header(label) for label in torrent_rows[0].find_all('th')]

        # Skip the first row, since it isn't a valid result
        for row in torrent_rows[1:]:
            cells = row.find_all('td')

            try:
                title_cell = cells[labels.index('Torrent Name')]

                # Honor the verified-only setting when configured
                verified = title_cell.find('img', title='Verified torrent')
                if self.confirmed and not verified:
                    continue

                title_anchors = title_cell.find_all('a')
                if not title_anchors or len(title_anchors) < 2:
                    continue

                title_url = title_anchors[0].get('href')
                title = title_anchors[1].get_text(strip=True)
                regex_result = id_regex.search(title_anchors[1].get('href'))

                # Prefer the longer title from the details URL when available
                alt_title = regex_result.group(1)
                if len(title) < len(alt_title):
                    title = alt_title.replace('-', ' ')

                info_hash = hash_regex.search(title_url).group(2)
                if not all([title, info_hash]):
                    continue

                download_url = 'magnet:?xt=urn:btih:{hash}&dn={title}{trackers}'.format(
                    hash=info_hash, title=title, trackers=self._custom_trackers)

                # Remove comma as thousands separator from larger number like 2,000
                # BUG FIX: removed the dead placeholder assignment `seeders = 2000`
                # that was immediately overwritten below.
                seeders = try_int(cells[labels.index('Seed')].get_text(strip=True).replace(',', ''))
                leechers = try_int(cells[labels.index('Leech')].get_text(strip=True).replace(',', ''))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(cells[labels.index('Size')].get_text(strip=True)) or -1

                # Normalize relative phrasing so the human-time parser accepts it
                pubdate_raw = cells[1].get_text().replace('Last', '1').replace('Yesterday', '24 hours')
                pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS.

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find(class_='ttable_headinner')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        # Catégorie, Release, Date, DL, Size, C, S, L
        # NOTE(review): the lookups below use 'Nom' and 'Taille', which differ
        # from this header list — confirm against the site's actual headers.
        labels = [
            label.get_text(strip=True)
            for label in torrent_rows[0]('th')
        ]

        for torrent in torrent_rows[1:]:
            cells = torrent('td')
            if len(cells) < len(labels):
                continue

            try:
                # Torrent id is extracted from the details link's query string
                torrent_id = re.search(
                    'id=([0-9]+)',
                    cells[labels.index('Nom')].find('a')['href']).group(1)
                title = cells[labels.index('Nom')].get_text(strip=True)
                if not all([title, torrent_id]):
                    continue

                download_url = self.urls['download'].format(torrent_id)

                seeders = int(
                    cells[labels.index('S')].get_text(strip=True))
                leechers = int(
                    cells[labels.index('L')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}',
                            title, seeders)
                    continue

                torrent_size = cells[labels.index('Taille')].get_text()
                size = convert_size(torrent_size, default=-1)

                # The publication date (dd-mm-yyyy) is embedded in an
                # onmouseover tooltip attribute
                date_raw = torrent('a')[2]['onmouseover']
                pubdate_raw = re.search(
                    r'Poster le: <\/b>(\d{2}-\d{2}-\d{4})', date_raw)
                pubdate = self.parse_pubdate(pubdate_raw.group(1), dayfirst=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', {'id': 'torrent_table'})

        # Continue only if at least one release is found
        if not torrent_table:
            log.debug('Data returned from provider does not contain any {0}torrents',
                      'ranked ' if self.ranked else '')
            return items

        torrent_body = torrent_table.find('tbody')
        torrent_rows = torrent_body.contents
        # Every other child is a spacer/whitespace node; drop them.
        del torrent_rows[1::2]

        for row in torrent_rows[1:]:
            try:
                torrent = row('td')
                if len(torrent) <= 1:
                    break

                all_as = (torrent[1])('a')

                notinternal = row.find('img', src='/static//common/user_upload.png')
                if self.ranked and notinternal:
                    log.debug('Found a user uploaded release, Ignoring it..')
                    continue

                freeleech = row.find('img', src='/static//common/browse/freeleech.png')
                if self.freeleech and not freeleech:
                    continue

                title = all_as[2].string
                download_url = urljoin(self.url, all_as[0].attrs['href'])
                if not all([title, download_url]):
                    continue

                seeders = try_int((row('td')[6]).text.replace(',', ''))
                leechers = try_int((row('td')[7]).text.replace(',', ''))

                # Filter unseeded torrent.
                # NOTE(review): min(self.minseed, 1) caps the threshold at 1,
                # unlike sibling parsers that compare against self.minseed
                # directly — confirm this is intentional.
                if seeders < min(self.minseed, 1):
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Default the size: the original only assigned `size` when the
                # cell had text, leaving the name unbound otherwise
                # (UnboundLocalError is not in the caught exception tuple).
                size = -1
                torrent_size = row.find('td', class_='nobr').find_next_sibling('td').string
                if torrent_size:
                    size = convert_size(torrent_size) or -1

                pubdate_raw = row.find('td', class_='nobr').find('span')['title']
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # log.exception for consistency with the sibling parsers
                # (original used log.error with traceback.format_exc()).
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    torrent_rows = data.get('torrent_results', {})
    if not torrent_rows:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    for row in torrent_rows:
        try:
            title = row.pop('title')
            download_url = row.pop('download') + self._custom_trackers
            if not all([title, download_url]):
                continue

            seeders = row.pop('seeders', 0)
            leechers = row.pop('leechers', 0)

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            torrent_size = row.pop('size', None)
            size = convert_size(torrent_size, default=-1)

            pubdate_raw = row.pop('pubdate', None)
            pubdate = self.parse_pubdate(pubdate_raw)

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', id='torrents')
        torrents = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrents) < 2 or html.find(text='No Torrents Found!'):
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrents[1:]:
            try:
                title = row('td')[1].find('a').text
                download_url = self.urls['base_url'] + row('td')[3].find('a')['href']
                if not all([title, download_url]):
                    continue

                seeders = int(row.find('td', attrs={'class': 'ac t_seeders'}).text)
                leechers = int(row.find('td', attrs={'class': 'ac t_leechers'}).text)

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = row('td')[5].text
                size = convert_size(torrent_size) or -1

                # Upload date is the last pipe-separated field of the name cell.
                pubdate_raw = row('td')[1].find('div').get_text().split('|')[-1].strip()
                pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS.

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find(class_='ttable_headinner')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Catégorie, Release, Date, DL, Size, C, S, L
        header_cells = torrent_rows[0]('th')
        labels = [header.get_text(strip=True) for header in header_cells]

        for torrent in torrent_rows[1:]:
            cells = torrent('td')
            if len(cells) < len(labels):
                continue

            try:
                torrent_id = re.search(
                    'id=([0-9]+)',
                    cells[labels.index('Nom')].find('a')['href']).group(1)
                title = cells[labels.index('Nom')].get_text(strip=True)
                if not all([title, torrent_id]):
                    continue

                download_url = self.urls['download'].format(torrent_id)

                seeders = int(cells[labels.index('S')].get_text(strip=True))
                leechers = int(cells[labels.index('L')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Taille')].get_text()
                size = convert_size(torrent_size, default=-1)

                # Publication date is embedded in the onmouseover tooltip
                # of the row's third anchor.
                date_raw = torrent('a')[2]['onmouseover']
                pubdate_raw = re.search(r'Poster le: <\/b>(\d{2}-\d{2}-\d{4})', date_raw)
                pubdate = self.parse_pubdate(pubdate_raw.group(1), dayfirst=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html('div', class_='panel-body', limit=2)
        # Non-RSS results live in the second panel, RSS in the first.
        if mode != 'RSS':
            torrent_rows = torrent_table[1]('tr') if torrent_table else []
        else:
            torrent_rows = torrent_table[0]('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            try:
                title = cells[1].find('a').get_text()
                magnet = cells[2].find('a', title='Magnet link')['href']
                download_url = '{magnet}{trackers}'.format(
                    magnet=magnet, trackers=self._custom_trackers)
                if not all([title, download_url]):
                    continue

                # Assume a healthy default when no peer info is present.
                seeders = 1
                leechers = 0
                if len(cells) > 5:
                    peers = cells[5].find('div')
                    if peers and peers.get('title'):
                        peers = peers['title'].replace(',', '').split(' | ', 1)
                        # Removes 'Seeders: '
                        seeders = try_int(peers[0][9:])
                        # Removes 'Leechers: '
                        leechers = try_int(peers[1][10:])

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[3].get_text().replace(',', '')
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[4].get_text().replace('yesterday', '24 hours')
                # "long ago" can't be translated to a date
                if pubdate_raw == 'long ago':
                    pubdate_raw = None
                pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode, show=None):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS
    :param show: Show object used for season scene-exception detection;
        may be None, in which case no exception detection is attempted.

    :return: A list of items found
    """
    def is_season_exception(series_name):
        """Try to detect by series name, if this is a season exception."""
        if not show:
            return
        return get_season_from_name(show, series_name)

    items = []

    group_rows = data.get('Groups')
    if not group_rows:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    for group in group_rows:
        torrent_rows = group.get('Torrents')
        if not torrent_rows:
            continue

        for row in torrent_rows:
            properties_string = row.get('Property').rstrip(' |').replace(' ', '')
            # Hack for the h264 10bit stuff
            properties_string = properties_string.replace('h26410-bit', 'h264|hi10p')
            properties = properties_string.split('|')

            download_url = row.get('Link')
            # NOTE(review): `or` means a row is only skipped when BOTH the
            # link is missing AND some property is empty — confirm `and`
            # was not intended here.
            if not (download_url or all(properties)):
                continue

            # Get rid of freeleech from properties
            if properties[-1] == 'Freeleech':
                del properties[-1]
            elif self.freeleech:
                # Discard if we wanted free leech
                continue

            tags = '{torrent_source}.{torrent_container}.{torrent_codec}.{torrent_res}.' \
                   '{torrent_audio}'.format(torrent_source=properties[0],
                                            torrent_container=properties[1],
                                            torrent_codec=properties[2],
                                            torrent_res=properties[3],
                                            torrent_audio=properties[4])

            last_field = re.match(r'(.*)\((.*)\)', properties[-1])
            # subs = last_field.group(1) if last_field else ''
            release_group = '-{0}'.format(last_field.group(2)) if last_field else ''

            release_type = OTHER
            season = None
            episode = None
            multi_ep_start = None
            multi_ep_end = None
            title = None

            # Attempt and get a season or episode number
            title_info = row.get('EditionData').get('EditionTitle')

            if title_info != '':
                if title_info.startswith('Episodes'):
                    multi_ep_match = re.match(r'Episodes (\d+)-(\d+)', title_info)
                    if multi_ep_match:
                        multi_ep_start = multi_ep_match.group(1)
                        multi_ep_end = multi_ep_match.group(2)
                        release_type = MULTI_EP
                elif title_info.startswith('Episode'):
                    episode = re.match('^Episode.([0-9]+)', title_info).group(1)
                    release_type = SINGLE_EP
                    season_match = re.match(r'.+[sS]eason.(\d+)$', group.get('SeriesName'))
                    if season_match:
                        season = season_match.group(1)
                elif title_info.startswith('Season'):
                    if re.match(r'Season.[0-9]+-[0-9]+.\([0-9-]+\)', title_info):
                        # We can read the season AND the episodes, but we can only process multiep.
                        # So i've chosen to use it like 12-23 or 1-12.
                        match = re.match(r'Season.([0-9]+)-([0-9]+).\(([0-9-]+)\)', title_info)
                        episode = match.group(3).upper()
                        season = '{0}-{1}'.format(match.group(1), match.group(2))
                        release_type = MULTI_SEASON
                    else:
                        season = re.match('Season.([0-9]+)', title_info).group(1)
                        release_type = SEASON_PACK
            elif group.get('EpCount') > 0 and group.get('GroupName') != 'TV Special':
                # This is a season pack.
                # 13 episodes -> SXXEXX-EXX
                episode = int(group.get('EpCount'))
                multi_ep_start = 1
                multi_ep_end = episode
                # Because we sometime get names without a season number, like season scene exceptions.
                # This is the most reliable way of creating a multi-episode release name.
                release_type = MULTI_EP

            # These are probably specials which we just can't handle anyways
            if release_type == OTHER:
                continue

            if release_type == SINGLE_EP:
                # Create the single episode release_name (use the shows default title)
                if is_season_exception(group.get('SeriesName')):
                    # If this is a season exception, we can't parse the release name like:
                    # Show.Title.Season.3.Exception.S01E01...
                    # As that will confuse the parser, as it already has a season available.
                    # We have to omit the season, to have it search for a season exception.
                    title = '{title}.{episode}.{tags}' \
                            '{release_group}'.format(title=group.get('SeriesName'),
                                                     episode='E{0:02d}'.format(int(episode)),
                                                     tags=tags,
                                                     release_group=release_group)
                else:
                    title = '{title}.{season}.{episode}.{tags}' \
                            '{release_group}'.format(title=group.get('SeriesName'),
                                                     season='S{0:02d}'.format(int(season)) if season else 'S01',
                                                     episode='E{0:02d}'.format(int(episode)),
                                                     tags=tags,
                                                     release_group=release_group)

            if release_type == MULTI_EP:
                # Create the multi-episode release_name
                # Multiple.Episode.TV.Show.SXXEXX-EXX[Episode.Part].[Episode.Title].TAGS.[LANGUAGE].720p.FORMAT.x264-GROUP
                if is_season_exception(group.get('SeriesName')):
                    # If this is a season exception, we can't parse the release name like:
                    # Show.Title.Season.3.Exception.S01E01-E13...
                    # As that will confuse the parser, as it already has a season available.
                    # We have to omit the season, to have it search for a season exception.
                    # Example: Show.Title.Season.3.Exception.E01-E13...
                    title = '{title}.{multi_episode_start}-{multi_episode_end}.{tags}' \
                            '{release_group}'.format(title=group.get('SeriesName'),
                                                     multi_episode_start='E{0:02d}'.format(int(multi_ep_start)),
                                                     multi_episode_end='E{0:02d}'.format(int(multi_ep_end)),
                                                     tags=tags,
                                                     release_group=release_group)
                else:
                    # NOTE(review): `season` is formatted here without int();
                    # on this path `season` is None today so 'S01' is used,
                    # but a non-None string would raise — confirm.
                    title = '{title}.{season}{multi_episode_start}-{multi_episode_end}.{tags}' \
                            '{release_group}'.format(title=group.get('SeriesName'),
                                                     season='S{0:02d}'.format(season) if season else 'S01',
                                                     multi_episode_start='E{0:02d}'.format(int(multi_ep_start)),
                                                     multi_episode_end='E{0:02d}'.format(int(multi_ep_end)),
                                                     tags=tags,
                                                     release_group=release_group)

            if release_type == SEASON_PACK:
                # Create the season pack release_name
                # if `Season` is already in the SeriesName, we ommit adding it another time.
                title = '{title}.{season}.{tags}' \
                        '{release_group}'.format(title=group.get('SeriesName'),
                                                 season='S{0:02d}'.format(int(season)) if season else 'S01',
                                                 tags=tags,
                                                 release_group=release_group)

            if release_type == MULTI_SEASON:
                # Create the multi season pack release_name
                # Multiple.Episode.TV.Show.EXX-EXX[Episode.Part].[Episode.Title].TAGS.[LANGUAGE].720p.FORMAT.x264-GROUP
                title = '{title}.{episode}.{tags}' \
                        '{release_group}'.format(title=group.get('SeriesName'),
                                                 episode=episode,
                                                 tags=tags,
                                                 release_group=release_group)

            seeders = row.get('Seeders')
            leechers = row.get('Leechers')
            pubdate = self.parse_pubdate(row.get('UploadTime'))

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            size = convert_size(row.get('Size'), default=-1)

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Units
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

    def process_column_header(td):
        # Prefer the image tooltip for iconified headers, falling back
        # to the cell text.
        result = ''
        if td.a and td.a.img:
            result = td.a.img.get('title', td.a.get_text(strip=True))
        if not result:
            result = td.get_text(strip=True)
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('div', id='torrentBrowse')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        labels = [process_column_header(label) for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                title_anchor = cells[labels.index('Name')].find('a').find_next('a') or \
                    cells[labels.index('Name')].find('a')
                title = title_anchor.get('title') if title_anchor else None
                download_url = urljoin(self.url,
                                       cells[labels.index('DL')].find('a')['href'])
                if not all([title, download_url]):
                    continue

                peers = cells[labels.index('S/L')].get_text(strip=True).split('/', 1)
                seeders = try_int(peers[0])
                leechers = try_int(peers[1])

                # Filter unseeded torrent
                if seeders < min(self.minseed, 1):
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size/Snatched')].get_text(
                    strip=True).split('/', 1)[0]
                size = convert_size(torrent_size, units=units) or -1

                pubdate_raw = cells[labels.index('Added')].get_text(' ')
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.error('Failed parsing provider. Traceback: {0!r}',
                          traceback.format_exc())

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', {'id': 'torrentsTable'})
        if torrent_table:
            torrent_rows = torrent_table.find_all('tr')

            # Continue only if at least one release is found
            if len(torrent_rows) < 2:
                log.debug('Data returned from provider does not contain any torrents')
                return items

            # Skip column headers
            for row in torrent_rows[1:]:
                try:
                    torrent_items = row.find_all('td')
                    title = torrent_items[1].find('a').get_text(strip=True)
                    download_url = torrent_items[2].find('a')['href']
                    if not all([title, download_url]):
                        continue
                    download_url = urljoin(self.url, download_url)

                    seeders = try_int(torrent_items[5].get_text(strip=True))
                    leechers = try_int(torrent_items[6].get_text(strip=True))

                    # Filter unseeded torrent
                    if seeders < self.minseed:
                        if mode != 'RSS':
                            log.debug("Discarding torrent because it doesn't meet the"
                                      ' minimum seeders: {0}. Seeders: {1}',
                                      title, seeders)
                        continue

                    torrent_size = torrent_items[4].get_text()
                    size = convert_size(torrent_size) or -1

                    pubdate_raw = torrent_items[1].find('div').get_text()
                    pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                    log.exception('Failed parsing provider')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        # Continue only if at least one release is found
        empty = html.find('h2', text='No .torrents fit this filter criteria')
        if empty:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        torrent_table = html.find('table',
                                  attrs={'style': 'border: none; width: 100%;'})
        torrent_rows = torrent_table('tr', class_='browse') if torrent_table else []

        for row in torrent_rows:
            cells = row('td')
            try:
                title = cells[1].find('a').get('title')
                torrent_url = cells[2].find('a').get('href')
                # Validate before building the URL: the original called
                # urljoin first and relied on the except clause below when
                # the href was missing.
                if not all([title, torrent_url]):
                    continue
                download_url = urljoin(self.url, torrent_url)

                seeders = try_int(cells[9].get_text(), 1)
                leechers = try_int(cells[10].get_text())

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = self._norm_size(cells[7].get_text(strip=True))
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[5].get_text()
                pubdate = self.parse_pubdate(pubdate_raw, human_time=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        if not html:
            log.debug('No html data parsed from provider')
            return items

        torrents = html('tr')
        if not torrents or len(torrents) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Skip column headers
        for row in torrents[1:]:
            # Skip extraneous rows at the end
            if len(row.contents) < 10:
                continue

            try:
                comments_counter = row.find_all(
                    'td', class_='lista', attrs={'align': 'center'})[4].find('a')
                if comments_counter:
                    # Title is the tooltip minus its fixed 10-char prefix
                    # (presumably a 'Comments: '-style label — confirm).
                    title = comments_counter['title'][10:]
                else:
                    title = row.find('td', class_='lista',
                                     attrs={'align': 'left'}).find('a').get_text()
                dl_href = row.find(
                    'td', class_='lista',
                    attrs={'width': '20', 'style': 'text-align: center;'}
                ).find('a').get('href')
                download_url = urljoin(self.url, dl_href)
                if not all([title, dl_href]):
                    continue

                seeders = try_int(row.find('span', class_='seedy').find('a').get_text(), 1)
                leechers = try_int(row.find('span', class_='leechy').find('a').get_text())

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = row.find('td', class_='lista222',
                                        attrs={'width': '100%'}).get_text()
                size = convert_size(torrent_size) or -1

                pubdate_td = row.find_all('td', class_='lista',
                                          attrs={'align': 'center'})[3]
                pubdate_human_offset = pubdate_td.find('b')
                if pubdate_human_offset:
                    time_search = re.search('([0-9:]+)', pubdate_td.get_text())
                    pubdate_raw = pubdate_human_offset.get_text() + ' at ' + time_search.group(1)
                else:
                    pubdate_raw = pubdate_td.get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                # Guard the debug log on mode, consistent with the sibling
                # parsers (the original logged unconditionally).
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def search(self, search_strings, age=0, ep_obj=None):
    """
    Search indexer using the params in search_strings, either for latest releases, or a string/id search.

    :param search_strings: Dict of search mode -> list of search strings
    :param age: Unused here; kept for interface compatibility
    :param ep_obj: Episode object; required for non-RSS modes

    :return: list of results in dict form
    """
    results = []
    if not self._check_auth():
        return results

    # For providers that don't have caps, or for which the t=caps is not working.
    if not self.caps and all(provider not in self.url
                             for provider in self.providers_without_caps):
        self.get_newznab_categories(just_caps=True)
        if not self.caps:
            return results

    for mode in search_strings:
        self.torznab = False
        search_params = {
            't': 'search',
            'limit': 100,
            'offset': 0,
            'cat': self.cat_ids.strip(', ') or '5030,5040',
            'maxage': app.USENET_RETENTION
        }

        if self.needs_auth and self.key:
            search_params['apikey'] = self.key

        if mode != 'RSS':
            match_indexer = self._match_indexer()
            search_params['t'] = 'tvsearch' if match_indexer and not self.force_query else 'search'

            if search_params['t'] == 'tvsearch':
                search_params.update(match_indexer)

                if ep_obj.series.air_by_date or ep_obj.series.sports:
                    # Date-based shows search on season=YYYY, ep=MM/DD.
                    date_str = str(ep_obj.airdate)
                    search_params['season'] = date_str.partition('-')[0]
                    search_params['ep'] = date_str.partition('-')[2].replace('-', '/')
                else:
                    search_params['season'] = ep_obj.scene_season
                    search_params['ep'] = ep_obj.scene_episode

            if mode == 'Season':
                search_params.pop('ep', '')

        items = []
        log.debug('Search mode: {0}', mode)

        for search_string in search_strings[mode]:
            if mode != 'RSS':
                # If its a PROPER search, need to change param to 'search' so it searches using 'q' param
                if any(proper_string in search_string
                       for proper_string in self.proper_strings):
                    search_params['t'] = 'search'

                log.debug(
                    'Search show using {search}', {
                        'search': 'search string: {search_string}'.format(
                            search_string=search_string
                            if search_params['t'] != 'tvsearch' else
                            'indexer_id: {indexer_id}'.format(indexer_id=match_indexer))
                    })

                if search_params['t'] != 'tvsearch':
                    search_params['q'] = search_string

            time.sleep(cpu_presets[app.CPU_PRESET])

            response = self.get_url(urljoin(self.url, 'api'),
                                    params=search_params, returns='response')
            if not response or not response.text:
                log.debug('No data returned from provider')
                continue

            with BS4Parser(response.text, 'html5lib') as html:
                if not self._check_auth_from_data(html):
                    return items

                try:
                    self.torznab = 'xmlns:torznab' in html.rss.attrs
                except AttributeError:
                    self.torznab = False

                if not html('item'):
                    log.debug('No results returned from provider. Check chosen Newznab search categories'
                              ' in provider settings and/or usenet retention')
                    continue

                for item in html('item'):
                    try:
                        title = item.title.get_text(strip=True)

                        download_url = None
                        if item.link:
                            if validators.url(item.link.get_text(strip=True)):
                                download_url = item.link.get_text(strip=True)
                            elif validators.url(item.link.next.strip()):
                                download_url = item.link.next.strip()

                        if not download_url and item.enclosure:
                            if validators.url(item.enclosure.get('url', '').strip()):
                                download_url = item.enclosure.get('url', '').strip()

                        if not (title and download_url):
                            continue

                        seeders = leechers = -1
                        if 'gingadaddy' in self.url:
                            size_regex = re.search(r'\d*.?\d* [KMGT]B',
                                                   str(item.description))
                            item_size = size_regex.group() if size_regex else -1
                        else:
                            item_size = item.size.get_text(strip=True) if item.size else -1
                            for attr in item('newznab:attr') + item('torznab:attr'):
                                item_size = attr['value'] if attr['name'] == 'size' else item_size
                                seeders = try_int(attr['value']) if attr['name'] == 'seeders' else seeders
                                peers = try_int(attr['value']) if attr['name'] == 'peers' else None
                                leechers = peers - seeders if peers else leechers

                        # BUGFIX: compare with ==, not `is` — identity checks
                        # on ints only worked via CPython's small-int cache.
                        if not item_size or (self.torznab and
                                             (seeders == -1 or leechers == -1)):
                            continue

                        size = convert_size(item_size) or -1

                        pubdate_raw = item.pubdate.get_text(strip=True)
                        pubdate = self.parse_pubdate(pubdate_raw)

                        # Use a distinct name for the result dict; the
                        # original rebound the loop variable `item`.
                        search_result = {
                            'title': title,
                            'link': download_url,
                            'size': size,
                            'seeders': seeders,
                            'leechers': leechers,
                            'pubdate': pubdate,
                        }
                        if mode != 'RSS':
                            if seeders == -1:
                                log.debug('Found result: {0}', title)
                            else:
                                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                          title, seeders, leechers)

                        items.append(search_result)
                    except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                        log.error('Failed parsing provider. Traceback: {0!r}',
                                  traceback.format_exc())
                        continue

                # Since we arent using the search string,
                # break out of the search string loop
                if 'tvdbid' in search_params:
                    break

        results += items

    # Reproces but now use force_query = True
    if not results and not self.force_query:
        self.force_query = True
        return self.search(search_strings, ep_obj=ep_obj)

    return results
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    torrent_rows = data.get('data')
    if not torrent_rows:
        log.debug('Data returned from provider does not contain any torrents')
        return items

    for row in torrent_rows:
        title = row.get('name', '')
        torrent_id = row.get('id', '')
        download_url = self.urls['download'].format(
            urlencode({'id': torrent_id, 'passkey': self.passkey}))
        if not all([title, download_url]):
            continue

        seeders = row.get('seeders', 1)
        leechers = row.get('leechers', 0)

        # Filter unseeded torrent
        if seeders < self.minseed:
            if mode != 'RSS':
                log.debug("Discarding torrent because it doesn't meet the"
                          ' minimum seeders: {0}. Seeders: {1}',
                          title, seeders)
            continue

        size = convert_size(row.get('size'), default=-1)

        pubdate_raw = row.get('added')
        pubdate = self.parse_pubdate(pubdate_raw)

        item = {
            'title': title,
            'link': download_url,
            'size': size,
            'seeders': seeders,
            'leechers': leechers,
            'pubdate': pubdate,
        }
        if mode != 'RSS':
            log.debug(
                'Found result: {title} with {x} seeders'
                ' and {y} leechers',
                {'title': title, 'x': seeders, 'y': leechers}
            )

        items.append(item)

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    for row in data:
        try:
            # Check if this is a freeleech torrent and if we've configured to only allow freeleech.
            if self.freeleech and row.get('download-multiplier') != 0:
                continue

            # Strip BBCode-style '[key=value]...[/key]' decorations from the name.
            title = re.sub(r'\[.*\=.*\].*\[/.*\]', '', row['name']) if row['name'] else None
            download_url = urljoin(
                self.urls['download'],
                '{0}/{1}.torrent'.format(row['t'], row['name'])
            ) if row['t'] and row['name'] else None
            if not all([title, download_url]):
                continue

            seeders = int(row['seeders'])
            leechers = int(row['leechers'])

            # Filter unseeded torrent
            if seeders < self.minseed:
                if mode != 'RSS':
                    log.debug("Discarding torrent because it doesn't meet the"
                              ' minimum seeders: {0}. Seeders: {1}',
                              title, seeders)
                continue

            torrent_size = row['size']
            size = convert_size(torrent_size) or -1

            pubdate_raw = row['ctime']
            pubdate = self.parse_pubdate(pubdate_raw, fromtimestamp=True)

            item = {
                'title': title,
                'link': download_url,
                'size': size,
                'seeders': seeders,
                'leechers': leechers,
                'pubdate': pubdate,
            }
            # The original emitted this message twice per item; log it once.
            if mode != 'RSS':
                log.debug('Found result: {0} with {1} seeders and {2} leechers',
                          title, seeders, leechers)

            items.append(item)
        except (AttributeError, TypeError, KeyError, ValueError, IndexError):
            log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode, **kwargs):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS.
    :param kwargs: Optional ``keywords`` (the search string), used to chop a
        tracker/channel prefix off the release name.

    :return: A list of items found
    """
    items = []
    keywords = kwargs.pop('keywords', None)

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find(id='sortabletable')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        # Header cells: prefer the icon tooltip when one is present.
        labels = [
            label.img['title'] if label.img else label.get_text(strip=True)
            for label in torrent_rows[0]('td')
        ]

        # Skip column headers
        for torrent in torrent_rows[1:]:
            try:
                # Freeleech releases are flagged with a dedicated icon.
                if self.freeleech and not torrent.find(
                        'img', alt=re.compile('TORRENT GRATUIT : Seulement '
                                              'l\'upload sera compter.')):
                    continue

                title = torrent.find(
                    class_='tooltip-content').div.get_text(strip=True)
                download_url = torrent.find(
                    title='Télécharger le torrent!').parent['href']
                if not all([title, download_url]):
                    continue

                # Chop off tracker/channel prefix or we cannot parse the result!
                if mode != 'RSS' and keywords:
                    show_name_first_word = re.search(r'^[^ .]+', keywords).group()
                    if not title.startswith(show_name_first_word):
                        title = re.sub(
                            r'.*(' + show_name_first_word + '.*)', r'\1', title)

                seeders = try_int(
                    torrent.find(title='Seeders').get_text(strip=True))
                leechers = try_int(
                    torrent.find(title='Leechers').get_text(strip=True))

                # Filter unseeded torrent. Compare against the configured
                # minimum directly; the previous `min(self.minseed, 1)` capped
                # the threshold at 1 and ignored any higher configured value.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            " minimum seeders: {0}. Seeders: {1}",
                            title, seeders)
                    continue

                torrent_size = torrent('td')[labels.index(
                    'Taille')].get_text(strip=True)
                size = convert_size(torrent_size) or -1

                # Publish date sits in the last <div> of the name cell; the
                # site formats dates day-first.
                pubdate_raw = torrent('td')[labels.index('Nom')].find_all(
                    'div')[-1].get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw, dayfirst=True)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # log.exception captures the traceback; matches sibling parsers.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_rows = html('tr')
        # Need a header row plus at least one result row.
        if not torrent_rows or not len(torrent_rows) > 1:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Cat., Active, Filename, Dl, Wl, Added, Size, Uploader, S, L, C
        labels = [label.a.get_text(strip=True) if label.a else label.get_text(strip=True)
                  for label in torrent_rows[0]('th')]

        # Skip column headers
        for row in torrent_rows[1:]:
            try:
                # Direct children only, so nested tables don't add stray cells.
                cells = row.find_all('td', recursive=False)[:len(labels)]
                if len(cells) < len(labels):
                    continue

                torrent_name = cells[labels.index('Torrent name')].a
                title = torrent_name.get_text(strip=True) if torrent_name else None
                download_url = torrent_name.get('href') if torrent_name else None
                if not all([title, download_url]):
                    continue

                # Seeders/leechers/completed arrive as one 'S/L/C' style cell.
                slc = cells[labels.index('S')].get_text()
                seeders, leechers, _ = [int(value.strip()) for value in
                                        slc.split('/')] if slc else (0, 0, 0)

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text()
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[labels.index('Added')].get_text()
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A KV with a list of items found and if there's an next page to search
    """
    def process_column_header(td):
        # Prefer the header icon tooltip, fall back to the cell text.
        ret = ''
        if td.a and td.a.img:
            ret = td.a.img.get('title', td.a.get_text(strip=True))
        if not ret:
            ret = td.get_text(strip=True)
        return ret

    items = []
    has_next_page = False

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', id='torrent_table')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # ignore next page in RSS mode
        has_next_page = mode != 'RSS' and html.find('a', class_='pager_next') is not None
        log.debug('Are there more pages? {0}'.format(has_next_page))

        # Continue only if at least one Release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return {'has_next_page': has_next_page, 'items': []}

        # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers'
        labels = [process_column_header(label) for label in torrent_rows[0]('td')]
        group_title = ''

        # Skip column headers
        for result in torrent_rows[1:]:
            cells = result('td')
            # NOTE(review): assumes every data row carries a class attribute;
            # a row without one would make the membership tests below raise
            # outside the try block — confirm against the site's markup.
            result_class = result.get('class')
            # When "Grouping Torrents" is enabled, the structure of table change
            group_index = -2 if 'group_torrent' in result_class else 0
            try:
                title = result.select('a[href^="torrents.php?id="]')[0].get_text()
                title = re.sub(r'\s+', ' ', title).strip()  # clean empty lines and multiple spaces

                if 'group' in result_class or 'torrent' in result_class:
                    # get international title if available
                    title = re.sub(r'.* \[(.*?)\](.*)', r'\1\2', title)

                if 'group' in result_class:
                    # Group header row: remember the title for the member rows.
                    group_title = title
                    continue

                for serie in self.absolute_numbering:
                    if serie in title:
                        # remove season from title when its in absolute format
                        title = re.sub(r'S\d{2}E(\d{2,4})', r'\1', title)
                        break

                download_url = urljoin(self.url,
                                       result.select('a[href^="torrents.php?action=download"]')[0]['href'])
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('Seeders') + group_index].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers') + group_index].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_details = None
                if 'group_torrent' in result_class:
                    # torrents belonging to a group
                    torrent_details = title
                    title = group_title
                elif 'torrent' in result_class:
                    # standalone/un grouped torrents
                    torrent_details = cells[labels.index('Nome/Ano')].find('div', class_='torrent_info').get_text()

                # Normalize the details string into release-name tokens.
                torrent_details = torrent_details.replace('[', ' ').replace(']', ' ').replace('/', ' ')
                torrent_details = torrent_details.replace('Full HD ', '1080p').replace('HD ', '720p')

                torrent_size = cells[labels.index('Tamanho') + group_index].get_text(strip=True)
                size = convert_size(torrent_size) or -1

                torrent_name = '{0} {1}'.format(title, torrent_details.strip()).strip()
                torrent_name = re.sub(r'\s+', ' ', torrent_name)

                items.append({
                    'title': torrent_name,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None
                })

                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers'.format
                              (torrent_name, seeders, leechers))
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return {'has_next_page': has_next_page, 'items': items}
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    # Use html.parser, since html5parser has issues with this site.
    with BS4Parser(data, 'html.parser') as html:
        # Get the last table with a width of 800px.
        tables = html('table', width='800')
        torrent_table = tables[-1] if tables else []
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug(
                'Data returned from provider does not contain any torrents'
            )
            return items

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < 3:
                # We must have cells[2] because it contains the title
                continue

            # Freeleech rows are highlighted via a bgcolor attribute.
            if self.freeleech and not row.get('bgcolor'):
                continue

            try:
                title = cells[2].find('a')['title'] if cells[2] else None
                download_url = urljoin(
                    self.url, cells[0].find('a')['href']) if cells[0] else None
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[8].get_text(
                    strip=True)) if len(cells) > 8 else 1
                leechers = try_int(cells[9].get_text(
                    strip=True)) if len(cells) > 9 else 0

                # Filter unseeded torrent. Compare against the configured
                # minimum directly; the previous `min(self.minseed, 1)` capped
                # the threshold at 1 and ignored any higher configured value.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            " minimum seeders: {0}. Seeders: {1}",
                            title, seeders)
                    continue

                torrent_size = cells[6].get_text(
                    ' ') if len(cells) > 6 else None
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[5].get_text(' ')
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # log.exception captures the traceback; matches sibling parsers.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_rows = html.find_all('tr')

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Scenetime apparently uses different number of cells in #torrenttable based
        # on who you are. This works around that by extracting labels from the first
        # <tr> and using their index to find the correct download/seeders/leechers td.
        labels = [label.get_text(strip=True) or label.img['title']
                  for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                link = cells[labels.index('Name')].find('a')
                torrent_id = link['href'].replace('details.php?id=', '').split('&')[0]
                title = link.get_text(strip=True)
                download_url = self.urls['download'].format(
                    torrent_id,
                    '{0}.torrent'.format(title.replace(' ', '.'))
                )
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent. Compare against the configured
                # minimum directly; the previous `min(self.minseed, 1)` capped
                # the threshold at 1 and ignored any higher configured value.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            " minimum seeders: {0}. Seeders: {1}",
                            title, seeders)
                    continue

                # Insert a space between the number and its unit so
                # convert_size can parse the value.
                torrent_size = cells[labels.index('Size')].get_text()
                torrent_size = re.sub(r'(\d+\.?\d*)', r'\1 ', torrent_size)
                size = convert_size(torrent_size) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                # log.exception captures the traceback; matches sibling parsers.
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        rows = html('item')
        if not rows:
            log.debug(
                'No results returned from provider. Check chosen Newznab search categories'
                ' in provider settings and/or usenet retention')
            return items

        # Torznab feeds declare a dedicated namespace on the <rss> root element.
        try:
            self.torznab = 'xmlns:torznab' in html.rss.attrs
        except AttributeError:
            self.torznab = False

        for item in rows:
            try:
                title = item.title.get_text(strip=True)

                # Resolve the download URL: prefer the enclosure, then <link>.
                download_url = None
                if item.enclosure:
                    url = item.enclosure.get('url', '').strip()
                    if url.startswith('magnet:'):
                        download_url = url
                    elif validators.url(url):
                        download_url = url
                        # Jackett needs extension added (since v0.8.396)
                        if not url.endswith('.torrent'):
                            content_type = item.enclosure.get('type', '')
                            if content_type == 'application/x-bittorrent':
                                download_url = '{0}{1}'.format(url, '.torrent')

                if not download_url and item.link:
                    url = item.link.get_text(strip=True)
                    if validators.url(url) or url.startswith('magnet:'):
                        download_url = url

                    if not download_url:
                        # Some feeds leave the URL in the tag's tail text.
                        url = item.link.next.strip()
                        if validators.url(url) or url.startswith('magnet:'):
                            download_url = url

                if not (title and download_url):
                    continue

                # -1 marks "unknown" for usenet results without peer counts.
                seeders = leechers = -1
                if 'gingadaddy' in self.url:
                    # GingaDADDY only exposes the size inside the description.
                    size_regex = re.search(r'\d*.?\d* [KMGT]B', str(item.description))
                    item_size = size_regex.group() if size_regex else -1
                else:
                    item_size = item.size.get_text(strip=True) if item.size else -1
                    # Use regex to find name-spaced tags
                    # see BeautifulSoup4 bug 1720605
                    # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                    newznab_attrs = item(re.compile('newznab:attr'))
                    torznab_attrs = item(re.compile('torznab:attr'))
                    for attr in newznab_attrs + torznab_attrs:
                        item_size = attr['value'] if attr['name'] == 'size' else item_size
                        seeders = try_int(attr['value']) if attr['name'] == 'seeders' else seeders
                        peers = try_int(attr['value']) if attr['name'] == 'peers' else None
                        # torznab 'peers' includes seeders, so subtract them.
                        leechers = peers - seeders if peers else leechers

                # Torznab results without peer info are unusable torrents.
                if not item_size or (self.torznab and (seeders == -1 or leechers == -1)):
                    continue

                size = convert_size(item_size) or -1

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                # NOTE(review): this rebinds the loop variable `item`; harmless
                # because it is reassigned at the top of the next iteration.
                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    if seeders == -1:
                        log.debug('Found result: {0}', title)
                    else:
                        log.debug('Found result: {0} with {1} seeders and {2} leechers',
                                  title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', border='1')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # "Type", "Name", Files", "Comm.", "Added", "TTL", "Size", "Snatched", "Seeders", "Leechers"
        labels = [label.get_text(strip=True) for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                download_url = urljoin(self.url, cells[labels.index('Name')].find(
                    'a', href=re.compile(r'download.php\?id='))['href'])
                title_element = cells[labels.index('Name')].find(
                    'a', href=re.compile(r'details.php\?id='))
                title = title_element.get('title', '') or title_element.get_text(strip=True)
                if not all([title, download_url]):
                    continue

                # Free leech torrents are marked with green [F L] in the title
                # (i.e. <font color=green>[F L]</font>)
                freeleech = cells[labels.index('Name')].find('font', color='green')
                if freeleech:
                    # \xa0 is a non-breaking space in Latin1 (ISO 8859-1)
                    freeleech_tag = '[F\xa0L]'
                    title = title.replace(freeleech_tag, '')
                    if self.freeleech and freeleech.get_text(strip=True) != freeleech_tag:
                        continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True), 1)
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text(' ', strip=True)
                size = convert_size(torrent_size) or -1

                pubdate_raw = cells[labels.index('Added')].get_text(' ', strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def test_convert_size():
    """Exercise convert_size: units, defaults, separators and i18n unit lists."""
    # converts pretty file sizes to integers
    assert sut.convert_size('1 B') == 1
    assert sut.convert_size('1 KB') == 1024
    # can use decimal units (e.g. KB = 1000 bytes instead of 1024)
    assert sut.convert_size('1 kb', use_decimal=True) == 1000
    # returns integer sizes for integers
    assert sut.convert_size(0, -1) == 0
    assert sut.convert_size(100, -1) == 100
    # returns integer sizes for floats too
    assert sut.convert_size(1.312, -1) == 1
    # return integer variant when passed as str
    assert sut.convert_size('1024', -1) == 1024
    # without a default value, failures return None
    assert sut.convert_size('pancakes') is None
    # default value can be anything
    assert sut.convert_size(None, -1) == -1
    assert sut.convert_size('', 3.14) == 3.14
    assert sut.convert_size('elephant', 'frog') == 'frog'
    # negative sizes return 0
    assert sut.convert_size(-1024, -1) == 0
    assert sut.convert_size('-1 GB', -1) == 0
    # can also use `or` for a default value
    # NOTE: the `or` expression must be parenthesized -- without parens,
    # `a or b == c` parses as `a or (b == c)` and the assertion is vacuous.
    assert (sut.convert_size(None) or 100) == 100
    # default doesn't have to be integer
    assert (sut.convert_size(None) or 1.61803) == 1.61803
    # default doesn't have to be numeric either
    assert (sut.convert_size(None) or '100') == '100'
    # can use `or` to provide a default when size evaluates to 0
    assert (sut.convert_size('-1 GB') or -1) == -1
    # default units can be kwarg'd
    assert sut.convert_size('1', default_units='GB') == sut.convert_size('1 GB')
    # separator can be kwarg'd
    assert sut.convert_size('1?GB', sep='?') == sut.convert_size('1 GB')
    # can use custom dictionary to support internationalization
    french = ['O', 'KO', 'MO', 'GO', 'TO', 'PO']
    assert sut.convert_size('1 o', units=french) == 1
    assert sut.convert_size('1 go', use_decimal=True, units=french) == 1000000000
    assert sut.convert_size('1 o') is None  # Wrong units so result is None
    # custom units need to be uppercase or they won't match
    oops = ['b', 'kb', 'Mb', 'Gb', 'tB', 'Pb']
    assert sut.convert_size('1 b', units=oops) is None
    assert sut.convert_size('1 B', units=oops) is None
    assert sut.convert_size('1 Mb', units=oops) is None
    assert sut.convert_size('1 MB', units=oops) is None
    # utilize the regex to parse sizes without separator
    assert sut.convert_size('1GB', sep='') == 1073741824
    assert sut.convert_size('1.00GB', sep='') == 1073741824
    assert sut.convert_size('1.01GB', sep='') == 1084479242
    assert sut.convert_size('1B', sep='') == 1
    # no separator and custom units
    french = ['O', 'KO', 'MO', 'GO', 'TO', 'PO']
    assert sut.convert_size('1Go', sep='', units=french) == 1073741824
    assert sut.convert_size('1.00Go', sep='', units=french) == 1073741824
    assert sut.convert_size('1.01Go', sep='', units=french) == 1084479242
    assert sut.convert_size('1o', sep='', units=french) == 1
    # no separator, custom units need to be uppercase or they won't match
    oops = ['b', 'kb', 'Mb', 'Gb', 'tB', 'Pb']
    assert sut.convert_size('1b', sep='', units=oops) is None
    assert sut.convert_size('1B', sep='', units=oops) is None
    assert sut.convert_size('1Mb', sep='', units=oops) is None
    assert sut.convert_size('1MB', sep='', units=oops) is None
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        rows = html('item')
        if not rows:
            log.debug(
                'No results returned from provider. Check chosen Torznab search categories '
                'in provider settings.')
            return items

        for item in rows:
            try:
                title = item.title.get_text(strip=True)
                download_url = item.enclosure.get('url')
                if not all([title, download_url]):
                    continue

                # Seeders default to 1 and peers to 0 when the indexer
                # omits the torznab attributes.
                seeders_attr = item.find('torznab:attr', attrs={'name': 'seeders'})
                peers_attr = item.find('torznab:attr', attrs={'name': 'peers'})
                seeders = int(seeders_attr.get('value', 0)) if seeders_attr else 1
                peers = int(peers_attr.get('value', 0)) if peers_attr else 0
                # torznab 'peers' includes seeders; never report negative leechers.
                leechers = peers - seeders if peers - seeders > 0 else 0

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug(
                            "Discarding torrent because it doesn't meet the"
                            ' minimum seeders: {0}. Seeders: {1}',
                            title, seeders)
                    continue

                torrent_size = item.size.get_text(strip=True)
                size = convert_size(torrent_size, default=-1)

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                # Use a distinct name for the result so the loop variable
                # `item` is not rebound mid-iteration.
                result = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug(
                        'Found result: {0} with {1} seeders and {2} leechers',
                        title, seeders, leechers)

                items.append(result)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as soup:
        torrent_table = soup.find('table', class_='listing')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # Each release spans two <tr> rows (description + stats); start at
        # row 1 when the first row is a header row.
        a = 1 if len(torrent_rows[0]('td')) < 2 else 0

        # Skip column headers
        for top, bot in zip(torrent_rows[a::2], torrent_rows[a + 1::2]):
            try:
                desc_top = top.find('td', class_='desc-top')
                title = desc_top.get_text(strip=True) if desc_top else None
                download_url = desc_top.find('a')['href'] if desc_top else None
                if not all([title, download_url]):
                    continue

                # Stats cell text looks like 'S: 10 L: 2 C: 1 ID: 12345'.
                stats = bot.find('td', class_='stats').get_text(strip=True)
                sl = re.match(r'S:(?P<seeders>\d+)L:(?P<leechers>\d+)C:(?:\d+)ID:(?:\d+)',
                              stats.replace(' ', ''))
                seeders = try_int(sl.group('seeders')) if sl else 0
                leechers = try_int(sl.group('leechers')) if sl else 0

                # Filter unseeded torrent. Compare against the configured
                # minimum directly; the previous `min(self.minseed, 1)` capped
                # the threshold at 1 and ignored any higher configured value.
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  " minimum seeders: {0}. Seeders: {1}",
                                  title, seeders)
                    continue

                # Bottom description looks like '... | Size: 350.45 MiB | ...'.
                # Split on the colon rather than str.strip('Size: '): strip
                # removes a *character set* from both ends, which can eat
                # characters that belong to the size value itself.
                desc_bottom = bot.find('td', class_='desc-bot').get_text(strip=True)
                size_field = desc_bottom.split('|')[1]
                size = convert_size(size_field.partition(':')[2].strip()) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Units
    units = ['B', 'KIB', 'MIB', 'GIB', 'TIB', 'PIB']

    def process_column_header(th):
        # Prefer the anchor text, fall back to the raw cell text.
        result = ''
        if th.a:
            result = th.a.get_text(strip=True)
        if not result:
            result = th.get_text(strip=True)
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', id='searchResult')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any {0}torrents',
                      'confirmed ' if self.confirmed else '')
            return items

        labels = [process_column_header(label) for label in torrent_rows[0]('th')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                title = row.find(class_='detName')
                title = title.get_text(strip=True) if title else None
                # The magnet link carries any user-configured extra trackers.
                download_url = row.find(title='Download this torrent using magnet')
                download_url = download_url['href'] + self._custom_trackers if download_url else None
                if download_url and 'magnet:?' not in download_url:
                    # Some proxies rewrite links to plain HTTP downloads.
                    log.debug('Invalid ThePirateBay proxy please try another one')
                    continue
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('SE')].get_text(strip=True), 1)
                leechers = try_int(cells[labels.index('LE')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                # Accept Torrent only from Good People for every Episode Search
                if self.confirmed and not row.find(alt=re.compile(r'VIP|Trusted')):
                    if mode != 'RSS':
                        log.debug("Found result {0} but that doesn't seem like a trusted"
                                  " result so I'm ignoring it", title)
                    continue

                # Convert size after all possible skip scenarios
                torrent_size = cells[labels.index('Name')].find(class_='detDesc')
                torrent_size = torrent_size.get_text(strip=True).split(', ')[1]
                torrent_size = re.sub(r'Size ([\d.]+).+([KMGT]iB)', r'\1 \2', torrent_size)
                size = convert_size(torrent_size, units=units) or -1

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': None,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        header = html.find('thead')
        # Continue only if at least one release is found
        if not header:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # [u'Category', u'', u'Filename', u'Comments', u'Torrent', u'Magnet',
        #  u'File size', u'Age', u'Seeders', u'Leechers', u'Completed']
        labels = []
        for header_cell in header.find_all('th'):
            labels.append(header_cell.span.get('title') if header_cell.span else '')

        for result in html.find('tbody').find_all('tr'):
            columns = result.find_all('td')
            try:
                title = columns[labels.index('Filename')].span.get('title')
                download_url = columns[labels.index('Torrent')].a.get('href')
                if not all([title, download_url]):
                    continue
                download_url = urljoin(self.url, download_url)

                seeders = try_int(columns[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(columns[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                size = convert_size(columns[labels.index('File size')].get_text()) or -1

                pubdate = self.parse_pubdate(columns[labels.index('Age')].get('title'))

                parsed = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(parsed)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        rows = html('item')
        if not rows:
            log.debug(
                'No results returned from provider. Check chosen Torznab search categories '
                'in provider settings.')
            return items

        for item in rows:
            try:
                title = item.title.get_text(strip=True)
                download_url = item.enclosure.get('url')
                if not all([title, download_url]):
                    continue

                # Seeders default to 1 and peers to 0 when the indexer
                # omits the torznab attributes.
                seeders_attr = item.find('torznab:attr', attrs={'name': 'seeders'})
                peers_attr = item.find('torznab:attr', attrs={'name': 'peers'})
                seeders = int(seeders_attr.get('value', 0)) if seeders_attr else 1
                peers = int(peers_attr.get('value', 0)) if peers_attr else 0
                # torznab 'peers' includes the seeders; derive leechers by
                # subtracting them, matching the sibling torznab parser.
                # (The old code reported the raw peer count as leechers.)
                leechers = peers - seeders if peers - seeders > 0 else 0

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = item.size.get_text(strip=True)
                size = convert_size(torrent_size, default=-1)

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                # Use a distinct name so the loop variable `item` is not rebound.
                result = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(result)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    # Units
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']

    def process_column_header(td):
        # Prefer the header icon tooltip, fall back to the cell text.
        result = ''
        if td.a and td.a.img:
            result = td.a.img.get('title', td.a.get_text(strip=True))
        if not result:
            result = td.get_text(strip=True)
        return result

    items = []

    with BS4Parser(data, 'html5lib') as html:
        torrent_table = html.find('table', id='torrent_table')
        torrent_rows = torrent_table('tr') if torrent_table else []

        # Continue only if at least one release is found
        if len(torrent_rows) < 2:
            log.debug('Data returned from provider does not contain any torrents')
            return items

        # '', '', 'Name /Year', 'Files', 'Time', 'Size', 'Snatches', 'Seeders', 'Leechers'
        labels = [process_column_header(label) for label in torrent_rows[0]('td')]

        # Skip column headers
        for row in torrent_rows[1:]:
            cells = row('td')
            if len(cells) < len(labels):
                continue

            try:
                title = cells[labels.index('Name /Year')].find('a', dir='ltr').get_text(strip=True)
                download = cells[labels.index('Name /Year')].find('a', title='Download')['href']
                download_url = urljoin(self.url, download)
                if not all([title, download_url]):
                    continue

                seeders = try_int(cells[labels.index('Seeders')].get_text(strip=True))
                leechers = try_int(cells[labels.index('Leechers')].get_text(strip=True))

                # Filter unseeded torrent
                if seeders < self.minseed:
                    if mode != 'RSS':
                        log.debug("Discarding torrent because it doesn't meet the"
                                  ' minimum seeders: {0}. Seeders: {1}',
                                  title, seeders)
                    continue

                torrent_size = cells[labels.index('Size')].get_text(strip=True)
                size = convert_size(torrent_size, units=units) or -1

                # The full timestamp lives in the <span> tooltip of the Time cell.
                pubdate_raw = cells[labels.index('Time')].find('span')['title']
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0} with {1} seeders and {2} leechers',
                              title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items