def get_torrent_link(self, content, websearch, *args):
    """Extract a torrent detail/download link from a search-result page.

    :param content: raw HTML of the search-result response
    :param websearch: search descriptor; ``search_type``, ``season`` and
        ``episode`` select the branch and the matching row
    :param args: unused, kept for a uniform scraper interface
    :return: for films, the href of the last parsed row prefixed with '/';
        for series, the download URL of the first season/episode match
        (implicitly ``None`` when no row matches)
    :raises WebScraperParseError: a result row could not be parsed
    :raises WebScraperContentError: the page structure is unexpected.
        NOTE(review): the inner ``WebScraperParseError`` is raised inside
        the outer try, so it is re-wrapped as ``WebScraperContentError``
        before reaching the caller — confirm this is intended.
    """
    soup = BeautifulSoup(content, 'html.parser')
    if websearch.search_type == fflags.FILM_DIRECTORY_FLAG:
        try:
            link = ''
            tr = soup.findAll('tr')
            # Magic offsets: rows before index 32 and the final row are
            # assumed to be page chrome, not results — TODO confirm
            # against the live markup.
            for index, item in enumerate(tr[32:-1]):
                try:
                    # Each pass overwrites `link`; the last row wins.
                    link = item.findAll('a')[0]['href']
                except Exception as err:
                    raise WebScraperParseError(
                        self.name,
                        'ParseError: unable to retrieve values: {0}'.
                        format(err), traceback.format_exc())
        except Exception as err:
            raise WebScraperContentError(
                self.name,
                'ContentError: unable to retrieve values {0}'.format(err),
                traceback.format_exc())
        return '/' + link
    else:
        try:
            ttable = soup.findAll('table')
            # Magic offsets: tables outside [19:-3] are layout chrome —
            # TODO confirm against the live markup.
            for index, item in enumerate(ttable[19:-3]):
                try:
                    if websearch.episode != '':
                        # NOTE(review): shadows the `id` builtin.
                        id = item.findAll('a')[0]['href']
                        # The numeric id is taken from the 5th
                        # dash-separated field of the href — presumably
                        # stable for this site; verify.
                        single_link_pattern = '/secciones.php?sec=descargas&ap=contar&tabla=series&id={0}'.format(
                            id.split('-')[4])
                        # Match "<season>x<episode>" against the last
                        # dash-separated token of the href. Season drops
                        # its first character (presumably a prefix letter
                        # such as 's') — confirm against the caller.
                        if '{0}x{1}'.format(websearch.season[1:], websearch.episode) in \
                                id.split('-')[(len(id.split('-')) - 1):][0]:
                            self.logger.debug(
                                '{0} - {1}::{2} : {3}'.format(
                                    self.name, 'Normal_Mode Single-iD',
                                    id, single_link_pattern))
                            return single_link_pattern
                except Exception as err:
                    raise WebScraperParseError(
                        self.name,
                        'ParseError: unable to retrieve values: {0}'.
                        format(err), traceback.format_exc())
        except Exception as err:
            raise WebScraperContentError(
                self.name,
                'ContentError: unable to retrieve values {0}'.format(err),
                traceback.format_exc())
def get_raw_data(self, content=None):
    """Collect magnet links from a search-result page.

    :param content: raw HTML of the search-result response
    :return: RAWDataInstance with one row per magnet link found
    :raises WebScraperParseError: a result row could not be parsed
    :raises WebScraperContentError: the page structure is unexpected
    """
    results = RAWDataInstance()
    page = BeautifulSoup(content, 'html.parser')
    try:
        rows = page.findAll('tr')
        # The first 34 and last 14 <tr> elements are page chrome.
        for row in rows[34:-14]:
            try:
                magnet = row.findAll('a')[0]['href']
                results.add_new_row(magnet=magnet)
                self.logger.debug0(
                    '{0} New Entry Raw Values: {1:7} {2:>4}/{3:4} {4}'.
                    format(self.name, str(int(0)), str(1), str(1),
                           magnet))
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return results
def get_raw_data(self, content=None):
    """Parse the search-result page into size/seed/leech/magnet rows.

    :param content: raw HTML of the search-result response
    :return: RAWDataInstance populated with one row per accepted result
    :raises WebScraperParseError: a result row could not be parsed
    :raises WebScraperContentError: the page structure is unexpected
        (also wraps WebScraperParseError raised inside the outer try)
    """
    raw_data = RAWDataInstance()
    soup = BeautifulSoup(content, 'html.parser')
    try:
        # Retrieving individual values from the search result
        ttable = soup.findAll('table', {'class': 'tmain'})
        # BUG FIX: original used `ttable is not []`, an identity
        # comparison against a fresh list that is always True. Use
        # truthiness so an empty result set is skipped cleanly.
        if ttable:
            try:
                self.logger.info(
                    '{0} Retrieving Raw Values from Search Result Response:'
                    .format(self.name))
                for items in ttable:
                    tbody = items.findAll('tr')
                    # Skip the header row.
                    for tr in tbody[1:]:
                        seed = (tr.findAll('td'))[3].text
                        if seed == '0':
                            # Floor at 1 — presumably to keep later
                            # ratio math finite; confirm downstream.
                            seed = '1'
                        leech = (tr.findAll('td'))[4].text
                        if leech == '0':
                            leech = '1'
                        # Converting GB to MB, to easily manage the
                        # pandas structure.
                        size = (tr.findAll('td'))[2].text
                        if 'MB' in size:
                            size = float(size[:-2])
                        elif 'GB' in size:
                            size = float(size[:-2]) * 1000
                        magnet_link = (tr.findAll('a'))[0]['href']
                        # Patch to avoid getting false torrents.
                        if int(seed) < 1500:
                            raw_data.add_new_row(size, seed, leech,
                                                 magnet_link)
                            self.logger.debug(
                                '{0} New Entry Raw Values: {1:7} {2:>4}/{3:4} {4}'
                                .format(self.name, str(int(size)),
                                        str(seed), str(leech),
                                        magnet_link))
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return raw_data
def get_raw_data(self, content=None):
    """Parse the 'searchResult' table into size/seed/leech/magnet rows.

    :param content: raw HTML of the search-result response
    :return: RAWDataInstance populated with one row per accepted result
    :raises WebScraperParseError: a result row could not be parsed
    :raises WebScraperContentError: the page structure is unexpected
        (also wraps WebScraperParseError raised inside the outer try)
    """
    raw_data = RAWDataInstance()
    soup = BeautifulSoup(content, 'html.parser')
    try:
        # Retrieving individual raw values from the search result
        ttable = soup.findAll('table', {'id': 'searchResult'})
        if ttable != []:
            try:
                self.logger.info(
                    '{0} Retrieving Raw Values from Search Result Response:'
                    .format(self.name))
                for items in ttable:
                    tbody = items.findAll('tr')
                    # Skip the header row.
                    for tr in tbody[1:]:
                        seed = (tr.findAll('td'))[2].text
                        if seed == '0':
                            seed = '1'
                        leech = (tr.findAll('td'))[3].text
                        if leech == '0':
                            leech = '1'
                        size_string = (tr.findAll('font',
                                                  {'class': 'detDesc'}))
                        size = (size_string[0].text).split(',')[1][6:]
                        # Converting everything to MB, to easily manage
                        # the pandas structure.
                        if 'MiB' in size:
                            size = size.replace('MiB', 'MB')
                            size = float(size[:-2])
                        elif 'GiB' in size:
                            size = size.replace('GiB', 'GB')
                            size = float(size[:-2]) * 1000
                        elif 'KiB' in size:
                            # BUG FIX: 'KiB' sizes previously fell through
                            # to the bare 'B' branch ('B' is a substring of
                            # 'KiB'), where float('<n> K') raised ValueError
                            # and aborted the whole parse.
                            size = size.replace('KiB', 'KB')
                            size = float(size[:-2]) * 0.001
                        elif 'B' in size:
                            size = float(size[:-2]) * 0.000001
                        magnet_link = (tr.findAll('a'))[2]['href']
                        # Entries below 1 MB are treated as junk results.
                        if size > 1:
                            raw_data.add_new_row(size, seed, leech,
                                                 magnet_link)
                            self.logger.debug0(
                                '{0} New Entry Raw Values: {1:7} {2:>4}/{3:4} {4}'
                                .format(self.name, str(int(size)),
                                        str(seed), str(leech),
                                        magnet_link))
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return raw_data
def get_magnet_info(self, content, *args):
    """Extract the magnet href from a torrent detail page.

    :param content: raw HTML of the detail page
    :param args: unused, kept for a uniform scraper interface
    :return: the magnet href, or implicitly None when no
        'download' div is present
    :raises WebScraperParseError: the expected anchor is missing
    :raises WebScraperContentError: the page structure is unexpected
    """
    page = BeautifulSoup(content, 'html.parser')
    magnet_info = ''
    try:
        download_divs = page.findAll('div', {'class': 'download'})
        if download_divs != []:
            try:
                # First anchor of the first 'download' div holds the URI.
                magnet_info = download_divs[0].findAll('a')[0]['href']
                return magnet_info
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
def get_raw_data(self, content=None):
    """Parse nyaa-style search results into size/seed/leech/magnet rows.

    :param content: raw HTML of the search-result response
    :return: RAWDataInstance with one row per parsed result
    :raises WebScraperParseError: a result entry could not be parsed
    :raises WebScraperContentError: the page structure is unexpected
    """
    raw_data = RAWDataInstance()
    soup = BeautifulSoup(content, 'html.parser')
    try:
        rows = soup.select('table tr')
        if rows != []:
            try:
                for entry in Utils.parse_nyaa(rows, limit=None):
                    # Normalize size to MB; values without a recognized
                    # unit pass through unchanged.
                    size = entry['size']
                    if 'MiB' in size:
                        size = float(size.replace('MiB', 'MB')[:-2])
                    elif 'GiB' in size:
                        size = float(size.replace('GiB', 'GB')[:-2]) * 1000
                    seed = str(entry['seeders'])
                    if seed == '0':
                        seed = '1'
                    leech = str(entry['leechers'])
                    if leech == '0':
                        leech = '1'
                    magnet_link = entry['magnet']
                    raw_data.add_new_row(size, seed, leech, magnet_link)
                    self.logger.debug(
                        '{0} New Entry Raw Values: {1:7} {2:>4}/{3:4} {4}'.format(
                            self.name, str(size), str(seed), str(leech),
                            magnet_link))
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(err),
                    traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return raw_data
def get_magnet_link(self, content, *args):
    """Extract the magnet href from a torrent detail page.

    :param content: raw HTML of the detail page
    :param args: unused, kept for a uniform scraper interface
    :return: the magnet href, or implicitly None when no
        'content' divs are present
    :raises WebScraperParseError: the expected div/anchor is missing
    :raises WebScraperContentError: the page structure is unexpected
    """
    soup = BeautifulSoup(content, 'html.parser')
    try:
        content = (soup.findAll('div', {'class': 'content'}))
        # BUG FIX: original used `content is not []`, an identity
        # comparison against a fresh list that is always True, so an
        # empty page still indexed content[2] and raised. Use
        # truthiness instead.
        if content:
            try:
                # The third 'content' div's second anchor holds the
                # magnet URI — presumably stable for this site; verify.
                magnet = content[2].findAll('a')[1]['href']
                return magnet
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
def get_torrent_link_batch(self, content, websearch, hop, *args):
    """Build download URLs for every episode row on a series page.

    :param content: raw HTML of the series listing page
    :param websearch: search descriptor (unused here, kept for a
        uniform scraper interface)
    :param hop: dash-separated URL/slug of the series page; fields 3..4
        are combined into a surrogate id — TODO confirm the slug layout
    :param args: unused
    :return: tuple of (list of per-episode download URL patterns,
        surrogate id string)
    :raises WebScraperParseError: a row could not be parsed.
        NOTE(review): raised inside the outer try, so it is re-wrapped
        as WebScraperContentError before reaching the caller.
    :raises WebScraperContentError: the page structure is unexpected
    """
    soup = BeautifulSoup(content, 'html.parser')
    try:
        surrogated_id = ''
        surrogated_list = []
        ttable = soup.findAll('table')
        # Magic offsets: tables outside [19:-3] are layout chrome —
        # TODO confirm against the live markup.
        for index, item in enumerate(ttable[19:-3]):
            try:
                # Keep exactly two dash-separated fields starting at
                # index 3 of `hop`, then concatenate them.
                surrogated_id = hop.split(
                    '-')[3:-(len(hop.split('-')[3:]) - 2)]
                surrogated_id = surrogated_id[0] + surrogated_id[1]
                # NOTE(review): shadows the `id` builtin.
                id = item.findAll('a')[0]['href']
                # The numeric id is taken from the 5th dash-separated
                # field of the href — presumably stable for this site.
                single_link_pattern = '/secciones.php?sec=descargas&ap=contar&tabla=series&id={0}'.format(
                    id.split('-')[4])
                self.logger.debug('{0} - {1}::{2} : {3}'.format(
                    self.name, 'Batch_Mode Single-iD', id,
                    single_link_pattern))
                surrogated_list.append(single_link_pattern)
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
        return surrogated_list, surrogated_id
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
def get_torrent_info(self, content, *args):
    """Extract the .torrent file link from a torrent detail page.

    :param content: raw HTML of the detail page
    :param args: unused, kept for a uniform scraper interface
    :return: the torrent-file href, normalized to a relative path
    :raises WebScraperParseError: the expected table/anchor is missing
        (re-wrapped as WebScraperContentError by the outer handler)
    :raises WebScraperContentError: the page structure is unexpected
    """
    soup = BeautifulSoup(content, 'html.parser')
    torrent_file = ''
    try:
        tables = soup.findAll('table')
        try:
            # Retrieving the link to the torrent file.
            torrent_file = tables[15].select('a')[0]['href']
            # Normalize output, because this web is inconsistent with
            # it, removing the default proxy URL prefix when present.
            proxy_prefix = self.proxy_list[self._proxy_list_pos]
            if proxy_prefix in torrent_file:
                torrent_file = torrent_file[len(proxy_prefix):]
        except Exception as err:
            raise WebScraperParseError(
                self.name,
                'ParseError: unable to retrieve values: {0}'.format(err),
                traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return torrent_file
def get_raw_data(self, content=None):
    """Parse odd-row search results into size/seed/leech/magnet rows.

    :param content: raw HTML of the search-result response
    :return: RAWDataInstance populated with one row per accepted result
    :raises WebScraperParseError: a result row could not be parsed
        (re-wrapped as WebScraperContentError by the outer handler)
    :raises WebScraperContentError: the page structure is unexpected
    """
    raw_data = RAWDataInstance()
    soup = BeautifulSoup(content, 'html.parser')
    try:
        ttable = soup.findAll('tr', {'class': 'odd'})
        # Retrieving Individual Raw Values From Search Result
        if ttable != []:
            try:
                self.logger.info(
                    '{0} Retrieving Raw Values from Search Result Response:'
                    .format(self.name))
                for items in ttable:
                    # Positional coupling: _pos indexes page-wide
                    # seed/leech cells by how many rows have been
                    # accepted so far — assumes page order matches
                    # accepted order; TODO confirm (rows filtered out
                    # below would break the alignment).
                    _pos = len(raw_data.magnet_list)
                    size = (items.findAll(
                        'td', {'class': 'nobr center'}))[0].text
                    # Converting GB to MB, to Easily Manage The Pandas Structure
                    if 'MiB' in size:
                        size = size.replace('MiB', 'MB')
                        size = float(size[:-3])
                    elif 'GiB' in size:
                        size = size.replace('GiB', 'GB')
                        size = float(size[:-3]) * 1000
                    elif 'B' in size:
                        # NOTE(review): 'KiB' also matches this branch
                        # ('B' is a substring) and float() would raise —
                        # verify whether KiB sizes can appear here.
                        size = float(size[:-2]) * 0.000001
                    # Seed/leech are read from the whole page, not from
                    # `items`, using the positional index above.
                    seed = (soup.findAll(
                        'td', {'class': 'green center'}))[_pos].text
                    if seed == '0':
                        seed = '1'
                    leech = (soup.findAll(
                        'td', {'class': 'red lasttd center'}))[_pos].text
                    if leech == '0':
                        leech = '1'
                    magnet_link = (items.findAll(
                        'a', {'title': 'Torrent magnet link'}))[0]['href']
                    # Entries at or below 1 MB are skipped as junk.
                    if size > 1:
                        raw_data.add_new_row(size, seed, leech,
                                             magnet_link)
                        self.logger.debug(
                            '{0} New Entry Raw Values: {1:7} {2:>4}/{3:4} {4}'.
                            format(self.name, str(int(size)), str(seed),
                                   str(leech), magnet_link))
            except Exception as err:
                raise WebScraperParseError(
                    self.name,
                    'ParseError: unable to retrieve values: {0}'.format(
                        err), traceback.format_exc())
    except Exception as err:
        raise WebScraperContentError(
            self.name,
            'ContentError: unable to retrieve values {0}'.format(err),
            traceback.format_exc())
    return raw_data