def validateRSS(self):
    try:
        if self.cookies:
            cookie_validator = re.compile(r'^(\w+=\w+)(;\w+=\w+)*$')
            if not cookie_validator.match(self.cookies):
                return (False, 'Cookie is not correctly formatted: ' + self.cookies)

        data = self.cache._getRSSData()['entries']
        if not data:
            return (False, 'No items found in the RSS feed ' + self.url)

        (title, url) = self._get_title_and_url(data[0])
        if not title:
            return (False, 'Unable to get title from first item')
        if not url:
            return (False, 'Unable to get torrent url from first item')

        if url.startswith('magnet:') and re.search(r'urn:btih:([\w]{32,40})', url):
            return (True, 'RSS feed Parsed correctly')
        else:
            if self.cookies:
                requests.utils.add_dict_to_cookiejar(self.session.cookies,
                                                     dict(x.rsplit('=', 1) for x in self.cookies.split(';')))
            torrent_file = self.getURL(url)
            try:
                bdecode(torrent_file)
            except Exception, e:
                self.dumpHTML(torrent_file)
                return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

        return (True, 'RSS feed Parsed correctly')
    except Exception, e:
        return (False, 'Error when trying to load RSS: ' + ex(e))
def validateRSS(self):
    try:
        data = self.cache._getRSSData()
        if not data:
            return (False, 'No data returned from url: ' + self.url)

        items = data.entries
        if not len(items) > 0:
            return (False, 'No items found in the RSS feed ' + self.url)

        (title, url) = self._get_title_and_url(items[0])
        if not title:
            return (False, 'Unable to get title from first item')
        if not url:
            return (False, 'Unable to get torrent url from first item')

        if url.startswith('magnet:') and re.search(r'urn:btih:([\w]{32,40})', url):
            return (True, 'RSS feed Parsed correctly')
        else:
            torrent_file = self.getURL(url)
            try:
                bdecode(torrent_file)
            except Exception, e:
                self.dumpHTML(torrent_file)
                return (False, 'Torrent link is not a valid torrent file: ' + ex(e))

        return (True, 'RSS feed Parsed correctly')
    except Exception, e:
        return (False, 'Error when trying to load RSS: ' + ex(e))
def validate_feed(self):
    success, err_msg = self._check_cookie()
    if not success:
        return success, err_msg

    try:
        items = self.cache_data()

        for item in items:
            title, url = self._title_and_url(item)
            if not (title and url):
                continue
            if url.startswith('magnet:'):
                if re.search(r'urn:btih:([0-9a-f]{32,40})', url):
                    break
            else:
                torrent_file = self.get_url(url)
                try:
                    bdecode(torrent_file)
                    break
                except Exception:
                    pass
        else:
            return False, '%s fetched RSS feed data: %s' % \
                          (('Failed to validate', 'No items found in the')[0 == len(items)], self.url)

        return True, None
    except Exception as e:
        return False, 'Error when trying to load RSS: ' + ex(e)
def _get_torrent_hash(self, result):
    if result.url.startswith('magnet'):
        result.hash = re.findall(r'urn:btih:([\w]{32,40})', result.url)[0]
        if len(result.hash) == 32:
            result.hash = b16encode(b32decode(result.hash)).lower()
    else:
        if not result.content:
            logger.log('Torrent without content', logger.ERROR)
            raise Exception('Torrent without content')

        try:
            torrent_bdecode = bdecode(result.content)
        except BTFailure as e:
            logger.log('Unable to bdecode torrent', logger.ERROR)
            logger.log('Torrent bencoded data: {0}'.format(str(result.content)), logger.DEBUG)
            raise

        try:
            info = torrent_bdecode["info"]
        except Exception as e:
            logger.log('Unable to find info field in torrent', logger.ERROR)
            raise

        result.hash = sha1(bencode(info)).hexdigest()

    return result
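# The 32-vs-40 length check above handles the two encodings a magnet link's
# info-hash can use: 40 hex characters, or 32 base32 characters (which decode
# to the same 20-byte SHA-1 digest). A minimal, stdlib-only sketch of that
# normalization; the function name and the sample link are illustrative,
# not from any of the providers above:

import re
from base64 import b16encode, b32decode


def normalize_btih(magnet_url):
    """Return the info-hash from a magnet link as 40 lowercase hex chars."""
    btih = re.search(r'urn:btih:(\w{32,40})', magnet_url).group(1)
    if len(btih) == 32:
        # base32-encoded digest: decode the 20 raw bytes, re-encode as hex
        btih = b16encode(b32decode(btih, casefold=True))
    return btih.lower()

# e.g. normalize_btih('magnet:?xt=urn:btih:' + 'A' * 32) -> '00' * 20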
def get_torrent(self, url, savelocation=None):
    torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t']
    self.cookiejar.set_cookie(cookielib.Cookie(
        version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False,
        domain='.rutracker.org', domain_specified=False, domain_initial_dot=False,
        path='/', path_specified=True, secure=False, expires=None, discard=True,
        comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))
    downloadurl = 'http://dl.rutracker.org/forum/dl.php?t=' + torrent_id
    torrent_name = torrent_id + '.torrent'

    try:
        prev = os.umask(headphones.UMASK)
        page = self.opener.open(downloadurl)
        torrent = page.read()
        decoded = bdecode(torrent)
        metainfo = decoded['info']
        tor_hash = sha1(bencode(metainfo)).hexdigest()

        if savelocation:
            download_path = os.path.join(savelocation, torrent_name)
        else:
            tempdir = mkdtemp(suffix='_rutracker_torrents')
            download_path = os.path.join(tempdir, torrent_name)

        fp = open(download_path, 'wb')
        fp.write(torrent)
        fp.close()
        os.umask(prev)

        # Add file to utorrent
        if headphones.TORRENT_DOWNLOADER == 2:
            self.utorrent_add_file(download_path)
    except Exception, e:
        logger.error('Error getting torrent: %s' % e)
        return False
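# The dict([part.split('=') ...]) idiom above breaks on query strings that
# contain '='-less or repeated parts. A sketch of a more forgiving
# alternative using the stdlib parse_qs (Python 2 module naming, matching
# the urlparse/cookielib usage above; the helper name is illustrative):

from urlparse import urlparse, parse_qs


def query_param(url, name):
    """Return the first value of a query-string parameter, or None."""
    values = parse_qs(urlparse(url).query).get(name)
    return values[0] if values else None

# e.g. query_param('http://dl.rutracker.org/forum/dl.php?t=12345', 't') -> '12345'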
def _get_torrent_hash(self, result):
    if result.url.startswith('magnet'):
        torrent_hash = re.findall(r'urn:btih:([\w]{32,40})', result.url)[0]
    else:
        info = bdecode(result.content)["info"]
        torrent_hash = sha1(bencode(info)).hexdigest()

    return torrent_hash
def _TorrentHash(url=None, torrent=None):
    hash = None
    if url.startswith('magnet'):
        hash = re.search(r'urn:btih:([\w]{32,40})', url).group(1)
        if len(hash) == 32:
            hash = b16encode(b32decode(hash)).upper()
    else:
        info = bdecode(torrent)["info"]
        hash = sha1(bencode(info)).hexdigest().upper()

    return hash
def run(self):
    self.rejoin_dht()
    while True:
        # noinspection PyBroadException
        try:
            (data, address) = self.ufd.recvfrom(65536)
            msg = bdecode(data)
            self.on_message(msg, address)
        except Exception:
            pass
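# The loop above assumes each UDP datagram is one bencoded KRPC message
# (BEP 5), so bdecode() yields a plain dict that on_message() can dispatch
# on. For reference, a hedged sketch of building the simplest such message,
# a ping query, with the same bencode helper assumed importable here:

def build_ping(node_id, transaction_id='aa'):
    """Bencode a KRPC ping query (BEP 5); node_id is a 20-byte string."""
    return bencode({
        't': transaction_id,   # transaction id, echoed back in the response
        'y': 'q',              # message type: query
        'q': 'ping',           # query name
        'a': {'id': node_id},  # query arguments
    })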
def validate_feed(self):
    success, err_msg = self._check_cookie()
    if not success:
        return success, err_msg

    try:
        items = self._search_provider({'Validate': ['']})

        for item in items:
            title, url = self._title_and_url(item)
            if not (title and url):
                continue
            if url.startswith('magnet:'):
                btih = None
                try:
                    btih = re.findall(r'urn:btih:([\w]{32,40})', url)[0]
                    if 32 == len(btih):
                        from base64 import b16encode, b32decode
                        btih = b16encode(b32decode(btih))
                except (StandardError, Exception):
                    pass
                if btih and re.search('(?i)[0-9a-f]{32,40}', btih):
                    break
            else:
                torrent_file = self.get_url(url)
                if self.should_skip():
                    break
                try:
                    bdecode(torrent_file)
                    break
                except (StandardError, Exception):
                    pass
        else:
            return False, '%s fetched RSS feed data: %s' % \
                          (('Failed to validate', 'No items found in the')[0 == len(items)], self.url)

        return True, None
    except Exception as e:
        return False, 'Error when trying to load RSS: ' + ex(e)
def CalcTorrentHash(torrent):
    if torrent.startswith('magnet'):
        hash = re.findall(r'urn:btih:([\w]{32,40})', torrent)[0]
        if len(hash) == 32:
            hash = b16encode(b32decode(hash)).lower()
    else:
        info = bdecode(torrent)["info"]
        hash = sha1(bencode(info)).hexdigest()

    logger.debug('Torrent Hash: ' + hash)
    return hash
def _get_torrent_hash(self, result):
    if result.url.startswith('magnet'):
        result.hash = re.findall(r'urn:btih:([\w]{32,40})', result.url)[0]
        if len(result.hash) == 32:
            result.hash = b16encode(b32decode(result.hash)).lower()
    else:
        info = bdecode(result.content)['info']
        result.hash = sha1(bencode(info)).hexdigest()

    return result
def _get_torrent_hash(self, result):
    if result.url.startswith("magnet"):
        result.hash = re.findall(r"urn:btih:([\w]{32,40})", result.url)[0]
        if len(result.hash) == 32:
            result.hash = b16encode(b32decode(result.hash)).lower()
    else:
        result.content = result.provider.getURL(result.url)
        info = bdecode(result.content)["info"]
        result.hash = sha1(bencode(info)).hexdigest()

    return result
def _get_torrent_hash(self, result):
    if result.url.startswith('magnet'):
        torrent_hash = re.findall(r'urn:btih:([\w]{32,40})', result.url)[0]
    else:
        if hasattr(result, 'extraInfo') and len(result.extraInfo) > 0:
            torrent_hash = result.extraInfo[0]
        elif hasattr(result, 'content'):
            info = bdecode(result.content)["info"]
            torrent_hash = sha1(bencode(info)).hexdigest()
        else:
            torrent_hash = result.url

    return torrent_hash
def from_torrent_url(url):
    import base64
    import hashlib
    from lib import bencode

    print "#### url: %s" % url
    torrent_data = url_get(url)
    metadata = bencode.bdecode(torrent_data)
    hashcontents = bencode.bencode(metadata["info"])
    digest = hashlib.sha1(hashcontents).digest()
    b32hash = base64.b32encode(digest)
    params = {
        "dn": metadata["info"]["name"],
        "tr": metadata["announce"],
    }
    logger.info(params)
    paramstr = urllib.urlencode(params)
    return "magnet:?%s&%s" % ("xt=urn:btih:%s" % b32hash, paramstr)
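# from_torrent_url() emits the info-hash in base32; magnet-aware clients
# accept that alongside the more common 40-char hex spelling, since both
# encode the same SHA-1 digest of the bencoded "info" dict. A minimal
# stdlib sketch showing the two spellings side by side (the helper name
# is illustrative):

import base64
import binascii
import hashlib


def btih_forms(info_bencoded):
    """Return the (hex, base32) spellings of the same info-hash."""
    digest = hashlib.sha1(info_bencoded).digest()
    return binascii.hexlify(digest), base64.b32encode(digest)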
def calculate_torrent_hash(link, data=None):
    """
    Calculate the torrent hash from a magnet link or data. Returns empty
    string when it cannot create a torrent hash given the input data.
    """
    if link.startswith("magnet:"):
        torrent_hash = re.findall(r"urn:btih:([\w]{32,40})", link)[0]
        if len(torrent_hash) == 32:
            torrent_hash = b16encode(b32decode(torrent_hash)).lower()
    elif data:
        try:
            # noinspection PyUnresolvedReferences
            info = bdecode(data)["info"]
            torrent_hash = sha1(bencode(info)).hexdigest()
        except Exception as e:
            logger.error("Error calculating hash: %s" % e)
            return ''
    else:
        logger.error("Cannot calculate torrent hash without magnet link or data")
        return ''

    logger.debug('Torrent Hash: ' + torrent_hash)
    return torrent_hash
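# Hypothetical usage of calculate_torrent_hash(); the hash and the file
# name below are placeholders, not real data:
#
#   link = 'magnet:?xt=urn:btih:' + 'ab12' * 10
#   calculate_torrent_hash(link)                   # -> 'ab12ab12...' (already hex)
#
#   with open('example.torrent', 'rb') as f:       # hypothetical file
#       calculate_torrent_hash('', data=f.read())  # hashes the bencoded 'info' dict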
def main():
    path = os.path.join(os.getcwd(), 'test', 'blah.torrent')
    # Torrent files are binary; open in 'rb' so bdecode gets the raw bytes.
    with open(path, 'rb') as torrent_file:
        data = torrent_file.read()
    torrent = bdecode(data)
    print(torrent)
def search(self, searchurl, maxsize, minseeders, albumid):
    """
    Parse the search results and return valid torrent list
    """
    titles = []
    urls = []
    seeders = []
    sizes = []
    torrentlist = []
    rulist = []

    try:
        page = self.opener.open(searchurl, timeout=60)
        soup = BeautifulSoup(page.read())

        # Debug
        # logger.debug(soup.prettify())

        # Title
        for link in soup.find_all('a', attrs={'class': 'med tLink hl-tags bold'}):
            title = link.get_text()
            titles.append(title)

        # Download URL
        for link in soup.find_all('a', attrs={'class': 'small tr-dl dl-stub'}):
            url = link.get('href')
            urls.append(url)

        # Seeders
        for link in soup.find_all('b', attrs={'class': 'seedmed'}):
            seeder = link.get_text()
            seeders.append(seeder)

        # Size
        for link in soup.find_all('td', attrs={'class': 'row4 small nowrap tor-size'}):
            size = link.u.string
            sizes.append(size)
    except:
        pass

    # Combine lists
    torrentlist = zip(titles, urls, seeders, sizes)

    # return if nothing found
    if not torrentlist:
        return False

    # don't bother checking track counts anymore, let searcher filter instead
    check_track_count = False

    if check_track_count:
        # get headphones track count for album, return if not found
        myDB = db.DBConnection()
        tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid])
        hptrackcount = len(tracks)
        if not hptrackcount:
            logger.info('headphones track info not found, cannot compare to torrent')
            return False

    # Return all valid entries; ignored/required words now checked in searcher.py
    # unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd']
    formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif']
    deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive']

    for torrent in torrentlist:
        returntitle = torrent[0].encode('utf-8')
        url = torrent[1]
        seeders = torrent[2]
        size = torrent[3]

        if int(size) <= maxsize and int(seeders) >= minseeders:
            # Torrent topic page
            torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t']
            topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id

            # add to list
            if not check_track_count:
                valid = True
            else:
                # Check torrent info
                self.cookiejar.set_cookie(cookielib.Cookie(
                    version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False,
                    domain='.rutracker.org', domain_specified=False, domain_initial_dot=False,
                    path='/', path_specified=True, secure=False, expires=None, discard=True,
                    comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))

                # Debug
                # for cookie in self.cookiejar:
                #     logger.debug('Cookie: %s' % cookie)

                try:
                    page = self.opener.open(url)
                    torrent = page.read()
                    if torrent:
                        decoded = bdecode(torrent)
                        metainfo = decoded['info']
                    page.close()
                except Exception, e:
                    logger.error('Error getting torrent: %s' % e)
                    return False

                # get torrent track count and check for cue
                trackcount = 0
                cuecount = 0

                if 'files' in metainfo:  # multi
                    for pathfile in metainfo['files']:
                        path = pathfile['path']
                        for file in path:
                            if any(file.lower().endswith('.' + x.lower()) for x in formatlist):
                                trackcount += 1
                            if '.cue' in file:
                                cuecount += 1

                title = returntitle.lower()
                logger.debug('torrent title: %s' % title)
                logger.debug('headphones trackcount: %s' % hptrackcount)
                logger.debug('rutracker trackcount: %s' % trackcount)

                # If torrent track count is less than headphones track count, and there's a cue,
                # then attempt to get track count from log(s). This is for the case where we
                # have a single .flac/.wav which can be split by cue.
                # Not great, but shouldn't be doing this too often
                totallogcount = 0
                if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                    page = self.opener.open(topicurl, timeout=60)
                    soup = BeautifulSoup(page.read())
                    findtoc = soup.find_all(text='TOC of the extracted CD')
                    if not findtoc:
                        findtoc = soup.find_all(text='TOC извлечённого CD')
                    for toc in findtoc:
                        logcount = 0
                        for toccontent in toc.find_all_next(text=True):
                            cut_string = toccontent.split('|')
                            new_string = cut_string[0].lstrip().rstrip()
                            if new_string == '1' or new_string == '01':
                                logcount = 1
                            elif logcount > 0:
                                if new_string.isdigit():
                                    logcount += 1
                                else:
                                    break
                        totallogcount = totallogcount + logcount
                    if totallogcount > 0:
                        trackcount = totallogcount
                        logger.debug('rutracker logtrackcount: %s' % totallogcount)

                # If torrent track count = hp track count then return torrent,
                # if greater, check for deluxe/special/foreign editions,
                # if less, then allow if it's a single track with a cue
                valid = False
                if trackcount == hptrackcount:
                    valid = True
                elif trackcount > hptrackcount:
                    if any(deluxe in title for deluxe in deluxelist):
                        valid = True

            # Add to list
            if valid:
                rulist.append((returntitle, size, topicurl))
            else:
                if topicurl:
                    logger.info(u'<a href="%s">Torrent</a> found with %s tracks but the selected '
                                u'headphones release has %s tracks, skipping for rutracker.org'
                                % (topicurl, trackcount, hptrackcount))
        else:
            logger.info('%s is larger than the maxsize or has too few seeders for this '
                        'category, skipping. (Size: %i bytes, Seeders: %i)'
                        % (returntitle, int(size), int(seeders)))

    return rulist
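# The audio-track/cue counting block above recurs in each provider that
# follows. A small self-contained sketch of the same logic, extracted as a
# helper (the name and default extension list are illustrative; metainfo is
# the bdecoded 'info' dict of a multi-file torrent):

def count_tracks(metainfo, extensions=('ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif')):
    """Count audio files and .cue sheets listed in a torrent's metainfo."""
    trackcount = cuecount = 0
    for pathfile in metainfo.get('files', []):
        for name in pathfile['path']:
            if any(name.lower().endswith('.' + ext) for ext in extensions):
                trackcount += 1
            if '.cue' in name:
                cuecount += 1
    return trackcount, cuecount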
def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
    """
    Parse the search results and return the first valid torrent
    """
    titles = []
    urls = []
    seeders = []
    sizes = []
    torrentlist = []
    rulist = []

    try:
        page = self.opener.open(searchurl, timeout=60)
        soup = BeautifulSoup(page.read())

        # Debug
        # logger.debug(soup.prettify())

        # Title
        for link in soup.find_all("a", attrs={"class": "med tLink bold"}):
            title = link.get_text()
            titles.append(title)

        # Download URL
        for link in soup.find_all("a", attrs={"class": "small tr-dl dl-stub"}):
            url = link.get("href")
            urls.append(url)

        # Seeders
        for link in soup.find_all("td", attrs={"class": "row4 seedmed"}):
            seeder = link.get_text()
            seeders.append(seeder)

        # Size
        for link in soup.find_all("td", attrs={"class": "row4 small nowrap tor-size"}):
            size = link.u.string
            sizes.append(size)
    except:
        pass

    # Combine lists
    torrentlist = zip(titles, urls, seeders, sizes)

    # return if nothing found
    if not torrentlist:
        return False

    # get headphones track count for album, return if not found
    hptrackcount = 0
    myDB = db.DBConnection()
    tracks = myDB.select("SELECT TrackTitle from tracks WHERE AlbumID=?", [albumid])
    for track in tracks:
        hptrackcount += 1
    if not hptrackcount:
        logger.info("headphones track info not found, cannot compare to torrent")
        return False

    # Return the first valid torrent, unless we want a preferred bitrate,
    # in which case we want all valid entries
    for torrent in torrentlist:
        returntitle = torrent[0].encode("utf-8")
        url = torrent[1]
        seeders = torrent[2]
        size = torrent[3]

        # Attempt to filter out unwanted
        title = returntitle.lower()
        if ("promo" not in title and "vinyl" not in title and "songbook" not in title
                and "tvrip" not in title and "hdtv" not in title and "dvd" not in title
                and int(size) <= maxsize and int(seeders) >= minseeders):

            # Check torrent info
            torrent_id = dict([part.split("=") for part in urlparse(url)[4].split("&")])["t"]
            self.cookiejar.set_cookie(cookielib.Cookie(
                version=0, name="bb_dl", value=torrent_id, port=None, port_specified=False,
                domain=".rutracker.org", domain_specified=False, domain_initial_dot=False,
                path="/", path_specified=True, secure=False, expires=None, discard=True,
                comment=None, comment_url=None, rest={"HttpOnly": None}, rfc2109=False))

            # Debug
            # for cookie in self.cookiejar:
            #     logger.debug('Cookie: %s' % cookie)

            try:
                page = self.opener.open(url)
                torrent = page.read()
                if torrent:
                    decoded = bencode.bdecode(torrent)
                    metainfo = decoded["info"]
                page.close()
            except Exception, e:
                logger.error("Error getting torrent: %s" % e)
                return False

            # get torrent track count and check for cue
            trackcount = 0
            cuecount = 0

            if "files" in metainfo:  # multi
                for pathfile in metainfo["files"]:
                    path = pathfile["path"]
                    for file in path:
                        if (".ape" in file or ".flac" in file or ".ogg" in file
                                or ".m4a" in file or ".aac" in file or ".mp3" in file
                                or ".wav" in file or ".aif" in file):
                            trackcount += 1
                        if ".cue" in file:
                            cuecount += 1

            # Torrent topic page
            topicurl = "http://rutracker.org/forum/viewtopic.php?t=" + torrent_id

            logger.debug("torrent title: %s" % title)
            logger.debug("headphones trackcount: %s" % hptrackcount)
            logger.debug("rutracker trackcount: %s" % trackcount)

            # If torrent track count is less than headphones track count, and there's a cue,
            # then attempt to get track count from log(s). This is for the case where we
            # have a single .flac/.wav which can be split by cue.
            # Not great, but shouldn't be doing this too often
            totallogcount = 0
            if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                page = self.opener.open(topicurl, timeout=60)
                soup = BeautifulSoup(page.read())
                findtoc = soup.find_all(text="TOC of the extracted CD")
                if not findtoc:
                    findtoc = soup.find_all(text="TOC извлечённого CD")
                for toc in findtoc:
                    logcount = 0
                    for toccontent in toc.find_all_next(text=True):
                        cut_string = toccontent.split("|")
                        new_string = cut_string[0].lstrip().rstrip()
                        if new_string == "1" or new_string == "01":
                            logcount = 1
                        elif logcount > 0:
                            if new_string.isdigit():
                                logcount += 1
                            else:
                                break
                    totallogcount = totallogcount + logcount
                if totallogcount > 0:
                    trackcount = totallogcount
                    logger.debug("rutracker logtrackcount: %s" % totallogcount)

            # If torrent track count = hp track count then return torrent,
            # if greater, check for deluxe/special/foreign editions,
            # if less, then allow if it's a single track with a cue
            valid = False
            if trackcount == hptrackcount:
                valid = True
            elif trackcount > hptrackcount:
                if ("deluxe" in title or "edition" in title
                        or "japanese" in title or "exclusive" in title):
                    valid = True

            # return 1st valid torrent if not checking by bitrate,
            # else add to list and return at end
            if valid:
                rulist.append((returntitle, size, topicurl))
                if not bitrate:
                    return rulist

    return rulist
elif headphones.TORRENTBLACKHOLE_DIR != "":
    # Get torrent name from .torrent, this is usually used by the torrent client as the folder name
    torrent_name = torrent_folder_name + '.torrent'
    download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
    try:
        # Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name.
        torrent_file = open(download_path, 'wb')
        torrent_file.write(data)
        torrent_file.close()

        # Open the fresh torrent file again so we can extract the proper torrent name,
        # used later in post-processing.
        torrent_file = open(download_path, 'rb')
        torrent_info = bencode.bdecode(torrent_file.read())
        torrent_file.close()
        torrent_folder_name = torrent_info['info'].get('name', '').decode('utf-8')
        logger.info('Torrent folder name: %s' % torrent_folder_name)
    except Exception, e:
        logger.error('Couldn\'t get name from Torrent file: %s' % e)
        break

myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)',
            [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])


def preprocesstorrent(resultlist):
    selresult = ""
    for result in resultlist:
        if selresult == "":
            selresult = result
if headphones.TORRENTBLACKHOLE_DIR == "sendtracker":
    torrent = classes.TorrentDataSearchResult()
    torrent.extraInfo.append(data)
    torrent.name = torrent_folder_name
    sab.sendTorrent(torrent)
elif headphones.TORRENTBLACKHOLE_DIR != "":
    # Get torrent name from .torrent, this is usually used by the torrent client as the folder name
    torrent_name = torrent_folder_name + '.torrent'
    download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
    try:
        torrent_file = open(download_path, 'rb').read()
        torrent_info = bencode.bdecode(torrent_file)
        torrent_folder_name = torrent_info['info'].get('name', '')
        logger.info('Torrent folder name: %s' % torrent_folder_name)
    except Exception, e:
        logger.error('Couldn\'t get name from Torrent file: %s' % e)
        break

myDB.action('UPDATE albums SET status = "Snatched" WHERE AlbumID=?', [albums[2]])
myDB.action('INSERT INTO snatched VALUES( ?, ?, ?, ?, DATETIME("NOW", "localtime"), ?, ?)',
            [albums[2], bestqual[0], bestqual[1], bestqual[2], "Snatched", torrent_folder_name])


def preprocesstorrent(resultlist):
    selresult = ""
    for result in resultlist:
        if selresult == "":
            selresult = result
        elif int(selresult[1]) < int(result[1]):
            # if size is lower than new result, replace previous selected result
            # (bigger size = better quality?)
            selresult = result
def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
    """
    Parse the search results and return valid torrent list
    """
    titles = []
    urls = []
    seeders = []
    sizes = []
    torrentlist = []
    rulist = []
    result_table = []
    entries = []

    page = self.opener.open(searchurl, timeout=60)
    data = page.read()
    html = BeautifulSoup(data)

    torrent_table = html.find('table', attrs={'class': 'torrentlist'})
    if torrent_table:
        torrents = torrent_table.find_all('tr')
        if torrents:
            for result in torrents[1:]:
                torrent = result.find_all('td')[1].find('a').find('b').string
                torrent_name = torrent.string
                # torrent_detail_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
                # torrent_download_url = self.urls['base_url'] + (result.find_all('td')[2].find('a'))['href']
                torrent_download_url = self.urls['base_url'] + (result.find_all('td')[2].find('a')['href'])
                torrent_seeders = int(result.find_all('td')[8].find('b').string)
                torrent_size = int(float(result.find_all('td')[6].find('br').previous))

                titles.append(torrent)
                urls.append(torrent_download_url)
                seeders.append(torrent_seeders)
                sizes.append(torrent_size)
                logger.info('got stuff from deildu torrent %s' % torrent)
    # except:
    #     logger.info('deildu.net exception')
    #     pass

    # Combine lists
    torrentlist = zip(titles, urls, seeders, sizes)

    # return if nothing found
    if not torrentlist:
        logger.info('nothing in torrent list ....')
        return False

    # get headphones track count for album, return if not found
    myDB = db.DBConnection()
    tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid])
    hptrackcount = len(tracks)
    if not hptrackcount:
        logger.info('headphones track info not found, cannot compare to torrent')
        return False

    # Return all valid entries; ignored/required words now checked in searcher.py
    # unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd']
    formatlist = ['ape', 'flac', 'ogg', 'm4a', 'aac', 'mp3', 'wav', 'aif']
    deluxelist = ['deluxe', 'edition', 'japanese', 'exclusive']

    for torrent in torrentlist:
        returntitle = torrent[0].encode('utf-8')
        url = torrent[1]
        seeders = torrent[2]
        size = torrent[3]
        title = returntitle.lower()

        if int(size) <= maxsize and int(seeders) >= minseeders:
            # Check torrent info
            # torrent_id = dict([part.split('=') for part in urlparse(url)[4].split('&')])['t']
            torrent_id = (urlparse(url)[2].split('/'))[2]
            # self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id,
            #     port=None, port_specified=False, domain='deildu.net', domain_specified=False,
            #     domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None,
            #     discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))

            # Debug
            # for cookie in self.cookiejar:
            #     logger.debug('Cookie: %s' % cookie)

            try:
                page = self.opener.open(url)
                torrent = page.read()
                if torrent:
                    decoded = bencode.bdecode(torrent)
                    metainfo = decoded['info']
                page.close()
            except Exception, e:
                logger.error('Error getting torrent: %s' % e)
                return False

            # get torrent track count and check for cue
            trackcount = 0
            cuecount = 0

            if 'files' in metainfo:  # multi
                for pathfile in metainfo['files']:
                    path = pathfile['path']
                    for file in path:
                        if any(file.lower().endswith('.' + x.lower()) for x in formatlist):
                            trackcount += 1
                        if '.cue' in file:
                            cuecount += 1

            # Torrent topic page, e.g. http://deildu.net/details.php?id=133636
            # topicurl = 'http://deildu.net/details.php?id=' + torrent_id
            topicurl = url

            logger.debug('torrent title: %s' % title)
            logger.debug('headphones trackcount: %s' % hptrackcount)
            logger.debug('deildu trackcount: %s' % trackcount)

            # If torrent track count is less than headphones track count, and there's a cue,
            # then attempt to get track count from log(s). This is for the case where we
            # have a single .flac/.wav which can be split by cue.
            # Not great, but shouldn't be doing this too often
            totallogcount = 0
            if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                page = self.opener.open(topicurl, timeout=60)
                soup = BeautifulSoup(page.read())
                findtoc = soup.find_all(text='TOC of the extracted CD')
                if not findtoc:
                    findtoc = soup.find_all(text='TOC извлечённого CD')
                for toc in findtoc:
                    logcount = 0
                    for toccontent in toc.find_all_next(text=True):
                        cut_string = toccontent.split('|')
                        new_string = cut_string[0].lstrip().rstrip()
                        if new_string == '1' or new_string == '01':
                            logcount = 1
                        elif logcount > 0:
                            if new_string.isdigit():
                                logcount += 1
                            else:
                                break
                    totallogcount = totallogcount + logcount
                if totallogcount > 0:
                    trackcount = totallogcount
                    logger.debug('deildu logtrackcount: %s' % totallogcount)

            # If torrent track count = hp track count then return torrent,
            # if greater, check for deluxe/special/foreign editions,
            # if less, then allow if it's a single track with a cue
            valid = True
            # if trackcount == hptrackcount:
            #     valid = True
            # elif trackcount > hptrackcount:
            #     if any(deluxe in title for deluxe in deluxelist):
            #         valid = True

            # Add to list
            if valid:
                rulist.append((returntitle, size, topicurl))
            # else:
            #     if topicurl:
            #         logger.info(u'<a href="%s">Torrent</a> found with %s tracks but the selected '
            #                     u'headphones release has %s tracks, skipping for deildu.net'
            #                     % (topicurl, trackcount, hptrackcount))
        else:
            logger.info('%s is larger than the maxsize or has too few seeders for this '
                        'category, skipping. (Size: %i bytes, Seeders: %i)'
                        % (returntitle, int(size), int(seeders)))

    return rulist
def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
    """
    Parse the search results and return valid torrent list
    """
    titles = []
    urls = []
    seeders = []
    sizes = []
    torrentlist = []
    rulist = []

    try:
        page = self.opener.open(searchurl, timeout=60)
        soup = BeautifulSoup(page.read())

        # Debug
        # logger.debug(soup.prettify())

        # Title
        for link in soup.find_all("a", attrs={"class": "med tLink hl-tags bold"}):
            title = link.get_text()
            titles.append(title)

        # Download URL
        for link in soup.find_all("a", attrs={"class": "small tr-dl dl-stub"}):
            url = link.get("href")
            urls.append(url)

        # Seeders
        for link in soup.find_all("b", attrs={"class": "seedmed"}):
            seeder = link.get_text()
            seeders.append(seeder)

        # Size
        for link in soup.find_all("td", attrs={"class": "row4 small nowrap tor-size"}):
            size = link.u.string
            sizes.append(size)
    except:
        pass

    # Combine lists
    torrentlist = zip(titles, urls, seeders, sizes)

    # return if nothing found
    if not torrentlist:
        return False

    # get headphones track count for album, return if not found
    myDB = db.DBConnection()
    tracks = myDB.select("SELECT * from tracks WHERE AlbumID=?", [albumid])
    hptrackcount = len(tracks)
    if not hptrackcount:
        logger.info("headphones track info not found, cannot compare to torrent")
        return False

    # Return all valid entries; ignored/required words now checked in searcher.py
    # unwantedlist = ['promo', 'vinyl', '[lp]', 'songbook', 'tvrip', 'hdtv', 'dvd']
    formatlist = ["ape", "flac", "ogg", "m4a", "aac", "mp3", "wav", "aif"]
    deluxelist = ["deluxe", "edition", "japanese", "exclusive"]

    for torrent in torrentlist:
        returntitle = torrent[0].encode("utf-8")
        url = torrent[1]
        seeders = torrent[2]
        size = torrent[3]
        title = returntitle.lower()

        if int(size) <= maxsize and int(seeders) >= minseeders:
            # Check torrent info
            torrent_id = dict([part.split("=") for part in urlparse(url)[4].split("&")])["t"]
            self.cookiejar.set_cookie(cookielib.Cookie(
                version=0, name="bb_dl", value=torrent_id, port=None, port_specified=False,
                domain=".rutracker.org", domain_specified=False, domain_initial_dot=False,
                path="/", path_specified=True, secure=False, expires=None, discard=True,
                comment=None, comment_url=None, rest={"HttpOnly": None}, rfc2109=False))

            # Debug
            # for cookie in self.cookiejar:
            #     logger.debug('Cookie: %s' % cookie)

            try:
                page = self.opener.open(url)
                torrent = page.read()
                if torrent:
                    decoded = bencode.bdecode(torrent)
                    metainfo = decoded["info"]
                page.close()
            except Exception, e:
                logger.error("Error getting torrent: %s" % e)
                return False

            # get torrent track count and check for cue
            trackcount = 0
            cuecount = 0

            if "files" in metainfo:  # multi
                for pathfile in metainfo["files"]:
                    path = pathfile["path"]
                    for file in path:
                        if any(file.lower().endswith("." + x.lower()) for x in formatlist):
                            trackcount += 1
                        if ".cue" in file:
                            cuecount += 1

            # Torrent topic page
            topicurl = "http://rutracker.org/forum/viewtopic.php?t=" + torrent_id

            logger.debug("torrent title: %s" % title)
            logger.debug("headphones trackcount: %s" % hptrackcount)
            logger.debug("rutracker trackcount: %s" % trackcount)

            # If torrent track count is less than headphones track count, and there's a cue,
            # then attempt to get track count from log(s). This is for the case where we
            # have a single .flac/.wav which can be split by cue.
            # Not great, but shouldn't be doing this too often
            totallogcount = 0
            if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
                page = self.opener.open(topicurl, timeout=60)
                soup = BeautifulSoup(page.read())
                findtoc = soup.find_all(text="TOC of the extracted CD")
                if not findtoc:
                    findtoc = soup.find_all(text="TOC извлечённого CD")
                for toc in findtoc:
                    logcount = 0
                    for toccontent in toc.find_all_next(text=True):
                        cut_string = toccontent.split("|")
                        new_string = cut_string[0].lstrip().rstrip()
                        if new_string == "1" or new_string == "01":
                            logcount = 1
                        elif logcount > 0:
                            if new_string.isdigit():
                                logcount += 1
                            else:
                                break
                    totallogcount = totallogcount + logcount
                if totallogcount > 0:
                    trackcount = totallogcount
                    logger.debug("rutracker logtrackcount: %s" % totallogcount)

            # If torrent track count = hp track count then return torrent,
            # if greater, check for deluxe/special/foreign editions,
            # if less, then allow if it's a single track with a cue
            valid = False
            if trackcount == hptrackcount:
                valid = True
            elif trackcount > hptrackcount:
                if any(deluxe in title for deluxe in deluxelist):
                    valid = True

            # Add to list
            if valid:
                rulist.append((returntitle, size, topicurl))
            else:
                if topicurl:
                    logger.info(u'<a href="%s">Torrent</a> found with %s tracks but the selected '
                                u"headphones release has %s tracks, skipping for rutracker.org"
                                % (topicurl, trackcount, hptrackcount))
        else:
            logger.info("%s is larger than the maxsize or has too few seeders for this "
                        "category, skipping. (Size: %i bytes, Seeders: %i)"
                        % (returntitle, int(size), int(seeders)))

    return rulist