def __init__(self, guiUtility):
    """Set up the single torrent-manager instance and its empty search state.

    guiUtility is stored as-is; other methods reach the session, frame and
    language strings through it.

    Raises RuntimeError when an instance has already been registered.
    """
    # NOTE(review): this def sits at module level ahead of the TorrentManager
    # class and mirrors its constructor -- confirm whether it is intentional.
    if TorrentManager.__single:
        raise RuntimeError("TorrentSearchGridManager is singleton")
    TorrentManager.__single = self

    self.guiUtility = guiUtility

    # All matches for the current keywords found in the DB, not yet
    # filtered by category.
    self.hits = []

    # Remote results for the current keywords.
    self.remoteHits = {}

    # Used to ask for a refresh once remote results have come in.
    self.gridmgr = None
    self.guiserver = GUITaskQueue.getInstance()

    # Current query, the strategy used to rerank its results, and the
    # previously executed query.
    self.searchkeywords = []
    self.rerankingStrategy = DefaultTorrentReranker()
    self.oldsearchkeywords = []

    # Running count of results dropped by filtering.
    self.filteredResults = 0

    # Grouping of results into bundles.
    self.bundler = Bundler()
    self.bundle_mode = None

    self.category = Category.getInstance()
class TorrentManager:
    """Singleton that runs torrent searches and manages the result list.

    It combines local database hits with remote hits received from peers,
    filters them by category and status, ranks them, prefetches the .torrent
    files of the top results and starts downloads on request.

    connect() must be called before any method that touches the database
    handlers (torrent_db, pref_db, ...), the search manager or the library
    manager, because those attributes are only created there.
    """

    # Code to make this a singleton
    __single = None

    def __init__(self, guiUtility):
        """Register the singleton instance and initialise empty search state.

        Raises RuntimeError when an instance already exists.
        """
        if TorrentManager.__single:
            raise RuntimeError("TorrentSearchGridManager is singleton")
        TorrentManager.__single = self
        self.guiUtility = guiUtility

        # Contains all matches for keywords in DB, not filtered by category
        self.hits = []

        # Remote results for current keywords
        self.remoteHits = {}

        # For asking for a refresh when remote results came in
        self.gridmgr = None
        self.guiserver = GUITaskQueue.getInstance()

        self.searchkeywords = []
        self.rerankingStrategy = DefaultTorrentReranker()
        self.oldsearchkeywords = []

        self.filteredResults = 0

        self.bundler = Bundler()
        self.bundle_mode = None
        self.category = Category.getInstance()

    @staticmethod
    def getInstance(*args, **kw):
        """Return the singleton, creating it from the arguments on first use."""
        if TorrentManager.__single is None:
            TorrentManager(*args, **kw)
        return TorrentManager.__single

    def getCollectedFilename(self, torrent):
        """Return the path of the collected .torrent for TORRENT, or None.

        TORRENT is the dictionary describing the torrent. Its
        'torrent_file_name' entry is filled in (or refreshed) as a side
        effect.
        """
        torrent_dir = self.guiUtility.utility.session.get_torrent_collecting_dir()

        if not torrent.get('torrent_file_name'):
            torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
        torrent_filename = os.path.join(torrent_dir, torrent['torrent_file_name'])

        # .torrent found, return complete filename
        if os.path.isfile(torrent_filename):
            return torrent_filename

        # .torrent not found, possibly a new torrent_collecting_dir: the
        # stored name may be stale, so derive it again from the infohash.
        torrent['torrent_file_name'] = get_collected_torrent_filename(torrent['infohash'])
        torrent_filename = os.path.join(torrent_dir, torrent['torrent_file_name'])
        if os.path.isfile(torrent_filename):
            return torrent_filename
        return None

    def getTorrent(self, torrent, callback):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored

        Returns a filename, if filename is known or a boolean +
        request_type describing if the torrent is requested
        """
        torrent_filename = self.getCollectedFilename(torrent)
        if torrent_filename:
            return torrent_filename

        # .torrent not found, try to download from peers
        if self.downloadTorrentfileFromPeers(torrent, callback):
            return (True, "from peers")

        return (False, "could not get torrent")

    def downloadTorrentfileFromPeers(self, torrent, callback, duplicate=True, prio=0):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        CALLBACK is called when the torrent is downloaded. When no
        torrent can be downloaded the callback is ignored

        DUPLICATE can be True: the file will be downloaded from peers
        regardless of a previous/current download attempt (returns
        True). Or DUPLICATE can be False: the file will only be
        downloaded when it was not yet attempted to download (when
        False is returned no callback will be made)

        PRIO is the priority, default is 0 which means we need this torrent now.
        If PRIO != 0, then a rate limiter could be used by the remotetorrentrequester

        Returns True or False
        """
        # return False when duplicate
        if not duplicate and torrent.get('query_torrent_was_requested', False):
            return False

        torrent['query_torrent_was_requested'] = True
        session = self.guiUtility.utility.session
        permids = torrent.get('query_permids')
        if not permids:
            # No known source peer: let the session decide whom to ask.
            session.download_torrentfile(torrent['infohash'], callback, prio)
        else:
            for permid in permids:
                session.download_torrentfile_from_peer(permid, torrent['infohash'], callback, prio)

        return True

    def downloadTorrent(self, torrent, dest=None, secret=False, vodmode=False, selectedFiles=None):
        """Start downloading TORRENT, fetching its .torrent file first if needed.

        When the .torrent file still has to be fetched from peers this
        method is re-invoked through a callback once it arrives, and the
        request-type string is returned. When the .torrent cannot be
        obtained at all, the user is asked whether the entry should be
        deleted.
        """
        callback = lambda infohash, metadata, filename: self.downloadTorrent(torrent, dest, secret, vodmode, selectedFiles)
        callback.__name__ = "downloadTorrent_callback"
        torrent_filename = self.getTorrent(torrent, callback)

        # Display name, falling back to the infohash; shared by the download
        # call and the "dead torrent" dialog so neither fails on a nameless
        # entry.
        name = torrent.get('name') or torrent['infohash']

        if isinstance(torrent_filename, basestring):
            # got actual filename
            clicklog = {'keywords': self.searchkeywords,
                        'reranking_strategy': self.rerankingStrategy.getID()}
            if "click_position" in torrent:
                clicklog["click_position"] = torrent["click_position"]

            # Api download
            d = self.guiUtility.frame.startDownload(torrent_filename, destdir=dest, clicklog=clicklog, name=name, vodmode=vodmode, selectedFiles=selectedFiles)  ## remove name=name
            if d:
                if secret:
                    self.torrent_db.setSecret(torrent['infohash'], secret)

                if DEBUG:
                    print >>sys.stderr, 'standardDetails: download: download started'

                torrent['myDownloadHistory'] = True

        elif torrent_filename[0]:
            # torrent is being requested from peers, using callback this
            # function will be called again
            return torrent_filename[1]

        else:
            # torrent not found
            def showdialog():
                message = self.guiUtility.utility.lang.get('delete_torrent') % name
                dlg = wx.MessageDialog(self.guiUtility.frame,
                                       message,
                                       self.guiUtility.utility.lang.get('delete_dead_torrent'),
                                       wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION)
                result = dlg.ShowModal()
                dlg.Destroy()

                if result == wx.ID_YES:
                    infohash = torrent['infohash']
                    self.torrent_db.deleteTorrent(infohash, delete_file=True, commit=True)

            # Scheduled through wx.CallAfter rather than shown directly.
            wx.CallAfter(showdialog)

    def isTorrentPlayable(self, torrent, default=(False, [], []), callback=None):
        """
        TORRENT is a dictionary containing torrent information used to
        display the entry on the UI. it is NOT the torrent file!

        DEFAULT indicates the default value when we don't know if the
        torrent is playable.

        CALLBACK can be given to result the actual 'playable' value
        for the torrent after some downloading/processing. The DEFAULT
        value is returned in this case. Will only be called if
        self.item == torrent

        The result is a tuple (playable, files, allfiles): a boolean
        indicating if the torrent is playable, the playable files and all
        files in the torrent. With a CALLBACK it is delivered as
        callback(torrent, result); without one (torrent, result) is
        returned. While the .torrent is still being requested from peers
        the request-type string is returned instead.
        """
        torrent_callback = lambda infohash, metadata, filename: self.isTorrentPlayable(torrent, default, callback)
        torrent_callback.__name__ = "isTorrentPlayable_callback"
        torrent_filename = self.getTorrent(torrent, torrent_callback)

        if isinstance(torrent_filename, basestring):
            # got actual filename
            tdef = TorrentDef.load(torrent_filename)

            files = tdef.get_files_as_unicode(exts=videoextdefaults)
            allfiles = tdef.get_files_as_unicode_with_length()
            playable = len(files) > 0

            torrent['comment'] = tdef.get_comment_as_unicode()
            tracker_hierarchy = tdef.get_tracker_hierarchy()
            if tracker_hierarchy:
                torrent['trackers'] = tracker_hierarchy
            else:
                torrent['trackers'] = [[tdef.get_tracker()]]

            if callback is not None:
                callback(torrent, (playable, files, allfiles))
            else:
                return torrent, (playable, files, allfiles)

        elif not torrent_filename[0]:
            if DEBUG:
                print >>sys.stderr, "standardDetails:torrent_is_playable returning default", default
            # callback is optional; without one hand the default back to the
            # caller instead of failing on a None call.
            if callback is not None:
                callback(torrent, default)
            else:
                return torrent, default

        else:
            return torrent_filename[1]

    def getSwarmInfo(self, infohash):
        """Return the swarm info stored in the torrent DB for INFOHASH."""
        return self.torrent_db.getSwarmInfoByInfohash(infohash)

    def set_gridmgr(self, gridmgr):
        """Register the grid manager that refreshGrid() should refresh."""
        self.gridmgr = gridmgr

    def connect(self):
        """Open the database handlers and helpers used by the other methods."""
        session = self.guiUtility.utility.session
        self.torrent_db = session.open_dbhandler(NTFY_TORRENTS)
        self.pref_db = session.open_dbhandler(NTFY_PREFERENCES)
        self.mypref_db = session.open_dbhandler(NTFY_MYPREFERENCES)
        self.search_db = session.open_dbhandler(NTFY_SEARCH)
        self.votecastdb = session.open_dbhandler(NTFY_VOTECAST)
        self.searchmgr = SearchManager(self.torrent_db)
        self.library_manager = self.guiUtility.library_manager

    def getHitsInCategory(self, categorykey='all', sort='rameezmetric'):
        """Search, filter, rank and bundle the hits for the current keywords.

        CATEGORYKEY can be 'all', 'Video', 'Document', ...
        SORT selects the ranking; only 'rameezmetric' triggers self.sort().

        Returns [number of returned hits, number of filtered results,
        selected bundle mode, returned hits].
        """
        begintime = time()
        bundle_mode = self.bundle_mode

        if DEBUG:
            print >>sys.stderr, "TorrentSearchManager: getHitsInCategory:", categorykey

        categorykey = categorykey.lower()
        enabledcatslow = ["other"]
        for catname, _ in self.category.getCategoryNames():
            enabledcatslow.append(catname.lower())

        # TODO: do all filtering in DB query
        def torrentFilter(torrent):
            # Keep a torrent when its category matches and it is not dead.
            categories = torrent.get("category", [])
            if not categories:
                categories = ["other"]

            lowered = [cat.lower() for cat in categories]
            if categorykey == 'all':
                okCategory = any(cat in enabledcatslow for cat in lowered)
            else:
                okCategory = categorykey in lowered

            if not okCategory:
                self.filteredResults += 1

            okGood = torrent['status'] != 'dead'
            return okCategory and okGood

        # 1. Local search puts hits in self.hits
        beginlocalsearch = time()
        new_local_hits = self.searchLocalDatabase()

        if DEBUG:
            print >>sys.stderr, 'TorrentSearchGridManager: getHitsInCat: search found: %d items took %s' % (len(self.hits), time() - beginlocalsearch)

        # 2. Filter self.hits on category and status. Only fresh local
        # results are filtered; a cached hit list was filtered before.
        beginfilterhits = time()
        if new_local_hits:
            self.hits = [hit for hit in self.hits if torrentFilter(hit)]

        if DEBUG:
            print >>sys.stderr, 'TorrentSearchGridManager: getHitsInCat: torrentFilter after filter found: %d items took %s' % (len(self.hits), time() - beginfilterhits)

        # 3. Add remote hits that may apply. TODO: double filtering, could
        # add remote hits to self.hits before filter(torrentFilter,...)
        self.addStoredRemoteResults()

        if DEBUG:
            print >>sys.stderr, 'TorrentSearchGridManager: getHitsInCat: found after remote search: %d items' % len(self.hits)

        beginsort = time()

        if sort == 'rameezmetric':
            self.sort()

        # Nic: Ok this is somewhat diagonal to the previous sorting algorithms
        # eventually, these should probably be combined
        # since for now, however, my reranking is very tame (exchanging first
        # and second place under certain circumstances) this should be fine...
        self.hits = self.rerankingStrategy.rerank(self.hits, self.searchkeywords, self.torrent_db,
                                                  self.pref_db, self.mypref_db, self.search_db)

        # boudewijn: now that we have sorted the search results we
        # want to prefetch the top N torrents.
        self.guiserver.add_task(self.prefetch_hits, t=1, id="PREFETCH_RESULTS")

        self.hits = self.library_manager.addDownloadStates(self.hits)

        beginbundle = time()

        # vliegendhart: do grouping here
        # Niels: important, we should not change self.hits otherwise
        # prefetching will not work
        returned_hits, selected_bundle_mode = self.bundler.bundle(self.hits, bundle_mode, self.searchkeywords)

        if DEBUG:
            print >> sys.stderr, 'getHitsInCat took: %s of which sort took %s, bundle took %s' % (time() - begintime, beginbundle - beginsort, time() - beginbundle)

        return [len(returned_hits), self.filteredResults, selected_bundle_mode, returned_hits]

    def prefetch_hits(self):
        """
        Prefetching attempts to reduce the time required to get the
        user the data it wants.

        We assume the torrent at the beginning of self.hits are more
        likely to be selected by the user than the ones at the
        end. This allows us to perform prefetching operations on a
        subselection of these items.

        At most 10 downloads are requested, and only the first 25 hits
        are considered, whichever limit is reached first. Hits whose
        .torrent was already requested are not requested again.
        """
        max_prefetches = 10   # (1) prefetch a maximum of N hits
        max_candidates = 25   # (2) prefetch only from the first M hits
        begin_time = time()

        def sesscb_prefetch_done(infohash, metadata, filename):
            if DEBUG:
                # find the original hit
                for hit in self.hits:
                    if hit["infohash"] == infohash:
                        print >> sys.stderr, "Prefetch: in", "%.1fs" % (time() - begin_time), repr(hit["name"])
                        return
                print >> sys.stderr, "Prefetch BUG. We got a hit from something we didn't ask for"

        prefetch_counter = 0

        # prefetch .torrent files if they are from buddycast sources
        for hit_counter, hit in enumerate(self.hits):
            if hit_counter >= max_candidates:
                break

            if self.getCollectedFilename(hit):
                continue

            if self.downloadTorrentfileFromPeers(hit, sesscb_prefetch_done, duplicate=False, prio=1):
                if DEBUG:
                    print >> sys.stderr, "Prefetch: attempting to download", repr(hit["name"])
                prefetch_counter += 1
                if prefetch_counter >= max_prefetches:
                    break

    def getSearchKeywords(self):
        """Return (keywords, number of hits, number of filtered results)."""
        return self.searchkeywords, len(self.hits), self.filteredResults

    def setSearchKeywords(self, wantkeywords):
        """Start a new query: store the keywords and reset per-query state."""
        if wantkeywords != self.searchkeywords:
            self.bundle_mode = None

        self.searchkeywords = wantkeywords
        if DEBUG:
            print >> sys.stderr, "TorrentSearchGridManager: keywords:", self.searchkeywords, ";time:%", time()

        self.filteredResults = 0
        self.remoteHits = {}
        # Reset so that searchLocalDatabase() does not treat the next lookup
        # as a repeat of the previous query.
        self.oldsearchkeywords = ''

    def setBundleMode(self, bundle_mode):
        """Switch the bundle mode and refresh the grid when it changed."""
        if bundle_mode != self.bundle_mode:
            self.bundle_mode = bundle_mode
            self.refreshGrid()

    def searchLocalDatabase(self):
        """ Called by GetHitsInCategory() to search local DB. Caches previous query result.

        Returns True when self.hits was replaced by fresh results, False
        when the cached list was kept or the query is empty.
        """
        if self.searchkeywords == self.oldsearchkeywords and len(self.hits) > 0:
            if DEBUG:
                print >>sys.stderr, "TorrentSearchGridManager: searchLocalDB: returning old hit list", len(self.hits)
            return False

        self.oldsearchkeywords = self.searchkeywords
        if DEBUG:
            print >>sys.stderr, "TorrentSearchGridManager: searchLocalDB: Want", self.searchkeywords

        # Nothing to search for: no keywords, or a single empty keyword.
        if len(self.searchkeywords) == 0 or len(self.searchkeywords) == 1 and self.searchkeywords[0] == '':
            return False

        self.hits = self.searchmgr.search(self.searchkeywords)
        return True

    def addStoredRemoteResults(self):
        """ Called by GetHitsInCategory() to add remote results to self.hits

        Remote hits whose infohash is already present in self.hits are
        skipped.
        """
        if len(self.remoteHits) > 0:
            catResults = self.remoteHits.values()
            if DEBUG:
                print >> sys.stderr, "TorrentSearchGridManager: remote: Adding %d remote results (%d in category)" % (len(self.remoteHits), len(catResults))

            # One pass to collect what we already show, so each remote item
            # costs a set lookup instead of a scan over all hits.
            known_infohashes = set(item['infohash'] for item in self.hits)
            for remoteItem in catResults:
                infohash = remoteItem['infohash']
                if infohash not in known_infohashes:
                    self.hits.append(remoteItem)
                    known_infohashes.add(infohash)

    def gotRemoteHits(self, permid, kws, answers):
        """
        Called by GUIUtil when hits come in.

        29/06/11 boudewijn: from now on called on the GUITaskQueue instead on the wx MainThread to
        avoid blocking the GUI because of the database queries.
        """
        self.guiserver.add_task(lambda: self._gotRemoteHits(permid, kws, answers))

    def _gotRemoteHits(self, permid, kws, answers):
        """Convert, filter and store remote search answers from peer PERMID.

        KWS are the keywords the answers belong to; answers for anything
        other than the current query are dropped. ANSWERS maps infohash to
        a dictionary describing the torrent.

        Returns True when the answers were for the current query, False
        otherwise or when processing failed.
        """
        try:
            if DEBUG:
                print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHist: got", len(answers), "unfiltered results for", kws, bin2str(permid), time()

            # Always store the results, only display when in filesMode
            # We got some replies. First check if they are for the current query
            if self.searchkeywords == kws:
                numResults = 0
                catobj = Category.getInstance()
                for key, value in answers.iteritems():

                    if self.torrent_db.hasTorrent(key):
                        if DEBUG:
                            print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHist: Ignoring hit for", repr(value['content_name']), "already got it"
                        continue  # do not show results we have ourselves

                    # Convert answer fields as per
                    # Session.query_connected_peers() spec. to NEWDB format
                    newval = {}
                    newval['name'] = value['content_name']
                    newval['infohash'] = key
                    newval['torrent_file_name'] = ''
                    newval['length'] = value['length']
                    newval['creation_date'] = time()  # None gives '?' in GUI
                    newval['relevance'] = 0
                    newval['source'] = 'RQ'
                    newval['category'] = value['category'][0]
                    # We trust the peer
                    newval['status'] = 'good'
                    newval['num_seeders'] = value['seeder'] or 0
                    newval['num_leechers'] = value['leecher'] or 0

                    # OLPROTO_VER_NINETH includes a torrent_size. Set to
                    # -1 when not available.
                    newval['torrent_size'] = value.get('torrent_size', -1)

                    # OLPROTO_VER_ELEVENTH includes channel_permid, channel_name fields.
                    if 'channel_permid' in value:
                        newval['channel_permid'] = value['channel_permid']
                    else:
                        # just to check if it is not OLPROTO_VER_ELEVENTH version
                        # if so, check word boundaries in the swarm name
                        ls = split_into_keywords(value['content_name'])

                        if DEBUG:
                            print >>sys.stderr, "TorrentSearchGridManager: ls is", repr(ls)
                            print >>sys.stderr, "TorrentSearchGridManager: kws is", repr(kws)

                        if any(kw not in ls for kw in kws):
                            continue
                        newval['channel_permid'] = ""

                    newval['channel_name'] = value.get('channel_name', "")

                    if 'channel_permid' in value:
                        newval['neg_votes'] = self.votecastdb.getNegVotes(value['channel_permid'])
                        newval['subscriptions'] = self.votecastdb.getNumSubscriptions(value['channel_permid'])
                        if newval['subscriptions'] - newval['neg_votes'] < VOTE_LIMIT:
                            # now, this is SPAM
                            continue
                    else:
                        newval['subscriptions'] = 0
                        newval['neg_votes'] = 0

                    # Extra field: Set from which peer this info originates
                    newval['query_permids'] = [permid]

                    # Filter out results from unwanted categories
                    banned = False
                    for cat in value['category']:
                        if catobj.getCategoryRank(cat) == -1:
                            if DEBUG:
                                print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHits: Got", repr(newval['name']), "from banned category", cat, ", discarded it."
                            banned = True
                            self.filteredResults += 1
                            break
                    if banned:
                        continue

                    if newval['infohash'] in self.remoteHits:
                        if DEBUG:
                            print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHist: merging hit", repr(newval['name'])

                        # merge this result with previous results
                        oldval = self.remoteHits[newval['infohash']]
                        for query_permid in newval['query_permids']:
                            if query_permid not in oldval['query_permids']:
                                oldval['query_permids'].append(query_permid)

                        # if a hit belongs to a more popular channel, then replace the previous
                        if newval['channel_permid'] != "" and newval['channel_name'] != "" and newval['subscriptions'] - newval['neg_votes'] > oldval['subscriptions'] - oldval['neg_votes']:
                            oldval['subscriptions'] = newval['subscriptions']
                            oldval['neg_votes'] = newval['neg_votes']
                            oldval['channel_permid'] = newval['channel_permid']
                            oldval['channel_name'] = newval['channel_name']
                    else:
                        if DEBUG:
                            print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHist: appending hit", repr(newval['name'])

                        self.remoteHits[newval['infohash']] = newval
                        numResults += 1

                if numResults > 0:
                    self.refreshGrid()
                    if DEBUG:
                        print >>sys.stderr, 'TorrentSearchGridManager: gotRemoteHits: Refresh grid after new remote torrent hits came in'
                return True
            elif DEBUG:
                print >>sys.stderr, "TorrentSearchGridManager: gotRemoteHits: got hits for", kws, "but current search is for", self.searchkeywords
            return False
        except Exception:
            # Best effort: a malformed answer must not take down the task
            # queue, so log the traceback and report failure.
            print_exc()
            return False

    def refreshGrid(self):
        """Ask the registered grid manager, if any, to refresh."""
        if self.gridmgr is not None:
            self.gridmgr.refresh()

    # Rameez: The following code will call normalization functions and then
    # sort and merge the torrent results
    def sort(self):
        """Sort self.hits in place, best score first.

        The score of a hit is 0.8 * normalized seeders + 0.1 * normalized
        negative votes + 0.1 * normalized subscriptions. Hits with equal
        scores keep their relative order.
        """
        self.doStatNormalization(self.hits, 'num_seeders', 'norm_num_seeders')
        self.doStatNormalization(self.hits, 'neg_votes', 'norm_neg_votes')
        self.doStatNormalization(self.hits, 'subscriptions', 'norm_subscriptions')

        # NOTE(review): negative votes contribute positively to the score,
        # exactly as before -- confirm that this weighting is intended.
        def score(hit):
            return (0.8 * hit.get('norm_num_seeders', 0)
                    + 0.1 * hit.get('norm_neg_votes', 0)
                    + 0.1 * hit.get('norm_subscriptions', 0))

        # A key gives a consistent total order; reverse=True keeps the sort
        # stable for ties.
        self.hits.sort(key=score, reverse=True)

    def doStatNormalization(self, hits, normKey, newKey):
        '''Center the variance on zero (this means mean == 0) and divide
        all values by the standard deviation. This is sometimes called scaling.
        This is done on the field normKey of hits and the output is added to a new
        field called newKey.

        Missing or None values count as 0. When the standard deviation is
        zero (fewer than two hits, or all values equal) newKey is set to 0.
        '''
        # Work in floats: integer statistics must not be truncated by
        # integer division when computing the mean and the variance.
        values = [float(hit.get(normKey, 0) or 0) for hit in hits]
        count = len(values)

        if count > 0:
            mean = sum(values) / count
        else:
            mean = 0.0

        if count > 1:
            # Sample variance (n - 1 in the denominator).
            variance = sum((value - mean) ** 2 for value in values) / (count - 1)
        else:
            variance = 0.0

        stdDev = sqrt(variance)

        for hit, value in zip(hits, values):
            if stdDev > 0:
                hit[newKey] = (value - mean) / stdDev
            else:
                hit[newKey] = 0