def reindex_torrents(self):
    """
    Rebuild the FullTextIndex rows for every torrent in the database.

    Required when upgrading to a newer FTS engine, since the index
    format is not compatible across engine versions.
    """
    for torrent_id, torrent_name in self.db.fetchall("SELECT torrent_id, name FROM Torrent"):
        # A torrent without a name has nothing to index.
        if torrent_name is None:
            continue

        swarm_keywords = " ".join(split_into_keywords(torrent_name))

        name_parts = []
        ext_parts = []
        file_rows = self.db.fetchall("SELECT path FROM TorrentFiles WHERE torrent_id = ?",
                                     (torrent_id,))
        for file_row in file_rows:
            base, extension = os.path.splitext(file_row[0])
            name_parts.append(" ".join(split_into_keywords(base)))
            # Drop the leading '.' of the extension.
            ext_parts.append(extension[1:])

        self.db.execute_write(
            u"INSERT INTO FullTextIndex (rowid, swarmname, filenames, fileextensions)"
            u" VALUES(?,?,?,?)",
            (torrent_id, swarm_keywords, " ".join(name_parts), " ".join(ext_parts)))

    self.db.commit_now()
def test_split_into_keywords(self):
    """split_into_keywords returns a list; the optional second argument reduces the keyword count."""
    sentence = "to be or not to be"

    unfiltered = split_into_keywords(sentence)
    self.assertIsInstance(unfiltered, list)
    self.assertEqual(len(unfiltered), 6)

    filtered = split_into_keywords(sentence, True)
    self.assertIsInstance(filtered, list)
    self.assertEqual(len(filtered), 4)
def get_local(self, filter):
    """
    Search the local torrent database for torrent files by keyword.

    :param filter: keyword filter string.
    :return: List of torrents in dictionary format (via self._prepare_torrents).
    """
    keywords = split_into_keywords(unicode(filter))
    # Single-character keywords are too unselective to be useful.
    keywords = [keyword for keyword in keywords if len(keyword) > 1]

    # T is the Torrent (when local) table or CollectedTorrent view (external),
    # C is the _ChannelTorrents table
    TORRENT_REQ_COLUMNS = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category', 'status',
                           'num_seeders', 'num_leechers', 'C.id', 'T.dispersy_id', 'C.name',
                           'T.name', 'C.description', 'C.time_stamp', 'C.inserted']

    @forceAndReturnDBThread
    def local_search(keywords):
        begintime = time()

        results = self._torrent_db.searchNames(keywords, doSort=False, keys=TORRENT_REQ_COLUMNS)

        begintuples = time()

        if len(results) > 0:
            def create_channel(a):
                return Channel(*a)

            # NOTE(review): this channel cache is no longer read by create_torrent
            # below; kept because Channel construction may be relied upon elsewhere
            # — confirm before removing.
            channels = {}
            for a in results:
                channel_details = a[-10:]
                if channel_details[0] and channel_details[0] not in channels:
                    channels[channel_details[0]] = create_channel(channel_details)

            def create_torrent(a):
                t = Torrent(*a[:11] + [False])
                t.torrent_db = self._torrent_db
                t.channelcast_db = self._channelcast_db
                t.assignRelevance(a[-11])
                return t

            results = map(create_torrent, results)

        _logger.debug('TorrentSearchGridManager: _doSearchLocalDatabase took: %s of which tuple creation took %s',
                      time() - begintime, time() - begintuples)
        return results

    return self._prepare_torrents(local_search(keywords))
def reindex_torrents(self):
    """
    Reindex all torrents in the database.

    Required when upgrading to a newer FTS engine.
    """
    results = self.db.fetchall("SELECT torrent_id, name FROM Torrent")
    for torrent_result in results:
        # A torrent without a name cannot be indexed; skip it.
        if torrent_result[1] is None:
            continue

        # Tokenize the swarm (torrent) name into search keywords.
        swarmname = split_into_keywords(torrent_result[1])

        files_results = self.db.fetchall("SELECT path FROM TorrentFiles WHERE torrent_id = ?", (torrent_result[0],))
        filenames = ""
        fileexts = ""
        for file_result in files_results:
            filename, ext = os.path.splitext(file_result[0])
            parts = split_into_keywords(filename)
            # Accumulate space-separated keywords; the trailing space is trimmed below.
            filenames += " ".join(parts) + " "
            # ext[1:] drops the leading '.' of the extension.
            fileexts += ext[1:] + " "

        # [:-1] strips the trailing space appended in the loop above.
        self.db.execute_write(u"INSERT INTO FullTextIndex (rowid, swarmname, filenames, fileextensions)"
                              u" VALUES(?,?,?,?)",
                              (torrent_result[0], " ".join(swarmname), filenames[:-1], fileexts[:-1]))

    self.db.commit_now()
def assignRelevance(self, matches):
    """
    Assign a relevance score to this Torrent.

    @param matches A dict containing sets stored under the keys
        'swarmname', 'filenames' and 'fileextensions'.
    """
    swarm_hits = matches['swarmname']

    # Negated index of the earliest swarm-name term that matched,
    # or None when nothing in the swarm name matched.
    position_score = None
    if swarm_hits:
        terms = split_into_keywords(self.name)
        position_score = next((-index for index, word in enumerate(terms) if word in swarm_hits),
                              None)

    self.relevance_score = [len(swarm_hits),
                            position_score,
                            len(matches['filenames']),
                            len(matches['fileextensions']),
                            0]
def _set_keywords(self, keywords):
    """
    Set the keywords that a next search should use. This clears the
    previous keywords and results.

    :param keywords: Keyword string that should be searched for.
    :return: Boolean indicating success.
    """
    # Tokenize, dropping single-character keywords which are too unselective.
    new_keywords = [kw for kw in split_into_keywords(unicode(keywords)) if len(kw) > 1]

    # Nothing to clear when the keyword set did not change.
    if new_keywords == self._keywords:
        return True

    try:
        self._remote_lock.acquire()
        self._keywords = new_keywords
        self._results = []
        self._result_cids = []
    finally:
        self._remote_lock.release()

    return True
def _set_keywords(self, keywords):
    """
    Set the keywords that a next search should use. This clears the
    previous keywords and results.

    :param keywords: Keyword string that should be searched for.
    :return: Boolean indicating success.
    """
    keywords = split_into_keywords(unicode(keywords))
    # Single-character keywords are too unselective to search on.
    keywords = [keyword for keyword in keywords if len(keyword) > 1]
    # No state change needed when the keywords are unchanged.
    if keywords == self._keywords:
        return True

    try:
        self._remote_lock.acquire()
        self._keywords = keywords
        self._results = []
        self._result_infohashes = []
    finally:
        self._remote_lock.release()

    return True
def assignRelevance(self, matches): """ Assigns a relevance score to this Torrent. @param matches A dict containing sets stored under the keys 'swarmname', 'filenames' and 'fileextensions'. """ # Find the lowest term position of the matching keywords pos_score = None if matches['swarmname']: swarmnameTerms = split_into_keywords(self.name) swarmnameMatches = matches['swarmname'] for i, term in enumerate(swarmnameTerms): if term in swarmnameMatches: pos_score = -i break self.relevance_score = [ len(matches['swarmname']), pos_score, len(matches['filenames']), len(matches['fileextensions']), 0 ]
def _on_torrent_search_results(self, subject, change_type, object_id, search_results):
    """
    The callback function handles the search results from SearchCommunity.
    :param subject: Must be SIGNAL_SEARCH_COMMUNITY.
    :param change_type: Must be SIGNAL_ON_SEARCH_RESULTS.
    :param object_id: Must be None.
    :param search_results: The result dictionary which has 'keywords', 'results', and 'candidate'.
    """
    if self.session is None:
        return 0

    keywords = search_results['keywords']
    results = search_results['results']
    candidate = search_results['candidate']

    self._logger.debug(
        "Got torrent search results %s, keywords %s, candidate %s",
        len(results), keywords, candidate)

    # drop it if these are the results of an old keyword
    if keywords != self._current_keywords:
        return

    # results is a list of tuples that are:
    # (1) infohash, (2) name, (3) length, (4) num_files, (5) category, (6) creation_date, (7) num_seeders
    # (8) num_leechers, (9) channel_cid
    remote_torrent_result_list = []

    # get and cache channels
    channel_cid_list = [
        result[-1] for result in results if result[-1] is not None
    ]
    channel_cache_list = self.channelcast_db.getChannelsByCID(channel_cid_list)
    channel_cache_dict = {}
    for channel in channel_cache_list:
        # index 1 is cid
        channel_cache_dict[channel[1]] = channel

    # create result dictionaries that are understandable
    for result in results:
        remote_torrent_result = {
            'torrent_type': 'remote',  # indicates if it is a remote torrent
            'relevance_score': None,
            'torrent_id': -1,
            'infohash': result[0],
            'name': result[1],
            'length': result[2],
            'num_files': result[3],
            'category': result[4][0],
            'creation_date': result[5],
            'num_seeders': result[6],
            'num_leechers': result[7],
            'status': u'good',
            'query_candidates': {candidate},
            'channel': None
        }

        channel_cid = result[-1]
        if channel_cid is not None and channel_cid in channel_cache_dict:
            channel = channel_cache_dict[channel_cid]
            channel_result = {
                'id': channel[0],
                'name': channel[2],
                'description': channel[3],
                'dispersy_cid': channel[1],
                'num_torrents': channel[4],
                'num_favorite': channel[5],
                'num_spam': channel[6],
                'modified': channel[8],
            }
            remote_torrent_result['channel'] = channel_result

        # guess matches
        keyword_set = set(keywords)
        swarmname_set = set(
            split_into_keywords(remote_torrent_result['name']))
        matches = {
            'fileextensions': set(),
            'swarmname': swarmname_set & keyword_set,  # all keywords matching in swarmname
        }
        # remaining keywords should thus be matching in filenames or fileextensions
        matches['filenames'] = keyword_set - matches['swarmname']

        if len(matches['filenames']) == 0:
            # NOTE(review): splitext is applied to result[0], the infohash, not a
            # file name — looks suspicious; verify against the sending side.
            _, ext = os.path.splitext(result[0])
            ext = ext[1:]

            matches['filenames'] = matches['swarmname']
            matches['filenames'].discard(ext)

            if ext in keyword_set:
                matches['fileextensions'].add(ext)

        # Find the lowest term position of the matching keywords
        pos_score = None
        if matches['swarmname']:
            swarmnameTerms = split_into_keywords(
                remote_torrent_result['name'])
            swarmnameMatches = matches['swarmname']
            for i, term in enumerate(swarmnameTerms):
                if term in swarmnameMatches:
                    pos_score = -i
                    break

        remote_torrent_result['relevance_score'] = [
            len(matches['swarmname']), pos_score,
            len(matches['filenames']),
            len(matches['fileextensions']), 0
        ]

        # append the result into the result list
        remote_torrent_result_list.append(remote_torrent_result)

    results_data = {
        'keywords': keywords,
        'result_list': remote_torrent_result_list
    }

    # inform other components about the results
    self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS,
                                 None, results_data)
def dosearch(self, input=None):
    """
    Start a search from the GUI search field (or from the given input string).

    :param input: optional search string; when None the text is read from
        the top-bar search field.
    """
    if input is None:
        sf = self.frame.top_bg.searchField
        if sf is None:
            return
        input = sf.GetValue()

    # Ignore empty or whitespace-only input.
    if input:
        input = input.strip()
        if input == '':
            return
    else:
        return

    self.frame.top_bg.searchField.SetValue(input)

    if self.frame.startDownloadFromArg(input):
        # The input was a downloadable argument (handled by startDownloadFromArg),
        # not a keyword query: clear the field and show the downloads page.
        self.frame.top_bg.searchField.Clear()
        self.ShowPage('my_files')
    else:
        keywords = split_into_keywords(input)
        # Single-character keywords are too unselective to search on.
        keywords = [keyword for keyword in keywords if len(keyword) > 1]

        if len(keywords) == 0:
            self.Notify('Please enter a search term',
                        "Your search term '%s' was either to small or to general." % input,
                        icon=wx.ART_INFORMATION)
        else:
            self.frame.top_bg.StartSearch()
            self.current_search_query = keywords
            self._logger.debug("GUIUtil: searchFiles: %s %s", keywords, time())

            # Freeze the list while it is being reset/repopulated.
            self.frame.searchlist.Freeze()

            self.torrentsearch_manager.setSearchKeywords(keywords)
            self.channelsearch_manager.setSearchKeywords(keywords)

            # We set oldkeywords to '', which will trigger a reset in SetKeywords (called from ShowPage).
            # This avoids calling reset twice.
            # Niels: 17-09-2012, unfortunately showpage calls show(true)
            # which results in the dirty items being refreshed.
            # We need to call Reset in order to prevent this from happening
            self.frame.searchlist.Reset()
            self.ShowPage('search_results', keywords)

            # We now have to call thaw, otherwise loading message will not be shown.
            self.frame.searchlist.Thaw()

            # Perform local search
            self.torrentsearch_manager.set_gridmgr(self.frame.searchlist.GetManager())
            self.channelsearch_manager.set_gridmgr(self.frame.searchlist.GetManager())

            def db_thread():
                # Runs off the GUI thread; returns the number of connected peers.
                self.torrentsearch_manager.refreshGrid()

                nr_peers_connected = self.torrentsearch_manager.searchDispersy()
                self.channelsearch_manager.searchDispersy()
                return nr_peers_connected

            def wx_thread(delayedResult):
                # Runs back on the GUI thread with db_thread's result.
                nr_peers_connected = delayedResult.get()

                if self and self.frame and self.frame.searchlist:
                    self.frame.searchlist.SetMaxResults(nr_peers_connected + 1, keywords)
                    self.frame.searchlist.NewResult()

            startWorker(wx_thread, db_thread, priority=1024)
def render_GET(self, request):
    """
    .. http:get:: /search?q=(string:query)

    A GET request to this endpoint will create a search. Results are returned over the events endpoint, one by one.
    First, the results available in the local database will be pushed. After that, incoming Dispersy results are
    pushed. The query to this endpoint is passed using the url, i.e. /search?q=pioneer.

        **Example request**:

        .. sourcecode:: none

            curl -X GET http://localhost:8085/search?q=tribler

        **Example response**:

        .. sourcecode:: javascript

            {
                "type": "search_result_channel",
                "query": "test",
                "result": {
                    "id": 3,
                    "dispersy_cid": "da69aaad39ccf468aba2ab9177d5f8d8160135e6",
                    "name": "My fancy channel",
                    "description": "A description of this fancy channel",
                    "subscribed": True,
                    "votes": 23,
                    "torrents": 3,
                    "spam": 5,
                    "modified": 14598395,
                    "can_edit": False
                }
            }
    """
    if 'q' not in request.args:
        request.setResponseCode(http.BAD_REQUEST)
        return json.dumps({"error": "query parameter missing"})

    # Notify the events endpoint that we are starting a new search query
    self.events_endpoint.start_new_query()

    # We first search the local database for torrents and channels
    query = unicode(request.args['q'][0], 'utf-8')
    keywords = split_into_keywords(query)

    results_local_channels = self.channel_db_handler.search_in_local_channels_db(query)
    results_dict = {"keywords": keywords, "result_list": results_local_channels}
    # Channel results are pushed to listeners via the notifier, not returned here.
    self.session.notifier.notify(SIGNAL_CHANNEL, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

    torrent_db_columns = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category',
                          'num_seeders', 'num_leechers', 'last_tracker_check']
    results_local_torrents = self.torrent_db_handler.search_in_local_torrents_db(query, keys=torrent_db_columns)
    results_dict = {"keywords": keywords, "result_list": results_local_torrents}
    self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

    # Create remote searches
    try:
        self.session.search_remote_torrents(keywords)
        self.session.search_remote_channels(keywords)
    except OperationNotEnabledByConfigurationException as exc:
        # Remote search is optional; log and still report the local query as done.
        self._logger.error(exc)

    return json.dumps({"queried": True})
def _on_torrent_search_results(self, subject, change_type, object_id, search_results):
    """
    The callback function handles the search results from SearchCommunity.
    :param subject: Must be SIGNAL_SEARCH_COMMUNITY.
    :param change_type: Must be SIGNAL_ON_SEARCH_RESULTS.
    :param object_id: Must be None.
    :param search_results: The result dictionary which has 'keywords', 'results', and 'candidate'.
    """
    if self.session is None:
        return 0

    keywords = search_results['keywords']
    results = search_results['results']
    candidate = search_results['candidate']

    self._logger.debug("Got torrent search results %s, keywords %s, candidate %s",
                       len(results), keywords, candidate)

    # drop it if these are the results of an old keyword
    if keywords != self._current_keywords:
        return

    # results is a list of tuples that are:
    # (1) infohash, (2) name, (3) length, (4) num_files, (5) category, (6) creation_date, (7) num_seeders
    # (8) num_leechers, (9) channel_cid
    remote_torrent_result_list = []

    # get and cache channels
    channel_cid_list = [result[-1] for result in results if result[-1] is not None]
    channel_cache_list = self.channelcast_db.getChannelsByCID(channel_cid_list)
    channel_cache_dict = {}
    for channel in channel_cache_list:
        # index 1 is cid
        channel_cache_dict[channel[1]] = channel

    # create result dictionaries that are understandable
    for result in results:
        remote_torrent_result = {'torrent_type': 'remote',  # indicates if it is a remote torrent
                                 'relevance_score': None,
                                 'torrent_id':-1,
                                 'infohash': result[0],
                                 'name': result[1],
                                 'length': result[2],
                                 'num_files': result[3],
                                 'category': result[4][0],
                                 'creation_date': result[5],
                                 'num_seeders': result[6],
                                 'num_leechers': result[7],
                                 'status': u'good',
                                 'query_candidates': {candidate},
                                 'channel': None}

        channel_cid = result[-1]
        if channel_cid is not None and channel_cid in channel_cache_dict:
            channel = channel_cache_dict[channel_cid]
            channel_result = {'id': channel[0],
                              'name': channel[2],
                              'description': channel[3],
                              'dispersy_cid': channel[1],
                              'num_torrents': channel[4],
                              'num_favorite': channel[5],
                              'num_spam': channel[6],
                              'modified': channel[8],
                              }
            remote_torrent_result['channel'] = channel_result

        # guess matches
        keyword_set = set(keywords)
        swarmname_set = set(split_into_keywords(remote_torrent_result['name']))
        matches = {'fileextensions': set(),
                   'swarmname': swarmname_set & keyword_set,  # all keywords matching in swarmname
                   }
        # remaining keywords should thus be matching in filenames or fileextensions
        matches['filenames'] = keyword_set - matches['swarmname']

        if len(matches['filenames']) == 0:
            # NOTE(review): splitext is applied to result[0], the infohash, not a
            # file name — looks suspicious; verify against the sending side.
            _, ext = os.path.splitext(result[0])
            ext = ext[1:]

            matches['filenames'] = matches['swarmname']
            matches['filenames'].discard(ext)

            if ext in keyword_set:
                matches['fileextensions'].add(ext)

        # Find the lowest term position of the matching keywords
        pos_score = None
        if matches['swarmname']:
            swarmnameTerms = split_into_keywords(remote_torrent_result['name'])
            swarmnameMatches = matches['swarmname']
            for i, term in enumerate(swarmnameTerms):
                if term in swarmnameMatches:
                    pos_score = -i
                    break

        remote_torrent_result['relevance_score'] = [len(matches['swarmname']), pos_score,
                                                    len(matches['filenames']),
                                                    len(matches['fileextensions']), 0]

        # append the result into the result list
        remote_torrent_result_list.append(remote_torrent_result)

    results_data = {'keywords': keywords,
                    'result_list': remote_torrent_result_list}

    # inform other components about the results
    self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_data)
def dosearch(self, input=None):
    """
    Start a search from the GUI search field (or from the given input string).

    :param input: optional search string; when None the text is read from
        the top-bar search field.
    """
    if input is None:
        sf = self.frame.top_bg.searchField
        if sf is None:
            return
        input = sf.GetValue()

    # Ignore empty or whitespace-only input.
    if input:
        input = input.strip()
        if input == '':
            return
    else:
        return

    self.frame.top_bg.searchField.SetValue(input)

    if self.frame.startDownloadFromArg(input):
        # The input was a downloadable argument (handled by startDownloadFromArg),
        # not a keyword query: clear the field and show the downloads page.
        self.frame.top_bg.searchField.Clear()
        self.ShowPage('my_files')
    else:
        keywords = split_into_keywords(input)
        # Single-character keywords are too unselective to search on.
        keywords = [keyword for keyword in keywords if len(keyword) > 1]

        if len(keywords) == 0:
            self.Notify(
                'Please enter a search term',
                "Your search term '%s' was either to small or to general." % input,
                icon=wx.ART_INFORMATION)
        else:
            self.frame.top_bg.StartSearch()
            self.current_search_query = keywords
            self._logger.debug("GUIUtil: searchFiles: %s %s", keywords, time())

            # Freeze the list while it is being reset/repopulated.
            self.frame.searchlist.Freeze()

            self.torrentsearch_manager.setSearchKeywords(keywords)
            self.channelsearch_manager.setSearchKeywords(keywords)

            # We set oldkeywords to '', which will trigger a reset in SetKeywords (called from ShowPage).
            # This avoids calling reset twice.
            # Niels: 17-09-2012, unfortunately showpage calls show(true)
            # which results in the dirty items being refreshed.
            # We need to call Reset in order to prevent this from happening
            self.frame.searchlist.Reset()
            self.ShowPage('search_results', keywords)

            # We now have to call thaw, otherwise loading message will not be shown.
            self.frame.searchlist.Thaw()

            # Perform local search
            self.torrentsearch_manager.set_gridmgr(
                self.frame.searchlist.GetManager())
            self.channelsearch_manager.set_gridmgr(
                self.frame.searchlist.GetManager())

            def db_thread():
                # Runs off the GUI thread; returns the number of connected peers.
                self.torrentsearch_manager.refreshGrid()

                nr_peers_connected = self.torrentsearch_manager.searchDispersy()
                self.channelsearch_manager.searchDispersy()
                return nr_peers_connected

            def wx_thread(delayedResult):
                # Runs back on the GUI thread with db_thread's result.
                nr_peers_connected = delayedResult.get()

                if self and self.frame and self.frame.searchlist:
                    self.frame.searchlist.SetMaxResults(
                        nr_peers_connected + 1, keywords)
                    self.frame.searchlist.NewResult()

            startWorker(wx_thread, db_thread, priority=1024)
def render_GET(self, request):
    """
    .. http:get:: /search?q=(string:query)

    A GET request to this endpoint will create a search. Results are returned over the events endpoint, one by one.
    First, the results available in the local database will be pushed. After that, incoming Dispersy results are
    pushed. The query to this endpoint is passed using the url, i.e. /search?q=pioneer.

        **Example request**:

        .. sourcecode:: none

            curl -X GET http://localhost:8085/search?q=tribler

        **Example response**:

        .. sourcecode:: javascript

            {
                "type": "search_result_channel",
                "query": "test",
                "result": {
                    "id": 3,
                    "dispersy_cid": "da69aaad39ccf468aba2ab9177d5f8d8160135e6",
                    "name": "My fancy channel",
                    "description": "A description of this fancy channel",
                    "subscribed": True,
                    "votes": 23,
                    "torrents": 3,
                    "spam": 5,
                    "modified": 14598395,
                    "can_edit": False
                }
            }
    """
    if 'q' not in request.args:
        request.setResponseCode(http.BAD_REQUEST)
        return json.dumps({"error": "query parameter missing"})

    # Notify the events endpoint that we are starting a new search query
    self.events_endpoint.start_new_query()

    # We first search the local database for torrents and channels
    query = unicode(request.args['q'][0], 'utf-8')
    keywords = split_into_keywords(query)

    results_local_channels = self.channel_db_handler.search_in_local_channels_db(query)
    # Merge in channel results from the metadata store as well.
    with db_session:
        results_local_channels.extend(map(convert_channel_metadata_to_tuple,
                                          self.session.lm.mds.ChannelMetadata.search_keyword(query)))
    results_dict = {"keywords": keywords, "result_list": results_local_channels}
    # Channel results are pushed to listeners via the notifier, not returned here.
    self.session.notifier.notify(SIGNAL_CHANNEL, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

    torrent_db_columns = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category',
                          'num_seeders', 'num_leechers', 'last_tracker_check']
    results_local_torrents = self.torrent_db_handler.search_in_local_torrents_db(query, keys=torrent_db_columns)
    # Merge in torrent results from the metadata store as well.
    with db_session:
        results_local_torrents.extend(map(convert_torrent_metadata_to_tuple,
                                          self.session.lm.mds.TorrentMetadata.search_keyword(query)))
    results_dict = {"keywords": keywords, "result_list": results_local_torrents}
    self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

    # Create remote searches
    try:
        self.session.search_remote_torrents(keywords)
        self.session.search_remote_channels(keywords)
    except OperationNotEnabledByConfigurationException as exc:
        # Remote search is optional; log and still report the local query as done.
        self._logger.error(exc)

    return json.dumps({"queried": True})