Example 1
    def reindex_torrents(self):
        """
        Reindex all torrents in the database. Required when upgrading to a newer FTS engine.
        """
        results = self.db.fetchall("SELECT torrent_id, name FROM Torrent")
        for torrent_result in results:
            if torrent_result[1] is None:
                continue

            swarmname = split_into_keywords(torrent_result[1])
            files_results = self.db.fetchall(
                "SELECT path FROM TorrentFiles WHERE torrent_id = ?",
                (torrent_result[0], ))
            filenames = ""
            fileexts = ""
            for file_result in files_results:
                filename, ext = os.path.splitext(file_result[0])
                parts = split_into_keywords(filename)
                filenames += " ".join(parts) + " "
                fileexts += ext[1:] + " "

            self.db.execute_write(
                u"INSERT INTO FullTextIndex (rowid, swarmname, filenames, fileextensions)"
                u" VALUES(?,?,?,?)", (torrent_result[0], " ".join(swarmname),
                                      filenames[:-1], fileexts[:-1]))

        self.db.commit_now()
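
The reindex pass above rebuilds an SQLite full-text index. As a rough illustration of how such an index is typically consulted, the sketch below runs a MATCH query against the FullTextIndex table; it is an assumption (not taken from these snippets) that the table is an FTS3/FTS4 virtual table and that `db` exposes the same `fetchall()` interface as `self.db` above.

# Hypothetical sketch only: keyword lookup against the rebuilt FullTextIndex.
# "swarmname:ubuntu" is an illustrative column-prefixed FTS query term.
rows = db.fetchall(u"SELECT rowid FROM FullTextIndex WHERE FullTextIndex MATCH ?",
                   (u"swarmname:ubuntu",))
matching_torrent_ids = [row[0] for row in rows]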
Example 2
    def test_split_into_keywords(self):
        result = split_into_keywords("to be or not to be")
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 6)

        result = split_into_keywords("to be or not to be", True)
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 4)
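
The test above pins down the expected behaviour of split_into_keywords: six keywords for "to be or not to be" without stopword filtering, four with it. The sketch below is a minimal illustration consistent with that test, not the actual Tribler implementation; the tokenisation rule and the stopword set are assumptions (in particular, "to" must be a stopword for the 6-to-4 reduction to hold).

import re

STOPWORDS = {u"to", u"of", u"the", u"and", u"a"}  # illustrative set, not the real list

def split_into_keywords(string, filter_stopwords=False):
    # Lowercase and split on runs of non-alphanumeric characters.
    keywords = [kw for kw in re.split(r"\W+", string.lower()) if kw]
    if filter_stopwords:
        keywords = [kw for kw in keywords if kw not in STOPWORDS]
    return keywords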
Example 3
    def get_local(self, filter):
        """
        Search the local torrent database for torrent files by keyword.
        :param filter: Keyword filter string.
        :return: List of torrents in dictionary format.
        """
        keywords = split_into_keywords(unicode(filter))
        keywords = [keyword for keyword in keywords if len(keyword) > 1]

        # T is the Torrent (when local) table or CollectedTorrent view (external), C is the _ChannelTorrents table
        TORRENT_REQ_COLUMNS = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category', 'status',
                               'num_seeders', 'num_leechers', 'C.id', 'T.dispersy_id', 'C.name', 'T.name',
                               'C.description', 'C.time_stamp', 'C.inserted']
        #TUMBNAILTORRENT_REQ_COLUMNS = ['torrent_id', 'Torrent.infohash', 'name', 'length', 'category', 'status', 'num_seeders', 'num_leechers']

        @forceAndReturnDBThread
        def local_search(keywords):
            begintime = time()

            results = self._torrent_db.searchNames(keywords, doSort=False, keys=TORRENT_REQ_COLUMNS)
            print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
            print results

            begintuples = time()

            if len(results) > 0:
                def create_channel(a):
                    return Channel(*a)

                channels = {}
                for a in results:
                    channel_details = a[-10:]
                    if channel_details[0] and channel_details[0] not in channels:
                        channels[channel_details[0]] = create_channel(channel_details)

                def create_torrent(a):
                    #channel = channels.get(a[-10], False)
                    #if channel and (channel.isFavorite() or channel.isMyChannel()):
                    #    t = ChannelTorrent(*a[:-12] + [channel, None])
                    #else:
                    t = Torrent(*a[:11] + [False])

                    t.torrent_db = self._torrent_db
                    t.channelcast_db = self._channelcast_db
                    #t.metadata_db = self._metadata_db
                    t.assignRelevance(a[-11])
                    return t

                results = map(create_torrent, results)
            print ">>>>>>> LOCAL RESULTS: %s" % results

            _logger.debug('TorrentSearchGridManager: _doSearchLocalDatabase took: %s of which tuple creation took %s', time() - begintime, time() - begintuples)
            return results

        results = self._prepare_torrents(local_search(keywords))
        print ">>>>>>> LOCAL RESDICT: %s" % results

        return results
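
For illustration only, get_local() could be driven as follows; `search_manager` is an assumed instance name and the dictionary keys depend on _prepare_torrents(), which is not shown in these snippets.

torrents = search_manager.get_local(u"ubuntu iso")  # hypothetical usage
for torrent in torrents:
    print torrent  # each entry is a torrent in dictionary format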
Example 4
    def reindex_torrents(self):
        """
        Reindex all torrents in the database. Required when upgrading to a newer FTS engine.
        """
        results = self.db.fetchall("SELECT torrent_id, name FROM Torrent")
        for torrent_result in results:
            if torrent_result[1] is None:
                continue

            swarmname = split_into_keywords(torrent_result[1])
            files_results = self.db.fetchall("SELECT path FROM TorrentFiles WHERE torrent_id = ?", (torrent_result[0],))
            filenames = ""
            fileexts = ""
            for file_result in files_results:
                filename, ext = os.path.splitext(file_result[0])
                parts = split_into_keywords(filename)
                filenames += " ".join(parts) + " "
                fileexts += ext[1:] + " "

            self.db.execute_write(u"INSERT INTO FullTextIndex (rowid, swarmname, filenames, fileextensions)"
                                  u" VALUES(?,?,?,?)",
                                  (torrent_result[0], " ".join(swarmname), filenames[:-1], fileexts[:-1]))

        self.db.commit_now()
Example 5
    def assignRelevance(self, matches):
        """
        Assigns a relevance score to this Torrent.
        @param matches A dict containing sets stored under the keys 'swarmname', 'filenames' and 'fileextensions'.
        """

        # Find the lowest term position of the matching keywords
        pos_score = None
        if matches['swarmname']:
            swarmnameTerms = split_into_keywords(self.name)
            swarmnameMatches = matches['swarmname']

            for i, term in enumerate(swarmnameTerms):
                if term in swarmnameMatches:
                    pos_score = -i
                    break

        self.relevance_score = [len(matches['swarmname']), pos_score,
                                len(matches['filenames']), len(matches['fileextensions']), 0]
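
Because relevance_score is a plain list, results can be ranked by comparing the lists lexicographically: more swarm-name matches win first, and on ties the negated position of the earliest match prefers torrents whose matching term appears earlier in the name. The sort below is purely illustrative, with made-up names and scores.

# Illustrative only: Python compares lists element by element.
scored = [
    ("result_a", [2, -3, 1, 0, 0]),  # two swarmname matches, earliest at term index 3
    ("result_b", [2, 0, 0, 0, 0]),   # two swarmname matches, earliest at term index 0
    ("result_c", [1, 0, 4, 1, 0]),   # only one swarmname match
]
scored.sort(key=lambda item: item[1], reverse=True)
# resulting order: result_b, result_a, result_c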
Example 6
    def _set_keywords(self, keywords):
        """
        Set the keywords that the next search should use. This clears the previous keywords and results.
        :param keywords: Keyword string that should be searched for.
        :return: Boolean indicating success.
        """
        keywords = split_into_keywords(unicode(keywords))
        keywords = [keyword for keyword in keywords if len(keyword) > 1]

        if keywords == self._keywords:
            return True

        try:
            self._remote_lock.acquire()

            self._keywords = keywords
            self._results = []
            self._result_cids = []
        finally:
            self._remote_lock.release()

        return True
Example 7
    def _set_keywords(self, keywords):
        """
        Set the keywords that the next search should use. This clears the previous keywords and results.
        :param keywords: Keyword string that should be searched for.
        :return: Boolean indicating success.
        """
        keywords = split_into_keywords(unicode(keywords))
        keywords = [keyword for keyword in keywords if len(keyword) > 1]

        if keywords == self._keywords:
            return True

        try:
            self._remote_lock.acquire()

            self._keywords = keywords
            self._results = []
            self._result_infohashes = []
        finally:
            self._remote_lock.release()

        return True
Example 8
    def assignRelevance(self, matches):
        """
        Assigns a relevance score to this Torrent.
        @param matches A dict containing sets stored under the keys 'swarmname', 'filenames' and 'fileextensions'.
        """

        # Find the lowest term position of the matching keywords
        pos_score = None
        if matches['swarmname']:
            swarmnameTerms = split_into_keywords(self.name)
            swarmnameMatches = matches['swarmname']

            for i, term in enumerate(swarmnameTerms):
                if term in swarmnameMatches:
                    pos_score = -i
                    break

        self.relevance_score = [
            len(matches['swarmname']), pos_score,
            len(matches['filenames']),
            len(matches['fileextensions']), 0
        ]
Example 9
    def _on_torrent_search_results(self, subject, change_type, object_id,
                                   search_results):
        """
        The callback function handles the search results from SearchCommunity.
        :param subject: Must be SIGNAL_SEARCH_COMMUNITY.
        :param change_type: Must be SIGNAL_ON_SEARCH_RESULTS.
        :param object_id: Must be None.
        :param search_results: The result dictionary which has 'keywords', 'results', and 'candidate'.
        """
        if self.session is None:
            return 0

        keywords = search_results['keywords']
        results = search_results['results']
        candidate = search_results['candidate']

        self._logger.debug(
            "Got torrent search results %s, keywords %s, candidate %s",
            len(results), keywords, candidate)

        # drop it if these are the results of an old keyword
        if keywords != self._current_keywords:
            return

        # results is a list of tuples that are:
        # (1) infohash, (2) name, (3) length, (4) num_files, (5) category, (6) creation_date, (7) num_seeders
        # (8) num_leechers, (9) channel_cid

        remote_torrent_result_list = []

        # get and cache channels
        channel_cid_list = [
            result[-1] for result in results if result[-1] is not None
        ]
        channel_cache_list = self.channelcast_db.getChannelsByCID(
            channel_cid_list)
        channel_cache_dict = {}
        for channel in channel_cache_list:
            # index 1 is cid
            channel_cache_dict[channel[1]] = channel

        # create result dictionaries that are understandable
        for result in results:
            remote_torrent_result = {
                'torrent_type': 'remote',  # indicates if it is a remote torrent
                'relevance_score': None,
                'torrent_id': -1,
                'infohash': result[0],
                'name': result[1],
                'length': result[2],
                'num_files': result[3],
                'category': result[4][0],
                'creation_date': result[5],
                'num_seeders': result[6],
                'num_leechers': result[7],
                'status': u'good',
                'query_candidates': {candidate},
                'channel': None
            }

            channel_cid = result[-1]
            if channel_cid is not None and channel_cid in channel_cache_dict:
                channel = channel_cache_dict[channel_cid]
                channel_result = {
                    'id': channel[0],
                    'name': channel[2],
                    'description': channel[3],
                    'dispersy_cid': channel[1],
                    'num_torrents': channel[4],
                    'num_favorite': channel[5],
                    'num_spam': channel[6],
                    'modified': channel[8],
                }
                remote_torrent_result['channel'] = channel_result

            # guess matches
            keyword_set = set(keywords)
            swarmname_set = set(
                split_into_keywords(remote_torrent_result['name']))
            matches = {
                'fileextensions': set(),
                'swarmname': swarmname_set & keyword_set,  # all keywords matching in swarmname
            }
            matches['filenames'] = keyword_set - matches['swarmname']  # remaining keywords should thus match in filenames or fileextensions

            if len(matches['filenames']) == 0:
                _, ext = os.path.splitext(result[0])
                ext = ext[1:]

                matches['filenames'] = matches['swarmname']
                matches['filenames'].discard(ext)

                if ext in keyword_set:
                    matches['fileextensions'].add(ext)

            # Find the lowest term position of the matching keywords
            pos_score = None
            if matches['swarmname']:
                swarmnameTerms = split_into_keywords(
                    remote_torrent_result['name'])
                swarmnameMatches = matches['swarmname']

                for i, term in enumerate(swarmnameTerms):
                    if term in swarmnameMatches:
                        pos_score = -i
                        break

            remote_torrent_result['relevance_score'] = [
                len(matches['swarmname']), pos_score,
                len(matches['filenames']),
                len(matches['fileextensions']), 0
            ]

            # append the result into the result list
            remote_torrent_result_list.append(remote_torrent_result)

        results_data = {
            'keywords': keywords,
            'result_list': remote_torrent_result_list
        }
        # inform other components about the results
        self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS,
                                     None, results_data)
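
For illustration, the callback above could be exercised with a hand-built search_results dict that follows the tuple layout documented in the comments; all values and the `search_manager` name are made up, and the handler only processes the results when 'keywords' matches its current search keywords.

fake_results = {
    'keywords': [u'ubuntu'],
    'candidate': None,  # normally the Dispersy candidate that sent the results
    'results': [
        ('\x00' * 20,          # (1) infohash
         u'ubuntu-18.04.iso',  # (2) name
         1873608704,           # (3) length
         1,                    # (4) num_files
         [u'other'],           # (5) category, a sequence because the handler reads result[4][0]
         1524000000,           # (6) creation_date
         42,                   # (7) num_seeders
         7,                    # (8) num_leechers
         None),                # (9) channel_cid
    ],
}
search_manager._on_torrent_search_results(SIGNAL_SEARCH_COMMUNITY, SIGNAL_ON_SEARCH_RESULTS,
                                          None, fake_results)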
Example 10
    def dosearch(self, input=None):
        if input is None:
            sf = self.frame.top_bg.searchField
            if sf is None:
                return

            input = sf.GetValue()

        if input:
            input = input.strip()
            if input == '':
                return
        else:
            return
        self.frame.top_bg.searchField.SetValue(input)

        if self.frame.startDownloadFromArg(input):
            self.frame.top_bg.searchField.Clear()
            self.ShowPage('my_files')
        else:
            keywords = split_into_keywords(input)
            keywords = [keyword for keyword in keywords if len(keyword) > 1]

            if len(keywords) == 0:
                self.Notify('Please enter a search term',
                            "Your search term '%s' was either to small or to general." % input,
                            icon=wx.ART_INFORMATION)

            else:
                self.frame.top_bg.StartSearch()
                self.current_search_query = keywords
                self._logger.debug("GUIUtil: searchFiles: %s %s", keywords, time())

                self.frame.searchlist.Freeze()

                self.torrentsearch_manager.setSearchKeywords(keywords)
                self.channelsearch_manager.setSearchKeywords(keywords)

                # We set oldkeywords to '', which will trigger a reset in SetKeywords (called from ShowPage).
                # This avoids calling reset twice.
                # Niels: 17-09-2012, unfortunately showpage calls show(true)
                # which results in the dirty items being refreshed.
                # We need to call Reset in order to prevent this from happening
                self.frame.searchlist.Reset()
                self.ShowPage('search_results', keywords)

                # We now have to call Thaw, otherwise the loading message will not be shown.
                self.frame.searchlist.Thaw()

                # Perform local search
                self.torrentsearch_manager.set_gridmgr(self.frame.searchlist.GetManager())
                self.channelsearch_manager.set_gridmgr(self.frame.searchlist.GetManager())

                def db_thread():
                    self.torrentsearch_manager.refreshGrid()

                    nr_peers_connected = self.torrentsearch_manager.searchDispersy()
                    self.channelsearch_manager.searchDispersy()
                    return nr_peers_connected

                def wx_thread(delayedResult):
                    nr_peers_connected = delayedResult.get()

                    if self and self.frame and self.frame.searchlist:
                        self.frame.searchlist.SetMaxResults(nr_peers_connected + 1, keywords)
                        self.frame.searchlist.NewResult()

                startWorker(wx_thread, db_thread, priority=1024)
Example 11
    def render_GET(self, request):
        """
        .. http:get:: /search?q=(string:query)

        A GET request to this endpoint will create a search. Results are returned over the events endpoint, one by one.
        First, the results available in the local database will be pushed. After that, incoming Dispersy results are
        pushed. The query is passed in the URL, e.g. /search?q=pioneer.

            **Example request**:

            .. sourcecode:: none

                curl -X GET http://localhost:8085/search?q=tribler

            **Example response**:

            .. sourcecode:: javascript

                {
                    "type": "search_result_channel",
                    "query": "test",
                    "result": {
                        "id": 3,
                        "dispersy_cid": "da69aaad39ccf468aba2ab9177d5f8d8160135e6",
                        "name": "My fancy channel",
                        "description": "A description of this fancy channel",
                        "subscribed": True,
                        "votes": 23,
                        "torrents": 3,
                        "spam": 5,
                        "modified": 14598395,
                        "can_edit": False
                    }
                }
        """
        if 'q' not in request.args:
            request.setResponseCode(http.BAD_REQUEST)
            return json.dumps({"error": "query parameter missing"})

        # Notify the events endpoint that we are starting a new search query
        self.events_endpoint.start_new_query()

        # We first search the local database for torrents and channels
        query = unicode(request.args['q'][0], 'utf-8')
        keywords = split_into_keywords(query)
        results_local_channels = self.channel_db_handler.search_in_local_channels_db(query)
        results_dict = {"keywords": keywords, "result_list": results_local_channels}
        self.session.notifier.notify(SIGNAL_CHANNEL, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

        torrent_db_columns = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category',
                              'num_seeders', 'num_leechers', 'last_tracker_check']
        results_local_torrents = self.torrent_db_handler.search_in_local_torrents_db(query, keys=torrent_db_columns)
        results_dict = {"keywords": keywords, "result_list": results_local_torrents}
        self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

        # Create remote searches
        try:
            self.session.search_remote_torrents(keywords)
            self.session.search_remote_channels(keywords)
        except OperationNotEnabledByConfigurationException as exc:
            self._logger.error(exc)

        return json.dumps({"queried": True})
Example 12
    def _on_torrent_search_results(self, subject, change_type, object_id, search_results):
        """
        The callback function handles the search results from SearchCommunity.
        :param subject: Must be SIGNAL_SEARCH_COMMUNITY.
        :param change_type: Must be SIGNAL_ON_SEARCH_RESULTS.
        :param object_id: Must be None.
        :param search_results: The result dictionary which has 'keywords', 'results', and 'candidate'.
        """
        if self.session is None:
            return 0

        keywords = search_results['keywords']
        results = search_results['results']
        candidate = search_results['candidate']

        self._logger.debug("Got torrent search results %s, keywords %s, candidate %s",
                           len(results), keywords, candidate)

        # drop it if these are the results of an old keyword
        if keywords != self._current_keywords:
            return

        # results is a list of tuples that are:
        # (1) infohash, (2) name, (3) length, (4) num_files, (5) category, (6) creation_date, (7) num_seeders
        # (8) num_leechers, (9) channel_cid

        remote_torrent_result_list = []

        # get and cache channels
        channel_cid_list = [result[-1] for result in results if result[-1] is not None]
        channel_cache_list = self.channelcast_db.getChannelsByCID(channel_cid_list)
        channel_cache_dict = {}
        for channel in channel_cache_list:
            # index 1 is cid
            channel_cache_dict[channel[1]] = channel

        # create result dictionaries that are understandable
        for result in results:
            remote_torrent_result = {'torrent_type': 'remote',  # indicates if it is a remote torrent
                                     'relevance_score': None,
                                     'torrent_id': -1,
                                     'infohash': result[0],
                                     'name': result[1],
                                     'length': result[2],
                                     'num_files': result[3],
                                     'category': result[4][0],
                                     'creation_date': result[5],
                                     'num_seeders': result[6],
                                     'num_leechers': result[7],
                                     'status': u'good',
                                     'query_candidates': {candidate},
                                     'channel': None}

            channel_cid = result[-1]
            if channel_cid is not None and channel_cid in channel_cache_dict:
                channel = channel_cache_dict[channel_cid]
                channel_result = {'id': channel[0],
                                  'name': channel[2],
                                  'description': channel[3],
                                  'dispersy_cid': channel[1],
                                  'num_torrents': channel[4],
                                  'num_favorite': channel[5],
                                  'num_spam': channel[6],
                                  'modified': channel[8],
                                  }
                remote_torrent_result['channel'] = channel_result

            # guess matches
            keyword_set = set(keywords)
            swarmname_set = set(split_into_keywords(remote_torrent_result['name']))
            matches = {'fileextensions': set(),
                       'swarmname': swarmname_set & keyword_set,  # all keywords matching in swarmname
                       }
            matches['filenames'] = keyword_set - matches['swarmname']  # remaining keywords should thus match in filenames or fileextensions

            if len(matches['filenames']) == 0:
                _, ext = os.path.splitext(result[0])
                ext = ext[1:]

                matches['filenames'] = matches['swarmname']
                matches['filenames'].discard(ext)

                if ext in keyword_set:
                    matches['fileextensions'].add(ext)

            # Find the lowest term position of the matching keywords
            pos_score = None
            if matches['swarmname']:
                swarmnameTerms = split_into_keywords(remote_torrent_result['name'])
                swarmnameMatches = matches['swarmname']

                for i, term in enumerate(swarmnameTerms):
                    if term in swarmnameMatches:
                        pos_score = -i
                        break

            remote_torrent_result['relevance_score'] = [len(matches['swarmname']),
                                                        pos_score,
                                                        len(matches['filenames']),
                                                        len(matches['fileextensions']),
                                                        0]

            # append the result into the result list
            remote_torrent_result_list.append(remote_torrent_result)

        results_data = {'keywords': keywords,
                        'result_list': remote_torrent_result_list}
        # inform other components about the results
        self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_data)
Example 13
    def dosearch(self, input=None):
        if input is None:
            sf = self.frame.top_bg.searchField
            if sf is None:
                return

            input = sf.GetValue()

        if input:
            input = input.strip()
            if input == '':
                return
        else:
            return
        self.frame.top_bg.searchField.SetValue(input)

        if self.frame.startDownloadFromArg(input):
            self.frame.top_bg.searchField.Clear()
            self.ShowPage('my_files')
        else:
            keywords = split_into_keywords(input)
            keywords = [keyword for keyword in keywords if len(keyword) > 1]

            if len(keywords) == 0:
                self.Notify(
                    'Please enter a search term',
                    "Your search term '%s' was either to small or to general."
                    % input,
                    icon=wx.ART_INFORMATION)

            else:
                self.frame.top_bg.StartSearch()
                self.current_search_query = keywords
                self._logger.debug("GUIUtil: searchFiles: %s %s", keywords,
                                   time())

                self.frame.searchlist.Freeze()

                self.torrentsearch_manager.setSearchKeywords(keywords)
                self.channelsearch_manager.setSearchKeywords(keywords)

                # We set oldkeywords to '', which will trigger a reset in SetKeywords (called from ShowPage).
                # This avoids calling reset twice.
                # Niels: 17-09-2012, unfortunately showpage calls show(true)
                # which results in the dirty items being refreshed.
                # We need to call Reset in order to prevent this from happening
                self.frame.searchlist.Reset()
                self.ShowPage('search_results', keywords)

                # We now have to call Thaw, otherwise the loading message will not be shown.
                self.frame.searchlist.Thaw()

                # Perform local search
                self.torrentsearch_manager.set_gridmgr(
                    self.frame.searchlist.GetManager())
                self.channelsearch_manager.set_gridmgr(
                    self.frame.searchlist.GetManager())

                def db_thread():
                    self.torrentsearch_manager.refreshGrid()

                    nr_peers_connected = self.torrentsearch_manager.searchDispersy()
                    self.channelsearch_manager.searchDispersy()
                    return nr_peers_connected

                def wx_thread(delayedResult):
                    nr_peers_connected = delayedResult.get()

                    if self and self.frame and self.frame.searchlist:
                        self.frame.searchlist.SetMaxResults(
                            nr_peers_connected + 1, keywords)
                        self.frame.searchlist.NewResult()

                startWorker(wx_thread, db_thread, priority=1024)
Example 14
    def render_GET(self, request):
        """
        .. http:get:: /search?q=(string:query)

        A GET request to this endpoint will create a search. Results are returned over the events endpoint, one by one.
        First, the results available in the local database will be pushed. After that, incoming Dispersy results are
        pushed. The query is passed in the URL, e.g. /search?q=pioneer.

            **Example request**:

            .. sourcecode:: none

                curl -X GET http://localhost:8085/search?q=tribler

            **Example response**:

            .. sourcecode:: javascript

                {
                    "type": "search_result_channel",
                    "query": "test",
                    "result": {
                        "id": 3,
                        "dispersy_cid": "da69aaad39ccf468aba2ab9177d5f8d8160135e6",
                        "name": "My fancy channel",
                        "description": "A description of this fancy channel",
                        "subscribed": True,
                        "votes": 23,
                        "torrents": 3,
                        "spam": 5,
                        "modified": 14598395,
                        "can_edit": False
                    }
                }
        """
        if 'q' not in request.args:
            request.setResponseCode(http.BAD_REQUEST)
            return json.dumps({"error": "query parameter missing"})

        # Notify the events endpoint that we are starting a new search query
        self.events_endpoint.start_new_query()

        # We first search the local database for torrents and channels
        query = unicode(request.args['q'][0], 'utf-8')
        keywords = split_into_keywords(query)

        results_local_channels = self.channel_db_handler.search_in_local_channels_db(query)
        with db_session:
            results_local_channels.extend(map(convert_channel_metadata_to_tuple,
                                              self.session.lm.mds.ChannelMetadata.search_keyword(query)))

        results_dict = {"keywords": keywords, "result_list": results_local_channels}
        self.session.notifier.notify(SIGNAL_CHANNEL, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

        torrent_db_columns = ['T.torrent_id', 'infohash', 'T.name', 'length', 'category',
                              'num_seeders', 'num_leechers', 'last_tracker_check']
        results_local_torrents = self.torrent_db_handler.search_in_local_torrents_db(query, keys=torrent_db_columns)
        with db_session:
            results_local_torrents.extend(map(convert_torrent_metadata_to_tuple,
                                              self.session.lm.mds.TorrentMetadata.search_keyword(query)))
        results_dict = {"keywords": keywords, "result_list": results_local_torrents}
        self.session.notifier.notify(SIGNAL_TORRENT, SIGNAL_ON_SEARCH_RESULTS, None, results_dict)

        # Create remote searches
        try:
            self.session.search_remote_torrents(keywords)
            self.session.search_remote_channels(keywords)
        except OperationNotEnabledByConfigurationException as exc:
            self._logger.error(exc)

        return json.dumps({"queried": True})