Exemplo n.º 1
0
 def _ensureWordInDatabase(self, cursor, word):
     """Commit a zero-count ``Word`` record for *word* if none is found.

     ``Word.selectWhere`` is queried for *word*; when it returns something
     truthy nothing else happens.  Otherwise a new ``Word`` whose
     ``claim_count`` and ``download_count`` are both ``0.`` is built and
     committed through *cursor* with ``update=False``.
     """
     if Word.selectWhere(cursor, word=word):
         # A matching record was found: nothing to create.
         return
     fresh = Word(word=word, claim_count=0., download_count=0.)
     fresh.commit(cursor, update=False)
Exemplo n.º 2
0
 def _ensureWordInDatabase(self, cursor, word):
     """Make sure a ``Word`` record for *word* has been committed.

     The word is looked up via ``Word.selectWhere``.  Only when that lookup
     comes back empty is a new ``Word`` created, with both of its counters
     starting at ``0.``, and committed on *cursor* with ``update=False``.
     """
     existing = Word.selectWhere(cursor, word=word)
     if existing:
         return
     # Counters start as floats (note the trailing dot on the literals).
     initial_fields = dict(word=word, claim_count=0., download_count=0.)
     Word(**initial_fields).commit(cursor, update=False)
Exemplo n.º 3
0
    def _updateDownloadStatistics(self, document, words):
        """Record one download of *document* attributed to *words*.

        The document's ``download_count`` is incremented by one.  Every word
        in *words* is credited with an equal fractional share of that
        download (``1.0 / len(words)``), both on its ``Word`` row and on the
        ``DocumentScore`` row for this (document, word) pair.  Each score's
        ``popularity`` (score downloads over the word's total downloads) and
        ``relevance`` (score downloads over the document's downloads) are
        then recomputed, each reduced by ``hoeffding_deviation`` of its
        denominator.  All rows are committed through one cursor and the
        connection is committed at the end.

        :param document: record exposing ``download_count``,
            ``db_document_id`` and ``commit(cursor, update=...)``.
        :param words: sized collection of the words credited for the
            download; when empty only the document counter is updated.
        """
        # Equal share of the download per word.  The float literal matters:
        # under Python 2, ``1 / len(words)`` is integer division and yields 0
        # as soon as there is more than one word, so no count would ever grow
        # and the ratios below could divide by zero.
        share = 0.0
        word_count = len(words)
        if word_count:
            share = 1.0 / word_count

        cursor = self._cnx.cursor()
        try:
            # max(0, ...) presumably guards against a negative "unset"
            # placeholder value -- TODO confirm against the schema.
            document.download_count = max(0, document.download_count) + 1
            document.commit(cursor, update=True)
            db_document_id = document.db_document_id

            scores = {}
            wordInfo = {}
            for word in words:
                scores[word] = DocumentScore.selectOrInsertWhere(
                    cursor, db_document_id=db_document_id, word=word)[0]
                wordInfo[word] = Word.selectOrInsertWhere(
                    cursor, word=word)[0]

            # Update the per-word totals first: the score ratios below read
            # the already-incremented totals.
            for winfo in wordInfo.itervalues():
                winfo.download_count += share
                winfo.commit(cursor, update=True)

            for word, score in scores.iteritems():
                score.download_count = max(0, score.download_count) + share
                winfo_downloads = wordInfo[word].download_count

                # score.download_count is a float here (an int plus a float
                # share), so both divisions are true divisions.
                score.popularity = score.download_count / winfo_downloads
                score.popularity -= hoeffding_deviation(winfo_downloads)

                score.relevance = (score.download_count
                                   / document.download_count)
                score.relevance -= hoeffding_deviation(
                    document.download_count)

                score.commit(cursor, update=True)
        finally:
            # Close the cursor even when a query or commit raises.
            cursor.close()
        self._cnx.commit()
Exemplo n.º 4
0
    def _updateDownloadStatistics(self, document, words):
        """Record one download of *document* attributed to *words*.

        The document's ``download_count`` is incremented by one.  Every word
        in *words* is credited with an equal fractional share of that
        download (``1.0 / len(words)``), both on its ``Word`` row and on the
        ``DocumentScore`` row for this (document, word) pair.  Each score's
        ``popularity`` (score downloads over the word's total downloads) and
        ``relevance`` (score downloads over the document's downloads) are
        then recomputed, each reduced by ``hoeffding_deviation`` of its
        denominator and clamped to a minimum of ``1e-6``.  All rows are
        committed through one cursor and the connection is committed at the
        end.

        :param document: record exposing ``download_count``,
            ``db_document_id`` and ``commit(cursor, update=...)``.
        :param words: sized collection of the words credited for the
            download; when empty only the document counter is updated.
        """
        # One float share used for BOTH the word totals and the per-document
        # scores.  Previously the word totals used ``1 / len(words)``, which
        # is integer division under Python 2 (0 for multi-word queries),
        # while the scores used ``1.0 / len(words)``: the totals stayed at 0
        # and the popularity ratio could divide by zero.
        share = 0.0
        word_count = len(words)
        if word_count:
            share = 1.0 / word_count

        cursor = self._cnx.cursor()
        try:
            # max(0, ...) presumably guards against a negative "unset"
            # placeholder value -- TODO confirm against the schema.
            document.download_count = max(0, document.download_count) + 1
            document.commit(cursor, update=True)
            db_document_id = document.db_document_id

            scores = {}
            wordInfo = {}
            for word in words:
                scores[word] = DocumentScore.selectOrInsertWhere(
                    cursor, db_document_id=db_document_id, word=word)[0]
                wordInfo[word] = Word.selectOrInsertWhere(
                    cursor, word=word)[0]

            # Update the per-word totals first: the score ratios below read
            # the already-incremented totals.
            for winfo in wordInfo.itervalues():
                winfo.download_count += share
                winfo.commit(cursor, update=True)

            for word, score in scores.iteritems():
                score.download_count = max(0, score.download_count) + share
                winfo_downloads = wordInfo[word].download_count

                score.popularity = (float(score.download_count)
                                    / winfo_downloads)
                score.popularity -= hoeffding_deviation(winfo_downloads)
                # Keep the stored value at or above 1e-6 even when the
                # deviation term exceeds the ratio.
                score.popularity = max(1e-6, score.popularity)

                score.relevance = (float(score.download_count)
                                   / document.download_count)
                score.relevance -= hoeffding_deviation(
                    document.download_count)
                score.relevance = max(1e-6, score.relevance)

                score.commit(cursor, update=True)
        finally:
            # Close the cursor even when a query or commit raises.
            cursor.close()
        self._cnx.commit()
Exemplo n.º 5
0
 def _updateQueryStatistics(self, words):
     """Credit each word in *words* with an equal share of one query.

     For every word, its ``Word`` row is fetched (or inserted) with
     ``Word.selectOrInsertWhere``, its ``claim_count`` is increased by
     ``1.0 / len(words)`` and the row is committed.  The connection is
     committed once all words have been processed.

     :param words: sized collection of the words that made up the query;
         when empty, no rows are touched.
     """
     # FIXME: update node_interests too
     # The float literal matters: under Python 2, ``1 / len(words)`` is
     # integer division and yields 0 for any multi-word query, so
     # claim_count would never increase.
     share = 0.0
     word_count = len(words)
     if word_count:
         share = 1.0 / word_count

     cursor = self._cnx.cursor()
     try:
         for word in words:
             winfo = Word.selectOrInsertWhere(cursor, word=word)[0]
             winfo.claim_count += share
             winfo.commit(cursor, update=True)
     finally:
         # The original read ``cursor.close`` without parentheses, which
         # only looked the method up and never closed the cursor.
         cursor.close()
     self._cnx.commit()
Exemplo n.º 6
0
 def _updateQueryStatistics(self, words):
     """Credit each word in *words* with an equal share of one query.

     For every word, its ``Word`` row is fetched (or inserted) with
     ``Word.selectOrInsertWhere``, its ``claim_count`` is increased by
     ``1.0 / len(words)`` and the row is committed.  The connection is
     committed once all words have been processed.

     :param words: sized collection of the words that made up the query;
         when empty, no rows are touched.
     """
     # FIXME: update node_interests too, but we need the nodeId to do this
     # The float literal matters: under Python 2, ``1 / len(words)`` is
     # integer division and yields 0 for any multi-word query, so
     # claim_count would never increase.
     share = 0.0
     word_count = len(words)
     if word_count:
         share = 1.0 / word_count

     cursor = self._cnx.cursor()
     try:
         for word in words:
             winfo = Word.selectOrInsertWhere(cursor, word=word)[0]
             winfo.claim_count += share
             winfo.commit(cursor, update=True)
     finally:
         # Close the cursor even when a lookup or commit raises.
         cursor.close()
     self._cnx.commit()