def _ensureWordInDatabase(self, cursor, word):
    """Commit a zero-count ``Word`` row for *word* unless a lookup finds one.

    The word is looked up with ``Word.selectWhere`` on *cursor*. When that
    lookup comes back empty, a fresh ``Word`` whose ``claim_count`` and
    ``download_count`` are both ``0.`` is committed on the same cursor with
    ``update=False``.
    """
    if Word.selectWhere(cursor, word=word):
        # The lookup returned something: leave the existing row untouched.
        return
    Word(word=word, claim_count=0., download_count=0.).commit(
        cursor, update=False)
def _updateDownloadStatistics(self, document, words):
    """Record one download of *document* reached through the query *words*.

    The document's ``download_count`` is clamped to be non-negative and then
    incremented by one. The download is split evenly between the query
    words: every word's ``Word`` row and every (document, word)
    ``DocumentScore`` row gains ``1.0 / len(words)`` downloads. Each score's
    ``popularity`` and ``relevance`` are then recomputed as a ratio of
    download counts minus ``hoeffding_deviation`` of the denominator.

    All writes go through a single cursor on ``self._cnx``. The cursor is
    always closed; the connection is committed only when every update
    succeeded.

    NOTE(review): another definition with this same name appears later in
    this file; if both live in the same class body, the later one wins.
    """
    # Float division on purpose: under Python 2 ``1 / len(words)`` truncates
    # to 0 for multi-word queries, so no download would ever be credited.
    # The guard keeps an empty query from raising ZeroDivisionError.
    share = 1.0 / len(words) if words else 0.0

    cursor = self._cnx.cursor()
    try:
        document.download_count = max(0, document.download_count) + 1
        document.commit(cursor, update=True)
        db_document_id = document.db_document_id

        # Load (or create) the per-word rows first; duplicated words
        # collapse onto a single entry in each dict.
        scores = {}
        wordInfo = {}
        for word in words:
            scores[word] = DocumentScore.selectOrInsertWhere(
                cursor, db_document_id=db_document_id, word=word)[0]
            wordInfo[word] = Word.selectOrInsertWhere(cursor, word=word)[0]

        # Global per-word counters must be updated before the scores,
        # because popularity below is computed against the new totals.
        for winfo in wordInfo.values():
            winfo.download_count += share
            winfo.commit(cursor, update=True)

        for word, score in scores.items():
            score.download_count = max(0, score.download_count) + share
            winfo_downloads = wordInfo[word].download_count
            score.popularity = score.download_count / winfo_downloads
            score.popularity -= hoeffding_deviation(winfo_downloads)
            score.relevance = score.download_count / document.download_count
            score.relevance -= hoeffding_deviation(document.download_count)
            score.commit(cursor, update=True)
    finally:
        # Release the cursor even when one of the updates raised.
        cursor.close()
    self._cnx.commit()
def _updateDownloadStatistics(self, document, words):
    """Record one download of *document* reached through the query *words*.

    The document's ``download_count`` is clamped to be non-negative and then
    incremented by one. The download is split evenly between the query
    words: every word's ``Word`` row and every (document, word)
    ``DocumentScore`` row gains ``1.0 / len(words)`` downloads. Each score's
    ``popularity`` and ``relevance`` are then recomputed as a ratio of
    download counts minus ``hoeffding_deviation`` of the denominator, and
    floored at ``1e-6``.

    All writes go through a single cursor on ``self._cnx``. The cursor is
    always closed; the connection is committed only when every update
    succeeded.
    """
    # One float share used for BOTH the per-word and the per-score counters.
    # Under Python 2 ``1 / len(words)`` truncates to 0 for multi-word
    # queries, which left the word totals behind the score totals.
    # The guard keeps an empty query from raising ZeroDivisionError.
    share = 1.0 / len(words) if words else 0.0

    cursor = self._cnx.cursor()
    try:
        document.download_count = max(0, document.download_count) + 1
        document.commit(cursor, update=True)
        db_document_id = document.db_document_id

        # Load (or create) the per-word rows first; duplicated words
        # collapse onto a single entry in each dict.
        scores = {}
        wordInfo = {}
        for word in words:
            scores[word] = DocumentScore.selectOrInsertWhere(
                cursor, db_document_id=db_document_id, word=word)[0]
            wordInfo[word] = Word.selectOrInsertWhere(cursor, word=word)[0]

        # Global per-word counters must be updated before the scores,
        # because popularity below is computed against the new totals.
        for winfo in wordInfo.values():
            winfo.download_count += share
            winfo.commit(cursor, update=True)

        for word, score in scores.items():
            score.download_count = max(0, score.download_count) + share
            winfo_downloads = wordInfo[word].download_count

            score.popularity = float(score.download_count) / winfo_downloads
            score.popularity -= hoeffding_deviation(winfo_downloads)
            # Keep the stored value strictly positive.
            score.popularity = max(1e-6, score.popularity)

            score.relevance = (
                float(score.download_count) / document.download_count)
            score.relevance -= hoeffding_deviation(document.download_count)
            score.relevance = max(1e-6, score.relevance)

            score.commit(cursor, update=True)
    finally:
        # Release the cursor even when one of the updates raised.
        cursor.close()
    self._cnx.commit()
def _updateQueryStatistics(self, words):
    """Credit one query to *words*, split evenly between them.

    For every entry of *words*, the matching ``Word`` row is loaded (or
    created) with ``Word.selectOrInsertWhere`` and its ``claim_count`` is
    increased by ``1.0 / len(words)`` before being committed on the cursor.

    The cursor is always closed; ``self._cnx`` is committed only when every
    update succeeded.
    """
    # FIXME: update node_interests too
    cursor = self._cnx.cursor()
    try:
        for word in words:
            winfo = Word.selectOrInsertWhere(cursor, word=word)[0]
            # Float division on purpose: under Python 2 ``1 / len(words)``
            # truncates to 0 for multi-word queries.
            winfo.claim_count += 1.0 / len(words)
            winfo.commit(cursor, update=True)
    finally:
        # Actually call close(); a bare ``cursor.close`` only looks the
        # method up and leaves the cursor open.
        cursor.close()
    self._cnx.commit()
def _updateQueryStatistics(self, words):
    """Credit one query to *words*, split evenly between them.

    For every entry of *words*, the matching ``Word`` row is loaded (or
    created) with ``Word.selectOrInsertWhere`` and its ``claim_count`` is
    increased by ``1.0 / len(words)`` before being committed on the cursor.

    The cursor is always closed; ``self._cnx`` is committed only when every
    update succeeded.
    """
    # FIXME: update node_interests too, but we need the nodeId to do this
    cursor = self._cnx.cursor()
    try:
        for word in words:
            winfo = Word.selectOrInsertWhere(cursor, word=word)[0]
            # Float division on purpose: under Python 2 ``1 / len(words)``
            # truncates to 0 for multi-word queries.
            winfo.claim_count += 1.0 / len(words)
            winfo.commit(cursor, update=True)
    finally:
        # Release the cursor even when one of the updates raised.
        cursor.close()
    self._cnx.commit()