def __intercept(self, dbcursor, sql, parameters):
    """Handle the custom ``UPDATEWORD`` pseudo-statement for a single word.

    When the first token of *sql* is ``UPDATEWORD`` (case-insensitive) the
    row for ``parameters[0]`` in ``BoWIDFIntelligence.wordTable`` is either

    * updated: DocumentCount + 1, TotalCount + ``parameters[2]`` and the
      prediction replaced by the count-weighted average of the stored and
      the incoming prediction, or
    * inserted with DocumentCount 1 and TotalCount ``parameters[2]`` when
      no row exists yet.

    Args:
        dbcursor: Cursor used to run the SELECT / UPDATE / INSERT statements.
        sql: Statement text; only its first token is inspected.
        parameters: Indexable sequence. Index 0 is the word, index 2 the
            occurrence count, index 3 the prediction (``None``, a ``str``
            or a ``ValueTimes``). Index 1 is not read here.

    Returns:
        ``True`` when the statement was recognised and executed here,
        ``False`` otherwise (including when *sql* produces no tokens).
    """
    tokens = nltk.tokenize.word_tokenize(sql)
    # Without this guard a statement that yields no tokens would raise
    # IndexError on tokens[0] instead of simply not being intercepted.
    if not tokens or tokens[0].upper() != "UPDATEWORD":
        return False

    table = BoWIDFIntelligence.wordTable
    word = parameters[0]
    count = parameters[2]
    incoming = parameters[3]

    dbcursor.execute(
        """SELECT * FROM {0} WHERE Word=? LIMIT 1""".format(table),
        (word,))
    wordrow = dbcursor.fetchone()

    if wordrow is None:
        # First sighting of the word: store the prediction in its dumped
        # (string) form; anything that is neither str nor ValueTimes is
        # stored as NULL.
        if isinstance(incoming, str):
            prediction = incoming
        elif isinstance(incoming, ValueTimes):
            prediction = incoming.dumps()
        else:
            prediction = None
        dbcursor.execute(
            """INSERT INTO {0} VALUES (?,?,?,?)""".format(table),
            (word, 1, count, prediction))
        return True

    doccount = wordrow[1] + 1
    totalcount = wordrow[2] + count
    if incoming is None:
        # NOTE(review): this overwrites any stored prediction with NULL --
        # confirm that is intended.
        prediction = None
    else:
        # Weighted mean: each prediction is weighted by the occurrence
        # count it was built from.
        # NOTE(review): assumes `incoming` is a ValueTimes and wordrow[3]
        # holds a dumped ValueTimes; a NULL stored prediction or a str
        # `incoming` is not handled on this path -- confirm with callers.
        prediction = (
            ((incoming * count) + (ValueTimes.loads(wordrow[3]) * wordrow[2]))
            * (1.0 / float(totalcount))).dumps()
    dbcursor.execute(
        """UPDATE {0} SET DocumentCount=?, TotalCount=?, Prediction=? WHERE Word=?""".format(
            table),
        (doccount, totalcount, prediction, word))
    return True
def __predicterWorker(self, aQueue, aStopToken):
    """Consume queued items and store a TF-IDF-weighted prediction per article.

    Every queue entry is read as ``item[1]`` (the payload). The payload's
    ``[0]`` is compared with ``self.learnLabel`` and its ``[1][5]`` is used
    as the news id. The loop ends when the payload equals *aStopToken*.

    For every other payload:

    * no stored news item -> the payload is handed to ``self.parserManager``;
    * a stored news item whose field 7 is ``None`` -> a prediction is
      computed from the word statistics of field 6 and written through the
      custom ``UPDATEARTICLE`` statement;
    * payloads labelled ``self.learnLabel`` are passed on to
      ``self.learnerManager``.

    ``task_done()`` is called once for every retrieved entry, including the
    stop entry.

    Args:
        aQueue: Queue-like object providing ``get()`` and ``task_done()``.
        aStopToken: Sentinel payload that terminates the worker.
    """
    while True:
        item = aQueue.get()
        payload = item[1]
        if payload == aStopToken:
            break

        news_id = payload[1][5]
        newsitem = self.__getNewsItemByID(news_id)
        if newsitem is None:
            # Nothing stored for this id yet: hand the payload to the parser.
            self.parserManager.put(payload)
        elif newsitem[7] is None:
            textinfo = self.__getTextInformation(newsitem[6])

            # First pass: total occurrence count and the master-word entry.
            masterinfo = None
            wordcount = 0
            for word in textinfo:
                wordcount += textinfo[word][0]
                if word == self.masterWord:
                    masterinfo = textinfo[word]

            # Second pass: TF-IDF weight for every word except the master.
            # NOTE(review): masterinfo stays None when the master word is
            # missing from textinfo, which makes masterinfo[1][1] raise --
            # confirm __getTextInformation always includes it.
            weights = {}
            weighttotal = 0.0
            for word in textinfo:
                if word == self.masterWord:
                    continue
                info = textinfo[word]
                tf = float(info[0]) / float(wordcount)
                idf = log(
                    1.0 + (float(masterinfo[1][1]) / (1 + float(info[1][1]))))
                tfidf = tf * idf
                weights[word] = tfidf
                weighttotal += tfidf

            # Blend the per-word predictions, each scaled by its share of
            # the total weight. The factor 128 is applied per term and
            # removed after the sum, exactly as before (presumably for
            # numeric reasons -- confirm).
            mastervaltime = self.__getEmptyPrediction()
            if weighttotal != 0:
                for word, weight in weights.items():
                    valtime = ValueTimes.loads(textinfo[word][1][3])
                    if weight > 0 and not valtime.isZero():
                        mastervaltime = mastervaltime + valtime * (
                            (128.0 * weight) / weighttotal)
                mastervaltime = mastervaltime * (1 / 128.0)

            self.bowidfdbconnection.execute(
                "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(
                    self.articleTable),
                (mastervaltime, news_id),
                block=True)

        # NOTE(review): this hand-off runs for every non-stop payload,
        # including ones just sent back to the parser -- confirm intended.
        if payload[0] == self.learnLabel:
            self.learnerManager.put(payload)
        aQueue.task_done()

    # The stop entry was fetched with get() as well, so acknowledge it too.
    aQueue.task_done()
def __predicterWorker(self, aQueue, aStopToken):
    """Consume queued items and store a TF-IDF-weighted prediction per article.

    Every queue entry is read as ``item[1]`` (the payload). The payload's
    ``[0]`` is compared with ``self.learnLabel`` and its ``[1][5]`` is used
    as the news id. The loop ends when the payload equals *aStopToken*.

    For every other payload:

    * no stored news item -> the payload is handed to ``self.parserManager``;
    * a stored news item whose field 7 is ``None`` -> a prediction is
      computed from the word statistics of field 6 and written through the
      custom ``UPDATEARTICLE`` statement;
    * payloads labelled ``self.learnLabel`` are passed on to
      ``self.learnerManager``.

    ``task_done()`` is called once for every retrieved entry, including the
    stop entry.

    Args:
        aQueue: Queue-like object providing ``get()`` and ``task_done()``.
        aStopToken: Sentinel payload that terminates the worker.
    """
    while True:
        item = aQueue.get()
        payload = item[1]
        if payload == aStopToken:
            break

        news_id = payload[1][5]
        newsitem = self.__getNewsItemByID(news_id)
        if newsitem is None:
            # Nothing stored for this id yet: hand the payload to the parser.
            self.parserManager.put(payload)
        elif newsitem[7] is None:
            textinfo = self.__getTextInformation(newsitem[6])

            # First pass: total occurrence count and the master-word entry.
            masterinfo = None
            wordcount = 0
            for word in textinfo:
                wordcount += textinfo[word][0]
                if word == self.masterWord:
                    masterinfo = textinfo[word]

            # Second pass: TF-IDF weight for every word except the master.
            # NOTE(review): masterinfo stays None when the master word is
            # missing from textinfo, which makes masterinfo[1][1] raise --
            # confirm __getTextInformation always includes it.
            weights = {}
            weighttotal = 0.0
            for word in textinfo:
                if word == self.masterWord:
                    continue
                info = textinfo[word]
                tf = float(info[0]) / float(wordcount)
                idf = log(
                    1.0 + (float(masterinfo[1][1]) / (1 + float(info[1][1]))))
                tfidf = tf * idf
                weights[word] = tfidf
                weighttotal += tfidf

            # Blend the per-word predictions, each scaled by its share of
            # the total weight. The factor 128 is applied per term and
            # removed after the sum, exactly as before (presumably for
            # numeric reasons -- confirm).
            mastervaltime = self.__getEmptyPrediction()
            if weighttotal != 0:
                for word, weight in weights.items():
                    valtime = ValueTimes.loads(textinfo[word][1][3])
                    if weight > 0 and not valtime.isZero():
                        mastervaltime = mastervaltime + valtime * (
                            (128.0 * weight) / weighttotal)
                mastervaltime = mastervaltime * (1 / 128.0)

            self.bowidfdbconnection.execute(
                "UPDATEARTICLE {0} SET Prediction=? WHERE NewsID=?".format(
                    self.articleTable),
                (mastervaltime, news_id),
                block=True)

        # NOTE(review): this hand-off runs for every non-stop payload,
        # including ones just sent back to the parser -- confirm intended.
        if payload[0] == self.learnLabel:
            self.learnerManager.put(payload)
        aQueue.task_done()

    # The stop entry was fetched with get() as well, so acknowledge it too.
    aQueue.task_done()
def __intercept(self, dbcursor, sql, parameters):
    """Handle the custom ``UPDATEWORDS`` pseudo-statement for a batch of words.

    When the first token of *sql* is ``UPDATEWORDS`` (case-insensitive),
    every word in ``parameters[0]`` is merged into
    ``BoWIDFIntelligence.wordTable``:

    * an existing row gets DocumentCount + 1, TotalCount + the word's count
      and a prediction equal to the count-weighted average of the stored
      and the incoming prediction;
    * a missing row is inserted with DocumentCount 1 and the word's count.

    Afterwards the row for ``BoWIDFIntelligence.masterWord`` is created if
    absent and its DocumentCount is incremented by one.

    Args:
        dbcursor: Cursor used to run all statements.
        sql: Statement text; only its first token is inspected.
        parameters: Indexable sequence. Index 0 is iterated for words and
            indexed by word to obtain each count; index 1 is the prediction
            shared by all words (``None``, a ``str`` or a ``ValueTimes``).

    Returns:
        ``True`` when the statement was recognised and executed here,
        ``False`` otherwise (including when *sql* produces no tokens).
    """
    tokens = nltk.tokenize.word_tokenize(sql)
    # Without this guard a statement that yields no tokens would raise
    # IndexError on tokens[0] instead of simply not being intercepted.
    if not tokens or tokens[0].upper() != "UPDATEWORDS":
        return False

    table = BoWIDFIntelligence.wordTable
    word_counts = parameters[0]
    incoming = parameters[1]

    # The statement texts do not depend on the word, so build them once.
    select_sql = """SELECT * FROM {0} WHERE Word=? LIMIT 1""".format(table)
    update_sql = """UPDATE {0} SET DocumentCount=?, TotalCount=?, Prediction=? WHERE Word=?""".format(table)
    insert_sql = """INSERT INTO {0} VALUES (?,?,?,?)""".format(table)

    for word in word_counts:
        dbcursor.execute(select_sql, (word,))
        wordrow = dbcursor.fetchone()

        if wordrow is None:
            # First sighting of the word: store the prediction in its
            # dumped (string) form; anything that is neither str nor
            # ValueTimes is stored as NULL.
            if isinstance(incoming, str):
                prediction = incoming
            elif isinstance(incoming, ValueTimes):
                prediction = incoming.dumps()
            else:
                prediction = None
            dbcursor.execute(
                insert_sql, (word, 1, word_counts[word], prediction))
            continue

        doccount = wordrow[1] + 1
        totalcount = wordrow[2] + word_counts[word]
        if incoming is None:
            # NOTE(review): this overwrites any stored prediction with
            # NULL -- confirm that is intended.
            prediction = None
        else:
            # Weighted mean: each prediction is weighted by the occurrence
            # count it was built from.
            # NOTE(review): assumes `incoming` is a ValueTimes and
            # wordrow[3] holds a dumped ValueTimes; a NULL stored
            # prediction or a str `incoming` is not handled on this path
            # -- confirm with callers.
            prediction = (
                ((incoming * word_counts[word])
                 + (ValueTimes.loads(wordrow[3]) * wordrow[2]))
                * (1.0 / float(totalcount))).dumps()
        dbcursor.execute(update_sql, (doccount, totalcount, prediction, word))

    # Master-row bookkeeping, done once per statement: make sure the row
    # exists, then bump its document count.
    dbcursor.execute(
        """INSERT OR IGNORE INTO {0} VALUES (?,?,?,?)""".format(table),
        (BoWIDFIntelligence.masterWord, 0, 0, None))
    dbcursor.execute(
        """UPDATE {0} SET DocumentCount=DocumentCount+1 WHERE Word=?""".format(table),
        (BoWIDFIntelligence.masterWord,))
    return True