import numpy as np
from math import ceil

import UsefulFuncs

# Helper functions cleanUpWords, createStemmedWordList, and clean, and the
# module-level globals pMlist, sentimentProb, and sentiWordsImp used by
# getSummMRS, are assumed to be defined elsewhere in this module.


def tweetSimilarityScore(TFvec, fakeTweet):
    """Cosine similarity between a tweet's term-frequency vector and a
    generated (fake) tweet vector; returns 0.0 if either vector is zero."""
    v1 = TFvec
    v2 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    absv2 = np.sqrt(np.dot(v2, v2))
    if not (UsefulFuncs.feq(absv1, 0) or UsefulFuncs.feq(absv2, 0)):
        tweetScore = np.dot(v1, v2) / (absv1 * absv2)
    else:
        tweetScore = 0.0
    return tweetScore
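# Hedged usage sketch for tweetSimilarityScore: the vectors below are tiny
# hand-built term-frequency vectors (hypothetical data, not drawn from the
# real vocabulary files).
def _demo_tweetSimilarityScore():
    tf = np.array([1, 2, 0, 1])
    fake = np.array([2, 4, 0, 2])
    print tweetSimilarityScore(tf, fake)          # parallel vectors -> 1.0
    print tweetSimilarityScore(tf, np.zeros(4))   # zero vector -> 0.0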
def est_probability_UD(textList):
    """Estimate a word-probability distribution over the vocabulary in
    vocabUD.txt from the raw tweet texts in textList."""
    vocabFile = 'vocabUD.txt'
    vocabUD = []
    with open(vocabFile, 'r') as vFile:
        for line in vFile:
            # Strip the trailing newline so each vocabulary word matches the
            # tokens produced by split() below.
            vocabUD.append(line.rstrip())
    # Count occurrences of every vocabulary word across all tweets.
    vocabUD_dict = {}
    for text in textList:
        textWords = text.split(" ")
        for word in vocabUD:
            vocabUD_dict[word] = vocabUD_dict.get(word, 0) + textWords.count(word)
    sumDict = sum(vocabUD_dict.values())
    # Normalize the counts into a probability mass function.
    pDict = {}
    for word in vocabUD_dict:
        pDict[word.lower().rstrip()] = float(vocabUD_dict[word]) / sumDict
    if not UsefulFuncs.feq(sum(pDict.values()), 1.0):
        raise Exception("Probability Error: pdf doesn't sum to one")
    return pDict
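# Hedged usage sketch for est_probability_UD: assumes a small vocabUD.txt
# exists in the working directory (one word per line, e.g. "storm" and
# "flood"); the tweets are hypothetical.
def _demo_est_probability_UD():
    tweets = ["the storm hit at noon", "flood water after the storm"]
    pDict = est_probability_UD(tweets)
    for word in sorted(pDict):
        print word, pDict[word]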
def est_probability_vocab(textListsumm, vocab):
    """Estimate word probabilities over the given vocab from the stemmed
    word lists of the tweets in textListsumm."""
    sumDict = 0
    pModelEst = {}
    cleanWords = cleanUpWords(vocab.keys())
    for word in cleanWords:
        pModelEst[word] = 0.0
    # Count occurrences of every cleaned vocabulary word across all tweets.
    for tweet in textListsumm:
        tweetWordList = createStemmedWordList(tweet)
        for word in cleanWords:
            count = tweetWordList.count(word)
            pModelEst[word] = pModelEst[word] + count
            sumDict = sumDict + count
    # Normalize into a probability mass function. Iterate over the cleaned
    # keys actually stored in pModelEst (the original looped over vocab,
    # which can KeyError whenever cleanUpWords alters a word).
    for word in pModelEst:
        pModelEst[word] = pModelEst[word] / float(sumDict)
    if not UsefulFuncs.feq(sum(pModelEst.values()), 1.0):
        raise Exception("Probability Error: pdf doesn't sum to one")
    return pModelEst
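# Hedged usage sketch for est_probability_vocab: only vocab's keys are used;
# the tweets are hypothetical, and the module's cleanUpWords and
# createStemmedWordList helpers must be available.
def _demo_est_probability_vocab():
    tweets = ["storm damage downtown", "storm flooding downtown streets"]
    vocab = {'storm': 0.0, 'flooding': 0.0, 'downtown': 0.0}
    pModelEst = est_probability_vocab(tweets, vocab)
    for word in sorted(pModelEst):
        print word, pModelEst[word]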
def getSummMRS(textList, Nsumm, pModel, lenPdf, textSentiDict, TFMat,
               parameters, scorener, scorevar):
    """Monte-Carlo summary selection: repeatedly sample a synthetic ("fake")
    tweet from the word pmf, then pick the real tweet that best matches it
    on cosine similarity, sentiment, named-entity, and variance scores."""
    indexAll = []
    cleanWords = cleanUpWords(pModel.keys())
    # Load the sentiment lexicon ("word<TAB>score" per line). Note: read here
    # for parity with getSummTweet but not used below; sentiment comes from
    # the module-level sentimentProb instead.
    sentiDict = {}
    with open('senti_words.txt', 'r') as f:
        for line in f:
            new = line.split('\t')
            if len(new) > 1:
                sentiDict[new[0].rstrip()] = int(new[1].rstrip())
            else:
                sentiDict[new[0].rstrip()] = 0
    w1 = parameters['w1']
    w2 = parameters['w2']
    w3 = parameters['w3']
    w4 = parameters['w4']
    minCS_score = parameters['minCS_score']
    minFS_score = parameters['minFS_score']
    bestTweetList = []
    cnt = 0
    indexAllSelect = []
    sentimentHistogram = dict()
    summTweetMRS = []
    sentimentList = dict()
    bestTweetScore = []
    bestSentimentScore = []
    sentimentEval = []
    bestFinalScore = []
    for word in pModel.keys():
        sentimentHistogram[word] = [0.0, 0.0]
    # Map vector positions back to vocabulary words.
    wordIndexList = dict()
    count = 0
    for word in pModel.keys():
        wordIndexList[count] = word
        count = count + 1
    fid = open('temp/SummParam.txt', 'w')
    print >>fid, "Start of file\n"
    fid.close()
    fid = open('temp/SummParam.txt', 'a')
    # Generate the length samples from the fitted normal length pdf.
    lenSamp = np.random.normal(lenPdf['mean'], lenPdf['stddev'], Nsumm)
    while cnt < Nsumm:
        print "Summary tweet number: " + str(cnt) + " out of " + str(Nsumm) + " tweets"
        x = int(ceil(max(lenSamp[cnt], 1)))
        # Draw a fake tweet: x word draws from the word pmf (pMlist is the
        # module-level list of pModel probabilities in wordIndexList order).
        index = np.random.multinomial(x, pMlist, 1)
        fakeTweet = index[0]
        # Cosine similarity of the fake tweet against every unused tweet.
        tweetScore = np.array([-10.0 for col in range(len(textList))])
        v1 = np.array(fakeTweet)
        absv1 = np.sqrt(np.dot(v1, v1))
        score = np.array([0.0 for x in range(len(textList))])
        finalscore = np.array([0.0 for x in range(len(textList))])
        indexMatchList = []
        for i in range(len(textList)):
            if i not in bestTweetList:
                v2 = TFMat[i]
                absv2 = np.sqrt(np.dot(v2, v2))
                den = absv1 * absv2
                if not UsefulFuncs.feq(den, 0):
                    tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
                else:
                    tweetScore[i] = 0.0
                if tweetScore[i] > minCS_score:
                    indexMatchList.append(i)
        if max(tweetScore) > minCS_score:
            # Sample a sentiment for the fake tweet, one draw per occurrence
            # of each sentiment-bearing word.
            sentiment = 0.0
            indList = []
            count = 0
            fakewordsList = []
            for fakewords in fakeTweet:
                if wordIndexList[count] in sentiWordsImp:
                    if fakewords != 0:
                        for FWsingle in range(fakewords):
                            ind = np.random.multinomial(
                                1, sentimentProb[wordIndexList[count]], 1)
                            indList.append(ind[0])
                            sentimentHistogram[wordIndexList[count]] = [
                                sentimentHistogram[wordIndexList[count]][0] + ind[0][0],
                                sentimentHistogram[wordIndexList[count]][1] + ind[0][1]
                            ]
                        fakewordsList.append(wordIndexList[count])
                count = count + 1
            if len(indList) == 0:
                sentiment = 0.0
            else:
                xarray = np.array(indList)
                # Majority vote over the one-hot draws decides the sentiment.
                if xarray.transpose()[0].sum() > xarray.transpose()[1].sum():
                    sentiment = 1.0
                else:
                    sentiment = -1.0
            # Combine similarity, sentiment match, NER, and variance scores.
            for j in indexMatchList:
                if textSentiDict[j] * sentiment >= 1.0:
                    score[j] = 1.0
                finalscore[j] = (w1 * tweetScore[j] + w2 * score[j] +
                                 w3 * scorener[j] + w4 * scorevar[j])
            if max(finalscore) > minFS_score:
                # Store the selected tweet and its parameters.
                bestTweetIndex = finalscore.argmax()
                Tweet = textList[bestTweetIndex]
                summTweetMRS.append(Tweet)
                bestTweetList.append(bestTweetIndex)
                indexAllSelect.append(index)
                sentimentList[cnt] = xarray
                bestTweetScore.append(tweetScore[bestTweetIndex])
                bestSentimentScore.append(textSentiDict[bestTweetIndex])
                sentimentEval.append(sentiment)
                bestFinalScore.append(finalscore[bestTweetIndex])
                cnt = cnt + 1
                # Print the parameters into the file.
                print >>fid, "faketweet=%s\nGenerated Sentiment=%s\nGenerated Tweet=%s" % (
                    str(fakewordsList), str(sentiment), str(clean(Tweet)))
        # Store all the generated fake tweets, accepted or not.
        indexAll.append(index)
    fid.close()
    return (summTweetMRS, bestTweetList, indexAllSelect, sentimentList,
            bestTweetScore, bestSentimentScore, sentimentEval,
            bestFinalScore, sentimentHistogram)
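# Hedged usage sketch for getSummMRS: all values below are hypothetical, and
# the module-level globals pMlist, sentimentProb, and sentiWordsImp plus the
# senti_words.txt lexicon and the temp/ directory must already exist. TFMat
# rows are term-frequency vectors aligned with pModel's key order.
def _demo_getSummMRS(textList, pModel, textSentiDict, TFMat, scorener, scorevar):
    parameters = {'w1': 0.4, 'w2': 0.2, 'w3': 0.2, 'w4': 0.2,
                  'minCS_score': 0.7, 'minFS_score': 0.5}
    lenPdf = {'mean': 8.0, 'stddev': 2.0}   # fitted normal tweet-length pdf
    results = getSummMRS(textList, 5, pModel, lenPdf, textSentiDict, TFMat,
                         parameters, scorener, scorevar)
    summTweetMRS = results[0]
    for tweet in summTweetMRS:
        print tweet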
def getSummTweet_vSnt(fakeTweet, textList, pModel, bestTweetList, fid,
                      textSentiDict, sentimentProb):
    """Pick the real tweet that best matches one fake tweet, scoring on
    cosine similarity plus a sampled-sentiment match."""
    tweetScore = np.array([-10.0 for col in range(len(textList))])
    cleanWords = cleanUpWords(pModel.keys())
    w1 = 0.5
    w2 = 0.5
    minCS_score = 0.8
    # Preprocessing and initial calculations.
    v1 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    score = np.array([0.0 for x in range(len(textList))])
    finalscore = np.array([0.0 for x in range(len(textList))])
    # Map vector positions back to vocabulary words.
    wordIndexList = dict()
    count = 0
    for word in pModel.keys():
        wordIndexList[count] = word
        count = count + 1
    for i in range(len(textList)):
        if i not in bestTweetList:
            # Build the term-frequency vector of tweet i over the vocabulary.
            textWords = createStemmedWordList(textList[i])
            TFvec = np.array([0 for col in range(len(pModel.values()))])
            j = 0
            for wordVocab in cleanWords:
                TFvec[j] = textWords.count(wordVocab)
                j = j + 1
            v2 = TFvec
            absv2 = np.sqrt(np.dot(v2, v2))
            den = absv1 * absv2
            if not UsefulFuncs.feq(den, 0):
                tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
            else:
                tweetScore[i] = 0.0
    # Calculate the sentiment score from the fake tweet.
    if max(tweetScore) > minCS_score:
        sentiment = 0.0
        indList = []
        count = 0
        for fakewords in fakeTweet:
            if fakewords != 0:
                for FWsingle in range(fakewords):
                    ind = np.random.multinomial(
                        1, sentimentProb[wordIndexList[count]], 1)
                    indList.append(ind[0])
            count = count + 1
        xarray = np.array(indList)
        if xarray.transpose()[0].sum() > xarray.transpose()[1].sum():
            sentiment = 1.0
        else:
            sentiment = -1.0
        for j in range(len(textList)):
            if textSentiDict[j] * sentiment >= 1.0:
                score[j] = 1.0
            finalscore[j] = w1 * tweetScore[j] + w2 * score[j]
    bestTweetIndex = finalscore.argmax()
    return (textList[bestTweetIndex], bestTweetIndex,
            tweetScore[bestTweetIndex], score[bestTweetIndex],
            finalscore[bestTweetIndex])
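# Hedged sketch of the per-word sentiment draw used above: each occurrence of
# a word yields a one-hot sample from that word's hypothetical
# [P(positive), P(negative)] vector, and the majority over all draws decides
# the tweet's overall sentiment.
def _demo_sentiment_sampling():
    probPosNeg = [0.7, 0.3]
    draws = [np.random.multinomial(1, probPosNeg, 1)[0] for k in range(5)]
    xarray = np.array(draws)
    if xarray.transpose()[0].sum() > xarray.transpose()[1].sum():
        print "sentiment = +1.0"
    else:
        print "sentiment = -1.0"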
def getSummTweet(fakeTweet, textList, pModel, bestTweetList, fid):
    """Pick the real tweet that best matches one fake tweet, scoring on
    cosine similarity plus a lexicon-based sentiment magnitude."""
    tweetScore = np.array([-10.0 for col in range(len(textList))])
    cleanWords = cleanUpWords(pModel.keys())
    w1 = 0.7
    w2 = 0.3
    minCS_score = 0.7
    # Load the sentiment lexicon ("word<TAB>score" per line).
    sentiDict = {}
    with open('senti_words.txt', 'r') as f:
        for line in f:
            new = line.split('\t')
            if len(new) > 1:
                sentiDict[new[0].rstrip()] = int(new[1].rstrip())
            else:
                sentiDict[new[0].rstrip()] = 0
    v1 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    score = np.array([0.0 for x in range(len(textList))])
    finalscore = np.array([0.0 for x in range(len(textList))])
    for i in range(len(textList)):
        if i not in bestTweetList:
            # Build the term-frequency vector of tweet i over the vocabulary.
            textWords = createStemmedWordList(textList[i])
            TFvec = np.array([0 for col in range(len(pModel.values()))])
            j = 0
            for wordVocab in cleanWords:
                TFvec[j] = textWords.count(wordVocab)
                j = j + 1
            v2 = TFvec
            absv2 = np.sqrt(np.dot(v2, v2))
            den = absv1 * absv2
            if not UsefulFuncs.feq(den, 0):
                tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
            else:
                tweetScore[i] = 0.0
            if tweetScore[i] > minCS_score:
                # Sum the lexicon sentiment of the candidate tweet's words,
                # cap its magnitude at 3, and normalize to [0, 1].
                words = textList[i].split(' ')
                for word in words:
                    score[i] += sentiDict.get(word, 0)
                score[i] = min(abs(score[i]), 3) / 3
                finalscore[i] = w1 * tweetScore[i] + w2 * score[i]
    bestTweetIndex = finalscore.argmax()
    print "Found the best Tweet"
    return (textList[bestTweetIndex], bestTweetIndex,
            tweetScore[bestTweetIndex], score[bestTweetIndex],
            finalscore[bestTweetIndex])
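# Hedged usage sketch for getSummTweet: the data is hypothetical, the fake
# tweet's counts must be aligned with pModel's key order (arbitrary for a
# plain dict, shown only for illustration), and senti_words.txt and the
# temp/ directory must exist on disk.
def _demo_getSummTweet():
    textList = ["storm damage downtown", "sunny day at the park"]
    pModel = {'storm': 0.5, 'damage': 0.3, 'downtown': 0.2}
    fakeTweet = np.array([2, 1, 1])
    fid = open('temp/demo.txt', 'w')
    Tweet, idx, cs, senti, final = getSummTweet(fakeTweet, textList, pModel, [], fid)
    fid.close()
    print "selected:", Tweet, "cosine:", cs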