def tweetSimilarityScore(TFvec, fakeTweet):
    """Cosine similarity between a tweet's term-frequency vector and a
    sampled 'fake tweet' count vector; returns 0.0 if either vector is
    (numerically) zero."""
    v1 = TFvec
    v2 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    absv2 = np.sqrt(np.dot(v2, v2))

    if not (UsefulFuncs.feq(absv1, 0) or UsefulFuncs.feq(absv2, 0)):
        tweetScore = np.dot(v1, v2) / (absv1 * absv2)
    else:
        tweetScore = 0.0

    return tweetScore
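#Minimal usage sketch (added for illustration; not part of the original
#pipeline). Assumes the module-level `import numpy as np`.
def _demo_tweetSimilarityScore():
    tf = np.array([2, 0, 1])    #term frequencies of a real tweet
    fake = np.array([1, 1, 1])  #word counts of a sampled fake tweet
    sim = tweetSimilarityScore(tf, fake)  #3/(sqrt(5)*sqrt(3)) ~ 0.775
    zero = tweetSimilarityScore(tf, np.array([0, 0, 0]))  #guard path -> 0.0
    return sim, zero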
def est_probability_UD(textList):
    """Estimate a unigram pmf over the vocabulary in vocabUD.txt from the
    raw word counts observed in textList."""
    vocabFile = 'vocabUD.txt'
    vocabUD = []
    with open(vocabFile, 'r') as vFile:
        for line in vFile:
            #Strip the trailing newline; otherwise no word ever matches
            #and the normalization below divides by zero.
            vocabUD.append(line.rstrip())
    vocabUD_dict = {}
    for text in textList:
        textWords = text.split(" ")
        for word in vocabUD:
            count = textWords.count(word)
            if word in vocabUD_dict:
                vocabUD_dict[word] = vocabUD_dict[word] + count
            else:
                vocabUD_dict[word] = count
    sumDict = 0
    for word in vocabUD_dict:
        sumDict = sumDict + vocabUD_dict[word]
    pDict = {}
    for word in vocabUD_dict.keys():
        pDict[word.lower()] = float(vocabUD_dict[word]) / sumDict
    if not UsefulFuncs.feq(sum(pDict.values()), 1.0):
        raise Exception('Probability Error: pdf does not sum to one')

    return pDict
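#Minimal sketch (illustration only): with a hypothetical vocabUD.txt
#containing the two lines "good" and "bad", three toy tweets give the
#maximum-likelihood unigram estimates shown below.
def _demo_est_probability_UD():
    tweets = ['good good day', 'bad day', 'good game']
    #"good" occurs 3 times and "bad" once among the vocabulary words,
    #so this returns {'good': 0.75, 'bad': 0.25}.
    return est_probability_UD(tweets)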
def est_probability_vocab(textListsumm, vocab):
    """Estimate a unigram pmf over the cleaned vocabulary derived from
    `vocab` using the stemmed word counts observed in textListsumm."""
    sumDict = 0
    pModelEst = {}
    cleanWords = cleanUpWords(vocab.keys())

    for word in cleanWords:
        pModelEst[word] = 0.0

    for text in textListsumm:
        tweetWordList = createStemmedWordList(text)
        for word in cleanWords:
            pModelEst[word] = pModelEst[word] + tweetWordList.count(word)
            sumDict = sumDict + tweetWordList.count(word)

    #Normalize over the cleaned keys; iterating `vocab` here would raise
    #a KeyError whenever cleanUpWords() has altered a key.
    for word in pModelEst:
        pModelEst[word] = pModelEst[word] / float(sumDict)

    if not UsefulFuncs.feq(sum(pModelEst.values()), 1.0):
        raise Exception('Probability Error: pdf does not sum to one')

    return pModelEst
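#Minimal sketch (illustration only): `vocab` is a dict keyed by the
#vocabulary words (only the keys are used), and cleanUpWords /
#createStemmedWordList are the module's own preprocessing helpers.
def _demo_est_probability_vocab():
    vocab = {'good': 0, 'bad': 0}
    tweets = ['good good day', 'bad day']
    #Assuming the cleaners leave "good" and "bad" intact, this returns
    #{'good': 2/3., 'bad': 1/3.} up to floating-point rounding.
    return est_probability_vocab(tweets, vocab)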
def getSummMRS(textList, Nsumm, pModel, lenPdf, textSentiDict, TFMat,
               parameters, scorener, scorevar):
    """Monte-Carlo summary selection: repeatedly sample a 'fake tweet'
    from the unigram model, match it against the corpus by cosine
    similarity, check sentiment agreement, and keep the best-scoring
    real tweet until Nsumm tweets have been selected.

    Relies on the module-level globals pMlist (the unigram pmf as a
    list), sentimentProb (per-word sentiment pmfs) and sentiWordsImp
    (the sentiment-bearing vocabulary)."""

    indexAll = []
    sentiDict = {}
    with open('senti_words.txt', 'r') as f:
        for line in f:
            new = line.split('\t')
            if len(new) > 1:
                sentiDict[new[0].rstrip()] = int(new[1].rstrip())
            else:
                sentiDict[new[0].rstrip()] = 0

    w1 = parameters['w1']
    w2 = parameters['w2']
    w3 = parameters['w3']
    w4 = parameters['w4']
    minCS_score = parameters['minCS_score']
    minFS_score = parameters['minFS_score']
    bestTweetList = []
    cnt = 0
    indexAllSelect = []
    sentimentHistogram = dict()
    summTweetMRS = []
    sentimentList = dict()
    bestTweetScore = []
    bestSentimentScore = []
    sentimentEval = []
    bestFinalScore = []

    for word in pModel.keys():
        sentimentHistogram[word] = [0.0, 0.0]

    #Create the word-index list: column i of a fake-tweet sample
    #corresponds to wordIndexList[i].
    count = 0
    wordIndexList = dict()
    for word in pModel.keys():
        wordIndexList[count] = word
        count = count + 1

    fid = open('temp/SummParam.txt', 'w')
    print >> fid, "Start of file\n"

    #Generate the length samples
    lenSamp = np.random.normal(lenPdf['mean'], lenPdf['stddev'], Nsumm)

    while (cnt < Nsumm):
        print "Summary tweet number: " + str(cnt) + " out of " + str(Nsumm) + " tweets"
        #Sample a fake tweet: draw its length, then draw that many words
        #from the unigram model.
        x = int(ceil(max((lenSamp[cnt], 1))))
        index = numpy.random.multinomial(x, pMlist, 1)
        fakeTweet = index[0]

        tweetScore = -10.0 * np.ones(len(textList))
        v1 = np.array(fakeTweet)
        absv1 = np.sqrt(np.dot(v1, v1))
        score = np.zeros(len(textList))
        finalscore = np.zeros(len(textList))
        indexMatchList = []
        for i in range(len(textList)):
            if (i not in bestTweetList):
                v2 = TFMat[i]
                absv2 = np.sqrt(np.dot(v2, v2))
                den = absv1 * absv2

                #Cosine similarity, guarding against zero-length vectors.
                if (not (UsefulFuncs.feq(den, 0))):
                    tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
                else:
                    tweetScore[i] = 0.0

                if (tweetScore[i] > minCS_score):
                    indexMatchList.append(i)
        if (max(tweetScore) > minCS_score):
            #Sample a sentiment label for each sentiment-bearing word
            #occurrence in the fake tweet from that word's sentiment pmf.
            indList = []
            count = 0
            fakewordsList = []
            for fakewords in fakeTweet:
                if (wordIndexList[count] in sentiWordsImp):
                    if (fakewords != 0):
                        for FWsingle in range(fakewords):
                            ind = numpy.random.multinomial(
                                1, sentimentProb[wordIndexList[count]], 1)
                            indList.append(ind[0])
                            sentimentHistogram[wordIndexList[count]] = [
                                (sentimentHistogram[wordIndexList[count]][0] +
                                 ind[0][0]),
                                (sentimentHistogram[wordIndexList[count]][1] +
                                 ind[0][1])
                            ]
                            fakewordsList.append(wordIndexList[count])
                count = count + 1

            #Majority vote over the sampled labels; xarray is assigned
            #unconditionally so the bookkeeping below never sees a stale
            #value from a previous iteration.
            xarray = numpy.array(indList)
            if (len(indList) == 0):
                sentiment = 0.0
            elif ((xarray.transpose()[0].sum()) >
                  (xarray.transpose()[1].sum())):
                sentiment = 1.0
            else:
                sentiment = -1.0

            for j in indexMatchList:
                if (textSentiDict[j] * sentiment >= 1.0):
                    score[j] = 1.0
                    finalscore[j] = w1 * tweetScore[j] + w2 * score[
                        j] + w3 * scorener[j] + w4 * scorevar[j]

            if (max(finalscore) > minFS_score):
                bestTweetIndex = finalscore.argmax()
                #Store the selected parameters
                Tweet = textList[bestTweetIndex]
                summTweetMRS.append(Tweet)
                bestTweetList.append(bestTweetIndex)
                indexAllSelect.append(index)
                sentimentList[cnt] = xarray
                bestTweetScore.append(tweetScore[bestTweetIndex])
                bestSentimentScore.append(textSentiDict[bestTweetIndex])
                sentimentEval.append(sentiment)
                bestFinalScore.append(finalscore[bestTweetIndex])
                cnt = cnt + 1
                #Print the parameters into the file
                print >> fid, "faketweet=%s\nGenerated Sentiment=%s\nGenerated Tweet=%s" % (
                    str(fakewordsList), str(sentiment), str(clean(Tweet)))

        #Store all the generated fakeTweets
        indexAll.append(index)

    fid.close()
    return (summTweetMRS, bestTweetList, indexAllSelect, sentimentList,
            bestTweetScore, bestSentimentScore, sentimentEval, bestFinalScore,
            sentimentHistogram)
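#Minimal usage sketch (illustration only; every value below is a
#placeholder, and pMlist/sentimentProb/sentiWordsImp must already be
#defined as module-level globals before calling).
def _demo_getSummMRS(textList, pModel, textSentiDict, TFMat, scorener,
                     scorevar):
    parameters = {'w1': 0.4, 'w2': 0.3, 'w3': 0.2, 'w4': 0.1,
                  'minCS_score': 0.5, 'minFS_score': 0.6}
    lenPdf = {'mean': 10.0, 'stddev': 3.0}  #fake-tweet length model
    return getSummMRS(textList, 5, pModel, lenPdf, textSentiDict, TFMat,
                      parameters, scorener, scorevar)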
def getSummTweet_vSnt(fakeTweet, textList, pModel, bestTweetList, fid,
                      textSentiDict, sentimentProb):
    """Sentiment-aware variant: pick the corpus tweet closest (by cosine
    similarity) to fakeTweet whose sentiment agrees with the sentiment
    sampled for the fake tweet."""

    tweetScore = -10.0 * np.ones(len(textList))
    cleanWords = cleanUpWords(pModel.keys())
    w1 = 0.5
    w2 = 0.5
    minCS_score = 0.8

    #Preprocessing and initial calculations
    v1 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    score = np.zeros(len(textList))
    finalscore = np.zeros(len(textList))
    bestTweetIndex = 0  #fallback so the return below cannot hit an unbound name

    #Create wordIndex List.
    count = 0
    wordIndexList = dict()
    for word in pModel.keys():
        wordIndexList[count] = word
        count = count + 1
    for i in range(len(textList)):
        if (i not in bestTweetList):
            #Build the term-frequency vector of tweet i. TFvec must have
            #the same length and ordering as fakeTweet, which assumes
            #cleanUpWords() preserves the order and count of pModel's keys.
            textWords = createStemmedWordList(textList[i])
            j = 0
            TFvec = np.array([0 for col in range(len(pModel))])
            for wordVocab in cleanWords:
                TFvec[j] = textWords.count(wordVocab)
                j = j + 1

            v2 = TFvec
            absv2 = np.sqrt(np.dot(v2, v2))
            den = absv1 * absv2

            #Cosine similarity, guarding against zero-length vectors.
            if (not (UsefulFuncs.feq(den, 0))):
                tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
            else:
                tweetScore[i] = 0.0

    if (max(tweetScore) > minCS_score):
        #Sample one sentiment label per word occurrence in the fake tweet.
        indList = []
        count = 0
        for fakewords in fakeTweet:
            if (fakewords != 0):
                for FWsingle in range(fakewords):
                    ind = numpy.random.multinomial(
                        1, sentimentProb[wordIndexList[count]], 1)
                    indList.append(ind[0])
            count = count + 1

        #Majority vote over the sampled labels.
        xarray = numpy.array(indList)
        if ((xarray.transpose()[0].sum()) > (xarray.transpose()[1].sum())):
            sentiment = 1.0
        else:
            sentiment = -1.0

        for j in range(len(textList)):
            if (textSentiDict[j] * sentiment >= 1.0):
                score[j] = 1.0
                finalscore[j] = w1 * tweetScore[j] + w2 * score[j]

        bestTweetIndex = finalscore.argmax()

    return (textList[bestTweetIndex], bestTweetIndex,
            tweetScore[bestTweetIndex], score[bestTweetIndex],
            finalscore[bestTweetIndex])
def getSummTweet(fakeTweet, textList, pModel, bestTweetList, fid):
    """Pick the corpus tweet closest (by cosine similarity) to fakeTweet,
    blending the similarity with a lexicon-based sentiment score."""

    tweetScore = -10.0 * np.ones(len(textList))
    cleanWords = cleanUpWords(pModel.keys())
    w1 = 0.7
    w2 = 0.3
    minCS_score = 0.7

    #Load the sentiment lexicon: one "word<TAB>polarity" entry per line.
    sentiDict = {}
    with open('senti_words.txt', 'r') as f:
        for line in f:
            new = line.split('\t')
            if len(new) > 1:
                sentiDict[new[0].rstrip()] = int(new[1].rstrip())
            else:
                sentiDict[new[0].rstrip()] = 0

    v1 = fakeTweet
    absv1 = np.sqrt(np.dot(v1, v1))
    score = np.zeros(len(textList))
    finalscore = np.zeros(len(textList))
    for i in range(len(textList)):
        if (i not in bestTweetList):
            #Term-frequency vector of tweet i; must match fakeTweet's
            #length, which assumes cleanUpWords() preserves the order and
            #count of pModel's keys.
            textWords = createStemmedWordList(textList[i])
            j = 0
            TFvec = np.array([0 for col in range(len(pModel))])
            for wordVocab in cleanWords:
                TFvec[j] = textWords.count(wordVocab)
                j = j + 1

            v2 = TFvec
            absv2 = np.sqrt(np.dot(v2, v2))
            den = absv1 * absv2

            #Cosine similarity, guarding against zero-length vectors.
            if (not (UsefulFuncs.feq(den, 0))):
                tweetScore[i] = np.divide(float(np.dot(v1, v2)), den)
            else:
                tweetScore[i] = 0.0

            if (tweetScore[i] > minCS_score):
                #Lexicon sentiment of the tweet, clipped to [0, 1].
                for word in textList[i].split(' '):
                    score[i] += sentiDict.get(word, 0)
                score[i] = min(abs(score[i]), 3.0) / 3.0

            finalscore[i] = w1 * tweetScore[i] + w2 * score[i]
    bestTweetIndex = finalscore.argmax()
    print "Found the best Tweet"
    return (textList[bestTweetIndex], bestTweetIndex,
            tweetScore[bestTweetIndex], score[bestTweetIndex],
            finalscore[bestTweetIndex])
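#Minimal usage sketch (illustration only): fakeTweet is a multinomial
#count vector over pModel's vocabulary, drawn the same way getSummMRS
#draws its samples; the names below are placeholders.
def _demo_getSummTweet(textList, pModel, fid):
    pMlist = list(pModel.values())  #sampling pmf over the vocabulary
    fakeTweet = numpy.random.multinomial(10, pMlist, 1)[0]
    return getSummTweet(fakeTweet, textList, pModel, [], fid)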