
# Begin processing tweets from the get_tweets client.

# Main service loop: receive a JSON batch of tweets, classify each
# "tweet_send" entry against the training CFD and build the outgoing record.
# Single-argument print(...) prints the same text on Python 2 and Python 3.
while True:
    print("Waiting to recieve a message")
    messageIN = socketIN.recv()
    rcvd = json.loads(messageIN)
    print("Message accepted, processing")
    for tweet in rcvd:
        # Handler for tweet_send type.
        if tweet['type'] == "tweet_send":

            date = date_convert(tweet)
            words = tweet['text'].split()
            # Force float arithmetic: under Python 2 integer division the
            # class prior N(rating)/N(total) truncates to 0 and log(0) raises
            # ValueError.
            totalN = float(trainCFD.N())
            # -inf guarantees that any finite score replaces the sentinel,
            # however long the tweet is.
            best = float('-inf')
            win = 'none'
            for rating in trainCFD.keys():
                ratingFD = trainCFD[rating]
                ratingN = ratingFD.N()
                prior = ratingN / totalN
                logProb = 0
                for word in words:
                    # Smoothed P(word | rating) times the class prior.
                    # NOTE(review): the prior is applied once per word, as in
                    # the original formula, not once per tweet - confirm that
                    # this weighting is intended.
                    logProb += log(((ratingFD[word] + 0.001) / (ratingN + 0.001)) * prior)
                if logProb > best:
                    best = logProb
                    win = rating

            # Formatted JSON that will be sent to the server. Built once,
            # after the winning rating is known, so it can never be stale
            # (left over from a previous tweet) or unbound.
            data_set = {'type': "tweet_push", 'company': tweet["company"], 'date': date, 'sentiment': win, 'id': tweet["id"], 'tweet': tweet['text']}

            # Send data to the zmq server
            # NOTE(review): no send call is visible in this portion of the
            # file. If socketIN is a REP socket, a reply must be sent before
            # the next recv() - confirm against the rest of the file.
def main():
    """Build the training CFD from the sample tweets and create the sockets.

    Reads ``master_tweet_sample.json``, which is iterated as a sequence of
    dicts with ``id``, ``text`` and ``sentiment`` keys. Each tweet's text is
    split on whitespace and the tokens are grouped by sentiment: "positive",
    "negative", and everything else as neutral. Tokens whose class-weighted
    frequency is too similar across classes are discarded, and the survivors
    populate an ``nltk.ConditionalFreqDist`` keyed by sentiment. Finally a
    ZeroMQ REP socket and a REQ socket are created.

    Variable names say "trigram" for historical reasons; the tokens used
    here are single words (unigrams).

    Raises:
        IOError: if the sample file cannot be opened.
        ValueError: if the sample file is not valid JSON.
        KeyError: if a tweet lacks one of the expected keys.
    """
    # Function-local import keeps this block self-contained (Python 2.7+).
    from collections import Counter

    # The context manager closes the file even if parsing fails.
    with open("master_tweet_sample.json") as jsonFile:
        tweetRawData = json.load(jsonFile)

    # Number of leading tweets held out as a test set. Zero means every
    # tweet is placed in the training CFD.
    testSetSize = 0
    tweetCounter = 0
    # Not read by the code visible here; retained in case later code uses them.
    testList = []
    trainList = []
    testSentimentDict = dict()
    testTextDict = dict()

    allTrigramList = []
    posTrigramList = []
    negTrigramList = []
    neuTrigramList = []
    for _dict in tweetRawData:
        if tweetCounter < testSetSize:
            # Held-out tweet: remember its label and text for evaluation.
            testSentimentDict[_dict['id']] = _dict['sentiment']
            testTextDict[_dict['id']] = _dict['text']
        else:
            # Training tweet: file its tokens under its sentiment class.
            tokens = _dict['text'].split()
            sentiment = _dict['sentiment']
            allTrigramList.extend(tokens)
            if sentiment == "positive":
                posTrigramList.extend(tokens)
            elif sentiment == "negative":
                negTrigramList.extend(tokens)
            else:
                neuTrigramList.extend(tokens)
        tweetCounter += 1

    # Calculate ratios of classifications in raw training data. Float
    # division is forced so Python 2 does not truncate the ratios to 0, and
    # an empty training set yields 0.0 instead of ZeroDivisionError.
    totalCount = float(len(allTrigramList))
    posRatio = len(posTrigramList) / totalCount if totalCount else 0.0
    negRatio = len(negTrigramList) / totalCount if totalCount else 0.0
    neuRatio = len(neuTrigramList) / totalCount if totalCount else 0.0

    # Using naive 'Information Gain' technique, to improve quality of CFD by
    # removing N-grams of similar frequency across the classes.
    discardThreshold = 300
    lowerBound = 1.0 / discardThreshold
    smoothing = 0.0001

    # Count every token once up front instead of calling list.count() inside
    # the loops, which is quadratic in the corpus size.
    posCounts = Counter(posTrigramList)
    negCounts = Counter(negTrigramList)
    neuCounts = Counter(neuTrigramList)

    def isDistinctive(ratio):
        # Keep a token only when one class dominates by the threshold factor.
        return discardThreshold < ratio or ratio < lowerBound

    # Each loop walks the full token list (duplicates included), so a kept
    # token contributes its full frequency to the CFD.
    posTrigramListX = []
    for trigram in posTrigramList:
        # The positive numerator is unsmoothed, as in the original formula.
        weighted = posCounts[trigram] * posRatio
        calc1 = weighted / ((neuCounts[trigram] * neuRatio) + smoothing)
        calc2 = weighted / ((negCounts[trigram] * negRatio) + smoothing)
        if isDistinctive(calc1) or isDistinctive(calc2):
            posTrigramListX.append(trigram)

    negTrigramListX = []
    for trigram in negTrigramList:
        weighted = (negCounts[trigram] * negRatio) + smoothing
        calc1 = weighted / ((posCounts[trigram] * posRatio) + smoothing)
        calc2 = weighted / ((neuCounts[trigram] * neuRatio) + smoothing)
        if isDistinctive(calc1) or isDistinctive(calc2):
            negTrigramListX.append(trigram)

    neuTrigramListX = []
    for trigram in neuTrigramList:
        weighted = (neuCounts[trigram] * neuRatio) + smoothing
        calc1 = weighted / ((negCounts[trigram] * negRatio) + smoothing)
        calc2 = weighted / ((posCounts[trigram] * posRatio) + smoothing)
        if isDistinctive(calc1) or isDistinctive(calc2):
            neuTrigramListX.append(trigram)

    # Map each sentiment label to its surviving tokens and build the CFD.
    trainDictX = dict()
    trainDictX['positive'] = posTrigramListX
    trainDictX['negative'] = negTrigramListX
    trainDictX['neutral'] = neuTrigramListX
    trainCFD = nltk.ConditionalFreqDist(
        (sentiment, trigram)
        for sentiment in trainDictX
        for trigram in trainDictX[sentiment])

    # Socket for receiving data (REP side of a request/reply pair).
    # NOTE(review): no bind()/connect() call is visible in this portion of
    # the file; the original comment mentions port 5556 - confirm.
    contextIN = zmq.Context()
    socketIN = contextIN.socket(zmq.REP)
    # Socket for sending classified data on to the zmq server (REQ side).
    contextOUT = zmq.Context()
    socketOUT = contextOUT.socket(zmq.REQ)
