def getFeatures(tweets, word_vec_dict):
    """Build one flat feature row per tweet from word vectors and lexicon scores.

    Each returned row is the concatenation, in this order, of:
      1. the element-wise sum of the word vectors of every token in the tweet,
      2. the word vector of the token with the highest positive lexicon score,
      3. the word vector of the token with the lowest negative lexicon score,
      4. the fraction of tokens with a positive lexicon score,
      5. the fraction of tokens with a negative lexicon score.

    A token is scored from the lexicon read from '../nrc_unigram.txt' when it
    is listed there; the lexicon read from '../s140_unigram.txt' is consulted
    only for tokens missing from the first one. When a tweet has no positively
    (or negatively) scored token, the vector of its first token is used for
    part 2 (or part 3).

    Args:
        tweets: sequence of tweets, each an indexable sequence of word tokens.
        word_vec_dict: mapping from word to vector. It must contain the key
            'hi'; that vector is used only as a shape template for the zero
            accumulator. Vectors must support ``* 0``, ``+`` and ``.tolist()``
            (presumably numpy arrays -- confirm against the caller).

    Returns:
        A list with one list of features per tweet, in the order of `tweets`.

    Raises:
        KeyError: if `tweets` is non-empty and 'hi' is not in `word_vec_dict`.
    """
    nrc = readLexicon('../nrc_unigram.txt')
    s140 = readLexicon('../s140_unigram.txt')

    X = []
    for tweet in tweets:
        # 'hi' is only a shape template: multiplying by 0 yields a fresh
        # zero vector with the same dimensions as the embeddings.
        total = word_vec_dict['hi'] * 0
        for word in tweet:
            total = total + oldWork.getWordVector(word, word_vec_dict)
        row = total.tolist()

        if len(tweet) == 0:
            # An empty tweet has no token to look up and no length to divide
            # by. Pad with zeros so the row keeps the same layout as the
            # others; `total` is still the zero vector here.
            # NOTE(review): assumes getWordVector returns vectors of the same
            # length as word_vec_dict['hi'] -- confirm.
            row.extend(total)
            row.extend(total)
            row.append(0.0)
            row.append(0.0)
            X.append(row)
            continue

        numPos = 0
        numNeg = 0
        maxPos = -1
        maxNeg = 1
        maxPosIndex = 0
        maxNegIndex = 0
        for j, word in enumerate(tweet):
            # The first lexicon takes precedence; a word listed there with a
            # score of 0 is NOT looked up in the second lexicon.
            if word in nrc:
                score = nrc[word]
            elif word in s140:
                score = s140[word]
            else:
                continue

            if score > 0:
                numPos += 1
                if score > maxPos:
                    maxPos = score
                    maxPosIndex = j
            elif score < 0:
                numNeg += 1
                if score < maxNeg:
                    maxNeg = score
                    maxNegIndex = j

        row.extend(oldWork.getWordVector(tweet[maxPosIndex], word_vec_dict))
        row.extend(oldWork.getWordVector(tweet[maxNegIndex], word_vec_dict))

        # Force true division: with two ints, Python 2's `/` floors the ratio
        # to 0 for almost every tweet.
        tweetLen = float(len(tweet))
        row.append(numPos / tweetLen)
        row.append(numNeg / tweetLen)
        X.append(row)

    return X
def getFeatures(tweets, word_vec_dict):
    """Build one flat feature row per tweet from word vectors and lexicon scores.

    Each returned row is the concatenation, in this order, of:
      1. the element-wise sum of the word vectors of every token in the tweet,
      2. the word vector of the token with the highest positive lexicon score,
      3. the word vector of the token with the lowest negative lexicon score,
      4. the fraction of tokens with a positive lexicon score,
      5. the fraction of tokens with a negative lexicon score.

    A token is scored from the lexicon read from '../nrc_unigram.txt' when it
    is listed there; the lexicon read from '../s140_unigram.txt' is consulted
    only for tokens missing from the first one. When a tweet has no positively
    (or negatively) scored token, the vector of its first token is used for
    part 2 (or part 3).

    NOTE(review): a function with this same name and signature is also defined
    earlier in this file; if both are at module level, this later definition
    is the one callers get -- consider removing the duplicate.

    Args:
        tweets: sequence of tweets, each an indexable sequence of word tokens.
        word_vec_dict: mapping from word to vector. It must contain the key
            'hi'; that vector is used only as a shape template for the zero
            accumulator. Vectors must support ``* 0``, ``+`` and ``.tolist()``
            (presumably numpy arrays -- confirm against the caller).

    Returns:
        A list with one list of features per tweet, in the order of `tweets`.

    Raises:
        KeyError: if `tweets` is non-empty and 'hi' is not in `word_vec_dict`.
    """
    nrc = readLexicon('../nrc_unigram.txt')
    s140 = readLexicon('../s140_unigram.txt')

    X = []
    for tweet in tweets:
        # 'hi' is only a shape template: multiplying by 0 yields a fresh
        # zero vector with the same dimensions as the embeddings.
        total = word_vec_dict['hi'] * 0
        for word in tweet:
            total = total + oldWork.getWordVector(word, word_vec_dict)
        row = total.tolist()

        if len(tweet) == 0:
            # An empty tweet has no token to look up and no length to divide
            # by. Pad with zeros so the row keeps the same layout as the
            # others; `total` is still the zero vector here.
            # NOTE(review): assumes getWordVector returns vectors of the same
            # length as word_vec_dict['hi'] -- confirm.
            row.extend(total)
            row.extend(total)
            row.append(0.0)
            row.append(0.0)
            X.append(row)
            continue

        numPos = 0
        numNeg = 0
        maxPos = -1
        maxNeg = 1
        maxPosIndex = 0
        maxNegIndex = 0
        for j, word in enumerate(tweet):
            # The first lexicon takes precedence; a word listed there with a
            # score of 0 is NOT looked up in the second lexicon.
            if word in nrc:
                score = nrc[word]
            elif word in s140:
                score = s140[word]
            else:
                continue

            if score > 0:
                numPos += 1
                if score > maxPos:
                    maxPos = score
                    maxPosIndex = j
            elif score < 0:
                numNeg += 1
                if score < maxNeg:
                    maxNeg = score
                    maxNegIndex = j

        row.extend(oldWork.getWordVector(tweet[maxPosIndex], word_vec_dict))
        row.extend(oldWork.getWordVector(tweet[maxNegIndex], word_vec_dict))

        # Force true division: with two ints, Python 2's `/` floors the ratio
        # to 0 for almost every tweet.
        tweetLen = float(len(tweet))
        row.append(numPos / tweetLen)
        row.append(numNeg / tweetLen)
        X.append(row)

    return X