def classify(text, verbose=False):
    """Label ``text`` as "Positive", "Negative" or 'Neutral'.

    ``text`` is scored as a single sentence with
    ``senti_classifier.polarity_scores``. The negative score is negated and
    the sign of the combined total selects the label.

    Returns the label, or, when ``verbose`` is true, a dict holding the
    positive score, the negated negative score, the label and the total.
    """
    positive, negative = senti_classifier.polarity_scores([text])
    negative = -negative
    total = positive + negative

    if total > 0:
        label = "Positive"
    elif total < 0:
        label = "Negative"
    else:
        label = 'Neutral'

    if not verbose:
        return label
    return {
        'pos_score': positive,
        'neg_score': negative,
        'polarity': label,
        'total_score': total,
    }
def post(self):
    """Score the posted sentences for sentiment and count swear-word hits.

    The request body is parsed as JSON (parsing is forced). When it has no
    'sentences' key the string "Nope" is returned.

    Otherwise returns a dict with:
      * 'positive_score' / 'negative_score' -- the pair returned by
        ``senti_classifier.polarity_scores(sentences)``;
      * 'swear_count' -- one hit per (swear word, sentence) pair in which
        the word occurs as a substring of the sentence;
      * 'swear_percentage' -- hits relative to a rough word count, to two
        decimals (0 when there are no hits).
    """
    json_data = request.get_json(force=True)
    print(json_data)
    if 'sentences' not in json_data:
        return "Nope"
    sentences = json_data['sentences']

    swear_score = sum(
        1
        for swear in swear_words
        for single_sentence in sentences
        if swear in single_sentence
    )

    percentage = 0
    if swear_score:
        # The word count comes from the space-split string form of the whole
        # ``sentences`` value, so it is only an approximation.
        words = str(sentences).split(' ')
        print(len(words))
        percentage = (float(swear_score) / len(words)) * 100.0
        # Round-trip through a 2-decimal format to trim the float.
        percentage = float("{:.2f}".format(percentage))

    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
    print(sentences)
    # Same output as the Python 2 statement ``print a, b``, valid on 3 too.
    print("%s %s" % (pos_score, neg_score))
    return {'positive_score': pos_score,
            'negative_score': neg_score,
            'swear_count': swear_score,
            'swear_percentage': percentage}
def calculate_score(tweet, polarityDictionary):
    """Score the tokens of ``tweet`` using known phrases where possible.

    ``tweet`` is an iterable of string tokens. ``polarityDictionary`` maps
    ``frozenset`` phrases to score lists and is updated in place with every
    single token that had to be scored by ``senti_classifier``.

    Returns ``(score, polarityDictionary)`` where ``score`` maps each matched
    phrase (a ``frozenset`` of tokens) to
    ``[posScore, negScore, neutralScore]``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm it against the original file.
    """
    score = {}
    # Normalise tokens: lower-case, strip the characters in specialChar,
    # then drop tokens that became empty.
    tweet = [i.lower().strip(specialChar) for i in tweet]
    tweet = [i for i in tweet if i]
    length = len(tweet)
    init = 0
    neutralScore = 0
    while init < length:
        for i in range(init, length):
            flag = 0
            # Try the longest span starting at i first, shrinking from the right.
            for j in range(length, i, -1):
                # Phrases are looked up as unordered sets of tokens.
                phrase = frozenset(tweet[i:j])
                if phrase in polarityDictionary:
                    # Resume scanning right after the matched span.
                    init = j
                    flag = 1
                    # positive / negative / neutral are indices defined
                    # outside this block.
                    posScore = polarityDictionary[phrase][positive]
                    negScore = polarityDictionary[phrase][negative]
                    neutralScore = polarityDictionary[phrase][neutral]
                    score[phrase] = [posScore, negScore, neutralScore]
                    break
            if flag == 1:
                break
            else:
                # No known phrase starts at i: score the single token and
                # remember it, so a later pass of the while loop matches it
                # as a one-token phrase and advances init.
                posScore, negScore = senti_classifier.polarity_scores([tweet[i]])
                score[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
                polarityDictionary[frozenset([tweet[i]])] = [posScore, negScore, neutralScore]
    return score, polarityDictionary
def calculateScore(tweet, polarityDictionary):
    """Score the tokens of ``tweet`` using known phrases where possible.

    ``tweet`` is an iterable of string tokens. ``polarityDictionary`` maps
    ``frozenset`` phrases to score lists and is updated in place with every
    single token that had to be scored by ``senti_classifier``.

    Returns ``(score, polarityDictionary)`` where ``score`` maps each matched
    phrase (a ``frozenset`` of tokens) to
    ``[posScore, negScore, neutralScore]``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm it against the original file.
    """
    score = {}
    # Normalise tokens: lower-case, strip the characters in specialChar,
    # then drop tokens that became empty.
    tweet = [i.lower().strip(specialChar) for i in tweet]
    tweet = [i for i in tweet if i]
    length = len(tweet)
    init = 0
    neutralScore = 0
    while init < length:
        for i in range(init, length):
            flag = 0
            # Longest candidate span first, shrinking from the right.
            for j in range(length, i, -1):
                # Phrases are looked up as unordered sets of tokens.
                phrase = frozenset(tweet[i:j])
                if phrase in polarityDictionary:
                    # Resume scanning right after the matched span.
                    init = j
                    flag = 1
                    # positive / negative / neutral are indices defined
                    # outside this block.
                    posScore = polarityDictionary[phrase][positive]
                    negScore = polarityDictionary[phrase][negative]
                    neutralScore = polarityDictionary[phrase][neutral]
                    score[phrase] = [posScore, negScore, neutralScore]
                    break
            if flag == 1:
                break
            else:
                # Unknown token: score it alone and cache it so a later pass
                # of the while loop matches it and advances init.
                posScore, negScore = senti_classifier.polarity_scores(
                    [tweet[i]])
                score[frozenset([tweet[i]
                                 ])] = [posScore, negScore, neutralScore]
                polarityDictionary[frozenset(
                    [tweet[i]])] = [posScore, negScore, neutralScore]
    return score, polarityDictionary
def hmatrix_filtered():
    """Fill ``hg.content_filtered_words_matrix_tops`` with per-word features.

    For every item ``w`` of ``hg.content_filtered_words_tops`` the entry
    keyed by ``w[0]`` receives:
      * 'Qtde'     -- ``w[1]``
      * 'Tag'      -- the tag ``nltk.pos_tag`` gives the word on its own
      * 'Synonym'  -- name of the first WordNet synset, or "---" if none
      * 'Stemmed'  -- ``ps.stem`` of the word
      * 'Lemm'     -- ``lemmatizer.lemmatize`` of the word
      * 'Score +' / 'Score -' -- senti_classifier polarity scores
    """
    for w in hg.content_filtered_words_tops:
        word = w[0]

        # Quantity
        entry = {'Qtde': w[1]}
        hg.content_filtered_words_matrix_tops[word] = entry

        # Tagged
        entry['Tag'] = nltk.pos_tag([word])[0][1]

        # Synonym (look the synsets up once instead of twice)
        synsets = wordnet.synsets(word)
        entry['Synonym'] = synsets[0].name() if synsets else "---"

        # Antonym lookup was disabled in the original and stays disabled.

        # Stemmed
        entry['Stemmed'] = ps.stem(word)

        # Lemmatized
        entry['Lemm'] = lemmatizer.lemmatize(word)

        # Sentiment Analyzer. polarity_scores iterates over its argument as
        # a list of sentences; a bare string would be walked character by
        # character, so the word is wrapped in a list.
        pos_score, neg_score = senti_classifier.polarity_scores([word])
        entry['Score +'] = pos_score
        entry['Score -'] = neg_score
def sentiment(string):
    """Return 'pos', 'neg' or 'neutral' for a single piece of text.

    The text is scored as one sentence; equal scores give 'neutral',
    otherwise the larger score decides.
    """
    scores = senti_classifier.polarity_scores([string])
    positive = scores[0]
    negative = scores[1]
    if positive == negative:
        return 'neutral'
    if positive > negative:
        return 'pos'
    return 'neg'
def addaffect(tweets):
    """Replace the third field of every entry with an affect value.

    Each entry's ``[2]`` item is passed to
    ``senti_classifier.polarity_scores`` and then overwritten in place with
    the positive score when it is strictly larger, otherwise with the
    negative score. The same ``tweets`` object is returned.
    """
    for entry in tweets:
        positive, negative = senti_classifier.polarity_scores(entry[2])
        entry[2] = positive if positive > negative else negative
    return tweets
def sentimentTweet(tweet):
    """Vote on a tweet: 1 if positive, -1 if negative, 0 on a tie."""
    positive, negative = senti_classifier.polarity_scores([tweet])
    if positive > negative:
        return 1
    if positive < negative:
        return -1
    return 0
def get_sentiment(userinput):
    """Return True when the input reads as affirmative or positive.

    Any input containing 'yes' (case-insensitive) is accepted outright;
    otherwise the lower-cased text is scored and the result is True only
    when the positive score is strictly larger than the negative one.
    """
    lowered = userinput.lower()
    if 'yes' in lowered:
        return True
    positive, negative = senti_classifier.polarity_scores([lowered])
    return bool(positive > negative)
def calculateScore(tweet):
    """Score every token of ``tweet`` on its own.

    Tokens are lower-cased and stripped of the characters in ``specialChar``.
    Returns a dict mapping ``frozenset([token])`` to
    ``[posScore, negScore, 0]``; repeated tokens share one entry.
    """
    neutral_value = 0
    cleaned = [token.lower().strip(specialChar) for token in tweet]
    result = {}
    for token in cleaned:
        positive_value, negative_value = senti_classifier.polarity_scores([token])
        result[frozenset([token])] = [positive_value, negative_value, neutral_value]
    return result
def bayesSentiment(self, text):
    """Return ``[pos_score, neg_score]`` for ``text``.

    The text is split into sentences with a ``PunktSentenceTokenizer`` and
    the sentence list is scored in one ``polarity_scores`` call.
    """
    from nltk.tokenize.punkt import PunktSentenceTokenizer
    from senti_classifier import senti_classifier

    # break up text into sentences
    sentence_list = PunktSentenceTokenizer().tokenize(text)
    positive, negative = senti_classifier.polarity_scores(sentence_list)
    return [positive, negative]
def computeSentiment(text):
    """Return 1, -1 or 0 for positive, negative or tied sentiment.

    ``text`` is handed to ``senti_classifier.polarity_scores`` unchanged.
    NOTE(review): presumably callers pass a list of sentences -- confirm.
    """
    positive, negative = senti_classifier.polarity_scores(text)
    if positive > negative:
        return 1
    if positive == negative:
        return 0
    return -1
def calculateScore(tweet):
    """Return per-token polarity scores for ``tweet``.

    Every token is lower-cased, stripped of the characters in
    ``specialChar`` and scored alone. The result maps
    ``frozenset([token])`` to ``[posScore, negScore, 0]``.
    """
    scores_by_token = {}
    for raw_token in tweet:
        token = raw_token.lower().strip(specialChar)
        pos_value, neg_value = senti_classifier.polarity_scores([token])
        scores_by_token[frozenset([token])] = [pos_value, neg_value, 0]
    return scores_by_token
def classify_sentences(sents):
    """Score each sentence separately.

    Returns a list with one ``[sentence, positive, negative]`` list per
    item of ``sents``, in the same order.
    """
    def score_one(sentence):
        positive, negative = senti_classifier.polarity_scores([sentence])
        return [sentence, positive, negative]

    return [score_one(sentence) for sentence in sents]
def computeSentiment(text):
    """Compare the polarity scores of ``text``.

    Returns 0 when the scores are equal, 1 when the positive score is
    larger and -1 otherwise. ``text`` goes to ``polarity_scores`` as-is.
    NOTE(review): presumably a list of sentences is expected -- confirm.
    """
    scores = senti_classifier.polarity_scores(text)
    pos_value, neg_value = scores
    if pos_value == neg_value:
        outcome = 0
    elif pos_value > neg_value:
        outcome = 1
    else:
        outcome = -1
    return outcome
def sentiment_score(dataset, sample):
    """Return a net sentiment value computed from a sample of ``dataset``.

    Sentences whose position is a multiple of ``sample`` are scored and
    their positive and negative scores are summed. The result is the
    positive share of the combined sum minus the negative share.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; the sums are assumed to be inside the sampling branch and the
    counter outside it -- confirm against the original file.
    """
    count = pos_sum = neg_sum = 0
    for sentence in dataset:
        # Only positions 0, sample, 2*sample, ... are scored.
        if count%sample==0:
            pos_score, neg_score = senti_classifier.polarity_scores([sentence])
            #print "pos_score: " + str(pos_score) + " neg_score" + str(neg_score)
            pos_sum += pos_score
            neg_sum += neg_score
        count += 1
    sum_val = pos_sum + neg_sum
    # max() puts a small floor under the divisor so a zero total cannot
    # divide by zero.
    pos_score,neg_score = pos_sum/max(0.0000001,sum_val),neg_sum/max(0.0000001,sum_val)
    return pos_score - neg_score
def sentiment_score(dataset, sample):
    """Return the share of net-positive results over a sample of ``dataset``.

    Sentences whose position is a multiple of ``sample`` are scored;
    ``net_sum`` counts iterations where positive exceeds negative and is
    divided by ``count / sample + 1``.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source. As laid out here the net-positive check runs for every sentence
    and reuses the most recent scores for unsampled ones -- confirm against
    the original file whether that is intended.
    """
    count = net_sum = 0
    for sentence in dataset:
        # Only positions 0, sample, 2*sample, ... are scored.
        if count%sample==0:
            pos_score, neg_score = senti_classifier.polarity_scores([sentence])
            #print "pos_score: " + str(pos_score) + " neg_score" + str(neg_score)
        count += 1
        if (pos_score - neg_score)>0:
            net_sum += 1
    # Divisor derived from the number of sentences seen and the sample step.
    length = count / sample + 1
    # The 1.0 factor forces float division.
    score = net_sum * 1.0 / length
    return score
def getSentiment(url,date,company,num,sentences): #print(url) #sentences = u.dataFromURL(url) #print(type(sentences)) #print(sentences.split(". ")) #sentences = sentences.split(". ") pos_score, neg_score = senti_classifier.polarity_scores(sentences); f = open("Articles/"+company+date+"_"+str(num)+'.txt', 'w'); f.write(". ".join(sentences).encode('utf-8')); f.close(); return(str(pos_score) + " " + str(neg_score)) """
def analyze_sents(content, blog_title):
    """Score one blog's text and fold it into the running totals.

    ``content`` is split with ``sent_tokenize`` and scored in one call. The
    scores are added to the globals ``overallPosScore`` and
    ``overallNegScore`` and stored as a ``(pos, neg)`` tuple in
    ``blogwiseScores[blog_title]``.
    """
    global overallPosScore, overallNegScore

    sentence_list = sent_tokenize(content)
    positive, negative = senti_classifier.polarity_scores(sentence_list)

    overallPosScore += positive
    overallNegScore += negative
    blogwiseScores[blog_title] = (positive, negative)
def on_data(self, data):
    """Score the text of one incoming JSON message and print the result.

    ``data`` is a JSON string. Messages without a "text" field are skipped
    rather than raising ``KeyError``. Always returns True.
    """
    all_data = json.loads(data)
    tweet = all_data.get("text")
    if tweet is None:
        return True

    # polarity_scores iterates over a list of sentences; a bare string would
    # be walked character by character, so the text is wrapped in a list.
    pos, neg = senti_classifier.polarity_scores([tweet])
    print(tweet)
    print(pos, neg)
    # Short pause after each processed message.
    time.sleep(0.3)
    return True
def main() :
    """Write average review sentiment for each camera document.

    For documents 1..418 the file ``cams/cams_<n>.xml`` is parsed, every
    ``reviewText`` node is split on '.' and ',' into fragments, and the
    fragments are scored. One line per document with the mean positive and
    negative scores (or "0 0" when there are no reviews) is written to
    ``sentiment_data.txt`` and printed.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source -- confirm it against the original file.
    """
    fo = open("sentiment_data.txt","wb")
    # Caches keyed by the tuple of fragments, so identical reviews are
    # scored only once.
    prev_pos_scores = {}
    prev_neg_scores = {}
    for t in range(418) :
        doc = libxml2.parseFile("cams/cams_" + str(t + 1) + ".xml")
        ctxt = doc.xpathNewContext()
        res = ctxt.xpathEval("//specsKey[@name=\"reviewText\"]/text()")
        # print len(res),"//specsKey[@name=\"reviewText\"]/@name"
        pos_scores = 0
        neg_scores = 0
        num_reviews = 0
        for val in res :
            # Split the review on full stops, then on commas.
            vals = val.content.split('.')
            sentences = []
            for valu in vals :
                sentences_temp = valu.split(',')
                for sentence in sentences_temp :
                    sentences.append(sentence)
            tuple_sentences = tuple(sentences)
            if tuple_sentences in prev_pos_scores and tuple_sentences in prev_neg_scores :
                pos_score = prev_pos_scores[tuple_sentences]
                neg_score = prev_neg_scores[tuple_sentences]
            else :
                try :
                    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
                except :
                    # Any scoring failure counts as a zero score.
                    pos_score = neg_score = 0
            pos_scores += pos_score
            neg_scores += neg_score
            prev_pos_scores[tuple_sentences] = pos_score
            prev_neg_scores[tuple_sentences] = neg_score
            print tuple_sentences
            num_reviews += 1
        if num_reviews == 0 :
            fo.write("Document " + str(t + 1) + " : 0 0\n")
            print "Document " + str(t + 1) + " : 0 0\n"
        else :
            fo.write("Document " + str(t + 1) + " : " + str(pos_scores/num_reviews) + " " + str(neg_scores/num_reviews) + "\n")
            print "Document " + str(t + 1) + " : " + str(pos_scores/num_reviews) + " " + str(neg_scores/num_reviews) + "\n"
    fo.close()
def computeSentimentScores(self, record, tokenizer):
    """Compute average sentiment scores for one record.

    record is a dict which must have record['quote_text']. It normally
    should have record['quote_id'] or record['vin_id']; when neither is
    present a random ID is generated.

    tokenizer is an object with a ``tokenize`` method. The unit of analysis
    (e.g., word, ngram, sentence) is determined by the tokenizer passed in.

    Returns a dict with ``quoteID``, ``avgPos``, ``avgNeg`` (negated, so
    non-positive) and ``netSent``. The intermediate values, including
    ``obj_score``, are also stored on ``self``.
    """
    self.text = record['quote_text']

    # To allow this to be used with arbitrary inputs: only a missing key
    # triggers the fallbacks; other errors propagate.
    try:
        self.quoteID = record['quote_id']
    except KeyError:
        try:
            self.quoteID = record['vin_id']
        except KeyError:
            # Make random ID if none exists
            self.quoteID = 'ID' + str(np.random.rand())

    # Tokenize the text into the appropriate units
    self.tokens = tokenizer.tokenize(self.text)
    self.numTokens = len(self.tokens)

    # Calc sentiment scores
    self.pos_score, self.neg_score = senti_classifier.polarity_scores(
        self.tokens)

    # Averages are needed because otherwise the score will vary with the
    # number of tokens. With no tokens the averages are defined as 0.0
    # instead of dividing by zero.
    if self.numTokens:
        self.avgPos = self.pos_score / self.numTokens
        self.avgNeg = (self.neg_score / self.numTokens) * -1
    else:
        self.avgPos = 0.0
        self.avgNeg = 0.0

    # Net average sentiment of the record
    self.netSent = self.avgPos + self.avgNeg
    # Objectivity score (from chris potts); kept on self, not in the result
    self.obj_score = 1.0 - self.netSent

    self.scores = dict(quoteID=self.quoteID, avgPos=self.avgPos,
                       avgNeg=self.avgNeg, netSent=self.netSent)
    return self.scores
def fetchMyTweets(self, count = 0, page = 0):
    """Download the user's timeline and record keywords with a context sign.

    With ``count == 0`` the timeline is paged 200 statuses at a time until a
    page is empty, shorter than 100 statuses, or page index 4 has been
    fetched. Otherwise a single page of ``count`` statuses is requested.

    For every status the text (with everything from the first 'http' to the
    end removed), its hashtags, its keywords and a context value are
    appended to ``self.tweets_``, and ``(keyword, context)`` pairs are added
    to ``self.all_key_words_``. The context is 1 / -1 for net positive /
    negative sentiment and zero otherwise; sentiment is only computed when
    ``self.context_`` equals True.

    Returns None; does nothing when ``self.twitter_api_`` is None.
    """
    if self.twitter_api_ is None:
        return

    timeline = []
    if count == 0:
        i = 0
        do_loop = True
        while do_loop:
            # TODO: with small numbers of tweets they get downloaded twice
            tmp_timeline = self.twitter_api_.GetUserTimeline(id = self.uid_, count = 200, page = i)
            if len(tmp_timeline) == 0 or i == 4:
                do_loop = False
            timeline.extend(tmp_timeline)
            # TODO (carried over from the original): a short page ends paging
            if len(tmp_timeline) < 100:
                do_loop = False
            i += 1
    else:
        timeline.extend(self.twitter_api_.GetUserTimeline(id = self.uid_, count = count, page = page))

    keyword_extractor = KeywordExtractor()
    # Compiled once instead of once per status.
    hash_tag_pattern = re.compile(r"#(\w+)")

    for status in timeline:
        status_text = status.GetText()
        # strip links from the text
        status_text = re.sub(r'http.*$', "", status_text)
        # extract hashtags
        hash_tags = hash_tag_pattern.findall(status_text)

        # find out whether the tweet is positive or negative
        # (explicit comparison kept: only a value equal to True enables it)
        if self.context_ == True:
            pos_score, neg_score = senti_classifier.polarity_scores([status_text])
        else:
            pos_score = 0
            neg_score = 0
        context = pos_score - neg_score
        if context > 0:
            context = 1
        elif context < 0:
            context = -1

        # extract keywords from the tweet
        keywords = [x[0] for x in keyword_extractor.extract(status_text)]
        keywords.extend(hash_tags)

        # add the keywords to the list of all of the user's keywords
        keywords_with_context = [(x, context) for x in keywords]
        self.all_key_words_.extend(keywords_with_context)
        self.tweets_.append([status_text, hash_tags, keywords, context])
def probTraining(priorScore):
    """Return per-phrase ``[positive, negative, neutral]`` probabilities.

    For every non-empty key of ``priorScore`` the key's items are scored
    with ``senti_classifier``. A positive prior fixes the positive slot to
    ``prior / 5.0`` and a negative prior fixes the negative slot to
    ``-(prior / 5.0)``; the remaining polarity slot takes the classifier
    score. A zero prior uses the classifier for both. The neutral slot
    stays 0.0.
    """
    wordProb = {}
    for phrase, prior in priorScore.items():
        if not phrase:
            continue
        probabilities = [0.0, 0.0, 0.0]
        posScore, negScore = senti_classifier.polarity_scores(list(phrase))
        if prior > 0.0:
            probabilities[positive] = prior / 5.0
            probabilities[negative] = negScore
        elif prior < 0.0:
            probabilities[negative] = -(prior / 5.0)
            probabilities[positive] = posScore
        else:
            probabilities[positive] = posScore
            probabilities[negative] = negScore
        wordProb[phrase] = probabilities
    return wordProb
def probTraining(priorScore):
    '''Build prior probabilities for every non-empty phrase.

    Returns a dict mapping each phrase to a three-item list indexed by
    ``positive``, ``negative`` and ``neutral``. The slot matching the sign
    of the prior is ``abs(prior) / 5.0``; the other polarity slot (or both,
    for a zero prior) comes from ``senti_classifier``.
    '''
    wordProb = {}
    for phrase in priorScore.keys():
        if not phrase:
            continue
        prior = priorScore[phrase]
        slots = [0.0, 0.0, 0.0]
        classifier_pos, classifier_neg = senti_classifier.polarity_scores(list(phrase))
        if prior > 0.0:
            slots[positive] = prior / 5.0
            slots[negative] = classifier_neg
        elif prior < 0.0:
            slots[negative] = -(prior / 5.0)
            slots[positive] = classifier_pos
        else:
            slots[positive] = classifier_pos
            slots[negative] = classifier_neg
        wordProb[phrase] = slots
    return wordProb
def computeSentimentScores(self, record, tokenizer):
    """Compute average sentiment scores for one record.

    record is a dict which must have record['quote_text']. It normally
    should have record['quote_id'] or record['vin_id']; when neither is
    present a random ID is generated.

    tokenizer is an object with a ``tokenize`` method. The unit of analysis
    (e.g., word, ngram, sentence) is determined by the tokenizer passed in.

    Returns a dict with ``quoteID``, ``avgPos``, ``avgNeg`` (negated, so
    non-positive) and ``netSent``. The intermediate values, including
    ``obj_score``, are also stored on ``self``.
    """
    self.text = record['quote_text']

    # To allow this to be used with arbitrary inputs: only a missing key
    # triggers the fallbacks; other errors propagate.
    try:
        self.quoteID = record['quote_id']
    except KeyError:
        try:
            self.quoteID = record['vin_id']
        except KeyError:
            # Make random ID if none exists
            self.quoteID = 'ID' + str(np.random.rand())

    # Tokenize the text into the appropriate units
    self.tokens = tokenizer.tokenize(self.text)
    self.numTokens = len(self.tokens)

    # Calc sentiment scores
    self.pos_score, self.neg_score = senti_classifier.polarity_scores(self.tokens)

    # Averages are needed because otherwise the score will vary with the
    # number of tokens. With no tokens the averages are defined as 0.0
    # instead of dividing by zero.
    if self.numTokens:
        self.avgPos = self.pos_score / self.numTokens
        self.avgNeg = (self.neg_score / self.numTokens) * -1
    else:
        self.avgPos = 0.0
        self.avgNeg = 0.0

    # Net average sentiment of the record
    self.netSent = self.avgPos + self.avgNeg
    # Objectivity score (from chris potts); kept on self, not in the result
    self.obj_score = 1.0 - self.netSent

    self.scores = dict(quoteID=self.quoteID, avgPos=self.avgPos,
                       avgNeg=self.avgNeg, netSent=self.netSent)
    return self.scores
def probTraining(priorScore):
    '''Turn prior scores into per-phrase probability lists.

    Empty keys are skipped. For each remaining key a three-item list
    indexed by ``positive`` / ``negative`` / ``neutral`` is produced: the
    side matching the prior's sign gets ``abs(prior) / 5.0`` and the other
    side gets the ``senti_classifier`` score; a zero prior takes both sides
    from the classifier.
    '''
    wordProb = {}
    for key in priorScore:
        if not key:
            continue
        prior_value = priorScore[key]
        pos_from_classifier, neg_from_classifier = senti_classifier.polarity_scores(list(key))
        row = [0.0, 0.0, 0.0]
        if prior_value > 0.0:
            row[positive] = prior_value / 5.0
            row[negative] = neg_from_classifier
        elif prior_value < 0.0:
            row[negative] = -(prior_value / 5.0)
            row[positive] = pos_from_classifier
        else:
            row[positive] = pos_from_classifier
            row[negative] = neg_from_classifier
        wordProb[key] = row
    return wordProb
def print_extract(tweets):
    """Print a table of date, scores, sentiment label and text per tweet.

    Each tweet's "text" (empty string when the key is absent) is labelled
    with ``classify`` and scored with ``senti_classifier``. A running
    "<done>/<total>" line is printed after every row and the finished table
    is printed at the end.
    """
    table = PrettyTable(["Date", "Pos", "Neg", "Sentiment", "Text"])
    table.align["Text"] = "l"
    table.set_style(MSWORD_FRIENDLY)

    for done, tweet in enumerate(tweets, 1):
        text = tweet["text"] if "text" in tweet else ""
        label = classify(text)
        positive, negative = senti_classifier.polarity_scores([text])
        table.add_row([tweet["created_at_date"], positive, negative, label, text])
        print(str(done) + "/" + str(len(tweets)))

    print(table)
def classify_tweet(stemmed_tokens, sentences): pos_score, neg_score = senti_classifier.polarity_scores(sentences) print pos_score, neg_score tweet_emotion = 'Neutural' emotionCnt = { 'Happy': 0, 'Sad': 0, 'Anger': 0, 'Fear': 0, 'Surprise': 0, 'Disgust': 0 } for token in stemmed_tokens: if token in happyList: emotionCnt['Happy'] += 1 elif token in sadList: emotionCnt['Sad'] += 1 elif token in angerList: emotionCnt['Anger'] += 1 elif token in fearList: emotionCnt['Fear'] += 1 elif token in surpList: emotionCnt['Surprise'] += 1 elif token in disgList: emotionCnt['Disgust'] += 1 print('happy count:', emotionCnt['Happy']) print('sad count:', emotionCnt['Sad']) print('anger count:', emotionCnt['Anger']) print('fear count:', emotionCnt['Fear']) print('surprise count:', emotionCnt['Surprise']) print('disgust count:', emotionCnt['Disgust']) max_emotion = max(emotionCnt.keys(), key=(lambda k: emotionCnt[k])) if max(emotionCnt.values()) == 0: tweet_emotion = 'Neutural' elif pos_score > neg_score: if max_emotion == 'Happy' or max_emotion == 'Surprise': tweet_emotion = max_emotion else: if max_emotion != 'Happy': tweet_emotion = max_emotion return tweet_emotion
def probTraining(priorScore):
    """Return the probability of each phrase being positive or negative.

    ``priorScore`` maps phrases to a prior score. For every non-empty
    phrase the result holds a three-item list indexed by ``positive``,
    ``negative`` and ``neutral``: a positive prior sets the positive slot
    to ``prior / 5.0``, a negative prior sets the negative slot to
    ``-(prior / 5.0)``, and any polarity slot not fixed by the prior takes
    the ``senti_classifier`` score. The neutral slot stays 0.0.
    """
    wordProb = {}
    for phrase, prior in priorScore.items():
        if not phrase:
            continue
        posScore, negScore = senti_classifier.polarity_scores(list(phrase))
        entry = [0.0, 0.0, 0.0]
        if prior > 0.0:
            entry[positive] = prior / 5.0
            entry[negative] = negScore
        elif prior < 0.0:
            entry[negative] = -(prior / 5.0)
            entry[positive] = posScore
        else:
            entry[positive] = posScore
            entry[negative] = negScore
        wordProb[phrase] = entry
    return wordProb
def classify(text, verbose=False):
    """Return the polarity label of ``text``, optionally with its scores.

    The text is scored as one sentence. The negative score is negated and
    added to the positive score: a total above zero is "Positive", below
    zero "Negative", otherwise 'Neutral'.

    With ``verbose`` true a dict with 'pos_score', 'neg_score' (negated),
    'polarity' and 'total_score' is returned instead of the bare label.
    """
    raw_pos, raw_neg = senti_classifier.polarity_scores([text])
    signed_neg = -raw_neg
    combined = raw_pos + signed_neg

    polarity = 'Neutral'
    if combined > 0:
        polarity = "Positive"
    elif combined < 0:
        polarity = "Negative"

    if verbose:
        details = {}
        details['pos_score'] = raw_pos
        details['neg_score'] = signed_neg
        details['polarity'] = polarity
        details['total_score'] = combined
        return details
    return polarity
def process_files():
    """Score every matching file under ``DIR_NAME`` that is not yet scored.

    Files whose names match ``MATCH`` are collected recursively. Paths
    already listed in ``OUT_DIR_NAME + 'nltk_sentiments'`` are skipped so a
    rerun does not process them again. For every remaining file a
    "<path>,<net score>" line is appended, where the net score is the
    positive minus the negative score over the file's lines.
    """
    matches = []
    for root, dirnames, filenames in os.walk(DIR_NAME):
        for filename in fnmatch.filter(filenames, MATCH):
            matches.append(os.path.join(root, filename))

    out_path = OUT_DIR_NAME + 'nltk_sentiments'

    # stop from processing same files if rerun; a missing output file (first
    # run) or an entry for a file that no longer matches is not an error.
    processed = set()
    if os.path.exists(out_path):
        with open(out_path, 'r') as out_f:
            for line in out_f:
                processed.add(line.split(',')[0])

    with open(out_path, 'a') as out_f:
        for match in matches:
            if match in processed:
                continue
            print(match)
            with open(match, 'r') as f:
                text = f.readlines()
            pos_score, neg_score = senti_classifier.polarity_scores(text)
            netScore = pos_score - neg_score
            print(netScore)
            out_f.write(match + ',' + str(netScore) + '\n')
def processEntrySentiment(strText, msg):
    """Print a sentence-level sentiment report for ``strText``.

    Every sentence found by ``TextBlob`` is classified with a naive Bayes
    analyzer and filed as a negative or positive indicator. The
    ``senti_classifier`` scores of all sentences are printed as a
    comparison, followed by both indicator lists and an overall verdict
    based on which list is longer. When ``strText`` is empty, ``msg`` is
    printed instead. Returns None.
    """
    if not strText:
        print(msg)
        return

    negList = []
    posList = []
    sentenceArray = []

    # One analyzer shared by all sentences instead of a new one per sentence.
    analyzer = NaiveBayesAnalyzer()
    for sentence in TextBlob(strText).sentences:
        sentence_text = str(sentence)
        sentenceArray.append(sentence_text)
        blob = TextBlob(sentence_text, analyzer=analyzer)
        if blob.sentiment.classification == 'neg':
            negList.append(sentence_text)
        else:
            posList.append(sentence_text)
    negCount = len(negList)
    posCount = len(posList)

    # Another module sentiment indicator to confirm results by comparison
    pos_score, neg_score = senti_classifier.polarity_scores(sentenceArray)
    print ("Comparative Positive score: "+str(pos_score))
    print ("Comparative Negative score: "+str(neg_score))
    print ("Negative Indicators: ")
    print (negList)
    print ("Positive Indicators: ")
    print (posList)

    if negCount == posCount:
        print ("Overall Sentiment: Neutral")
    elif negCount > posCount:
        print ("Overall Sentiment: Negative")
    else:
        print ("Overall Sentiment: Positive")
    return
def tweet(request, id):
    """Render the analysis page for the stored post with the given ``id``.

    The post text is tokenized, POS-tagged, chunked for named entities and
    scored for polarity. A fixed example parse tree is drawn to
    ``output.ps``, converted to PNG and copied to ``static/tree.png`` with
    shell commands. The results are rendered with ``tweet.html``.
    """
    import nltk
    from senti_classifier import senti_classifier

    post = TwitterPost.objects.all().filter(id=id).first()
    word_tokens = nltk.word_tokenize(post.text)
    positive, negative = senti_classifier.polarity_scores([post.text])
    tagged_tokens = nltk.pos_tag(word_tokens)

    import os
    from nltk.tree import Tree
    from nltk.draw.tree import TreeView

    # Draw the hard-coded sample tree and publish it as a static image.
    sample_tree = Tree.fromstring('(S (NP this tree) (VP (V is) (AdjP pretty)))')
    TreeView(sample_tree)._cframe.print_to_file('output.ps')
    for command in ('convert output.ps output.png',
                    'cp output.png static/tree.png'):
        os.system(command)

    named_entities = nltk.chunk.ne_chunk(tagged_tokens)
    context = {
        "twitt": post,
        "tokens": word_tokens,
        "tags": named_entities,
        "pos_score": positive,
        "neg_score": negative,
    }
    return render(request, "tweet.html", context)
if pos_score > neg_score: return "positive" elif neg_score > pos_score: return "negative" elif pos_score == neg_score: return "neutral" def convertDateTime(dt): return parser.parse(dt) client = MongoClient() db = client.blackhole tweets_c = db.singularity sentiment_c = db.sentiment for post in tweets_c.find({},{'_id':0, 'text':1, 'created_at':1, 'user.location':1}): dateTime = convertDateTime(post['created_at']) text = escapeSpecialCharacters( post['text'], '\'"/\\' ) pos_score, neg_score = senti_classifier.polarity_scores([text]) sentiment = getSentiment(pos_score, neg_score) doc = { "queryable": "APPLE", "pos_score": pos_score, "neg_score": neg_score, "sentiment": sentiment, "dateTime": dateTime } sentiment_c.insert(doc) print doc
def sentimentalize(s):
    """Return ``[positive, negative]`` scores for ``s`` as one sentence."""
    positive, negative = senti_classifier.polarity_scores([s])
    scores = [positive, negative]
    return scores
def arg_max(iterable):
    """Return the index of the largest item (the first one on ties)."""
    def item_value(pair):
        return pair[1]

    winning_index, _ = max(enumerate(iterable), key=item_value)
    return winning_index


def polarity(index):
    """Map score index 0 to '1' and index 1 to '-1'; anything else to None."""
    if index == 0:
        return '1'
    if index == 1:
        return '-1'
    return None


def clean(text, exclude):
    """Return ``text`` without the characters contained in ``exclude``."""
    kept = [ch for ch in text if ch not in exclude]
    return ''.join(kept)


exclude = set(punctuation)

# Write to output: one "<line number><TAB><polarity>" row per input line.
with open(sys.argv[2], 'w') as fout:
    writer = UnicodeWriter(fout, delimiter='\t')
    # Read dataset
    with open(sys.argv[1], 'r') as fin:
        for l_i, line in enumerate(fin):
            line = clean(to_unicode(line).strip(), exclude)
            pos_neg = senti_classifier.polarity_scores([line])
            # Index of the larger score picks the label.
            pol = polarity(arg_max(pos_neg))
            line_number = to_unicode(str(l_i))
            writer.writerow([line_number, pol])
currentAbstract = [] currentAbstract.append(row[123]+row[124]) # string containing title and subjects for each term currentSubject = row[1]+row[71]+row[72]+row[73]+row[123]+row[124] #string containing current abstract and author abstract currentString = row[123]+row[124] #get frequency distribution of topic and compare to threshold fd = FreqDist(nltk.tokenize.word_tokenize(str.lower(currentSubject))) #get frequency of word fdTopic = fd[str.lower(topic)] if fdTopic > threshold: publicationDates.append(row[63]) print "Processing" #get sentiment pos_score, neg_score = senti_classifier.polarity_scores(currentAbstract) netScore = pos_score - neg_score # append netScore to array netScoreList.append(netScore) print row[1] if netScore > localMax: #get title of article with max positive sentiment localMax = netScore maxArticleTitle = row[1] + row[63]+str(counter) if netScore < localMin: #get title of article with min positive sentiment localMin = netScore minArticleTitle = row[1] + row[63]+str(counter) #write to output file
if not isinstance(obj, unicode): obj = unicode(obj, encoding) return obj def arg_max(iterable): return max(enumerate(iterable), key=lambda x: x[1])[0] def polarity(index): if index == 0: return '1' elif index == 1: return '-1' def clean(text, exclude): return ''.join(ch for ch in text if ch not in exclude) exclude = set(punctuation) # Write to output with open(sys.argv[2], 'w') as fout: writer = UnicodeWriter(fout, delimiter='\t') # Read dataset with open(sys.argv[1], 'r') as fin: for l_i, line in enumerate(fin): line = to_unicode(line).strip() line = clean(line, exclude) pos_neg = senti_classifier.polarity_scores([line]) pol = polarity(arg_max(pos_neg)) line_number = to_unicode(str(l_i)) writer.writerow([line_number, pol])
writer = csv.writer(out, delimiter='\t') inp = open('/home/rohan/Desktop/input/output-1-lem.csv', 'r') reader = csv.reader(inp) for row in reader: row_str = str( row )[2: -2] #convert the row to string ignoring the square brackets and single quote row_arr = row_str.split('\\t') # split the string with tab delimiter tweet = row_arr[2] # 2 is the index of tweet text if tweet == '': continue tweet = [tweet] pos, neg = senti_classifier.polarity_scores(tweet) if DEBUG: print(pos, neg) if not DEBUG: if pos > neg: sentiment = 'positive' elif pos < neg: sentiment = 'negative' else: sentiment = 'neutral' writer.writerow([ row_arr[0], # username row_arr[1], #original tweet row_arr[2], # tweet
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import warnings,sys,os,argparse
from senti_classifier import senti_classifier
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    import gdata.youtube
    import gdata.youtube.service
import urlparse

youtube_service = gdata.youtube.service.YouTubeService()


def ids_from_urls(youtubeurls=None):
    """Return the ``v`` query parameter of every URL in ``youtubeurls``.

    ``None`` (the default) is treated as an empty list. A URL without a
    ``v`` parameter raises ``KeyError``.
    """
    video_ids = []
    for url in youtubeurls or []:
        query = urlparse.parse_qs(urlparse.urlparse(url).query)
        video_ids.append(query["v"][0])
    return video_ids


def comments(youtubeurls=None):
    """Return the text of every comment in the feeds of the given videos.

    ``None`` (the default) is treated as an empty list.
    """
    collected = []
    for vid in ids_from_urls(youtubeurls):
        feed = youtube_service.GetYouTubeVideoCommentFeed(video_id=vid)
        collected.extend(comment.content.text for comment in feed.entry)
    return collected


if __name__ == '__main__':
    youtubeurls = ["http://www.youtube.com/watch?v=u1vASMbEEQc"]
    allcomments = comments(youtubeurls)
    # Prints the (positive, negative) pair.
    print(senti_classifier.polarity_scores(allcomments))
i += 1 if i != 0: line_avg = line_avg / i sentinent_file.write(str(round(line_avg, 3)) + ", ") else: sentinent_file.write("0, ") line_avg = 0.0 i = 0 for sentence in score_min_negative: for scores in sentence: line_avg += scores i += 1 if i != 0: line_avg = line_avg / i sentinent_file.write(str(round(line_avg, 3)) + ", ") else: sentinent_file.write("0, ") try: pos_score, neg_score = senti_classifier.polarity_scores( sentences) except Exception as e: pos_score = neg_score = 0 sentinent_file.write( str(pos_score) + ", " + str(neg_score) + ", ") sentinent_file.close() #end of loop tweets import generate_matrix subprocess.call(["python", "generate_matrix.py"], shell=False)
def sentiClassfierScore(self,doc):
    """Return ``doc``'s polarity as {'pos_score': ..., 'neg_score': ...}.

    ``doc`` is scored as a single sentence.
    """
    positive, negative = senti_classifier.polarity_scores([doc])
    return {
        'pos_score': positive,
        'neg_score': negative,
    }
def get_api_keys(api_keys):
    """Return ``api_keys``, completed from ``_twitter.api`` when needed.

    When fewer than four values in ``api_keys`` are non-empty, the file
    ``_twitter.api`` in the working directory is read as tab-separated
    ``key<TAB>value`` lines and every pair is stored into ``api_keys`` (the
    dict is modified in place). Blank lines are ignored; any other line
    that does not yield exactly two fields raises ``ValueError``.
    """
    #return {'consumer_secret': 'a4ksdf7s, 'consumer'....
    filled = [key for key in api_keys.values() if key]
    if len(filled) < 4:
        # ``with`` closes the file instead of leaking the handle.
        with open('_twitter.api', 'r') as key_file:
            for line in key_file:
                if not line.strip():
                    continue
                key, val = [k.strip() for k in line.split('\t') if k]
                api_keys[key] = val
    return api_keys


def twitter_api():
    """Return a ``twitter.Api`` client built from the module's API keys."""
    _api_keys = get_api_keys(api_keys)
    api = twitter.Api(consumer_key = _api_keys['consumer_key'],
                      consumer_secret = _api_keys['consumer_secret'],
                      access_token_key = _api_keys['access_token_key'],
                      access_token_secret = _api_keys['access_token_secret'])
    return api


def tweets(usr=None):
    """Return the texts of the statuses on the "TheSJFC" timeline.

    NOTE(review): ``usr`` is accepted but ignored; the timeline name is
    hard-coded -- confirm whether callers expect it to be honoured.
    """
    api = twitter_api()
    statuses = api.GetUserTimeline("TheSJFC")
    _tweets = [s.text for s in statuses]
    return _tweets


if __name__ == '__main__':
    pos, neg = senti_classifier.polarity_scores(tweets(usr = "******"))
    # Same output as the Python 2 statement ``print pos, neg``.
    print("%s %s" % (pos, neg))
for row in reader: i += 1 if i == 1: continue # row_str = str(row)[2:-2] #convert the row to string ignoring the square brackets and single quote # row_arr = row_str.split('\\t') # split the string with tab delimiter # tweet = row_arr[2] # 2 is the index of tweet text row_arr = row tweet = row[2] if tweet == "" or tweet == prev_tweet: continue prev_tweet = tweet tweet = [tweet] pos, neg = senti_classifier.polarity_scores(tweet) # pos,neg = 0,0 if DEBUG: print (pos, neg) if not DEBUG: if pos > neg: # sentiment='positive' sentiment = 1 count_pos += 1 elif pos < neg: # sentiment='negative' sentiment = -1 count_neg += 1 else:
def senti():
    """Append polarity scores to every entry of ``hg.content_summary``.

    Each key is scored as a single sentence and two dicts,
    ``{'positive': score}`` and ``{'negative': score}``, are appended to
    the list stored under that key.
    """
    for w in hg.content_summary:
        # polarity_scores iterates over a list of sentences; a bare string
        # would be walked character by character, so the key is wrapped.
        # NOTE(review): assumes the keys are strings -- confirm.
        pos_score, neg_score = senti_classifier.polarity_scores([w])
        hg.content_summary[w].append({'positive': pos_score})
        hg.content_summary[w].append({'negative': neg_score})
import MySQLdb as mdb
from senti_classifier import senti_classifier

#f = open('trial.txt' , 'r')
#sentences = f.read().rstrip()
#sentence_list = list()
#sentence_list.append(sentences)
#pos_score , neg_score = senti_classifier.polarity_scores(sentence_list)
#print pos_score , neg_score

# Score every row of geo_tweets and store the result in the sent_pos and
# sent_neg columns. Column 0 is used as the row id and column 1 as the text.
# NOTE(review): the connection credentials are hard-coded here.
con = mdb.connect('localhost' , 'root' , 'admin' , 'happiness_index')
try:
    cur = con.cursor()
    cur.execute("SELECT * from geo_tweets")
    con.commit()
    cur1 = con.cursor()
    # Iterate the fetched rows directly instead of counting with rowcount.
    for row in cur.fetchall():
        tweet_list = [row[1]]
        pos_score , neg_score = senti_classifier.polarity_scores(tweet_list)
        cur1.execute("update geo_tweets SET sent_pos = %s , sent_neg = %s WHERE id = %s" , (pos_score , neg_score , row[0]))
    # NOTE(review): message and commit are assumed to follow the loop.
    print("success\n")
    con.commit()
finally:
    # Release the connection even when scoring or an update fails.
    con.close()
def test_against_data(self):
    """Check classifier scores against the expected values in the data.

    Every review's text is split on "." into stripped sentences, scored,
    and compared with the review's 'pos' and 'neg' values.
    """
    for review in self.reviews_data['reviews']:
        sentences = []
        for piece in review['text'].split("."):
            sentences.append(piece.strip())
        positive, negative = senti_classifier.polarity_scores(sentences)
        self.assertEqual(positive, review['pos'])
        self.assertEqual(negative, review['neg'])
from senti_classifier import senti_classifier

# Minimal demo: score two sentences together and print both scores.
sentences = ['The movie was the worst movie', 'It was the worst acting by the actors']
pos_score, neg_score = senti_classifier.polarity_scores(sentences)
# Same output as the Python 2 statement ``print pos_score, neg_score``,
# but also valid on Python 3.
print("%s %s" % (pos_score, neg_score))
""" 0.625 """
##print happy.neg_score
""" 0.25 """
##print happy.obj_score

# Works on sentences using senti_classifier
from senti_classifier import senti_classifier

s1 = ['I could only get out of the house twice today']
s2 = ['I got out of the house twice today']
##sentences = ['The movie was the best movie', 'It was the best acting by the actors']

# Score both samples in turn; the label printed above the negative score
# differs between the two, so it travels with the sample.
for sample, negative_label in ((s1, 'negative: '), (s2, 'negative')):
    print(sample)
    pos_score, neg_score = senti_classifier.polarity_scores(sample)
    print('positive:')
    print(pos_score)
    print(negative_label)
    print(neg_score)

##print synsets_scores['peaceful.a.01']['pos']
token = [i for i in token if not i in tweet_remove] token = [emoji_pattern.sub(r'', i) for i in token] tokens.append(token) for to in tokens: key = ' '.join(to) sentences.append(key) m = {} scScore = 0 hlScore = 0 ssScore = 0 for i in range(len(sentences)): if (i >= 1001): break pos_score0, neg_score0 = senti_classifier.polarity_scores([sentences[i]]) if pos_score0 > neg_score0: if acc[i] == 1: scScore += 1 m[i] = [1] elif pos_score0 < neg_score0: if acc[i] == 0: scScore += 1 m[i] = [0] else: if acc[i] == .5: scScore += 1 m[i] = [.5] if hl[i] == "Neutral": if acc[i] == .5:
import nltk
import glob
import os, sys
#nltk.download()
from senti_classifier import senti_classifier

path = '/home/stonehange/Desktop/txt_sentoken/pos/'
for filename in os.listdir(path):
    full_path = os.path.join(path, filename)
    if not os.path.isfile(full_path):
        continue
    # Score the contents of the file, one list item per line. Passing the
    # bare file name would score the characters of the name instead.
    with open(full_path) as review_file:
        sentences = review_file.readlines()
    pos_score, neg_score = senti_classifier.polarity_scores(sentences)
    # Same output format as the Python 2 statement ``print a, b``.
    print("%s %s" % (pos_score, neg_score))
import csv import re from senti_classifier import senti_classifier array = [] allratings = [] allRatings = [] with open('/home/suhas/Downloads/userTable.csv', 'r+') as f: with open('/home/suhas/Downloads/user.csv', 'wb') as f1: reader = csv.reader(f) writer = csv.writer(f1) for row in reader: #re.sub(r'[^\w]','',row[4]) array.append(re.sub(r'!""', ' ', row[4])) for i in array: pos_score, neg_score = senti_classifier.polarity_scores([i]) sum = pos_score + neg_score if (sum == 0): pos_percentage = 0 rating = 0 else: pos_percentage = (pos_score) / (pos_score + neg_score) * 100 if (pos_percentage >= 85): rating = 5 elif (pos_percentage >= 70 and pos_percentage < 85): rating = 4 elif (pos_percentage >= 55 and pos_percentage < 70): rating = 3 elif (pos_percentage >= 40 and pos_percentage < 55): rating = 2 elif (pos_percentage >= 25 and pos_percentage < 40):
def getScore(post):
    """Return ``[pos_score, neg_score]`` for ``post`` as one sentence."""
    scores = senti_classifier.polarity_scores([post])
    positive, negative = scores
    return [positive, negative]