def test_sentiment(self):
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(en.sentiment("wonderful")[0] > 0)
    self.assertTrue(en.sentiment("horrible")[0] < 0)
    self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
    self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
    # Assert that :) and :( are recognized.
    self.assertTrue(en.sentiment(":)")[0] > 0)
    self.assertTrue(en.sentiment(":(")[0] < 0)
    # Assert the accuracy of the sentiment analysis (for the positive class).
    # Given are the scores for Pang & Lee's polarity dataset v2.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee1.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.755)
    self.assertTrue(P > 0.760)
    self.assertTrue(R > 0.747)
    self.assertTrue(F > 0.754)
    # Assert the accuracy of the sentiment analysis on short text (for the positive class).
    # Given are the scores for Pang & Lee's sentence polarity dataset v1.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    reviews = []
    for score, review in Datasheet.load(os.path.join(PATH, "corpora", "polarity-en-pang&lee2.csv")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.642)
    self.assertTrue(P > 0.653)
    self.assertTrue(R > 0.607)
    self.assertTrue(F > 0.629)
    print "pattern.en.sentiment()"
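# A minimal, self-contained sketch (not part of the original test suite) of how the
# pattern.metrics.test() helper used above works: it takes a predicate and a list of
# (document, expected_label) pairs and returns (accuracy, precision, recall, F1-score).
# The two toy reviews below are illustrative only.
from pattern.en import positive
from pattern.metrics import test

toy_reviews = [
    ("A wonderful, moving film with great acting.", True),
    ("Horrible acting and a seriously boring plot.", False),
]
A, P, R, F = test(lambda review: positive(review, threshold=0.1), toy_reviews)
print("accuracy=%.2f precision=%.2f recall=%.2f F1=%.2f" % (A, P, R, F))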
def task10(): """ This function sort all the tweets from CrimeReport according to posititve function of pattern.en who has polarity >= threshold will put in positive-sentiment-tweets.txt otherwise in negative-sentiment-tweets.txt. :return: """ # Initialize tweets list tweets = [] # Open the data file and read line by line with open('CrimeReport.txt', 'r') as data_file: for line in data_file: # Load it as a json object tweets.append(json.loads(line)) # Open output files file1 = open('positive-sentiment-tweets.txt','w+') file2 = open('negative-sentiment-tweets.txt','w+') for tweet in tweets: # If positive function return TRUE then store to positive-tweets else negative-tweets. if positive(tweet['text'],threshold=0.1): file1.write(json.dumps(tweet) + '\n') else: file2.write(json.dumps(tweet) + '\n') # Close the output files. file1.close() file2.close() print "Please check the output files:\npositive-sentiment-tweets.txt negative-sentiment-tweets"
def getAFFECT(self, lineLIST):
    wordLIST = list()
    positiveCOUNTER = 0
    negativeCOUNTER = 0
    everyLINE = 0
    sentencecount = 0
    Sentences1 = []
    for everyPARAGRAPH in lineLIST:
        # Split the current paragraph into sentences and keep a running sentence count.
        Sentences1 = sent_tokenize(lineLIST[everyLINE])
        everyLINE = everyLINE + 1
        newsentencecount = len(Sentences1)
        sentencecount = sentencecount + newsentencecount
        count1 = 0
        for everysentence in Sentences1:
            if count1 < len(Sentences1) and len(Sentences1) > 0:
                newSentences1 = word_tokenize(Sentences1[count1])
                count1 = count1 + 1
                c = 0
                for everyword in newSentences1:
                    lowercaseCHAR = newSentences1[c].lower()
                    wordLIST.append(lowercaseCHAR)
                    c = c + 1
                    # Count each token as positive or negative according to pattern.en.positive().
                    if positive(lowercaseCHAR, threshold=0.5):
                        positiveCOUNTER = positiveCOUNTER + 1
                    else:
                        negativeCOUNTER = negativeCOUNTER + 1
    # Affect = number of scored tokens per sentence.
    totalAFFECT = float(negativeCOUNTER + positiveCOUNTER)
    Affect = float(totalAFFECT / sentencecount)
    return Affect
def test_sentiment_twitter(self):
    sanders = os.path.join(PATH, "corpora", "polarity-en-sanders.csv")
    if os.path.exists(sanders):
        # Assert the accuracy of the sentiment analysis on tweets.
        # Given are the scores for Sanders Twitter Sentiment Corpus:
        # http://www.sananalytics.com/lab/twitter-sentiment/
        # Positive + neutral is taken as polarity >= 0.0,
        # negative is taken as polarity < 0.0.
        # Since there are a lot of neutral cases,
        # and the algorithm predicts 0.0 by default (i.e., the majority class), the results are good.
        # Distinguishing negative from neutral from positive is a much harder task.
        from pattern.db import Datasheet
        from pattern.metrics import test
        reviews = []
        for i, id, date, tweet, polarity, topic in Datasheet.load(sanders):
            if polarity != "irrelevant":
                reviews.append((tweet, polarity in ("positive", "neutral")))
        A, P, R, F = test(lambda review: en.positive(review, threshold=0.0), reviews)
        #print(A, P, R, F)
        self.assertTrue(A > 0.824)
        self.assertTrue(P > 0.879)
        self.assertTrue(R > 0.911)
        self.assertTrue(F > 0.895)
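# A minimal illustrative sketch (not part of the test above) of how the threshold
# parameter shifts the positive/negative split: positive() compares the polarity
# returned by sentiment() against the threshold, so threshold=0.0 lumps neutral
# text (polarity 0.0) in with the positive class, as the comments above describe.
from pattern.en import sentiment, positive

tweet = "The meeting is on Tuesday."      # no sentiment-bearing words, so polarity is 0.0
print(sentiment(tweet)[0])                # polarity: 0.0
print(positive(tweet, threshold=0.0))     # True:  0.0 >= 0.0
print(positive(tweet, threshold=0.1))     # False: 0.0 <  0.1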
def test_sentiment(self):
    # Assert < 0 for negative adjectives and > 0 for positive adjectives.
    self.assertTrue(en.sentiment("wonderful")[0] > 0)
    self.assertTrue(en.sentiment("horrible")[0] < 0)
    self.assertTrue(en.sentiment(en.wordnet.synsets("horrible", pos="JJ")[0])[0] < 0)
    self.assertTrue(en.sentiment(en.Text(en.parse("A bad book. Really horrible.")))[0] < 0)
    # Assert the accuracy of the sentiment analysis.
    # Given are the scores for Pang & Lee's polarity dataset v2.0:
    # http://www.cs.cornell.edu/people/pabo/movie-review-data/
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    reviews = []
    for score, review in Datasheet.load(os.path.join("corpora", "pang&lee-polarity.txt")):
        reviews.append((review, int(score) > 0))
    A, P, R, F = test(lambda review: en.positive(review), reviews)
    self.assertTrue(A > 0.71)
    self.assertTrue(P > 0.72)
    self.assertTrue(R > 0.70)
    self.assertTrue(F > 0.71)
    print "pattern.en.sentiment()"
def on_status(self, status):
    if status.lang != "en":
        return
    is_positive = positive(status.text)
    if self.led_matrix:
        pixels = Pixels.HAPPY_FACE if is_positive else Pixels.SAD_FACE
        self.led_matrix.run_animation(pixels)
    else:
        print "[%s] %s" % ("+" if is_positive else "-", status.text)
def main():
    i = 0
    f = open('BERNIE_Sioux_Falls_DATA.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('bernie_sioux_falls_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('bernie_sioux_falls_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('Hillary_Boston.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('clinton_boston_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('clinton_boston_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('Trump_Jacksonville.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('trump_jacksonville_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('trump_jacksonville_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('Bernie_NYC.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('Bernie_Positive_tweets.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('Bernie_Negative_tweets.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('CLINTON_MILWAUKEE_DATA.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('clinton_milwaukee_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('clinton_milwaukee_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('TRUMP_DENVER_DATA.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('trump_denver_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('trump_denver_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('CRUZ_SEATTLE_DATA.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('cruz_seattle_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('cruz_seattle_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    i = 0
    f = open('CLINTON_LITTLE_ROCK_DATA.txt', "r")
    for line in f:
        #tweet = json.loads(line)
        if positive(str(line), threshold=0.1):
            file = open('clinton_little_rock_positive.txt', "a")
            file.write(json.dumps(line) + '\n')
            i += 1
            file.close()
        else:
            file1 = open('clinton_little_rock_negative.txt', "a")
            file1.write(json.dumps(line) + '\n')
            file1.close()
def main():
    tweets_filename = 'sports.txt'
    tweets_file = open(tweets_filename, "r")
    #file1 = 'test.txt'
    #file2 = open(file1, 'w')
    #f = open('test1.txt', 'w')
    count = 0
    tweets = []
    B = [1, 1, "xyz"]
    print "***********************************************************************"
    for line in tweets_file:
        try:
            # Read in one line of the file and convert it into a JSON object.
            tweet = json.loads(line.strip())
            tweet_text = tweet['text']
            tweet_assment = sentiment(tweet_text)
            #B[0] = count
            #B[2] = tweet['text']
            # Label the tweet 1 (positive) or 0 (negative) based on its text.
            if positive(tweet_text, threshold=0.1):
                fil = open('sports_train.txt', 'a')
                count += 1
                B[0] = count
                B[1] = 1
                B[2] = tweet['text']
                fil.write(json.dumps(B) + '\n')
                fil.close()
            else:
                fil = open('sports_train.txt', 'a')
                count += 1
                B[0] = count
                B[1] = 0
                B[2] = tweet['text']
                fil.write(json.dumps(B) + '\n')
                fil.close()
            #f.write(json.dumps(B) + '\r\n')
            #tweets.insert(count, B)
        except:
            # Skip lines that are not valid JSON.
            continue
    print "***********************************************************************"
    print count
def task10(): print("TASK 10:") tweets = [] for line in open('CrimeReport.txt', 'r').readlines(): tweet = json.loads(line) tweets.append(tweet) print sentiment(tweet['text']).assessments if positive(tweet['text'],threshold=0.1): with open('positive-sentiment-tweets.txt','a') as fp: fp.write(json.dumps(tweet)+'\n') fp.close() else: with open('negative-sentiment-tweets.txt','a') as fn: fn.write(json.dumps(tweet)+'\n') fn.close()
def task10():
    print 'Task 10'
    tweets = []
    positive_feeds = []
    negative_feeds = []
    with open('CrimeReport.txt') as f:
        for line in f:
            tweet = json.loads(line)
            tweets.append(tweet)
            if pen.positive(tweet['text'], threshold=0.0):
                positive_feeds.append(tweet)
            else:
                negative_feeds.append(tweet)
    with open('positive-sentiment-tweets.txt', 'w') as file:
        json.dump(positive_feeds, file)
    with open('negative-sentiment-tweets.txt', 'w') as file:
        json.dump(negative_feeds, file)
def task10(): print "Answer for Task 10" tweets = [] # Reading each line and appending it to tweets array file10 = open('removedDuplicates.txt', 'r') for line in file10: tweets.append(json.loads(line)) # Opening two files to add positive and negative tweets file10Positive = open('positive.txt', 'w') file10Negative = open('negative.txt', 'w') for tweet in tweets: # Finding Tweets Threshold value if positive(tweet['text'], threshold=0.1): # Writing Positive tweets to positive.txt print "Wonderful ... Its positive tweets." file10Positive.write(json.dumps(tweet) + '\n') else: # Writing negative tweets to negative.txt print "Awful ... Negative tweets." file10Negative.write(json.dumps(tweet) + '\n') file10Positive.close() file10Negative.close()
print
print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes, "
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can be:
# - a string,
# - a Synset (see pattern.en.wordnet),
# - a parsed Sentence, Text, Chunk or Word (see pattern.en),
# - a Document (see pattern.vector).

# The positive() function returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print
print "good:", positive("good", threshold=0.1)
print " bad:", positive("bad")
print

# You can also do sentiment analysis in Dutch or French,
# it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
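# A minimal sketch (not part of the original example) that passes the other accepted
# input types listed above to sentiment(); the Synset and parsed-Text calls mirror
# the ones used in the pattern.en test snippets earlier in this collection.
from pattern.en import sentiment, parse, Text, wordnet

print sentiment(wordnet.synsets("horrible", pos="JJ")[0])      # Synset input
print sentiment(Text(parse("A bad book. Really horrible.")))   # parsed Text input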
def task10(filename):
    # Declare the tweets list.
    tweets = []
    # Open the file and read all lines into list1.
    with open(filename, 'r') as f:
        list1 = f.readlines()
    # Read list1 element by element.
    for string in list1:
        # Parse each element as a JSON tweet and append it to the tweets list.
        tweet = json.loads(string)
        tweets.append(tweet)
    # Filenames for the positive- and negative-sentiment output files.
    positiveFilename = "positive-sentiment-tweets.txt"
    negativeFilename = "negative-sentiment-tweets.txt"
    # Open file handles for the positive- and negative-sentiment files.
    p = open(positiveFilename, 'a')
    n = open(negativeFilename, 'a')
    # Read the tweets from the list one by one.
    for tweet in tweets:
        # If the tweet's polarity is at or above the threshold, write it to the positive file.
        if positive(tweet["text"], threshold=0.1):
            p.write(json.dumps(tweet))
            p.write("\n")
        # Otherwise, write it to the negative file.
        else:
            n.write(json.dumps(tweet))
            n.write("\n")
    # Close the file handles.
    p.close()
    n.close()
    print "task 10 Done"
# Build the output csv files by separating retrieved phrases as either positive or negative.
word_cnt_pos = defaultdict(int)
word_list = []
noun_dict_pos = defaultdict(int)
word_cnt_neg = defaultdict(int)
noun_dict_neg = defaultdict(int)
reviewHash = {}
keyCount = 0
for item in madisonHospitalReviews.find():
    keyCount = keyCount + 1
    reviewHash[keyCount] = item["text"]
for key, value in reviewHash.iteritems():
    string = value
    tokenized = custom_sent_tokenizer.tokenize(string)
    if positive(string, 0.1):
        try:
            words = nltk.word_tokenize(string)
            tagged = nltk.pos_tag(words)
            chunkGram = "NP: {<JJ> <NN>|<JJ> <NNS>|<NN> <NNS>}"
            chunkParser = nltk.RegexpParser(chunkGram)
            chunked = chunkParser.parse(tagged)
            for subtree in chunked.subtrees():
                if subtree.label() == 'NP':
                    string = subtree.leaves()
                    (terms, tags) = zip(*subtree)
                    for i in range(0, len(terms)):
                        word_list.append(terms[i].lower())
                        word_cnt_pos[terms[i].lower()] += 1
                    noun_dict_pos[(terms[0].lower(), terms[1].lower())] = 1
                    word_list = []
for item in sortedIDs:
    for i in charlotteHospitalReviews.find():
        if item == i["text"]:
            sortedCollection.insert(i)
keyCount = 0
reviewHash = {}
for item in sortedCollection.find():
    keyCount = keyCount + 1
    reviewHash[keyCount] = item["text"]
count_pos_sent = 0
for key, value in reviewHash.iteritems():
    final_score = 3.0
    count_pos = 0
    count_neg = 0
    total_count = 0
    review = value
    phrase = ""
    if positive(value, 0.1):
        count_pos_sent += 1
    words = nltk.word_tokenize(review)
    tagged = nltk.pos_tag(words)
    try:
        chunked = chunkParser_pos.parse(tagged)
        for subtree in chunked.subtrees():
            if subtree.label() == 'POS':
                phrase = ""
                (terms, tags) = zip(*subtree)
                for i in range(0, len(terms)):
                    phrase = phrase + " " + terms[i]
                if positive(phrase.strip(), 0.1):
                    count_pos += 1
                    total_count += 1
        chunked = chunkParser_neg.parse(tagged)
        for subtree in chunked.subtrees():
            if subtree.label() == 'NEG':
                phrase = ""
# subjectivity() measures objective vs. subjective, as a number between 0.0 and 1.0.
# sentiment() returns a tuple of (polarity, subjectivity) for a given string.
for word in ("amazing", "horrible", "public"):
    print word, sentiment(word)
print

print sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes, "
    "but it's presented in such a ridiculous way it's seriously boring.")

# The input string can also be a Synset, or a parsed Sentence, Text, Chunk or Word.

# positive() returns True if the string's polarity >= threshold.
# The threshold can be lowered or raised,
# but overall for strings with multiple words +0.1 yields the best results.
print positive("good", threshold=0.1)
print positive("bad")
print

# You can also do sentiment analysis in Dutch, it works exactly the same:

#from pattern.nl import sentiment as sentiment_nl
#print "In Dutch:"
#print sentiment_nl("Een onwijs spannend goed boek!")

# You can also use Pattern with SentiWordNet.
# You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/
# Put the file "SentiWordNet*.txt" in pattern/en/wordnet/
# You can then use Synset.weight() and wordnet.sentiwordnet:
#from pattern.en import wordnet, ADJECTIVE
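# A small sketch (not in the original example) of inspecting how sentiment() arrives
# at its score: the returned value also exposes .assessments, listing the recognized
# words together with their individual scores, as used elsewhere in this collection
# via sentiment(text).assessments.
from pattern.en import sentiment

score = sentiment("A wonderful movie with a seriously boring ending.")
print score              # (polarity, subjectivity)
print score.assessments  # per-word assessments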
def get_recent_tweets(user, total_n=50):
    data = {}
    unbiasseddata = {}
    max_id = None
    total = 0
    n = 0
    global D
    global M
    global N
    global A
    global B
    global C
    while True:
        unbiassedtweets = api.search(geocode="44.467186,-73.214804,9mi",
                                     screen_name=user, count=total_n, max_id=max_id)
        if len(unbiassedtweets) == 0:
            break
        for unbiassedtweet in unbiassedtweets:
            n += 1
            if unbiassedtweet.id not in unbiasseddata:
                D += 1
                if positive(unbiassedtweet.text, threshold=0.1):
                    C += 1
                unbiasseddata[unbiassedtweet.id] = str(unbiassedtweet)
                if ("news" in unbiassedtweet.text or "News" in unbiassedtweet.text
                        or "NEWS" in unbiassedtweet.text):
                    N += 1
                    if positive(unbiassedtweet.text, threshold=0.1):
                        B += 1
                        C -= 1
            if n >= total_n:
                break
        max_id = min([s.id for s in unbiassedtweets]) - 1
        if n >= total_n:
            break
    # return unbiasseddata.values()
    total = 0
    n = 0
    while True:
        """
        rawtweets = api.user_timeline(screen_name=user, count=total_n, max_id=max_id)
        """
        """rawtweets = api.geo_search(query="USA", granularity="country") """
        query = "Elections OR Clinton OR Donald OR Fake OR Hillary OR Obama OR Russian OR Trump OR Fake news OR news"
        rawtweets = api.search(q=query, geocode="39.8,-95.583068847656,2500km",
                               screen_name=user, count=total_n, max_id=max_id)
        if len(rawtweets) == 0:
            break
        for rawtweet in rawtweets:
            n += 1
            if rawtweet.id not in data:
                data[rawtweet.id] = str(rawtweet)
                if rawtweet.id in unbiasseddata:
                    M += 1
                    if positive(rawtweet.text, threshold=0.1):
                        A += 1
                        C -= 1
                        B -= 1
            if n >= total_n:
                break
        max_id = min([s.id for s in rawtweets]) - 1
        if n >= total_n:
            break
    return data.values()
word_cnt_pos = defaultdict(int)
word_list = []
count = 0
noun_dict_pos = defaultdict(int)
word_cnt_neg = defaultdict(int)
noun_dict_neg = defaultdict(int)
reviewHash = {}
keyCount = 0
for item in pittsburghHospitalReviews.find():
    keyCount = keyCount + 1
    reviewHash[keyCount] = item["text"]
for key, value in reviewHash.iteritems():
    count += 1
    print count
    string = value
    if positive(string, 0.1):
        try:
            words = nltk.word_tokenize(string)
            tagged = nltk.pos_tag(words)
            chunkGram = "NP: {<JJ> <NN>|<JJ> <NNS>|<NN> <NNS>}"
            chunkParser = nltk.RegexpParser(chunkGram)
            chunked = chunkParser.parse(tagged)
            for subtree in chunked.subtrees():
                if subtree.label() == 'NP':
                    string = subtree.leaves()
                    (terms, tags) = zip(*subtree)
                    for i in range(0, len(terms)):
                        word_list.append(terms[i].lower())
                        word_cnt_pos[terms[i].lower()] += 1
                    noun_dict_pos[(terms[0].lower(), terms[1].lower())] = 1
                    word_list = []
print("") print(sentiment( "The movie attempts to be surreal by incorporating time travel and various time paradoxes," "but it's presented in such a ridiculous way it's seriously boring.")) # The input string can be: # - a string, # - a Synset (see pattern.en.wordnet), # - a parsed Sentence, Text, Chunk or Word (see pattern.en), # - a Document (see pattern.vector). # The positive() function returns True if the string's polarity >= threshold. # The threshold can be lowered or raised, # but overall for strings with multiple words +0.1 yields the best results. print("") print("good", positive("good", threshold=0.1)) print("bad", positive("bad")) print("") # You can also do sentiment analysis in Dutch or French, # it works exactly the same: #from pattern.nl import sentiment as sentiment_nl #print("In Dutch:") #print(sentiment_nl("Een onwijs spannend goed boek!")) # You can also use Pattern with SentiWordNet. # You can get SentiWordNet at: http://sentiwordnet.isti.cnr.it/ # Put the file "SentiWordNet*.txt" in pattern/en/wordnet/ # You can then use Synset.weight() and wordnet.sentiwordnet:
            dicWord[nn] = 1
    else:
        for w in chunk:
            if w.type == "JJ":
                index = c.lower().find(w.string)
                print index
                print w
                if index > 0 and judge(c, index):
                    # Wrap the adjective in a <span class=*JJ*> tag (closing tag inserted first
                    # so the character offsets stay valid).
                    c = c[:index + len(w.string)] + '</span>' + c[index + len(w.string):]
                    c = c[:index] + '<span class=*JJ* >' + c[index:]
    #print c
    # Wrap the whole sentence in a <span> carrying its sentiment, positive, mood and modality attributes.
    c = '<span class=*sentence* sentiment=*' + str(sentiment(sentence)) + '* positive=*' + str(positive(sentence)) + '* mood=*' + str(mood(sentence)) + '* modality=*' + str(modality(sentence)) + '*>' + c + "</span>"
    c = c.replace('"', '*')
    v.texts = v.texts + c
    #print c
#pdb.set_trace()
#print v.texts
print v.date
#print v.nouns
#print v.texts
print v.stars
cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("' + v.date + '","' + v.user + '","' + v.nouns + '","' + str(v.stars) + '" ,"' + v.texts + '")')
#cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')
cur.close()
cx.commit()
#reload(sys)
# When you run this code, make sure the path below points to the correct location of CrimeReport.txt.
in_file = open('/Users/manasgaur/Desktop/MyApp/Chen_Python_work/Data_folder/CrimeReport.txt', 'r')
tweets = []
for line in in_file:
    tweet = json.loads(line)
    tweets.append(tweet)
in_file.close()
for i in range(len(tweets)):
    val = tweets[i]["text"]
    if positive(val, threshold=0.1):
        if os.path.isfile('positive.txt'):
            with open('positive.txt', 'a') as pos:
                pos.write("Text")
                json.dump(val, pos)
                #pos.write(str(val))
                pos.write('\n')
                pos.write('\n')
                pos.write("Sentiment Assessment:\n")
                l = sentiment(val).assessments
                json.dump(l, pos)
                pos.write('\n')
                pos.write('\n')
                #pos.close()
        else:
            in2_file = open('positive.txt', 'w')
            in2_file.write("Text")
def test_positive(self):
    # Assert that en.positive() yields polarity >= 0.1.
    s = "A great day!"
    self.assertTrue(en.positive(s))
    print "pattern.en.subjectivity()"
from pattern.en import sentiment
from pattern.en import positive
import json

file_open = open('crime.txt', 'r').readlines()
for line in file_open:
    tweet = json.loads(line)
    t = tweet['text']
    s = sentiment(t)
    #print s[0]
    if positive(t, 0.1):
        open('positive-sentiment-tweets.txt', 'a').write(json.dumps(tweet) + "\n")
    else:
        open('negative-sentiment-tweets.txt', 'a').write(json.dumps(tweet) + "\n")
def test_positive(self):
    # Assert that en.positive() yields polarity >= 0.1.
    s = "A great day!"
    self.assertTrue(en.positive(s))
    print("pattern.en.subjectivity()")
import os
import json
from pattern.en import positive, sentiment

file_name = 'final_data/washington.txt'
positive_file = 'positive/washington.txt'
negative_file = 'negative/washington.txt'
positive_tweets = []
negative_tweets = []

with open(file_name) as in_file:
    for line in in_file:
        tweet = line
        if positive(tweet):
            positive_tweets.append(tweet)
        else:
            negative_tweets.append(tweet)

with open(positive_file, 'w') as out_file:
    for tweet in positive_tweets:
        out_file.write(tweet)
        out_file.write("\n")

with open(negative_file, 'w') as out_file:
    for tweet in negative_tweets:
        out_file.write(tweet)
        out_file.write("\n")