def test_modality(self):
    # Assert -1.0 => +1.0 representing the degree of certainty.
    v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
    self.assertTrue(v < 0)
    v = en.modality(en.Sentence(en.parse("It will surely stop raining soon.")))
    self.assertTrue(v > 0)
    # Assert the accuracy of the modality algorithm.
    # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
    # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    sentences = []
    for certain, sentence in Datasheet.load(os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
        sentence = en.parse(sentence, chunks=False, light=True)
        sentence = en.Sentence(sentence)
        sentences.append((sentence, int(certain) > 0))
    A, P, R, F = test(lambda sentence: en.modality(sentence) > 0.5, sentences)
    #print A, P, R, F
    self.assertTrue(A > 0.69)
    self.assertTrue(P > 0.71)
    self.assertTrue(R > 0.64)
    self.assertTrue(F > 0.67)
    print "pattern.en.modality()"
def test_modality(self):
    # Assert -1.0 => +1.0 representing the degree of certainty.
    v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
    self.assertTrue(v < 0)
    v = en.modality(en.Sentence(en.parse("It will surely stop raining soon.")))
    self.assertTrue(v > 0)
    # Assert the accuracy of the modality algorithm.
    # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
    # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
    # The baseline should increase (not decrease) when the algorithm is modified.
    from pattern.db import Datasheet
    from pattern.metrics import test
    sentences = []
    for certain, sentence in Datasheet.load(os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
        sentence = en.parse(sentence, chunks=False, light=True)
        sentence = en.Sentence(sentence)
        sentences.append((sentence, int(certain) > 0))
    A, P, R, F = test(lambda sentence: en.modality(sentence) > 0.5, sentences)
    #print(A, P, R, F)
    self.assertTrue(A > 0.69)
    self.assertTrue(P > 0.72)
    self.assertTrue(R > 0.64)
    self.assertTrue(F > 0.68)
    print("pattern.en.modality()")
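# A minimal standalone sketch (not part of the test suite above), assuming pattern.en
# is installed; it shows the same calls the test exercises: modality() takes a parsed
# Sentence and returns a float in [-1.0, +1.0] expressing the degree of certainty.
from pattern.en import parse, Sentence, modality

s = Sentence(parse("Maybe it will rain.", lemmata=True))
print(modality(s))  # a low value: "maybe" hedges the statement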
def getData(self, params):
    if self.now_cache is not None:
        if (self.now_cache + datetime.timedelta(minutes=5)) < datetime.datetime.now():
            self.data_cache = None
            self.today_cache = None
            self.now_cache = None
    if self.data_cache is None:
        tweets = []
        for cand in candidates:
            tweets.append({'tweets': api.user_timeline(cand['user'], count=20),
                           'name': cand['name'],
                           'party': cand['party']})
        all_tweets = []
        for tweet_data in tweets:
            name = tweet_data['name']
            party = tweet_data['party']
            for tweet in tweet_data['tweets']:
                all_tweets.append({'Name': name,
                                   'Tweet': tweet.text,
                                   'Favorites': tweet.favorite_count,
                                   'Retweets': tweet.retweet_count})
        dfs = pd.DataFrame(all_tweets)
        sentiments = [sentiment(tweet) for tweet in dfs['Tweet']]
        dfs['Polarity'] = [sent[0] for sent in sentiments]
        dfs['Subjectivity'] = [sent[1] for sent in sentiments]
        modal = [modality(Sentence(parse(tweet, lemmata=True))) for tweet in dfs['Tweet']]
        dfs['Certainty'] = modal
        today = date.strftime(datetime.datetime.now(), format='%m/%d/%Y, %H:%M')
        now = datetime.datetime.now()
        self.data_cache = dfs
        self.today_cache = today
        self.now_cache = now
    return self.data_cache
def add_modality(tdb):
    for tweet in tdb:
        s = parse(tweet[2], lemmata=True)
        s = Sentence(s)
        (form, score) = (mood(s), modality(s))
        tweet.extend((form, score))
    return tdb
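# Hedged usage sketch for add_modality() above. It assumes each row in tdb is a
# list whose third element (tweet[2]) holds the tweet text; the sample rows and
# column layout below are hypothetical, not taken from the original project.
from pattern.en import parse, Sentence, mood, modality

sample_tdb = [
    ["123", "2016-01-01", "We might go to the beach tomorrow."],
    ["124", "2016-01-02", "The report is finished."],
]
for row in add_modality(sample_tdb):
    print(row[2], row[3], row[4])  # text, mood label, modality score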
def team_sentiment_analysis(stats):
    for s in stats.sentences:
        this_sentiment = sentiment(s)
        polarity = float("{0:.2f}".format(this_sentiment[0]))
        subjectivity = float("{0:.2f}".format(this_sentiment[1]))
        polarity_10 = float("{0:.1f}".format(this_sentiment[0]))
        subjectivity_10 = float("{0:.1f}".format(this_sentiment[1]))
        stats.polarity_counts[polarity] += 1
        stats.subjectivity_counts[subjectivity] += 1
        stats.polarity_counts_10s[polarity_10] += 1
        stats.subjectivity_counts_10s[subjectivity_10] += 1
        s = Sentence(parse(s, lemmata=True))
        stats.mood_counts[mood(s)] += 1
        rounded_modality = float("{0:.2f}".format(modality(s)))
        rounded_modality_10 = float("{0:.1f}".format(modality(s)))
        stats.modality_counts[rounded_modality] += 1
        stats.modality_counts_10s[rounded_modality_10] += 1
def calculate_phrase_sentiment(self, phrases):
    # print "Rating phrases sentiment..."
    valence_list = []
    arousal_list = []
    for p in phrases:
        pol = sentiment(p)[0]
        sent = parse(p, lemmata=True)
        mod = modality(Sentence(sent))
        print mod
        valence_list.append(10 * pol)
        arousal_list.append(5 * mod)
    valence = max(valence_list)
    arousal = max(arousal_list)
    print "Valence: " + str(valence)
    print "arousal: " + str(arousal)
    return ((valence, arousal))
def messages():
    # Sorts friends by sentiment and modality of their last message to you.
    # Returns rankings as "Friends' Happiness" and "Friends' Confidence".
    graph = GraphAPI(token)
    me = f.profile()
    happiness = {}
    confidence = {}
    snippets = graph.fql('SELECT snippet, snippet_author FROM thread WHERE folder_id = 0 OR folder_id = 1 Limit 10000', 3)
    # The above code was heavily influenced by arofcoding.blogspot.com/2012/10/python-script-to-fetch-messages-from.html
    # Returns a dictionary of message snippets along with the corresponding Facebook friend IDs.
    for dictionary in snippets['data']:
        # Puts snippets in a dictionary where each author is mapped to the sentiment of their message.
        happiness[sentiment(dictionary['snippet'])] = dictionary['snippet_author']
        confidence[modality(dictionary['snippet'])] = dictionary['snippet_author']
    # Ranks dictionary entries by positivity of sentiment.
    happiness_rankings = rank(happiness)
    confidence_rankings = rank(confidence)
    print "Friends' Happiness (low to high):"
    print happiness_rankings
    print "Friends' Confidence (low to high):"
    print confidence_rankings
def transform(self, text_fields):
    stats = []
    punctuation = string.punctuation
    abvs = ['CNN', 'FBI', 'ABC', 'MSNBC', 'GOP', 'U.S.', 'US', 'ISIS', 'DNC', 'TV',
            'CIA', 'I', 'AP', 'PM', 'AM', 'EU', 'USA', 'UK', 'UN', 'CEO', 'NASA',
            'LGBT', 'LGBTQ', 'NAFTA', 'ACLU']
    for field in text_fields:
        field_stats = {}
        tok_text = nltk.word_tokenize(field)
        try:
            num_upper = float(len([w for w in tok_text if w.isupper() and w not in abvs])) / len(tok_text)
        except:
            num_upper = 0
        try:
            num_punct = float(len([ch for ch in field if ch in punctuation])) / len(field)
        except:
            num_punct = 0
        try:
            sent_lengths = [len(nltk.word_tokenize(s)) for s in nltk.sent_tokenize(field)]
            av_sent_len = float(sum(sent_lengths)) / len(sent_lengths)
        except:
            av_sent_len = 0
        try:
            num_prof = float(len([w for w in tok_text if w.lower() in PROFANITY])) / len(tok_text)
        except:
            num_prof = 0
        mood = modality(field)
        polarity, subjectivity = sentiment(field)
        field_stats['all_caps'] = num_upper
        field_stats['sent_len'] = av_sent_len
        field_stats['polarity'] = polarity
        field_stats['subjectivity'] = subjectivity
        field_stats['profanity'] = num_prof
        field_stats['mood'] = mood
        stats.append(field_stats)
    return stats
print 'Review:'
print review
print
print 'Labeled Sentiment:', review_sentiment
print
final_sentiment = analyze_sentiment_pattern_lexicon(review, threshold=0.1, verbose=True)
print '-' * 60

for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print 'Labeled Sentiment:', review_sentiment
    print 'Mood:', mood(review)
    mod_score = modality(review)
    print 'Modality Score:', round(mod_score, 2)
    print 'Certainty:', 'Strong' if mod_score > 0.5 \
        else 'Medium' if mod_score > 0.35 \
        else 'Low'
    print '-' * 60

pattern_predictions = [analyze_sentiment_pattern_lexicon(review, threshold=0.1) for review in test_reviews]
def process(self, message):
    # print pattern_en.suggest(message) -- suggestions
    if message == ">!train":
        self.train()
        return "It is nice to learn new stuff."
    if message == ">!forget":
        memory.clear()
        return "I am reborn. So much free space :) maybe you will use files to store memory and not RAM..."
    if message == ">!load_page":
        if sessionId not in memory:
            response = "Hello! My name is Chad and I am passionate about music."
            response += "We can share our experiences and maybe we can get along."
            response += "Would you mind telling me your name first?"
            expect[sessionId] = "name"
            memory[sessionId] = dict()
        else:
            response = "Welcome back!"
            search.search("new songs")
            with open('results.json') as data_file:
                data = json.load(data_file)
            for i in range(10):
                if 'musicrecording' in data['items'][i]['pagemap']:
                    mr = data['items'][i]['pagemap']['musicrecording']
                    which = random.randint(0, len(mr) - 1)
                    if 'name' not in mr[which]:
                        response += " Did you know that " + mr[which]['byartist'] + " has released a new song?"
                    else:
                        response += " You can check out this cool song, " + mr[which]['name'] + ", by " + \
                                    mr[which]['byartist']
        return response

    s = nlp.get_sentences(message)
    doc = spacy_nlp(message)
    for w in doc:
        print "(", w, w.dep_, w.pos_, w.head, ")"

    aiml_sent_type = []
    aiml_responses = []
    memory_responses = []
    sentence_types = []
    emotions = []

    for sentence in s:
        sentence_type = self.instant_classifier.classify(dialogue_act_features(sentence))
        sentence_types.append(sentence_type)

        polarity, subjective = pattern_en.sentiment(sentence)
        sent = pattern_en.parse(sentence, lemmata=True)
        sent = pattern_en.Sentence(sent)
        modality = pattern_en.modality(sent)
        mood = pattern_en.mood(sent)

        if polarity > 0.8:
            emotions.append("SUPER HAPPY")
        elif polarity > 0.3:
            emotions.append("GOOD SURPRISE")
        elif polarity < -0.4:
            emotions.append("FEAR")
        elif polarity > 0.4:
            emotions.append("COOL")
        elif polarity < -0.1:
            emotions.append("SAD")
        elif polarity < -0.7:
            emotions.append("ANGER")
        else:
            emotions.append("NEUTER")

        print sentence_type, polarity, subjective, modality, mood

        if sentence_type not in ["whQuestion", "ynQuestion"]:
            try:
                aiml_sent_type_res = self.kernel.respond(sentence_type, sessionId)
            except:
                aiml_sent_type_res = ""
            aiml_sent_type.append(aiml_sent_type_res)

        verbs_subj = set()
        sentence = sentence[0].upper() + sentence[1:]
        doc = spacy_nlp(sentence)
        for possible_subject in doc:
            if (possible_subject.dep == nsubj or possible_subject.dep == nsubjpass) and possible_subject.head.pos == VERB:
                verbs_subj.add((possible_subject, possible_subject.head))

        try:
            aiml_response = self.kernel.respond(sentence, sessionId)
        except:
            aiml_response = ""
        aiml_responses.append(aiml_response)

        # MEMORY MODULE
        memory_msg = ""
        if sentence_type == "Statement":
            # insert into memory
            for i in verbs_subj:
                subjs = []
                subjects = [i[0]]
                for tok in i[0].children:
                    if tok.dep == conj:
                        subjects.append(tok)
                for subj in subjects:
                    predec = ""
                    for tok in subj.children:
                        if tok.dep_ == "poss" or tok.dep == amod:
                            predec += tok.lower_
                    if len(predec) > 0:
                        subjs.append(predec + " " + subj.lower_)
                    else:
                        subjs.append(subj.lower_)
                vb = i[1].lower_
                if vb not in memory[sessionId]:
                    memory[sessionId][vb] = dict()
                for subj in subjs:
                    for c in i[1].children:
                        if c.dep in [prep]:
                            memory[sessionId][vb][subj] = c.lower_ + " "
                            for c_prep in c.children:
                                if c_prep.dep in [dobj, pobj, attr]:
                                    memory[sessionId][vb][subj] += c_prep.text
                                    memory_responses.append(self.kernel.respond("memorate", sessionId))
                        elif c.dep in [dobj, pobj, attr]:
                            memory[sessionId][vb][subj] = c.text
                            memory_responses.append(self.kernel.respond("memorate", sessionId))
        elif sentence_type == "whQuestion":
            for i in verbs_subj:
                subjs = []
                subjects = [i[0]]
                for tok in i[0].children:
                    if tok.dep == conj:
                        subjects.append(tok)
                for subj in subjects:
                    predec = ""
                    for tok in subj.children:
                        if tok.dep_ == "poss" or tok.dep == amod:
                            predec += tok.lower_
                    if len(predec) > 0:
                        subjs.append(predec + " " + subj.lower_)
                    else:
                        subjs.append(subj.lower_)
                max_similarity = 0
                verb = i[1].lower_
                for j in memory[sessionId]:
                    p_word = spacy_nlp(j)
                    similarity = i[1].similarity(p_word[0])
                    if similarity > max_similarity:
                        max_similarity = similarity
                        verb = j
                if max_similarity > 0.5 and verb in memory[sessionId]:
                    num_subjs = len(subjs)
                    memory_msg = ""
                    for subj in subjs:
                        if subj in memory[sessionId][verb]:
                            toks = nlp.tokenize_text(subj)
                            memory_msg = ""
                            for t in toks:
                                if t in first_person:
                                    memory_msg += pron_translate[t] + " "
                                else:
                                    memory_msg += t + " "
                            num_subjs -= 1
                            if num_subjs > 2:
                                memory_msg += ", "
                            elif num_subjs == 1:
                                memory_msg += "and "
                    if len(memory_msg) > 0:
                        memory_msg += verb + " "
                        if num_subjs != len(subjs):
                            memory_msg += memory[sessionId][verb][subjs[-1]] + "."
                memory_responses.append(memory_msg)

    arr_response = []
    for i in aiml_sent_type:
        if len(i) > 0:
            arr_response.append(i)
    for i in aiml_responses:
        if len(i) > 0:
            arr_response.append(i)
    for i in memory_responses:
        if len(i) > 0:
            arr_response.append(i)
    if len(arr_response) == 0:
        data = search.search(message)
        snip = data['items'][0]['snippet']
        sents = nlp.get_sentences(snip)
        arr_response.append(sents[0])

    response = ""
    for i in emotions:
        try:
            emoi = self.kernel.respond(i, sessionId)
        except:
            emoi = None
        if emoi is not None:
            if random.randint(0, 100) < 50:
                response += " " + emoi + "."
                break

    for res in arr_response:
        if len(res) > 1:
            response += res + " "

    # generic response, if no response
    restoks = nlp.tokenize_text(response)
    if len(restoks) == 0:
        idx = random.randint(0, len(sentence_types) - 1)
        try:
            aiml_response = self.kernel.respond(sentence_types[idx], sessionId)
        except:
            aiml_response = ""
        response += aiml_response

    # polarity, subjective = pattern_en.sentiment(response)
    # sent = pattern_en.parse(sentence, lemmata=True)
    # sent = pattern_en.Sentence(sent)
    # modality = pattern_en.modality(sent)
    # mood = pattern_en.mood(sent)
    # sentence_type = self.instant_classifier.classify(dialogue_act_features(response))
    # print response, polarity, subjective, modality, mood
    return response
def extract_bias_features(text):
    features = {}
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    if len(words) < 1:
        return None
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_count'] = float(len(words))
    # unique word count
    features['unique_word_count'] = float(len(unigrams))
    # coherence marker count
    count, instances = count_feature_list_freq(coherence, words, bigrams, trigrams)
    # if count > 0:
    features['coherence_marker_count'] = count
    features['coherence_marker_prop'] = round(float(count) / float(len(words)), 4)
    features['coherence_marker_list'] = instances
    # degree modifier count
    count, instances = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    # if count > 0:
    features['degree_modifier_count'] = count
    features['degree_modifier_prop'] = round(float(count) / float(len(words)), 4)
    features['degree_modifier_list'] = instances
    # hedge word count
    count, instances = count_feature_list_freq(hedges, words, bigrams, trigrams)
    # if count > 0:
    features['hedge_word_count'] = count
    features['hedge_word_prop'] = round(float(count) / float(len(words)), 4)
    features['hedge_word_list'] = instances
    # factive verb count
    count, instances = count_feature_list_freq(factives, words, bigrams, trigrams)
    # if count > 0:
    features['factive_verb_count'] = count
    features['factive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['factive_verb_list'] = instances
    # assertive verb count
    count, instances = count_feature_list_freq(assertives, words, bigrams, trigrams)
    # if count > 0:
    features['assertive_verb_count'] = count
    features['assertive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['assertive_verb_list'] = instances
    # implicative verb count
    count, instances = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    # if count > 0:
    features['implicative_verb_count'] = count
    features['implicative_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['implicative_verb_list'] = instances
    # bias words and phrases count
    count, instances = count_feature_list_freq(biased, words, bigrams, trigrams)
    # if count > 0:
    features['bias_count'] = count
    features['bias_prop'] = round(float(count) / float(len(words)), 4)
    features['bias_list'] = instances
    # opinion word count
    count, instances = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    # if count > 0:
    features['opinion_count'] = count
    features['opinion_prop'] = round(float(count) / float(len(words)), 4)
    features['opinion_list'] = instances
    # weak subjective word count
    count, instances = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    # if count > 0:
    features['subjective_weak_count'] = count
    features['subjective_weak_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_weak_list'] = instances
    # strong subjective word count
    count, instances = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    # if count > 0:
    features['subjective_strong_count'] = count
    features['subjective_strong_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_strong_list'] = instances
    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_composite_sentiment'] = float(compound_sentiment)
    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity_score'] = round(pattern_subjectivity, 4)
    # modality (certainty) score and mood using http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    try:
        features['mood'] = mood(sentenceObj)
    except IndexError as e:
        print "IndexError: %s" % e
        print "Ignoring..."
        features['mood'] = 'err'
    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    try:
        features['flesch-kincaid_grade_level'] = float(textstat.flesch_kincaid_grade(text))
    except TypeError as e:
        print "TypeError: %s" % e
        print "Ignoring..."
        features['flesch-kincaid_grade_level'] = 0.0
    # liwc 3rd person pronoun count (combines S/he and They)
    count, instances = count_liwc_list_freq(liwc_3pp, words)
    # if count > 0:
    features['liwc_3rd_person_pronoum_count'] = count
    features['liwc_3rd_person_pronoun_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_3rd_person_pronoun_list'] = instances
    # liwc auxiliary verb count
    count, instances = count_liwc_list_freq(liwc_aux, words)
    # if count > 0:
    features['liwc_auxiliary_verb_count'] = count
    features['liwc_auxiliary_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_auxiliary_verb_list'] = instances
    # liwc adverb count
    count, instances = count_liwc_list_freq(liwc_adv, words)
    # if count > 0:
    features['liwc_adverb_count'] = count
    features['liwc_adverb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_adverb_list'] = instances
    # liwc preposition count
    count, instances = count_liwc_list_freq(liwc_prep, words)
    # if count > 0:
    features['liwc_preposition_count'] = count
    features['liwc_preposition_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_preposition_list'] = instances
    # liwc conjunction count
    count, instances = count_liwc_list_freq(liwc_conj, words)
    # if count > 0:
    features['liwc_conjunction_count'] = count
    features['liwc_conjunction_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_conjunction_list'] = instances
    # liwc discrepency word count
    count, instances = count_liwc_list_freq(liwc_discr, words)
    # if count > 0:
    features['liwc_discrepency_word_count'] = count
    features['liwc_discrepency_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_discrepency_word_list'] = instances
    # liwc tentative word count
    count, instances = count_liwc_list_freq(liwc_tent, words)
    # if count > 0:
    features['liwc_tentative_word_count'] = count
    features['liwc_tentative_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_tentative_word_list'] = instances
    # liwc certainty word count
    count, instances = count_liwc_list_freq(liwc_cert, words)
    # if count > 0:
    features['liwc_certainty_word_count'] = count
    features['liwc_certainty_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_certainty_word_list'] = instances
    # liwc causation word count
    count, instances = count_liwc_list_freq(liwc_causn, words)
    # if count > 0:
    features['liwc_causation_word_count'] = count
    features['liwc_causation_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_causation_word_list'] = instances
    # liwc work word count
    count, instances = count_liwc_list_freq(liwc_work, words)
    # if count > 0:
    features['liwc_work_word_count'] = count
    features['liwc_work_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_work_word_list'] = instances
    # liwc achievement word count
    count, instances = count_liwc_list_freq(liwc_achiev, words)
    # if count > 0:
    features['liwc_achievement_word_count'] = count
    features['liwc_achievement_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_achievement_word_list'] = instances
    return features
type=None, role=None, relation=None)
print pnp.string  # String of words (Unicode).
print pnp.chunks  # List of Chunk objects.
# print pnp.preposition  # First PP chunk in the PNP.

# sentiment
print sentiment(
    "The movie attempts to be surreal by incorporating various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring.")
print sentiment('Wonderfully awful! :-)').assessments

# mood and modality
s = "Some amino acids tend to be acidic while others may be basic."  # weaseling
s = parse(s, lemmata=True)
s = Sentence(s)
print modality(s)

# wordnet
s = wordnet.synsets('bird')[0]
print 'Definition:', s.gloss  # Definition string.
print ' Synonyms:', s.synonyms  # List of word forms (i.e., synonyms).
print ' Hypernyms:', s.hypernyms()  # List of parent synsets (i.e., more general). Synset (semantic parent).
print ' Hypernyms:', s.hypernyms(recursive=False, depth=None)
print ' Hyponyms:', s.hyponyms()  # List of child synsets (i.e., more specific).
print ' Hyponyms:', s.hyponyms(recursive=False, depth=None)
print ' Holonyms:', s.holonyms()  # List of synsets (of which this is a member).
print ' Meronyms:', s.meronyms()  # List of synsets (members/parts).
print ' POS:', s.pos  # Part-of-speech: NOUN | VERB | ADJECTIVE | ADVERB.
print ' Category:', s.lexname  # Category string, or None.
from docs import TEST_DOCUMENTS

for doc in TEST_DOCUMENTS:
    sentences = doc['sentences']
    conditionals = 0
    indicatives = 0
    imperatives = 0
    subjunctives = 0
    minModality = 1
    maxModality = -1
    for sentence in sentences:
        s = parse(sentence, lemmata=True)
        s = Sentence(s)
        m = mood(s)
        modal = modality(s)
        # track the max and min modality values
        if modal > maxModality:
            maxModality = modal
        if modal < minModality:
            minModality = modal
        # count moods (compare strings with ==, not identity)
        if m == "conditional":
            conditionals = conditionals + 1
        elif m == "indicative":
            indicatives = indicatives + 1
        elif m == "imperative":
            imperatives = imperatives + 1
        elif m == "subjunctive":
            subjunctives = subjunctives + 1
    writer.writerow({
# Explanation:
#
# - 0.75 is the sentiment score of the sentence, which means it is highly positive.
# - 0.8 is the subjectivity score, i.e. how much the sentence reflects the user's personal opinion.

# ### Checking if a Statement is a Fact

from pattern.en import parse, Sentence
from pattern.en import modality

text = "Paris is the capital of France"
sent = parse(text, lemmata=True)
sent = Sentence(sent)
print(modality(sent))

text = "I think we can complete this task"
sent = parse(text, lemmata=True)
sent = Sentence(sent)
print(modality(sent))

# ### Spelling Corrections

from pattern.en import suggest
print(suggest("Whitle"))

from pattern.en import suggest
print(suggest("Fracture"))
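# Interpretation note (hedged): per the pattern.en documentation, modality()
# returns a value in [-1.0, +1.0], and scores above roughly 0.5 are commonly read
# as factual/certain statements. "Paris is the capital of France" therefore scores
# high, while the hedge "I think" pulls the second sentence's score down.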
def extract_bias_features(text):
    features = {}
    text = unicode(text, errors='ignore')
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_cnt'] = len(words)
    # unique word count
    features['unique_word_cnt'] = len(unigrams)
    # coherence marker count
    count = count_feature_list_freq(coherence, words, bigrams, trigrams)
    features['cm_cnt'] = count
    features['cm_rto'] = round(float(count) / float(len(words)), 4)
    # degree modifier count
    count = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    features['dm_cnt'] = count
    features['dm_rto'] = round(float(count) / float(len(words)), 4)
    # hedge word count
    count = count_feature_list_freq(hedges, words, bigrams, trigrams)
    features['hedge_cnt'] = count
    features['hedge_rto'] = round(float(count) / float(len(words)), 4)
    # factive verb count
    count = count_feature_list_freq(factives, words, bigrams, trigrams)
    features['factive_cnt'] = count
    features['factive_rto'] = round(float(count) / float(len(words)), 4)
    # assertive verb count
    count = count_feature_list_freq(assertives, words, bigrams, trigrams)
    features['assertive_cnt'] = count
    features['assertive_rto'] = round(float(count) / float(len(words)), 4)
    # implicative verb count
    count = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    features['implicative_cnt'] = count
    features['implicative_rto'] = round(float(count) / float(len(words)), 4)
    # bias words and phrases count
    count = count_feature_list_freq(biased, words, bigrams, trigrams)
    features['bias_cnt'] = count
    features['bias_rto'] = round(float(count) / float(len(words)), 4)
    # opinion word count
    count = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    features['opinion_cnt'] = count
    features['opinion_rto'] = round(float(count) / float(len(words)), 4)
    # weak subjective word count
    count = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    features['subj_weak_cnt'] = count
    features['subj_weak_rto'] = round(float(count) / float(len(words)), 4)
    # strong subjective word count
    count = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    features['subj_strong_cnt'] = count
    features['subj_strong_rto'] = round(float(count) / float(len(words)), 4)
    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_sentiment'] = compound_sentiment
    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity'] = round(pattern_subjectivity, 4)
    # modality (certainty) score and mood using http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    features['mood'] = mood(sentenceObj)
    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    features['fk_gl'] = textstat.flesch_kincaid_grade(text)
    # liwc 3rd person pronoun count (combines S/he and They)
    count = count_liwc_list_freq(liwc_3pp, words)
    features['liwc_3pp_cnt'] = count
    features['liwc_3pp_rto'] = round(float(count) / float(len(words)), 4)
    # liwc auxiliary verb count
    count = count_liwc_list_freq(liwc_aux, words)
    features['liwc_aux_cnt'] = count
    features['liwc_aux_rto'] = round(float(count) / float(len(words)), 4)
    # liwc adverb count
    count = count_liwc_list_freq(liwc_adv, words)
    features['liwc_adv_cnt'] = count
    features['liwc_adv_rto'] = round(float(count) / float(len(words)), 4)
    # liwc preposition count
    count = count_liwc_list_freq(liwc_prep, words)
    features['liwc_prep_cnt'] = count
    features['liwc_prep_rto'] = round(float(count) / float(len(words)), 4)
    # liwc conjunction count
    count = count_liwc_list_freq(liwc_conj, words)
    features['liwc_conj_cnt'] = count
    features['liwc_conj_rto'] = round(float(count) / float(len(words)), 4)
    # liwc discrepency word count
    count = count_liwc_list_freq(liwc_discr, words)
    features['liwc_discr_cnt'] = count
    features['liwc_discr_rto'] = round(float(count) / float(len(words)), 4)
    # liwc tentative word count
    count = count_liwc_list_freq(liwc_tent, words)
    features['liwc_tent_cnt'] = count
    features['liwc_tent_rto'] = round(float(count) / float(len(words)), 4)
    # liwc certainty word count
    count = count_liwc_list_freq(liwc_cert, words)
    features['liwc_cert_cnt'] = count
    features['liwc_cert_rto'] = round(float(count) / float(len(words)), 4)
    # liwc causation word count
    count = count_liwc_list_freq(liwc_causn, words)
    features['liwc_causn_cnt'] = count
    features['liwc_causn_rto'] = round(float(count) / float(len(words)), 4)
    # liwc work word count
    count = count_liwc_list_freq(liwc_work, words)
    features['liwc_work_cnt'] = count
    features['liwc_work_rto'] = round(float(count) / float(len(words)), 4)
    # liwc achievement word count
    count = count_liwc_list_freq(liwc_achiev, words)
    features['liwc_achiev_cnt'] = count
    features['liwc_achiev_rto'] = round(float(count) / float(len(words)), 4)
    return features
def modality_score(sentence):
    s = parse(sentence, lemmata=True)
    s = Sentence(s)
    return modality(s)
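# Hedged usage sketch for modality_score() above; the example sentences are
# illustrative only, and parse/Sentence/modality are assumed to come from pattern.en.
from pattern.en import parse, Sentence, modality

print(modality_score("It could perhaps be improved."))  # hedged wording -> lower score
print(modality_score("The results are final."))         # assertive wording -> higher score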
print PAST in tenses('purred')  # 'p' in tenses() also works.
print (PAST, 1, PL) in tenses('purred')

print 'Quantification'
print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify('carrot', amount=90)
print quantify({'carrot': 100, 'parrot': 20})

print 'ngrams'
print ngrams("I am eating a pizza.", n=2)

# parse
s = parse('I eat pizza with a fork.')
pprint(s)

# tag
for word, t in tag('The cat felt happy.'):
    print word + ' is ' + t

s = "The movie attempts to be surreal by incorporating various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."
print sentiment(s)
print polarity(s)
print subjectivity(s)

# The modality() function returns a value between -1.0 and +1.0, expressing the degree of certainty.
s2 = "Some amino acids tend to be acidic while others may be basic."  # weaseling
se = Sentence(parse(s, chunks=False, lemmata=True))
print modality(se)
                dicWord[nn] = 1
        else:
            for w in chunk:
                if w.type == "JJ":
                    index = c.lower().find(w.string)
                    print index
                    print w
                    if index > 0 and judge(c, index):
                        c = c[:index + len(w.string)] + '</span>' + c[index + len(w.string):]
                        c = c[:index] + '<span class=*JJ* >' + c[index:]
        # print c
        c = '<span class=*sentence* sentiment=*' + str(sentiment(sentence)) + '* positive=*' + str(positive(sentence)) + '* mood=*' + str(mood(sentence)) + '* modality=*' + str(modality(sentence)) + '*>' + c + "</span>"
        c = c.replace('"', '*')
        v.texts = v.texts + c
        # print c
        # pdb.set_trace()

    # print v.texts
    print v.date
    # print v.nouns
    # print v.texts
    print v.stars
    cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("' + v.date + '","' + v.user + '","' + v.nouns + '","' + str(v.stars) + '" ,"' + v.texts + '")')

# cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')
cur.close()
cx.commit()
print conjugate('purred', '3sg')
print PAST in tenses('purred')  # 'p' in tenses() also works.
print (PAST, 1, PL) in tenses('purred')

print 'Quantification'
print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify('carrot', amount=90)
print quantify({'carrot': 100, 'parrot': 20})

print 'ngrams'
print ngrams("I am eating a pizza.", n=2)

# parse
s = parse('I eat pizza with a fork.')
pprint(s)

# tag
for word, t in tag('The cat felt happy.'):
    print word + ' is ' + t

s = "The movie attempts to be surreal by incorporating various time paradoxes, but it's presented in such a ridiculous way it's seriously boring."
print sentiment(s)
print polarity(s)
print subjectivity(s)

# The modality() function returns a value between -1.0 and +1.0, expressing the degree of certainty.
s2 = "Some amino acids tend to be acidic while others may be basic."  # weaseling
se = Sentence(parse(s, chunks=False, lemmata=True))
print modality(se)
def checkModality(sentence):
    return modality(sentence)
def get_score(self, content: str):
    self.sent = parse(content, lemmata=True)
    self.sent = Sentence(self.sent)
    self.modality = modality(self.sent)
    return self.modality
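# Hedged usage sketch for get_score() above: it is written as an instance method,
# so a throwaway namespace object stands in for self here (illustrative only).
from types import SimpleNamespace
from pattern.en import parse, Sentence, modality

scorer = SimpleNamespace()
print(get_score(scorer, "We may need to revisit this assumption."))  # also sets scorer.sent / scorer.modality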