Example #1
def sentiment_analysis(message):
	actual_range = 2
	final = []
	message = re.sub("(@[A-Za-z0-9]+)|( RT)|( rt)|(\w+:\/\/\S+)"," ",message).strip() #filter usernames,urls
	message = re.sub('#',"",message)
	message = filter(lambda x: x in string.printable, message) #filter non printable characters
	message = HTMLParser.HTMLParser().unescape(message) #unescape html
	tokenized = tokenize(message, punctuation='.!?:')
	tokenized = filter(bool,tokenized)
	tok1=[]
	for index,it in enumerate(tokenized):
		mod = mood(it)
		if '?' in it or mod=='conditional':
			continue
		tok1.append(it.strip())
	score = 0.0
	possed = [re.split(' ', sentence) for sentence in tok1]
	possed = [nltk.pos_tag(sentence) for sentence in possed]
	final = []
	for sentence in possed:
		check = []
		for entry in sentence:
			check.append(list(entry))
		final.append(check)
	range_count=0
	for sentence in final:
		sentence = dictionary_tag(sentence)
		score = score + sentiment_score(sentence)
	return score
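The snippet above leans on several names that are not defined here: `dictionary_tag` and `sentiment_score` are project-specific helpers, while `tokenize` and `mood` presumably come from pattern.en. A minimal, self-contained sketch of just the mood-based filtering step, under that assumption:

from pattern.en import tokenize, mood

text = "If it rains we will stay home. The food was great! Was it worth the price?"
sentences = tokenize(text)
# keep sentences that are not questions and not in the conditional mood
kept = [s.strip() for s in sentences
        if '?' not in s and mood(s) != 'conditional']
print(kept)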
Example #2
 def test_mood(self):
     # Assert imperative mood.
     v = en.mood(en.Sentence(en.parse("Do your homework!")))
     self.assertEqual(v, en.IMPERATIVE)
     # Assert conditional mood.
     v = en.mood(en.Sentence(en.parse("We ought to help him.")))
     self.assertEqual(v, en.CONDITIONAL)
     # Assert subjunctive mood.
     v = en.mood(en.Sentence(en.parse("I wouldn't do that if I were you.")))
     self.assertEqual(v, en.SUBJUNCTIVE)
     # Assert indicative mood.
     v = en.mood(en.Sentence(en.parse("The weather is nice today.")))
     self.assertEqual(v, en.INDICATIVE)
     print "pattern.en.mood()"
Example #3
 def test_mood(self):
     # Assert imperative mood.
     v = en.mood(en.Sentence(en.parse("Do your homework!")))
     self.assertEqual(v, en.IMPERATIVE)
     # Assert conditional mood.
     v = en.mood(en.Sentence(en.parse("We ought to help him.")))
     self.assertEqual(v, en.CONDITIONAL)
     # Assert subjunctive mood.
     v = en.mood(en.Sentence(en.parse("I wouldn't do that if I were you.")))
     self.assertEqual(v, en.SUBJUNCTIVE)
     # Assert indicative mood.
     v = en.mood(en.Sentence(en.parse("The weather is nice today.")))
     self.assertEqual(v, en.INDICATIVE)
     print("pattern.en.mood()")
Example #4
def add_modality(tdb):
        for tweet in tdb:
                s = parse(tweet[2], lemmata=True)
                s = Sentence(s)
                (form, score) = (mood(s), modality(s))
                tweet.extend((form, score))
        return tdb
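add_modality() assumes each record in tdb is a mutable sequence whose third field (tweet[2]) holds the tweet text, and it needs parse, Sentence, mood and modality from pattern.en in scope. A minimal usage sketch with made-up records:

from pattern.en import parse, Sentence, mood, modality

tdb = [
    [1, "user_a", "We should leave before it gets dark."],  # illustrative records
    [2, "user_b", "The weather is nice today."],
]
tdb = add_modality(tdb)
for tweet in tdb:
    # each record now ends with the sentence's mood label and modality score
    print(tweet[2], tweet[-2], tweet[-1])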
Example #5
def sentiment_analysis(message):
    actual_range = 2
    final = []
    message = re.sub("(@[A-Za-z0-9]+)|( RT)|( rt)|(\w+:\/\/\S+)", " ",
                     message).strip()  #filter usernames,urls
    message = re.sub('#', "", message)
    message = filter(lambda x: x in string.printable,
                     message)  #filter non printable characters
    message = HTMLParser.HTMLParser().unescape(message)  #unescape html
    tokenized = tokenize(message, punctuation='.!?:')
    tokenized = filter(bool, tokenized)
    tok1 = []
    for index, it in enumerate(tokenized):
        mod = mood(it)
        if '?' in it or mod == 'conditional':
            continue
        tok1.append(it.strip())
    score = 0.0
    possed = [re.split(' ', sentence) for sentence in tok1]
    possed = [nltk.pos_tag(sentence) for sentence in possed]
    final = []
    for sentence in possed:
        check = []
        for entry in sentence:
            check.append(list(entry))
        final.append(check)
    range_count = 0
    for sentence in final:
        sentence = dictionary_tag(sentence)
        score = score + sentiment_score(sentence)
    return score
Example #6
def extractMood(characterSentences):
    """
    Analyzes each sentence using the grammatical mood module from pattern.
    """
    characterMoods = defaultdict(list)
    for key, value in characterSentences.iteritems():
        for x in value:
            characterMoods[key].append(mood(Sentence(parse(str(x),
                                                           lemmata=True))))
    return characterMoods
Example #7
def extractMood(characterSentences):
    """
    Analyzes each sentence using the grammatical mood module from pattern.
    """
    characterMoods = defaultdict(list)
    for key, value in characterSentences.iteritems():
        for x in value:
            characterMoods[key].append(
                mood(Sentence(parse(str(x), lemmata=True))))
    return characterMoods
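Both versions above use dict.iteritems(), so they are Python 2 code. A rough Python 3 equivalent under the same assumption that characterSentences maps a character name to a list of sentences:

from collections import defaultdict
from pattern.en import parse, Sentence, mood

def extract_mood_py3(character_sentences):
    """Hypothetical Python 3 rewrite of extractMood() above."""
    character_moods = defaultdict(list)
    for character, sentences in character_sentences.items():
        for s in sentences:
            character_moods[character].append(
                mood(Sentence(parse(str(s), lemmata=True))))
    return character_moods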
Example #8
def get_mood(sentence):
    """Returns mood of sentence string"""
    conditional_words = [
        "assuming", "if", "in case", "no matter how", "supposing", "unless",
        "would", "'d", "should", "could", "might", "going to", "whenever",
        "as long as", "because", "in order to"
    ]
    result = mood(sentence)
    if result == 'imperative':
        return 'nonconditional'
    if result in ['subjunctive', 'conditional']:
        for cw in conditional_words:
            if cw in sentence.lower():
                return 'conditional'
        return 'subjunctive'
    return 'nonconditional'  # indicative
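A small call sketch (get_mood() itself needs mood from pattern.en in scope); the comment lists the three labels the function can return rather than a verified output for each input:

for s in ["Do your homework!",
          "We would go out, assuming the rain stops.",
          "I wish it were summer."]:
    # get_mood() returns 'nonconditional', 'conditional' or 'subjunctive'
    print(s, "->", get_mood(s))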
Example #9
def team_sentiment_analysis(stats):
	for s in stats.sentences:
		this_sentiment = sentiment(s)
		polarity = float("{0:.2f}".format(this_sentiment[0]))
		subjectivity = float("{0:.2f}".format(this_sentiment[1]))
		polarity_10 = float("{0:.1f}".format(this_sentiment[0]))
		subjectivity_10 = float("{0:.1f}".format(this_sentiment[1]))
		stats.polarity_counts[polarity] += 1
		stats.subjectivity_counts[subjectivity] += 1
		stats.polarity_counts_10s[polarity_10] += 1
		stats.subjectivity_counts_10s[subjectivity_10] += 1

		s = Sentence(parse(s, lemmata=True))
		stats.mood_counts[mood(s)] += 1
		rounded_modality = float("{0:.2f}".format(modality(s)))
		rounded_modality_10 = float("{0:.1f}".format(modality(s)))
		stats.modality_counts[rounded_modality] += 1
		stats.modality_counts_10s[rounded_modality_10] += 1
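team_sentiment_analysis() expects a stats object with a sentences list and several counter attributes keyed by rounded floats or mood strings, plus sentiment, parse, Sentence, mood and modality from pattern.en in scope. A minimal stand-in could look like this (the attribute names come from the snippet, the rest is illustrative):

from collections import Counter

class TeamStats(object):
    def __init__(self, sentences):
        self.sentences = sentences
        # counters keyed by rounded scores and by mood labels
        self.polarity_counts = Counter()
        self.subjectivity_counts = Counter()
        self.polarity_counts_10s = Counter()
        self.subjectivity_counts_10s = Counter()
        self.mood_counts = Counter()
        self.modality_counts = Counter()
        self.modality_counts_10s = Counter()

stats = TeamStats(["We could still win this game.", "The match starts at noon."])
team_sentiment_analysis(stats)
print(stats.mood_counts)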
Example #10
for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print
    print 'Labeled Sentiment:', review_sentiment    
    print    
    final_sentiment = analyze_sentiment_pattern_lexicon(review,
                                                        threshold=0.1,
                                                        verbose=True)
    print '-'*60            
      
for review, review_sentiment in sample_data:
    print 'Review:'
    print review
    print 'Labeled Sentiment:', review_sentiment 
    print 'Mood:', mood(review)
    mod_score = modality(review)
    print 'Modality Score:', round(mod_score, 2)
    print 'Certainty:', 'Strong' if mod_score > 0.5 \
                                    else 'Medium' if mod_score > 0.35 \
                                                    else 'Low'
    print '-'*60
pattern_predictions = [analyze_sentiment_pattern_lexicon(review, threshold=0.1)
Example #11
    from docs import TEST_DOCUMENTS

    for doc in TEST_DOCUMENTS:
        sentences = doc['sentences']
        conditionals = 0
        indicatives = 0
        imperatives = 0
        subjunctives = 0
        minModality = 1
        maxModality = -1

        for sentence in sentences:
            s = parse(sentence, lemmata=True)
            s = Sentence(s)
            m = mood(s)
            modal = modality(s)
            #set the max or min value
            if modal > maxModality:
                maxModality = modal
            if modal < minModality:
                minModality = modal
            # count moods by type
            if m == "conditional":
                conditionals = conditionals + 1
            elif m == "indicative":
                indicatives = indicatives + 1
            elif m == "imperative":
                imperatives = imperatives + 1
            elif m == "subjunctive":
                subjunctives = subjunctives + 1
Example #12
def extract_bias_features(text):
    features = {}
    text = unicode(text, errors='ignore')
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [
        " ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))
    ]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_cnt'] = len(words)

    # unique word count
    features['unique_word_cnt'] = len(unigrams)

    # coherence marker count
    count = count_feature_list_freq(coherence, words, bigrams, trigrams)
    features['cm_cnt'] = count
    features['cm_rto'] = round(float(count) / float(len(words)), 4)

    # degree modifier count
    count = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    features['dm_cnt'] = count
    features['dm_rto'] = round(float(count) / float(len(words)), 4)

    # hedge word count
    count = count_feature_list_freq(hedges, words, bigrams, trigrams)
    features['hedge_cnt'] = count
    features['hedge_rto'] = round(float(count) / float(len(words)), 4)

    # factive verb count
    count = count_feature_list_freq(factives, words, bigrams, trigrams)
    features['factive_cnt'] = count
    features['factive_rto'] = round(float(count) / float(len(words)), 4)

    # assertive verb count
    count = count_feature_list_freq(assertives, words, bigrams, trigrams)
    features['assertive_cnt'] = count
    features['assertive_rto'] = round(float(count) / float(len(words)), 4)

    # implicative verb count
    count = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    features['implicative_cnt'] = count
    features['implicative_rto'] = round(float(count) / float(len(words)), 4)

    # bias words and phrases count
    count = count_feature_list_freq(biased, words, bigrams, trigrams)
    features['bias_cnt'] = count
    features['bias_rto'] = round(float(count) / float(len(words)), 4)

    # opinion word count
    count = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    features['opinion_cnt'] = count
    features['opinion_rto'] = round(float(count) / float(len(words)), 4)

    # weak subjective word count
    count = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    features['subj_weak_cnt'] = count
    features['subj_weak_rto'] = round(float(count) / float(len(words)), 4)

    # strong subjective word count
    count = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    features['subj_strong_cnt'] = count
    features['subj_strong_rto'] = round(float(count) / float(len(words)), 4)

    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(
        text)['compound']
    features['vader_sentiment'] = compound_sentiment

    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity'] = round(pattern_subjectivity, 4)

    # modality (certainty) score and mood using  http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    features['mood'] = mood(sentenceObj)

    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    features['fk_gl'] = textstat.flesch_kincaid_grade(text)

    # liwc 3rd person pronoun count (combines S/he and They)
    count = count_liwc_list_freq(liwc_3pp, words)
    features['liwc_3pp_cnt'] = count
    features['liwc_3pp_rto'] = round(float(count) / float(len(words)), 4)

    # liwc auxiliary verb count
    count = count_liwc_list_freq(liwc_aux, words)
    features['liwc_aux_cnt'] = count
    features['liwc_aux_rto'] = round(float(count) / float(len(words)), 4)

    # liwc adverb count
    count = count_liwc_list_freq(liwc_adv, words)
    features['liwc_adv_cnt'] = count
    features['liwc_adv_rto'] = round(float(count) / float(len(words)), 4)

    # liwc preposition count
    count = count_liwc_list_freq(liwc_prep, words)
    features['liwc_prep_cnt'] = count
    features['liwc_prep_rto'] = round(float(count) / float(len(words)), 4)

    # liwc conjunction count
    count = count_liwc_list_freq(liwc_conj, words)
    features['liwc_conj_cnt'] = count
    features['liwc_conj_rto'] = round(float(count) / float(len(words)), 4)

    # liwc discrepancy word count
    count = count_liwc_list_freq(liwc_discr, words)
    features['liwc_discr_cnt'] = count
    features['liwc_discr_rto'] = round(float(count) / float(len(words)), 4)

    # liwc tentative word count
    count = count_liwc_list_freq(liwc_tent, words)
    features['liwc_tent_cnt'] = count
    features['liwc_tent_rto'] = round(float(count) / float(len(words)), 4)

    # liwc certainty word count
    count = count_liwc_list_freq(liwc_cert, words)
    features['liwc_cert_cnt'] = count
    features['liwc_cert_rto'] = round(float(count) / float(len(words)), 4)

    # liwc causation word count
    count = count_liwc_list_freq(liwc_causn, words)
    features['liwc_causn_cnt'] = count
    features['liwc_causn_rto'] = round(float(count) / float(len(words)), 4)

    # liwc work word count
    count = count_liwc_list_freq(liwc_work, words)
    features['liwc_work_cnt'] = count
    features['liwc_work_rto'] = round(float(count) / float(len(words)), 4)

    # liwc achievement word count
    count = count_liwc_list_freq(liwc_achiev, words)
    features['liwc_achiev_cnt'] = count
    features['liwc_achiev_rto'] = round(float(count) / float(len(words)), 4)

    return features
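Most of extract_bias_features() depends on word lists and helpers defined elsewhere in that project (coherence, hedges, count_feature_list_freq, vader_sentiment_analysis, and so on). The pattern.en portion can be exercised on its own; a minimal sketch, assuming pattern_sentiment is pattern.en's sentiment():

from pattern.en import parse, Sentence, modality, mood
from pattern.en import sentiment as pattern_sentiment

text = "The committee might possibly reconsider the proposal."
s = Sentence(parse(text, lemmata=True))
print("subjectivity:", round(pattern_sentiment(text)[1], 4))
print("modality:", round(modality(s), 4))
print("mood:", mood(s))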
Example #13
def printReview(sentence, tagger,nlp):
    POStags=['NN','RB','VB','JJ','MD','PR']
    terms = nltk.word_tokenize(sentence.lower())
    #print(tagger.tag(terms))
    POSterms = getPOSterms(terms,POStags,tagger)
    nouns = POSterms['NN']
    adverbs = POSterms['RB']
    verbs = POSterms['VB']
    adjectives = POSterms['JJ']
    modalAuxilary = POSterms['MD']
    pronouns = POSterms['PR']
    #print(adverbs)
    #print(adjectives)
    #print(nouns)
    if(len(terms) > 3):
        fourgrams = ngrams(terms,4)
        for tg in fourgrams:
            case1 = tg[0] in nouns and tg[1] in verbs and tg[2] in adverbs and tg[3] in adjectives
            case2 = tg[0] in nouns and tg[1] in verbs and tg[2] in adjectives
            case3 = tg[0] in adverbs and tg[1] in adjectives and tg[2] in nouns
            case4 = tg[0] in nouns and tg[1] in verbs and tg[3] in adjectives
            case5 = tg[1] in nouns and tg[2] in verbs and tg[3] in adjectives
            case6 = tg[1] in adverbs and tg[2] in adjectives and tg[3] in nouns
            case7 = tg[0] in adjectives and tg[1] in adjectives and tg[2] in adjectives and tg[3] in nouns
            case8 = tg[0] in pronouns and tg[1] in modalAuxilary and tg[2] in verbs
            #case9 = tg[1] in adjectives and tg[2] in nouns and tg[3] in nouns
            case10 = tg[0] in pronouns and tg[1] in modalAuxilary and tg[3] in verbs
            case11 = tg[1] in verbs and tg[2] in adverbs and tg[3] in adjectives
            case12 = tg[1] in adjectives and tg[2] in adjectives and tg[3] in nouns
#            case13 = tg[1] in verbs and tg[2] in pronouns and tg[3] in nouns
#            case14 = tg[0] in verbs and tg[1] in pronouns and tg[3] in nouns
#            case15 = tg[0] in pronouns and tg[1] in verbs and tg[3] in nouns
#            case16 = tg[0] in pronouns and tg[1] in verbs and tg[3] in adjectives
            #print(tg)
            if(case1 or case2 or case3 or case4 or case5 or case6 or case7 or case8 or case10 or case11 or case12):
                #print(tg)
                #print('case1',case1,'case2',case2,'case3',case3,'case4',case4,'case5',case5,'case6',case6)
                #print('case7',case7,'case8',case8,'case10',case10,'case11',case11,'case12',case12)
                return(sentence)
            #if(case13 or case14 or case15 or case16):
                #print(tg)
                #print('case11',case11,'case12',case12,'case13',case13,'case14',case14)
#                return(sentence)
                
        sentence = sentence.translate(str.maketrans('','',string.punctuation))
        #specialCase1 recommend
        #print(sentence)
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'recommend':
                return(sentence)
        #specialCase2 must or must've
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'must' or word == "must've":
                return(sentence)
        #specialCase3 amazing
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'amazing':
                return(sentence)
        #specialCase4 Dont miss or Do not miss
        notContains = sentence.find('Dont miss') == -1 and sentence.find('Do not miss') == -1
        if not notContains:
            return(sentence)
        #specialCase5 definitely
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'definitely':
                return(sentence)
        #specialCase6 amazing
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'delicious' or word == 'fantastic':
                return(sentence)
        #specialCase7 byob
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'byob':
                return(sentence)
        #specialCase8 bland
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'bland':
                return(sentence)
        #specialCase9 reservation
        for word in sentence.lower().strip().split(sep=' '):
            if word == 'reservation':
                return(sentence)
        #specialCase10 imperative
        doc = nlp(sentence)
        hasSubject = False
        for xx in doc:
            if xx.dep_ == 'nsubj':
                hasSubject = True
            
        if hasSubject and mood(sentence) == 'imperative':
            return(sentence)
        #specialCase11 subject of sentence is you
        if hasSubject and xx.text == 'you':
            return(sentence)
        '''subchk = []
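printReview() mixes several libraries; the excerpt assumes imports and setup roughly like the following (getPOSterms and the tagger come from elsewhere in that project, and the spaCy model name here is a guess):

import string
import nltk
from nltk.util import ngrams
from pattern.en import mood
import spacy

nlp = spacy.load("en_core_web_sm")  # model name is an assumption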
Example #14
def extract_bias_features(text):
    features = {}
    txt_lwr = str(text).lower()
    words = nltk.word_tokenize(txt_lwr)
    words = [w for w in words if len(w) > 0 and w not in '.?!,;:\'s"$']
    if len(words) < 1:
        return None
    unigrams = sorted(list(set(words)))
    bigram_tokens = nltk.bigrams(words)
    bigrams = [" ".join([w1, w2]) for w1, w2 in sorted(set(bigram_tokens))]
    trigram_tokens = nltk.trigrams(words)
    trigrams = [" ".join([w1, w2, w3]) for w1, w2, w3 in sorted(set(trigram_tokens))]
    # print words
    # print unigrams
    # print bigrams
    # print trigrams
    # print "----------------------"

    # word count
    features['word_count'] = float(len(words))

    # unique word count
    features['unique_word_count'] = float(len(unigrams))

    # coherence marker count
    count, instances = count_feature_list_freq(coherence, words, bigrams, trigrams)
    # if count > 0:
    features['coherence_marker_count'] = count
    features['coherence_marker_prop'] = round(float(count) / float(len(words)), 4)
    features['coherence_marker_list'] = instances

    # degree modifier count
    count, instances = count_feature_list_freq(modifiers, words, bigrams, trigrams)
    #if count > 0:
    features['degree_modifier_count'] = count
    features['degree_modifier_prop'] = round(float(count) / float(len(words)), 4)
    features['degree_modifier_list'] = instances

    # hedge word count
    count, instances = count_feature_list_freq(hedges, words, bigrams, trigrams)
    #if count > 0:
    features['hedge_word_count'] = count
    features['hedge_word_prop'] = round(float(count) / float(len(words)), 4)
    features['hedge_word_list'] = instances

    # factive verb count
    count, instances = count_feature_list_freq(factives, words, bigrams, trigrams)
    #if count > 0:
    features['factive_verb_count'] = count
    features['factive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['factive_verb_list'] = instances

    # assertive verb count
    count, instances = count_feature_list_freq(assertives, words, bigrams, trigrams)
    #if count > 0:
    features['assertive_verb_count'] = count
    features['assertive_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['assertive_verb_list'] = instances

    # implicative verb count
    count, instances = count_feature_list_freq(implicatives, words, bigrams, trigrams)
    #if count > 0:
    features['implicative_verb_count'] = count
    features['implicative_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['implicative_verb_list'] = instances

    # bias words and phrases count
    count, instances = count_feature_list_freq(biased, words, bigrams, trigrams)
    #if count > 0:
    features['bias_count'] = count
    features['bias_prop'] = round(float(count) / float(len(words)), 4)
    features['bias_list'] = instances

    # opinion word count
    count, instances = count_feature_list_freq(opinionLaden, words, bigrams, trigrams)
    #if count > 0:
    features['opinion_count'] = count
    features['opinion_prop'] = round(float(count) / float(len(words)), 4)
    features['opinion_list'] = instances

    # weak subjective word count
    count, instances = count_feature_list_freq(subj_weak, words, bigrams, trigrams)
    #if count > 0:
    features['subjective_weak_count'] = count
    features['subjective_weak_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_weak_list'] = instances

    # strong subjective word count
    count, instances = count_feature_list_freq(subj_strong, words, bigrams, trigrams)
    #if count > 0:
    features['subjective_strong_count'] = count
    features['subjective_strong_prop'] = round(float(count) / float(len(words)), 4)
    features['subjective_strong_list'] = instances

    # composite sentiment score using VADER sentiment analysis package
    compound_sentiment = vader_sentiment_analysis.polarity_scores(text)['compound']
    features['vader_composite_sentiment'] = float(compound_sentiment)

    # subjectivity score using Pattern.en
    pattern_subjectivity = pattern_sentiment(text)[1]
    features['subjectivity_score'] = round(pattern_subjectivity, 4)

    # modality (certainty) score and mood using  http://www.clips.ua.ac.be/pages/pattern-en#modality
    sentence = parse(text, lemmata=True)
    sentenceObj = Sentence(sentence)
    features['modality'] = round(modality(sentenceObj), 4)
    try:
        features['mood'] = mood(sentenceObj)
    except IndexError as e:
        print "IndexError: %s" % e
        print "Ignoring..."
        features['mood'] = 'err'

    # Flesch-Kincaid Grade Level (reading difficulty) using textstat
    try:
        features['flesch-kincaid_grade_level'] = float(textstat.flesch_kincaid_grade(text))
    except TypeError as e:
        print "TypeError: %s" % e
        print "Ignoring..."
        features['flesch-kincaid_grade_level'] = 0.0

    # liwc 3rd person pronoun count (combines S/he and They)
    count, instances = count_liwc_list_freq(liwc_3pp, words)
    #if count > 0:
    features['liwc_3rd_person_pronoun_count'] = count
    features['liwc_3rd_person_pronoun_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_3rd_person_pronoun_list'] = instances

    # liwc auxiliary verb count
    count, instances = count_liwc_list_freq(liwc_aux, words)
    #if count > 0:
    features['liwc_auxiliary_verb_count'] = count
    features['liwc_auxiliary_verb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_auxiliary_verb_list'] = instances

    # liwc adverb count
    count, instances = count_liwc_list_freq(liwc_adv, words)
    #if count > 0:
    features['liwc_adverb_count'] = count
    features['liwc_adverb_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_adverb_list'] = instances

    # liwc preposition count
    count, instances = count_liwc_list_freq(liwc_prep, words)
    #if count > 0:
    features['liwc_preposition_count'] = count
    features['liwc_preposition_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_preposition_list'] = instances

    # liwc conjunction count
    count, instances = count_liwc_list_freq(liwc_conj, words)
    #if count > 0:
    features['liwc_conjunction_count'] = count
    features['liwc_conjunction_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_conjunction_list'] = instances

    # liwc discrepency word count
    count, instances = count_liwc_list_freq(liwc_discr, words)
    #if count > 0:
    features['liwc_discrepency_word_count'] = count
    features['liwc_discrepency_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_discrepency_word_list'] = instances

    # liwc tentative word count
    count, instances = count_liwc_list_freq(liwc_tent, words)
    #if count > 0:
    features['liwc_tentative_word_count'] = count
    features['liwc_tentative_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_tentative_word_list'] = instances

    # liwc certainty word count
    count, instances = count_liwc_list_freq(liwc_cert, words)
    #if count > 0:
    features['liwc_certainty_word_count'] = count
    features['liwc_certainty_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_certainty_word_list'] = instances

    # liwc causation word count
    count, instances = count_liwc_list_freq(liwc_causn, words)
    #if count > 0:
    features['liwc_causation_word_count'] = count
    features['liwc_causation_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_causation_word_list'] = instances

    # liwc work word count
    count, instances = count_liwc_list_freq(liwc_work, words)
    #if count > 0:
    features['liwc_work_word_count'] = count
    features['liwc_work_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_work_word_list'] = instances

    # liwc achievement word count
    count, instances = count_liwc_list_freq(liwc_achiev, words)
    #if count > 0:
    features['liwc_achievement_word_count'] = count
    features['liwc_achievement_word_prop'] = round(float(count) / float(len(words)), 4)
    features['liwc_achievement_word_list'] = instances

    return features
Example #15
type_of_sent = {}
type_of_sent["indicative"] = 0
type_of_sent["imperative"] = 0
type_of_sent["conditional"] = 0
type_of_sent["subjunctive"] = 0

with open(sys.argv[1]) as f:
  for line in f:
    line = line.rstrip('\n')
    line =  line.decode('utf-8')
    sentences = tokenize.sent_tokenize(line)
    for sentence in sentences:
      #print sentence #DEBUGGING
      s = parse(sentence, lemmata=True)
      s = Sentence(s)
      #print mood(s) #DEGUGGING
      mood_type = str(mood(s))
      current = type_of_sent[mood_type]
      current = current + 1
      type_of_sent[mood_type] = current

print type_of_sent

#s = "Some amino acids tend to be acidic while others may be basic." # weaseling
#s = parse(s, lemmata=True)
#s = Sentence(s)
# 
##print modality(s) #How sure a sentence is ... not using here
#print mood(s) 
Example #16
type_of_sent = {}
type_of_sent["indicative"] = 0
type_of_sent["imperative"] = 0
type_of_sent["conditional"] = 0
type_of_sent["subjunctive"] = 0

with open(sys.argv[1]) as f:
    for line in f:
        line = line.rstrip('\n')
        line = line.decode('utf-8')
        sentences = tokenize.sent_tokenize(line)
        for sentence in sentences:
            #print sentence #DEBUGGING
            s = parse(sentence, lemmata=True)
            s = Sentence(s)
            #print mood(s) #DEGUGGING
            mood_type = str(mood(s))
            current = type_of_sent[mood_type]
            current = current + 1
            type_of_sent[mood_type] = current

print type_of_sent

#s = "Some amino acids tend to be acidic while others may be basic." # weaseling
#s = parse(s, lemmata=True)
#s = Sentence(s)
#
##print modality(s) #How sure a sentence is ... not using here
#print mood(s)
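Both mood-counting scripts above rely on imports the excerpts do not show; something along these lines would be needed at the top of the file:

import sys
from nltk import tokenize
from pattern.en import parse, Sentence, mood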
Example #17
    def process(self, message):
        # print pattern_en.suggest(message) -- suggestions
        if message == ">!train":
            self.train()
            return "It is nice to learn new stuff."
        if message == ">!forget":
            memory.clear()
            return "I am reborn. So much free space :) maybe you will use files to store memory and not RAM..."
        if message == ">!load_page":
            if sessionId not in memory:
                response = "Hello! My name is Chad and I am passionate about music."
                response += "We can share our experiences and maybe we can get along."
                response += "Would you mind telling me your name first?"
                expect[sessionId] = "name"
                memory[sessionId] = dict()
            else:
                response = "Welcome back!"
                search.search("new songs")
                with open('results.json') as data_file:
                    data = json.load(data_file)
                    for i in range(10):
                        if 'musicrecording' in data['items'][i]['pagemap']:
                            mr = data['items'][i]['pagemap']['musicrecording']
                            which = random.randint(0, len(mr) - 1)
                            if 'name' not in mr[which]:
                                response += " Did you know that " + mr[which][
                                    'byartist'] + " has released a new song?"
                            else:
                                response += " You can check out this cool song, " + mr[which]['name'] + ", by " + \
                                            mr[which]['byartist']
            return response

        s = nlp.get_sentences(message)

        doc = spacy_nlp(message)
        for w in doc:
            print "(", w, w.dep_, w.pos_, w.head, ")"

        aiml_sent_type = []
        aiml_responses = []
        memory_responses = []
        sentence_types = []
        emotions = []

        for sentence in s:
            sentence_type = self.instant_classifier.classify(
                dialogue_act_features(sentence))

            sentence_types.append(sentence_type)

            polarity, subjective = pattern_en.sentiment(sentence)
            sent = pattern_en.parse(sentence, lemmata=True)
            sent = pattern_en.Sentence(sent)
            modality = pattern_en.modality(sent)
            mood = pattern_en.mood(sent)

            if polarity > 0.8:
                emotions.append("SUPER HAPPY")
            elif polarity > 0.4:
                emotions.append("COOL")
            elif polarity > 0.3:
                emotions.append("GOOD SURPRISE")
            elif polarity < -0.7:
                emotions.append("ANGER")
            elif polarity < -0.4:
                emotions.append("FEAR")
            elif polarity < -0.1:
                emotions.append("SAD")
            else:
                emotions.append("NEUTER")

            print sentence_type, polarity, subjective, modality, mood

            if sentence_type not in ["whQuestion", "ynQuestion"]:
                try:
                    aiml_sent_type_res = self.kernel.respond(
                        sentence_type, sessionId)
                except:
                    aiml_sent_type_res = ""
                aiml_sent_type.append(aiml_sent_type_res)

            verbs_subj = set()
            sentence = sentence[0].upper() + sentence[1:]
            doc = spacy_nlp(sentence)
            for possible_subject in doc:
                if (possible_subject.dep == nsubj or possible_subject.dep
                        == nsubjpass) and possible_subject.head.pos == VERB:
                    verbs_subj.add((possible_subject, possible_subject.head))

            try:
                aiml_response = self.kernel.respond(sentence, sessionId)
            except:
                aiml_response = ""
            aiml_responses.append(aiml_response)

            # MEMORY MODULE
            memory_msg = ""
            if sentence_type == "Statement":
                # insert into memory
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    vb = i[1].lower_
                    if vb not in memory[sessionId]:
                        memory[sessionId][vb] = dict()
                    for subj in subjs:
                        for c in i[1].children:
                            if c.dep in [prep]:
                                memory[sessionId][vb][subj] = c.lower_ + " "
                                for c_prep in c.children:
                                    if c_prep.dep in [dobj, pobj, attr]:
                                        memory[sessionId][vb][
                                            subj] += c_prep.text
                                        memory_responses.append(
                                            self.kernel.respond(
                                                "memorate", sessionId))
                            elif c.dep in [dobj, pobj, attr]:
                                memory[sessionId][vb][subj] = c.text
                                memory_responses.append(
                                    self.kernel.respond("memorate", sessionId))
            elif sentence_type == "whQuestion":
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    max_similarity = 0
                    verb = i[1].lower_
                    for j in memory[sessionId]:
                        p_word = spacy_nlp(j)
                        similarity = i[1].similarity(p_word[0])
                        if similarity > max_similarity:
                            max_similarity = similarity
                            verb = j
                    if max_similarity > 0.5 and verb in memory[sessionId]:
                        num_subjs = len(subjs)
                        memory_msg = ""
                        for subj in subjs:
                            if subj in memory[sessionId][verb]:
                                toks = nlp.tokenize_text(subj)
                                memory_msg = ""
                                for t in toks:
                                    if t in first_person:
                                        memory_msg += pron_translate[t] + " "
                                    else:
                                        memory_msg += t + " "
                                num_subjs -= 1
                                if num_subjs > 2:
                                    memory_msg += ", "
                                elif num_subjs == 1:
                                    memory_msg += "and "
                        if len(memory_msg) > 0:
                            memory_msg += verb + " "
                            if num_subjs != len(subjs):
                                memory_msg += memory[sessionId][verb][
                                    subjs[-1]] + "."
            memory_responses.append(memory_msg)

        arr_response = []

        for i in aiml_sent_type:
            if len(i) > 0:
                arr_response.append(i)

        for i in aiml_responses:
            if len(i) > 0:
                arr_response.append(i)

        for i in memory_responses:
            if len(i) > 0:
                arr_response.append(i)

        if len(arr_response) == 0:
            data = search.search(message)
            snip = data['items'][0]['snippet']
            sents = nlp.get_sentences(snip)
            arr_response.append(sents[0])

        response = ""

        for i in emotions:
            try:
                emoi = self.kernel.respond(i, sessionId)
            except:
                emoi = None
            if emoi is not None:
                if random.randint(0, 100) < 50:
                    response += " " + emoi + "."
                    break

        for res in arr_response:
            if len(res) > 1:
                response += res + " "

        # generic response, if no response
        restoks = nlp.tokenize_text(response)
        if len(restoks) == 0:
            idx = random.randint(0, len(sentence_types) - 1)
            try:
                aiml_response = self.kernel.respond(sentence_types[idx],
                                                    sessionId)
            except:
                aiml_response = ""
            response += aiml_response

        # polarity, subjective = pattern_en.sentiment(response)
        # sent = pattern_en.parse(sentence, lemmata=True)
        # sent = pattern_en.Sentence(sent)
        # modality = pattern_en.modality(sent)
        # mood = pattern_en.mood(sent)
        # sentence_type = self.instant_classifier.classify(dialogue_act_features(response))
        # print response, polarity, subjective, modality, mood

        return response
Example #18
                        dicWord[nn]=1
            else:
                for w in chunk:
                    if w.type=="JJ":
                       index=c.lower().find(w.string)
                       
                       print index
                      
                       print w
                       if index>0 and judge(c,index):
                            c=c[:index+len(w.string)]+'</span>'+c[index+len(w.string):]
                            c=c[:index]+'<span class=*JJ* >'+c[index:]
                       
                       #print c

        c='<span class=*sentence* sentiment=*'+str(sentiment(sentence))+'* positive=*'+str(positive(sentence))+'* mood=*'+str(mood(sentence))+'* modality=*'+str(modality(sentence))+'*>'+c+"</span>"
        c=c.replace('"','*')
        v.texts=v.texts+c
        #print c
        #pdb.set_trace()
        #print v.texts            
        
    print v.date
    #print v.nouns
    #print v.texts
    print v.stars
    
    cur.execute('insert into wZwZcte4lcbu51NOzCjWbQ values("'+v.date+'","'+v.user+'","'+v.nouns+'","'+str(v.stars)+'" ,"'+v.texts+'")')
#cur.execute('create table wordfre(word varchar(20) UNIQUE,uid integer)')
cur.close()    
cx.commit()