Example #1
def positiveSentimentDetector(descriptions):
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SIA()

    # The polarity_scores method of SentimentIntensityAnalyzer
    # returns a sentiment dictionary
    # containing pos, neg, neu, and compound scores.
    sentiment_dict = sid_obj.polarity_scores(descriptions)

    positiveScore = sentiment_dict["pos"] * 100
    return positiveScore
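# Hypothetical usage of the detector above; the import is an assumption (the
# snippet does not show where SIA comes from) and mirrors Example #8 below.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA

print(positiveSentimentDetector("The product works well and support was friendly."))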
Example #2
def get_sentiment(df, series: str):
    # initialize sentiment classifier
    sia = SIA()

    # get sentiment
    sentiment = df[series].apply(sia.polarity_scores)

    # create sentiment df
    sentiment = pd.DataFrame(sentiment.tolist())

    # merge sentiment with your df
    return df.merge(sentiment, how='left', left_index=True, right_index=True)
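# Hypothetical usage ('headline' is a placeholder column name); assumes pandas is
# imported as pd and SIA is the VADER SentimentIntensityAnalyzer, neither of which
# is shown in the snippet:
#
# news = pd.DataFrame({'headline': ['Markets rally on strong earnings',
#                                   'Storm causes widespread damage']})
# scored = get_sentiment(news, 'headline')  # adds neg, neu, pos, compound columns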
Example #3
def searchHeadlines(a1):

    t = Article(a1)

    try:

        t.download()
        t.parse()
        t.nlp()

        s1 = SIA()
        ps = s1.polarity_scores(t.summary)

        # Strip the Google redirect prefix, the scheme, and any "www" fragments so
        # only the site name and path-derived tokens remain.
        a1 = a1.replace('https://www.google.com/url?q=https://', '')

        while (a1.find('https://') != -1):
            a1 = a1.replace('https://', '')
        while (a1.find('www') != -1):
            a1 = a1.replace('www', '')

        d = a1.split('.')

        if (d[0] == 'www' or d[0] == ''):
            n = 1
        else:
            n = 0

        string = ''

        if (len(d[n]) > 1):
            string = d[n]

        n = n + 1

        # Collect readable name tokens until a TLD-like piece ('co', 'com', 'in')
        # is reached.
        while ('co' not in d[n] and 'com' not in d[n] and 'in' not in d[n]):
            if len(d[n]) > 2 and '%' not in d[n]:
                string = string + ' ' + d[n]
            n = n + 1
        return string, ps['compound']

    except Exception:
        # On any failure (download, parse, or URL handling), return an empty result.
        return '', 0
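# Hypothetical usage (the URL is a placeholder); Article is assumed to come from
# the `newspaper` package and SIA from VADER, neither of which is imported in the
# snippet:
#
# site_name, compound = searchHeadlines("https://www.example.com/news/some-story")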
Example #4
def get_sentiment(df, series: str):
    # initialize sentiment classifier
    sia = SIA()

    # get sentiment
    sentiment = df[series].apply(sia.polarity_scores)

    # create sentiment df
    sentiment = pd.DataFrame(sentiment.tolist())

    # merge sentiment with your df

    df = df.merge(sentiment, how='left', left_index=True, right_index=True)
    df['sentiment'] = df['compound'].apply(categorize_sentiment)
    df['sentiment'] = pd.Categorical(df['sentiment'])
    binary_sentiment = df['sentiment'].str.get_dummies()

    df = df.merge(binary_sentiment, how='left', left_index=True, right_index=True)
    return df
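# categorize_sentiment is not defined in the snippet; a minimal sketch, assuming
# the conventional VADER cut-offs of +/-0.05 on the compound score:
def categorize_sentiment(compound):
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'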
Example #5
#import logging
#logging.basicConfig(level=logging.DEBUG)

# Initialise the PCA9685 using the default address (0x40).
pwm = Adafruit_PCA9685.PCA9685()

# Alternatively specify a different address and/or bus:
#pwm = Adafruit_PCA9685.PCA9685(address=0x41, busnum=2)

# Configure servo pulse lengths
servo_min = 600  # Min pulse length out of 4096
servo_immigration = 1000  # Pulse length out of 4096
servo_women = 2000  # Pulse length out of 4096
servo_tech = 2500  # Pulse length out of 4096

sid = SIA()


# Helper function to make setting a servo pulse width simpler.
def set_servo_pulse(channel, pulse):
    pulse_length = 1000000  # 1,000,000 us per second
    pulse_length //= 60  # 60 Hz
    print('{0}us per period'.format(pulse_length))
    pulse_length //= 4096  # 12 bits of resolution
    print('{0}us per bit'.format(pulse_length))
    pulse *= 1000
    pulse //= pulse_length
    pwm.set_pwm(channel, 0, pulse)
    # Set frequency to 60hz, good for servos.
    pwm.set_pwm_freq(60)
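# Hypothetical usage (channel 0 is a placeholder): drive the servo straight to one
# of the configured pulse lengths, as in the stock Adafruit PCA9685 servo example.
pwm.set_pwm_freq(60)
pwm.set_pwm(0, 0, servo_min)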
Example #6
def generate_sentiment_features(top_words, labels, X_train_ids, X_holdout_ids,
                                corpus, i2s):
    """Create sentiment features
    
    Arguments
    - top_words: list of most informative or polarizing bias-words in corpus
    - labels: a dict mapping domain names to bias and credibility label information
    - X_train_ids: the set of row ids that are in the training set
    - X_holdout_ids: the set of row ids that are in the holdout/testing set
    - corpus: the set of news articles (documents)
    - i2s: a dict mapping integer vertex labels to string representations (domain names)

    Returns:
    - training_vector: a numpy ndarray containing sentiment scores for each word in top_words and each article in the training set
    - holdout_vector:a numpy ndarray containing sentiment scores for each word in top_words and each article in the holdout set
    - sentiment_stats_per_article: a dict mapping the context word to bias/credibility label information, article ID, and sentiment score
    - word2context: a dict mapping the context word to sentences in the corpus which include it 
    
    """

    sa = SIA()

    print('generating sentiment features...')
    # Get context
    word2context = {}
    sentiment_stats_per_article = {}
    training_feats = []
    holdout_feats = []

    # for each word in top_words, grab sentences that contain the word
    for w_idx, w in enumerate(top_words):
        training_feats.append([0] * len(X_train_ids))
        holdout_feats.append([0] * len(X_holdout_ids))
        sentiment_stats_per_article[w] = []
        word2context[w] = create_context(w, corpus, i2s)

        # loop through each article
        # Initialise the accumulators so the shared-term computation below works
        # even if one of the labels does not occur for this word.
        first_pass = True
        first = True
        conservative_terms = ['']
        liberal_terms = ['']
        for line in word2context[w]:
            sdom = line['sdom']
            label = labels[sdom]['bias']
            article_id = line['article_ID']

            # concatenate all text by label
            if labels[sdom]['bias'] in ["R"]:
                temp = ' '.join(line['sentences'])
                if first_pass:
                    conservative_terms = [temp]
                    first_pass = False
                else:
                    conservative_terms = [
                        ' '.join([conservative_terms[0], temp])
                    ]

            if labels[sdom]['bias'] in ["L"]:
                temp = ' '.join(line['sentences'])
                if first:
                    liberal_terms = [temp]
                    first = False
                else:
                    liberal_terms = [' '.join([liberal_terms[0], temp])]

        # remove shared terms from sentences and then compute sentiment score per article
        shared_terms = list(
            set(liberal_terms[0].split()) & set(conservative_terms[0].split()))
        for art in word2context[w]:
            article_id = art['article_ID']
            temp = ' '.join(art['sentences'])
            art_leftovers = ' '.join(
                [word for word in temp.split() if word not in shared_terms])
            score = sa.polarity_scores(art_leftovers)['compound']
            sentiment_stats_per_article[w].append({
                'article_id': article_id,
                'bias': labels[art['sdom']]['bias'],
                'cred': labels[art['sdom']]['cred'],
                'sentiment_score': score
            })

            # check that article is in labeled training dataset
            if article_id in X_train_ids:
                idx = X_train_ids.index(article_id)
                training_feats[w_idx][idx] = score

            # check that article is in labeled holdout dataset
            if article_id in X_holdout_ids:
                idx = X_holdout_ids.index(article_id)
                holdout_feats[w_idx][idx] = score

#        # replace articles without context word with label average
#        avg_lib_score =np.mean([line['sentiment_score'] for line in sentiment_stats_per_article[w] if line['bias'] in ["L","LC"]])
#        avg_conserv_score = np.mean([line['sentiment_score'] for line in sentiment_stats_per_article[w] if line['bias'] in ["R","RC"]])
#
#        zero_idx = np.where(training_feats==0)[0]
#        lib_idx = np.where()
#        training_feats = np.array(training_feats)
#        training_feats[zero_idx]
#        for idx in zero_idx:
#            if
#            training_feats[idx] = avg_lib_score

    # transform the list of lists into numpy arrays so that each column is the feature for word 'w'
    training_vector = np.transpose(np.array(training_feats))
    holdout_vector = np.transpose(np.array(holdout_feats))

    return training_vector, holdout_vector, sentiment_stats_per_article, word2context
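# Hypothetical call sketch (all inputs are placeholders defined elsewhere in the
# original project); each column of the returned arrays holds the per-article
# compound score for one word in top_words:
#
# X_train_sent, X_holdout_sent, stats, contexts = generate_sentiment_features(
#     top_words, labels, X_train_ids, X_holdout_ids, corpus, i2s)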
Example #7
## Input Chaperone 'results' data into dataframe
## Output the same Chaperone data as a series of floating point numbers

NUMER_DATA = ['D', 'E', 'F', 'G', 'AA', 'AB', 'AF', 'AR', 'AS']
CAT_ORD_DATA = [
    'H', 'I', 'O', 'AC', 'AD', 'AT', 'AU', 'AV', 'AW', 'AX', 'AY', 'AZ'
]
BIN_POS = ['J', 'K', 'L', 'M', 'R', 'S', 'T', 'U', 'AI']
BIN_PRES = ['W', 'X']
COMM_DATA = ['AE', 'BA']
ROLES = ['mentor', 'expert', 'tech', 'judge']
NLO, NHI = 1, 4
wlo, wmed, whi = 'low', 'med', 'high'
chp_sheet = 'Startup-O Chaperone Evaluation Sheet- 1 MAR 2018.csv'
res, rnk = 'results', 'Ranking test'
sia_ana = SIA()

qs = [
    'Does the management believe in building a strong in-house tech team ?',
    'Relevant Industry/market background of team',
    'How well can team execute their presented plans ?',
    'What is the level of understanding and clarity of the financial projections by the team ?',
    'Additional Comments', 'Potential Execution Risks',
    'Are the hiring plans relevant to the roadmap and business growth ?',
    'Does he understand the business space clearly or is he just a techie ?',
    'Go to market plan strength',
    'What is the level of uniqueness in the proposition for the venture?',
    'Is it built using a relevant tech stack ? Are they using relevant technologies to build the product ?',
    'Do the tech team (and biz team) understand and follow a form of agile / lean SDLC ?',
    'How high is the business momentum and pipeline of business ?',
    'Is the team sufficiently staffed ?',
Example #8
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 19 17:09:45 2020

@author: acibi
"""
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA

analyser = SIA()

def readdata():
    return pd.read_csv("reuters_data1.csv")

# Sentiment Analyzer Scores
def sas(sentence, sentiment):
    score = analyser.polarity_scores(sentence)
    return score[sentiment]

def sentiment_lister(sentiment, df):
    trlist = list()
    for sentence in df['processed_header']:
        trlist.append(sas(sentence,sentiment))
    return trlist

def add_sentiments(df):
    df['neg'] = sentiment_lister('neg', df)
    df['neu'] = sentiment_lister('neu', df)
    df['pos'] = sentiment_lister('pos', df)
    df['compound'] = sentiment_lister('compound', df)
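# Hypothetical usage of the helpers above; add_sentiments adds the four score
# columns to the frame in place:
df = readdata()
add_sentiments(df)
print(df[['processed_header', 'neg', 'neu', 'pos', 'compound']].head())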
    
Example #9
import re

#base query url
#maximum size is 1000, increasing will not change number of comments returned
#score threshold set to greater than 1 to avoid problems with normalizing sentiment
base_url = "https://api.pushshift.io/reddit/search/comment/" + \
            "?q={}&" + \
            "{}" + \
            "after={}&" + \
            "before={}&" + \
            "score=>1&" + \
            "sort_type=score&" + \
            "sort=desc&" + \
            "size=1000"

analyzer = SIA()

KEYWORD = "musk"
SUBREDDITS = ["all", "politics", "the_donald", "space"]


#formats the desired subreddits to match REST query
def format_subreddit(sub):
    if sub == "all":
        return ""
    return "subreddit={}&".format(sub)


#converts unix time to string
def utc_to_str(utc_time):
    utc_time = int(utc_time)
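# Hypothetical query construction (the two epoch timestamps are placeholders); it
# shows how the four placeholders in base_url map to keyword, subreddit filter,
# and the after/before time range:
example_url = base_url.format(KEYWORD, format_subreddit("politics"),
                              1546300800, 1577836800)
print(example_url)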
Example #10
# Removing irrelevant words
tk_list = []

for item in good_list:
    grd_list = []
    for ele in item:
        if ele not in useless_words:
            grd_list.append(ele)
    tk_list.append(grd_list)

#Raw headlines to enter into our dataframe and perform sentiment analysis
res = [' '.join(ele) for ele in new_list]

#Sentiment Intensity Analyzer
sia = SIA()
data = []
for f in res:
    #Get polarity score
    result = sia.polarity_scores(f)
    #Create a new column headline
    result['headline'] = f
    data.append(result)
#Make it into pandas dataframe
df = pd.DataFrame.from_records(data)
#Order the column
df = df[['headline', 'pos', 'neu', 'neg', 'compound']]
#Keep one similar headline and drop others
df.drop_duplicates(subset="headline", keep='first', inplace=True)
#Sorting in Ascending Order to Get Top 3 Lowest Sentiment
digi_dat = df.sort_values(by=['compound'])
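# The comment above refers to the top 3 lowest-sentiment headlines; a hypothetical
# way to pull them from the sorted frame:
print(digi_dat.head(3))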
Example #11
vsize = len(vocab)
print(vsize)
v2i = dict([(key, i) for i, key in enumerate(vocab)])
site_raw_tc = {}
site_raw_ts = {}

bvsize = len(bivocab)
print(bvsize)
bv2i = dict([(key, i) for i, key in enumerate(bivocab)])
site_raw_bc = {}
site_raw_bs = {}

# Build arrays for every site, containing counts of the terms and the average
# sentiment. Sentiment is collected for each term by adding the article's sentiment
# every time the term is detected, then dividing by the term count to get the mean.

sa = SIA()

for sdom in tqdm(arts.keys()):
    #for (sdom, doc) in tqdm(arts):

    doc = arts[sdom]

    mycat = s2l[sdom]
    if mycat in cats:
        if sdom not in site_raw_tc:
            site_raw_tc[sdom] = np.zeros(vsize)
            site_raw_ts[sdom] = np.zeros(vsize)
            site_raw_bc[sdom] = np.zeros(bvsize)
            site_raw_bs[sdom] = np.zeros(bvsize)
        c = sa.polarity_scores(doc.text)['compound']
        for word in doc[:-1]:
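            # Assumed loop body (not in the original snippet), following the comment
            # above: count the term and add the article's compound score; dividing
            # the sums by the counts afterwards gives the mean sentiment. The bigram
            # arrays (site_raw_bc / site_raw_bs) would be filled analogously via bv2i.
            token = str(word)
            if token in v2i:
                site_raw_tc[sdom][v2i[token]] += 1
                site_raw_ts[sdom][v2i[token]] += c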
Example #12
Purpose = []
for i in result:
    Name.append(i[0][5:])
    Purpose.append(i[1][9:])
df9 = pd.DataFrame({"Name": Name, "Purpose": Purpose})

# -----------file = Company Details.txt---------#
f6 = open("Company Details.txt", "r").readlines()
Name = []
Purpose = []
for i in f6[1:]:
    if i[:5] == "Name:":
        Name.append(i[5:-1])
    elif i[:8] == "Purpose:":
        Purpose.append((i[8:]))
df10 = pd.DataFrame({"Name": Name, "Purpose": Purpose})

frames = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10]
df = pd.concat(frames)
df = df.reset_index()
df.drop("index", axis=1, inplace=True)
df = df.drop_duplicates(subset="Name")

sia = SIA()
result = []
for row in df["Purpose"]:
    pol_score = sia.polarity_scores(row)
    pol_score["Purpose"] = row
    result.append(pol_score)

df["Score"] = pd.DataFrame(result)["compound"]
df1 = df.sort_values(by="Score", ascending=False)