Example #1
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
from tqdm import tqdm


def get_textblob_sentiments(df, text_column):
    """
    Get a sentiment score for each row in text_column of a dataframe using
    spaCyTextBlob, collecting all polarity scores in a list.
    Input:
      - df: dataframe with one doc per row
      - text_column: name of the column in df that holds the texts
    Returns:
      - sentiment_scores: list of polarity scores
    """
    # Load spacy model
    spacy_nlp = spacy.load("en_core_web_sm")

    # Initialise the TextBlob component and add it to the pipeline
    # (spaCy 2.x style; under spaCy 3.x this would be nlp.add_pipe("spacytextblob"))
    spacy_text_blob = SpacyTextBlob()
    spacy_nlp.add_pipe(spacy_text_blob)

    # Create empty target list to store sentiment scores
    sentiment_scores = []

    # Apply sentiment analysis to each text in the data, in batches
    for doc in spacy_nlp.pipe(tqdm(df[text_column]), batch_size=500):
        # Retrieve polarity score
        sentiment = doc._.sentiment.polarity
        # Append sentiment score to target list
        sentiment_scores.append(sentiment)

    return sentiment_scores
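A minimal usage sketch for the function above (the dataframe and column name here are illustrative, not from the original):

import pandas as pd

df = pd.DataFrame({"text": ["I love this!", "This is awful.", "It is a chair."]})
scores = get_textblob_sentiments(df, "text")
df["sentiment"] = scores  # attach the polarity scores as a new column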
Example #2
import spacy
import pandas as pd
from spacytextblob.spacytextblob import SpacyTextBlob


def sentimenter(text):
    """
    Function that given a list with sentences or tweets returns a 
    list with two numbers for each sentences representing the
    polarity (score as a float within the range [-1.0, 1.0]) and 
    subjectivity (that is a float within the range [0.0, 1.0] 
    where 0.0 is very objective and 1.0 is very subjective).

    Parameters
    -------------
    text : (list)
        list where each element is a sentence or tweet.

    Returns
    -------------
    (DataFrame) two-column data frame with polarity and subjectivity.

    Example
    -------------
    example = ["Contact me at [email protected]",
           "@vcuspinera my webpage is https://vcuspinera.github.io"]
    preprocess(example)
    (output:) ['contact me at',
               'my webpage is']
    """
    nlp = spacy.load('en_core_web_sm')
    spacy_text_blob = SpacyTextBlob()
    nlp.add_pipe(spacy_text_blob)

    # Run each text through the pipeline once and collect both scores
    docs = [nlp(tw) for tw in text]
    result = [[doc._.sentiment.polarity,
               doc._.sentiment.subjectivity] for doc in docs]
    result = pd.DataFrame(result, columns=['polarity', 'subjectivity'])
    return result
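A quick usage sketch (inputs are illustrative). Note that sentimenter reloads the spaCy model on every call, so hoisting the load out of the function would be faster for repeated use:

tweets = ["I love this package", "Today was a terrible day"]
print(sentimenter(tweets))  # one row per tweet, columns: polarity, subjectivity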
Example #3
import argparse
import os
import pandas as pd
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob


def main():

    # tell argparse the arguments you'd like to use - including creating a subset for faster processing
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--path", required=True,
        help="The path to the input CSV file")  # the input option
    parser.add_argument(
        "-s", "--subset", required=False,
        help="Number of rows to keep for a faster run")  # the subset option; required is False here
    args = vars(parser.parse_args())

    # read the CSV file at the path defined above into a dataframe
    data = pd.read_csv(args["path"])
    # check whether the subset has been requested - if so, slice the data
    if args["subset"] is not None:
        slice_index = int(args["subset"])
        data = data[:slice_index]

    # create a directory to store the plots (if it doesn't yet exist)
    if not os.path.exists("sentiment_output"):
        os.mkdir("sentiment_output")

    # Then call and initialise spacy, TextBlob, and your nlp pipe
    nlp = spacy.load("en_core_web_sm")  #We're using the English small library
    spacy_text_blob = SpacyTextBlob()  #This is taken from the spacy website
    nlp.add_pipe(spacy_text_blob)

    ##Now we're set up, we want to program the calculation of scores (we'll use batches of 5000)##
    # message
    print("\nHold on, we're calculating the sentiment scores...")

    # create an empty list of sentiment scores for every headline, we'll call this sentiment_tracker
    sentiment_tracker = []

    # for every headline in data frame (we're looking at docs, not sentences)
    for doc in nlp.pipe(data["headline_text"], batch_size=5000):
        # calculate the sentiment of the doc (headline)
        sentiment = doc._.sentiment.polarity
        # append this to sentiment_tracker list
        sentiment_tracker.append(sentiment)

    # append the sentiment_tracker list to the dataframe and save as output csv file in sentiment_plots
    data.insert(len(data.columns), "sentiment", sentiment_tracker)
    output_csv_path = os.path.join("sentiment_output", "sentiment_tracker.csv")
    data.to_csv(output_csv_path, index=False)

    ##Now we can put it together to create rolling mean plots##

    # message
    print(
        "We've calculated the sentiment scores, now we'll generate the plots..."
    )

    # First, create a sentiment dataframe indexed by date, so means can be computed per date
    df_sentiment = pd.DataFrame(
        {"sentiment": sentiment_tracker
         },  # create a column to hold the sentiment scores
        index=pd.to_datetime(
            data["publish_date"], format='%Y%m%d',
            errors='ignore'))  # index by date using to_datetime

    # apply the smoothing plot function from above, to create and save plots in output
    smoothed_sentiment_plot("7d", "1-week", df_sentiment)  # 1-week average
    smoothed_sentiment_plot("30d", "1-month", df_sentiment)  # 1 month average

    # Print a message to let you know when you're done
    print(
        "That's you complete - woohoo! The csv file and plots are in the sentiment_output directory.\n"
    )
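main() calls a smoothed_sentiment_plot helper that is described as defined "above" but is not included in this snippet. A minimal sketch of what such a helper might look like, with the signature inferred from the calls and everything else an assumption:

import matplotlib.pyplot as plt

def smoothed_sentiment_plot(window, label, df_sentiment):
    # Hypothetical helper: rolling mean of sentiment over the given time
    # window (e.g. "7d"), plotted and saved into sentiment_output
    smoothed = df_sentiment["sentiment"].sort_index().rolling(window).mean()
    fig, ax = plt.subplots()
    smoothed.plot(ax=ax)
    ax.set_title(f"{label} rolling average sentiment")
    ax.set_xlabel("Date")
    ax.set_ylabel("Mean polarity")
    fig.savefig(os.path.join("sentiment_output", f"smoothed_sentiment_{label}.png"))
    plt.close(fig)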
Example #4
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob


def init(df, param):
    # Load the English parser and TextBlob (for sentiment analysis);
    # note that df and param are accepted but unused here
    model = spacy.load('en_core_web_sm')
    spacy_text_blob = SpacyTextBlob()
    model.add_pipe(spacy_text_blob)
    return model
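A usage sketch for the function above (the arguments are illustrative; init ignores them):

model = init(None, None)
doc = model("What a wonderful day")
print(doc._.sentiment.polarity)  # polarity in [-1.0, 1.0]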
Example #5
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
import lemminflect
from collections import Counter, defaultdict
from spacy_langdetect import LanguageDetector
from spacymoji import Emoji
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
# from afinn import Afinn
import re
import json

nlp = spacy.load('en_core_web_md')
nlp.add_pipe(Emoji(nlp, merge_spans=False), first=True)
nlp.add_pipe(SpacyTextBlob())
nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)
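# A quick illustration of what the combined pipeline exposes (illustrative
# text; the extension attributes follow spacymoji, spacytextblob and
# spacy-langdetect):
# doc = nlp("I love Python 🐍")
# doc._.has_emoji, doc._.sentiment.polarity, doc._.language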

# Comment out the following for a quicker run (skips the full summarizer)
from summarizer import Summarizer

model = Summarizer()

# afinn = Afinn()


def get_summary(raw_text, category_list=None):
    # avoid a mutable default argument
    if category_list is None:
        category_list = []

    # file = 'raw_text.txt'
    # with open(file) as f:
    #     raw_text = html2text(f.read())