Example No. 1
from nltk.data import load
from nltk.tokenize import regexp


def sent_result_subjectivity(text):
    # Classify a single sentence as subjective/objective using a stored custom classifier.
    word_tokenizer = regexp.WhitespaceTokenizer()
    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    sentim_analyzer = load('files/sa_subjectivity.pickle')
    label = sentim_analyzer.classify(tokenized_text)

    return label
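A minimal usage sketch, assuming the pickled SentimentAnalyzer has already been saved to files/sa_subjectivity.pickle; the sample sentence is illustrative:

# Hypothetical call; requires files/sa_subjectivity.pickle to exist beforehand.
label = sent_result_subjectivity("I absolutely loved the ending of this film.")
print(label)  # expected to print 'subj' or 'obj'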
Example No. 2
from nltk.classify import NaiveBayesClassifier
from nltk.data import load
from nltk.sentiment.util import demo_subjectivity
from nltk.tokenize import regexp


def subjectivityScore(docs):
    # Input: iterable of spaCy Doc objects
    # Output: subjectivity score per document (fraction of sentences classified as subjective)
    # Note: training the classifier takes a few minutes the first time; afterwards it loads in seconds.
    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentiment = load('sa_subjectivity.pickle')
    except LookupError:
        sentiment = demo_subjectivity(NaiveBayesClassifier.train, True)
    subjectivities = []
    for doc in docs:
        tot, subj = 0, 0
        for sent in doc.sents:
            tot += 1
            # Tokenize and convert to lower case
            tokenized = [word.lower() for word in word_tokenizer.tokenize(sent.text)]
            if sentiment.classify(tokenized) == 'subj':
                subj += 1
        subjectivities.append(subj / tot)
    return subjectivities
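A short usage sketch, assuming spaCy and its small English model en_core_web_sm are installed (both are assumptions, as are the example texts):

import spacy

# Hypothetical driver code: build spaCy Docs, then score their subjectivity.
nlp = spacy.load("en_core_web_sm")
texts = ["The movie was released in 2010. I think it is wonderful.",
         "Water boils at 100 degrees Celsius at sea level."]
docs = list(nlp.pipe(texts))
print(subjectivityScore(docs))  # e.g. [0.5, 0.0], one fraction per document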
Example No. 3
def demo_sent_subjectivity(text):
    """
    Classify a single sentence as subjective or objective using a stored
    SentimentAnalyzer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.data import load
    from nltk.sentiment.util import demo_subjectivity
    from nltk.tokenize import regexp
    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentim_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    print(sentim_analyzer.classify(tokenized_text))
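A quick way to try the demo, assuming NLTK and the subjectivity corpus are available; the sentence is illustrative:

# Hypothetical call; on first use the analyzer may need to be trained, which takes a few minutes.
demo_sent_subjectivity("The plot felt predictable, but the acting was superb.")
# prints 'subj' or 'obj'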
Example No. 4
import csv
import json
import re
import string

import nltk
import pandas as pd
import yaml
from nltk.tokenize import regexp

word_tokenizer = regexp.WhitespaceTokenizer()
docID = ""
index_tracker = {}
input_dict = {}
df = pd.DataFrame(columns=index_tracker.keys())
column_name = []


# initialise
def find_column_name(input_dict, column_name):
    # Recursively walk a nested dict and collect column names:
    # any key whose value is a dict containing a "find" key, plus every key under an "output" dict.
    if isinstance(input_dict, dict):
        for key, value in input_dict.items():
            if isinstance(value, dict):
                if "find" in value.keys():
                    column_name.append(key)
            if key == "output":
                for i in value.keys():
                    column_name.append(i)
            # print(key, column_name)
            find_column_name(value, column_name)
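A small sketch of how the recursive helper behaves, using a made-up nested config (all keys below are illustrative, not from the original project):

# Hypothetical nested config: "title" carries a "find" rule, and "output" lists result fields.
sample = {
    "title": {"find": "h1"},
    "output": {"name": None, "price": None},
}
cols = []
find_column_name(sample, cols)
print(cols)  # ['title', 'name', 'price']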