def calculate_department_scores():
    """Group per-document sentiment records by department.

    Switches the working directory to ``../output_500_itemcount``, sets up
    an ``Afinn`` instance from the ``AFINN-111.txt`` word list and reads
    every ``*.jsonl`` file found in that directory.

    Returns:
        dict: keys are departments (the ``field`` value of each record);
        values are lists of ``(url, doc_score, doc_length)`` tuples, one
        per document. ``doc_score`` is the result of ``get_score`` and
        ``doc_length`` is the character count of the title and text joined
        by a single space.
    """
    # NOTE: this changes the process-wide working directory and does not
    # restore it afterwards.
    os.chdir("../output_500_itemcount")

    afinn = Afinn()
    lexicon_path = os.path.join(afinn.data_dir(), 'AFINN-111.txt')
    afinn.setup_from_file(lexicon_path, word_boundary=False)

    department_scores = {}
    for jsonl_name in glob.glob("*.jsonl"):
        with jsonlines.open(jsonl_name) as records:
            for record in records:
                # Fields are read in this fixed order and normalised
                # through format().
                dep, url, title, text = (
                    format(record[key])
                    for key in ('field', 'url', 'title', 'text')
                )
                doc_score = get_score(title, text, afinn)
                doc_length = len(" ".join((title, text)))
                # Start a new list the first time a department is seen.
                department_scores.setdefault(dep, []).append(
                    (url, doc_score, doc_length)
                )
    return department_scores
def test_data():
    """Test data files for format.

    Every ``.txt`` file in the Afinn data directory must consist of lines
    holding exactly two tab-separated fields, the second of which can be
    parsed as an integer.
    """
    afinn = Afinn()
    txt_names = [
        name for name in listdir(afinn.data_dir()) if name.endswith('.txt')
    ]
    for name in txt_names:
        path = join(afinn.data_dir(), name)
        with io.open(path, encoding='UTF-8') as handle:
            for line in handle:
                fields = line.split('\t')
                # There should be the phrase and the score and nothing more.
                assert len(fields) == 2
                # The score should be interpretable as an int; int() raises
                # ValueError otherwise.
                _phrase, score = fields
                assert isinstance(int(score), int)
def test_emoticon():
    """Check that texts containing emoticons score above zero.

    The scorer is set up from ``AFINN-emoticon-8.txt`` with
    ``word_boundary=False``.
    """
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(emoticon_file, word_boundary=False)

    for sample in (':-)', 'This is a :-) smiley', 'Just so XOXO.'):
        assert afinn.score(sample) > 0
def test_emoticon():
    """Check that texts containing emoticons score above zero.

    The scorer is set up from ``AFINN-emoticon-8.txt`` with
    ``with_word_boundary=False``.
    """
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(emoticon_file, with_word_boundary=False)

    positive_samples = [
        ':-)',
        'This is a :-) smiley',
        'Just so XOXO.',
    ]
    for sample in positive_samples:
        score = afinn.score(sample)
        assert score > 0
def test_emoticon_upper_case():
    """Check how emoticons that differ only in letter case are scored.

    The scorer is set up from ``AFINN-emoticon-8.txt`` with
    ``word_boundary=False``. Only the lower-case ``':d'`` is asserted on;
    the upper-case inputs are scored without checking the result.
    """
    afinn = Afinn()
    emoticon_file = join(afinn.data_dir(), 'AFINN-emoticon-8.txt')
    afinn.setup_from_file(emoticon_file, word_boundary=False)

    assert afinn.score(':d') == 0

    # TODO: the upper-case inputs are scored, but the ``score > 0``
    # assertion is still disabled.
    for sample in (':D', 'It is so: :D'):
        afinn.score(sample)
from __future__ import print_function import glob, os import sys import jsonlines from afinn import Afinn afinn = Afinn() # afinn.setup_from_file(os.path.join(afinn.data_dir(), 'AFINN-165.txt'), # word_boundary=False) afinn.setup_from_file(os.path.join(afinn.data_dir(), 'AFINN-111.txt'), word_boundary=False) """ get_score for now, just return length we're supposed to do sentiment analysis here and return the text's score """ def get_score(field, title, text): # title_score = afinn.score(title) # using regex patterns token_score = afinn.score_with_wordlist(text) # using word_list # sum_score = title_score + text_score # print(title) # if "mystery" in field: # print(field,title,text)