import os

from privacy_policy_evaluator import helpers, correlation, paragraphing, preprocessing, topic_grouper, wordscoring


def compare(args):
    """
    Compare two given texts
    """
    # Read the files
    text1 = helpers.read_file(args.file1)
    text2 = helpers.read_file(args.file2)

    # Preprocessing
    policies = preprocessing.full_preproccessing([text1, text2])

    # Compute the correlation matrix
    df = correlation.correlation_matrix(policies)

    # Print the correlation matrix
    correlation.print_correlation_matrix(df)
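
# Usage sketch (assumption: compare() backs an argparse subcommand; the
# Namespace below just mimics the parsed CLI arguments it reads):
#
#     from argparse import Namespace
#     compare(Namespace(file1="policies/a.txt", file2="policies/b.txt"))
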
def compare_twitter_reddit_correlation(ngram_min, ngram_max):
    """
    Compare the Twitter and Reddit policies, with and without preprocessing
    """
    # NOTE: ngram_min and ngram_max are accepted but not used below
    dir_name = os.path.dirname(os.path.realpath(__file__))
    folder_dir = "/../privacy_policy_evaluator/data/policies/"
    path = dir_name + folder_dir

    # Read the two policies
    twitter = helpers.read_file(path + "twitter.txt")
    reddit = helpers.read_file(path + "reddit.txt")

    # Correlate both the original and the preprocessed texts
    policies_prepro = preprocessing.full_preproccessing([twitter, reddit])
    policies = [twitter, reddit]
    corr_og = correlation.correlation_matrix(policies, ["twitter", "reddit"])
    corr_prepro = correlation.correlation_matrix(policies_prepro, ["twitter", "reddit"])

    print("######## Original files #########")
    correlation.print_correlation_matrix(corr_og)
    print("######## Preprocessed files #########")
    correlation.print_correlation_matrix(corr_prepro)
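
# Usage sketch (since the n-gram bounds are currently ignored, any values work):
#
#     compare_twitter_reddit_correlation(1, 3)
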
def evaluate_score(args):
    """
    Score a given text file and print the result
    :param args:
    """
    # Read the text file
    text = helpers.read_file(args.file)

    # Compute and print the score
    score = wordscoring.score_text(text)
    print(score)
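
# Usage sketch (same Namespace convention as compare() above):
#
#     from argparse import Namespace
#     evaluate_score(Namespace(file="policies/twitter.txt"))
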
def evaluate_on_topic(args):
    """
    Evaluate a given document on certain topics.
    Paragraphs that describe a certain topic are associated with that topic,
    after which each associated topic is scored based on the extracted text.
    :param args:
    """
    # Read the text file
    text = helpers.read_file(args.file)

    # Split the given text into paragraphs
    paragraphed = paragraphing.paragraph(text)

    # Get the topics from the arguments
    topics = helpers.split(args.topic)

    # Group the paragraphs by topic
    grouped = topic_grouper.group(paragraphed, topics, 0.1)

    # Score each topic on its associated text
    scored_topics = topic_grouper.evaluate(grouped)
    print(scored_topics)
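
# Usage sketch (assumption: helpers.split() splits the topic argument on a
# delimiter such as a comma; adjust the string to whatever the CLI expects):
#
#     from argparse import Namespace
#     evaluate_on_topic(Namespace(file="policies/twitter.txt", topic="location,email"))
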
from privacy_policy_evaluator import helpers, paragraphing, topic_grouper
import numpy as np

# Settings
files = [
    "../privacy_policy_evaluator/data/policies/google.txt",
    "../privacy_policy_evaluator/data/policies/reddit.txt",
    "../privacy_policy_evaluator/data/policies/twitter.txt",
    "../privacy_policy_evaluator/data/policies/ing.txt",
    "../privacy_policy_evaluator/data/policies/icloud.txt",
]
topics = ['location', 'address', 'email', 'information']

# Read the files
texts = []
for file in files:
    texts.append(helpers.read_file(file))

grouped = []
for text in texts:
    # Split the text into paragraphs
    paragraphed = paragraphing.paragraph(text)

    # Group the paragraphs by topic
    grouped.append(topic_grouper.group(paragraphed, topics, 0.1))

# Flatten into one entry per (policy, topic) pair, in file order
a = [g.get(topic) for g in grouped for topic in topics]
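
# A hypothetical way to recover the (file, topic) structure from the flat
# list, using the numpy import above (dtype=object because the entries are text):
#
#     matrix = np.array(a, dtype=object).reshape(len(files), len(topics))
#     # matrix[i][j] holds the text of files[i] grouped under topics[j]
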
from privacy_policy_evaluator import helpers, wordscoring, paragraphing, topic_grouper, preprocessing
import matplotlib.pyplot as plt
import numpy as np

# Settings
files = [
    "../privacy_policy_evaluator/data/policies/google.txt",
    "../privacy_policy_evaluator/data/policies/reddit.txt",
    "../privacy_policy_evaluator/data/policies/twitter.txt",
    "../privacy_policy_evaluator/data/policies/ing.txt",
    "../privacy_policy_evaluator/data/policies/icloud.txt",
]

# Concatenate the preprocessed contents of all policies into one corpus
text = ''
for file in files:
    content = helpers.read_file(file)
    text = text + preprocessing.full_preproccessing([content], 1)[0]

# Strip stop words and print the 50 most common words
text = helpers.remove_stop_words(text)
print(helpers.most_common_words(text, 50))
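
# Hypothetical continuation using the matplotlib import above (assumes
# helpers.most_common_words returns (word, count) pairs, as
# collections.Counter.most_common would):
#
#     words, counts = zip(*helpers.most_common_words(text, 50))
#     plt.bar(words, counts)
#     plt.xticks(rotation=90)
#     plt.show()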