Пример #1
0
def compare(args):
    """
    Compare the two text files named by ``args.file1`` and ``args.file2``.

    Both files are read, preprocessed together, turned into a correlation
    matrix and handed to ``correlation.print_correlation_matrix``.

    :param args: object exposing ``file1`` and ``file2`` attributes
    """
    # Load both documents, first file first
    first_text = helpers.read_file(args.file1)
    second_text = helpers.read_file(args.file2)

    # Preprocess the pair and build the correlation matrix from the result
    matrix = correlation.correlation_matrix(
        preprocessing.full_preproccessing([first_text, second_text])
    )

    # Output the comparison
    correlation.print_correlation_matrix(matrix)
Пример #2
0
def compare_twitter_reddit_correlation(ngram_min, ngram_max):
    """
    Show the Twitter/Reddit policy correlation before and after preprocessing.

    The two policy files are located relative to the directory of this source
    file. One correlation matrix is built from the raw texts and one from the
    preprocessed texts; each is printed under its own banner line.

    :param ngram_min: accepted but not used inside this function
    :param ngram_max: accepted but not used inside this function
    """
    # Directory holding the policy files, relative to this source file
    policy_dir = (
        os.path.dirname(os.path.realpath(__file__))
        + "/../privacy_policy_evaluator/data/policies/"
    )

    twitter_raw = helpers.read_file(policy_dir + "twitter.txt")
    reddit_raw = helpers.read_file(policy_dir + "reddit.txt")

    # Preprocessing gets its own list so the raw list below stays independent
    preprocessed_texts = preprocessing.full_preproccessing([twitter_raw, reddit_raw])
    raw_texts = [twitter_raw, reddit_raw]

    raw_matrix = correlation.correlation_matrix(raw_texts, ["twitter", "reddit"])
    preprocessed_matrix = correlation.correlation_matrix(
        preprocessed_texts, ["twitter", "reddit"]
    )

    # Print each matrix under its banner, raw texts first
    for banner, matrix in (
        ("######## Original files #########", raw_matrix),
        ("######## Preprocessed files #########", preprocessed_matrix),
    ):
        print(banner)
        correlation.print_correlation_matrix(matrix)
Пример #3
0
def evaluate_score(args):
    """
    Score the text file named by ``args.file`` and print the result.

    :param args: object exposing a ``file`` attribute
    """
    # Load the document to be scored
    contents = helpers.read_file(args.file)
    # Compute the score and write it to stdout
    print(wordscoring.score_text(contents))
Пример #4
0
def evaluate_on_topic(args):
    """
    Evaluate a document per topic and print the result.

    The file named by ``args.file`` is split into paragraphs, the paragraphs
    are grouped under the topics taken from ``args.topic``, and the grouped
    text is then evaluated by ``topic_grouper.evaluate``.

    :param args: object exposing ``file`` and ``topic`` attributes
    """
    # Load the document and break it into paragraphs
    paragraphs = paragraphing.paragraph(helpers.read_file(args.file))
    # Topics requested through the arguments
    requested_topics = helpers.split(args.topic)
    # Associate paragraphs with topics (0.1 is passed as the third argument)
    by_topic = topic_grouper.group(paragraphs, requested_topics, 0.1)
    # Evaluate each topic on its associated text and print the outcome
    print(topic_grouper.evaluate(by_topic))
import numpy as np

# Settings: relative paths of the privacy-policy text files to process.
files = [
    "../privacy_policy_evaluator/data/policies/google.txt",
    "../privacy_policy_evaluator/data/policies/reddit.txt",
    "../privacy_policy_evaluator/data/policies/twitter.txt",
    "../privacy_policy_evaluator/data/policies/ing.txt",
    "../privacy_policy_evaluator/data/policies/icloud.txt",
]

# Topics passed to topic_grouper.group for every policy.
topics = ['location', 'address', 'email', 'information']

texts = []  # Contents of each policy file, in the same order as `files`
for file in files:
    texts.append(helpers.read_file(file))

# One grouping result per policy, in the same order as `texts`.
grouped = []
for text in texts:
    # Split the policy text into paragraphs
    paragraphed = paragraphing.paragraph(text)
    # Group the paragraphs by topic; 0.1 is the third argument of
    # topic_grouper.group (presumably a matching threshold -- TODO confirm)
    grouped.append(topic_grouper.group(paragraphed, topics, 0.1))

a = [
    grouped[0].get('location'),
    grouped[0].get('address'),
    grouped[0].get('email'),
    grouped[0].get('information'),
    grouped[1].get('location'),
    grouped[1].get('address'),
Пример #6
0
from privacy_policy_evaluator import helpers, wordscoring, paragraphing, topic_grouper, preprocessing
import matplotlib.pyplot as plt
import numpy as np

# Settings: policy documents whose words are pooled together
files = [
    "../privacy_policy_evaluator/data/policies/google.txt",
    "../privacy_policy_evaluator/data/policies/reddit.txt",
    "../privacy_policy_evaluator/data/policies/twitter.txt",
    "../privacy_policy_evaluator/data/policies/ing.txt",
    "../privacy_policy_evaluator/data/policies/icloud.txt",
]

# Read and preprocess each policy in turn, then concatenate the results
# directly (no separator is inserted between documents).
text = ''.join(
    preprocessing.full_preproccessing([helpers.read_file(policy_path)], 1)[0]
    for policy_path in files
)

# Drop stop words from the pooled text
text = helpers.remove_stop_words(text)

# Report the 50 most common remaining words
print(helpers.most_common_words(text, 50))