Exemplo n.º 1
0
    def test_set_url(self):
        """A URL given without a scheme is stored with ``http://`` prepended."""
        helpers.set_corenlp_url('some-url.org:1234')

        # Read back what was persisted, ignoring surrounding newlines.
        with open(CORENLP_SERVER_URL, 'r') as url_file:
            stored_url = url_file.read().strip('\n')

        self.assertEqual('http://some-url.org:1234', stored_url)
Exemplo n.º 2
0
    def test_url_default(self):
        """A URL that already carries ``http://`` is stored unchanged."""
        url = 'http://0.0.0.0:0000'
        helpers.set_corenlp_url('http://0.0.0.0:0000')

        # Read back what was persisted, ignoring surrounding newlines.
        with open(CORENLP_SERVER_URL, 'r') as url_file:
            stored_url = url_file.read().strip('\n')

        self.assertEqual(url, stored_url)
Exemplo n.º 3
0
    def test_predictions(self):
        """Verify ``predict`` on raw text and on a pre-parsed sentence."""
        helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194')

        # Case 1: a raw multi-sentence string. One entry is expected per
        # sentence, followed by a final entry keyed 'document'.
        raw_text = (
            "Have you found the answer for your question? If yes would you"
            " please share it? Sorry :) I dont want to hack the system!! :"
            ") is there another way? What are you trying to do?  Why can't"
            " you just store the \"Range\"? This was supposed to have been"
            " moved to <url> per the cfd. why wasn't it moved?"
        )
        raw_text_scores = [
            {'Have you found the answer for your question?':
                [0.45793466358055329, 0.54206533641944665]},
            {'If yes would you please share it?':
                [0.47243183562775615, 0.52756816437224363]},
            {'Sorry :) I dont want to hack the system!!':
                [0.54823398057393613, 0.45176601942606376]},
            {':) is there another way?':
                [0.54151149263615428, 0.45848850736384572]},
            {'What are you trying to do?':
                [0.25075788316047232, 0.74924211683952746]},
            {'Why can\'t you just store the "Range"?':
                [0.10255615890730475, 0.89744384109269537]},
            {'This was supposed to have been moved to <url> per the cfd.':
                [0.38666486673559936, 0.61333513326440048]},
            {"why wasn't it moved?":
                [0.29890263769016051, 0.70109736230983943]},
            {'document': [0.38237418986399213, 0.61762581013600781]},
        ]
        self.assertEqual(raw_text_scores, self.classifier.predict(raw_text))

        # Case 2: a dict that already carries the sentence and its parses.
        parsed_sentence = {
            'sentence': 'If yes would you please share it?',
            'parses': [
                'ROOT(root-0, please-5)',
                'dep(please-5, If-1)',
                'dep(please-5, yes-2)',
                'aux(please-5, would-3)',
                'nsubj(please-5, you-4)',
                'dobj(please-5, share-6)',
                'dep(please-5, it-7)',
                'punct(please-5, ?-8)',
            ],
        }
        parsed_sentence_scores = [
            {'If yes would you please share it?':
                [0.47243183562775615, 0.52756816437224363]},
            {'document': [0.47243183562775615, 0.52756816437224363]},
        ]
        self.assertEqual(
            parsed_sentence_scores,
            self.classifier.predict(parsed_sentence),
        )
Exemplo n.º 4
0
    def test_format_doc(self):
        """A plain one-sentence string yields one sentences/parses dict."""
        helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')

        walrus_parses = [
            'ROOT(root-0, walrus-4)',
            'nsubj(walrus-4, I-1)',
            'cop(walrus-4, am-2)',
            'det(walrus-4, the-3)',
            'punct(walrus-4, .-5)',
        ]
        wanted = [{'sentences': ['I am the walrus.'], 'parses': walrus_parses}]

        self.assertEqual(wanted, helpers.format_doc('I am the walrus.'))
Exemplo n.º 5
0
    def test_format_doc_dict(self):
        """A sentence passed with its own parses comes back as one dict."""
        helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')

        # Input and expected parses are deliberately separate list objects.
        parses_by_sentence = {
            'If yes would you please share it?': [
                'ROOT(root-0, please-5)',
                'dep(please-5, If-1)',
                'dep(please-5, yes-2)',
                'aux(please-5, would-3)',
                'nsubj(please-5, you-4)',
                'dobj(please-5, share-6)',
                'dep(please-5, it-7)',
                'punct(please-5, ?-8)',
            ],
        }
        wanted = {
            'sentences': ['If yes would you please share it?'],
            'parses': [
                'ROOT(root-0, please-5)',
                'dep(please-5, If-1)',
                'dep(please-5, yes-2)',
                'aux(please-5, would-3)',
                'nsubj(please-5, you-4)',
                'dobj(please-5, share-6)',
                'dep(please-5, it-7)',
                'punct(please-5, ?-8)',
            ],
        }

        for sentence, dependencies in parses_by_sentence.items():
            formatted = helpers.format_doc(sentence, dependencies)
            self.assertEqual(wanted, formatted)
Exemplo n.º 6
0
def write(args):
    """Score every post in a CSV export and write the enriched rows out.

    Reads ``args.read_file`` with pandas and, for each row, copies the 22
    positional input columns as strings, strips HTML and ``<code>`` snippets
    from the body (column 8), runs ``Classifier.predict`` on the remaining
    text and stores the two whole-document scores. The result is written to
    ``args.write_file`` as CSV with a header row.

    A row that raises at any step is skipped and counted instead of aborting
    the run; the running skip total is printed each time.

    Args:
        args: Object exposing ``read_file`` (input CSV path) and
            ``write_file`` (output CSV path).
    """
    # Set up the stanford corenlp server
    set_corenlp_url("http://localhost:9000")
    cls = Classifier()

    # Read in the data file, tolerating malformed lines. ``error_bad_lines``
    # was superseded by ``on_bad_lines`` in pandas 1.3 and removed in 2.0, so
    # try the current keyword first and fall back for older releases.
    try:
        data = pandas.read_csv(args.read_file, sep=',', on_bad_lines='warn')
    except TypeError:
        data = pandas.read_csv(args.read_file, sep=',', error_bad_lines=False)

    # Output columns. ``leading`` maps to input columns 0-8 and ``trailing``
    # to input columns 9-21, in positional order; ``derived`` is computed.
    leading = [
        "Id", "PostTypeId", "AcceptedAnswerId", "ParentId", "CreationDate",
        "DeletionDate", "Score", "ViewCount", "Body",
    ]
    derived = ["BodyNOHTML", "PolitenessConfidence", "ImpolitenessConfidence"]
    trailing = [
        "OwnerUserId", "OwnerDisplayName", "LastEditorUserId",
        "LastEditorDisplayName", "LastEditDate", "LastActivityDate", "Title",
        "Tags", "AnswerCount", "CommentCount", "FavoriteCount", "ClosedDate",
        "CommunityOwnedDate",
    ]
    fieldnames = leading + derived + trailing

    total = data.shape[0]
    skips = 0

    # The context manager guarantees the output is flushed and closed even
    # if processing is interrupted.
    with open(args.write_file, 'w', newline='') as out_file:
        w = csv.DictWriter(out_file, fieldnames=fieldnames)
        w.writeheader()

        # Create the CSV of processed data
        for i in range(total):
            try:
                row = {
                    name: str(data.iloc[i, col])
                    for col, name in enumerate(leading)
                }

                # Remove html and code snippets
                soup = BeautifulSoup(row["Body"], "lxml")
                for match in soup.find_all('code'):
                    match.decompose()
                text = soup.get_text()
                # Remove newlines and such
                text = (text.replace('\n', ' ')
                            .replace('\r', '')
                            .replace('\t', ''))

                # The last entry of the prediction list holds the scores
                # for the entire document.
                output = cls.predict(text)
                vals = list(output[-1].values())

                row["BodyNOHTML"] = text
                row["PolitenessConfidence"] = str(vals[0][0])
                row["ImpolitenessConfidence"] = str(vals[0][1])

                for col, name in enumerate(trailing, start=len(leading)):
                    row[name] = str(data.iloc[i, col])

                if i % 1000 == 0:
                    print("entry: " + str(i) + " of " + str(total))
                w.writerow(row)
            except Exception:
                # Best-effort: skip rows that cannot be processed, but let
                # KeyboardInterrupt/SystemExit propagate. Count first so the
                # printed total includes this row.
                skips += 1
                print("Total skips = " + str(skips))
Exemplo n.º 7
0
    def test_format_doc_str(self):
        """Each line of the test document formats to its expected parses."""
        helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')

        # One inner list per line of the test document; each inner list
        # holds one sentences/parses dict per sentence on that line.
        expected_per_line = [
            [
                {
                    'sentences': [
                        'Have you found the answer for your question?'
                    ],
                    'parses': [
                        'ROOT(root-0, found-3)',
                        'aux(found-3, Have-1)',
                        'nsubj(found-3, you-2)',
                        'det(answer-5, the-4)',
                        'dobj(found-3, answer-5)',
                        'case(question-8, for-6)',
                        'nmod:poss(question-8, your-7)',
                        'nmod:for(answer-5, question-8)',
                        'punct(found-3, ?-9)',
                    ],
                },
                {
                    'sentences': ['If yes would you please share it?'],
                    'parses': [
                        'ROOT(root-0, please-5)',
                        'dep(please-5, If-1)',
                        'dep(please-5, yes-2)',
                        'aux(please-5, would-3)',
                        'nsubj(please-5, you-4)',
                        'dobj(please-5, share-6)',
                        'dep(please-5, it-7)',
                        'punct(please-5, ?-8)',
                    ],
                },
            ],
            [
                {
                    'sentences': ['Sorry :) I dont want to hack the system!!'],
                    'parses': [
                        'ROOT(root-0, :-RRB--2)',
                        'amod(:-rrb--2, Sorry-1)',
                        'nsubj(want-5, I-3)',
                        'nsubj:xsubj(hack-7, I-3)',
                        'aux(want-5, dont-4)',
                        'acl:relcl(:-rrb--2, want-5)',
                        'mark(hack-7, to-6)',
                        'xcomp(want-5, hack-7)',
                        'det(system-9, the-8)',
                        'dobj(hack-7, system-9)',
                        'nummod(:-rrb--2, !!-10)',
                    ],
                },
                {
                    'sentences': [':) is there another way?'],
                    'parses': [
                        'ROOT(root-0, :-RRB--1)',
                        'acl(:-rrb--1, is-2)',
                        'expl(is-2, there-3)',
                        'det(way-5, another-4)',
                        'nsubj(is-2, way-5)',
                        'punct(:-rrb--1, ?-6)',
                    ],
                },
            ],
            [
                {
                    'sentences': ['What are you trying to do?'],
                    'parses': [
                        'ROOT(root-0, trying-4)',
                        'dep(trying-4, What-1)',
                        'aux(trying-4, are-2)',
                        'nsubj(trying-4, you-3)',
                        'nsubj:xsubj(do-6, you-3)',
                        'mark(do-6, to-5)',
                        'xcomp(trying-4, do-6)',
                        'punct(trying-4, ?-7)',
                    ],
                },
                {
                    'sentences': ['Why can\'t you just store the "Range"?'],
                    'parses': [
                        'ROOT(root-0, store-6)',
                        'advmod(store-6, Why-1)',
                        'aux(store-6, ca-2)',
                        "neg(store-6, n't-3)",
                        'nsubj(store-6, you-4)',
                        'advmod(store-6, just-5)',
                        'det(range-9, the-7)',
                        'punct(range-9, ``-8)',
                        'dobj(store-6, Range-9)',
                        "punct(range-9, ''-10)",
                        'punct(store-6, ?-11)',
                    ],
                },
            ],
            [
                {
                    'sentences': [
                        'This was supposed to have been moved to <url> per '
                        'the cfd.'
                    ],
                    'parses': [
                        'ROOT(root-0, supposed-3)',
                        'nsubjpass(supposed-3, This-1)',
                        'nsubjpass:xsubj(moved-7, This-1)',
                        'auxpass(supposed-3, was-2)',
                        'mark(moved-7, to-4)',
                        'aux(moved-7, have-5)',
                        'auxpass(moved-7, been-6)',
                        'xcomp(supposed-3, moved-7)',
                        'mark(<url>-9, to-8)',
                        'xcomp(moved-7, <url>-9)',
                        'case(cfd-12, per-10)',
                        'det(cfd-12, the-11)',
                        'nmod:per(<url>-9, cfd-12)',
                        'punct(supposed-3, .-13)',
                    ],
                },
                {
                    'sentences': ["why wasn't it moved?"],
                    'parses': [
                        'ROOT(root-0, moved-5)',
                        'advmod(moved-5, why-1)',
                        'auxpass(moved-5, was-2)',
                        "neg(moved-5, n't-3)",
                        'nsubjpass(moved-5, it-4)',
                        'punct(moved-5, ?-6)',
                    ],
                },
            ],
        ]

        # Load the document, one entry per line with newlines stripped.
        with open(TEST_DOCUMENT_PATH, 'r') as document:
            doc_lines = [raw.strip('\n') for raw in document.readlines()]

        for index, doc_line in enumerate(doc_lines):
            formatted = helpers.format_doc(doc_line)
            self.assertEqual(expected_per_line[index], formatted)
Exemplo n.º 8
0
import os
import math
from flask import Flask
from flask import render_template, request, jsonify

import politeness
from politeness.classifier import Classifier
from politeness.helpers import set_corenlp_url

# Register the CoreNLP server URL through the politeness helpers at import
# time.
# NOTE(review): 5000 is also Flask's default development port -- confirm a
# CoreNLP server (and not this app) is what listens at this address.
set_corenlp_url('http://127.0.0.1:5000/')

# Module-level classifier instance, created once when this module is imported.
cls = Classifier()

# Flask application object; the @app.route handlers below register on it.
app = Flask(__name__)
# app.config.from_object(os.environ['APP_SETTINGS'])

# @app.route("/")
# def hello():
#     return "Hello world, it's the Politeness Classifier!"


@app.route("/")
def text_input_form():
    """Render the ``politeness-form.html`` template for the root URL."""
    form_page = render_template("politeness-form.html")
    return form_page


# @app.route("/")
# def text_input_form2():
#     return render_template("politeness-form.html")

Exemplo n.º 9
0
def _set_corenlp_url(args):
    """Print the current CoreNLP URL, or store ``args.url`` as the new one.

    When ``args.list`` is truthy the value from ``helpers.get_corenlp_url()``
    is printed; otherwise ``args.url`` is passed to
    ``helpers.set_corenlp_url``.
    """
    from politeness import helpers

    if not args.list:
        helpers.set_corenlp_url(args.url)
        return

    current_url = helpers.get_corenlp_url()
    print(current_url)