Example #1
def output_text(request):
    if request.method == 'GET':
        name = request.GET.get('q')
        tool = grammar_check.LanguageTool('en-GB')
        texts = name
        matches = tool.check(texts)
        name = grammar_check.correct(texts, matches)
        return render(request, 'index2.html', {'result': name})
Example #2
 def Grammar_test(self):
     errors = 0
     tool = grammar_check.LanguageTool('en-GB')
     total = self.get_prone_no(self.rtokens)
     for ts in self.rtokens:
         text = ts
         matches = tool.check(text)
         errors += len(matches)
     return errors / float(total)
Example #3
 def compute_grammar_metric(self):
     """
     Computes grammar metric for text set.
     """
     tool = grammar_check.LanguageTool('en-US')
     matches = tool.check(self.raw_text)
     approximate_number_of_words = self.raw_text.count(" ") + 1
     metric = ((approximate_number_of_words - len(matches)) /
               float(approximate_number_of_words))
     return (((2 * metric) - 1) * 5)
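A quick sanity check of the formula above with hypothetical figures: a 100-word text in which LanguageTool reports 5 matches lands at 4.5 on the resulting -5 to +5 scale.

metric = (100 - 5) / float(100)  # 0.95 -- fraction of words not flagged
score = ((2 * metric) - 1) * 5   # 4.5 on the -5..+5 scale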
Example #4
    def count_language_mistakes(self, doc):
        """Returns the count of English mistakes in the text."""
        tool = grammar_check.LanguageTool(self._defaultLanguage)
        encodedText = doc.decode("utf-8", errors='replace')
        try:
            mistakes = tool.check(encodedText)

        except Exception as e:
            mistakes = []

        return len(mistakes)
Example #5
 def __init__(self):
     HTMLParser.__init__(self)
     self.__spell_check_res = {}
     self.__grammar_check_res = None
     self.__ignore_tag = False
     self.__is_code_block = False
     self.__in_code_block = False
     self.__dictionary = enchant.DictWithPWL(
         'en_US', 'web-data/mxnet/doc/ignored_words.txt')
     self.__spell_checker = SpellChecker(self.__dictionary)
     self.__parsed_content = ""
     self.__grammar_checker = grammar_check.LanguageTool('en-US')
Example #6
def get_metrics(doc):
    global grammar_tool

    # initialize dict
    metrics = [
        'syllables', 'words', 'spelling_errors', 'grammar_errors', 'sentences'
    ]
    res = { metric: 0 for metric in metrics }

    # initial parse
    sentences = get_sentences(doc)
    # words = []

    # get metrics
    num_sentences = len(sentences)
    res['sentences'] = num_sentences
    for sentence in sentences:
        try:
            try:
                res['grammar_errors'] += len(grammar_tool.check(sentence))
            except Exception as e:
                print "grammar tool failed: {}".format(e)
                print "reinitializing grammar tool.."
                grammar_tool = grammar_check.LanguageTool('en-US')
                time.sleep(0.1)

            words_for_sentence = get_words(sentence)
            res['words'] += len(words_for_sentence)
            # words.append(words_for_sentence)

            for word in words_for_sentence:
                try:
                    # handle trailing punctuation for spellchecker
                    if word[-1] in string.punctuation:
                        word = word[:-1]
                    res['syllables'] += count_syllables(word)
                    if not spelling_tool.check(word):
                        res['spelling_errors'] += 1
                except Exception as e:
                    print "inner exception:", e
                    continue
        except Exception as e:
            print "outer exception:", e
            continue

    if res['words'] == 0:
        print "discarding...", doc

    return res #, sentences, words
Example #7
def grade_for_grammar(essay):
    result = {
        'score': 0,
        'lexically_ambiguous_sentences': [],
        'grammatically_ambiguous_sentences': [],
        'individual_score': 0
    }
    regex_sentences = essay.split('.')
    regex_sentences = [string for string in regex_sentences if string != ""]
    sentences = []
    for x in regex_sentences:
        r = re.findall(r"[^\s]", x)
        if r:
            sentences.append(
                x.replace("\r", "").replace("\n", "").replace("\t", ""))
    print("The sentences are the following: " + str(sentences))
    tool = grammar_check.LanguageTool('en-US')
    incorrect = 0
    for sentence in sentences:
        matches = tool.check(sentence)
        words = sentence.split()
        lexical_ambiguous_words_count = 0
        if len(matches) > 0:
            incorrect += 1
            result["gramatically_ambiguous_sentences"].append(sentence)
            continue
        for word in words:
            if word != "":
                token = nlp(word)[0]
                if token._.wordnet.synsets():
                    lexeme = nlp.vocab[word]
                    if lexeme.is_stop != True:  # <--- Check whether it's in stopword list
                        if len(token._.wordnet.synsets()) >= 5:
                            print(
                                str(word) + " has " +
                                str(len(token._.wordnet.synsets())) +
                                " meanings.")
                            lexical_ambiguous_words_count += 1
        if (lexical_ambiguous_words_count / len(words)) > 0.1:
            incorrect += 1
            result["lexically_ambiguous_sentences"].append(sentence)
            continue
    result["score"] = (len(sentences) - incorrect) / len(sentences) * 100
    result["individual_score"] = 1 / len(sentences) * 100
    return result
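To make the scoring concrete (hypothetical numbers): an essay that splits into 10 sentences, 2 of which are flagged as grammatically or lexically ambiguous, gets score = (10 - 2) / 10 * 100 = 80, while individual_score, the weight of a single sentence, is 1 / 10 * 100 = 10.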
Example #8
def main():
    plotto = Plotto()
    while (plotto.isPlotting()):
        plotto.menu()
        plotto.display()
    plotto.generate()

    tool = grammar_check.LanguageTool('en-US')

    masterplot = plotto.masterplot.getPlot()
    plot_checked = grammar_check.correct(masterplot, tool.check(masterplot))

    print capitalize(plot_checked.lower()) + '\n'

    conflict = plotto.conflicts.getConflict()
    characters = plotto.characters.getCharacters()

    print conflict + '\n'
    print 'Characters: '

    for char in characters:
        print char.name + ", " + char.role

    print '\n'
Example #9
# source: http://stackoverflow.com/a/7160778
# modified so protocol is optional.
url_regex = re.compile(
    r'(^(?:http|ftp)s?://)?' # http:// or https:// (optional)
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
    r'localhost|' # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
    r'(?::\d+)?' # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)

space_or_num_regex = re.compile(r'(\d|\s)+')
proper_noun_regex = re.compile(r'^([0-9]|[A-Z][a-z0-9]+)')

punctuation_table = dict.fromkeys(map(ord, string.punctuation))

grammar_tool = grammar_check.LanguageTool('en-US')
spelling_tool = enchant.Dict('en_US')

tokenizer = WhitespaceTokenizer()

def get_sentences(doc):
    return sent_tokenize(doc)

def get_words(sentence):
    sentence = sentence.strip()

    words = []
    for token in tokenizer.tokenize(sentence):
        token = token.decode("utf-8")

        # remove urls
Example #10
 def setUp(self):
     self.lang_check = grammar_check.LanguageTool()
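A sketch of a companion test for this fixture, assuming only that grammar_check flags the subject-verb disagreement below (exact match counts vary with the LanguageTool version):

 def test_check_finds_error(self):
     # Hypothetical test method, not part of the original suite.
     matches = self.lang_check.check('This are wrong.')
     self.assertTrue(len(matches) > 0)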
Example #11
import pandas as pd
import nltk
from sklearn import linear_model, svm, neighbors, naive_bayes
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
# UNCOMMENT THIS
# import enchant

import grammar_check
from nltk.tokenize import sent_tokenize
from nltk import word_tokenize, pos_tag, ne_chunk

DATA_SET_PATH = "Data Sets/op_spam_v1.4/"

# UNCOMMENT THIS
# SPELLING_DICT = enchant.Dict("en_US")
GRAMMAR_CHECK = grammar_check.LanguageTool('en-US')


def main():
    raw_data = load_data()
    bigram_set = generate_bigram_set(raw_data)
    unigram_set = generate_unigram_set(raw_data)
    # processed_data = featurize_data(raw_data, bigram_set, unigram_set, baseline_flag=False)
    processed_data_baseline = featurize_data(raw_data,
                                             bigram_set,
                                             unigram_set,
                                             baseline_flag=True)
    # processed_data.to_csv("data.csv")
    # training_data = processed_data.sample(frac=0.7)
    # validation_data = processed_data.loc[set(processed_data.index)-set(training_data.index)].sample(frac=0.5)
    # test_data = processed_data.loc[set(processed_data.index)-set(training_data.index)-set(validation_data.index)]
Example #12
    def generate_response(self, sentence, object_name, detail_name, question,
                          p_score, f_score, u_score, s_score, detail_array):
        response = ""
        object_score = 0
        detail_score = 0
        statement_score = 0
        question_score = 0
        self.question = question
        pronoun_pool = []
        noun_pool = []
        adjectives_pool = []
        connector_pool = []
        verb_pool = []
        past_verb_pool = []
        simple_verb_pool = []
        VRB_pool = []
        rb_pool = []
        MD_pool = []
        WRB_pool = []
        WP_pool = []
        #multi-dimensional array
        word_bank = []
        if self.find_object_node(object_name):
            print("Previous object found!")
            object_score += 1

        for entry in self.justTAG(sentence):
            pos = entry[0]
            tag = entry[1]
            if tag == 'NN':  #NOUNS
                noun_pool.append(pos)
                continue
            elif tag == 'NNS':  #NOUNS PLURAL
                noun_pool.append(pos)
                continue
            elif tag == 'NNP':  #unidentified pronoun
                pronoun_pool.append(pos)
                continue
            elif tag == 'JJ':  #ADJECTIVES
                adjectives_pool.append(pos)
                continue
            elif tag == 'VBP':  #VERB
                verb_pool.append(pos)
                continue
            elif tag == 'VRB':
                if pos == "do" and question:
                    WP_pool.append(pos)
                else:
                    VRB_pool.append(pos)
                continue
            elif tag == 'VB':
                simple_verb_pool.append(pos)
                continue
            elif tag == 'VBD':
                past_verb_pool.append(pos)
                continue
            elif tag == 'VBG':
                verb_pool.append(pos)
                continue
            elif tag == 'VBZ':
                verb_pool.append(pos)
                continue
            elif tag == 'RB':  #Example : do you STILL like me ?
                rb_pool.append(pos)
                continue
            elif tag == 'MD':
                MD_pool.append(pos)
                continue
            elif tag == 'WRB':
                WRB_pool.append(pos)
                continue
            elif tag == 'WP':
                WP_pool.append(pos)
                continue
            else:
                continue
        print("PRONOUNS " + str(pronoun_pool))
        print("NOUNS " + str(noun_pool))
        print("ADJECTIVES " + str(adjectives_pool))
        print("VERBS " + str(verb_pool))
        print("SIMPLE VERBS" + str(simple_verb_pool))
        print("PAST VERBS " + str(past_verb_pool))
        print("RB " + str(rb_pool))
        print("VRB " + str(VRB_pool))
        print("WRB " + str(WRB_pool))
        print("MD" + str(MD_pool))
        print("WP " + str(WP_pool))
        existing_objects = dict()
        detail_count = 0
        line_num = 0
        detail_array = []
        noun_scores = dict()
        self.global_noun_pool = noun_pool
        for word in pronoun_pool:
            if self.find_object_node(word):
                total_score = 2
                detail_count, line_num, detail_array = self.check_object_details(
                    self.get_object_line(word))
                existing_objects[word] = detail_count
                if word == object_name:
                    total_score = total_score * 3
                total_score = total_score * detail_count
                noun_scores[word] = total_score
            else:
                continue
        for word in noun_pool:
            print('word:' + word)
            if self.find_object_node(word):
                total_score = 2
                detail_count, line_num, detail_array = self.check_object_details(
                    self.get_object_line(word))
                existing_objects[word] = detail_count
                if word == object_name:
                    total_score = total_score * 3
                total_score = total_score * detail_count
                noun_scores[word] = total_score
        print("Existing objects" + str(existing_objects))
        local_memory = self.get_local_memory(noun_pool)
        server_memory = self.get_server_memory()
        total_memory = self.get_total_memory(local_memory, server_memory)
        print('LOCAL MEMORY = ' + str(local_memory))
        print('SERVER MEMORY = ' + str(server_memory))
        print('TOTAL MEMORY = ' + str(total_memory))
        generated_word_bank = self.create_word_bank(total_memory)
        print('WORD BANK : ' + str(generated_word_bank))
        if question:
            #get all segments from wrb,md, wp
            banana_split = sentence.split(" ")
            i = len(banana_split)
            banana_max = 0
            target_question = ""
            for word in banana_split:
                if word in WRB_pool or word in MD_pool or word in WP_pool:
                    banana_max = i
                    target_question = word
                    break
            print("TARGET QUESTION : " + target_question)
            local_memory = self.get_local_memory(noun_pool)
            print('LOCAL MEMORY = ' + str(local_memory))
            return response
        elif not question:
            pronouns = []
            nouns = []
            verbs = []
            adjectives = []
            wrb = []
            final_word_bank = self.apply_weights(generated_word_bank, question,
                                                 sentence, object_name,
                                                 detail_name)
            #segment everything into question words, nouns and verbs
            question_starters = [
                'Did', 'How', 'When', 'Where', 'Can', 'Is', 'What',
                'Should', 'Could', 'Would'
            ]
            wp = ['Who', 'What']
            wdt = ['Which']
            wp_doll = ['Whose']
            wrb = ['Where', 'When', 'How']
            md = ['Can', 'Could', 'Will', 'Should', 'Would']
            q_connect = ['Was', 'Did', 'Is']
            q_connect_2 = ['An', 'A', 'It']

            q_past = []
            q_present = [wdt]
            q_future = [md]
            q_pro = [wp, wp_doll]
            q_where = [wrb]
            self_perspective = False
            reverse_perspective = False
            sentence_bank = []

            if 'I' in sentence.split(" "):
                self_perspective = True
                pronouns.append('I')
            if len(final_word_bank[0]) != 0:
                pronouns = final_word_bank[0]
                sentence_bank.append(pronouns)
            if len(final_word_bank[1]) != 0:
                nouns = final_word_bank[1]
                sentence_bank.append(nouns)
            if len(final_word_bank[2]) != 0:
                verbs = final_word_bank[2]
                sentence_bank.append(verbs)
            if len(final_word_bank[3]) != 0:
                adjectives = final_word_bank[3]
                sentence_bank.append(adjectives)
            if len(final_word_bank[6]) != 0:
                for verb in final_word_bank[6]:
                    verbs.append(WordNetLemmatizer().lemmatize(verb, 'v'))
                sentence_bank.append(verbs)
            if len(final_word_bank[7]) != 0:
                for verb in final_word_bank[7]:
                    verbs.append(WordNetLemmatizer().lemmatize(verb, 'v'))
                sentence_bank.append(verbs)
            if len(final_word_bank[9]) != 0:
                wrb = final_word_bank[9]
                sentence_bank.append(wrb)
            print(pronouns)
            print(verbs)
            print(nouns)
            print(adjectives)
            print(wrb)

            sentence_RAW = str(pronouns[0] + " " + verbs[0] + " " + nouns[0])
            matches = self.tool.check(sentence_RAW)
            print(matches)
            print(grammar_check.correct(sentence_RAW, matches))
            sentence_tier2 = []
            for i, question_starter in enumerate(question_starters):
                sentence_tier2.append(question_starters[i] + " " +
                                      sentence_RAW)
            print(sentence_tier2)
            tool = grammar_check.LanguageTool('en-GB')
            matches = tool.check(str(sentence_tier2[0]))
            print(matches)
            response = grammar_check.correct(str(sentence_tier2[0]), matches)
            print(self.get_sentence_tense(self.justTAG(sentence)))
            #assign gravity to each set
            #combine set gravities into composite gravities for noun verb pairs
            #combine set gravities into composite gravities for question noun verb triplets
            #filter out final gravity sequence
            return response
Example #13
import grammar_check
import xml.etree.ElementTree as etree
import csv
import sys

tool = grammar_check.LanguageTool('en-GB')


def number_grammar_errors(text):
    matches = tool.check(text)
    return len(matches)


def extract_text_only(text):
    ret = ''
    inside_paragraph = False
    level = 0
    i = 0
    culled_text = ""
    while i < len(text):
        if text[i] == '<':
            i = i + 1
            if text[i] == 'p':
                inside_paragraph = True
            elif text[i] == '/':
                i = i + 1
                if text[i] == 'p':
                    inside_paragraph = False
                else:
                    level = level - 1
Example #14
 def grammar(self, essay):
     with open(essay, 'r') as text:
         self.essay1 = text.read()
     tool = grammar_check.LanguageTool('en-GB')
     matches = tool.check(self.essay1)
     print(grammar_check.correct(self.essay1, matches))
Example #15
def do_something(val):
    tool = grammar_check.LanguageTool('en-GB')
    texts = val
    matches = tool.check(texts)
    return grammar_check.correct(texts, matches)
Example #16
def get_grammar_count(text):
    tool = grammar_check.LanguageTool('en-GB')
    return len(tool.check(text))
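Hypothetical usage of the helper above, assuming grammar_check and its LanguageTool backend are installed (the exact count depends on the LanguageTool version):

print(get_grammar_count('This are a sentence with a error.'))  # expect at least 1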
Example #17
def webhook():

    # endpoint for processing incoming messaging events
    #return "Hello world", 200
    data = request.get_json()
    # you may not want to log every incoming message in production,
    # but it's good for testing
    log(data)

    if data["object"] == "page":
        for entry in data["entry"]:
            for messaging_event in entry["messaging"]:
                if messaging_event.get("message"):  # someone sent us a message
                    sender_id = messaging_event["sender"][
                        "id"]  # the facebook ID of the person sending you the message
                    if sender_id == u'1774667882802558':
                        log(sender_id)
                        return "ok", 200
                    recipient_id = messaging_event["recipient"][
                        "id"]  # the recipient's ID, which should be your page's facebook ID
                    send_process(sender_id)
                    send_settings()
                    message_text = messaging_event["message"].get("text")
                    option = messaging_event["message"].get("quick_reply")
                    log(option)
                    log(grammarUserID)
                    if grammarUserID.get(sender_id) == 1:
                        grammarUserID[sender_id] = 0
                        log(grammarUserID)
                        tool = grammar_check.LanguageTool('en-US')
                        tmp = tool.check(message_text)
                        outp = ''
                        log(tmp)
                        #for mis in tmp :
                        #    outp += mis + '\n'
                        outp = grammar_check.correct(message_text, tmp)
                        send_message(sender_id, "Correct: " + outp)
                        return "ok", 200
                    if option is not None:
                        opt = messaging_event["message"]["quick_reply"][
                            "payload"]
                        log(opt)
                        option_catch(opt, sender_id)
                    else:
                        if option is None:
                            option = messaging_event.get("messaging")
                        if option is not None:
                            opt = option[0]["postback"]["payload"]
                            option_catch(opt, sender_id)
                            return "ok", 200
                        if message_text is not None:
                            if message_text == 'Setting':
                                quick_replies = [["Vocabulary", "Vocab"],
                                                 ["Grammar-Check", "Gramma"],
                                                 ["Category", "Cate"]]
                                send_quickReplies(
                                    sender_id, "Which option do you choose?",
                                    quick_replies)
                            else:
                                send_Define(sender_id, message_text)

                    #else:
                    #messageType = messaging_event["message"].get("text")
                    #if messageType == 'audio':
                    #pass
                    #change to text

                if messaging_event.get("delivery"):  # delivery confirmation
                    pass

                if messaging_event.get("optin"):  # optin confirmation
                    pass

                if messaging_event.get(
                        "postback"
                ):  # user clicked/tapped "postback" button in earlier message
                    pass
    return "ok", 200
Example #18
import argparse
import grammar_check

parser = argparse.ArgumentParser(description="Text Scoring")
parser.add_argument("--language",
                    help="Language of words, english by default",
                    default="en")
parser.add_argument("--sentence", help="Sentence to score")
args = parser.parse_args()

tool = grammar_check.LanguageTool(args.language)


def get_scores(text):
    """Checks the sentence for errors, and returns two scores: number of errors in the sentence, and the fraction of errors from the sentence based on length"""
    errors = tool.check(text)

    num_errors = len(errors)
    error_fraction = sum(error.errorlength for error in errors) / len(text)
    return num_errors, error_fraction
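For instance (hypothetical numbers): a 50-character sentence containing one error with errorlength 4 yields num_errors = 1 and error_fraction = 4 / 50 = 0.08, for a final score of int((1 - 0.08) * 100) = 92/100.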


def print_scores(text):
    num_errors, error_fraction = get_scores(text)
    print(
        "Number of errors: {}\nFraction of errors from the text: {}\nFinal score: {}/100"
        .format(num_errors, error_fraction, int((1 - error_fraction) * 100)))


if args.sentence:
    print_scores(args.sentence)
Example #19
def get_grammar_check_count(review):
    tool = grammar_check.LanguageTool('en-GB')
    matches = tool.check(review)
    return 100 * len(matches) / len(review.split())
Example #20
 def __init__(self):
     self.grammar_tool = gc.LanguageTool('en-GB')
     self.spell_master = SpellChecker("en_US")
     self.senti_checker = gl.sentiment_analysis.create()
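Taken together, these examples all reduce to the same three-call pattern. A minimal, self-contained sketch of it (assuming the grammar_check package and the Java runtime its bundled LanguageTool needs are available):

import grammar_check

tool = grammar_check.LanguageTool('en-US')   # several examples use 'en-GB' instead
text = 'This are an example sentence.'
matches = tool.check(text)                   # one Match per problem found
print(len(matches))                          # number of problems
print(grammar_check.correct(text, matches))  # text with suggested corrections applied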