def test(self):
    text = 'The smelt of fliwers bring back memories.'
    parser = GingerIt()
    output = parser.parse(text)
    self.assertEqual(
        output.get("result"),
        "The smell of flowers brings back memories"
    )
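# Every snippet in this section leans on the same two keys of GingerIt's
# parse() return value. A minimal sketch of that shape -- the printed values
# are illustrative, not guaranteed by any particular gingerit version:
from gingerit.gingerit import GingerIt

parser = GingerIt()
output = parser.parse("The smelt of fliwers bring back memories.")
print(output["result"])            # the corrected sentence as a single string
print(len(output["corrections"]))  # one list entry per individual fix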
def useInSentence(secretWord):
    '''
    secretWord: list, word + data

    Starts up an interactive module for using the word in a sentence.
    * At the start of the game, let the user know the secret word.
    * Ask the user to supply one grammatically correct sentence containing the secret word.
    * The user should receive feedback immediately on whether their sentence is correct.
    '''
    # Example entry: 'exiguous', 'scanty; meager; small; slender', 'adjective', '19',
    # '51-Few English speakers likely know this word', 'Late Latin'
    word = secretWord[0]
    meaning = secretWord[1]
    usage = secretWord[2]
    points = secretWord[3]
    difficulty = secretWord[4]
    origin = secretWord[5]
    print("Welcome to the use in a sentence module!")
    print("Use the following word in a sentence: " + word)
    print("The meaning of " + word + " is: " + meaning)
    print("It is generally used as (a/an): " + usage)
    print("Points assigned: " + points)
    print("Difficulty: " + difficulty)
    print("Origin: " + origin)
    sentence = input("Sentence containing the above mentioned word: ")
    if word not in sentence.split(' '):
        print("You have not used %s in the sentence. "
              "Please check if the spelling and form used are correct!" % word)
    else:
        parser = GingerIt()
        op = parser.parse(sentence)
        if not op['corrections']:
            print("Your sentence is correct!")
        else:
            # append the sentence's final character (its punctuation)
            print("The correct sentence should be: " + op['result'] + sentence[-1])
import streamlit as st
from gingerit.gingerit import GingerIt

st.title("Devashish's Grammarly!")

incorrect_sent = st.text_area("Enter the sentence to be corrected")
if st.button("Correct!"):
    if incorrect_sent == "":
        st.warning("Please enter the text!")
    else:
        parser = GingerIt()
        output = parser.parse(incorrect_sent)
        print(output['result'])
        st.text_area("Corrected Sentence:", value=output['result'])
        # Issue in selectbox
        user_rating = st.sidebar.selectbox(
            "Rate the corrected sentence output with 1 being worst and 5 being best",
            [None, 1, 2, 3, 4, 5])
        if user_rating is not None:
            print("User Rating: ", user_rating)
def ginger(text):
    # Note: despite the name, this returns the full parse dict
    # (both 'result' and 'corrections'), not an error count.
    parser = GingerIt()
    grammar_error_counter = parser.parse(text)
    return grammar_error_counter
def classify():
    comment = request.form['comment']
    parser = GingerIt()
    x = parser.parse(comment)
    s = x['result']
    return render_template('result.html', pred=s)
def check(text):
    p = GingerIt()
    q = p.parse(text)
    return q['result']
def correction(text):
    parser = GingerIt()
    pred = parser.parse(text)
    return pred['result']
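# gingerit sends each sentence to Ginger's online service, so the thin
# wrappers above can raise on network failures or malformed responses.
# A hedged sketch of a more defensive variant -- the retry count and the
# fallback-to-input behaviour are assumptions, not part of any snippet here:
from gingerit.gingerit import GingerIt

def safe_correct(text, retries=2):
    """Return the corrected text, or the original text if the service fails."""
    parser = GingerIt()
    for _ in range(retries):
        try:
            return parser.parse(text)['result']
        except Exception:  # network error, unexpected response shape, etc.
            continue
    return text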
df["mean_word_len"] = df["comment_text"].apply(
    lambda x: np.mean([len(w) for w in str(x).split()]))
# Average word length (takes special characters into account):
# sentence length divided by the word count, e.g.
# df["name_length"] / (df['count_word'] + 1)

### Spelling Correction
# Option 1: TextBlob
# https://www.analyticsvidhya.com/blog/2018/02/natural-language-processing-for-beginners-using-textblob/
from textblob import TextBlob
df["Text_Var_Corrected"] = df["Text_Var"].apply(lambda x: str(TextBlob(x).correct()))

# Option 2: gingerit -- takes a lot of time for many rows
from gingerit.gingerit import GingerIt
parser = GingerIt()
corrected = parser.parse("Analytics Vidhya is a gret platfrm to learn data scence")['result']
NewVar = []
for row in df['Var']:  # for each row
    NewVar.append(parser.parse(row)['result'])
df['NewVar'] = NewVar

# Option 3: pyspellchecker -- takes more time even for a single sentence and
# doesn't do the correction properly
# !pip install pyspellchecker
from spellchecker import SpellChecker
SpellChecker().correction("The smelt of fliwers bring back memories")

# Option 4: hunspell (didn't try) https://github.com/hunspell/hunspell

### Regex - Pattern Extraction/Matching (findall, search)
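# A minimal timing sketch to make the speed comparison between Option 1 and
# Option 2 concrete; absolute numbers depend entirely on network latency,
# since GingerIt performs a remote call per sentence:
import time
from textblob import TextBlob
from gingerit.gingerit import GingerIt

sentence = "Analytics Vidhya is a gret platfrm to learn data scence"

t0 = time.perf_counter()
blob_fixed = str(TextBlob(sentence).correct())       # local, dictionary-based
t1 = time.perf_counter()
ginger_fixed = GingerIt().parse(sentence)['result']  # remote API round trip
t2 = time.perf_counter()

print(f"TextBlob: {t1 - t0:.2f}s -> {blob_fixed}")
print(f"GingerIt: {t2 - t1:.2f}s -> {ginger_fixed}")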
def checkGrammer(self, text):
    # print(text)
    parser = GingerIt()
    item = parser.parse(text)
    return item['result']
def autoCorrect(text):
    from gingerit.gingerit import GingerIt
    parser = GingerIt()
    return parser.parse(text)['result']
from gensim.models import KeyedVectors  # needed for load_word2vec_format below
from DocSim import DocSim
from gingerit.gingerit import GingerIt

googlenews_model_path = './data/GoogleNews-vectors-negative300.bin'
stopwords_path = "./data/stopwords_en.txt"

model = KeyedVectors.load_word2vec_format(googlenews_model_path, binary=True)
with open(stopwords_path, 'r') as fh:
    stopwords = fh.read().split(",")
ds = DocSim(model, stopwords=stopwords)

while True:
    source_doc = input("Enter first sentence : ")
    parser = GingerIt()
    corrected_text = parser.parse(source_doc)['result']
    while source_doc != corrected_text:
        if len(corrected_text.strip()) != len(source_doc.strip()) - 1:
            print("Did you mean : \"", corrected_text, "\"")
            print("Press 0 to continue with suggested version ")
            print("Press 1 to re-enter the sentence ")
            print("Press 2 to continue with your version ")
            cond = int(input(" --> "))
            if cond == 1:
                source_doc = input("Enter first sentence : ")
                corrected_text = parser.parse(source_doc)['result']
            elif cond == 0:
                source_doc = corrected_text.strip()
                break
            else:
                break  # keep the user's own version
from gingerit.gingerit import GingerIt

text = "I would like to buy one apples"
ginger_parser = GingerIt()
ginger_grammar_results = ginger_parser.parse(text)
ginger_corrections = ginger_grammar_results['corrections']
print("\nNumber of grammar issues found with Ginger: " + str(len(ginger_corrections)) + "\n")
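# The layout of each entry in that corrections list is a detail of the
# installed gingerit version, so the safest way to see what it holds is to
# print one entry and inspect its keys before relying on them:
for correction in ginger_corrections:
    print(correction)  # one dict per detected issue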
def gen_Question(keywords, qno):
    print(keywords)
    print("keywords")
    typo = "nonested"
    typo2 = "nontechnical"
    custom_sent_tokenizer = PunktSentenceTokenizer(train_text)
    tokenized = custom_sent_tokenizer.tokenize(keywords)
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            key, words = zip(*tagged)
            compare = list(words)
            print(words)
            a = dict(zip(key, words))  # POS tag -> word
            b = dict(zip(words, key))  # word -> POS tag
            print(a)
            print([b for b, v in a.items() if v in NonTechnicalQuestionDirectory.l1])
    except Exception as e:
        print(str(e))

    # Build a question template from the POS-tag pattern of the keywords
    if compare == NonTechnicalQuestionDirectory.l1:
        question = "What " + b['NNP']
    elif compare == NonTechnicalQuestionDirectory.l2:
        question = "Why should we " + b['NN'] + " " + b['PRP'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l3:
        question = "What are " + b['PRP$'] + " " + b['NNS'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l4:
        question = "Tell about " + b['PRP'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l5:
        question = "What are your " + b['JJ'] + " " + b['NNS'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l6:
        question = "What is " + b['PRP$'] + " " + b['JJ'] + " " + b['NN'] + "?"
    else:
        question = "Explain about your " + keywords + " and its technologies?"

    # Grammar-correct the generated question before asking it
    parser = GingerIt()
    grammar_corrected_question = parser.parse(question)
    question = grammar_corrected_question.get("result")
    print(question)
    print(vik_test_codes.question(question, qno))

    voice_record = AudioRecorder.audio_recorder(qno)
    print("recording answer for question number " + str(qno))
    answer_validity = SpeachToText.validation("", typo2, typo,
                                              "question" + str(qno))[0]

    if str(qno) == "20" or qno == 20:
        # last question: reset the saved-question state and clean up audio files
        open('Controller/questionSaver_testing.py', 'w').close()
        fruits = ["global gcpq\n", "gcpq = 'end'\n"]
        new_file = open("Controller/questionSaver_testing.py", mode="a+",
                        encoding="utf-8")
        new_file.writelines(fruits)
        new_file.seek(0)  # rewind before reading back what was just written
        for line in new_file:
            print(line)
        new_file.close()
        filelist = glob.glob("Audio/*.wav")
        for file in filelist:
            os.remove(file)
    return question
def generate_cv_questions():
    db = "CV"
    db2 = "project"
    db3 = "project_d"
    lang = 'en'
    q_list = []
    pro_list = []
    count = 1
    session = 0
    answer_validity = 0
    global question_number
    question_number = 0
    while count <= 3:
        session = session + 1
        print("session", session)
        session_no_string = str(session)
        session_node_count = ConnectionToNeo4j.session_Node_Count(db, session_no_string)
        print("session node count", session_node_count)
        node_id = ConnectionToNeo4j.get_node_id(db, session_no_string)
        for id in range(node_id, session_node_count + node_id):
            q_list.append(str(id))
        print(q_list)
        for question_no in range(session_node_count):
            print("question number", question_no)
            random_que = random.choice(q_list)
            print("random que", random_que)
            non_technical_question = ConnectionToNeo4j.cvQuestionGen(db, random_que)
            q_list.remove(random_que)
            print(q_list)
            print(non_technical_question)
            actual_question = QuestionCreator.gen_Question(non_technical_question)
            parser = GingerIt()
            grammar_corrected_question = parser.parse(actual_question).get("result")
            TextToSpeechConverter.text_to_speech(grammar_corrected_question, lang)
            question_number = question_number + 1
            print(question_number)
            if random_que == "5":
                pro = ConnectionToNeo4j.getProjects(db, "5")
                print(pro)
                for id in range(1, pro + 1):
                    pro_list.append(str(id))
                print(pro_list)
                random_proj_que = random.choice(pro_list)
                modify_random_proj_que = "p" + random_proj_que
                print(modify_random_proj_que)
                project_question = ConnectionToNeo4j.cvQuestionProjectGen(
                    db2, db3, modify_random_proj_que, userid)
                actual_project_question = QuestionCreator.gen_Question(project_question)
                parser = GingerIt()
                grammar_corrected_project_question = parser.parse(
                    actual_project_question).get("result")
                TextToSpeechConverter.text_to_speech(
                    grammar_corrected_project_question, lang)
                question_number = question_number + 1
                print(question_number)
                global technology_list
                tech = test.kes()
                tech = NestedQuestionCreator.keywordSelector("", tech, "1", "")
                print(tech)
                technology_list = NestedQuestionCreator.nonTechnicalKeywordSeelector(
                    tech, modify_random_proj_que)
                print(technology_list)
            # wait until a valid answer is recorded
            answer_validity = test.test()
            while answer_validity == "None":
                answer_validity = test.test()
        q_list = []
        count = count + 1
async def text_correction(text: str):
    if text:
        parser = GingerIt()
        response = parser.parse(text)
        return {"Text": text, "Result": response['result']}
    return {"text": "is None."}
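# The route decorator is not shown in this excerpt. Assuming a hypothetical
# GET endpoint such as @app.get("/correct"), the handler can be exercised
# end-to-end with FastAPI's test client:
from fastapi import FastAPI
from fastapi.testclient import TestClient
from gingerit.gingerit import GingerIt

app = FastAPI()

@app.get("/correct")  # hypothetical wiring for the handler above
async def text_correction(text: str = ""):
    if text:
        return {"Text": text, "Result": GingerIt().parse(text)['result']}
    return {"text": "is None."}

client = TestClient(app)
print(client.get("/correct", params={"text": "The smelt of fliwers"}).json())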
def gen_Question(keywords, questionno, nesornot):
    global question
    custom_sent_tokenizer = PunktSentenceTokenizer(train_text)
    tokenized = custom_sent_tokenizer.tokenize(keywords)
    print("keywords for question")
    print(keywords)
    keyword_count = len(keywords.split())
    print(keyword_count)
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            key, words = zip(*tagged)
            compare = list(words)
            print(words)
            print(compare)
            a = dict(zip(key, words))  # POS tag -> word
            b = dict(zip(words, key))  # word -> POS tag
            print(a)
    except Exception as e:
        print(str(e))

    # Pick a question template based on the POS-tag pattern of the keywords
    if compare == TechnicalQuestionDictionary.tl1:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl2:
        question = "Can you explain " + keywords
    elif compare == TechnicalQuestionDictionary.tl3:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl4:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl5:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl6:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl7:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl9:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl10:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl11:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl12:
        question = "How to use " + keywords
    elif compare == TechnicalQuestionDictionary.tl13:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl14:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl15:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl16:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl17:
        question = "Tell about " + keywords
    elif compare == TechnicalQuestionDictionary.tl18:
        question = "Explain about " + keywords
    elif compare == TechnicalQuestionDictionary.tl19:
        question = "Describe " + keywords
    elif compare == TechnicalQuestionDictionary.tl20:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl21:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl22:
        question = "Tell about " + keywords
    else:
        question = "Define about " + keywords

    parser = GingerIt()
    grammar_corrected_question = parser.parse(question).get("result")
    # Re-attach the original keywords so the grammar checker cannot rewrite them
    question_list = grammar_corrected_question.split()
    question_suffix = ' '.join(question_list[:-keyword_count])
    question = question_suffix + " " + keywords
    print(question)
    print(vik_test_codes.question(question, questionno))

    voice_record = AudioRecorder.audio_recorder(questionno)
    answer_validity = SpeachToText.validation(keywords, "technical", nesornot,
                                              "question" + str(questionno))[0]

    if str(questionno) == "20" or questionno == 20:
        # last question: reset the saved-question state and clean up audio files
        open('Controller/questionSaver_testing.py', 'w').close()
        fruits = ["global gcpq\n", "gcpq = 'end'\n"]
        new_file = open("Controller/questionSaver_testing.py", mode="a+",
                        encoding="utf-8")
        new_file.writelines(fruits)
        new_file.seek(0)  # rewind before reading back what was just written
        for line in new_file:
            print(line)
        new_file.close()
        filelist = glob.glob("Audio/*.wav")
        for file in filelist:
            os.remove(file)
    return question
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 16 15:00:43 2021

@author: akshay
"""
from gingerit.gingerit import GingerIt

text = 'Narendra Modi is our prme mnister. He is from Gujaratt'
parser = GingerIt()
print(len(parser.parse(text)['corrections']))
def auto_correct(text):
    # text = 'The smelt of fliwers bring back memories.'
    parser = GingerIt()
    result = parser.parse(text)
    return result['result']
def checkGrammar(*args):
    from gingerit.gingerit import GingerIt
    blog_id = args[0]
    if Blog.objects.filter(id=blog_id).exists():
        blog = Blog.objects.filter(id=blog_id).first()
        blogBody = blog.body
        # Decode the HTML entities the editor stores before parsing the JSON
        blogBody = blogBody.replace('&quot;', '"').replace('&lt;', '<') \
                           .replace('&gt;', '>').replace('&nbsp;', ' ')
        jsonData = json.loads(blogBody)
        futures = []
        if blog.title:
            futures.append({
                "blog_id": blog_id,
                "section_id": 0,
                "text": blog.title,
                "speed": False
            })
        for idx, data in enumerate(jsonData["blocks"]):
            if "quote" == data["type"] and "text" in data["data"]:
                quoteText = "Quote: " + handleHTML(data["data"]["text"])
                if data["data"]["caption"]:
                    quoteText = quoteText + " By " + data["data"]["caption"]
                futures.append({
                    "blog_id": blog_id,
                    "section_id": idx + 1,
                    "text": quoteText,
                    "speed": False
                })
            elif "text" in data["data"]:
                futures.append({
                    "blog_id": blog_id,
                    "section_id": idx + 1,
                    "text": handleHTML(data["data"]["text"]),
                    "speed": False
                })
            elif "items" in data["data"]:  # list, checklist
                if data["type"] == "checklist":
                    final_section = ""
                    for item in data["data"]["items"]:
                        final_section = final_section + item["text"]
                    futures.append({
                        "blog_id": blog_id,
                        "section_id": idx + 1,
                        "text": handleHTML(final_section),
                        "speed": False
                    })
                elif data["type"] == "list":
                    final_section = ""
                    for item in data["data"]["items"]:
                        final_section = final_section + item
                    futures.append({
                        "blog_id": blog_id,
                        "section_id": idx + 1,
                        "text": handleHTML(final_section),
                        "speed": False
                    })
        for text in futures:
            try:
                print("text: ", text)
                parser = GingerIt()
                result = parser.parse(text["text"][:199])  # truncate long sections
                print("result: ", result)
            except Exception as e:
                return response_500("internal error", e)
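# Truncating each section to 199 characters silently drops the rest of the
# text. A hedged alternative -- the 199-character budget comes from the
# snippet above, but chunking on sentence boundaries is an assumption, not
# the project's actual behaviour:
def parse_in_chunks(parser, text, limit=199):
    """Split text on sentence boundaries and grammar-check each chunk."""
    chunks, current = [], ""
    for sentence in text.split(". "):
        candidate = (current + ". " + sentence) if current else sentence
        if len(candidate) <= limit:
            current = candidate
        else:
            if current:
                chunks.append(current)
            current = sentence[:limit]  # a single oversize sentence is still cut
    if current:
        chunks.append(current)
    return [parser.parse(chunk) for chunk in chunks]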
def analyze(request):
    puncts = string.punctuation
    word_to_find = request.POST.get("word_input")
    # field name assumed; the 'replace' branch below needs a replacement string
    replace_input = request.POST.get("replace_input", "")
    djText = request.POST.get('text', 'default')
    # Every option radio button posts under the same 'option' key, so all of
    # these variables receive the same submitted value; the elif chain below
    # dispatches on whichever option was chosen.
    remPunc = request.POST.get('option', 'removepunc')
    cap = request.POST.get('option', 'capitalize')
    small = request.POST.get('option', 'toSmall')
    upper = request.POST.get('option', 'toUpper')
    word_find_flag = request.POST.get('option', 'word_find')
    New_Line = request.POST.get('option', 'New_line')
    Emails = request.POST.get('option', 'Email_Address')
    Links = request.POST.get('option', 'Links')
    Passgen = request.POST.get('option', 'Password_Generator')
    search_word = request.POST.get('option', 'Search_word')
    gallery = request.POST.get('option', 'q')
    Suggest_word = request.POST.get('option', 'suggest_word')
    Sen_Analysis = request.POST.get('option', 'Sentiment')
    Grammar = request.POST.get('option', 'grammar')
    Channel = request.POST.get('option', 'suggest_youtube')
    books = request.POST.get('option', 'suggest_books')
    articles = request.POST.get('option', 'suggest_articles')
    lemmitizer = request.POST.get('option', 'grammar')
    start_pdf = request.POST.get('option', 'generate_pdf')
    replace_text = request.POST.get('option', 'replace')
    Word_cloud = request.POST.get('option', 'wordcloud')
    Date = request.POST.get('option', 'date')
    Word_frequency = request.POST.get('option', 'word_frequency')

    analyzed_text = ""
    word_status = ""
    countword = len(djText.split())

    if word_find_flag == "word_find":
        if word_to_find != "":
            if djText.find(word_to_find) != -1:
                word_status = "found"
                djText = djText.replace(
                    word_to_find,
                    '<b style="color:red;">' + word_to_find + "</b>")
                try:
                    synonym_01 = get_synonyms(word_to_find)
                    synonyms2 = random.sample(synonym_01, 4)
                    final = ""
                    for f in synonyms2:
                        final += f + " , "
                    example = get_example(word_to_find)
                    synonyms = final + example
                except Exception:
                    synonyms = "Not Available"
            else:
                word_status = "not found"
                synonyms = "Text Not Found"
        analyzed_text = djText
        word_find = "Find Word = " + word_to_find
        synonym = format_html('<b style="color:{};">{}</b>', 'green', synonyms)
        result = {"analyzed_text": analyzed_text,
                  "highlight": "Chosen word is highlighted in red colour and "
                               "synonyms/examples in green colour",
                  "purpose": word_find, "status": word_status,
                  "synonym": synonym, "wordcount": countword,
                  "analyze_text": True, "findWord": True}
    elif New_Line == "New_line":
        for char in djText:
            if char == '.':
                char = '\n'
            analyzed_text = analyzed_text + char
        result = {"analyzed_text": analyzed_text,
                  "purpose": "Changes '.' to New Line",
                  "analyze_text": True, "wordcount": countword}
    elif Emails == "Email_Address":
        regex = r'^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$'
        lst = re.findall(r'\S+@+\S+', djText)
        tmp = ""
        for x in lst:
            if re.search(regex, x):
                tmp += x
                tmp += '\n'
        result = {"analyzed_text": tmp, "purpose": "Find All Emails",
                  "analyze_text": True, "wordcount": countword}
    elif Passgen == "Password_Generator":
        stop_words = set(stopwords.words('english'))
        chars = "!£$%&*#@"
        ucase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        text = re.sub(r'[^\w\s]', '', djText)
        token = word_tokenize(text)
        filtered_sentence = [w for w in token if w not in stop_words]
        if len(filtered_sentence) > 0:
            random_word = random.choice(filtered_sentence)
        else:
            random_word = token[0]
        random_word = random_word.title()
        merge = ""
        for word in random_word.split():
            merge += (random.choice(chars) + word[:-1] + word[-1].upper()
                      + random.choice(string.ascii_letters) + "@"
                      + random.choice(ucase_letters)
                      + random.choice(string.digits) + " ")
        final_text = merge[:-1]
        result = {"analyzed_text": final_text,
                  "purpose": "Generate password from text",
                  "generate_text": True, "wordcount": countword}
    elif search_word == "Search_word":
        url = 'https://www.dictionary.com/browse/'
        headers = requests.utils.default_headers()
        headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                                      'Chrome/83.0.4103.97 Safari/537.36'})
        req = requests.get(url + djText, headers)
        soup = BeautifulSoup(req.content, 'html.parser')
        mydivs = soup.findAll("div", {"value": "1"})[0]
        for tags in mydivs:
            meaning = tags.text
        wrap = textwrap.TextWrapper(width=100)
        word_meaning = wrap.fill(text=meaning)
        result = {"analyzed_text": word_meaning, "purpose": "Searched Word",
                  "generate_text": True, "wordcount": countword}
    elif Suggest_word == "suggest_word":
        find = requests.get(
            f"https://www.dictionaryapi.com/api/v3/references/thesaurus/json/"
            f"{djText}?key={api_key}")
        response = find.json()
        if len(response) == 0:
            print("Word Not Recognized!")
        else:
            if str(response[0]).count(" ") == 0:
                k = []
                for j in range(len(response)):
                    k.append(response[j])
                djText = " , ".join(k)
            else:
                dictionary = PyDictionary()
                testdict = dictionary.synonym(djText)
                djText = " , ".join(testdict)
        wrap = textwrap.TextWrapper(width=100)
        suggest = wrap.fill(text=djText)
        result = {"analyzed_text": suggest, "purpose": "Suggested Word",
                  "generate_text": True, "wordcount": countword}
    elif Sen_Analysis == "Sentiment":
        djText = ' '.join(
            re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
                   djText).split())
        analysis = TextBlob(djText)
        # classify by polarity sign
        if analysis.sentiment.polarity > 0:
            final = str(djText) + " (Positive Text)"
        elif analysis.sentiment.polarity == 0:
            final = str(djText) + " (Neutral Text)"
        else:
            final = str(djText) + " (Negative Text)"
        result = {"analyzed_text": final, "purpose": "Sentiment Analysis",
                  "analyze_text": True, "wordcount": countword}
    elif Grammar == "grammar":
        parser = GingerIt()
        final = parser.parse(djText)["result"]
        if final == '':
            final = "Please write some text to check grammar"
        result = {"analyzed_text": final, "grammar": djText,
                  "purpose": "Spelling & Grammar Check",
                  "analyze_text": True, "wordcount": countword}
    elif lemmitizer == "lemmitize":
        wordnet_lemmatizer = WordNetLemmatizer()
        tokenization = nltk.word_tokenize(djText)
        result = ""  # holds the last "word -> lemma" pair found
        count = True
        for w in tokenization:
            k = wordnet_lemmatizer.lemmatize(w, pos="v")
            if w != k:
                result = "{} -> {}".format(w, k)
                count = False
        if count:
            final = "No need for lemmatization"
        else:
            final = "(Original word) -> (Lemmatized word)"
        result = {"analyzed_text": result, "highlight": final,
                  "purpose": "Lemmatization of text",
                  "analyze_text": True, "wordcount": countword}
    elif Channel == "suggest_youtube":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Suggest youtube channels",
                  "status": "Press Button To View Channel links",
                  "find_channel": True, "generate_text": True,
                  "wordcount": countword}
    elif books == "suggest_books":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Search Books",
                  "status": "Press Button To View Books", "find_books": True,
                  "generate_text": True, "wordcount": countword}
    elif articles == "suggest_articles":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Search Articles",
                  "status": "Press Button To View Articles",
                  "find_articles": True, "generate_text": True,
                  "wordcount": countword}
    elif start_pdf == "generate_pdf":
        request.session['user-input'] = djText
        result = {"analyzed_text": "Check Your Pdf", "purpose": "Generate Pdf",
                  "status": "Press Button To View Pdf", "make_pdf": True,
                  "generate_text": True, "wordcount": countword}
    elif replace_text == "replace":
        final_text = re.sub(word_to_find, replace_input, djText)
        result = {"analyzed_text": final_text,
                  "purpose": "Replacement of text in sentence",
                  "analyze_text": True, "wordcount": countword}
    elif Word_cloud == "wordcloud":
        cloud = WordCloud(background_color="white", max_words=200,
                          stopwords=set(STOPWORDS))
        wc = cloud.generate(djText)
        buf = io.BytesIO()
        wc.to_image().save(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
        final = "data:image/png;base64,{}".format(data)
        result = {"analyzed_text": " ", "purpose": "Wordcloud",
                  "my_wordcloud": final, "generate_text": True,
                  "wordcount": countword}
    elif Date == "date":
        final = extract_dates(djText)
        final_text = final[0].date()
        result = {"analyzed_text": final_text,
                  "purpose": "Extract Dates from text",
                  "analyze_text": True, "wordcount": countword}
    elif Word_frequency == "word_frequency":
        djText = djText.replace("\n", " ").lower()
        words_dict = get_words_dict(djText)
        k = 10 if len(words_dict) > 10 else len(words_dict)
        y_pos = range(0, k)
        bars = []
        height = []
        count = 0
        # print and collect the ten most frequent words for the bar chart
        format_spaces("word", "occurrences")
        for word_str, word_amount in words_dict.items():
            format_spaces(word_str, word_amount)
            count += 1
            if count <= 10:
                bars.append(word_str)
                height.append(int(word_amount))
        plt.bar(y_pos, height)
        plt.xticks(y_pos, bars, size=9)
        plt.xticks(rotation='horizontal')
        plt.ylabel('Word Frequency', fontsize=12, labelpad=10)
        plt.xlabel('Words', fontsize=12, labelpad=10)
        fig = plt.gcf()
        buf = BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        data = base64.b64encode(buf.read())
        uri = urllib.parse.quote(data)
        final = "data:image/png;base64,{}".format(uri)
        result = {"analyzed_text": " ",
                  "purpose": "Word Frequency for every word in text",
                  "bar_graph": final, "analyze_text": True,
                  "wordcount": countword}
    elif gallery == "q":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Images",
                  "status": "Press Button To View Images", "find_image": True,
                  "generate_text": True, "wordcount": countword}
    elif remPunc == 'removepunc':
        for char in djText:
            if char not in puncts:
                analyzed_text = analyzed_text + char
        result = {"analyzed_text": analyzed_text, "purpose": "Remove Punctuations",
                  "analyze_text": True, "wordcount": countword}
    elif cap == "capitalize":
        analyzed_text = djText.capitalize()
        result = {"analyzed_text": analyzed_text, "purpose": "Capitalize",
                  "analyze_text": True, "wordcount": countword}
    elif small == "toSmall":
        analyzed_text = djText.lower()
        result = {"analyzed_text": analyzed_text, "purpose": "To Smallercase",
                  "analyze_text": True, "wordcount": countword}
    elif upper == "toUpper":
        analyzed_text = djText.upper()
        result = {"analyzed_text": analyzed_text, "purpose": "To Uppercase",
                  "analyze_text": True, "wordcount": countword}
    elif Links == "Links":
        pattern = (r'(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)'
                   r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#\/%=~_|$?!:,.])*'
                   r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[A-Z0-9+&@#\/%=~_|$])')
        links = re.findall(pattern, djText, re.IGNORECASE)
        analyzed_text = ""
        i = 0
        for x in links:
            i = i + 1
            analyzed_text += f'<a href="{x}" target="_blank">Link {i}</a>'
            analyzed_text += '\n '
        result = {"analyzed_text": analyzed_text, "purpose": "Find All Links",
                  "analyze_text": True, "wordcount": countword}
    else:
        return HttpResponse(
            '''<script type="text/javascript">alert("Please select at least one option.");</script>''')
    return render(request, 'analyze.html', result)
def test_gingerit(text, expected):
    parser = GingerIt()
    assert parser.parse(text)["result"] == expected
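# This assert-style test is written for pytest; the usual wiring is a
# parametrize decorator. The case below reuses the example from the unittest
# at the top of this section -- like all expectations against a live service,
# the exact output may drift over time:
import pytest
from gingerit.gingerit import GingerIt

@pytest.mark.parametrize("text, expected", [
    ("The smelt of fliwers bring back memories.",
     "The smell of flowers brings back memories"),
])
def test_gingerit(text, expected):
    parser = GingerIt()
    assert parser.parse(text)["result"] == expected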
def check(mail):
    try:
        f = word(mail, 'sentence')
        corrections = 0
        for s in f:
            g = GingerIt()
            h = g.parse(s)
            corrections += len(h['corrections'])
        return corrections
    except Exception:
        print("Error while checking grammar errors in text")


def search(search_term, next=False, page=0, board=0):
    """function to search and return comments"""
    if next == False:
        page = requests.get("https://www.nairaland.com/search?q=" +
                            urllib.parse.quote_plus(str(search_term)) +
                            "&board=" + str(board))
    else:
        page = requests.get("https://www.nairaland.com/search/" + str(search_term) +
                            "/0/" + str(board) + "/0/1" + str(page))
    soup = BeautifulSoup(page.content, 'html.parser')
    comments = soup.findAll("div", {"class": "narrow"})
    return comments


WordList = []


def analysis(text):
    """function to evaluate sentiment"""
    try:
        j = 0
        board = 29
        while j < 10:
            nextItem = j != 0
            commentsCurrent = search(text, nextItem, j, board)
            add_to_word_list(commentsCurrent)
            j += 1
    except Exception:
        print("Search failed")

    positive = 0
    negative = 0
    neutral = 0
    previous = []
    for tweet in WordList:
        if tweet in previous:
            continue
        previous.append(tweet)
        analysis = TextBlob(tweet)
        # evaluate the polarity of each comment
        polarity = analysis.sentiment.polarity
        if polarity == 0:
            neutral += 1
        elif polarity < 0.00:
            negative += 1
        elif polarity > 0.0:
            positive += 1

    noOfSearchTerms = positive + negative + neutral
    positive = percentage(positive, noOfSearchTerms)
    negative = percentage(negative, noOfSearchTerms)
    neutral = percentage(neutral, noOfSearchTerms)
    return positive, negative, neutral
if __name__ == "__main__":
    app = TextToSpeech(subscription_key)
    # app.get_token()
    mtime_last = 0
    pdata = []
    while True:
        mtime_cur = os.path.getmtime("texts.txt")
        if mtime_cur != mtime_last:
            time.sleep(2)
            print(f'LOG {time.strftime("%Y%m%d-%H%M")}: file watch event triggered')
            with open('texts.txt', 'r') as file:
                data = file.read().split('\n')
            if data != pdata:
                pdata = data
                try:
                    parser = GingerIt()
                    correctedText = parser.parse(data[-1])['result']
                    app.tts = correctedText
                    if len(correctedText) > 0:
                        app.save_audio()
                except Exception:
                    print(f"LOG {time.strftime('%Y%m%d-%H%M')}: No data in file")
        mtime_last = mtime_cur
# print(targeted_sentences[100])
wrong_sentences = []
for j in range(0, len(targeted_sentences)):
    wrong_sentences.append(targeted_sentences[j].replace(" is ", " are "))
print("There are {} wrong sentences.".format(len(wrong_sentences)))
# print(wrong_sentences[100])

counter = 0
parser = GingerIt()
for j in range(1, 100):
    try:
        text = wrong_sentences[j] + " "
        if parser.parse(text).get("result") == targeted_sentences[j]:
            counter += 1
    except IndexError:
        pass
print(counter)

# class TestGingerIt(unittest.TestCase):
#     def test_is(self):
#         parser = GingerIt()
#         for j in range(302, 303):
#             text = wrong_sentences[j] + " "
#             self.assertEqual(parser.parse(text).get("result"), targeted_sentences[j])
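# The commented-out unittest above stops at the first mismatch; unittest's
# subTest reports every failing sentence instead. A sketch, assuming the
# wrong_sentences and targeted_sentences lists built above:
import unittest
from gingerit.gingerit import GingerIt

class TestGingerItSubTest(unittest.TestCase):
    def test_is_vs_are(self):
        parser = GingerIt()
        for j in range(1, 100):
            with self.subTest(index=j):
                corrected = parser.parse(wrong_sentences[j] + " ").get("result")
                self.assertEqual(corrected, targeted_sentences[j])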
for i in range(0, le_list):
    for j in range(0, le_greet1):
        if list[i].lower() == greet1[j].lower():
            list[i] = greet1_re[j]
for i in range(0, le_list):
    for j in range(0, le_greet2):
        if list[i].lower() == greet2[j].lower():
            list[i] = list[i] + ','
for i in range(0, le_list):
    if list[i].lower() in bold:
        print(style.BOLD + list[i] + style.END)
        sys.stdout.flush()
    else:
        print(list[i])
        sys.stdout.flush()

from gingerit.gingerit import GingerIt

text = 'good morning'
parser = GingerIt()
# Parse twice and keep the longer result: a second pass can restore words
# the first pass dropped.
corrected = parser.parse(text)['result']
corrected2 = parser.parse(corrected)['result']
if len(corrected) > len(corrected2):
    text_proess(corrected)
else:
    text_proess(corrected2)