def test(self):
    text = 'The smelt of fliwers bring back memories.'
    parser = GingerIt()
    output = parser.parse(text)
    self.assertEqual(
        output.get("result"),
        "The smell of flowers brings back memories"
    )
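# Every snippet in this section leans on the same two keys of GingerIt's
# parse() return value. A minimal sketch of that shape -- the printed values
# are illustrative, not guaranteed by any particular gingerit version:
from gingerit.gingerit import GingerIt

parser = GingerIt()
output = parser.parse("The smelt of fliwers bring back memories.")
print(output["result"])            # the corrected sentence as a single string
print(len(output["corrections"]))  # one list entry per individual fix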
def useInSentence(secretWord):
    '''
    secretWord: list, word + data

    Starts up an interactive module for using the word in a sentence.
    * At the start of the game, let the user know the secret word.
    * Ask the user to supply one grammatically correct sentence containing the secret word.
    * The user should receive feedback immediately on whether their sentence is correct.
    '''
    # Example entry: 'exiguous', 'scanty; meager; small; slender', 'adjective', '19',
    # '51-Few English speakers likely know this word', 'Late Latin'
    word = secretWord[0]
    meaning = secretWord[1]
    usage = secretWord[2]
    points = secretWord[3]
    difficulty = secretWord[4]
    origin = secretWord[5]
    print("Welcome to the use in a sentence module!")
    print("Use the following word in a sentence: " + word)
    print("The meaning of " + word + " is: " + meaning)
    print("It is generally used as (a/an): " + usage)
    print("Points assigned: " + points)
    print("Difficulty: " + difficulty)
    print("Origin: " + origin)
    sentence = input("Sentence containing the above mentioned word: ")
    if word not in sentence.split(' '):
        print("You have not used %s in the sentence. "
              "Please check if the spelling and form used are correct!" % word)
    else:
        parser = GingerIt()
        op = parser.parse(sentence)
        if not op['corrections']:
            print("Your sentence is correct!")
        else:
            # append the sentence's final character (its punctuation)
            print("The correct sentence should be: " + op['result'] + sentence[-1])
import streamlit as st
from gingerit.gingerit import GingerIt

st.title("Devashish's Grammarly!")

incorrect_sent = st.text_area("Enter the sentence to be corrected")
if st.button("Correct!"):
    if incorrect_sent == "":
        st.warning("Please enter the text!")
    else:
        parser = GingerIt()
        output = parser.parse(incorrect_sent)
        print(output['result'])
        st.text_area("Corrected Sentence:", value=output['result'])
        # Issue in selectbox
        user_rating = st.sidebar.selectbox(
            "Rate the corrected sentence output with 1 being worst and 5 being best",
            [None, 1, 2, 3, 4, 5])
        if user_rating is not None:
            print("User Rating: ", user_rating)
def ginger(text):
    # Note: despite the name, this returns the full parse dict
    # (both 'result' and 'corrections'), not an error count.
    parser = GingerIt()
    grammar_error_counter = parser.parse(text)
    return grammar_error_counter
def classify():
    comment = request.form['comment']
    parser = GingerIt()
    x = parser.parse(comment)
    s = x['result']
    return render_template('result.html', pred=s)
def check(text):
    p = GingerIt()
    q = p.parse(text)
    return q['result']
def correction(text):
    parser = GingerIt()
    pred = parser.parse(text)
    return pred['result']
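# gingerit sends each sentence to Ginger's online service, so the thin
# wrappers above can raise on network failures or malformed responses.
# A hedged sketch of a more defensive variant -- the retry count and the
# fallback-to-input behaviour are assumptions, not part of any snippet here:
from gingerit.gingerit import GingerIt

def safe_correct(text, retries=2):
    """Return the corrected text, or the original text if the service fails."""
    parser = GingerIt()
    for _ in range(retries):
        try:
            return parser.parse(text)['result']
        except Exception:  # network error, unexpected response shape, etc.
            continue
    return text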
df["mean_word_len"] = df["comment_text"].apply(
    lambda x: np.mean([len(w) for w in str(x).split()]))
# Average word length (takes special characters into account):
# sentence length divided by the word count, e.g.
# df["name_length"] / (df['count_word'] + 1)

### Spelling Correction
# Option 1: TextBlob
# https://www.analyticsvidhya.com/blog/2018/02/natural-language-processing-for-beginners-using-textblob/
from textblob import TextBlob
df["Text_Var_Corrected"] = df["Text_Var"].apply(lambda x: str(TextBlob(x).correct()))

# Option 2: gingerit -- takes a lot of time for many rows
from gingerit.gingerit import GingerIt
parser = GingerIt()
corrected = parser.parse("Analytics Vidhya is a gret platfrm to learn data scence")['result']
NewVar = []
for row in df['Var']:  # for each row
    NewVar.append(parser.parse(row)['result'])
df['NewVar'] = NewVar

# Option 3: pyspellchecker -- takes more time even for a single sentence and
# doesn't do the correction properly
# !pip install pyspellchecker
from spellchecker import SpellChecker
SpellChecker().correction("The smelt of fliwers bring back memories")

# Option 4: hunspell (didn't try) https://github.com/hunspell/hunspell

### Regex - Pattern Extraction/Matching (findall, search)
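# A minimal timing sketch to make the speed comparison between Option 1 and
# Option 2 concrete; absolute numbers depend entirely on network latency,
# since GingerIt performs a remote call per sentence:
import time
from textblob import TextBlob
from gingerit.gingerit import GingerIt

sentence = "Analytics Vidhya is a gret platfrm to learn data scence"

t0 = time.perf_counter()
blob_fixed = str(TextBlob(sentence).correct())       # local, dictionary-based
t1 = time.perf_counter()
ginger_fixed = GingerIt().parse(sentence)['result']  # remote API round trip
t2 = time.perf_counter()

print(f"TextBlob: {t1 - t0:.2f}s -> {blob_fixed}")
print(f"GingerIt: {t2 - t1:.2f}s -> {ginger_fixed}")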
def checkGrammer(self, text):
    # print(text)
    parser = GingerIt()
    item = parser.parse(text)
    return item['result']
def autoCorrect(text):
    from gingerit.gingerit import GingerIt
    parser = GingerIt()
    return parser.parse(text)['result']
from gensim.models import KeyedVectors  # needed for load_word2vec_format below
from DocSim import DocSim
from gingerit.gingerit import GingerIt

googlenews_model_path = './data/GoogleNews-vectors-negative300.bin'
stopwords_path = "./data/stopwords_en.txt"

model = KeyedVectors.load_word2vec_format(googlenews_model_path, binary=True)
with open(stopwords_path, 'r') as fh:
    stopwords = fh.read().split(",")
ds = DocSim(model, stopwords=stopwords)

while True:
    source_doc = input("Enter first sentence : ")
    parser = GingerIt()
    corrected_text = parser.parse(source_doc)['result']
    while source_doc != corrected_text:
        if len(corrected_text.strip()) != len(source_doc.strip()) - 1:
            print("Did you mean : \"", corrected_text, "\"")
            print("Press 0 to continue with suggested version ")
            print("Press 1 to re-enter the sentence ")
            print("Press 2 to continue with your version ")
            cond = int(input(" --> "))
            if cond == 1:
                source_doc = input("Enter first sentence : ")
                corrected_text = parser.parse(source_doc)['result']
            elif cond == 0:
                source_doc = corrected_text.strip()
                break
            else:
                break  # keep the user's own version
from gingerit.gingerit import GingerIt

text = "I would like to buy one apples"
ginger_parser = GingerIt()
ginger_grammar_results = ginger_parser.parse(text)
ginger_corrections = ginger_grammar_results['corrections']
print("\nNumber of grammar issues found with Ginger: " + str(len(ginger_corrections)) + "\n")
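# The layout of each entry in that corrections list is a detail of the
# installed gingerit version, so the safest way to see what it holds is to
# print one entry and inspect its keys before relying on them:
for correction in ginger_corrections:
    print(correction)  # one dict per detected issue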
def gen_Question(keywords, qno):
    print(keywords)
    print("keywords")
    typo = "nonested"
    typo2 = "nontechnical"
    custom_sent_tokenizer = PunktSentenceTokenizer(train_text)
    tokenized = custom_sent_tokenizer.tokenize(keywords)
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            key, words = zip(*tagged)
            compare = list(words)
            print(words)
            a = dict(zip(key, words))  # POS tag -> word
            b = dict(zip(words, key))  # word -> POS tag
            print(a)
            print([b for b, v in a.items() if v in NonTechnicalQuestionDirectory.l1])
    except Exception as e:
        print(str(e))

    # Build a question template from the POS-tag pattern of the keywords
    if compare == NonTechnicalQuestionDirectory.l1:
        question = "What " + b['NNP']
    elif compare == NonTechnicalQuestionDirectory.l2:
        question = "Why should we " + b['NN'] + " " + b['PRP'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l3:
        question = "What are " + b['PRP$'] + " " + b['NNS'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l4:
        question = "Tell about " + b['PRP'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l5:
        question = "What are your " + b['JJ'] + " " + b['NNS'] + "?"
    elif compare == NonTechnicalQuestionDirectory.l6:
        question = "What is " + b['PRP$'] + " " + b['JJ'] + " " + b['NN'] + "?"
    else:
        question = "Explain about your " + keywords + " and its technologies?"

    # Grammar-correct the generated question before asking it
    parser = GingerIt()
    grammar_corrected_question = parser.parse(question)
    question = grammar_corrected_question.get("result")
    print(question)
    print(vik_test_codes.question(question, qno))

    voice_record = AudioRecorder.audio_recorder(qno)
    print("recording answer for question number " + str(qno))
    answer_validity = SpeachToText.validation("", typo2, typo,
                                              "question" + str(qno))[0]

    if str(qno) == "20" or qno == 20:
        # last question: reset the saved-question state and clean up audio files
        open('Controller/questionSaver_testing.py', 'w').close()
        fruits = ["global gcpq\n", "gcpq = 'end'\n"]
        new_file = open("Controller/questionSaver_testing.py", mode="a+",
                        encoding="utf-8")
        new_file.writelines(fruits)
        new_file.seek(0)  # rewind before reading back what was just written
        for line in new_file:
            print(line)
        new_file.close()
        filelist = glob.glob("Audio/*.wav")
        for file in filelist:
            os.remove(file)
    return question
def generate_cv_questions():
    db = "CV"
    db2 = "project"
    db3 = "project_d"
    lang = 'en'
    q_list = []
    pro_list = []
    count = 1
    session = 0
    answer_validity = 0
    global question_number
    question_number = 0
    while count <= 3:
        session = session + 1
        print("session", session)
        session_no_string = str(session)
        session_node_count = ConnectionToNeo4j.session_Node_Count(db, session_no_string)
        print("session node count", session_node_count)
        node_id = ConnectionToNeo4j.get_node_id(db, session_no_string)
        for id in range(node_id, session_node_count + node_id):
            q_list.append(str(id))
        print(q_list)
        for question_no in range(session_node_count):
            print("question number", question_no)
            random_que = random.choice(q_list)
            print("random que", random_que)
            non_technical_question = ConnectionToNeo4j.cvQuestionGen(db, random_que)
            q_list.remove(random_que)
            print(q_list)
            print(non_technical_question)
            actual_question = QuestionCreator.gen_Question(non_technical_question)
            parser = GingerIt()
            grammar_corrected_question = parser.parse(actual_question).get("result")
            TextToSpeechConverter.text_to_speech(grammar_corrected_question, lang)
            question_number = question_number + 1
            print(question_number)
            if random_que == "5":
                pro = ConnectionToNeo4j.getProjects(db, "5")
                print(pro)
                for id in range(1, pro + 1):
                    pro_list.append(str(id))
                print(pro_list)
                random_proj_que = random.choice(pro_list)
                modify_random_proj_que = "p" + random_proj_que
                print(modify_random_proj_que)
                project_question = ConnectionToNeo4j.cvQuestionProjectGen(
                    db2, db3, modify_random_proj_que, userid)
                actual_project_question = QuestionCreator.gen_Question(project_question)
                parser = GingerIt()
                grammar_corrected_project_question = parser.parse(
                    actual_project_question).get("result")
                TextToSpeechConverter.text_to_speech(
                    grammar_corrected_project_question, lang)
                question_number = question_number + 1
                print(question_number)
                global technology_list
                tech = test.kes()
                tech = NestedQuestionCreator.keywordSelector("", tech, "1", "")
                print(tech)
                technology_list = NestedQuestionCreator.nonTechnicalKeywordSeelector(
                    tech, modify_random_proj_que)
                print(technology_list)
            # wait until a valid answer is recorded
            answer_validity = test.test()
            while answer_validity == "None":
                answer_validity = test.test()
        q_list = []
        count = count + 1
async def text_correction(text: str):
    if text:
        parser = GingerIt()
        response = parser.parse(text)
        return {"Text": text, "Result": response['result']}
    return {"text": "is None."}
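# The route decorator is not shown in this excerpt. Assuming a hypothetical
# GET endpoint such as @app.get("/correct"), the handler can be exercised
# end-to-end with FastAPI's test client:
from fastapi import FastAPI
from fastapi.testclient import TestClient
from gingerit.gingerit import GingerIt

app = FastAPI()

@app.get("/correct")  # hypothetical wiring for the handler above
async def text_correction(text: str = ""):
    if text:
        return {"Text": text, "Result": GingerIt().parse(text)['result']}
    return {"text": "is None."}

client = TestClient(app)
print(client.get("/correct", params={"text": "The smelt of fliwers"}).json())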
def gen_Question(keywords, questionno, nesornot):
    global question
    custom_sent_tokenizer = PunktSentenceTokenizer(train_text)
    tokenized = custom_sent_tokenizer.tokenize(keywords)
    print("keywords for question")
    print(keywords)
    keyword_count = len(keywords.split())
    print(keyword_count)
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            key, words = zip(*tagged)
            compare = list(words)
            print(words)
            print(compare)
            a = dict(zip(key, words))  # POS tag -> word
            b = dict(zip(words, key))  # word -> POS tag
            print(a)
    except Exception as e:
        print(str(e))

    # Pick a question template based on the POS-tag pattern of the keywords
    if compare == TechnicalQuestionDictionary.tl1:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl2:
        question = "Can you explain " + keywords
    elif compare == TechnicalQuestionDictionary.tl3:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl4:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl5:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl6:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl7:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl9:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl10:
        question = "What are " + keywords
    elif compare == TechnicalQuestionDictionary.tl11:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl12:
        question = "How to use " + keywords
    elif compare == TechnicalQuestionDictionary.tl13:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl14:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl15:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl16:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl17:
        question = "Tell about " + keywords
    elif compare == TechnicalQuestionDictionary.tl18:
        question = "Explain about " + keywords
    elif compare == TechnicalQuestionDictionary.tl19:
        question = "Describe " + keywords
    elif compare == TechnicalQuestionDictionary.tl20:
        question = "What is a " + keywords
    elif compare == TechnicalQuestionDictionary.tl21:
        question = "Describe about " + keywords
    elif compare == TechnicalQuestionDictionary.tl22:
        question = "Tell about " + keywords
    else:
        question = "Define about " + keywords

    parser = GingerIt()
    grammar_corrected_question = parser.parse(question).get("result")
    # Re-attach the original keywords so the grammar checker cannot rewrite them
    question_list = grammar_corrected_question.split()
    question_suffix = ' '.join(question_list[:-keyword_count])
    question = question_suffix + " " + keywords
    print(question)
    print(vik_test_codes.question(question, questionno))

    voice_record = AudioRecorder.audio_recorder(questionno)
    answer_validity = SpeachToText.validation(keywords, "technical", nesornot,
                                              "question" + str(questionno))[0]

    if str(questionno) == "20" or questionno == 20:
        # last question: reset the saved-question state and clean up audio files
        open('Controller/questionSaver_testing.py', 'w').close()
        fruits = ["global gcpq\n", "gcpq = 'end'\n"]
        new_file = open("Controller/questionSaver_testing.py", mode="a+",
                        encoding="utf-8")
        new_file.writelines(fruits)
        new_file.seek(0)  # rewind before reading back what was just written
        for line in new_file:
            print(line)
        new_file.close()
        filelist = glob.glob("Audio/*.wav")
        for file in filelist:
            os.remove(file)
    return question
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 16 15:00:43 2021

@author: akshay
"""
from gingerit.gingerit import GingerIt

text = 'Narendra Modi is our prme mnister. He is from Gujaratt'
parser = GingerIt()
print(len(parser.parse(text)['corrections']))
def auto_correct(text):
    # text = 'The smelt of fliwers bring back memories.'
    parser = GingerIt()
    result = parser.parse(text)
    return result['result']
def checkGrammar(*args):
    from gingerit.gingerit import GingerIt
    blog_id = args[0]
    if Blog.objects.filter(id=blog_id).exists():
        blog = Blog.objects.filter(id=blog_id).first()
        blogBody = blog.body
        # Decode the HTML entities the editor stores before parsing the JSON
        blogBody = blogBody.replace('&quot;', '"').replace('&lt;', '<') \
                           .replace('&gt;', '>').replace('&nbsp;', ' ')
        jsonData = json.loads(blogBody)
        futures = []
        if blog.title:
            futures.append({
                "blog_id": blog_id,
                "section_id": 0,
                "text": blog.title,
                "speed": False
            })
        for idx, data in enumerate(jsonData["blocks"]):
            if "quote" == data["type"] and "text" in data["data"]:
                quoteText = "Quote: " + handleHTML(data["data"]["text"])
                if data["data"]["caption"]:
                    quoteText = quoteText + " By " + data["data"]["caption"]
                futures.append({
                    "blog_id": blog_id,
                    "section_id": idx + 1,
                    "text": quoteText,
                    "speed": False
                })
            elif "text" in data["data"]:
                futures.append({
                    "blog_id": blog_id,
                    "section_id": idx + 1,
                    "text": handleHTML(data["data"]["text"]),
                    "speed": False
                })
            elif "items" in data["data"]:  # list, checklist
                if data["type"] == "checklist":
                    final_section = ""
                    for item in data["data"]["items"]:
                        final_section = final_section + item["text"]
                    futures.append({
                        "blog_id": blog_id,
                        "section_id": idx + 1,
                        "text": handleHTML(final_section),
                        "speed": False
                    })
                elif data["type"] == "list":
                    final_section = ""
                    for item in data["data"]["items"]:
                        final_section = final_section + item
                    futures.append({
                        "blog_id": blog_id,
                        "section_id": idx + 1,
                        "text": handleHTML(final_section),
                        "speed": False
                    })
        for text in futures:
            try:
                print("text: ", text)
                parser = GingerIt()
                result = parser.parse(text["text"][:199])  # truncate long sections
                print("result: ", result)
            except Exception as e:
                return response_500("internal error", e)
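# Truncating each section to 199 characters silently drops the rest of the
# text. A hedged alternative -- the 199-character budget comes from the
# snippet above, but chunking on sentence boundaries is an assumption, not
# the project's actual behaviour:
def parse_in_chunks(parser, text, limit=199):
    """Split text on sentence boundaries and grammar-check each chunk."""
    chunks, current = [], ""
    for sentence in text.split(". "):
        candidate = (current + ". " + sentence) if current else sentence
        if len(candidate) <= limit:
            current = candidate
        else:
            if current:
                chunks.append(current)
            current = sentence[:limit]  # a single oversize sentence is still cut
    if current:
        chunks.append(current)
    return [parser.parse(chunk) for chunk in chunks]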
def analyze(request):
    puncts = string.punctuation
    word_to_find = request.POST.get("word_input")
    # field name assumed; the 'replace' branch below needs a replacement string
    replace_input = request.POST.get("replace_input", "")
    djText = request.POST.get('text', 'default')
    # Every option radio button posts under the same 'option' key, so all of
    # these variables receive the same submitted value; the elif chain below
    # dispatches on whichever option was chosen.
    remPunc = request.POST.get('option', 'removepunc')
    cap = request.POST.get('option', 'capitalize')
    small = request.POST.get('option', 'toSmall')
    upper = request.POST.get('option', 'toUpper')
    word_find_flag = request.POST.get('option', 'word_find')
    New_Line = request.POST.get('option', 'New_line')
    Emails = request.POST.get('option', 'Email_Address')
    Links = request.POST.get('option', 'Links')
    Passgen = request.POST.get('option', 'Password_Generator')
    search_word = request.POST.get('option', 'Search_word')
    gallery = request.POST.get('option', 'q')
    Suggest_word = request.POST.get('option', 'suggest_word')
    Sen_Analysis = request.POST.get('option', 'Sentiment')
    Grammar = request.POST.get('option', 'grammar')
    Channel = request.POST.get('option', 'suggest_youtube')
    books = request.POST.get('option', 'suggest_books')
    articles = request.POST.get('option', 'suggest_articles')
    lemmitizer = request.POST.get('option', 'grammar')
    start_pdf = request.POST.get('option', 'generate_pdf')
    replace_text = request.POST.get('option', 'replace')
    Word_cloud = request.POST.get('option', 'wordcloud')
    Date = request.POST.get('option', 'date')
    Word_frequency = request.POST.get('option', 'word_frequency')

    analyzed_text = ""
    word_status = ""
    countword = len(djText.split())

    if word_find_flag == "word_find":
        if word_to_find != "":
            if djText.find(word_to_find) != -1:
                word_status = "found"
                djText = djText.replace(
                    word_to_find,
                    '<b style="color:red;">' + word_to_find + "</b>")
                try:
                    synonym_01 = get_synonyms(word_to_find)
                    synonyms2 = random.sample(synonym_01, 4)
                    final = ""
                    for f in synonyms2:
                        final += f + " , "
                    example = get_example(word_to_find)
                    synonyms = final + example
                except Exception:
                    synonyms = "Not Available"
            else:
                word_status = "not found"
                synonyms = "Text Not Found"
        analyzed_text = djText
        word_find = "Find Word = " + word_to_find
        synonym = format_html('<b style="color:{};">{}</b>', 'green', synonyms)
        result = {"analyzed_text": analyzed_text,
                  "highlight": "Chosen word is highlighted in red colour and "
                               "synonyms/examples in green colour",
                  "purpose": word_find, "status": word_status,
                  "synonym": synonym, "wordcount": countword,
                  "analyze_text": True, "findWord": True}
    elif New_Line == "New_line":
        for char in djText:
            if char == '.':
                char = '\n'
            analyzed_text = analyzed_text + char
        result = {"analyzed_text": analyzed_text,
                  "purpose": "Changes '.' to New Line",
                  "analyze_text": True, "wordcount": countword}
    elif Emails == "Email_Address":
        regex = r'^[a-z0-9]+[\._]?[a-z0-9]+[@]\w+[.]\w{2,3}$'
        lst = re.findall(r'\S+@+\S+', djText)
        tmp = ""
        for x in lst:
            if re.search(regex, x):
                tmp += x
                tmp += '\n'
        result = {"analyzed_text": tmp, "purpose": "Find All Emails",
                  "analyze_text": True, "wordcount": countword}
    elif Passgen == "Password_Generator":
        stop_words = set(stopwords.words('english'))
        chars = "!£$%&*#@"
        ucase_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        text = re.sub(r'[^\w\s]', '', djText)
        token = word_tokenize(text)
        filtered_sentence = [w for w in token if w not in stop_words]
        if len(filtered_sentence) > 0:
            random_word = random.choice(filtered_sentence)
        else:
            random_word = token[0]
        random_word = random_word.title()
        merge = ""
        for word in random_word.split():
            merge += (random.choice(chars) + word[:-1] + word[-1].upper()
                      + random.choice(string.ascii_letters) + "@"
                      + random.choice(ucase_letters)
                      + random.choice(string.digits) + " ")
        final_text = merge[:-1]
        result = {"analyzed_text": final_text,
                  "purpose": "Generate password from text",
                  "generate_text": True, "wordcount": countword}
    elif search_word == "Search_word":
        url = 'https://www.dictionary.com/browse/'
        headers = requests.utils.default_headers()
        headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                                      'Chrome/83.0.4103.97 Safari/537.36'})
        req = requests.get(url + djText, headers)
        soup = BeautifulSoup(req.content, 'html.parser')
        mydivs = soup.findAll("div", {"value": "1"})[0]
        for tags in mydivs:
            meaning = tags.text
        wrap = textwrap.TextWrapper(width=100)
        word_meaning = wrap.fill(text=meaning)
        result = {"analyzed_text": word_meaning, "purpose": "Searched Word",
                  "generate_text": True, "wordcount": countword}
    elif Suggest_word == "suggest_word":
        find = requests.get(
            f"https://www.dictionaryapi.com/api/v3/references/thesaurus/json/"
            f"{djText}?key={api_key}")
        response = find.json()
        if len(response) == 0:
            print("Word Not Recognized!")
        else:
            if str(response[0]).count(" ") == 0:
                k = []
                for j in range(len(response)):
                    k.append(response[j])
                djText = " , ".join(k)
            else:
                dictionary = PyDictionary()
                testdict = dictionary.synonym(djText)
                djText = " , ".join(testdict)
        wrap = textwrap.TextWrapper(width=100)
        suggest = wrap.fill(text=djText)
        result = {"analyzed_text": suggest, "purpose": "Suggested Word",
                  "generate_text": True, "wordcount": countword}
    elif Sen_Analysis == "Sentiment":
        djText = ' '.join(
            re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
                   djText).split())
        analysis = TextBlob(djText)
        # classify by polarity sign
        if analysis.sentiment.polarity > 0:
            final = str(djText) + " (Positive Text)"
        elif analysis.sentiment.polarity == 0:
            final = str(djText) + " (Neutral Text)"
        else:
            final = str(djText) + " (Negative Text)"
        result = {"analyzed_text": final, "purpose": "Sentiment Analysis",
                  "analyze_text": True, "wordcount": countword}
    elif Grammar == "grammar":
        parser = GingerIt()
        final = parser.parse(djText)["result"]
        if final == '':
            final = "Please write some text to check grammar"
        result = {"analyzed_text": final, "grammar": djText,
                  "purpose": "Spelling & Grammar Check",
                  "analyze_text": True, "wordcount": countword}
    elif lemmitizer == "lemmitize":
        wordnet_lemmatizer = WordNetLemmatizer()
        tokenization = nltk.word_tokenize(djText)
        result = ""  # holds the last "word -> lemma" pair found
        count = True
        for w in tokenization:
            k = wordnet_lemmatizer.lemmatize(w, pos="v")
            if w != k:
                result = "{} -> {}".format(w, k)
                count = False
        if count:
            final = "No need for lemmatization"
        else:
            final = "(Original word) -> (Lemmatized word)"
        result = {"analyzed_text": result, "highlight": final,
                  "purpose": "Lemmatization of text",
                  "analyze_text": True, "wordcount": countword}
    elif Channel == "suggest_youtube":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Suggest youtube channels",
                  "status": "Press Button To View Channel links",
                  "find_channel": True, "generate_text": True,
                  "wordcount": countword}
    elif books == "suggest_books":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Search Books",
                  "status": "Press Button To View Books", "find_books": True,
                  "generate_text": True, "wordcount": countword}
    elif articles == "suggest_articles":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Search Articles",
                  "status": "Press Button To View Articles",
                  "find_articles": True, "generate_text": True,
                  "wordcount": countword}
    elif start_pdf == "generate_pdf":
        request.session['user-input'] = djText
        result = {"analyzed_text": "Check Your Pdf", "purpose": "Generate Pdf",
                  "status": "Press Button To View Pdf", "make_pdf": True,
                  "generate_text": True, "wordcount": countword}
    elif replace_text == "replace":
        final_text = re.sub(word_to_find, replace_input, djText)
        result = {"analyzed_text": final_text,
                  "purpose": "Replacement of text in sentence",
                  "analyze_text": True, "wordcount": countword}
    elif Word_cloud == "wordcloud":
        cloud = WordCloud(background_color="white", max_words=200,
                          stopwords=set(STOPWORDS))
        wc = cloud.generate(djText)
        buf = io.BytesIO()
        wc.to_image().save(buf, format="png")
        data = base64.b64encode(buf.getbuffer()).decode("utf8")
        final = "data:image/png;base64,{}".format(data)
        result = {"analyzed_text": " ", "purpose": "Wordcloud",
                  "my_wordcloud": final, "generate_text": True,
                  "wordcount": countword}
    elif Date == "date":
        final = extract_dates(djText)
        final_text = final[0].date()
        result = {"analyzed_text": final_text,
                  "purpose": "Extract Dates from text",
                  "analyze_text": True, "wordcount": countword}
    elif Word_frequency == "word_frequency":
        djText = djText.replace("\n", " ").lower()
        words_dict = get_words_dict(djText)
        k = 10 if len(words_dict) > 10 else len(words_dict)
        y_pos = range(0, k)
        bars = []
        height = []
        count = 0
        # print and collect the ten most frequent words for the bar chart
        format_spaces("word", "occurrences")
        for word_str, word_amount in words_dict.items():
            format_spaces(word_str, word_amount)
            count += 1
            if count <= 10:
                bars.append(word_str)
                height.append(int(word_amount))
        plt.bar(y_pos, height)
        plt.xticks(y_pos, bars, size=9)
        plt.xticks(rotation='horizontal')
        plt.ylabel('Word Frequency', fontsize=12, labelpad=10)
        plt.xlabel('Words', fontsize=12, labelpad=10)
        fig = plt.gcf()
        buf = BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        data = base64.b64encode(buf.read())
        uri = urllib.parse.quote(data)
        final = "data:image/png;base64,{}".format(uri)
        result = {"analyzed_text": " ",
                  "purpose": "Word Frequency for every word in text",
                  "bar_graph": final, "analyze_text": True,
                  "wordcount": countword}
    elif gallery == "q":
        request.session['user-input'] = djText
        result = {"analyzed_text": djText, "purpose": "Images",
                  "status": "Press Button To View Images", "find_image": True,
                  "generate_text": True, "wordcount": countword}
    elif remPunc == 'removepunc':
        for char in djText:
            if char not in puncts:
                analyzed_text = analyzed_text + char
        result = {"analyzed_text": analyzed_text, "purpose": "Remove Punctuations",
                  "analyze_text": True, "wordcount": countword}
    elif cap == "capitalize":
        analyzed_text = djText.capitalize()
        result = {"analyzed_text": analyzed_text, "purpose": "Capitalize",
                  "analyze_text": True, "wordcount": countword}
    elif small == "toSmall":
        analyzed_text = djText.lower()
        result = {"analyzed_text": analyzed_text, "purpose": "To Smallercase",
                  "analyze_text": True, "wordcount": countword}
    elif upper == "toUpper":
        analyzed_text = djText.upper()
        result = {"analyzed_text": analyzed_text, "purpose": "To Uppercase",
                  "analyze_text": True, "wordcount": countword}
    elif Links == "Links":
        pattern = (r'(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)'
                   r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#\/%=~_|$?!:,.])*'
                   r'(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[A-Z0-9+&@#\/%=~_|$])')
        links = re.findall(pattern, djText, re.IGNORECASE)
        analyzed_text = ""
        i = 0
        for x in links:
            i = i + 1
            analyzed_text += f'<a href="{x}" target="_blank">Link {i}</a>'
            analyzed_text += '\n '
        result = {"analyzed_text": analyzed_text, "purpose": "Find All Links",
                  "analyze_text": True, "wordcount": countword}
    else:
        return HttpResponse(
            '''<script type="text/javascript">alert("Please select at least one option.");</script>''')
    return render(request, 'analyze.html', result)
def test_gingerit(text, expected):
    parser = GingerIt()
    assert parser.parse(text)["result"] == expected
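# This assert-style test is written for pytest; the usual wiring is a
# parametrize decorator. The case below reuses the example from the unittest
# at the top of this section -- like all expectations against a live service,
# the exact output may drift over time:
import pytest
from gingerit.gingerit import GingerIt

@pytest.mark.parametrize("text, expected", [
    ("The smelt of fliwers bring back memories.",
     "The smell of flowers brings back memories"),
])
def test_gingerit(text, expected):
    parser = GingerIt()
    assert parser.parse(text)["result"] == expected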
def check(mail):
    try:
        f = word(mail, 'sentence')
        corrections = 0
        for s in f:
            g = GingerIt()
            h = g.parse(s)
            corrections += len(h['corrections'])
        return corrections
    except Exception:
        print("Error while checking grammar errors in text")


def search(search_term, next=False, page=0, board=0):
    """function to search and return comments"""
    if next == False:
        page = requests.get("https://www.nairaland.com/search?q=" +
                            urllib.parse.quote_plus(str(search_term)) +
                            "&board=" + str(board))
    else:
        page = requests.get("https://www.nairaland.com/search/" + str(search_term) +
                            "/0/" + str(board) + "/0/1" + str(page))
    soup = BeautifulSoup(page.content, 'html.parser')
    comments = soup.findAll("div", {"class": "narrow"})
    return comments


WordList = []


def analysis(text):
    """function to evaluate sentiment"""
    try:
        j = 0
        board = 29
        while j < 10:
            nextItem = j != 0
            commentsCurrent = search(text, nextItem, j, board)
            add_to_word_list(commentsCurrent)
            j += 1
    except Exception:
        print("Search failed")

    positive = 0
    negative = 0
    neutral = 0
    previous = []
    for tweet in WordList:
        if tweet in previous:
            continue
        previous.append(tweet)
        analysis = TextBlob(tweet)
        # evaluate the polarity of each comment
        polarity = analysis.sentiment.polarity
        if polarity == 0:
            neutral += 1
        elif polarity < 0.00:
            negative += 1
        elif polarity > 0.0:
            positive += 1

    noOfSearchTerms = positive + negative + neutral
    positive = percentage(positive, noOfSearchTerms)
    negative = percentage(negative, noOfSearchTerms)
    neutral = percentage(neutral, noOfSearchTerms)
    return positive, negative, neutral
if __name__ == "__main__":
    app = TextToSpeech(subscription_key)
    # app.get_token()
    mtime_last = 0
    pdata = []
    while True:
        mtime_cur = os.path.getmtime("texts.txt")
        if mtime_cur != mtime_last:
            time.sleep(2)
            print(f'LOG {time.strftime("%Y%m%d-%H%M")}: file watch event triggered')
            with open('texts.txt', 'r') as file:
                data = file.read().split('\n')
            if data != pdata:
                pdata = data
                try:
                    parser = GingerIt()
                    correctedText = parser.parse(data[-1])['result']
                    app.tts = correctedText
                    if len(correctedText) > 0:
                        app.save_audio()
                except Exception:
                    print(f"LOG {time.strftime('%Y%m%d-%H%M')}: No data in file")
        mtime_last = mtime_cur
# print(targeted_sentences[100])
wrong_sentences = []
for j in range(0, len(targeted_sentences)):
    wrong_sentences.append(targeted_sentences[j].replace(" is ", " are "))
print("There are {} wrong sentences.".format(len(wrong_sentences)))
# print(wrong_sentences[100])

counter = 0
parser = GingerIt()
for j in range(1, 100):
    try:
        text = wrong_sentences[j] + " "
        if parser.parse(text).get("result") == targeted_sentences[j]:
            counter += 1
    except IndexError:
        pass
print(counter)

# class TestGingerIt(unittest.TestCase):
#     def test_is(self):
#         parser = GingerIt()
#         for j in range(302, 303):
#             text = wrong_sentences[j] + " "
#             self.assertEqual(parser.parse(text).get("result"), targeted_sentences[j])
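# The commented-out unittest above stops at the first mismatch; unittest's
# subTest reports every failing sentence instead. A sketch, assuming the
# wrong_sentences and targeted_sentences lists built above:
import unittest
from gingerit.gingerit import GingerIt

class TestGingerItSubTest(unittest.TestCase):
    def test_is_vs_are(self):
        parser = GingerIt()
        for j in range(1, 100):
            with self.subTest(index=j):
                corrected = parser.parse(wrong_sentences[j] + " ").get("result")
                self.assertEqual(corrected, targeted_sentences[j])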
for i in range(0, le_list):
    for j in range(0, le_greet1):
        if list[i].lower() == greet1[j].lower():
            list[i] = greet1_re[j]
for i in range(0, le_list):
    for j in range(0, le_greet2):
        if list[i].lower() == greet2[j].lower():
            list[i] = list[i] + ','
for i in range(0, le_list):
    if list[i].lower() in bold:
        print(style.BOLD + list[i] + style.END)
        sys.stdout.flush()
    else:
        print(list[i])
        sys.stdout.flush()

from gingerit.gingerit import GingerIt

text = 'good morning'
parser = GingerIt()
# Parse twice and keep the longer result: a second pass can restore words
# the first pass dropped.
corrected = parser.parse(text)['result']
corrected2 = parser.parse(corrected)['result']
if len(corrected) > len(corrected2):
    text_proess(corrected)
else:
    text_proess(corrected2)