def decrypt_BF(ciphertext, matchrate=0.8):
    """Brute-force a mono-alphabetic substitution cipher by trying key permutations.

    A candidate key is accepted when at least ``matchrate`` of the decrypted
    words are found in an English dictionary.

    NOTE(review): even when iterating lazily, 26! permutations is not feasible
    to exhaust; this remains a demonstration of the approach, not a practical
    attack. (The original additionally tried to *materialize* the full
    permutation list with ``list(itertools.permutations(...))``, which cannot
    fit in memory — fixed here by iterating the generator lazily.)

    :param ciphertext: the text to decrypt
    :param matchrate: fraction of words that must be dictionary words (default 0.8)
    :return: the decrypted text on success, or "" if all trials fail
    """
    # use a spellchecker to check whether words are in the dictionary
    from spellchecker import SpellChecker
    import itertools
    import string

    spell = SpellChecker(language=u'en')

    # criterion: how many decrypted words must be real dictionary words
    words_total = len(spell.split_words(ciphertext))
    words_match_min = int(matchrate * words_total)

    # ciphertext alphabet 'A'..'Z'
    cipher = list(string.ascii_uppercase)

    # BUG FIX: iterate permutations lazily instead of list()-ing 26! of them
    for trial, plain in enumerate(itertools.permutations(cipher)):
        # build the decipher table for this permutation
        decipher_dict = dict(zip(cipher, plain))
        # decrypt with the current decipher table (helper defined elsewhere)
        decrypted = decrypt(ciphertext, decipher_dict)
        words_list = spell.split_words(decrypted)
        print(trial)
        # accept when enough decrypted words are dictionary words
        if len(spell.known(words_list)) >= words_match_min:
            # BUG FIX: original printed the undefined name `shift`;
            # report the permutation index instead
            print("Find dictionary words at permutation ", trial)
            printCipherTable(decipher_dict, isInverse=True)
            return decrypted
    print("All trials failed")
    return ""
def spelling_correction(query):
    """Return ``query`` with each word replaced by its best spelling correction.

    :param query: the (possibly misspelled) query string
    :return: the corrected query, words joined with single spaces
    """
    print("Actual(misspelled) Query words :", query.split())
    spell = SpellChecker()
    words = spell.split_words(query)
    # BUG FIX: SpellChecker.correction() returns None when it has no candidate;
    # fall back to the original word so the join below never sees None
    corrected = [spell.correction(word) or word for word in words]
    print("Modified Query words : ", corrected)
    return " ".join(corrected)
def test_split_words(self):
    """split_words keeps contractions intact and drops punctuation."""
    checker = SpellChecker()
    tokens = checker.split_words("This isn't a good test, but it is a test!!!!")
    expected = {"This", "isn't", "a", "good", "test", "but", "it", "is"}
    self.assertEqual(set(tokens), expected)
class PySpellChecker(BaseCrafter):
    """
    :class:`PySpellChecker` wraps pyspellchecker
    (https://github.com/barrust/pyspellchecker) to provide spelling
    correction capacity as a crafter in Jina.

    :param language: The language of the dictionary to load or None for no
        dictionary. Supported languages are `en`, `es`, `de`, `fr`, `pt`
        and `ru`. Defaults to `en`. A list of languages may be provided
        and all languages will be loaded.
    :param local_dictionary: The path to a locally stored word frequency
        dictionary; if provided, no language will be loaded.
    :param distance: The edit distance to use. Defaults to 2.
    :param case_sensitive: Flag to use a case sensitive dictionary or not,
        only available when not using a language dictionary.
    :param args: Additional positional arguments
    :param kwargs: Additional keyword arguments
    """

    def __init__(self,
                 language: str = 'en',
                 local_dictionary: Optional[str] = None,
                 distance: int = 2,
                 case_sensitive: bool = False,
                 *args,
                 **kwargs):
        """Store the checker configuration; the checker itself is built in post_init."""
        super().__init__(*args, **kwargs)
        self.language = language
        self.local_dictionary = local_dictionary
        self.distance = distance
        self.case_sensitive = case_sensitive

    def post_init(self):
        """Instantiate the underlying SpellChecker from the stored configuration.

        The import is done here (not at module level) so the dependency is
        only required when the crafter is actually initialized.
        """
        from spellchecker import SpellChecker
        super().post_init()
        self.speller = SpellChecker(language=self.language,
                                    local_dictionary=self.local_dictionary,
                                    distance=self.distance,
                                    case_sensitive=self.case_sensitive)

    @single
    def craft(self, text: str, *args, **kwargs):
        """
        Craft sentences correcting misspelled words.

        :param text: The text to be corrected
        :param args: Additional positional arguments
        :param kwargs: Additional keyword arguments
        :return: A dictionary with the corrected text under the ``text`` key
        """
        # tokenize, correct each word, then rejoin with single spaces
        words = self.speller.split_words(text)
        corrected_text = ' '.join(
            [self.speller.correction(word) for word in words])
        return dict(text=corrected_text)
def test_words_more_complete(self):
    """split_words lowercases tokens and strips punctuation, preserving order."""
    checker = SpellChecker()
    expected = ('this is a test of the word parser '
                'it should work correctly').split()
    actual = checker.split_words(
        'This is a test of the word parser. It should work correctly!!!')
    self.assertEqual(actual, expected)
def correctTypos(text):
    """Return ``text`` with every word replaced by its best spelling correction.

    :param text: input text (coerced to str before tokenizing)
    :return: corrected words joined with single spaces ("" for empty input)
    """
    speller = SpellChecker()
    words = speller.split_words(str(text))
    # BUG FIX: join once instead of quadratic `result = result + " " + word`
    # concatenation; also fall back to the original word when correction()
    # returns None (no candidate), which would have crashed the old code too.
    return " ".join(speller.correction(word) or word for word in words)
def check(text):
    """Report the first misspelled word in ``text``.

    :param text: input text (coerced to str before tokenizing)
    :return: "Typo: <word>" for the first word whose correction differs,
             or "Typo:------" when every word checks out
    """
    speller = SpellChecker()
    words = speller.split_words(str(text))
    # Simplification: the original corrected every word up front, then scanned
    # by index with a redundant elif; an early-exit loop gives the same result
    # without the extra list or the second pass.
    for word in words:
        if speller.correction(word) != word:
            return "Typo: " + word
    return "Typo:------"
def removeWordRepeat(text):
    """Tokenize ``text`` (Portuguese dictionary) and drop duplicate words.

    For each repeated word the LAST occurrence is kept, in its original
    position — the same end state the original's remove-from-the-front
    approach converged to.

    BUG FIX: the original removed items from the list *while iterating it*,
    which makes the iterator skip elements; it ran the whole pass twice to
    compensate, but could still leave duplicates behind. A single reverse
    scan with a seen-set is correct and O(n).

    :param text: text to tokenize and de-duplicate
    :return: list of unique words
    """
    spell = SpellChecker(language='pt')
    words = spell.split_words(text)
    seen = set()
    deduped = []
    # walk backwards so the last occurrence of each word is the one kept
    for word in reversed(words):
        if word not in seen:
            seen.add(word)
            deduped.append(word)
    deduped.reverse()
    return deduped
def SpellCheck(data):
    """Spell-correct ``data`` word by word, coloring changed words blue.

    :param data: input string, split on single spaces
    :return: the detokenized corrected text
    """
    spell = SpellChecker()
    # teach the checker tokens it should never flag — once, not per word
    # (the original reloaded this list on every loop iteration)
    spell.word_frequency.load_words(['molded', '.', '(', ')'])

    # BUG FIX: the original did `words = spell.split_words(words)` before
    # `words` existed (NameError) and called the non-existent str.split_words;
    # split the input string on spaces instead.
    corrected_words = []
    for token in data.split(' '):
        w = Word(token)
        suggestion = spell.correction(w)
        if suggestion != w:
            # highlight words that were actually changed
            suggestion = colored(suggestion, 'blue')
        corrected_words.append(suggestion)
    return TreebankWordDetokenizer().detokenize(corrected_words)
def check_name(name, type):
    """Spell-check a snake_case column name and sanity-check its SQL type.

    Prints a warning when the type looks inconsistent with the name's suffix
    (key/id -> integer, dtm -> datetime, nm/txt -> char types), and prints a
    suggested corrected name when any word is misspelled.

    :param name: column name, words separated by underscores
    :param type: SQL type string, or None to skip the type checks
    """
    incorrect_ind = False
    new_name = ''
    spell = SpellChecker()
    full_name = spell.split_words(name.replace('_', ' '))
    for word in full_name:
        # leading single-letter prefixes and correctly-spelled words pass through
        if (full_name[0] == word and word in ('d', 'b', 'f', 'r')) \
                or spell.correction(word) == word:
            new_name += word + ' '
        # BUG FIX: `word in ('prev')` tested substring membership in the string
        # 'prev' (so 'p', 're', ... matched); use equality instead
        elif (full_name[-1] == word
              and word in ('key', 'txt', 'nb', 'amt', 'dtm', 'qty', 'dt')) \
                or word == 'prev':
            new_name += word + ' '
        else:
            incorrect_ind = True
            new_name += spell.correction(word) + ' '
    if type is not None:
        # BUG FIX: the highlight escapes were malformed ('\028[32m');
        # ANSI green is '\033[32m'
        if full_name[-1] in ('key', 'id') and type not in ('smallint', 'int', 'bigint'):
            print('column ' + name + ' type: <\033[32m' + type + '\033[0m> may be wrong.')
        # BUG FIX: `type not in ('datetime')` was substring membership
        # ('date' and 'time' matched); compare for equality
        elif full_name[-1] == 'dtm' and type != 'datetime':
            print('column ' + name + ' type: <\033[32m' + type + '\033[0m> may be wrong.')
        elif full_name[-1] == 'nm' and not (str(type).startswith('varchar')
                                            or str(type).startswith('char')):
            print('column ' + name + ' type: <\033[32m' + type + '\033[0m> may be wrong.')
        elif full_name[-1] == 'txt' and not (str(type).startswith('varchar')
                                             or str(type).startswith('char')):
            print('column ' + name + ' type: <\033[32m' + type + '\033[0m> may be wrong.')
    if incorrect_ind:
        print(name + ' should be \033[32m'
              + new_name.upper().replace(' ', '_')[:len(new_name) - 1] + '\033[0m')
# --- fragment of a socket-server message loop; the enclosing `while`, the
# --- recv call, and the conditional that owns this `break` are outside this
# --- excerpt ---
break
msg = msg.decode(encoding='utf-8')
print('Message to received: ', msg)
# lightweight health check: answer a plain 'ping' with a JSON-encoded 'pong'
if msg == 'ping':
    c.sendall(json.dumps('pong').encode(encoding='utf-8'))
    continue
msg = json.loads(msg)
# payload is SJCL-encrypted; decrypt with the shared key from config
msg = sjcl.decrypt(msg, config['crypto_key']).decode()
print("received: {}".format(msg))
words = spellchecker.split_words(msg)
#wrong_indices = []
# for i in range(len(words)):
#     if spellchecker.correction(words[i]) != words[i]:
#         print("{} is wrong. Correct is {}".format(words[i], spellchecker.correction(words[i])))
#         wrong_indices.append(i)
# unknown() returns the set of words not found in the dictionary
# NOTE(review): despite the name, wrong_indices holds the *words*, not indices
wrong_indices = list(spellchecker.unknown(words))
print("Erros encontrados: ", wrong_indices)
# charge per word processed (helper defined elsewhere in the file)
bill = calculate_bill(len(words))
data = {'wrong_words': wrong_indices, 'bill': bill}
from spellchecker import SpellChecker

# Demo: print the best correction and the full candidate set for each word
# of a deliberately misspelled sentence.
spell = SpellChecker()
# BUG FIX: the original called split_words once and discarded the result
# before calling it again; tokenize exactly once.
words = spell.split_words("this sentnce has misspelled werds")
for word in words:
    print(spell.correction(word))
    print(spell.candidates(word))
# --- fragment of an HTML-crawl loop; the enclosing loops over pages/tags and
# --- the names spell, class_dict, href_list, main, etc. are defined outside
# --- this excerpt ---
if tag.name == "a":
    # collect each new local hyperlink once, remembering the page it came from
    href = tag.get("href", None)
    if href and href not in href_list and is_local(href):
        href_list.append(href)
        href_csv.append([href, dirname])
if tag.name not in class_dict[parent_slug].keys():
    class_dict[parent_slug][tag.name] = dict()
tag_class = tag.get("class", ["None"])
if tag_class:
    # record the first page on which each tag/class combination appears;
    # multi-class attributes are keyed as a '|'-joined string
    if "|".join(tag_class) not in class_dict[parent_slug][tag.name].keys():
        class_dict[parent_slug][tag.name]["|".join(tag_class)] = dirname
# spell-check the tag's visible text; unknown() yields out-of-dictionary words
misspelled = spell.unknown(
    spell.split_words(tag.get_text(separator=" ", strip=True)))
for m_word in misspelled:
    if dirname not in word_dict.keys():
        word_dict[dirname] = list()
    if m_word not in word_dict[dirname]:
        word_dict[dirname].append(m_word)
        word_csv.append([m_word, dirname])
# strip configured tags (optionally filtered by class) from the parsed page
for tag_to_remove in class_transformations["remove_tags"]:
    remove_tag = tag_to_remove["tag"]
    remove_class = tag_to_remove["class"]
    if len(remove_class) == 0:
        remove_matches = main.findAll(remove_tag)
    else:
        remove_matches = main.findAll(remove_tag,
                                      attrs={'class': remove_class})
# (closes a list literal — apparently excluded element names — begun before
# this excerpt)
]


def get_text_from_child_nodes(element, aggregator: List):
    """Recursively collect the text of ``element``'s descendants into ``aggregator``.

    Text nodes are appended verbatim; element children are descended into
    unless their tag name appears in the module-level ``excluded_elements``.

    :param element: a minidom node to walk
    :param aggregator: list that accumulates the text fragments (mutated in place)
    """
    for child in element.childNodes:
        if child.nodeType == child.TEXT_NODE:
            aggregator.append(child.data)
        elif child.nodeType == child.ELEMENT_NODE and child.tagName not in excluded_elements:
            get_text_from_child_nodes(child, aggregator)


# --- script body: spell-check every DITA topic under resources/dita ---
spell = SpellChecker()
working_dir = Path(__file__).absolute()
topics_dir = working_dir.parent.parent / "resources" / "dita"
for topic in topics_dir.rglob('*.dita'):
    doc = minidom.parse(topic.__str__())
    all_text = []
    get_text_from_child_nodes(doc, all_text)
    # NOTE(review): fragments are joined with no separator, so text spanning
    # two nodes can fuse into a single token — confirm this is intended
    all_words = spell.split_words("".join(all_text))
    # words missing from the dictionary, minus the project's allow-list
    unknown_words = [
        word for word in spell.unknown(all_words) if word not in ignored_words
    ]
    if unknown_words:
        for word in unknown_words:
            print(
                f'Unknown word in {topic.name}: "{word}". Did you mean "{spell.correction(word)}"?'
            )
    else:
        print(f'Perfect spelling in file {topic.name}')
def correction_sent(sent):
    """Tokenize ``sent`` and return the best spelling correction of each word."""
    checker = SpellChecker()
    corrected = []
    for token in checker.split_words(sent):
        corrected.append(checker.correction(token))
    return corrected
def test_words(self):
    """split_words should lowercase tokens and preserve their order."""
    checker = SpellChecker()
    expected = 'this is a test of this'.split()
    self.assertEqual(checker.split_words('This is a test of this'), expected)
from spellchecker import SpellChecker

# Read a line from the user, correct each word, and echo the result.
text = input("INPUT TEXT: ")
spell = SpellChecker()
tokens = spell.split_words(text)
# each corrected word is followed by a single space, matching the
# original's trailing-space output exactly
final = "".join(spell.correction(token) + " " for token in tokens)
print(final)
def specll_check(self, text):
    """Return ``text`` with each word replaced by its best spelling correction."""
    checker = SpellChecker()
    corrected = [checker.correction(token) for token in checker.split_words(text)]
    return " ".join(corrected)
#!/usr/bin/env python3
"""Spell-check the words given on the command line and print corrections.

Words that are unknown and have no better suggestion are shown as "?".
"""
import sys

from spellchecker import SpellChecker

# BUG FIX: the original bound the joined argv to `input`, shadowing the
# builtin; use a descriptive name instead.
text = " ".join(sys.argv[1:])
if any(c.isnumeric() for c in text):
    # it's got numbers, probably not meant for spell checking
    sys.exit(0)

spell = SpellChecker()
words = spell.split_words(text)
correction_pairs = [(word, spell.correction(word)) for word in words]
# '?' marks words that are out-of-dictionary AND unchanged by correction()
display = [
    "?" if not spell.known([word]) and word == correction else correction
    for word, correction in correction_pairs
]
print(" ".join(display), end="")
# --- fragment: build a Word document listing organization names with their
# --- spelling issues; document, update_date, complete_df, spell and now are
# --- defined earlier in the file ---
document.styles['Normal'].font.name = 'SimHei'
p = document.add_paragraph()
p_run = p.add_run('Org Name List Spell Checker')
p2 = document.add_paragraph('Last Update: ' + str(update_date))
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
p2.alignment = WD_ALIGN_PARAGRAPH.RIGHT
p_run.font.size = Pt(24)
table = document.add_table(rows=1, cols=1)
table.style = 'Table Grid'
# i = 0
item = ""
for index, row in complete_df.iterrows():
    # "<sign-up number>. <English organization name>"
    first_row = str(int(row['OrganizationSignUpListNumber您的机构在接龙里的序号'])
                    ) + '. ' + row['OrganizationNameInEnglish']
    words = spell.split_words(first_row)
    words_book = [spell.correction(word) for word in words]
    # NOTE(review): unknown() is run on the *corrected* words, not the
    # originals — confirm this is intended
    spell_check_result = spell.unknown(words_book)
    item = item + first_row + '\n' + str(spell_check_result) + '\n\n'
### breakdown
# everything is written into a single table cell
cell = table.cell(0, 0)
cell.text = item
document.save('./output/Org Name List ' + now + '.docx')
print('Word file generate successful!')
conversation.grid(column=0, row=2, sticky='nesw', padx=10, pady=10) #image load = Image.open("trumppet.png") render = ImageTk.PhotoImage(load) logo = ttk.Label(window, image=render, background='orange') logo.grid(column=1, row=2) window.pack(padx=10, pady=50) window.mainloop() while 1: data = "" msg = input("Enter message: ") # spellchecking, first tokenize, then run through spelling engine msg = spell.split_words(msg) msg = [spell.correction(token) for token in msg] # rejoin tokens and passes to sentiment analysis query = " ".join(msg) score = sentiment.polarity_scores(query)['compound'] print(score) # run through POS / named entity engine to get the user's topic things = pos(query) topicStuff = [[token.lemma_, token.text] for token in things if (token.dep_ == "dobj")] try: topic = topicStuff[0][0] except: