def stemreplacer(reader):
    """Append the spelling-replaced form of every line of *reader* to a file.

    Lines are pulled with ``reader.readline()`` until it returns a falsy
    value (e.g. ``''`` at end of file).  Each line is passed, unmodified, to
    ``SpellingReplacer.replace`` and the result is appended to
    ``stemreplacer.txt`` in the current working directory.

    Args:
        reader: Any object exposing a ``readline()`` method that returns
            a string per call and a falsy value when exhausted.

    Returns:
        None.
    """
    line = reader.readline()
    replacer = SpellingReplacer()
    # The context manager guarantees the output file is closed even when
    # replace() or readline() raises part-way through the input.
    with open("stemreplacer.txt", "a+") as fo:
        while line:
            fo.write(replacer.replace(line))
            line = reader.readline()
def spell_correction(tweet):
    """Replace misspelled tokens of *tweet* in place and return its ``str()``.

    Every token that the ``en_GB`` enchant dictionary does not recognise is
    handed to ``SpellingReplacer.replace`` and the result is stored back at
    the same index.

    Args:
        tweet: Mutable, indexable sequence of tokens (presumably a list of
            word strings -- confirm against callers).  It is modified in
            place.

    Returns:
        ``str(tweet)`` -- the string representation of the (mutated)
        sequence, not a re-joined sentence.
    """
    replacer = SpellingReplacer()
    dGB = enchant.Dict('en_GB')
    for index, token in enumerate(tweet):
        # enchant's Dict.check() raises ValueError on an empty string, which
        # shows up whenever the tweet was split on single spaces and
        # contained consecutive blanks.  Empty tokens are left untouched.
        if not token:
            continue
        if not dGB.check(token):
            tweet[index] = replacer.replace(token)
    return str(tweet)
def spelling_replacer(document):
    """Return *document* after passing it through ``SpellingReplacer.replace``.

    A new ``SpellingReplacer`` (default constructor arguments) is created on
    every call; the import is performed lazily inside the function.

    Args:
        document: Value forwarded unchanged to ``SpellingReplacer.replace``.

    Returns:
        Whatever ``SpellingReplacer.replace`` returns for *document*.
    """
    from replacers import SpellingReplacer

    return SpellingReplacer().replace(document)
<br class="spacer" /> </div> <!--body end --> <!--footer start --><!--footer end --> </body> </html> """ # open and read the csv file into memory file = open(saibaba.csv) reader = csv.reader(file) # iterate through the lines and print them to stdout # the csv module returns us a list of lists and we # simply iterate through it replacer = SpellingReplacer() fo1 = open("c-expansion.txt", "a+") fo6 = open("h-synset.txt", "a+") fo5 = open("g-spellcheck.txt", "a+") fo4 = open("f-stemmingwords.txt", "a+") fo3 = open("e-wordswithoutstopwords.txt", "a+") fo2 = open("d-words.txt", "a+") fo = open("a-sentence.txt", "a+") for line in reader: a = PunktSentenceTokenizer().tokenize(line[2]) fo.write('\n'.join(a)) review = '\n'.join(a) review = review.lower() #Convert www.* or https?://* to URL review = re.sub('((www\.[\s]+)|(https?://[^\s]+))', 'URL', review)
from replacers import SpellingReplacer
import enchant

# Demo: spell-correct a short, deliberately misspelled sentence word by word.
# The sentence is kept in ``sentence`` rather than ``input`` so the builtin
# ``input`` is not shadowed for the rest of the module.
sentence = "i ahve nt done"
input_list = sentence.split(" ")

replacer = SpellingReplacer()
length = len(input_list)
# print() with a single argument produces the same output on Python 2 and
# Python 3, unlike the ``print x`` statement form.
print(length)

dGB = enchant.Dict('en_GB')
for i, word in enumerate(input_list):
    # Only words the en_GB dictionary rejects are sent to the replacer.
    if not dGB.check(word):
        input_list[i] = replacer.replace(word)

print(str(input_list))
import enchant
from replacers import SpellingReplacer

# Run one misspelled word through a default SpellingReplacer and show
# what comes back.
replacer = SpellingReplacer()
corrected = replacer.replace('cookbok')
print(corrected)

# enchant: show the languages reported by enchant.list_languages().
available_languages = enchant.list_languages()
print(available_languages)
# Prints whether 'goose' comes back from the replacer unchanged.
print(replacer.replace('goose') == 'goose')

# 1_34 -- edit distance between two pairs of words.
for first_word, second_word in (("relate", "relation"),
                                ("suggestion", "calculation")):
    print(edit_distance(first_word, second_word))

# 1_35 -- Jaccard distance between two small sets.
X = {10, 20, 30, 40}
Y = {20, 30, 60}
print(jaccard_distance(X, Y))

# Section banner.
banner_rule = '================================'
print(banner_rule)
print('Spelling Correction with Enchant')
print(banner_rule)

# Prints whether the misspelling 'cookbok' is replaced by 'cookbook'.
replacer = SpellingReplacer()
print(replacer.replace('cookbok') == 'cookbook')

d = enchant.Dict('en')
# Author's recorded output:
# == ['language', 'languages', 'languor', "language's"]
print(d.suggest('languege'))

# Distance from the intended word to the misspelling and to another
# suggestion; each line prints the result of the comparison.
print(edit_distance('language', 'languege') == 1)
print(edit_distance('language', 'languor') == 3)

# Author's recorded output: == ['en', 'en_CA', 'en_GB', 'en_US']
print(enchant.list_languages())

# Dictionaries and replacers for the US and GB variants.
dUS = enchant.Dict('en_US')
dGB = enchant.Dict('en_GB')
us_replacer = SpellingReplacer('en_US')
gb_replacer = SpellingReplacer('en_GB')
import re
import pprint
from replacers import SpellingReplacer


def my_range(start, end, step):
    """Yield ``start``, ``start + step``, ... while the value is ``<= end``.

    Unlike the builtin ``range`` the upper bound is inclusive.  A *step* of
    zero (or a negative step with ``start <= end``) never terminates.
    """
    while start <= end:
        yield start
        start += step


# Rewrite "separate_text.py" in place, spell-correcting its longer lines.
# The whole file is read first because it is truncated when reopened for
# writing.
with open("separate_text.py", 'r') as f:
    lines = f.readlines()

# One replacer is enough for the whole run; it is built before the file is
# opened for writing so a construction failure cannot leave the file empty.
# NOTE(review): assumes SpellingReplacer.replace() keeps no per-call state.
replacer = SpellingReplacer()

with open("separate_text.py", 'w') as f:
    for line in lines:
        words = line.split()
        if len(words) > 4:
            # Output format: a leading space, then every corrected word
            # followed by a single space, then a newline.
            l = ' ' + ''.join(replacer.replace(word) + ' ' for word in words)
            f.write(l + '\n')
        elif not words:
            # Blank / whitespace-only lines are copied through unchanged.
            f.write(line)
        # NOTE(review): lines with one to four words are not written back
        # at all, exactly as before -- confirm that dropping them is intended.