Пример #1
0
def stemreplacer(reader):
    """Append every line read from *reader* to ``stemreplacer.txt`` after
    passing it through ``SpellingReplacer.replace``.

    Args:
        reader: Object exposing ``readline()``; reading stops at the first
            falsy value it returns (e.g. ``""`` at end of file).

    Returns:
        None. The output file is opened in ``"a+"`` mode, so repeated calls
        keep appending to the same file.
    """
    line = reader.readline()
    replacer = SpellingReplacer()
    # The context manager closes the output file even when readline() or
    # replace() raises part-way through; the manual close() could be skipped.
    with open("stemreplacer.txt", "a+") as fo:
        while line:
            fo.write(replacer.replace(line))
            line = reader.readline()
Пример #2
0
def spell_correction(tweet):
    """Replace every item of *tweet* that the ``en_GB`` dictionary rejects.

    Items are reassigned by index, so *tweet* is modified in place; each
    rejected item is replaced by ``SpellingReplacer.replace(item)``.

    Returns:
        ``str(tweet)`` after the replacements.
    """
    corrector = SpellingReplacer()
    word_count = len(tweet)
    british_dict = enchant.Dict('en_GB')

    for position in range(word_count):
        # Items the dictionary accepts are left untouched.
        if british_dict.check(tweet[position]):
            continue
        tweet[position] = corrector.replace(tweet[position])

    return str(tweet)
Пример #3
0
def spelling_replacer(document):
    """Return the result of ``SpellingReplacer().replace(document)``."""
    # Function-scope import: `replacers` is loaded when this function is
    # called, not when the enclosing module is imported.
    from replacers import SpellingReplacer
    return SpellingReplacer().replace(document)
    <br class="spacer" />
            </div>
            <!--body end -->
            <!--footer start --><!--footer end -->
    </body>
    </html>
    
"""

# Open the CSV file and wrap it in a csv reader.
# The filename has to be a string literal: the bare expression `saibaba.csv`
# is an attribute lookup on an undefined name and raises NameError.
file = open("saibaba.csv")
reader = csv.reader(file)
# csv.reader yields each row as a list of column values; the loop below
# walks the rows one at a time.
replacer = SpellingReplacer()
# Output files, all opened for appending.
# NOTE(review): `replacer` and fo1..fo6 are not used in the visible part of
# this script -- presumably later processing stages write to them; confirm.
fo1 = open("c-expansion.txt", "a+")

fo6 = open("h-synset.txt", "a+")
fo5 = open("g-spellcheck.txt", "a+")
fo4 = open("f-stemmingwords.txt", "a+")
fo3 = open("e-wordswithoutstopwords.txt", "a+")
fo2 = open("d-words.txt", "a+")
fo = open("a-sentence.txt", "a+")
# Build the sentence tokenizer once instead of once per row.
sentence_tokenizer = PunktSentenceTokenizer()
for line in reader:
    # Split the third column of the row into sentences.
    a = sentence_tokenizer.tokenize(line[2])
    # One sentence per line; the same text is written out and processed further.
    review = '\n'.join(a)
    fo.write(review)
    review = review.lower()
    # Convert www.* or https?://* to URL.
    # The first alternative previously read `www\.[\s]+`, which only matched
    # "www." followed by whitespace; `[^\s]+` matches the rest of the address,
    # mirroring the https? alternative.  Raw string avoids invalid-escape
    # warnings for `\.` and `\s`.
    review = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', review)
Пример #5
0
from replacers import SpellingReplacer
import enchant
# Sample sentence containing misspellings.  Bound to `sentence` rather than
# `input` so the builtin input() is not shadowed.
sentence = "i ahve nt done"
input_list = sentence.split(" ")
replacer = SpellingReplacer()
length = len(input_list)
# print() with a single argument produces the same output on Python 2 and 3;
# the bare `print length` statement form is a SyntaxError on Python 3.
print(length)
dGB = enchant.Dict('en_GB')
# Only words the en_GB dictionary rejects are handed to the replacer; the
# list is updated in place by index.
for i, word in enumerate(input_list):
    if not dGB.check(word):
        input_list[i] = replacer.replace(word)
print(str(input_list))
Пример #6
0
import enchant

from replacers import SpellingReplacer

# Run a single misspelled word through a default-constructed replacer and
# show what comes back.
replacer = SpellingReplacer()
corrected = replacer.replace('cookbok')
print(corrected)

# enchant: show what enchant.list_languages() reports.
available_languages = enchant.list_languages()
print(available_languages)
Пример #7
0
# Show whether replace() hands 'goose' back unchanged.
goose_result = replacer.replace('goose')
print(goose_result == 'goose')

# 1_34: edit distance for two word pairs.
for first_word, second_word in (("relate", "relation"),
                                ("suggestion", "calculation")):
    print(edit_distance(first_word, second_word))

# 1_35: Jaccard distance between two small integer sets.
X = set([10, 20, 30, 40])
Y = set([20, 30, 60])
print(jaccard_distance(X, Y))

# Section banner.
for banner_line in (
    '================================',
    'Spelling Correction with Enchant',
    '================================',
):
    print(banner_line)

replacer = SpellingReplacer()
cookbok_fixed = replacer.replace('cookbok')
print(cookbok_fixed == 'cookbook')

d = enchant.Dict('en')
suggestions = d.suggest('languege')
# Original author's expected output:
# == ['language', 'languages', 'languor', "language's"]
print(suggestions)

# Distances from 'language' to two of the candidate spellings.
for candidate, expected_distance in (('languege', 1), ('languor', 3)):
    print(edit_distance('language', candidate) == expected_distance)

# Original author's expected output: == ['en', 'en_CA', 'en_GB', 'en_US']
print(enchant.list_languages())

# Per-locale dictionaries and replacers (US and British English).
dUS = enchant.Dict('en_US')
dGB = enchant.Dict('en_GB')
us_replacer = SpellingReplacer('en_US')
gb_replacer = SpellingReplacer('en_GB')
Пример #8
0
import re
import pprint
from replacers import SpellingReplacer


def my_range(start, end, step):
    """Yield ``start``, ``start + step``, ... up to and including *end*.

    Unlike the built-in ``range``, the upper bound is inclusive. Nothing is
    yielded when ``start > end``.

    Args:
        start: First value to yield.
        end: Inclusive upper bound.
        step: Increment added after each yielded value; must be positive
            whenever there is anything to yield.

    Raises:
        ValueError: If ``start <= end`` and *step* is not positive. Such a
            call could never reach *end* and previously looped forever.
            Because this is a generator, the error surfaces on the first
            iteration rather than at call time.
    """
    if start <= end and not step > 0:
        raise ValueError("my_range() step must be positive, got %r" % (step,))
    while start <= end:
        yield start
        start += step


# Read the whole file up front, because the same path is rewritten below.
with open("separate_text.py", 'r') as f:
    lines = f.readlines()

# A single replacer serves every word; constructing one per word was
# loop-invariant work.
replacer = SpellingReplacer()

# The context manager guarantees the rewritten file is flushed and closed;
# the write handle was previously never closed.
with open("separate_text.py", 'w') as f:
    for line in lines:
        g = line.split()
        if len(g) > 4:
            # Output layout is unchanged: one leading space, then each
            # replaced word followed by a single space, then a newline.
            l = ' ' + ''.join(replacer.replace(word) + ' ' for word in g)
            f.write(l + '\n')
        elif not g:
            # Empty / whitespace-only lines are copied through unchanged.
            f.write(line)
        # NOTE(review): lines with 1-4 words match neither branch and are
        # dropped from the rewritten file, as before -- confirm this is
        # intended.