def __init__(self):
    """Create the generator and text cleanser and build the dispatch table.

    ``cleanse_methods`` maps the generator's ``IBM_SIM``, ``SSK_SIM`` and
    ``PHONETIC_ED_SIM`` constants to the bound ``TextCleanser`` method
    registered for each of them.
    """
    self.gen = Generator()
    self.cleanser = TextCleanser()

    # Keys come from the generator instance, values are bound methods of
    # the cleanser instance created just above.
    self.cleanse_methods = {
        self.gen.IBM_SIM: self.cleanser.heuristic_cleanse,
        self.gen.SSK_SIM: self.cleanser.ssk_cleanse,
        self.gen.PHONETIC_ED_SIM: self.cleanser.phonetic_ED_cleanse,
    }
def __init__(self):
    """Create the text cleanser, its name lookup table and the gold lists.

    ``cleanse_methods`` maps the string keys ``'IBM'``, ``'SSK'`` and
    ``'PHONETIC_ED'`` to bound ``TextCleanser`` methods.  The three
    ``gold_*`` attributes start out as separate empty lists.
    """
    self.cleanser = TextCleanser()

    # Lookup table from method name to the bound cleanser method.
    self.cleanse_methods = {
        'IBM': self.cleanser.ibm_cleanse,
        'SSK': self.cleanser.ssk_cleanse,
        'PHONETIC_ED': self.cleanser.phonetic_cleanse,
    }

    # Three distinct list objects (not one shared list).
    self.gold_sent_clean, self.gold_word_pairs, self.gold_sent_pairs = (
        [], [], []
    )
class CleanserWebService():
    """Thin wrapper that runs ``TextCleanser.ssk_cleanse`` on a text.

    NOTE(review): ``expose`` is defined outside this snippet; presumably it
    publishes ``clean`` as a web endpoint -- confirm against the import.
    """

    def __init__(self):
        """Create the ``TextCleanser`` instance used by ``clean``."""
        self.tc = TextCleanser()

    @expose
    def clean(self, text):
        """Cleanse ``text`` and return either the result or the error.

        Returns the cleaned text when ``ssk_cleanse`` reports an empty
        error string; otherwise the error value itself is returned in its
        place.  The replacements returned by ``ssk_cleanse`` are not used.
        """
        cleaned, err, _replacements = self.tc.ssk_cleanse(text)
        # An empty error string is the success signal.
        return cleaned if err == "" else err
class CleanserWebService():
    """Service object exposing a single ``clean`` operation.

    Holds one ``TextCleanser`` and delegates to its ``ssk_cleanse`` method.
    NOTE(review): ``expose`` comes from outside this snippet; it looks like
    a web-framework decorator -- confirm.
    """

    def __init__(self):
        """Instantiate the underlying ``TextCleanser``."""
        self.tc = TextCleanser()

    @expose
    def clean(self, text):
        """Return the cleansed form of ``text``, or the error on failure.

        ``ssk_cleanse`` must return a 3-tuple of (clean text, error,
        replacements).  When the error equals the empty string the clean
        text is returned; in every other case the error is returned.
        """
        outcome = self.tc.ssk_cleanse(text)
        cleantext, error, _ = outcome  # replacements are ignored here
        if error == "":
            return cleantext
        # An error occurred: hand it back instead of the cleaned text.
        return error
def __init__(self):
    """Create the ``TextCleanser`` instance and store it as ``self.tc``."""
    cleanser = TextCleanser()
    self.tc = cleanser
as input on stdin and outputs normalised strings on stdout. Author: Stephan Gouws Contact: [email protected] """ from cleanser import TextCleanser import json import codecs import getopt import sys, time from random import choice if __name__ == '__main__': # print "Noisy text cleanser" clnsr = TextCleanser() text = sys.stdin.readline() while (text): if len(text) <= 1: break # cleantext,error,replacements = clnsr.heuristic_cleanse(text, gen_off_by_ones=False) # to use a phonetic edit-distance based similarity function, use the # method below: # cleantext,error,replacements = clnsr.phonetic_ED_cleanse(text, gen_off_by_ones=False) # to use SSK-based cleanser, use cleantext, error, replacements = clnsr.ssk_cleanse( text, gen_off_by_ones=False) if error == "ERROR": sys.stderr.write("ERROR")
as input on stdin and outputs normalised strings on stdout. Author: Stephan Gouws Contact: [email protected] """ from cleanser import TextCleanser import json import codecs import getopt import sys, time from random import choice if __name__ == '__main__': # print "Noisy text cleanser" clnsr = TextCleanser() text=sys.stdin.readline() while (text): if len(text)<=1: break # cleantext,error,replacements = clnsr.heuristic_cleanse(text, gen_off_by_ones=False) # to use a phonetic edit-distance based similarity function, use the # method below: # cleantext,error,replacements = clnsr.phonetic_ED_cleanse(text, gen_off_by_ones=False) # to use SSK-based cleanser, use cleantext,error,replacements = clnsr.ssk_cleanse(text, gen_off_by_ones=False) if error=="ERROR": sys.stderr.write("ERROR") continue