def __init__(self, fileobj, fieldnames):
    """Read *fileobj* completely and tokenize its contents up front.

    Args:
        fileobj: Object with a ``read()`` method; its whole content is
            consumed immediately and kept in ``self.contents``.
        fieldnames: Stored unchanged on ``self.fieldnames``.

    After construction ``self.tokens`` holds the result of tokenizing the
    contents and ``self.tokenpos`` is 0.
    """
    self.fieldnames = fieldnames
    self.contents = fileobj.read()

    # "," and "\n" are the parser's default tokens, "\r" is passed as its
    # whitespace -- presumably so that Windows line endings are ignored;
    # confirm against sparse.SimpleParser.
    tokenizer = sparse.SimpleParser(
        defaulttokenlist=[",", "\n"],
        whitespacechars="\r",
    )
    self.parser = tokenizer

    # String escaping is switched off and the double quote is the only
    # quote character configured.
    tokenizer.stringescaping = 0
    tokenizer.quotechars = '"'

    self.tokens = tokenizer.tokenize(self.contents)
    self.tokenpos = 0
def simplify(string):
    """Return *string* with every non-alphanumeric character removed.

    Characters are kept when ``isalnum()`` is true for them, so letters and
    digits (including non-ASCII ones) survive, while whitespace and
    punctuation are dropped.

    Args:
        string: The text to simplify. Expected to be a text string; each
            element yielded by iterating it must provide ``isalnum()``.

    Returns:
        A string made of the alphanumeric characters of *string*, in their
        original order. An empty or fully non-alphanumeric input yields
        an empty string.
    """
    # Join explicitly instead of returning ``filter(...)``: on Python 3
    # ``filter`` yields a lazy, single-use iterator rather than a string.
    return "".join(char for char in string if char.isalnum())