# NOTE(review): this line looks like a whitespace-collapsed fragment of a larger
# script (original newlines and indentation lost). As it stands, Python executes
# only the assignment before the first '#'; everything after it on the line --
# including the secondary-candidate fallback, the selector calls, the append to
# `correct` and the final `output.build(...)` call -- is comment text and does
# NOT run. The dangling `else:` clauses and the names `word`, `pos`, `for_prev`,
# `tweet_id` and `j` presumably belong to an enclosing if/loop that is not
# visible here -- TODO: restore the original layout from version control.
# NOTE(review): the collapsed text assigns both `class_number` and `class_numb`;
# possibly a typo or leftover duplicate -- confirm when restoring.
IVcandidates = primary.generate(word) # if no primary candidates generated if len(IVcandidates) == 0: IVcandidates = secondary.generate(word) # if no secondary candidates generated if len(IVcandidates) == 0: class_number = 1 correct_word = '-' class_numb = 1 else: prev_tokens = selector.prev_tokens((word, pos), for_prev) correct_word = selector.choose(prev_tokens, IVcandidates) for_prev[for_prev.index((word, pos))] = (correct_word, pos) # if class is correct or NoES else: correct_word = '-' correct[tweet_id][j].append((word, class_number, correct_word)) output.build(splitter.texts, splitter.order, correct)
# Author's open notes (translated from Spanish):
# - added class number to correct
# - use the language-modeling tokenizer?
# - abbreviations with or without a period
# - for names, split them and let the parts be used??
# - check sound... the letter h?? example: Shanto gives santo
# - spelling if it ends with b, c
# - how do I handle the sms.txt part
# - in secondary, add something for reps
# - keep a memory (cache) of candidates?
# - use ascii or hardcode?
# - laughs ending in j or starting with a vowel