Пример #1
0
                IVcandidates = primary.generate(word)
                # if no primary candidates generated
                if len(IVcandidates) == 0:
                    IVcandidates = secondary.generate(word)
                # if no secondary candidates generated
                if len(IVcandidates) == 0:
                    class_number = 1
                    correct_word = '-'
                    class_numb = 1
                else:
                    prev_tokens = selector.prev_tokens((word, pos), for_prev)
                    correct_word = selector.choose(prev_tokens, IVcandidates)
                    for_prev[for_prev.index((word, pos))] = (correct_word, pos)
            # if class is correct or NoES
            else:
                correct_word = '-'
            correct[tweet_id][j].append((word, class_number, correct_word))
# Hand splitter.texts and splitter.order, together with `correct` (filled
# above with (word, class_number, correct_word) tuples), to output.build.
output.build(splitter.texts, splitter.order, correct)

# TODO / open questions:
# - added the class number to `correct`
# - use the language-modeling tokenizer?
# - abbreviations with or without a trailing period
# - for names: split them and use the parts??
# - check sound... the letter h?? e.g. "Shanto" gives "santo"
# - spelling: case where the word ends with b, c
# - how to handle the sms.txt part
# - in secondary, add something for repetitions
# - keep a cache (memo) of candidates?
# - use ascii or hardcode?
# - laughter ending in "j" or starting with a vowel