Пример #1
0
import operator
import re
import numpy as np
from string import punctuation
from bor_tree import Candidate, Node, BorTree
from error_model import ErrorModel
from language_model import LanguageModel
from split_join_model import SplitJoin
from layout import switch_layout
# Rebind `punctuation` (imported from `string`) to its regex-escaped form so
# it can be concatenated into a character class when splitting queries.
punctuation = re.escape(punctuation)

# Module-level model objects, created once when this module is imported.
error_model = ErrorModel()
# NOTE(review): 'error.json' is a relative path — presumably resolved against
# the current working directory; confirm against ErrorModel.load_json.
error_model.load_json('error.json')
language_model = LanguageModel()
# NOTE(review): same relative-path assumption as above for 'language.json'.
language_model.load_json('language.json')
# BorTree is constructed from both loaded models; fit() is called with no
# arguments right after construction (what it builds is defined in bor_tree).
tree = BorTree(error_model, language_model)
tree.fit()
# SplitJoin shares the same LanguageModel instance as the tree.
split_join = SplitJoin(language_model)


def fix_query(query):

    tokens = re.split('([' + punctuation + ' ' + '])', query)
    candidate = fix_tokens(tokens)

    layout, switch = switch_layout(query)
    if switch:
        return layout
    joined_tokens, join = split_join.join(tokens)
    if join: