Exemplo n.º 1
0
def lemm_doc(doc, language='de'):
    '''
        Run `lemm` over every non-empty entry of doc.

        doc:      iterable of entries; entries equal to '' are skipped.
        language: key into `file_map` that selects the vocabulary file
                  (defaults to 'de').

        Returns a list holding one `lemm` result per kept entry, in order.
    '''
    # Empty entries are dropped up front, before the vocabulary is loaded.
    words = [word for word in doc if word != '']
    vocabulary = file_as_dict(file_map[language])
    return [lemm(word, vocabulary) for word in words]
Exemplo n.º 2
0
def lemm_doc_stream(doc, language):
    '''
        Lazily run `lemm` over the entries of an iterable such as a stream.

        Newline characters are removed from every entry, and entries that
        are empty afterwards are skipped.

        doc:      iterable of strings (e.g. the lines of stdin).
        language: key into `file_map` that selects the vocabulary file.

        Returns a lazy `map` iterator; the vocabulary itself is loaded
        immediately, when this function is called.
    '''
    vocabulary = file_as_dict(file_map[language])
    stripped = map(lambda line: line.replace('\n', ''), doc)
    non_empty = filter(lambda line: line != '', stripped)
    return map(lambda line: lemm(line, vocabulary), non_empty)
Exemplo n.º 3
0
def category_to_answer(map, file):
    '''
        Print every category read from `file` that has no entry in `map`.

        map:  source handed to `file_as_dict` (sep=';', local=False); the
              keys of the resulting dict are lower-cased before the lookup.
        file: source handed to `file_as_list` (local=False); its entries
              are compared exactly as read, without lower-casing.

        Nothing is returned; missing categories are written to stdout.
    '''
    answers = file_as_dict(map, sep=';', local=False)
    known = {key.lower(): value for key, value in answers.items()}
    categories = file_as_list(file, local=False)

    for category in categories:
        if category in known:
            continue
        print(category)
Exemplo n.º 4
0
def test_file_as_dict(tmp_path):
    '''
        Check that `file_as_dict` (sep=';', local=False) turns CONTENT,
        written to a temporary file, into the expected mapping.
    '''
    csv_file = tmp_path / 'test.csv'
    csv_file.write_text(CONTENT)

    expected = {
        'category1': 'answer1',
        'category2': 'answer2',
        'category3': 'answer3'
    }
    parsed = file_as_dict(csv_file, sep=';', local=False)
    assert parsed == expected
Exemplo n.º 5
0
def ionesoft_lemming(question):
    '''
        Normalize question with `ionesoft_normalize`, then pass the result
        to `lemm` together with the vocabulary loaded from
        res/custom_ch_vocabular.txt.
    '''
    # The vocabulary is loaded before the question is normalized.
    vocabulary = file_as_dict('res/custom_ch_vocabular.txt')
    return lemm(ionesoft_normalize(question), vocabulary)
Exemplo n.º 6
0
def ionesoft_normalize(question):
    '''
        Run question through `normalize`, then through `correct` using the
        grammar loaded from res/custom_ch_grammar.txt.
    '''
    # The grammar is loaded before the question is normalized.
    rules = file_as_dict('res/custom_ch_grammar.txt')
    return correct(normalize(question), rules)
Exemplo n.º 7
0
def stackexchange_lemming(question):
    '''
        Normalize question with `stackexchange_normalize`, then pass the
        result to `lemm` together with the vocabulary loaded from
        res/custom_en_vocabular.txt.
    '''
    # The vocabulary is loaded before the question is normalized.
    vocabulary = file_as_dict('res/custom_en_vocabular.txt')
    return lemm(stackexchange_normalize(question), vocabulary)
Exemplo n.º 8
0
def correct_grammar_stream(doc, grammar='res/custom_ch_grammar.txt'):
    '''
        Lazily run `correct` over the entries of doc.

        Entries consisting of exactly one newline character are skipped.

        doc:     iterable of entries (e.g. the lines of a stream).
        grammar: source handed to `file_as_dict` to load the grammar.

        Returns a lazy `map` iterator; the grammar itself is loaded
        immediately, when this function is called.
    '''
    lines = filter(lambda line: line != '\n', doc)
    rules = file_as_dict(grammar)
    return map(lambda line: correct(line, rules), lines)
Exemplo n.º 9
0
def correct_grammar(doc, grammar='res/custom_ch_grammar.txt'):
    '''
        Run `correct` over every non-empty entry of doc.

        doc:     iterable of entries; entries equal to '' are skipped.
        grammar: source handed to `file_as_dict` to load the grammar.

        Returns a list holding one `correct` result per kept entry, in order.
    '''
    rules = file_as_dict(grammar)
    # Filter completely first, then correct, so both passes stay separate.
    entries = [entry for entry in doc if entry != '']
    return [correct(entry, rules) for entry in entries]