Exemplo n.º 1
0
def load(chunk_len=200, debug=False):
    """Split the source text into entries and push them through a ``Bulk``.

    The names ``source_slug``, ``name_slug`` and ``slug`` are not defined
    here; they are looked up in the enclosing scope (presumably module-level
    settings -- confirm against the rest of the file).

    Steps:
      1. Print ``source_slug`` and ``name_slug``.
      2. Create ``Bulk(source_slug, name_slug, chunk_len)``.
      3. Cut the text into parts with ``get_parts`` (separator: a newline
         followed by three spaces) and normalise each part with
         ``join_lines`` and then ``prettify``.
      4. Look for a headword at the very start of the part: a run of
         upper-case Cyrillic letters, digits and hyphens (whitespace and
         ``?`` are also allowed after the first character) that is followed
         by a non-word character.
      5. Parts without a headword go to ``bulk.append_desc``; parts whose
         headword passes ``check_word`` go to ``bulk.add``; the rest are
         skipped.
      6. Call ``bulk.process()`` once every part has been handled.

    :param chunk_len: forwarded unchanged to ``Bulk`` as its third argument.
    :param debug: forwarded unchanged to ``check_word``.
    :return: None.
    """
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print(source_slug)
    print(name_slug)

    bulk = Bulk(source_slug, name_slug, chunk_len)
    parts = get_parts(slug, '\n   ')

    # The pattern is anchored with '^' and compiled without re.MULTILINE, so
    # it can only ever match at the start of an entry.
    headword_re = re.compile(
        u'^([-А-ЯЁ\d][-А-ЯЁ\d\s?]*)\W', re.UNICODE)

    for raw_part in parts:
        entry = prettify(join_lines(raw_part))
        found = headword_re.match(entry)
        if found is None:
            # No headword at the start: hand the text to append_desc instead
            # of registering a new word.
            bulk.append_desc(entry)
        else:
            word = prettify(found.group(1), encoding=False)
            if check_word(word, debug):
                bulk.add(word, entry)

    bulk.process()