Примеры использования join_lines в Python

Язык программирования: Python

Пространство имен/Пакет: dictionaries.utils.file

Метод/Функция: join_lines

Примеров на hotexamples.com: 2

Python join_lines — найдено 2 примера. Это лучшие примеры Python-кода для dictionaries.utils.file.join_lines, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: ozhegov_shvedova.py Проект: 2vitalik/words

def load(chunk_len=200, debug=False):
    print source_slug
    print name_slug
    bulk = Bulk(source_slug, name_slug, chunk_len)
    parts = get_parts(slug, '\n\n')
    for desc in parts:
        desc = prettify(join_lines(desc))
        first_words = re.findall(u'^([-А-ЯЁ\d][-А-ЯЁ\d]*\.?\.?\.?)', desc,
                                 re.UNICODE)
        if len(first_words) != 1:
            print u'Ошибка в первом слове:'
            print '#', desc
        word = first_words[0]
        words = [word]
        cutted = desc[len(word):]

        other_words = re.findall(u'\W([А-ЯЁ][-А-ЯЁ\d]+\.?\.?\.?)\W', cutted,
                                 re.UNICODE)
        # todo: слова с пробелами: "ВСЕ Ж ТАКИ" (2 случая)
        for word in other_words:
            length = cutted.index(word) + len(word)
            wrong_words = [
                u'США', u'США.', u'СССР', u'ССР', u'СЯ', u'СЯ1', u'ЧК', u'СЯ2',
                u'ТЕ', u'СЯ1-2', u'ЭВМ', u'ВЛКСМ', u'Т-34']
            # 70 получено опытным путем и годится только для этого файла
            if length <= 70 and word not in wrong_words:
                words.append(word)

        for i in range(len(words)):
            word = remove_last_dot(words[i])
            if re.search('\d-\d-\d$', word):
                word = word[:-5]
            elif re.search('\d-\d$', word):
                word = word[:-3]
            elif re.search('\d$', word):
                word = word[:-1]
            elif re.search('\d-\d\.\.\.$', word):
                word = word[:-6] + word[-3:]
            elif re.search('\d\.\.\.$', word):
                word = word[:-4] + word[-3:]
            elif re.search('\d', word):
                print u'Ошибка! Цифра в слове: ', word
            words[i] = word

        words = set(words)  # remove duplicates

        for word in words:
            if not check_word(word, debug):  # "-" and "..."
                continue
            bulk.add(word, desc)
    bulk.process()

Пример #2

Показать файл

Файл: dalf.py Проект: 2vitalik/words

def load(chunk_len=200, debug=False):
    print source_slug
    print name_slug
    bulk = Bulk(source_slug, name_slug, chunk_len)
    lines = get_parts(slug, '\n   ')
    for desc in lines:#[:1000]:
        desc = prettify(join_lines(desc))
        words = re.findall(u'^([-А-ЯЁ\d][-А-ЯЁ\d\s?]*)\W', desc, re.UNICODE)
        if not words:
            bulk.append_desc(desc)
            continue
        word = prettify(words[0], encoding=False)
        if not check_word(word, debug):
            continue
        bulk.add(word, desc)
    bulk.process()