def sentenize(s):
    """Sentenizes a string.

    Args:
        s (string): string to sentenize

    Returns:
        string: sentenized string
    """
    # Normalize whitespace, then split at the coarse sentence boundaries.
    s = apply_regex_list(s, NORM_REGEX)
    offsets = list(_boundary_gen(s, SPLIT_REGEX))
    s = '\n'.join(s[o[0]:o[1]] for o in offsets)
    # Refine the split, undo splits inside parentheses and around
    # abbreviations, and put enumeration items on their own lines.
    s = apply_regex_list(s, REFINED_SPLIT_REGEX)
    s = apply_regex_list(s, SUBSENTENCE_REGEX)
    s = apply_regex_list(s, RECOMBINE_REGEX)
    s = apply_regex_list(s, SPLIT_ENUM_REGEX)
    return s
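# Usage sketch (hypothetical input text; the exact output depends on the
# regex lists defined above):
#
#     text = 'Two sentences.Glued together. (1) Plus an enumeration item.'
#     print(sentenize(text))
#
# Each detected sentence ends up on its own line; a missing space after a
# period is handled by REFINED_SPLIT_REGEX (cf. test_refined_split below).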
def correct(s):
    """Corrects a string.

    Args:
        s (string): string to correct

    Returns:
        string: corrected string
    """
    return apply_regex_list(s, CORRECTION_REGEX)
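# Usage sketch, mirroring test_semi_colon_correction below:
#
#     correct('Errors with semi colons;they can happen;but are easy to correct.')
#     # -> 'Errors with semi colons; they can happen; but are easy to correct.'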
def sentenize_with_index(s):
    """Sentenizes a string but remembers at which positions the
    length-changing splits are made.

    Args:
        s (string): string to sentenize

    Returns:
        (string, list): sentenized string and the positions of the
            length-changing splits
    """
    indices = []
    offsets = list(_boundary_gen(s, SPLIT_REGEX))
    s = '\n'.join(s[o[0]:o[1]] for o in offsets)
    # Record the end of the first capture group for every match of the
    # length-changing refined-split regexes before they are applied.
    for r, _ in REFINED_SPLIT_REGEX_CHANGE_LENGTH:
        for match in reversed(list(r.finditer(s))):
            indices.append(match.span(1)[1])
    s = apply_regex_list(s, REFINED_SPLIT_REGEX_CHANGE_LENGTH)
    s = apply_regex_list(s, REFINED_SPLIT_REGEX_KEEP_LENGTH)
    s = apply_regex_list(s, SUBSENTENCE_REGEX)
    s = apply_regex_list(s, RECOMBINE_REGEX)
    s = apply_regex_list(s, SPLIT_ENUM_REGEX_KEEP_LENGTH)
    # Same bookkeeping for the length-changing enumeration splits.
    for r, _ in SPLIT_ENUM_REGEX_CHANGE_LENGTH:
        for match in reversed(list(r.finditer(s))):
            indices.append(match.span(1)[1])
    s = apply_regex_list(s, SPLIT_ENUM_REGEX_CHANGE_LENGTH)
    return s, indices
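# A minimal sketch of the apply_regex_list helper used throughout, assuming
# each *_REGEX list holds (compiled pattern, replacement) pairs; the
# `for r, _ in ...` unpacking above suggests that shape. The actual
# implementation lives elsewhere in this module:
#
#     def apply_regex_list(s, regex_list):
#         for pattern, replacement in regex_list:
#             s = pattern.sub(replacement, s)
#         return s
#
# Likewise, _boundary_gen(s, SPLIT_REGEX) is assumed to yield (start, end)
# offsets of the sentence spans detected in s.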
# Test imports (module paths assumed from the attribute access below;
# adjust to the actual package layout):
import sentenize
import corrections
from sentenize import apply_regex_list


def test_string_normalization():
    s = ' This is a test for string normalization in \n all cases. '
    s = apply_regex_list(s, sentenize.NORM_REGEX)
    assert s == 'This is a test for string normalization in\nall cases.'
def test_split_enumerations():
    s = 'Something quite annoying: (1) Enumerations are sometimes used as standalone sentences; (2) This is a case in which we want to split them off.'
    s = apply_regex_list(s, sentenize.SPLIT_ENUM_REGEX)
    assert s == 'Something quite annoying:\n(1) Enumerations are sometimes used as standalone sentences;\n(2) This is a case in which we want to split them off.'
def test_formtok_split():
    s = 'Strings should be split after a formtok When the next sentence starts with an upper case letter.'
    s = apply_regex_list(s, sentenize.REFINED_SPLIT_REGEX)
    assert s == 'Strings should be split after a formtok\nWhen the next sentence starts with an upper case letter.'
def test_recombination():
    s = 'Lets assume\nthere are splits.\nwhile there approx.\nshould be zero, e.\ng.\nBecause of abbreviations or Fig.\n5. As said by Test et al.\n[56].'
    s = apply_regex_list(s, sentenize.RECOMBINE_REGEX)
    assert s == 'Lets assume there are splits. while there approx. should be zero, e. g. Because of abbreviations or Fig. 5. As said by Test et al. [56].'
def test_subsentence_recognition():
    s = 'There should be no splits (even with stuff like this.\nBut well..).'
    s = apply_regex_list(s, sentenize.SUBSENTENCE_REGEX)
    assert s == 'There should be no splits (even with stuff like this. But well..).'
def test_refined_split():
    s = 'The refined split should do even more.For example find "errors".'
    s = apply_regex_list(s, sentenize.REFINED_SPLIT_REGEX)
    assert s == 'The refined split should do even more.\nFor example find "errors".'
def test_bracket_correction():
    s = 'Testing errors(performed with brackets)in order to make sure they do not happen'
    s = apply_regex_list(s, corrections.CORRECTION_REGEX)
    assert s == 'Testing errors (performed with brackets) in order to make sure they do not happen'
def test_semi_colon_correction():
    s = 'Errors with semi colons;they can happen;but are easy to correct.'
    s = apply_regex_list(s, corrections.CORRECTION_REGEX)
    assert s == 'Errors with semi colons; they can happen; but are easy to correct.'
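# End-to-end sketch (hypothetical raw_text; one plausible ordering, since
# the module itself does not prescribe whether correct() runs before
# sentenize()):
#
#     cleaned = correct(raw_text)
#     sentences = sentenize(cleaned).split('\n')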