Example #1
def sentenize(s):
    """Sentenizes a string

    Args:
        s (string): string to sentenize

    Returns:
        string: sentenized string
    """
    s = apply_regex_list(s, NORM_REGEX)
    offsets = list(_boundary_gen(s, SPLIT_REGEX))
    s = '\n'.join(s[start:end] for start, end in offsets)
    s = apply_regex_list(s, REFINED_SPLIT_REGEX)
    s = apply_regex_list(s, SUBSENTENCE_REGEX)
    s = apply_regex_list(s, RECOMBINE_REGEX)
    s = apply_regex_list(s, SPLIT_ENUM_REGEX)

    return s
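
# Note: apply_regex_list is used by every snippet here but is not shown.
# A minimal sketch of what such a helper might look like, assuming each
# regex list holds (compiled_pattern, replacement) pairs; the actual
# implementation may differ.
def apply_regex_list(s, regex_list):
    """Apply a list of (compiled pattern, replacement) pairs to s in order."""
    for pattern, replacement in regex_list:
        s = pattern.sub(replacement, s)
    return s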
Example #2
def correct(s):
    """Correct a string

    Args:
        s (string): string to correct

    Returns:
        string: corrected string
    """
    s = apply_regex_list(s, CORRECTION_REGEX)

    return s
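
# Note: CORRECTION_REGEX itself is not shown. Judging from
# test_bracket_correction and test_semi_colon_correction further below, its
# entries might look roughly like the following; these patterns are only an
# illustration, not the actual ones.
import re

EXAMPLE_CORRECTION_REGEX = [
    (re.compile(r'(\w)\('), r'\1 ('),  # missing space before an opening bracket
    (re.compile(r'\)(\w)'), r') \1'),  # missing space after a closing bracket
    (re.compile(r';(\S)'), r'; \1'),   # missing space after a semicolon
]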
Example #3
def sentenize_with_index(s):
    """Sentenizes a string but remember at what position a change is made

    Args:
        s (string): string to sentenize

    Returns:
        string, positions: corrected string
    """
    indices = []
    offsets = list(_boundary_gen(s, SPLIT_REGEX))
    s = '\n'.join(s[start:end] for start, end in offsets)
    # Record the end of capture group 1 for every match of the
    # length-changing refinement regexes (iterated in reverse) before
    # applying them.
    for r, _ in REFINED_SPLIT_REGEX_CHANGE_LENGTH:
        for match in reversed(list(r.finditer(s))):
            indices.append(match.span(1)[1])
    s = apply_regex_list(s, REFINED_SPLIT_REGEX_CHANGE_LENGTH)
    s = apply_regex_list(s, REFINED_SPLIT_REGEX_KEEP_LENGTH)
    s = apply_regex_list(s, SUBSENTENCE_REGEX)
    s = apply_regex_list(s, RECOMBINE_REGEX)
    s = apply_regex_list(s, SPLIT_ENUM_REGEX_KEEP_LENGTH)
    # Likewise record match positions for the length-changing enumeration
    # regexes before applying them.
    for r, _ in SPLIT_ENUM_REGEX_CHANGE_LENGTH:
        for match in reversed(list(r.finditer(s))):
            indices.append(match.span(1)[1])
    s = apply_regex_list(s, SPLIT_ENUM_REGEX_CHANGE_LENGTH)
    return s, indices
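
# Note: _boundary_gen is not defined in these snippets. A plausible sketch,
# assuming SPLIT_REGEX is a single compiled pattern whose match positions
# mark sentence boundaries; the real generator may work differently.
def _boundary_gen(text, boundary_regex):
    """Yield (start, end) offsets of the spans between boundary matches."""
    cuts = [0]
    for match in boundary_regex.finditer(text):
        cuts.append(match.end())
    if cuts[-1] != len(text):
        cuts.append(len(text))
    for start, end in zip(cuts, cuts[1:]):
        yield start, end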
Example #4
def test_string_normalization():
    s = '  This is    a test for  string normalization in    \n  all cases. '
    s = apply_regex_list(s, sentenize.NORM_REGEX)
    assert s == 'This is a test for string normalization in\nall cases.'
Example #5
def test_split_enumerations():
    s = 'Something quite annoying: (1) Enumerations are sometimes used as standalone sentences; (2) This is a case in which we want to split them off.'
    s = apply_regex_list(s, sentenize.SPLIT_ENUM_REGEX)
    assert s == 'Something quite annoying:\n(1) Enumerations are sometimes used as standalone sentences;\n(2) This is a case in which we want to split them off.'
Example #6
def test_formtok_split():
    s = 'Strings should be split after a formtok When the next sentence starts with an upper case letter.'
    s = apply_regex_list(s, sentenize.REFINED_SPLIT_REGEX)
    assert s == 'Strings should be split after a formtok\nWhen the next sentence starts with an upper case letter.'
Example #7
def test_recombination():
    s = 'Lets assume\nthere are splits.\nwhile there approx.\nshould be zero, e.\ng.\nBecause of abbreviations or Fig.\n5. As said by Test et al.\n[56].'
    s = apply_regex_list(s, sentenize.RECOMBINE_REGEX)
    assert s == 'Lets assume there are splits. while there approx. should be zero, e. g. Because of abbreviations or Fig. 5. As said by Test et al. [56].'
Example #8
def test_subsentence_recognition():
    s = 'There should be no splits (even with stuff like this.\nBut well..).'
    s = apply_regex_list(s, sentenize.SUBSENTENCE_REGEX)
    assert s == 'There should be no splits (even with stuff like this. But well..).'
Example #9
def test_refined_split():
    s = 'The refined split should do even more.For example find "errors".'
    s = apply_regex_list(s, sentenize.REFINED_SPLIT_REGEX)
    assert s == 'The refined split should do even more.\nFor example find "errors".'
Example #10
def test_bracket_correction():
    s = 'Testing errors(performed with brackets)in order to make sure they do not happen'
    s = apply_regex_list(s, corrections.CORRECTION_REGEX)
    assert s == 'Testing errors (performed with brackets) in order to make sure they do not happen'
Example #11
def test_semi_colon_correction():
    s = 'Errors with semi colons;they can happen;but are easy to correct.'
    s = apply_regex_list(s, corrections.CORRECTION_REGEX)
    assert s == 'Errors with semi colons; they can happen; but are easy to correct.'
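
# Note: putting the pieces together. Module and function names follow the
# tests above (sentenize, corrections); running the corrections before
# sentenizing is an assumption, not something the snippets prescribe.
import corrections
import sentenize

raw = 'Errors with semi colons;they can happen(often).This should end up on two lines.'
cleaned = corrections.correct(raw)        # fix spacing around ';' and brackets
sentences = sentenize.sentenize(cleaned)  # split into one sentence per line
print(sentences)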