示例#1
0
def punctuation_variations(string, language):
    """Build every keep/drop combination of the punctuation found in a string.

    Each match of the language's punctuation regex is either left as its
    matched text or replaced by the empty string; one query is built per
    combination through ``build_variated_query``.

    Args:
        string: Text searched for punctuation.
        language: Passed to ``get_punctuation_regex`` to select the pattern.

    Returns:
        A set of the values returned by ``build_variated_query``, one call
        per combination (2 ** number_of_matches calls). The set is empty
        when the regex matches nothing.
    """
    punctuation_regex = get_punctuation_regex(language)
    found = sorted(punctuation_regex.finditer(string),
                   key=lambda match: match.start())
    if not found:
        return set()

    # One span dict per match; the same dict objects are reused across
    # all combinations.
    spans = [{START: match.start(), END: match.end()} for match in found]
    # Per match: option 0 keeps the matched text, option 1 removes it.
    options = [(match.group(0), "") for match in found]

    variations = set()
    for replacements in itertools.product(*options):
        ranges_and_utterances = list(zip(spans, replacements))
        variations.add(build_variated_query(string, ranges_and_utterances))
    return variations
示例#2
0
def punctuation_variations(string, language):
    """Return the queries obtained by keeping or removing each punctuation hit.

    The punctuation regex for ``language`` is applied to ``string``. Every
    hit can independently stay as its matched text or be swapped for an
    empty string, and ``build_variated_query`` is called once for each
    such assignment.

    Args:
        string: Text to scan.
        language: Forwarded to ``get_punctuation_regex``.

    Returns:
        The set of ``build_variated_query`` results; an empty set if the
        regex finds no hit in ``string``.
    """
    hits = list(get_punctuation_regex(language).finditer(string))
    if not hits:
        return set()

    hits.sort(key=lambda hit: hit.start())
    spans = [{START: hit.start(), END: hit.end()} for hit in hits]
    texts = [hit.group(0) for hit in hits]

    # A flag of 0 keeps the matched text, 1 replaces it with "".
    # NOTE: the number of combinations is 2 ** len(hits).
    return {
        build_variated_query(
            string,
            [(span, "" if dropped else text)
             for span, text, dropped in zip(spans, texts, flags)])
        for flags in itertools.product(range(2), repeat=len(hits))
    }