def punctuation_variations(string, language):
    """Return the variations of ``string`` built by keeping or dropping punctuation.

    Every match of ``get_punctuation_regex(language)`` in ``string`` is
    either kept as matched or replaced by the empty string. Each possible
    keep/drop assignment is handed to ``build_variated_query`` and the
    results are collected in a set.

    Args:
        string: The text to search for punctuation matches.
        language: Passed to ``get_punctuation_regex`` to select the pattern.

    Returns:
        A set of the values returned by ``build_variated_query``; an empty
        set when the pattern does not match anywhere in ``string``.
    """
    found = list(get_punctuation_regex(language).finditer(string))
    if not found:
        return set()

    # Process matches in order of their start offset.
    found.sort(key=lambda match: match.start())

    # One range dict per match, reused by every combination.
    spans = [{START: match.start(), END: match.end()} for match in found]
    # Index 0 keeps the matched text, index 1 replaces it with "".
    alternatives = [(match.group(0), "") for match in found]

    results = set()
    # 2 ** len(found) keep/drop assignments are enumerated.
    for picks in itertools.product(range(2), repeat=len(found)):
        replacements = [
            (spans[pos], alternatives[pos][pick])
            for pos, pick in enumerate(picks)
        ]
        results.add(build_variated_query(string, replacements))
    return results
def punctuation_variations(string, language):
    """Generate every keep-or-remove punctuation variant of ``string``.

    The pattern from ``get_punctuation_regex(language)`` is run over
    ``string``. For each match there are two options: the matched text
    itself, or an empty string. Every combination of options across all
    matches is passed to ``build_variated_query`` together with the
    original string.

    Args:
        string: Input text scanned for punctuation matches.
        language: Forwarded to ``get_punctuation_regex``.

    Returns:
        The set of ``build_variated_query`` results, or an empty set if
        nothing in ``string`` matches the pattern.
    """
    variants = set()
    hits = sorted(
        get_punctuation_regex(language).finditer(string),
        key=lambda hit: hit.start(),
    )
    if not hits:
        return variants

    ranges = []
    options = []
    for hit in hits:
        ranges.append({START: hit.start(), END: hit.end()})
        # First option keeps the punctuation, second option drops it.
        options.append((hit.group(0), ""))

    # The product over the per-match option pairs visits all
    # 2 ** len(hits) combinations, keep-first for each position.
    for chosen_texts in itertools.product(*options):
        ranges_and_texts = list(zip(ranges, chosen_texts))
        variants.add(build_variated_query(string, ranges_and_texts))
    return variants