def test_bug_449000(self): # Test for sub() on escaped characters self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 'abc\ndef\n') self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 'abc\ndef\n') self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n') self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n')
def delete_derivational_suffix(self, word):
    """Strip a trailing ``-i``, ``-an`` or ``-kan`` from ``word``.

    When a suffix is found it is recorded in
    ``self.removed['derivational_suffix']`` and the stripped form is passed
    to ``self.lookup``.

    Returns the lookup result if it is truthy, otherwise the stripped form;
    ``word`` itself is returned when no suffix matches.
    """
    suffix_pattern = "(i|k?an)$"
    found = pcre.search(suffix_pattern, word)
    if not found:
        return word

    stem = pcre.sub(suffix_pattern, '', word)
    self.removed['derivational_suffix'] = found.group(0)
    # A truthy lookup result wins over the bare stem.
    return self.lookup(stem) or stem
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    #     str(y) if isinstance(y, str) else unicode(y).
    def check(pattern, repl, subject, want, want_type):
        # Both the value and the exact type of the result matter here.
        got = re.sub(pattern, repl, subject)
        self.assertEqual(got, want)
        self.assertEqual(type(got), want_type)

    for x in ('x', u'x'):
        for y in ('y', u'y'):
            check(x, y, u'', u'', unicode)
            check(x, y, '', '', str)
            check(x, y, unicode(x), y, unicode)
            check(x, y, str(x), y, type(y))
def delete_inflectional_suffix(self, word):
    """Strip a trailing particle and then a possessive pronoun from ``word``.

    Each suffix that matches is removed, recorded in ``self.removed`` under
    its category name, and the shortened form is passed to ``self.lookup``.

    Returns the first truthy lookup result; otherwise the form left after
    all applicable suffixes have been removed.
    """
    # Applied in this order: particle first, then possessive pronoun.
    suffix_rules = (
        ('particle', "([klt]ah|pun)$"),
        ('possessive_pronoun', "([km]u|nya)$"),
    )
    stem = word
    for category, regex in suffix_rules:
        hit = pcre.search(regex, stem)
        if hit is None:
            continue
        stem = pcre.sub(regex, '', stem)
        self.removed[category] = hit.group(0)
        lemma = self.lookup(stem)
        if lemma:
            return lemma
    return stem
def test_basic_re_sub(self): self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), '9.3 -3 24x100y') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), '9.3 -3 23x99y') self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') self.assertEqual(re.sub('.', r"\n", 'x'), '\n') s = r"\1\1" self.assertEqual(re.sub('(.)', s, 'x'), 'xx') self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s) self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a') self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
def test_bug_462270(self): # Test for empty sub() behaviour, see SF bug #462270 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-') self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
def test_bug_114660(self): self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 'hello there')
def test_qualified_re_sub(self): self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
def test_sub_template_numeric_escape(self): # bug 776311 and friends self.assertEqual(re.sub('x', r'\0', 'x'), '\0') self.assertEqual(re.sub('x', r'\000', 'x'), '\000') self.assertEqual(re.sub('x', r'\001', 'x'), '\001') self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') self.assertEqual(re.sub('x', r'\111', 'x'), '\111') self.assertEqual(re.sub('x', r'\117', 'x'), '\117') self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') self.assertEqual(re.sub('x', r'\400', 'x'), '\0') self.assertEqual(re.sub('x', r'\777', 'x'), '\377') self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') self.assertRaises(re.error, re.sub, 'x', r'\9', 'x') self.assertRaises(re.error, re.sub, 'x', r'\11', 'x') self.assertRaises(re.error, re.sub, 'x', r'\18', 'x') self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' # in python2.3 (etc), these loop endlessly in sre_parser.py self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 'xz8') self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 'xza')
def execute(mode, code, input_str):
    """Run one instruction and chain into the next one, if any.

    ``mode`` is a one-character opcode, ``code`` is its payload and
    ``input_str`` is the current input. Within ``code`` the separators
    ``&``, ``;``, `````, ``~``, ``%`` and ``!`` are matched with a
    backslash-aware lookbehind.

    Modes handled here:
      l    look ``input_str`` up in a table built from ``code`` ("?" is the
           fallback key)
      f/F  %-format ``code`` with the literal parsed from ``input_str``
           (F also replaces the input with the literal's printed length)
      g/h  print everything ``exrex.generate`` yields, then stop
      p/P  repeat marked sections of ``code``
      e    map every character of ``input_str`` through the table
      o    print the first back-tick piece and continue with the rest
      s/d  substitute / delete in the input, then continue
      S    print the substituted input and continue with an empty input
      i/I  append the input to ``code`` (I inserts a newline first)
    Any other mode passes ``code`` through unchanged.

    If the result starts with a back-tick it is executed as the next
    instruction; otherwise it is printed. Returns ``None``.
    """
    cell_sep = r"(?<![^\\]\\)&"
    row_sep = r"(?<![^\\]\\);"
    piece_sep = r"(?<![^\\]\\)`"

    result = ""
    if mode == "l":
        rows = (pcre.split(cell_sep, row) for row in pcre.split(row_sep, code))
        table = handle_table(rows)
        result = table[input_str] if input_str in table else table["?"]
    elif mode == "f":
        result = code % ast.literal_eval(input_str)
    elif mode == "F":
        literal = ast.literal_eval(input_str)
        result = code % literal
        # The next instruction receives the printed length of the literal(s).
        if isinstance(literal, tuple):
            input_str = str(sum(len(str(x)) for x in literal))
        else:
            input_str = str(len(str(literal)))
    elif mode == "g":
        for string in exrex.generate(code):
            print(unescape(string))
        return  # Generate is always terminal
    elif mode == "h":
        if isinstance(input_str, str):
            input_str = pcre.escape(input_str)
        for string in exrex.generate(code % input_str):
            print(unescape(string))
        return
    elif mode == "p":
        literal = ast.literal_eval(input_str)
        if isinstance(literal, int):
            result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~",
                              r"\1" * literal, code, flags=pcre.DOTALL)
        else:
            # NOTE(review): the inner "~" pattern has no escape lookbehind,
            # unlike the single-count branch above; kept as in the original.
            tilde_expanded = pcre.sub(r"~(.+?)~", r"\1" * literal[0], code,
                                      flags=pcre.DOTALL)
            result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%",
                              r"\1" * literal[1], tilde_expanded,
                              flags=pcre.DOTALL)
    elif mode == "P":
        result = pcre.sub(r"(.)(?<![^\\]\\)~",
                          r"\1" * ast.literal_eval(input_str), code,
                          flags=pcre.DOTALL)
    elif mode == "e":
        rows = (pcre.split(cell_sep, row) for row in pcre.split(row_sep, code))
        table = handle_table(rows)
        # Fix: iterate the input and index by the character; the original
        # referenced an undefined name ``i`` and raised NameError.
        result = "".join(table[char] for char in input_str)
    elif mode == "o":
        pieces = pcre.split(piece_sep, code)
        print(unescape(pieces[0]))
        result = handle_pieces(pieces[1:], "")
    elif mode == "s":
        pieces = pcre.split(piece_sep, code)
        subs = pcre.split(cell_sep, pieces[0])
        input_str = handle_subs(input_str, subs)
        result = handle_pieces(pieces[1:], input_str)
    elif mode == "d":
        pieces = pcre.split(piece_sep, code)
        for sub in pcre.split(cell_sep, pieces[0]):
            input_str = pcre.sub(sub, "", input_str)
        result = handle_pieces(pieces[1:], input_str)
    elif mode == "S":
        pieces = pcre.split(piece_sep, code)
        subs = pcre.split(cell_sep, pieces[0])
        output = unescape(handle_subs(input_str, subs))
        result = handle_pieces(pieces[1:], "")
        print(output)
    elif mode == "i":
        result = code + input_str
    elif mode == "I":
        result = code + "\n" + input_str
    else:
        result = code

    if len(result) > 0 and result[0] == "`":
        # result is "`<mode><code>[!<input>...]": run it as the next step.
        input_pieces = pcre.split(r"(?<![^\\]\\)!", result)
        if len(input_pieces) >= 2:
            execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
        else:
            execute(result[1], result[2:], get_input(input_str))
    else:
        print(unescape(result))
def remove_digits(input_str):
    """Return ``input_str`` with every run of digits removed."""
    digit_run = re.compile(r'\d+')
    return digit_run.sub('', input_str)
def eat(self, word, backtrack_step=False):
    """Run the affix-stripping steps on ``word`` looking for a lemma.

    If ``self.lookup(word)`` is truthy it is returned at once (and, outside
    backtracking, ``self.error`` is set to ``'input_is_lemma'``). Otherwise
    the numbered steps run in an order chosen from
    ``self.check_rule_precedence(word)``:

      3  delete_inflectional_suffix
      4  delete_derivational_suffix
      5  delete_derivational_prefix, up to three times
      6  recode
      7  backtracking: rebuild the word from the tracked prefixes, then
         re-attach the removed suffixes one at a time, re-running ``eat``
         in backtrack mode after each

    A backtracking call (``backtrack_step=True``) only runs steps 5 and 6.

    Returns ``self.found`` as soon as it is truthy after a step, otherwise
    ``word``. When nothing is found outside backtracking and no error is
    set yet, ``self.error`` becomes ``'lemma_not_found'``.
    """
    lemma = self.lookup(word)
    if lemma:
        if not backtrack_step:
            self.error = 'input_is_lemma'
        return lemma

    # Always called, as in the original, even though a backtracking run
    # ignores the answer.
    prefix_first = self.check_rule_precedence(word)
    if backtrack_step:
        steps = [5, 6]
    elif prefix_first:
        steps = [5, 6, 3, 4, 7]
    else:
        steps = [3, 4, 5, 6, 7]

    def retry(candidate):
        """Forget the tracked prefixes and re-run on ``candidate``."""
        self.removed['derivational_prefix'] = ''
        self.complex_prefix_tracker = {}
        self.eat(candidate, True)
        return self.found

    result = word
    temp = lemma
    for step in steps:
        if step == 3:
            temp = self.delete_inflectional_suffix(result)
        elif step == 4:
            temp = self.delete_derivational_suffix(result)
        elif step == 5:
            temp = result
            for i in range(3):
                previous = temp
                temp = self.delete_derivational_prefix(temp)
                tracked = self.removed['derivational_prefix']
                # Fix: the original wrote len(tracked > 3), which compares
                # a str with an int and raises TypeError.
                if ((i == 0 and self.has_disallowed_pairs())
                        or self.found
                        or temp == previous
                        or (isinstance(tracked, str) and len(tracked) > 3)):
                    break
        elif step == 6:
            temp = self.recode(result)
        elif step == 7:
            res = temp
            temp = ""
            for changes in self.complex_prefix_tracker.values():
                prefix_removed = list(changes.keys())[0]
                prefix_added = changes[prefix_removed]
                if prefix_added != '':
                    temp = pcre.sub('^{}'.format(prefix_added),
                                    prefix_removed, temp)
                else:
                    temp = temp + prefix_removed
            temp = temp + res
            if retry(temp):
                return self.found

            # return deriv suffix
            if not self.found and self.removed['derivational_suffix'] != '':
                if self.removed['derivational_suffix'] == 'kan':
                    # Try "-k" on its own first, then complete it to "-kan".
                    temp = temp + 'k'
                    if retry(temp):
                        return self.found
                    temp = temp + 'an'
                else:
                    temp = temp + self.removed['derivational_suffix']
                retry(temp)

            # return possessive pronoun
            if not self.found and self.removed['possessive_pronoun'] != '':
                temp = temp + self.removed['possessive_pronoun']
                if retry(temp):
                    return self.found

            # return particle
            if not self.found and self.removed['particle'] != '':
                temp = temp + self.removed['particle']
                if retry(temp):
                    return self.found

        if self.found:
            return self.found
        result = temp

    if not backtrack_step and not self.error:
        self.error = 'lemma_not_found'
    return word
def recode(self, word):
    """Re-attach tracked prefixes and try their alternative recodings.

    For every prefix in ``self.complex_prefix_tracker`` the recorded change
    is undone on a working copy of ``word``. Each alternative recoding in
    ``self.recoding_tracker[prefix]`` is then applied:

    * if ``self.lookup`` accepts the recoded form, the tracker entry for
      the prefix is replaced and that form is returned;
    * otherwise up to three more prefix deletions are attempted on it, and
      the intermediate form is returned as soon as ``self.found`` is set;
    * tracker entries and removed-prefix records added by those attempts
      are rolled back before the next recoding is tried.

    Returns ``word`` unchanged when no recoding succeeds.
    """
    result = word
    tracker = self.complex_prefix_tracker
    # Iterate a snapshot of the keys: the nested prefix deletion can add
    # entries to the tracker while this loop is running.
    for prefix in list(tracker):
        changes = tracker[prefix]
        recoding = self.recoding_tracker[prefix]
        prefix_removed = list(changes.keys())[0]
        prefix_added = changes[prefix_removed]

        # the code below is different from the original code due to how
        # python insert values to dict
        if prefix_added != '':
            result = pcre.sub('^{}'.format(prefix_added), prefix_removed,
                              result)
        else:
            result = prefix_removed + result

        if recoding == '':
            continue

        for raw_removed, added in recoding.items():
            # Digits are stripped from the recoding key before it is used.
            removed = pcre.sub("[0-9]+", "", raw_removed)
            candidate = pcre.sub('^{}'.format(removed),
                                 added if added else "", result)
            if self.lookup(candidate):
                self.complex_prefix_tracker[prefix] = {removed: added}
                return candidate

            record = candidate
            before = len(self.complex_prefix_tracker)
            for i in range(3):
                previous = record
                record = self.delete_derivational_prefix(record)
                if ((i == 0 and self.has_disallowed_pairs())
                        or record == previous
                        or len(self.removed['derivational_prefix']) > 3):
                    break
                elif self.found:
                    return record

            if len(self.complex_prefix_tracker) > before:
                # Roll back everything the attempts above added.
                stale = list(self.complex_prefix_tracker)[before:]
                for key in stale:
                    del self.complex_prefix_tracker[key]
                # Fix: one slice delete. The original deleted by ascending
                # index while the list was shrinking, which hit the wrong
                # element or raised IndexError.
                end = before + len(stale)
                del self.removed['derivational_prefix'][before:end]
    return word
def delete_derivational_prefix(self, word):
    """Strip one derivational prefix from ``word`` and record the change.

    Words shorter than four characters are returned untouched. "Plain"
    prefixes (``di``, ``ke``, ``se``) are removed outright. "Complex"
    prefixes (``be``, ``me``, ``pe``, ``te``) go through an ordered rule
    list, and the first rule whose probe pattern matches is applied.

    On success the method
      * stores the rewrite in ``self.complex_prefix_tracker[prefix]``,
      * may store alternative recodings in ``self.recoding_tracker[prefix]``,
      * appends the prefix to ``self.removed['derivational_prefix']``
        (turning the initial ``''`` into a list),
      * calls ``self.lookup`` on the stripped form,
    and returns the stripped form. When the prefix was already removed, a
    precedence check fails, or no rule applies, the word is returned
    unchanged.
    """
    vowel = self.vowel
    consonant = self.consonant
    result = word
    if len(result) < 4:
        return result

    def first_tracked_key():
        """Key of the rewrite stored for the first tracked prefix."""
        first_prefix = list(self.complex_prefix_tracker.keys())[0]
        return list(self.complex_prefix_tracker[first_prefix].keys())[0]

    def rule(probe, stripped, replacement='', recoding=None,
             key_from_match=None):
        """Bundle one rule.

        ``probe`` must match for the rule to apply, ``'^' + stripped`` is
        then replaced by ``replacement``. ``recoding`` (if any) is stored
        in the recoding tracker; ``key_from_match`` overrides the key under
        which the rewrite is tracked.
        """
        return probe, stripped, replacement, recoding, key_from_match

    prefix_kinds = (('plain', "^(di|(k|s)e)"), ('complex', "^(b|m|p|t)e"))
    for kind, pattern in prefix_kinds:
        match = pcre.match(pattern, result)
        if not match:
            continue

        prefix = match.group(0)
        removed = self.removed['derivational_prefix']
        if removed != '' and prefix in removed:
            return result
        self.recoding_tracker[prefix] = ''

        if kind == 'plain':
            # NOTE(review): the trailing "!= 'be'" can never be false once
            # removed[0] == 'di'; kept exactly as in the original.
            if prefix == 'ke' and removed != '' and (
                    removed[0] == 'di'
                    and not pcre.search('(tawa|tahu)', result)
                    and removed[0] != 'be'):
                return result
            result = pcre.sub(pattern, '', result)
            self.complex_prefix_tracker[prefix] = {prefix: ''}
        else:
            if prefix == 'be':
                if removed != '' and first_tracked_key() not in [
                        'mem', 'pem', 'di', 'ke']:
                    return result
                rules = [
                    # rule 1
                    rule("^ber{}".format(vowel), 'ber',
                         recoding={'be': ''}),
                    # rule 2
                    rule("^ber[bcdfghjklmnpqstvwxyz][a-z](?!er)", 'ber'),
                    # rule 3
                    rule("^ber[bcdfghjklmnpqstvwxyz][a-z]er{}".format(vowel),
                         'ber'),
                    # rule 4
                    rule("^belajar$", 'bel'),
                    # rule 5
                    rule("^be[bcdfghjkmnpqstvwxyz]er{}".format(consonant),
                         'be'),
                ]
            elif prefix == 'te':
                if removed != '':
                    pp = first_tracked_key()
                    if pp != 'ke' and pp in ['me', 'men', 'pen'] and not (
                            pcre.search('tawa', result)):
                        return result
                rules = [
                    # rule 6
                    rule("^ter{}".format(vowel), 'ter',
                         recoding={'te': ''}),
                    # rule 7
                    rule("^ter[bcdfghjklmnpqstvwxyz]er{}".format(vowel),
                         'ter'),
                    # rule 8
                    rule("^ter{}(?!er)".format(consonant), 'ter'),
                    # rule 9
                    rule("^te[bcdfghjklmnpqstvwxyz]er{}".format(consonant),
                         'te'),
                    # rule 10
                    rule("^ter[bcdfghjklmnpqstvwxyz]er{}".format(consonant),
                         'ter'),
                ]
            elif prefix == 'me':
                if removed != '':
                    return result
                rules = [
                    # rule 11
                    rule('^me[lrwy]{}'.format(vowel), 'me'),
                    # rule 12
                    rule('^mem[bfv]', 'mem'),
                    # rule 13
                    rule('^mempe', 'mem'),
                    # rule 14: the tracked key keeps the optional "r".
                    rule("^mem(r?)[aiueo]", 'me',
                         recoding={'mem': 'p'},
                         key_from_match=lambda m: 'me{}'.format(m.group(1))),
                    # rule 15
                    rule('^men[cdsjz]', 'men'),
                    # rule 16
                    rule('^men{}'.format(vowel), 'men', 't',
                         recoding={'me': ''}),
                    # rule 17
                    rule('^meng[ghqk]', 'meng'),
                    # rule 18
                    rule('^meng({})'.format(vowel), 'meng',
                         recoding={'meng1': 'k', 'menge': ''}),
                    # rule 19
                    rule('^meny{}'.format(vowel), 'me',
                         recoding={'meny': 's'}),
                    # rule 20
                    rule('^memp[abcdfghijklmnopqrstuvwxyz]', 'mem'),
                ]
            else:
                # The complex pattern only admits be/me/pe/te, so this
                # branch is the 'pe' prefix.
                if removed != '' and first_tracked_key() not in [
                        'di', 'ber', 'mem', 'se', 'ke']:
                    return result
                rules = [
                    # rule 21
                    rule('^pe[wy]{}'.format(vowel), 'pe'),
                    # rule 22
                    rule('^per{}'.format(vowel), 'per',
                         recoding={'pe': ''}),
                    # rule 23
                    rule('^per[bcdfghjklmnpqstvwxyz][a-z](?!er)', 'per'),
                    # rule 24
                    rule('^per[bcdfghjklmnpqstvwxyz][a-z]er{}'.format(vowel),
                         'per'),
                    # rule 25
                    rule('^pem[bfv]', 'pem'),
                    # rule 26
                    rule('^pem(r?){}'.format(vowel), 'pe',
                         recoding={'pem': 'p'}),
                    # rule 27
                    rule('^pen[cdjz]', 'pen'),
                    # rule 28
                    rule('^pen{}'.format(vowel), 'pen', 't',
                         recoding={'pe': ''}),
                    # rule 29
                    rule('^peng{}'.format(consonant), 'peng'),
                    # rule 30
                    rule('^peng({})'.format(vowel), 'peng',
                         recoding={'peng1': 'k', 'penge': ''}),
                    # rule 31
                    rule('^peny{}'.format(vowel), 'pe',
                         recoding={'peny': 's'}),
                    # rule 32: "pelajar" loses "pel", other words lose "pe".
                    rule('^pel{}'.format(vowel),
                         'pel' if result == 'pelajar' else 'pe'),
                    # rule 33
                    rule('^pe[bcdfghjkpqstvxz]er{}'.format(vowel), 'pe'),
                    # rule 34
                    rule('^pe[bcdfghjkpqstvxz](?!er)', 'pe'),
                    # rule 35
                    rule('^pe[bcdfghjkpqstvxz]er{}'.format(consonant), 'pe'),
                ]

            for probe, stripped, replacement, recoding, key_from_match in rules:
                hit = pcre.search(probe, result)
                if not hit:
                    continue
                result = pcre.sub('^' + stripped, replacement, result)
                tracked_key = key_from_match(hit) if key_from_match else stripped
                self.complex_prefix_tracker[prefix] = {tracked_key: replacement}
                if recoding is not None:
                    self.recoding_tracker[prefix] = recoding
                break
            else:
                # unsuccessful: no rule of this family applies
                del self.recoding_tracker[prefix]
                return word

        if self.removed['derivational_prefix'] == '':
            self.removed['derivational_prefix'] = []
        self.removed['derivational_prefix'].append(prefix)
        self.lookup(result)
        return result
    return result
def test_bug_449964(self): # fails for group followed by other escape self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 'xx\bxx\b')
def _decode_escapes(text):
    """Interpret backslash escape sequences in ``text`` before printing."""
    return text.encode("utf-8").decode("unicode-escape")


def _build_table(code):
    """Build the lookup table for modes "l" and "e".

    Rows are separated by ``;`` and cells by ``&``; every cell but the last
    is a key that maps to the row's last cell.
    """
    table = {}
    for row in pcre.split(r"(?<![^\\]\\);", code):
        cells = pcre.split(r"(?<![^\\]\\)&", row)
        table.update(dict.fromkeys(cells[:-1], cells[-1]))
    return table


def _substitute_pairs(text, subs):
    """Apply ``subs`` as consecutive (pattern, replacement) pairs to ``text``."""
    for start in range(0, len(subs), 2):
        text = pcre.sub(subs[start], subs[start + 1], text)
    return text


def _remaining_program(pieces, fallback):
    """Re-join the pieces after the first as a new program, or use ``fallback``."""
    if len(pieces) > 1:
        return "`" + "`".join(pieces[1:])
    return fallback


def execute(mode, code, input_str):
    """Run one instruction and chain into the next one, if any.

    ``mode`` is a one-character opcode, ``code`` is its payload and
    ``input_str`` is the current input. Within ``code`` the separators
    ``&``, ``;``, `````, ``~``, ``%`` and ``!`` are matched with a
    backslash-aware lookbehind.

    Modes handled here:
      l    look ``input_str`` up in a table built from ``code`` ("?" is the
           fallback key)
      f/F  %-format ``code`` with the literal parsed from ``input_str``
           (F also replaces the input with the literal's printed length)
      g/h  print everything ``exrex.generate`` yields, then stop
      p/P  repeat marked sections of ``code``
      e    map every character of ``input_str`` through the table
      o    print the first back-tick piece and continue with the rest
      s/d  substitute / delete in the input, then continue
      S    print the substituted input and continue without a result
      i/I  append the input to ``code`` (I inserts a newline first)
    Any other mode passes ``code`` through unchanged.

    If the result starts with a back-tick it is executed as the next
    instruction; otherwise it is printed with escapes decoded.
    Returns ``None``.
    """
    cell_sep = r"(?<![^\\]\\)&"
    piece_sep = r"(?<![^\\]\\)`"

    result = ""
    if mode == "l":
        table = _build_table(code)
        result = table[input_str] if input_str in table else table["?"]
    elif mode == "f":
        result = code % ast.literal_eval(input_str)
    elif mode == "F":
        literal = ast.literal_eval(input_str)
        result = code % literal
        # The next instruction receives the printed length of the literal(s).
        if isinstance(literal, tuple):
            input_str = str(sum(len(str(x)) for x in literal))
        else:
            input_str = str(len(str(literal)))
    elif mode == "g":
        for string in exrex.generate(code):
            print(_decode_escapes(string))
        return  # Generate is always terminal
    elif mode == "h":
        if isinstance(input_str, str):
            input_str = pcre.escape(input_str)
        for string in exrex.generate(code % input_str):
            print(_decode_escapes(string))
        return
    elif mode == "p":
        literal = ast.literal_eval(input_str)
        if isinstance(literal, int):
            result = pcre.sub(r"(?<![^\\]\\)~(.+?)(?<![^\\]\\)~",
                              r"\1" * literal, code, flags=pcre.DOTALL)
        else:
            # NOTE(review): the inner "~" pattern has no escape lookbehind,
            # unlike the single-count branch above; kept as in the original.
            tilde_expanded = pcre.sub(r"~(.+?)~", r"\1" * literal[0], code,
                                      flags=pcre.DOTALL)
            result = pcre.sub(r"(?<![^\\]\\)%(.+?)(?<![^\\]\\)%",
                              r"\1" * literal[1], tilde_expanded,
                              flags=pcre.DOTALL)
    elif mode == "P":
        result = pcre.sub(r"(.)(?<![^\\]\\)~",
                          r"\1" * ast.literal_eval(input_str), code,
                          flags=pcre.DOTALL)
    elif mode == "e":
        table = _build_table(code)
        # Fix: iterate the input and index by the character; the original
        # referenced an undefined name ``i`` and raised NameError.
        result = "".join(table[char] for char in input_str)
    elif mode == "o":
        pieces = pcre.split(piece_sep, code)
        print(_decode_escapes(pieces[0]))
        # Unconditional, as in the original: with no further pieces the
        # result is a lone back-tick.
        result = "`" + "`".join(pieces[1:])
    elif mode == "s":
        pieces = pcre.split(piece_sep, code)
        subs = pcre.split(cell_sep, pieces[0])
        input_str = _substitute_pairs(input_str, subs)
        result = _remaining_program(pieces, input_str)
    elif mode == "d":
        pieces = pcre.split(piece_sep, code)
        for sub in pcre.split(cell_sep, pieces[0]):
            input_str = pcre.sub(sub, "", input_str)
        result = _remaining_program(pieces, input_str)
    elif mode == "S":
        pieces = pcre.split(piece_sep, code)
        subs = pcre.split(cell_sep, pieces[0])
        output = _substitute_pairs(input_str, subs)
        result = _remaining_program(pieces, "")
        print(_decode_escapes(output))
    elif mode == "i":
        result = code + input_str
    elif mode == "I":
        result = code + "\n" + input_str
    else:
        result = code

    if len(result) > 0 and result[0] == "`":
        # result is "`<mode><code>[!<input>...]": run it as the next step.
        input_pieces = pcre.split(r"(?<![^\\]\\)!", result)
        if len(input_pieces) >= 2:
            execute(result[1], input_pieces[0][2:], "!".join(input_pieces[1:]))
        else:
            execute(result[1], result[2:], get_input(input_str))
    else:
        print(_decode_escapes(result))
def handle_subs(string, subs):
    """Apply ``subs`` to ``string`` as consecutive (pattern, replacement) pairs.

    ``subs`` is a flat sequence ``[pattern0, repl0, pattern1, repl1, ...]``;
    each substitution works on the output of the previous one. A sequence
    of odd length raises IndexError when the unpaired pattern is reached.
    """
    text = string
    for start in range(0, len(subs), 2):
        pattern, replacement = subs[start], subs[start + 1]
        text = pcre.sub(pattern, replacement, text)
    return text