def test(A_str, B_str): print ("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t") print ("B = " + B_str).replace("\n", "\\n").replace("\t", "\\t") print "---------------------------" A = regex.do(A_str, {}).extract_sm() B = regex.do(B_str, {}).extract_sm() # Determine lexeme set before union (possible modification) ## set0 = lexeme_set.get(A) ## set1 = lexeme_set.get(B) x = union.do([A, B]) y = union.do([B, A]) assert identity.do(x, y) ## if "SequenceAndLoop" not in sys.argv: ## result = lexeme_set.get(x) ## expectation = set0 ## expectation.update(set1) ## print "#result:", lexeme_set.lexeme_set_to_characters(result) ## print "#expect:", lexeme_set.lexeme_set_to_characters(expectation) ## assert result == expectation print "union = ", x print
def __core(Original, Cutter): print("Original = " + Original).replace("\n", "\\n").replace("\t", "\\t") print("Cutter = " + Cutter).replace("\n", "\\n").replace("\t", "\\t") orig = regex.do(Original, {}).extract_sm() cutter = regex.do(Cutter, {}).extract_sm() #print orig.get_string(NormalizeF=False) #print cutter.get_string(NormalizeF=False) # ComplementBegin = intersection(P, complement(Q)\Any*) result = derived.not_begin(orig, cutter) print if not result.is_Empty(): print "superset(Original, result): %s" % superset.do( orig, result) if not result.is_Empty(): tmp = clean(intersection.do([cutter, result])) print "intersection(Cutter, result) is None: %s" % tmp.is_Empty() tmp = clean(union.do([orig, result])) print "union(Original, result) == Original: %s" % identity.do( tmp, orig) print print "result = ", result.get_string(NormalizeF=True) assert_considerations(orig, cutter, result) return result
def __core(Pattern0, Pattern1): print("Pattern A = " + Pattern0).replace("\n", "\\n").replace("\t", "\\t") print("Pattern B = " + Pattern1).replace("\n", "\\n").replace("\t", "\\t") sm0 = regex.do(Pattern0, {}).extract_sm() sm1 = regex.do(Pattern1, {}).extract_sm() print "claim = ", outrun_check.do(sm0, sm1)
def __core(A, TailCandidate): print("Pattern = " + A).replace("\n", "\\n").replace("\t", "\\t") print("Tail = " + TailCandidate).replace("\n", "\\n").replace( "\t", "\\t") sm0 = regex.do(A, {}).extract_sm() sm1 = regex.do(TailCandidate, {}).extract_sm() only_common_f, common_f = tail.do(sm0, sm1) print "commonality: %s; exclusive: %s; " % (common_f, only_common_f)
def __core(A_str, B_str): print("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t") print("B = " + B_str).replace("\n", "\\n").replace("\t", "\\t") a_pattern = regex.do(A_str, {}) b_pattern = regex.do(B_str, {}) result = intersection.do([a_pattern.sm, b_pattern.sm]) print "intersection = ", result return result
def __core(A_str, B_str): print ("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t") print ("B = " + B_str).replace("\n", "\\n").replace("\t", "\\t") a_pattern = regex.do(A_str, {}) b_pattern = regex.do(B_str, {}) result = intersection.do([a_pattern.sm, b_pattern.sm]) print "intersection = ", result return result
def __core(Pattern0, Pattern1): print("Pattern0 = " + Pattern0).replace("\n", "\\n").replace("\t", "\\t") print("Pattern1 = " + Pattern1).replace("\n", "\\n").replace("\t", "\\t") p0 = regex.do(Pattern0, {}).finalize(None) p1 = regex.do(Pattern1, {}).finalize(None) verdict_f = identity_checker.do(p0, p1) print "claim = ", verdict_f
def __core(SuperPattern, SubPattern): print("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}).finalize(None) sub_p = regex.do(SubPattern, {}).finalize(None) verdict_f = superset.do(super_p, sub_p) print "claim = ", verdict_f
def __core(SuperPattern, SubPattern): print("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}).sm sub_p = regex.do(SubPattern, {}).sm print "result = ", difference.do(super_p, sub_p).get_string(NormalizeF=True)
def __core(Pattern0, Pattern1): print ("Pattern0 = " + Pattern0).replace("\n", "\\n").replace("\t", "\\t") print ("Pattern1 = " + Pattern1).replace("\n", "\\n").replace("\t", "\\t") p0 = regex.do(Pattern0, {}) p0.mount_post_context_sm() p0.mount_pre_context_sm() p1 = regex.do(Pattern1, {}) p1.mount_post_context_sm() p1.mount_pre_context_sm() print "claim = ", identity_checker.do(p0, p1)
def __core(SuperPattern, SubPattern): print("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}).extract_sm() sub_p = regex.do(SubPattern, {}).extract_sm() result = difference.do(super_p, sub_p) print "result = ", beautifier.do(difference.do( super_p, sub_p)) # .get_string(NormalizeF=False)
def __core(SuperPattern, SubPattern): print ("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print ("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}) super_p.mount_post_context_sm() super_p.mount_pre_context_sm() sub_p = regex.do(SubPattern, {}) sub_p.mount_post_context_sm() sub_p.mount_pre_context_sm() # print "##super:", super_p # print "##sub:", sub_p print "claim = ", superset.do(super_p, sub_p)
def __core(Pattern0, Pattern1): print("Pattern0 = " + Pattern0).replace("\n", "\\n").replace("\t", "\\t") print("Pattern1 = " + Pattern1).replace("\n", "\\n").replace("\t", "\\t") p0 = regex.do(Pattern0, {}) p0.mount_post_context_sm() p0.mount_pre_context_sm() p1 = regex.do(Pattern1, {}) p1.mount_post_context_sm() p1.mount_pre_context_sm() print "claim = ", identity_checker.do(p0, p1)
def __core(SuperPattern, SubPattern): print("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}) super_p.mount_post_context_sm() super_p.mount_pre_context_sm() sub_p = regex.do(SubPattern, {}) sub_p.mount_post_context_sm() sub_p.mount_pre_context_sm() # print "##super:", super_p # print "##sub:", sub_p print "claim = ", superset.do(super_p, sub_p)
def test(TestString, StartCharacterList): print "____________________________________________________________________" print "expr. = " + TestString.replace("\n", "\\n").replace("\t", "\\t") sm = core.do(TestString, {}).sm print "start = ", map(lambda char: char.replace("\t", "\\t"), StartCharacterList) code_list = map(lambda char: ord(char), StartCharacterList) print "verdict = ", repr(sm.get_init_state().target_map.has_one_of_triggers(code_list))
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.mount_post_context_sm() sm = pattern.sm print "state machine\n", sm
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}).finalize(None) print "pattern\n" print pattern.sm print "pre-context = ", reverse.do(pattern.sm_pre_context_to_be_reversed)
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() print "pattern\n", pattern
def test(TestString):
    # Make '\n' and '\t' visible inside the expression before parsing.
    # (The original also did 'TestString = "%s" % TestString' -- a no-op.)
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    print("RE: " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    sm = core.do(TestString, {}, AllowNothingIsNecessaryF=True).extract_sm()
    # print "#sm:", sm
    print("result: %s" % sm.longest_path_to_first_acceptance())
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() print "pattern\n", pattern
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.mount_post_context_sm() sm = pattern.sm print "state machine\n", sm
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}).finalize(None) # During 'finalize()': pattern.transform(Setup.buffer_encoding) # During 'finalize()': pattern.mount_post_context_sm() # During 'finalize()': pattern.mount_pre_context_sm() print "pattern\n" assert pattern.sm.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine(pattern.sm) sm = beautifier.do(pattern.sm) print sm.get_string(NormalizeF=True, Option="hex") if pattern.sm_pre_context_to_be_reversed: assert pattern.sm_pre_context_to_be_reversed.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine( pattern.sm_pre_context_to_be_reversed) reversed_sm = reverse.do(sm) print "pre-context = ", reversed_sm.get_string(NormalizeF=True, Option="hex") if pattern.sm_bipd_to_be_reversed: assert pattern.sm_bipd_to_be_reversed.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine( pattern.sm_bipd_to_be_reversed) sm = reverse.do(sm) print "post-context backward input position detector = ", sm.get_string( NormalizeF=True, Option="hex")
def test(TestString):
    # Escape whitespace, then optionally test the begin-of-line variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    print ("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {})
    pattern.prepare_count_info(counter_db, None)
    print ("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def test(TestString):
    # Escape whitespace; the command line may request the BOL variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    print("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {}).finalize(ca_map)
    print("info = {\n %s\n}\n" % str(pattern.lcci).replace("\n", "\n "))
def test(TestString):
    global choice
    # Escape whitespace; the 'Grid-BOL' choice tests the BOL variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if choice == "Grid-BOL":
        TestString = "^%s" % TestString
    print ("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {})
    pattern.prepare_count_info(counter_db, None)
    print ("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def test(TestString):
    global choice
    # Escape whitespace; 'Grid-BOL' requests the begin-of-line variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if choice == "Grid-BOL":
        TestString = "^%s" % TestString
    print("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {}).finalize(ca_map)
    print("info = {\n %s\n}\n" % str(pattern.lcci).replace("\n", "\n "))
def __core(Original, Cutter): print ("Original = " + Original).replace("\n", "\\n").replace("\t", "\\t") print ("Cutter = " + Cutter).replace("\n", "\\n").replace("\t", "\\t") orig = regex.do(Original, {}).sm cutter = regex.do(Cutter, {}).sm #print orig.get_string(NormalizeF=False) #print cutter.get_string(NormalizeF=False) result = clean(complement_end.do(orig, cutter)) print if not special.is_none(result): print "superset(Original, result): %s" % superset.do(orig, result) if not special.is_none(result): tmp = clean(intersection.do([cutter, result])) print "intersection(Cutter, result) is None: %s" % special.is_none(tmp) tmp = clean(union.do([orig, result])) print "union(Original, result) == Original: %s" % identity.do(tmp, orig) print print "result = ", result.get_string(NormalizeF=True)
def __test(RE_Core, RE_PostCondition): string_stream_Core = StringIO(RE_Core) string_stream_PostCondition = StringIO(RE_PostCondition) try: core_sm = regex.do(string_stream_Core, {}).extract_sm() except RegularExpressionException, x: print "Core Pattern:\n" + repr(x) return
def binary(ExprStrX, ExprStrY):
    # 'equal()' receives expression strings; presumably it evaluates them
    # against the module-level X and Y set here -- hence the globals.
    global X
    global Y
    X = regex.do(ExprStrX, {}).sm
    Y = regex.do(ExprStrY, {}).sm

    # Commutativity.
    equal("uni(X, Y)", "uni(Y, X)")
    equal("itsct(X, Y)", "itsct(Y, X)")
    # Reversal distributes over union and intersection.
    equal("rev(uni(rev(X), rev(Y)))", "uni(X, Y)")
    equal("rev(itsct(rev(X), rev(Y)))", "itsct(X, Y)")
    # De Morgan.
    equal("inv(itsct(X, Y))", "uni(inv(X), inv(Y))")
    equal("inv(uni(X, Y))", "itsct(inv(X), inv(Y))")
    # Difference identities.
    equal("diff(X, Y)", "itsct(X, inv(Y))")
    equal("itsct(diff(X, Y), Y)", "None")
    equal("uni(diff(X, Y), Y)", "uni(X, Y)")
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.transform(Setup.buffer_codec) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() print "pattern\n", pattern.get_string(NormalizeF=True, Option="hex")
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}) pattern.transform(Setup.buffer_codec) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() print "pattern\n", pattern.get_string(NormalizeF=True, Option="hex")
def test(TestString): print print "expression = \"" + TestString + "\"" stream = StringIO.StringIO(TestString) try: result = engine.do(stream, {}) print "result = " + result.get_string(Option="hex", NormalizeF=True) except exception.RegularExpressionException, x: print x.message
def test(TestString):
    global choice
    # Escape whitespace; the 'Grid-BOL' choice exercises the BOL variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if choice == "Grid-BOL":
        TestString = "^%s" % TestString
    print("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {})
    pattern.prepare_count_info(counter_db, None)
    print("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def test(TestString):
    # Escape whitespace; the command line may request the BOL variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    print("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {})
    pattern.prepare_count_info(counter_db, None)
    print("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" try: sm = core.do(TestString, {}) print "state machine\n", sm except RegularExpressionException, x: print x._message
def __test(RE_Core, RE_PostCondition): string_stream_Core = StringIO(RE_Core) string_stream_PostCondition = StringIO(RE_PostCondition) try: core_sm = regex.do(string_stream_Core, {}).sm except RegularExpressionException, x: print "Core Pattern:\n" + repr(x) return
def binary(ExprStrX, ExprStrY):
    # 'equal()' takes expression strings; presumably it evaluates them using
    # the module-level X and Y assigned below.
    global X
    global Y
    X = regex.do(ExprStrX, {}).sm
    Y = regex.do(ExprStrY, {}).sm

    # Commutativity of union and intersection.
    equal("uni(X, Y)", "uni(Y, X)")
    equal("itsct(X, Y)", "itsct(Y, X)")
    # Reversal distributes over both operations.
    equal("rev(uni(rev(X), rev(Y)))", "uni(X, Y)")
    equal("rev(itsct(rev(X), rev(Y)))", "itsct(X, Y)")
    # De Morgan's laws.
    equal("inv(itsct(X, Y))", "uni(inv(X), inv(Y))")
    equal("inv(uni(X, Y))", "itsct(inv(X), inv(Y))")
    # Identities involving the difference.
    equal("diff(X, Y)", "itsct(X, inv(Y))")
    equal("itsct(diff(X, Y), Y)", "None")
    equal("uni(diff(X, Y), Y)", "uni(X, Y)")
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" try: sm = core.do(TestString, {}) print "state machine\n", sm except RegularExpressionException, x: print x._message
def test(TestString, StartCharacterList): print "____________________________________________________________________" print "expr. = " + TestString.replace("\n", "\\n").replace("\t", "\\t") sm = core.do(TestString, {}).extract_sm() print "start = ", map(lambda char: char.replace("\t", "\\t"), StartCharacterList) code_list = map(lambda char: ord(char), StartCharacterList) print "verdict = ", repr( sm.get_init_state().target_map.has_one_of_triggers(code_list))
def do(PatternActionPairList, TestStr, PatternDictionary={}, Language="ANSI-C-PlainMemory", QuexBufferSize=15, # DO NOT CHANGE! SecondPatternActionPairList=[], QuexBufferFallbackN=-1, ShowBufferLoadsF=False, AssertsActionvation_str="-DQUEX_OPTION_ASSERTS"): BufferLimitCode = 0 Setup.buffer_limit_code = BufferLimitCode Setup.buffer_element_specification_prepare() Setup.buffer_codec_prepare("unicode", None) __Setup_init_language_database(Language) CompileOptionStr = "" computed_goto_f = False FullLanguage = Language if Language.find("StrangeStream") != -1: CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION " if Language.find("-CG") != -1: Language = Language.replace("-CG", "") CompileOptionStr += " -DQUEX_OPTION_COMPUTED_GOTOS " computed_goto_f = True if Language == "Cpp-Template": Language = "Cpp" # Shall template compression be used? Setup.compression_type_list = [ E_Compression.TEMPLATE ] Setup.compression_template_min_gain = 0 elif Language == "Cpp-Path": Language = "Cpp" Setup.compression_type_list = [ E_Compression.PATH ] elif Language == "Cpp-PathUniform": Language = "Cpp" Setup.compression_type_list = [ E_Compression.PATH_UNIFORM ] elif Language == "ANSI-C-PathTemplate": Language = "Cpp" Setup.compression_type_list = [ E_Compression.PATH, E_Compression.TEMPLATE ] Setup.compression_template_min_gain = 0 try: adapted_dict = {} for key, regular_expression in PatternDictionary.items(): string_stream = StringIO(regular_expression) pattern = regex.do(string_stream, adapted_dict) # It is ESSENTIAL that the state machines of defined patterns do not # have origins! Actually, there are not more than patterns waiting # to be applied in regular expressions. The regular expressions # can later be origins. assert pattern.sm.has_origins() == False adapted_dict[key] = PatternShorthand(key, pattern.sm) except RegularExpressionException, x: print "Dictionary Creation:\n" + repr(x)
def do(PatternActionPairList, TestStr, PatternDictionary={}, Language="ANSI-C-PlainMemory", QuexBufferSize=15, # DO NOT CHANGE! SecondPatternActionPairList=[], QuexBufferFallbackN=-1, ShowBufferLoadsF=False, AssertsActionvation_str="-DQUEX_OPTION_ASSERTS"): BufferLimitCode = 0 Setup.buffer_limit_code = BufferLimitCode Setup.buffer_codec_set(bc_factory.do("unicode", None), LexatomSizeInBytes=1) __Setup_init_language_database(Language) CompileOptionStr = "" computed_goto_f = False FullLanguage = Language if Language.find("StrangeStream") != -1: CompileOptionStr += " -DQUEX_OPTION_STRANGE_ISTREAM_IMPLEMENTATION " if Language.find("-CG") != -1: Language = Language.replace("-CG", "") CompileOptionStr += " -DQUEX_OPTION_COMPUTED_GOTOS " computed_goto_f = True if Language == "Cpp-Template": Language = "Cpp" # Shall template compression be used? Setup.compression_type_list = [ E_Compression.TEMPLATE ] Setup.compression_template_min_gain = 0 elif Language == "Cpp-Path": Language = "Cpp" Setup.compression_type_list = [ E_Compression.PATH ] elif Language == "Cpp-PathUniform": Language = "Cpp" Setup.compression_type_list = [ E_Compression.PATH_UNIFORM ] elif Language == "ANSI-C-PathTemplate": Language = "ANSI-C" Setup.compression_type_list = [ E_Compression.PATH, E_Compression.TEMPLATE ] Setup.compression_template_min_gain = 0 try: adapted_dict = {} for key, regular_expression in PatternDictionary.items(): string_stream = StringIO(regular_expression) pattern = regex.do(string_stream, adapted_dict) # It is ESSENTIAL that the state machines of defined patterns do not # have origins! Actually, there are not more than patterns waiting # to be applied in regular expressions. The regular expressions # can later be origins. assert pattern.sm.has_origins() == False adapted_dict[key] = PatternShorthand(key, pattern.sm) except RegularExpressionException, x: print "Dictionary Creation:\n" + repr(x)
def test(RE_Core, RE_PostCondition): string_stream_Core = StringIO(RE_Core) string_stream_PostCondition = StringIO(RE_PostCondition) # reset the index, so that things get a litter less 'historic' try: core_sm = regex.do(string_stream_Core, {}).sm except RegularExpressionException, x: print "Core Pattern:\n" + repr(x) return
def test_core(TestString): print "___________________________________________________________________________" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}, AllowNothingIsNecessaryF=True) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() if pattern is None: print "pattern syntax error" else: print "pattern\n", pattern print "begin of line = ", pattern.pre_context_trivial_begin_of_line_f
def test_core(TestString): print "___________________________________________________________________________" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}, AllowNothingIsNecessaryF=True) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() if pattern is None: print "pattern syntax error" else: print "pattern\n", pattern print "begin of line = ", pattern.pre_context_trivial_begin_of_line_f
def __test(RE_Core, RE_PostCondition): print "---------------------------------------------------------" print "core pattern =", RE_Core print "post condition pattern =", RE_PostCondition string_stream_Core = StringIO(RE_Core) string_stream_PostCondition = StringIO(RE_PostCondition) try: core_sm = regex.do(string_stream_Core, {}).sm except RegularExpressionException, x: print "Core Pattern:\n" + repr(x) return
def prepare(PatternStringList, GetPreContextSM_F=False):
    # Parse every pattern string and mount its pre-/post-contexts.
    pattern_list = [ regex.do(p, {}) for p in PatternStringList ]
    for pattern in pattern_list:
        pattern.mount_post_context_sm()
        pattern.mount_pre_context_sm()

    # Either combine the pre-context machines or the core machines.
    if GetPreContextSM_F:
        state_machine_list = [ pattern.pre_context_sm for pattern in pattern_list ]
    else:
        state_machine_list = [ pattern.sm for pattern in pattern_list ]

    sm = get_combined_state_machine(state_machine_list, False)  # May be 'True' later.
    return sm.normalized_clone()
def parse(fh, AllowNothingIsFineF=False, AllowStateMachineTrafoF=True): start_position = fh.tell() try: # (*) parse regular expression, build state machine pattern = regex.do(fh, blackboard.shorthand_db, AllowNothingIsNecessaryF = AllowNothingIsFineF, AllowStateMachineTrafoF = AllowStateMachineTrafoF) except RegularExpressionException, x: fh.seek(start_position) error_msg("Regular expression parsing:\n" + x.message, fh)
def test_core(TestString): print "___________________________________________________________________________" print "expression = \"" + TestString + "\"" Setup.dos_carriage_return_newline_f = True pattern = core.do(TestString, {}) pattern.mount_post_context_sm() pattern.mount_pre_context_sm() if pattern is None: print "pattern syntax error" else: print "pattern\n", pattern print "begin of line = ", pattern.pre_context_trivial_begin_of_line_f
def derived_binary(ExprStrX, ExprStrY):
    # 'equal()' receives expression strings; presumably it evaluates them
    # against the module-level X and Y assigned below.
    global X
    global Y
    X = regex.do(ExprStrX, {}).sm
    Y = regex.do(ExprStrY, {}).sm

    # Symmetric difference.
    equal("symdiff(X, Y)", "symdiff(Y, X)")
    equal("symdiff(X, Y)", "diff(uni(X,Y), itsct(X, Y))")
    # 'not_begin' identities.
    equal("itsct(Y, not_begin(X, Y))", "None")
    equal("itsct(X, not_begin(Y, X))", "None")
    equal("uni(X, not_begin(X, Y))", "X")
    equal("uni(Y, not_begin(Y, X))", "Y")
    # 'not_in' identities.
    equal("itsct(Y, not_in(X, Y))", "None")
    equal("itsct(X, not_in(Y, X))", "None")
    equal("uni(X, not_in(X, Y))", "X")
    equal("uni(Y, not_in(Y, X))", "Y")
    # 'not_end' identities.
    equal("itsct(Y, not_end(X, Y))", "None")
    equal("itsct(X, not_end(Y, X))", "None")
    equal("uni(X, not_end(X, Y))", "X")
    equal("uni(Y, not_end(Y, X))", "Y")
def test(Name, function, ExprStrX, ExprStrY, ExprStrZ): global X global Y X = regex.do(ExprStrX, {}).sm Y = regex.do(ExprStrY, {}).sm Z = regex.do(ExprStrZ, {}).sm if Name in ["NotIn", "NotBegin", "NotEnd", "Difference"]: r0 = function(X, Y) r1 = function(X, Z) else: r0 = function([X, Y]) r1 = function([X, Z]) state_indices_0 = set(r0.states.iterkeys()) state_indices_1 = set(r1.states.iterkeys()) print "%s _________________________________________" % Name if not state_indices_0.isdisjoint(state_indices_1): print "Error: Two results contain common state indices." print "Error:", state_indices_0.intersection(state_indices_1) else: print "Oll Korrect"
def test(A_str): print "_____________________________________________________________________" if isinstance(A_str, (str, unicode)): print ("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t") sm = regex.do(A_str, {}).sm else: sm = A_str print "A = ", sm result_1st = complement.do(sm) print "complement(A):", result_1st result_2nd = complement.do(result_1st) print print "union(A, complement(A)): All =", is_all(union.do([sm, result_1st])) print "intersection(A, complement(A)): None =", is_none(intersection.do([sm, result_1st])) print "identity(A, complement(complement(A)):", identity.do(sm, result_2nd)
def test(TestString):
    # Escape whitespace; the command line may request the BOL variant.
    TestString = TestString.replace("\n", "\\n").replace("\t", "\\t")
    if "BeginOfLine" in sys.argv:
        TestString = "^%s" % TestString
    print ("expr. = " + TestString).replace("\n", "\\n").replace("\t", "\\t")

    pattern = core.do(TestString, {})

    # Prepare transformation info according to choice.
    Setup.buffer_element_specification_prepare()
    if "UTF8" in sys.argv:
        Setup.buffer_codec_prepare("utf8", Module=utf8_state_split)
    else:
        Setup.buffer_codec_prepare("utf16", Module=utf16_state_split)

    # Count
    pattern.prepare_count_info(counter_db, Setup.buffer_codec)
    print ("info = {\n %s\n}\n" % str(pattern.count_info()).replace("\n", "\n "))
def __parse(Txt_or_File, ExtractFunction=None, Name=None, Terminator=None, AllowNothingIsFineF=False): if Txt_or_File.__class__ in [file, StringIO]: sh = Txt_or_File else: sh = StringIO(Txt_or_File) # (*) Parse the pattern => A Pattern object start_position = sh.tell() try: pattern = regex.do(sh, blackboard.shorthand_db, AllowNothingIsNecessaryF = AllowNothingIsFineF, SpecialTerminator = Terminator) except RegularExpressionException, x: sh.seek(start_position) error_msg("Regular expression parsing:\n" + x.message, sh)
def test(TestString, PatternDict): try: print "expression = " + TestString print "state machine\n", core.do(TestString, PatternDict) except RegularExpressionException, x: print "Expression Expansion:\n" + repr(x)
print "state machine\n", core.do(TestString, PatternDict) except RegularExpressionException, x: print "Expression Expansion:\n" + repr(x) pattern_dict = { "DIGIT": '[0-9]', "NAME": '[A-Z][a-z]+', "NUMBER": '{DIGIT}("."{DIGIT}*)?', "IDENTIFIER": '[_a-z][_a-z0-9]*', "SPACE": '[ \t\n]' } try: adapted_dict = {} for key, regular_expression in pattern_dict.items(): string_stream = StringIO(regular_expression) state_machine = core.do(string_stream, adapted_dict).sm # It is ESSENTIAL that the state machines of defined patterns do not # have origins! Actually, there are not more than patterns waiting # to be applied in regular expressions. The regular expressions # can later be origins. adapted_dict[key] = PatternShorthand(key, state_machine) except RegularExpressionException, x: print "Dictionary Creation:\n" + repr(x) test('{DIGIT}("."{DIGIT}*)?', adapted_dict) test('{NAME}("."{DIGIT}*)?', adapted_dict) test('FOR{SPACE}+{NAME}{SPACE}={NUMBER}', adapted_dict)
import os sys.path.append(os.environ["QUEX_PATH"]) import quex.output.graphviz.core as plotter import quex.input.regular_expression.engine as regex from quex.input.files.mode import PatternActionInfo from quex.blackboard import setup as Setup Setup.normalize_f = True if "--hwut-info" in sys.argv: print "Plot: Pre-Context." sys.exit(0) pattern = regex.do("[Hh]ello/a((b+ee(fe)*)+(b+cd)?)/", {}) pattern.mount_pre_context_sm() pattern_list = [ pattern ] my_plotter = plotter.Generator(pattern_list, "test-plot") my_plotter.do() # HWUT consideres '##' as comment for line in open(my_plotter.pre_context_file_name).readlines(): # .replace("#", "##") if line.find("digraph") != -1: print "digraph state_machine {" else:
print "string = ", string_to_match letter_code_list = utf8.map_n_utf8_to_unicode(string_to_match) norm_db, x, x = sm.get_state_index_normalization() state_index = the_state_machine.init_state_index letter_n = -1 for letter_code in letter_code_list: letter_n += 1 if letter_n % 5 == 0: sys.stdout.write("\n") state_index = sm.states[state_index].target_map.get_resulting_target_state_index(letter_code) sys.stdout.write("'%s' --> (%s), " % (utf8.map_unicode_to_utf8(letter_code), repr(norm_db[state_index]).replace("L",""))) if state_index == -1: break print print "_____________________________________________________________________________" regex_str = "h[alowe ]+t" print "regular expression = '%s'" % regex_str sm = re2sm.do(regex_str, {}).sm print sm test(sm, "hallo welt") test(sm, "haaawwwolellewat") print "_____________________________________________________________________________" regex_str = "a+(b|c)*t" print "regular expression = '%s'" % regex_str sm = re2sm.do(regex_str, {}).sm print sm test(sm, "aaaacccbbt") test(sm, "abcbcbct")
def __core(Pattern0, Pattern1): print ("Pattern A = " + Pattern0).replace("\n", "\\n").replace("\t", "\\t") print ("Pattern B = " + Pattern1).replace("\n", "\\n").replace("\t", "\\t") sm0 = regex.do(Pattern0, {}).sm sm1 = regex.do(Pattern1, {}).sm print "claim = ", outrun_check.do(sm0, sm1)