def testGetTokenData(self):
    """Exercise LexerStateTransition.getTokenData against a simple stream."""
    LexerStateTransitionTest.logger.debug(
        'Testing the matching of a sequence.')
    # Test fixtures
    match_location = (3, 4)
    offset_start = 0
    offset_end = 4
    source = 'Jul 11 09:51:54'
    stream = Stream(source, 'Unit Test Stream')
    stream.element_buffer = list(source)
    transition = LexerStateTransition('', offset_start, offset_end, True,
                                      None, 'Unit Test State Transition')
    # Log the fixtures
    LexerStateTransitionTest.logger.debug('Created stream:\n%s' %
                                          (stream.to_pretty_json()))
    LexerStateTransitionTest.logger.debug('Created state transition:\n%s' %
                                          (transition.to_pretty_json()))
    # Exercise the unit under test
    data = transition.getTokenData(stream, match_location)
    # Log the outcome
    LexerStateTransitionTest.logger.debug('Token data found: "%s"' % (data))
    LexerStateTransitionTest.logger.debug('Final stream state:\n%s' %
                                          (stream.to_pretty_json()))
    # Check the result: everything up to the match start should be returned
    assert data == source[:match_location[0]], \
        'The data from the accepted buffer is incorrect.'
    LexerStateTransitionTest.logger.debug('Test succeeded!')
def findMatchPattern(self, source, pattern, start, end):
    """Build a stream/transition pair, run findMatch, and return its results.

    Returns:
        (data, new_buffer): the matched data and the stream's element
        buffer after the match attempt.
    """
    # Build the fixtures
    stream = Stream(source, 'Unit Test Stream')
    stream.element_buffer = list(source)
    if pattern == Element.EOS:
        # Position at end-of-stream so the EOS pattern can fire
        stream.current_position = len(stream.element_buffer)
    transition = LexerStateTransition(pattern, start, end, True, None,
                                      'Unit Test State Transition')
    # Log the fixtures
    LexerStateTransitionTest.logger.debug('Created stream:\n%s' %
                                          (stream.to_pretty_json()))
    LexerStateTransitionTest.logger.debug('Created state transition:\n%s' %
                                          (transition.to_pretty_json()))
    # Exercise the unit under test
    data = transition.findMatch(stream)
    new_buffer = stream.element_buffer
    # Log the outcome
    LexerStateTransitionTest.logger.debug('Token data found: "%s"' % (data))
    LexerStateTransitionTest.logger.debug('New buffer: %s' %
                                          (str(new_buffer)))
    LexerStateTransitionTest.logger.debug('Final stream state:\n%s' %
                                          (stream.to_pretty_json()))
    return (data, new_buffer)
def testSplitBuffer(self):
    """Verify that splitBuffer() divides a buffer at the given offsets."""
    LexerStateTransitionTest.logger.debug(
        'Testing the matching of a sequence.')
    # Test fixtures
    test_data = 'Jul 11 09:51:54'
    element_buffer = list(test_data)
    end_index = 3
    start_index = 7
    transition = LexerStateTransition('', 0, 0, True, None,
                                      'Unit Test State Transition')
    # Log the fixtures
    LexerStateTransitionTest.logger.debug('Input buffer: %s' %
                                          (str(element_buffer)))
    # Exercise the unit under test; note the (buffer, end, start) order
    accepted_buffer, remaining_buffer = transition.splitBuffer(
        element_buffer, end_index, start_index)
    # Log the outcome
    LexerStateTransitionTest.logger.debug('Data from accepted buffer: %s' %
                                          (str(accepted_buffer)))
    LexerStateTransitionTest.logger.debug(
        'Data from remaining buffer: %s' % (str(remaining_buffer)))
    # Check both halves of the split
    assert accepted_buffer == list(test_data[:end_index]), \
        'The data from the accepted buffer is incorrect.'
    assert remaining_buffer == list(test_data[start_index:]), \
        'The data from the remaining buffer is incorrect.'
    LexerStateTransitionTest.logger.debug('Test succeeded!')
def testGetTokenData(self): LexerStateTransitionTest.logger.debug('Testing the matching of a sequence.') # Test data location = (3, 4) start_offet = 0 end_offset = 4 source = 'Jul 11 09:51:54' stream = Stream(source, 'Unit Test Stream') stream.element_buffer = list(source) transition = LexerStateTransition('', start_offet, end_offset, True, None, 'Unit Test State Transition') # Show test data LexerStateTransitionTest.logger.debug('Created stream:\n%s' % (stream.to_pretty_json())) LexerStateTransitionTest.logger.debug('Created state transition:\n%s' % (transition.to_pretty_json())) # Run test data = transition.getTokenData(stream, location) # Show test output LexerStateTransitionTest.logger.debug('Token data found: "%s"' % (data)) LexerStateTransitionTest.logger.debug('Final stream state:\n%s' % (stream.to_pretty_json())) # Verify results assert data == source[:location[0]], 'The data from the accepted buffer is incorrect.' LexerStateTransitionTest.logger.debug('Test succeeded!')
def createSourceLexers(self):
    """Build the lexer/state chain for the "source" field of a log line.

    Returns:
        ((source_lexer, comp_lexer, comp_id_lexer), source_state) where
        source_state is the state an outer lexer should start in.
    """
    # End state: accepting state that never emits data
    end_token = Token('end', Token.NEVER_DATA, True, 'End Token')
    end_state = LexerState([], end_token, True, 'End State')
    # Component id lexer: runs until end-of-stream
    comp_id_to_end_transition = LexerStateTransition(
        'EOS', -1, 0, True, end_state, 'Component ID to End Transition')
    comp_id_token = Token('component_id', Token.SOMETIMES_DATA, True,
                          'Component ID Token')
    comp_id_state = LexerState([comp_id_to_end_transition], comp_id_token,
                               False, 'Component ID State')
    comp_id_lexer = Lexer(None, 'component_id', [], 'Component ID Lexer')
    # Component lexer: ends at EOS or at a literal '[' opening the id.
    comp_to_end_transition = LexerStateTransition(
        'EOS', 0, 0, True, end_state, 'Component to End Transition')
    # Raw string: '\[' is an invalid escape in a plain string literal
    # (SyntaxWarning since Python 3.12); the regex value is unchanged.
    comp_id_transition = LexerStateTransition(
        r'\[', 0, 1, True, comp_id_state,
        'Component to Component ID Transition')
    comp_token = Token('component', Token.ALWAYS_DATA, True,
                       'Component Token')
    comp_state = LexerState([comp_to_end_transition, comp_id_transition],
                            comp_token, False, 'Component State')
    comp_lexer = Lexer(None, 'component', [], 'Component Lexer')
    # Source lexer: any first character moves into the component state
    comp_transition = LexerStateTransition(
        '.', 0, -1, False, comp_state, 'Source to Component Transition')
    source_token = Token('source', Token.NEVER_DATA, False, 'Source Token')
    source_state = LexerState([comp_transition], source_token, False,
                              'Source State')
    source_lexer = Lexer(None, 'source', [], 'Source Lexer')
    return ((source_lexer, comp_lexer, comp_id_lexer), source_state)
def testSplitBuffer(self): LexerStateTransitionTest.logger.debug('Testing the matching of a sequence.') # Test data test_data = 'Jul 11 09:51:54' buffer = list(test_data) end = 3 start = 7 transition = LexerStateTransition('', 0, 0, True, None, 'Unit Test State Transition') # Show test data LexerStateTransitionTest.logger.debug('Input buffer: %s' % (str(buffer))) # Run test accepted_buffer, remaining_buffer = transition.splitBuffer(buffer, end, start) # Show test output LexerStateTransitionTest.logger.debug('Data from accepted buffer: %s' % (str(accepted_buffer))) LexerStateTransitionTest.logger.debug('Data from remaining buffer: %s' % (str(remaining_buffer))) # Verify results assert accepted_buffer == list(test_data[:end]), 'The data from the accepted buffer is incorrect.' assert remaining_buffer == list(test_data[start:]), 'The data from the remaining buffer is incorrect.' LexerStateTransitionTest.logger.debug('Test succeeded!')
def createLexer(self):
    """Build a three-stage msg -> month -> time lexer for a syslog prefix.

    Returns:
        [lexers, tokens, states] lists ordered outermost-first (the end
        state appears only in the states list).
    """
    # Test data
    source = 'Jul 11 09:51:54'
    # End state: accepting, never emits data
    end_token = Token('end', Token.NEVER_DATA, True, 'End Token')
    end_state = LexerState([], end_token, True, 'End State')
    # Time lexer: runs until end-of-stream
    end_transition = LexerStateTransition('EOS', 0, 0, True, end_state,
                                          'Time to End Transition')
    time_token = Token('time', Token.ALWAYS_DATA, True, 'Time Token')
    time_state = LexerState([end_transition], time_token, False,
                            'Time State')
    time_lexer = Lexer(time_state, 'time', [], 'Time Lexer')
    # Month lexer: a " NN " day field ends the month.
    # Raw string: '\s'/'\d' are invalid escapes in a plain string literal
    # (SyntaxWarning since Python 3.12); the regex value is unchanged.
    time_transition = LexerStateTransition(r'\s+\d{2}\s+', 0, 4, True,
                                           time_state,
                                           'Month to Time Transition')
    month_token = Token('month', Token.ALWAYS_DATA, True, 'Month Token')
    month_state = LexerState([time_transition], month_token, False,
                             'Month State')
    month_lexer = Lexer(month_state, 'month', [], 'Month Lexer')
    # Msg state: any first character moves into the month state
    month_transition = LexerStateTransition('.', 0, -1, True, month_state,
                                            'Msg to Month Transition')
    msg_token = Token('msg', Token.NEVER_DATA, False, source)
    msg_state = LexerState([month_transition], msg_token, False, 'Msg State')
    # Msg lexer: seeded with its state, token, and the token's stream
    msg_lexer = Lexer(msg_state, 'msg', [time_lexer, month_lexer],
                      'Msg Lexer')
    msg_lexer.changeState(msg_state)
    msg_lexer.token = msg_token
    msg_lexer.stream = msg_token.getStream()
    # Show test data
    LexerTest.logger.debug('Created state:\n%s' %
                           (msg_state.to_pretty_json()))
    LexerTest.logger.debug('Created state:\n%s' %
                           (month_state.to_pretty_json()))
    LexerTest.logger.debug('Created state:\n%s' %
                           (time_state.to_pretty_json()))
    LexerTest.logger.debug('Created state:\n%s' %
                           (end_state.to_pretty_json()))
    tokens = [msg_token, month_token, time_token]
    states = [msg_state, month_state, time_state, end_state]
    lexers = [msg_lexer, month_lexer, time_lexer]
    return [lexers, tokens, states]
def createEventLexer(self):
    """Assemble the top-level event lexer from datetime/source/msg parts.

    Returns:
        The fully wired event Lexer.
    """
    # End state: accepting, never emits data
    end_token = Token('end', Token.NEVER_DATA, True, 'End Token')
    end_state = LexerState([], end_token, True, 'End State')
    # Msg lexer: runs until end-of-stream
    end_transition = LexerStateTransition('EOS', 0, 0, True, end_state,
                                          'Msg to End Transition')
    msg_token = Token('msg', Token.SOMETIMES_DATA, False, 'Outer Msg Token')
    msg_state = LexerState([end_transition], msg_token, False,
                           'Outer Msg State')
    msg_sub_lexers, msg_start_state = self.createMsgLexers()
    msg_lexer = Lexer(msg_start_state, 'msg', msg_sub_lexers,
                      'Outer Msg Lexer')
    # Source lexer: a ':' ends the source and starts the msg
    msg_transition = LexerStateTransition(':', 0, 1, True, msg_state,
                                          'Source to Msg Transition')
    source_token = Token('source', Token.ALWAYS_DATA, False,
                         'Outer Source Token')
    source_state = LexerState([msg_transition], source_token, True,
                              'Outer Source State')
    source_sub_lexers, source_start_state = self.createSourceLexers()
    source_lexer = Lexer(source_start_state, 'source', source_sub_lexers,
                         'Outer Source Lexer')
    # Datetime lexer: an HH:MM:SS time ends the datetime.
    # Raw string: '\d' is an invalid escape in a plain string literal
    # (SyntaxWarning since Python 3.12); the regex value is unchanged.
    source_transition = LexerStateTransition(
        r'\d{2}:\d{2}:\d{2}', 8, 8, True, source_state,
        'Datetime to Source Transition')
    datetime_token = Token('datetime', Token.ALWAYS_DATA, False,
                           'Outer Datetime Token')
    datetime_state = LexerState([source_transition], datetime_token, False,
                                'Outer Datetime State')
    datetime_sub_lexers, datetime_start_state = self.createDatetimeLexers()
    datetime_lexer = Lexer(datetime_start_state, 'datetime',
                           datetime_sub_lexers, 'Outer Datetime Lexer')
    # Event lexer: any first character moves into the datetime state
    datetime_transition = LexerStateTransition(
        '.', 0, -1, True, datetime_state, 'Event to Datetime Transition')
    event_token = Token('event', Token.NEVER_DATA, False, 'Event Token')
    event_state = LexerState([datetime_transition], event_token, False,
                             'Event State')
    event_lexer = Lexer(event_state, 'event',
                        [datetime_lexer, source_lexer, msg_lexer],
                        'Event Lexer')
    return event_lexer
def subTestGetDataFromBuffer(self, buffer, stripped):
    """Run getDataFromBuffer() on the given buffer and return the result."""
    # Build a transition whose only relevant setting is the strip flag
    transition = LexerStateTransition('', 0, 0, stripped, None,
                                      'Unit Test State Transition')
    # Log the input
    LexerStateTransitionTest.logger.debug('Input buffer: %s' % (str(buffer)))
    # Exercise the unit under test
    extracted = transition.getDataFromBuffer(buffer)
    # Log the outcome
    LexerStateTransitionTest.logger.debug('Data from buffer: "%s"' %
                                          (str(extracted)))
    return extracted
def contructTransition(cls, transition_json):
    """Build a LexerStateTransition from its JSON description.

    NOTE(review): the name is presumably a typo for "constructTransition";
    renaming would break existing callers, so it is kept as-is.
    """
    name = transition_json['name']
    pattern = transition_json['pattern']
    end_offset = transition_json['end_offset']
    start_offset = transition_json['start_offset']
    # Normalize the JSON value into a real bool via the class helper
    strip = cls.getBoolean(transition_json['strip'])
    next_state = transition_json['next_state']
    # NOTE(review): end_offset is passed before start_offset here, while the
    # test call sites elsewhere in this file pass (pattern, start, end, ...) —
    # confirm against the LexerStateTransition constructor signature.
    return LexerStateTransition(pattern, end_offset, start_offset, strip,
                                next_state, name)
def createMsgLexers(self):
    """Build the lexer/state chain for the "msg" field of a log line.

    Returns:
        ((msg_lexer, level_lexer, sub_msg_lexer), msg_state) where
        msg_state is the state an outer lexer should start in.
    """
    # End state: accepting, never emits data
    end_token = Token('end', Token.NEVER_DATA, True, 'End Token')
    end_state = LexerState([], end_token, True, 'End State')
    # Sub msg lexer: runs until end-of-stream.
    # Distinct name (was a rebound 'end_transition') so the two EOS
    # transitions below cannot be confused.
    sub_msg_end_transition = LexerStateTransition(
        'EOS', 0, 1, True, end_state, 'Sub Msg to End Transition')
    sub_msg_token = Token('sub_msg', Token.SOMETIMES_DATA, True,
                          'Sub Msg Token')
    sub_msg_state = LexerState([sub_msg_end_transition], sub_msg_token,
                               False, 'Sub Msg State')
    sub_msg_lexer = Lexer(None, 'sub_msg', [], 'Sub Msg Lexer')
    # Level lexer: a '>' closes the level and starts the sub msg
    level_to_sub_msg_transition = LexerStateTransition(
        '>', 0, 1, True, sub_msg_state, 'Level to Sub Msg Transition')
    level_token = Token('level', Token.SOMETIMES_DATA, True, 'Level Token')
    level_state = LexerState([level_to_sub_msg_transition], level_token,
                             False, 'Level State')
    level_lexer = Lexer(None, 'level', [], 'Level Lexer')
    # Msg lexer: '<' opens a level, any other character is sub msg text,
    # and EOS ends the message outright.
    level_transition = LexerStateTransition('[<]', -1, 1, False, level_state,
                                            'Msg to Level Transition')
    sub_msg_transition = LexerStateTransition('[^<]', -1, 0, False,
                                              sub_msg_state,
                                              'Msg to Sub Msg Transition')
    msg_end_transition = LexerStateTransition('EOS', 0, 1, True, end_state,
                                              'Msg to End Transition')
    msg_token = Token('msg', Token.NEVER_DATA, False, 'Msg Token')
    msg_state = LexerState(
        [level_transition, sub_msg_transition, msg_end_transition],
        msg_token, True, 'Msg State')
    msg_lexer = Lexer(None, 'msg', [], 'Msg Lexer')
    return ((msg_lexer, level_lexer, sub_msg_lexer), msg_state)
def createDatetimeLexers(self):
    """Build the lexer/state chain for a "Mon DD HH:MM:SS" datetime.

    Returns:
        ((datetime_lexer, month_lexer, day_lexer, hour_lexer, minute_lexer,
          second_lexer), datetime_state) where datetime_state is the state
        an outer lexer should start in.
    """
    # End state: accepting, never emits data
    end_token = Token('end', Token.NEVER_DATA, True, 'End Token')
    end_state = LexerState([], end_token, True, 'End State')
    # Second lexer: runs until end-of-stream
    end_transition = LexerStateTransition('EOS', 0, 0, False, end_state,
                                          'Second to End Transition')
    second_token = Token('second', Token.ALWAYS_DATA, True, 'Second Token')
    second_state = LexerState([end_transition], second_token, False,
                              'Second State')
    second_lexer = Lexer(None, 'second', [], 'Second Lexer')
    # Minute lexer: a ':' ends the minute
    second_transition = LexerStateTransition(
        ':', 0, 1, False, second_state, 'Minute to Second Transition')
    minute_token = Token('minute', Token.ALWAYS_DATA, True, 'Minute Token')
    minute_state = LexerState([second_transition], minute_token, False,
                              'Minute State')
    minute_lexer = Lexer(None, 'minute', [], 'Minute Lexer')
    # Hour lexer: a ':' ends the hour
    minute_transition = LexerStateTransition(':', 0, 1, False, minute_state,
                                             'Hour to Minute Transition')
    hour_token = Token('hour', Token.ALWAYS_DATA, True, 'Hour Token')
    hour_state = LexerState([minute_transition], hour_token, False,
                            'Hour State')
    hour_lexer = Lexer(None, 'hour', [], 'Hour Lexer')
    # Day lexer: whitespace ends the day.
    # Raw string: '\s' is an invalid escape in a plain string literal
    # (SyntaxWarning since Python 3.12); the regex value is unchanged.
    hour_transition = LexerStateTransition(r'\s+', 0, 1, True, hour_state,
                                           'Day to Hour Transition')
    day_token = Token('day', Token.ALWAYS_DATA, True, 'Day Token')
    day_state = LexerState([hour_transition], day_token, False, 'Day State')
    day_lexer = Lexer(None, 'day', [], 'Day Lexer')
    # Month lexer: whitespace ends the month (raw string, as above)
    day_transition = LexerStateTransition(r'\s+', 0, 1, True, day_state,
                                          'Month to Day Transition')
    month_token = Token('month', Token.ALWAYS_DATA, True, 'Month Token')
    month_state = LexerState([day_transition], month_token, False,
                             'Month State')
    month_lexer = Lexer(None, 'month', [], 'Month Lexer')
    # Datetime lexer: any first character moves into the month state
    month_transition = LexerStateTransition(
        '.', 0, -1, True, month_state, 'Datetime to Month Transition')
    datetime_token = Token('datetime', Token.NEVER_DATA, False,
                           'Datetime Token')
    datetime_state = LexerState([month_transition], datetime_token, False,
                                'Datetime State')
    datetime_lexer = Lexer(None, 'datetime', [], 'Datetime Lexer')
    return ((datetime_lexer, month_lexer, day_lexer, hour_lexer,
             minute_lexer, second_lexer), datetime_state)
def testNextState(self):
    """Drive state A with stream elements until it transitions to state B."""
    LexerStateTest.logger.debug('Testing the matching of a sequence.')
    # Test data
    source = 'Jul 11 09:51:54'
    stream = Stream(source, 'Unit Test Stream')
    token = Token('str', Token.ALWAYS_DATA, True, 'Token data')
    state_b = LexerState([], token, True, 'State B')
    # Raw string: '\s'/'\d' are invalid escapes in a plain string literal
    # (SyntaxWarning since Python 3.12); the regex value is unchanged.
    pattern = r'\s+\d{2}\s+'
    start_offset = 0  # fixed local typo 'start_offet'
    end_offset = 4
    transition = LexerStateTransition(pattern, start_offset, end_offset,
                                      True, state_b, 'A to B Transition')
    state_a = LexerState([transition], token, False, 'State A')
    # Show test data
    LexerStateTest.logger.debug('Created stream:\n%s' %
                                (stream.to_pretty_json()))
    LexerStateTest.logger.debug('Created state A:\n%s' %
                                (state_a.to_pretty_json()))
    LexerStateTest.logger.debug('Created state B:\n%s' %
                                (state_b.to_pretty_json()))
    # Run test: feed elements until state A hands off to another state
    next_state = state_a
    new_token = None
    while next_state == state_a:
        current_element = stream.getNextElement()
        next_state, new_token = state_a.nextState(stream)
        # Show test output
        LexerStateTest.logger.debug('Fetching element: %s' %
                                    (current_element))
    # Show test output
    LexerStateTest.logger.debug('Token found:\n%s' %
                                (new_token.to_pretty_json()))
    LexerStateTest.logger.debug('Final stream state:\n%s' %
                                (stream.to_pretty_json()))
    LexerStateTest.logger.debug('Final lexer state:\n%s' %
                                (next_state.to_pretty_json()))
    # Verify results: set the shared token's data to the expected 'Jul'
    # before comparing (token and new_token may be the same object)
    token.data = source[:3]
    assert new_token == token, 'The token found is incorrect.'
    LexerStateTest.logger.debug('Test succeeded!')