def __init__( self, name=None, **kwds ):
    """Build the SSTRING parser: a USINT 'length' state chained to a 'string' state.

    name -- machine name; when falsy, the 'context' keyword is used instead (and is defaulted
            to the class name if the caller did not supply it).
    kwds -- remaining keywords, passed through to the base-class constructor.
    """
    if not name:
        name = kwds.setdefault( 'context', self.__class__.__name__ )

    # The length octet is parsed first, into context 'length'.
    length_state = USINT( context='length' )

    # The string body is limited by '..length', decoded as iso-8859-1, and is the terminal state.
    string_state = cpppo.string_bytes(
        'string', context='string', limit='..length', initial='.*',
        decode='iso-8859-1', terminal=True )
    length_state[None] = string_state

    super( SSTRING, self ).__init__( name=name, initial=length_state, **kwds )
def __init__(self, name=None, **kwds):
    """Assemble the SSTRING state machine and hand it to the base class.

    A USINT state captures 'length'; its None transition leads to a terminal
    string_bytes state that captures 'string', limited by '..length' and decoded
    as iso-8859-1.

    name -- machine name; a falsy value falls back to the 'context' keyword, which
            is itself defaulted to the class name when absent.
    kwds -- forwarded unchanged to the base-class constructor.
    """
    if not name:
        name = kwds.setdefault('context', self.__class__.__name__)

    size_parser = USINT(context='length')
    body_parser = cpppo.string_bytes('string',
                                     context='string',
                                     limit='..length',
                                     initial='.*',
                                     decode='iso-8859-1',
                                     terminal=True)
    size_parser[None] = body_parser

    super(SSTRING, self).__init__(name=name, initial=size_parser, **kwds)
def test_decode():
    """Run the string and integer parsers over bytes and native-string sources.

    Each section feeds a peekable source through a machine, logs every (machine, state)
    step, and then checks the index of the last step, the number of symbols consumed
    (source.sent) and the parsed value.  The final section checks that the str-based
    integer parser handles bytes input on Python 2 and does not on Python 3.
    """
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols, works in Python 2/3.
    source = cpppo.peekable('π'.encode('utf-8'))
    data = cpppo.dotdict()
    with cpppo.string_bytes('pi', initial='.*', greedy=True, context='pi', decode='utf-8') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'

    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force non-unicode (counteracts
        # import unicode_literals above).  We can't use greenery.lego regexes on unicode data in
        # Python 2...
        source = cpppo.peekable(str('pi'))
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True, context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'
    else:
        # Test regexes over Python 3 unicode string data (no decode required).  Operates in native
        # unicode symbols.
        source = cpppo.peekable('π')
        data = cpppo.dotdict()
        with cpppo.string('pi', initial='.*', greedy=True, context='pi') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    # Native-string integer parser over native-string data.
    source = cpppo.peekable(str('123'))
    data = cpppo.dotdict()
    with cpppo.integer('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Bytes integer parser over bytes data.
    source = cpppo.peekable('123'.encode('ascii'))
    data = cpppo.dotdict()
    with cpppo.integer_bytes('value') as machine:
        for i, (m, s) in enumerate(machine.run(source=source, data=data)):
            log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                     m.name_centered(), i, s, source.sent, source.peek(), data)
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using an integer (str) parser over bytes data.  Works in Python 2, not so much in Python 3
    try:
        source = cpppo.peekable('123'.encode('ascii'))
        data = cpppo.dotdict()
        with cpppo.integer('value') as machine:
            for i, (m, s) in enumerate(machine.run(source=source, data=data)):
                log.info("%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                         m.name_centered(), i, s, source.sent, source.peek(), data)
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
    except AssertionError:
        assert not sys.version_info[0] < 3, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"
    else:
        # This check must live outside the try body: inside it, its own AssertionError would be
        # caught by the handler above and an unexpected success on Python 3 would go unnoticed.
        assert sys.version_info[0] < 3, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"
def test_decode():
    """Run the string and integer parsers over bytes and native-string sources.

    Each section feeds a peekable source through a machine, logs every (machine, state)
    step, and then checks the index of the last step, the number of symbols consumed
    (source.sent) and the parsed value.  The final section checks that the str-based
    integer parser handles bytes input on Python 2 and does not on Python 3.
    """
    # Test decode of regexes over bytes data.  Operates in raw bytes symbols, works in Python 2/3.
    source = cpppo.peekable( 'π'.encode( 'utf-8' ))
    data = cpppo.dotdict()
    with cpppo.string_bytes( 'pi', initial='.*', greedy=True, context='pi', decode='utf-8' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 3
        assert source.sent == 2
        assert data.pi == 'π'

    if sys.version_info[0] < 3:
        # Test regexes over plain string data (no decode required).  Force non-unicode (counteracts
        # import unicode_literals above).  We can't use greenery.lego regexes on unicode data in
        # Python 2...
        source = cpppo.peekable( str( 'pi' ))
        data = cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 3
            assert source.sent == 2
            assert data.pi == 'pi'
    else:
        # Test regexes over Python 3 unicode string data (no decode required).  Operates in native
        # unicode symbols.
        source = cpppo.peekable( 'π' )
        data = cpppo.dotdict()
        with cpppo.string( 'pi', initial='.*', greedy=True, context='pi' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 2
            assert source.sent == 1
            assert data.pi == 'π'

    # Native-string integer parser over native-string data.
    source = cpppo.peekable( str( '123' ))
    data = cpppo.dotdict()
    with cpppo.integer( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Bytes integer parser over bytes data.
    source = cpppo.peekable( '123'.encode( 'ascii' ))
    data = cpppo.dotdict()
    with cpppo.integer_bytes( 'value' ) as machine:
        for i,(m,s) in enumerate( machine.run( source=source, data=data )):
            log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                      m.name_centered(), i, s, source.sent, source.peek(), data )
        assert i == 4
        assert source.sent == 3
        assert data.integer == 123

    # Try using an integer (str) parser over bytes data.  Works in Python 2, not so much in Python 3
    try:
        source = cpppo.peekable( '123'.encode( 'ascii' ))
        data = cpppo.dotdict()
        with cpppo.integer( 'value' ) as machine:
            for i,(m,s) in enumerate( machine.run( source=source, data=data )):
                log.info( "%s #%3d -> %10.10s; next byte %3d: %-10.10r: %r",
                          m.name_centered(), i, s, source.sent, source.peek(), data )
            assert i == 4
            assert source.sent == 3
            assert data.integer == 123
    except AssertionError:
        assert not sys.version_info[0] < 3, \
            "Shouldn't have failed in Python2; str/bytes iterator both produce str"
    else:
        # This check must live outside the try body: inside it, its own AssertionError would be
        # caught by the handler above and an unexpected success on Python 3 would go unnoticed.
        assert sys.version_info[0] < 3, \
            "Should have failed in Python3; str/bytes iterator both produce str/int"
def __init__( self, name=None, **kwds ):
    """Build the EPATH parsing state machine and hand it to the base class.

    The machine parses a USINT 'size' (followed by one dropped pad octet when self.padsize
    is true), then runs a sub-dfa over the path segments, limited to size * 2 octets.  Each
    segment is collected under 'segment__' and then moved onto the 'segment' list by the
    'move' state, which loops back to parse the next segment.

    name -- machine name; when falsy, the 'context' keyword is used instead (and is defaulted
            to the class name if the caller did not supply it).
    kwds -- remaining keywords, passed through to the base-class constructor.
    """
    name = name or kwds.setdefault( 'context', self.__class__.__name__ )

    # Get the size, and chain remaining machine onto rest.  When used as a Route Path, the size
    # is padded, so insert a state to drop the pad, and chain rest to that instead.
    size = rest = USINT( context='size' )
    if self.padsize:
        size[True] = rest = octets_drop( 'pad', repeat=1 )

    # After capturing each segment__ (pseg), move it onto the path segment list, and loop
    pseg = octets_noop( 'type', terminal=True )
    # ...segment parsers...
    pmov = move_if( 'move', initializer=lambda **kwds: [],
                    source='..segment__', destination='..segment', state=pseg )

    # Wire each different segment type parser between pseg and pmov.  Each row is:
    #   (type octet, octets dropped before the value, value parser, state name, context)
    logical = (
        ( b'\x28', 1, USINT, 'elem_8bit', 'element' ),
        ( b'\x29', 2, UINT,  'elem16bit', 'element' ),
        ( b'\x2a', 2, UDINT, 'elem32bit', 'element' ),
        ( b'\x20', 1, USINT, 'clas_8bit', 'class' ),
        ( b'\x21', 2, UINT,  'clas16bit', 'class' ),
        ( b'\x24', 1, USINT, 'inst_8bit', 'instance' ),
        ( b'\x25', 2, UINT,  'inst16bit', 'instance' ),
        ( b'\x30', 1, USINT, 'attr_8bit', 'attribute' ),
        ( b'\x31', 2, UINT,  'attr16bit', 'attribute' ),
    )
    for code, skip, parser, label, ctx in logical:
        # code[0] is the symbol as the input stream presents it (str in Python 2, int in Python 3)
        pseg[code[0]] = drop = octets_drop( 'type', repeat=skip )
        drop[True] = value = parser( label, context=ctx )
        value[None] = pmov

    # ANSI Extended Symbolic segment: type, length, then that many octets of symbol name
    pseg[b'\x91'[0]] = symt = octets_drop( 'type', repeat=1 )
    symt[True] = syml = USINT( 'sym_len', context='symbolic.length' )
    syml[None] = symv = cpppo.string_bytes( 'symbolic', context='symbolic', limit='.length',
                                            initial='.*', decode='iso-8859-1' )

    # An odd-length ANSI Extended Symbolic name means an odd total.  Pad
    symo = octets_drop( 'pad', repeat=1 )
    symo[None] = pmov
    symv[None] = cpppo.decide( 'odd',
                               predicate=lambda path=None, data=None, **kwds: len( data[path].symbolic ) % 2,
                               state=symo )
    symv[None] = pmov  # presumably the fall-through taken when the 'odd' predicate is false

    # Route Path port/link-address.  See Vol 1-3.13, Table C-1.3 Port Segment Encoding.
    # segment:  0b000spppp
    #                |\\\\+-> port number 0x01-0x0E; 0x0F=>extended
    #                |
    #                +------> link size+address; 0=>numeric, 1=>size+string
    #
    def port_fix( path=None, data=None, **kwds ):
        """Discard port values above 0x0F; return True (transition) if remaining port value is 0x0F
        (Optional Extended port)"""
        data[path].port &= 0x0F
        if data[path].port == 0x0F:
            # Port is extended; discard and prepare to collect new port number
            data[path].port = cpppo.dotdict()
            return True
        # Port is OK; don't transition
        return False

    # [01-0E][LL]          port 01-0E, link-address #LL
    # [0F][PPPP][LL]       port 0xPPPP, link-address 0xLL
    # Iterating a bytes literal yields the same symbol type as b'..'[0] on both Python 2 and 3,
    # so every key is guaranteed to match what the input stream produces.
    pnum = USINT( 'port_num', context='port' )
    for sym in b'\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f':
        pseg[sym] = pnum

    # A big port#; re-scan a UINT into .port (won't work 'til port_fix is called)
    pnbg = UINT( 'port_nbg', context='port' )
    pnbg[True] = pnlk = USINT( 'link_num', context='link' )

    # Fix the port#; if 0x0F, setup for extended port and transition to pnbg.  Otherwise,
    # (not extended port), just go to the port numeric link.
    pnum[None] = cpppo.decide( 'port_nfix', predicate=port_fix, state=pnbg )
    pnum[None] = pnlk
    pnlk[None] = pmov  # and done; move segment, get next

    # [11-1E][SS]'123.123.123.123'[00]       port 0x01-0E, link address '123.123.123.123' (pad if size 0xSS odd)
    # [1F][SS][PPPP]'123.123.123.123'[00]    port 0xPPPP,  link address '123.123.123.123' (pad if size SS odd)
    # 0x11 is keyed by its symbol like every other segment type; keying it by the whole
    # bytes object b'\x11' would never match the integer symbols produced under Python 3.
    padr = USINT( 'port_adr', context='port' )
    for sym in b'\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f':
        pseg[sym] = padr

    # Harvest the addresses into .link
    adrv = cpppo.string_bytes( 'link_add', context='link', limit='.length',
                               initial='.*', decode='iso-8859-1' )

    # An odd-length link address means an odd total.  Pad
    adro = octets_drop( 'link_pad', repeat=1 )
    adro[None] = pmov
    adrv[None] = cpppo.decide( 'link_odd',
                               predicate=lambda path=None, data=None, **kwds: len( data[path+'.link'] ) % 2,
                               state=adro )
    adrv[None] = pmov

    # A big port#; re-scan a UINT into .port (won't work 'til port_fix is called)
    pabg = UINT( 'port_abg', context='port' )
    pabg[None] = adrv

    # After the port octet comes the link address length; then fix the port# (collecting an
    # extended port via pabg if required) before harvesting the address itself.
    padr[True] = adrl = USINT( 'link_len', context='link.length' )
    adrl[None] = cpppo.decide( 'port_afix', predicate=port_fix, state=pabg )
    adrl[None] = adrv

    # Parse all segments in a sub-dfa limited by the parsed path.size (in words; double)
    rest[None] = cpppo.dfa( 'each', context='segment__', initial=pseg, terminal=True,
                            limit=lambda path=None, data=None, **kwds: data[path+'..size'] * 2 )

    super( EPATH, self ).__init__( name=name, initial=size, **kwds )
def __init__(self, name=None, **kwds):
    """Build the EPATH parsing state machine and hand it to the base class.

    The machine parses a USINT 'size' (followed by one dropped pad octet when self.padsize
    is true), then runs a sub-dfa over the path segments, limited to size * 2 octets.  Each
    segment is collected under 'segment__' and then moved onto the 'segment' list by the
    'move' state, which loops back to parse the next segment.

    name -- machine name; when falsy, the 'context' keyword is used instead (and is defaulted
            to the class name if the caller did not supply it).
    kwds -- remaining keywords, passed through to the base-class constructor.
    """
    name = name or kwds.setdefault('context', self.__class__.__name__)

    # Get the size, and chain remaining machine onto rest.  When used as a Route Path, the size
    # is padded, so insert a state to drop the pad, and chain rest to that instead.
    size = rest = USINT(context='size')
    if self.padsize:
        size[True] = rest = octets_drop('pad', repeat=1)

    # After capturing each segment__ (pseg), move it onto the path segment list, and loop
    pseg = octets_noop('type', terminal=True)
    # ...segment parsers...
    pmov = move_if('move', initializer=lambda **kwds: [],
                   source='..segment__', destination='..segment', state=pseg)

    # Wire each different segment type parser between pseg and pmov.  Each row is:
    #   (type octet, octets dropped before the value, value parser, state name, context)
    logical = (
        (b'\x28', 1, USINT, 'elem_8bit', 'element'),
        (b'\x29', 2, UINT, 'elem16bit', 'element'),
        (b'\x2a', 2, UDINT, 'elem32bit', 'element'),
        (b'\x20', 1, USINT, 'clas_8bit', 'class'),
        (b'\x21', 2, UINT, 'clas16bit', 'class'),
        (b'\x24', 1, USINT, 'inst_8bit', 'instance'),
        (b'\x25', 2, UINT, 'inst16bit', 'instance'),
        (b'\x30', 1, USINT, 'attr_8bit', 'attribute'),
        (b'\x31', 2, UINT, 'attr16bit', 'attribute'),
    )
    for code, skip, parser, label, ctx in logical:
        # code[0] is the symbol as the input stream presents it (str in Python 2, int in Python 3)
        pseg[code[0]] = drop = octets_drop('type', repeat=skip)
        drop[True] = value = parser(label, context=ctx)
        value[None] = pmov

    # ANSI Extended Symbolic segment: type, length, then that many octets of symbol name
    pseg[b'\x91'[0]] = symt = octets_drop('type', repeat=1)
    symt[True] = syml = USINT('sym_len', context='symbolic.length')
    syml[None] = symv = cpppo.string_bytes('symbolic', context='symbolic', limit='.length',
                                           initial='.*', decode='iso-8859-1')

    # An odd-length ANSI Extended Symbolic name means an odd total.  Pad
    symo = octets_drop('pad', repeat=1)
    symo[None] = pmov
    symv[None] = cpppo.decide('odd',
                              predicate=lambda path=None, data=None, **kwds: len(data[path].symbolic) % 2,
                              state=symo)
    symv[None] = pmov  # presumably the fall-through taken when the 'odd' predicate is false

    # Route Path port/link-address.  See Vol 1-3.13, Table C-1.3 Port Segment Encoding.
    # segment:  0b000spppp
    #                |\\\\+-> port number 0x01-0x0E; 0x0F=>extended
    #                |
    #                +------> link size+address; 0=>numeric, 1=>size+string
    #
    def port_fix(path=None, data=None, **kwds):
        """Discard port values above 0x0F; return True (transition) if remaining port value is 0x0F
        (Optional Extended port)"""
        data[path].port &= 0x0F
        if data[path].port == 0x0F:
            # Port is extended; discard and prepare to collect new port number
            data[path].port = cpppo.dotdict()
            return True
        # Port is OK; don't transition
        return False

    # [01-0E][LL]          port 01-0E, link-address #LL
    # [0F][PPPP][LL]       port 0xPPPP, link-address 0xLL
    # Iterating a bytes literal yields the same symbol type as b'..'[0] on both Python 2 and 3,
    # so every key is guaranteed to match what the input stream produces.
    pnum = USINT('port_num', context='port')
    for sym in b'\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f':
        pseg[sym] = pnum

    # A big port#; re-scan a UINT into .port (won't work 'til port_fix is called)
    pnbg = UINT('port_nbg', context='port')
    pnbg[True] = pnlk = USINT('link_num', context='link')

    # Fix the port#; if 0x0F, setup for extended port and transition to pnbg.  Otherwise,
    # (not extended port), just go to the port numeric link.
    pnum[None] = cpppo.decide('port_nfix', predicate=port_fix, state=pnbg)
    pnum[None] = pnlk
    pnlk[None] = pmov  # and done; move segment, get next

    # [11-1E][SS]'123.123.123.123'[00]       port 0x01-0E, link address '123.123.123.123' (pad if size 0xSS odd)
    # [1F][SS][PPPP]'123.123.123.123'[00]    port 0xPPPP,  link address '123.123.123.123' (pad if size SS odd)
    # 0x11 is keyed by its symbol like every other segment type; keying it by the whole
    # bytes object b'\x11' would never match the integer symbols produced under Python 3.
    padr = USINT('port_adr', context='port')
    for sym in b'\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f':
        pseg[sym] = padr

    # Harvest the addresses into .link
    adrv = cpppo.string_bytes('link_add', context='link', limit='.length',
                              initial='.*', decode='iso-8859-1')

    # An odd-length link address means an odd total.  Pad
    adro = octets_drop('link_pad', repeat=1)
    adro[None] = pmov
    adrv[None] = cpppo.decide('link_odd',
                              predicate=lambda path=None, data=None, **kwds: len(data[path + '.link']) % 2,
                              state=adro)
    adrv[None] = pmov

    # A big port#; re-scan a UINT into .port (won't work 'til port_fix is called)
    pabg = UINT('port_abg', context='port')
    pabg[None] = adrv

    # After the port octet comes the link address length; then fix the port# (collecting an
    # extended port via pabg if required) before harvesting the address itself.
    padr[True] = adrl = USINT('link_len', context='link.length')
    adrl[None] = cpppo.decide('port_afix', predicate=port_fix, state=pabg)
    adrl[None] = adrv

    # Parse all segments in a sub-dfa limited by the parsed path.size (in words; double)
    rest[None] = cpppo.dfa('each', context='segment__', initial=pseg, terminal=True,
                           limit=lambda path=None, data=None, **kwds: data[path + '..size'] * 2)

    super(EPATH, self).__init__(name=name, initial=size, **kwds)