def tokenize(text):
    '''
    Produce a token list from the given text string.

    Returns a list of (<tok>, <info>) tuples, where <info> always
    carries the starting 'off' of the token and, for value tokens,
    its 'end' offset and parsed 'valu'.
    '''
    retn = []
    curs = 0
    size = len(text)

    while curs < size:

        # step past any whitespace before the next token
        _, curs = s_syntax.nom_whitespace(text, curs)
        if curs >= size:
            break

        # quoted string literal -> ('valu', ...) with type 'str'
        if s_syntax.nextin(text, curs, '"\''):
            info = {'off': curs, 'type': 'str'}
            info['valu'], curs = s_syntax.parse_string(text, curs, trim=False)
            info['end'] = curs
            retn.append(('valu', info))
            continue

        # numeric literal -> ('valu', ...) with type 'int'
        if s_syntax.nextin(text, curs, '0123456789'):
            info = {'off': curs, 'type': 'int'}
            info['valu'], curs = s_syntax.parse_int(text, curs)
            info['end'] = curs
            retn.append(('valu', info))
            continue

        # fixed operator / punctuation tokens from tokstrs
        for name in tokstrs:
            if text.startswith(name, curs):
                info = {'off': curs}
                curs += len(name)
                info['end'] = curs
                retn.append((name, info))
                break

        else:
            # nothing matched; the only remaining legal token is a var name
            if not s_syntax.nextin(text, curs, varset):
                raise s_common.BadSyntaxError(at=curs, mesg='no valid tokens found')

            info = {'off': curs}
            info['name'], curs = s_syntax.nom(text, curs, varset, trim=False)
            retn.append(('var', info))

    # fold any static per-token info into each token's info dict
    for name, info in retn:
        info.update(tokninfo.get(name, {}))

    return retn
def test_lib_syntax_int(self):
    '''
    Exercise s_syntax.parse_int over ints, hex, binary, and floats,
    plus the malformed inputs that must raise BadSyntaxError.
    '''
    # (input text, expected (value, offset)) pairs, checked in order
    cases = (
        (' 30 ', (30, 5)),
        (' -30 ', (-30, 5)),
        (' 0xfF ', (15, 5)),
        (' 0b01101001 ', (105, 14)),
        (' -0xfF ', (-15, 5)),
        (' -0b01101001 ', (-105, 14)),
        (' 1.0 ', (1.0, 6)),
        (' 1.2 ', (1.2, 6)),
        (' 0.2 ', (0.2, 6)),
        (' 0.0 ', (0.0, 6)),
        (' -1.2 ', (-1.2, 6)),
        (' -0.2 ', (-0.2, 6)),
        (' -0.0 ', (0.0, 6)),
    )
    for text, expect in cases:
        self.eq(s_syntax.parse_int(text, 0), expect)

    # malformed numeric text must raise
    for text in ('0x', 'asdf', '0xzzzz', '0bbbbb'):
        self.raises(BadSyntaxError, s_syntax.parse_int, text, 0)