Example No. 1
    def __init__(self):
        self.lex = SQLexer()
        self.alpha_re = re.compile(r'^\s*[A-Za-z0-9_]*\s*$')

        #  foo10" or foo1'  must start with letter to preven
        #  7" # .... which is likely a measurement, not sqli
        self.alpha_str_re = re.compile(r'^[A-Z][A-Z0-9_]+[\'\"]$')

        self.pmap_new = frozenset(
[
'1ok','1oks,','1oksc','noL1R','1okLk','1okfL',
'sonos', 'sono1', 'sosos', '1ono1',
'sonoo', '1Rono', 's;n:k', 'k1,1,', 'k1,1k',
'nokLk',
# unions
 'okkkn',
'ofL1R',

'fLk,L',
'1,1R,', '1,LfL', '1,Lk1', '1,LkL', '1,Lkf', '1,fL1', '1,sR,', '1;kL1', '1;kLL', '1;kLo', '1;kfL', '1;kks', '1;knL', '1;knc', '1;koL', '1;kok', '1R,L1', '1R;kL', '1R;kf', '1R;kk', '1R;kn', '1R;ko', '1RLRs', '1RR;k', '1RRR;', '1RRRR', '1RRRk', '1RRRo', '1RRk1', '1RRkk', '1RRo1', '1RRoL', '1RRof', '1RRok', '1RRon', '1RRoo', '1Rk1', '1Rk1c', '1Rk1o', '1Rkks', '1Ro1f', '1Ro1k', '1Ro1o', '1RoL1', '1RoLk', '1RoLn', '1RofL', '1Rok1', '1RokL', '1RooL', '1k1', '1k1c', '1kfL1', '1kkL1', '1kksc', '1o1Ro', '1o1fL', '1o1kf', '1o1o1', '1o1oL', '1o1of', '1o1ok', '1o1on', '1o1oo', '1o1ov', '1oL1R', '1oL1o', '1oLL1', '1oLLL', '1oLLf', '1oLfL', '1oLk1', '1oLkf', '1oLkn', '1oLnR', '1oLsR', '1ofL1', '1ofLR', '1ofLf', '1ofLn', '1ok1', '1ok1,', '1ok1c', '1ok1k', '1okL1', '1okv,',
# '1ono1',
 '1onos', '1oo1o', '1ooL1', '1oso1', ';kknc', 'fL1,f', 'fL1Ro', 'fL1o1', 'fLRof', 'fLfL1', 'fLfLR', 'fLkLR', 'fLnLR', 'fLv,1', 'k1kLk', 'k1oLs', 'kLRok', 'kLk,L', 'kLokL', 'kLvvR', 'kfL1,', 'kfL1R', 'kfLfL', 'kfLn,', 'kvkL1', 'n,LfL', 'n,Lk1', 'n,LkL', 'n,Lkf', 'n,fL1', 'n;kL1', 'n;kLL', 'n;kfL', 'n;kks', 'n;knL', 'n;koL', 'n;kok', 'nR;kL', 'nR;kf', 'nR;kk', 'nR;kn', 'nR;ko', 'nRR;k', 'nRRR;', 'nRRRk', 'nRRRo', 'nRRkk', 'nRRo1', 'nRRoL', 'nRRof', 'nRRok', 'nRk1o', 'nRkks', 'nRo1f', 'nRo1o', 'nRoLk', 'nRofL', 'nRokL', 'nkksc', 'no1fL', 'no1o1', 'no1oL', 'no1of', 'noLk1', 'nofL1', 'nokL1', 'noo1o', 'ofL1o', 'ofLRo', 'ok1o1', 'oo1kf', 's,1R,', 's;k1,', 's;k1o', 's;k;', 's;kL1', 's;kLL', 's;kLo', 's;k[k', 's;k[n', 's;kfL', 's;kkn', 's;kks', 's;knL', 's;knc', 's;knk', 's;knn', 's;koL', 's;kok', 'sR,L1', 'sR;kL', 'sR;kf', 'sR;kk', 'sR;kn', 'sR;ko', 'sRR;k', 'sRRR;', 'sRRRk', 'sRRRo', 'sRRk1', 'sRRkk', 'sRRo1', 'sRRoL', 'sRRof', 'sRRok', 'sRRoo', 'sRk1', 'sRk1c', 'sRk1o', 'sRkks', 'sRo1f', 'sRo1k', 'sRo1o', 'sRoLk', 'sRofL', 'sRok1', 'sRokL', 'sRooL', 'sc', 'sfL1R', 'sfLn,', 'sfLsR', 'sk1', 'sk1c', 'sk1o1', 'sk1os', 'skR;k', 'skRk1', 'skRkk', 'skRo1', 'skRoL', 'skRof', 'skRok', 'skks', 'skksc', 'skoL1', 'skoLk', 'so1c', 'so1fL', 'so1kf', 'so1o1', 'so1oL', 'so1of', 'so1ok', 'so1on', 'so1oo', 'so1os', 'so1ov', 'soL1R', 'soL1o', 'soLLL', 'soLLk', 'soLLs', 'soLfL', 'soLk1', 'soLkR', 'soLkk', 'soLkn', 'soLks', 'soLsR', 'sofL1', 'sofLR', 'sofLf', 'sofLk', 'sok1', 'sok1,', 'sok1c', 'sok1o', 'sokL1', 'sokLk', 'sokLo', 'sokLs', 'sokc', 'sokfL', 'sokn,', 'soknk', 'soko1', 'sokoL', 'sokok', 'sokoo', 'sokos', 'son:o',
# 'sonk1',
'soLko',
 'soo1o', 'sooL1', 'sooLk', 'sooLo', 'soofL', 'sookc', 'soos', 'sos', 'sovo1', 'sovok', 'sovoo', 'sovos', 'sovov', 'vok1,']
)
        self.pmap_new2 = frozenset(
['1,1R', '1,Lf', '1,Lk', '1,LL', '1,fL', '1,sR', '1;kL', '1;kf', '1;kk', '1;kn', '1;ko', '1R,L', '1R;k', '1RLR', '1RR;', '1RRR', '1RRk', '1RRo', '1Rk1', '1Rkk', '1Ro1', '1RoL', '1Rof', '1Rok', '1Roo', '1k1', '1k1c', '1kfL', '1kkL', '1kks', '1o1R', '1o1f', '1o1k', '1o1o', '1oL1', '1oLf', '1oLL', '1oLk', '1oLn', '1oLs', '1ofL', '1ok1', '1okL', '1okk', '1okv', '1ono', '1oo1', '1ooL', '1oso', ';kkn', 'fL1,', 'fL1R', 'fL1o', 'fLRo', 'fLfL', 'fLkL', 'fLnL', 'fLv,', 'fLL1', 'fLLf','k1kL', 'k1oL', 'kLRo', 'kLk,', 'kLok', 'kLvv', 'kfL1', 'kfLL', 'kfLf', 'kfLn', 'kvkL', 'n,Lf', 'n,LL', 'n,Lk', 'n,fL', 'n;kL', 'n;kf', 'n;kk', 'n;kn', 'n;ko', 'nR;k', 'nRR;', 'nRRR', 'nRRk', 'nRRo', 'nRk1', 'nRkk', 'nRo1', 'nRoL', 'nRof', 'nRok', 'nkks', 'no1f', 'no1o', 'noLk', 'nofL', 'nokL', 'noo1', 'ofL1', 'ofLL', 'ofLR', 'ok1o', 'oo1k', 's,1R', 's;k1', 's;k;', 's;kL', 's;k[', 's;kf', 's;kk', 's;kn', 's;ko', 'sR,L', 'sR;k', 'sRR;', 'sRRR', 'sRRk', 'sRRo', 'sRk1', 'sRkk', 'sRo1', 'sRoL', 'sRof', 'sRok', 'sRoo', 'sc', 'sfL1', 'sfLL', 'sfLn', 'sfLs', 'sk1', 'sk1c', 'sk1o', 'skR;', 'skRk', 'skRo', 'skks', 'skoL', 'skoo', 'so1c', 'so1f', 'so1k', 'so1o', 'soL1', 'soLL', 'soLf', 'soLk', 'soLs', 'sofL', 'sok1', 'sokL', 'sokc', 'sokf', 'sokk', 'sokn', 'soko', 'son:', 'sonk',
# 'sono',
 'soo1', 'sooL', 'soof', 'sook', 'soos', 'sos',
# 'soso',
 'sovo', 'vok1', 'voko'])

        self.badsql = (
            '\\',  # backslash not allowed in non-string
            '[1]',
            'o[',
            'on,n',
            'knn',
            '1:',
            'no1n',
            ';no',
            ';nn',
            ';n,',
            ';[',
            ';s',
            'Lnon',  # "(FOO, BAR...."
#            'Ln,n',  # "(FOO OR BAR...
            'Lnn',
            'onon',
            'nono',
            'nnn',
            'nLn',
            'o,',
            'n1',
            '1nn',
            '11',
            's1',
            ';1',
            ';n;',
            ';s;',
            'onn',
            'sns',
            'LnnR'
            )
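
For context: each entry in pmap_new and pmap_new2 is a token-class fingerprint, one character per lexed token as used throughout this code ('s' string, '1' number, 'o' operator, 'k' keyword, 'n' name, 'f' function, 'v' variable, 'c' comment, 'L'/'R' parens, ',' comma, ';' semicolon). patmatch() in Example No. 3 joins these class characters and looks the first five up in the set. A minimal self-contained sketch of that lookup, with a toy pattern set and hand-built tokens:

pmap = frozenset(['1okLk', 'sok1c'])            # toy stand-in for pmap_new

tokens = [('1', '1'), ('o', 'UNION'), ('k', 'SELECT'),
          ('L', '('), ('k', 'SELECT')]          # "1 UNION SELECT (SELECT ..."

fullpat = ''.join(t[0] for t in tokens)         # '1okLk'
print(fullpat[0:5] in pmap)                     # True -> candidate sqli pattern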
Example No. 2
    def testParser(self):
        s = SQLexer()

        tokens = s.tokenize('1 "foo" "bar" 1')
        self.assertEquals([('1', '1'), ('s', '"foo"'), ('s', '"bar"'),
                           ('1', '1')], tokens)
        tokens = s.syntax(tokens)
        self.assertEquals([('1', '1'), ('s', '"foo""bar"'), ('1', '1')],
                          tokens)

        tokens = s.tokenize('"foo" "bar" 1')
        self.assertEquals([('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
        tokens = s.syntax(tokens)
        self.assertEquals([('s', '"foo""bar"'), ('1', '1')], tokens)

        tokens = s.tokenize('1 "foo" "bar" 1')
        self.assertEquals([('1', '1'), ('s', '"foo"'), ('s', '"bar"'),
                           ('1', '1')], tokens)
        tokens = s.syntax(tokens)
        self.assertEquals([('1', '1'), ('s', '"foo""bar"'), ('1', '1')],
                          tokens)

        tokens = s.tokenize('select 1'.upper())
        self.assertEquals([('k', 'SELECT'), ('1', '1')], tokens)

        tokens = s.tokenize('1 /* foo */ 2'.upper())
        self.assertEquals([('1', '1'), ('c', '/* FOO */'), ('1', '2')], tokens)

        tokens = s.tokenize('1 /*foo*/ 2'.upper())
        self.assertEquals([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

        tokens = s.tokenize('1 /*foo*/ 2'.upper())
        self.assertEquals([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

        tokens = s.tokenize('1 || select'.upper())
        self.assertEquals([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

        tokens = s.tokenize('1 /*! || */ select'.upper())
        self.assertEquals([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

        tokens = s.tokenize('1 /*!32302 || */ select'.upper())
        self.assertEquals([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

        tokens = s.tokenize('select 1 /*!00000AND 2>1*/'.upper())
        self.assertEquals([('k', 'SELECT'), ('1', '1'), ('o', 'AND'),
                           ('1', '2'), ('o', '>'), ('1', '1')], tokens)

        tokens = s.tokenize('@@NEW UNION#SQLMAP'.upper())
        self.assertEquals([('v', '@@NEW'), ('o', 'UNION'), ('c', '#SQLMAP')],
                          tokens)

        #tokens = s.tokenize('"FOO" IN BOOLEAN MODE'.upper())
        #self.assertEquals( [('string', 'FOO'), ('k', 'IN BOOLEAN MODE')], tokens)

        # mysql comments terminate on normal "*/" AND
        # on another C-style start comment /*  !!!!

        # ==> select 1,2
        tokens = s.tokenize("SELECT /*!000001,/*!000002")
        self.assertEquals([('k', 'SELECT'), ('1', '1'), (',', ','),
                           ('1', '2')], tokens)

        tokens = s.tokenize("SELECT /*!1,/*!2*/")
        self.assertEquals([('k', 'SELECT'), ('1', '1'), (',', ','),
                           ('1', '2')], tokens)

        # ==> select 1,2
        tokens = s.tokenize("SELECT /*!000001,/*!000002*/")
        self.assertEquals([('k', 'SELECT'), ('1', '1'), (',', ','),
                           ('1', '2')], tokens)

        # ==> select 1,2,3
        tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
        self.assertEquals([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'),
                           (',', ','), ('1', '3')], tokens)

        # ==> select 1,2,3
        tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
        self.assertEquals([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'),
                           (',', ','), ('1', '3')], tokens)

        tokens = s.tokenize("1+2")
        self.assertEquals([('1', '1'), ('o', '+'), ('1', '2')], tokens)

        tokens = s.tokenize("1 /**/UNION/**/SELECT")
        self.assertEquals([('1', '1'), ('c', '/**/'), ('o', 'UNION'),
                           ('c', '/**/'), ('k', 'SELECT')], tokens)
        tokens = s.syntax(tokens)
        self.assertEquals([('1', '1'), ('o', 'UNION'), ('k', 'SELECT')],
                          tokens)

        tokens = s.tokenize("1 /**/UNION/**/ALL/**/SELECT")
        #self.assertEquals( [('1', '1'), ('c', '/**/'), ('o', 'UNION'), ('c', '/**/'), ('k', 'SELECT')], tokens)
        tokens = s.syntax(tokens)
        self.assertEquals([('1', '1'), ('o', 'UNION ALL'), ('k', 'SELECT')],
                          tokens)
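
Since the assertions use unittest's assertEquals, testParser presumably lives in a unittest.TestCase subclass whose class statement the excerpt omits. A minimal hedged harness (class name hypothetical):

import unittest

class TestTokens(unittest.TestCase):   # hypothetical name; not in the excerpt
    # ... testParser from above goes here ...
    pass

if __name__ == '__main__':
    unittest.main()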
Example No. 3
class Attacker:
    def __init__(self):
        self.lex = SQLexer()
        self.alpha_re = re.compile(r'^\s*[A-Za-z0-9_]*\s*$')

        #  foo10" or foo1'  must start with letter to preven
        #  7" # .... which is likely a measurement, not sqli
        self.alpha_str_re = re.compile(r'^[A-Z][A-Z0-9_]+[\'\"]$')

        self.pmap_new = frozenset(
[
'1ok','1oks,','1oksc','noL1R','1okLk','1okfL',
'sonos', 'sono1', 'sosos', '1ono1',
'sonoo', '1Rono', 's;n:k', 'k1,1,', 'k1,1k',
'nokLk',
# unions
 'okkkn',
'ofL1R',

'fLk,L',
'1,1R,', '1,LfL', '1,Lk1', '1,LkL', '1,Lkf', '1,fL1', '1,sR,', '1;kL1', '1;kLL', '1;kLo', '1;kfL', '1;kks', '1;knL', '1;knc', '1;koL', '1;kok', '1R,L1', '1R;kL', '1R;kf', '1R;kk', '1R;kn', '1R;ko', '1RLRs', '1RR;k', '1RRR;', '1RRRR', '1RRRk', '1RRRo', '1RRk1', '1RRkk', '1RRo1', '1RRoL', '1RRof', '1RRok', '1RRon', '1RRoo', '1Rk1', '1Rk1c', '1Rk1o', '1Rkks', '1Ro1f', '1Ro1k', '1Ro1o', '1RoL1', '1RoLk', '1RoLn', '1RofL', '1Rok1', '1RokL', '1RooL', '1k1', '1k1c', '1kfL1', '1kkL1', '1kksc', '1o1Ro', '1o1fL', '1o1kf', '1o1o1', '1o1oL', '1o1of', '1o1ok', '1o1on', '1o1oo', '1o1ov', '1oL1R', '1oL1o', '1oLL1', '1oLLL', '1oLLf', '1oLfL', '1oLk1', '1oLkf', '1oLkn', '1oLnR', '1oLsR', '1ofL1', '1ofLR', '1ofLf', '1ofLn', '1ok1', '1ok1,', '1ok1c', '1ok1k', '1okL1', '1okv,',
# '1ono1',
 '1onos', '1oo1o', '1ooL1', '1oso1', ';kknc', 'fL1,f', 'fL1Ro', 'fL1o1', 'fLRof', 'fLfL1', 'fLfLR', 'fLkLR', 'fLnLR', 'fLv,1', 'k1kLk', 'k1oLs', 'kLRok', 'kLk,L', 'kLokL', 'kLvvR', 'kfL1,', 'kfL1R', 'kfLfL', 'kfLn,', 'kvkL1', 'n,LfL', 'n,Lk1', 'n,LkL', 'n,Lkf', 'n,fL1', 'n;kL1', 'n;kLL', 'n;kfL', 'n;kks', 'n;knL', 'n;koL', 'n;kok', 'nR;kL', 'nR;kf', 'nR;kk', 'nR;kn', 'nR;ko', 'nRR;k', 'nRRR;', 'nRRRk', 'nRRRo', 'nRRkk', 'nRRo1', 'nRRoL', 'nRRof', 'nRRok', 'nRk1o', 'nRkks', 'nRo1f', 'nRo1o', 'nRoLk', 'nRofL', 'nRokL', 'nkksc', 'no1fL', 'no1o1', 'no1oL', 'no1of', 'noLk1', 'nofL1', 'nokL1', 'noo1o', 'ofL1o', 'ofLRo', 'ok1o1', 'oo1kf', 's,1R,', 's;k1,', 's;k1o', 's;k;', 's;kL1', 's;kLL', 's;kLo', 's;k[k', 's;k[n', 's;kfL', 's;kkn', 's;kks', 's;knL', 's;knc', 's;knk', 's;knn', 's;koL', 's;kok', 'sR,L1', 'sR;kL', 'sR;kf', 'sR;kk', 'sR;kn', 'sR;ko', 'sRR;k', 'sRRR;', 'sRRRk', 'sRRRo', 'sRRk1', 'sRRkk', 'sRRo1', 'sRRoL', 'sRRof', 'sRRok', 'sRRoo', 'sRk1', 'sRk1c', 'sRk1o', 'sRkks', 'sRo1f', 'sRo1k', 'sRo1o', 'sRoLk', 'sRofL', 'sRok1', 'sRokL', 'sRooL', 'sc', 'sfL1R', 'sfLn,', 'sfLsR', 'sk1', 'sk1c', 'sk1o1', 'sk1os', 'skR;k', 'skRk1', 'skRkk', 'skRo1', 'skRoL', 'skRof', 'skRok', 'skks', 'skksc', 'skoL1', 'skoLk', 'so1c', 'so1fL', 'so1kf', 'so1o1', 'so1oL', 'so1of', 'so1ok', 'so1on', 'so1oo', 'so1os', 'so1ov', 'soL1R', 'soL1o', 'soLLL', 'soLLk', 'soLLs', 'soLfL', 'soLk1', 'soLkR', 'soLkk', 'soLkn', 'soLks', 'soLsR', 'sofL1', 'sofLR', 'sofLf', 'sofLk', 'sok1', 'sok1,', 'sok1c', 'sok1o', 'sokL1', 'sokLk', 'sokLo', 'sokLs', 'sokc', 'sokfL', 'sokn,', 'soknk', 'soko1', 'sokoL', 'sokok', 'sokoo', 'sokos', 'son:o',
# 'sonk1',
'soLko',
 'soo1o', 'sooL1', 'sooLk', 'sooLo', 'soofL', 'sookc', 'soos', 'sos', 'sovo1', 'sovok', 'sovoo', 'sovos', 'sovov', 'vok1,']
)
        self.pmap_new2 = frozenset(
['1,1R', '1,Lf', '1,Lk', '1,LL', '1,fL', '1,sR', '1;kL', '1;kf', '1;kk', '1;kn', '1;ko', '1R,L', '1R;k', '1RLR', '1RR;', '1RRR', '1RRk', '1RRo', '1Rk1', '1Rkk', '1Ro1', '1RoL', '1Rof', '1Rok', '1Roo', '1k1', '1k1c', '1kfL', '1kkL', '1kks', '1o1R', '1o1f', '1o1k', '1o1o', '1oL1', '1oLf', '1oLL', '1oLk', '1oLn', '1oLs', '1ofL', '1ok1', '1okL', '1okk', '1okv', '1ono', '1oo1', '1ooL', '1oso', ';kkn', 'fL1,', 'fL1R', 'fL1o', 'fLRo', 'fLfL', 'fLkL', 'fLnL', 'fLv,', 'fLL1', 'fLLf','k1kL', 'k1oL', 'kLRo', 'kLk,', 'kLok', 'kLvv', 'kfL1', 'kfLL', 'kfLf', 'kfLn', 'kvkL', 'n,Lf', 'n,LL', 'n,Lk', 'n,fL', 'n;kL', 'n;kf', 'n;kk', 'n;kn', 'n;ko', 'nR;k', 'nRR;', 'nRRR', 'nRRk', 'nRRo', 'nRk1', 'nRkk', 'nRo1', 'nRoL', 'nRof', 'nRok', 'nkks', 'no1f', 'no1o', 'noLk', 'nofL', 'nokL', 'noo1', 'ofL1', 'ofLL', 'ofLR', 'ok1o', 'oo1k', 's,1R', 's;k1', 's;k;', 's;kL', 's;k[', 's;kf', 's;kk', 's;kn', 's;ko', 'sR,L', 'sR;k', 'sRR;', 'sRRR', 'sRRk', 'sRRo', 'sRk1', 'sRkk', 'sRo1', 'sRoL', 'sRof', 'sRok', 'sRoo', 'sc', 'sfL1', 'sfLL', 'sfLn', 'sfLs', 'sk1', 'sk1c', 'sk1o', 'skR;', 'skRk', 'skRo', 'skks', 'skoL', 'skoo', 'so1c', 'so1f', 'so1k', 'so1o', 'soL1', 'soLL', 'soLf', 'soLk', 'soLs', 'sofL', 'sok1', 'sokL', 'sokc', 'sokf', 'sokk', 'sokn', 'soko', 'son:', 'sonk',
# 'sono',
 'soo1', 'sooL', 'soof', 'sook', 'soos', 'sos',
# 'soso',
 'sovo', 'vok1', 'voko'])

        self.badsql = (
            '\\',  # backslash not allowed in non-string
            '[1]',
            'o[',
            'on,n',
            'knn',
            '1:',
            'no1n',
            ';no',
            ';nn',
            ';n,',
            ';[',
            ';s',
            'Lnon',  # "(FOO, BAR...."
#            'Ln,n',  # "(FOO OR BAR...
            'Lnn',
            'onon',
            'nono',
            'nnn',
            'nLn',
            'o,',
            'n1',
            '1nn',
            '11',
            's1',
            ';1',
            ';n;',
            ';s;',
            'onn',
            'sns',
            'LnnR'
            )

    def type_string(self, s, pmap, tname, delim=None):
        self.lex.tokenize_reset(s)
        if delim is not None:
            self.lex.parseInitialString(delim)

        tokens = self.lex.tokenize_loop()
        tokens = self.lex.syntax(tokens)
        tokens = self.constant_folding2(tokens)
        (sqli, fullpat, pat, reason) = self.patmatch(tokens, pmap)

        if not sqli:
            #print 'False: %s %s in %s on full %s' % (tname, reason, pat, fullpat)
            return None

        return tname, pat, fullpat, tokens

    def test(self, s):
        m = self.type_string(s, self.pmap_new, 'type1')
        if m:
            return m

        m = self.type_string(s, self.pmap_new, 'type2', "'")
        if m:
            return m

        m = self.type_string(s, self.pmap_new, 'type3', '"')
        if m:
            return m

        return None
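
    # Hedged usage sketch (uppercased input, as in the tests): test() probes
    # the input as-is ('type1'), then as if it appeared inside a half-open
    # single-quoted SQL string ('type2'), then a double-quoted one ('type3');
    # the first match wins.
    #
    #   Attacker().test('1 UNION SELECT')
    #   # -> ('type1', '1ok', '1ok', tokens)   fingerprint '1ok' is in pmap_new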

    def normalize(self, s):
        while True:
            snew = unquote(s)
            if s == snew:
                break
            s = snew.upper()

        # common html cut-n-paste problem
        # we do NOT want to make this a '"'
        # since then one could attack by inserting &quot; which
        # SQL will know, but we'll think it's a "
        s = s.replace('&QUOT;', '"')

        s = s.replace('&#39;', '\'')
        return s
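
    # The loop above unquotes to a fixed point, so double-encoded payloads
    # decode fully, e.g. (sketch):
    #
    #   normalize('1%2527%20OR%25271%2527%253D%25271')
    #   # pass 1 -> 1%27 OR %271%27%3D%271
    #   # pass 2 -> 1' OR '1'='1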

    # simplifies basic arithmetic expressions that might be used
    # as english abbreviations
    # merges ';;' into ';'
    def constant_folding2(self, tokens):
        tlen = len(tokens)
        if tlen == 0:
            return tokens

        # skip all leading left-parens and unary chars
        index = 0
        while index < tlen:
            if tokens[index][0] == 'L':
                index += 1
            elif tokens[index][0] == 'o' and tokens[index][1] in ('-', '+', '~'):
                index += 1
            else:
                break

        newt = []
        last = None
        isunary = False
        for t in tokens[index:]:

            if len(newt) == 5:
                if last and last[0] == 'o':
                    newt.append(last)
                newt += tokens[index:]
                return newt

            index += 1


            # skip over repeated runs of unary operators
            # 1+---+2 -> 1+2 -> 1
            if t[0] == 'o' and t[1] in ('!', '+', '-', '~'):
                if isunary:
                    continue
                else:
                    isunary = True
            else:
                isunary = False

            if t[0] == '1':
                if last is None or last[0] in ('1', ';'):
                    newt.append(t)
                last = t
            elif t[0] in ('x', 'X'):
                if last is None or last[0] in ('n', ';', '1'):
                    newt.append(t)
                last = t
            elif t[0] == 'o' and t[1] in ('!', '+', '-', '~', '/', '%', '*', 'MOD', 'DIV'):
                if last and last[0] in ('1', 'n'):
                    # last token was a number or name: fold the operator in
                    last = t
                elif last and last[0] == 'o' and last[1] in ('!', '+', '-', '~', '/', '%', '*', 'MOD', 'DIV'):
                    pass
                else:
                    newt.append(t)
                    last = None
            elif t[0] == ';':
                if last and last[0] == ';':
                    pass
                elif last and last[0] == 'o':
                    newt.append(last)
                    last = t
                else:
                    newt.append(t)
                    last = t
            else:
                if last and last[0] == 'o':
                    newt.append(last)
                newt.append(t)
                last = None

        if last and last[0] == 'o':
            newt.append(last)
        return newt
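
    # Hedged illustration: runs of unary operators collapse and
    # number-operator-number chains reduce, so arithmetic shorthand like
    # 1+--+2 contributes a single number token:
    #
    #   constant_folding2([('1', '1'), ('o', '+'), ('o', '-'),
    #                      ('o', '-'), ('o', '+'), ('1', '2')])
    #   # -> [('1', '1')]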

    def is_valid_sql(self, pat, tokens=None):
        tlen = len(tokens)
        # common english screwups

        if tlen == 5:
            if pat in ('so1on', 'no1oo', 'no1of'):
                return 'too short'
            elif pat in ('no1o1', '1ono1'):
                if tokens[1][1] in ('AND', 'OR', '&&', '||') and tokens[1][1] != tokens[3][1]:
                    return None
                else:
                    return 'bogon'

        pat5 = pat[0:5]
        if pat5 in ('sonos', 'sono1', 'sosos', '1ono1', 'so1on', 'sonoo', 'no1oL', 'no1o1'):
            if tlen == 5 and tokens[1][1] != tokens[3][1] and tokens[1][1] not in ('&',):
                return None
            elif tokens[1][1] in ('UNION', 'UNION ALL'):
                return None
            elif tokens[1][1] in ('AND', 'OR', '&&', '||') and tokens[1][1] != tokens[3][1]:
                return None
            #elif tokens[3][1] in ('AND', 'OR', '&&', '||') and tokens[1][1] != tokens[3][1]:
            #    return None
            else:
                return "Unlikely"
        elif pat5.endswith('f') and tlen > 5 and pat[5] != 'L':
            return 'function missing left'



        if tlen <= 4:
            if pat == 'sos':
                if tokens[1][1] in ('*', '/', '-', '+'):
                    return 'too small, harmless'

                isnormal_left = tokens[0][1][0] in ('"',"'")
                isnormal_right = tokens[2][1][-1] in ('"',"'")
                isempty_right = len(tokens[2][1]) == 1
                isenglish = tokens[1][1] in ('AND','&','NOT','UNION','IS','MOD')
                if isenglish:
                    # open string  ...foo "LIKE"
                    return "pat is string-operator-string and operator is logical"
                elif isnormal_left and isnormal_right:
                    # "FOO" + "BAR", type1 style
                    return "fully formed type1 sos -- ignoring"
                elif not isnormal_left and isempty_right:
                    return "not much of an attack"
                elif isnormal_left and not isnormal_right:
                    return "looks like truncation"
            elif pat == 'soos':
                if tokens[1][1] == tokens[2][1]:
                    return "likely double typing or AND or OR"

            elif pat == 'sc':
                if self.alpha_str_re.match(tokens[0][1]):
                    return None
                else:
                    return "gibberish"

            elif pat in ('sk1', '1k1'):
                if tokens[1][1] not in ('ORDER BY', 'GROUP BY', 'OWN3D BY'):
                    return "pat is string-k-number but k not order/group by"

            elif pat == '1ok':
                # sqli fragment
                if tokens[1][1] not in ( 'UNION', 'UNION ALL'):
                    return 'too short'

            # right start, but too short to sqli
            elif pat in ('n;kn', 'no1o'):
                return "too short"

        return None
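
    # Hedged example of the 'sos' branch above: a half-open string joined by
    # a logical operator is dismissed rather than flagged:
    #
    #   is_valid_sql('sos', [('s', 'FOO"'), ('o', '&'), ('s', '"BAR')])
    #   # -> "pat is string-operator-string and operator is logical"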

    def is_valid_sql2(self, pat, tokens=None):
        tlen = len(tokens)

        # if form of foo'--, foo'#, foo'/*   (half open string)+comment
        if pat == 'sc':
            if self.alpha_str_re.match(tokens[0][1]):
                return None
            else:
                return "gibberish"

        pos = pat.find(';')
        if pos != -1 and pos + 1 < tlen and tlen > 4:
            pos += 1
            if pat[pos:].startswith('fL'):
                return None
            elif tokens[pos][0] == 'c':
                return None
            elif tokens[pos][0] == 'k' and tokens[pos][1] in ('CREATE', 'SELECT', 'SET', 'INSERT', 'UPDATE', 'DECLARE', 'BEGIN', 'SHUTDOWN', 'WHILE', 'WAITFOR', 'DROP', 'DELETE', 'IF', 'CASE', 'END', 'EXEC', 'EXECUTE'):
                return None
            else:
                return "Token after semicolon isn't a sql verb or function"

        # if a function isn't followed by a (
        #  f[^L]|f$
        for i in range(tlen - 1):
            if tokens[i][0] == 'o' and tokens[i][1] in ('UNION', 'UNION ALL'):
                if tokens[i+1][0] == 'k' and tokens[i+1][1] == 'SELECT':
                    return None
            if tokens[i][0] == 'f':
                if tokens[i+1][0] != 'L':
                    #pass
                    return "function '%s' not followed by L" % (tokens[i][1],)


        # last real token can't be a function or operator
        # ends with "function"
        # ends with "operator"
        # ends with "function-comment"
        # ends with "operator-comments"
        # [fo]c?$
        if tokens[-1][0] in ('f', 'o'):
            return "sql can't end in an operator or function"

        elif tokens[-1][0] == 'c' and tlen > 1 and tokens[-2][0] in ('f', 'o'):
            return "sql can't end in an operator or function"


        if pat[0] == 's' and ';' not in pat:
            if tokens[0][1][0] not in ('"', "'"):
                # type2 or type3
                #if pat[-1] not in ('s', 'c'):
                #    pass
                #    #print "False: Expression started as half-open but didn't end as half-open string"
                #    #return "Expression started as half-open but didn't end as half-open string"
                if pat[-1] == 's' and len(tokens[-1][1]) > 1 and tokens[-1][1][-1] in ('"', "'"):
                    #print "False: Expression started with open string, but closed with full string"
                    return "Expression started with open string, but closed with full string"


        # a CASE operator (conditional)
        # must have a WHEN and THEN following it
        cstate = 0
        for t in tokens:
            if cstate == 0 and t == ('o', 'CASE'):
                cstate = 1
            elif t == ('k', 'WHEN'):
                if cstate == 1:
                    cstate = 2
                else:
                    return "Found WHEN without CASE"
            elif t == ('k', 'THEN'):
                if cstate == 2:
                    cstate = 3
                else:
                    return "Found THEN without CASE"
                    # maybe origtokens[cpos] = ('n', 'CASE'); break
                    # too... this isn't the right form for a case statement

        # if have a case without WHEN.. THEN.. then it's not an operator
        if cstate == 1 or cstate == 2:
            return "Found case without WHEN/THEN"

        # check unary followed by binary operator
        uo = self.lex.unary_operators
        for i in range(tlen - 1):
            if tokens[i][0] == 'o' and tokens[i][1] in uo and tokens[i+1][0] == 'o' and tokens[i+1][1] not in uo:
                return "unary operator can't be followed by a binary operator"

        # reject other simple statements
        for p in self.badsql:
            if p in pat:
                return "Found invalid sql pattern " + p

        if pat.startswith('son') and tokens[1][1] == '+':
            return "Likely false positive, ....'+FOO, due to URL as parameter"

        #if pat == 'sono1s' and tokens[1][1] == '+' and tokens[3][1] == '+':
        #    return "Likely a url encoded measurement, e.g.  2' x 4'"

        # a left parens '(' must have a right parens ')'
        #has_rightp = 'R' in pat
        #if 'L' in pat and not has_rightp and pat[-1] != 'c':
        #    return "leftparens found without right parens"

        # number,comma must have a )... (IN list)
        # ^[1ns],.*l
        #if pat.startswith('1,') and not has_rightp:
        #    return "number-comma missing right parens"
        #if pat.startswith('n,') and not has_rightp:
        #    return "n-comma missing right parens"
        #if pat.startswith('s,') and not has_rightp:
        #    return "string-comma missing right parens"

        if pat.startswith('noLn') and (tokens[1][1] not in ('AND','OR', '&&', '||')):
            return "starts with n-operator and operator is not logical"

        # ignore query string like things
        #  foo=1234&bar=fruit
        if pat.startswith('no1o') or pat.startswith('nono') or pat.startswith('soso'):
            if tokens[1][1] == tokens[3][1]:
                return "XoYo where o is the same -- bogus"
            if tokens[1][1] == '&' and tokens[3][1] == '=':
                return "starts with no1o and looks like qstring"

        #  foo=1234&bar=fruit
        if pat.startswith('sono') and tokens[1][1] == '&' and tokens[3][1] == '=':
            return 'got ..."&foo=... likely query string'


        #if pat == '1k' and not (tokens[1][0] == 'k' and tokens[1][1] in ('ORDER BY', 'GROUP BY', 'OWN3D BY')):
        #    return 'starts with number-k but k is not order/group by'

        # reject stuff like 1/FOO  1-FOO
        if pat.startswith('1on') and tokens[1][1] not in ('AND', 'OR', '&&', '||', 'UNION', 'UNION ALL'):
            return "number-operator-n but operator is not logical or union"

        if pat.startswith('sok') or pat.startswith('nok'):
            if tokens[1][1] == 'NOT':
                return '[string|number]-NOT-keyword rejected'
            if tokens[2][1] == 'WITH':
                return '[string|number]-operator-WITH rejected'

        # common english screwups
        if tlen <= 4:
            if pat == 'sos':
                isnormal_left = tokens[0][1][0] in ('"',"'")
                isnormal_right = tokens[2][1][-1] in ('"',"'")
                isempty_right = len(tokens[2][1]) == 1
                isenglish = tokens[1][1] in ('AND','&','NOT','UNION','IS','MOD')
                if isenglish:
                    # open string  ...foo "LIKE"
                    return "pat is string-operator-string and operator is logical"
                elif isnormal_left and isnormal_right:
                    # "FOO" + "BAR", type1 style
                    return "fully formed type1 sos -- ignoring"
                elif not isnormal_left and isempty_right:
                    return "not much of an attack"
                elif isnormal_left and not isnormal_right:
                    return "looks like truncation"


            elif pat in ('sk1', '1k1'):
                if tokens[1][1] not in ('ORDER BY', 'GROUP BY', 'OWN3D BY'):
                    return "pat is string-k-number but k not order/group by"

        return None

    def patmatch(self, tokens, pmap):
        fullpat = ''.join([t[0] for t in tokens])
        pat = fullpat[0:5]
        if pat in pmap:
            oksql = self.is_valid_sql(fullpat, tokens)
            if oksql is None:
                return (True, fullpat, pat, '')
            else:
                return (False, fullpat, pat, oksql)
        return (False, fullpat, '', 'No starting pattern found')

    def parens_match(self, origtokens):
        count = 0
        for t in origtokens:
            if t[0] == 'L':
                count += 1
            elif t[0] == 'R':
                count -= 1
            if count < 0:
                return False
        if count != 0:
            return False
        return True
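
A hedged usage sketch to close: parens_match() is a running balance check over the raw token stream. It fails fast if a ')' ever precedes its '(' and requires the count to return to zero (assumes SQLexer from this module is available, since Attacker's constructor instantiates it):

a = Attacker()
print(a.parens_match([('L', '('), ('1', '1'), ('R', ')')]))   # True
print(a.parens_match([('R', ')'), ('L', '(')]))               # False: closes early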