/
cluehunter.py
executable file
·200 lines (186 loc) · 8.43 KB
/
cluehunter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python
'''
Created on Dec 19, 2015
@author: yangke
'''
import logging
import argparse
import os
import subprocess
from parse.parse import LogParser
from model.TaintVar import TaintVar
from Tracker import Tracker
from parse.RedundancyFixer import RedundancyFixer
from parse.MacroInspector import MacroInspector
DESCRIPTION = """ClueHunter is an auxiliary tool for crash point reverse data flow analysis.
It generate data flow graph according to the gdb debug log(C program source code level).
It receive manually specified sink variables that cause the last line crash and perform interprocedural analysis on the log trace.
For obtaining the auto-debug trace, the tool `robot_dbg.exp` in ClueHunter requires the program under debug to be compiled with profiled code information (gcc **-g** operation)."""
DEFAULT_GDB_LOG="test/gdb_logs/swfmill-0.3.3/gdb-swfmill-0.3.3.txt"
DEFAULT_OUTPUT_PATH="."
DEFAULT_NAME="output"
class ClueHunter:
    """Command-line front end of ClueHunter.

    Parses and validates the command line, configures logging, creates the
    output directory, then runs the reverse data-flow analysis over a gdb
    trace log, dumping the taint graph as a .dot file and rendering it to
    .svg with Graphviz `dot`.
    """
    def __init__(self):
        # Order matters: arguments must be parsed and validated first,
        # because logger setup and output-dir creation both read self.args.
        self._init_arg_parser()
        self.args = self.arg_parser.parse_args()
        self.checkArguments()
        self._config_logger()
        self._create_output_dir()

    def checkArguments(self):
        """Validate parsed arguments; exit via arg_parser.error() on failure."""
        err = ''
        if not self.args.patterns and not self.args.variables:
            err = 'At least one sink variable should be provided.\nUse --variable [VARIABLE_NAME_LIST] --patterns [DATA_ACCESS_PATTERNS_LIST]'
        elif len(self.args.patterns) != len(self.args.variables):
            err = 'Variables and patterns number must be same!'
        else:
            for p in self.args.patterns:
                if p not in ('*', 'N'):
                    err = 'The specified pattern must be "*" or "N".'
                    break
        # Checked last on purpose: a bad redundancy level overrides any
        # earlier message, preserving the original reporting order.
        if self.args.level not in (RedundancyFixer.REMOVE_INLINE_REDUNDANT,
                                   RedundancyFixer.REMOVE_INTERPROCEDURAL_REDUNDANT):
            err = 'Redundancy level should be 0 or 1'
        if err:
            self.arg_parser.error(err)

    def _init_arg_parser(self):
        """Build the argparse parser and store it on self.arg_parser."""
        self.arg_parser = argparse.ArgumentParser(description=DESCRIPTION)
        source_sink_group = self.arg_parser.add_argument_group('sinks')
        source_sink_group.add_argument(
            '-ps', '--patterns',
            action='store',
            dest='patterns',
            required=True,
            nargs='+',
            default=['N'],
            help='''Specify the access pattern list of the sink identifiers.
            Patterns must be "*" or "N" separated with blanks.
            "N" means direct access, "*" means this is a pointer of the cared data.
            ''')
        source_sink_group.add_argument(
            '-vs', '--variables',
            action='store',
            dest='variables',
            required=True,
            nargs='+',
            default=['length'],
            help='Specify the identifier name of the sink variables. Example:"father->baby.toy"')
        self.arg_parser.add_argument(
            '-l', '--level',
            action='store',
            default=1,
            type=int,
            help="""Redundancy level of the parsing.
            0 means just remove inline or inner function redundancy; 1 means remove both of the inline and interprocedural redundancy.""")
        self.arg_parser.add_argument(
            '-i', '--index',
            action='store',
            default=-1,
            type=int,
            help="""The start trace line for tracking.
            Default value is -1 which means start from the last line.
            Positive integer means the {line number}-1 in the parsed result cluehunter/test/trace.txt.
            Negative integer means the last but what line of the cluehunter/test/trace.txt.
            0 is useless, but it still can be regarded as the first line.""")
        self.arg_parser.add_argument(
            '-t', '--trace',
            action='store',
            dest='trace',
            required=True,
            default=DEFAULT_GDB_LOG,
            help="""The file path of gdb trace log, for example, ./gdb.txt. This log should be generated by robot_dbg.exp.""")
        self.arg_parser.add_argument(
            '-o', '--output-directory',
            action='store',
            dest="output_path",
            default=DEFAULT_OUTPUT_PATH,
            help="""The output directory in which .dot and .png files will be dumped in this path.""")
        self.arg_parser.add_argument(
            '-m', '--c-project-dir',
            action='store',
            dest="c_project_dir",
            default=None,
            help="""The C project directory with the .i files made by gcc '-save-temps' option.
            Usually we add this flags during configure: ./configure CFLAGS='-g -save-temps'.""")
        self.arg_parser.add_argument(
            '-n', '--name',
            action='store',
            dest="name",
            default=DEFAULT_NAME,
            help="""The prefix name of the generated .dot and .png files.""")
        # -d / -v / -q are mutually exclusive ways to pick the console log level.
        group = self.arg_parser.add_mutually_exclusive_group()
        group.add_argument(
            '-d', '--debug',
            action='store_const',
            const=logging.DEBUG,
            dest='logging_level',
            default=logging.WARNING,
            help="""Enable debug output.""")
        group.add_argument(
            '-v', '--verbose',
            action='store_const',
            const=logging.INFO,
            dest='logging_level',
            default=logging.WARNING,
            help="""Increase verbosity.""")
        group.add_argument(
            '-q', '--quiet',
            action='store_const',
            const=logging.ERROR,
            dest='logging_level',
            default=logging.WARNING,
            help="""Be quiet during processing.""")

    def build_tiantvars_list(self):
        """Build the TaintVar sink list from --variables/--patterns.

        NOTE: the method name keeps its historical typo ("tiant") so any
        external callers are not broken.
        """
        vs = []
        for name, pattern in zip(self.args.variables, self.args.patterns):
            # 'N' = direct access (no dereference); '*' = pointer to the data.
            vs.append(TaintVar(name, [] if pattern == 'N' else ['*']))
        return vs

    def _analysis(self):
        """Parse the trace, run the tracker, and dump/render the taint graph."""
        parser = LogParser()
        parser.setRedundantLevel(self.args.level)
        l = parser.parse(self.args.trace)
        if not l:
            # Previously an empty parse crashed later with ZeroDivisionError
            # in the modulo below; fail with a clear message instead.
            self.arg_parser.error('No trace lines parsed from ' + self.args.trace)
        if self.args.c_project_dir is not None:
            macro_inspector = MacroInspector(self.args.c_project_dir)
            tracker = Tracker(l, macro_inspector)
        else:
            tracker = Tracker(l)
        # enumerate fixes the original l.index(line) lookup, which was O(n^2)
        # and returned the wrong index for duplicated trace lines.
        for i, line in enumerate(l):
            print(str(i) + "#" + str(line))
        # Normalize a possibly-negative --index into a valid list offset.
        traceIndex = (len(l) + self.args.index) % len(l)
        vs = self.build_tiantvars_list()
        tracker.setStartJobs(traceIndex, vs)
        TG = tracker.track()
        dot_path = os.path.join(self.args.output_path, self.args.name + ".dot")
        svg_path = os.path.join(self.args.output_path, self.args.name + ".svg")
        dot_text = TG.serialize2dot()  # serialize once (was computed twice)
        print(dot_text)
        output = open(dot_path, 'w')
        try:
            output.write(dot_text)
        finally:
            output.close()
        # Argument-list form (no shell) is safe for paths containing spaces
        # or shell metacharacters, unlike the previous shell=True string.
        subprocess.call(["dot", "-Tsvg", dot_path, "-o", svg_path])

    def execute(self):
        """Public entry point: run the whole analysis pipeline."""
        self._analysis()

    def _create_output_dir(self):
        """Create the output directory if it does not exist yet."""
        output_path = self.args.output_path
        if not os.path.isdir(output_path):
            self.logger.debug('Creating directory %s.', os.path.abspath(output_path))
            os.makedirs(output_path)
        self.logger.info('Output directory is %s.', os.path.abspath(output_path))

    def _config_logger(self):
        """Configure the 'cluehunter' logger.

        Console output honors the level chosen via -d/-v/-q; a full DEBUG
        log is always written to cluehunter.log.
        """
        self.logger = logging.getLogger('cluehunter')
        # Logger itself passes everything; handlers filter per destination.
        self.logger.setLevel(logging.DEBUG)
        console_handler = logging.StreamHandler()
        console_handler.setLevel(self.args.logging_level)
        file_handler = logging.FileHandler('cluehunter.log', 'w+')
        file_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter('[%(levelname)s] %(message)s')
        console_handler.setFormatter(formatter)
        file_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)
        self.logger.addHandler(file_handler)
if __name__=="__main__":
c=ClueHunter()
c.execute()