forked from smicallef/spiderfoot
/
sfdb.py
379 lines (326 loc) · 14.4 KB
/
sfdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#-------------------------------------------------------------------------------
# Name: sfdb
# Purpose: Common functions for working with the database back-end.
#
# Author: Steve Micallef <steve@binarypool.com>
#
# Created: 15/05/2012
# Copyright: (c) Steve Micallef 2012
# Licence: GPL
#-------------------------------------------------------------------------------
import hashlib
import random
import sqlite3
import sys
import time
from sflib import SpiderFoot
# SpiderFoot class passed to us
sf = None
class SpiderFootDb:
def __init__(self, opts):
global sf
# connect() will create the database file if it doesn't exist, but
# at least we can use this opportunity to ensure we have permissions to
# read and write to such a file.
dbh = sqlite3.connect(opts['__database'], timeout=10)
if dbh == None:
sf.error("Could not connect to internal database. Check that " + \
opts['__database'] + " exists and is readable and writable.")
dbh.text_factory = str
self.conn = dbh
self.dbh = dbh.cursor()
sf = SpiderFoot(opts)
# Now we actually check to ensure the database file has the schema set
# up correctly.
try:
self.dbh.execute('SELECT COUNT(*) FROM tbl_scan_config')
except sqlite3.Error:
sf.error("Found spiderfoot.db but it doesn't appear to be in " \
"the expected state - ensure the schema is created.")
return
#
# Back-end database operations
#
# Close the database handle
def close(self):
self.dbh.close()
# Log an event to the database
def scanLogEvent(self, instanceId, classification, message, component=None):
if component == None:
component = "SpiderFoot"
qry = "INSERT INTO tbl_scan_log \
(scan_instance_id, generated, component, type, message) \
VALUES (?, ?, ?, ?, ?)"
try:
self.dbh.execute(qry, (
instanceId, time.time() * 1000, component, classification, message
))
self.conn.commit()
except sqlite3.Error as e:
if "locked" in e.args[0]:
# TODO: Do something smarter here to handle locked databases
sf.fatal("Unable to log event in DB: " + e.args[0])
else:
sf.fatal("Unable to log event in DB: " + e.args[0])
return True
# Generate an globally unique ID for this scan
def scanInstanceGenGUID(self, scanName):
hashStr = hashlib.sha256(
scanName +
str(time.time() * 1000) +
str(random.randint(100000, 999999))
).hexdigest()
return hashStr
# Store a scan instance
def scanInstanceCreate(self, instanceId, scanName, scanTarget):
qry = "INSERT INTO tbl_scan_instance \
(guid, name, seed_target, created, status) \
VALUES (?, ?, ?, ?, ?)"
try:
self.dbh.execute(qry, (
instanceId, scanName, scanTarget, time.time() * 1000, 'CREATED'
))
self.conn.commit()
except sqlite3.Error as e:
sf.fatal("Unable to create instance in DB: " + e.args[0])
return True
# Update the start time, end time or status (or all 3) of a scan instance
def scanInstanceSet(self, instanceId, started=None, ended=None, status=None):
qvars = list()
qry = "UPDATE tbl_scan_instance SET "
if started != None:
qry += " started = ?,"
qvars.append(started)
if ended != None:
qry += " ended = ?,"
qvars.append(ended)
if status != None:
qry += " status = ?,"
qvars.append(status)
# guid = guid is a little hack to avoid messing with , placement above
qry += " guid = guid WHERE guid = ?"
qvars.append(instanceId)
try:
self.dbh.execute(qry, qvars)
self.conn.commit()
except sqlite3.Error:
sf.fatal("Unable to set information for the scan instance.")
# Return info about a scan instance (name, target, created, started,
# ended, status) - don't need this yet - untested
def scanInstanceGet(self, instanceId):
qry = "SELECT name, seed_target, ROUND(created/1000) AS created, \
ROUND(started/1000) AS started, ROUND(ended/1000) AS ended, status \
FROM tbl_scan_instance WHERE guid = ?"
qvars = [instanceId]
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchone()
except sqlite3.Error as e:
sf.error("SQL error encountered when retreiving scan instance:" +
e.args[0])
# Obtain a summary of the results per event type
def scanResultSummary(self, instanceId):
qry = "SELECT r.type, e.event_descr, MAX(ROUND(generated)) AS last_in, \
count(*) AS total, count(DISTINCT r.data) as utotal FROM \
tbl_scan_results r, tbl_event_types e WHERE e.event = r.type \
AND r.scan_instance_id = ? GROUP BY r.type ORDER BY e.event_descr"
qvars = [instanceId]
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching result summary: " +
e.args[0])
# Obtain the data for a scan and event type
def scanResultEvent(self, instanceId, eventType='ALL'):
qry = "SELECT ROUND(c.generated) AS generated, c.data, \
s.data as 'source_data', \
c.module, c.type, c.confidence, c.visibility, c.risk \
FROM tbl_scan_results c, tbl_scan_results s \
WHERE c.scan_instance_id = ? AND c.source_event_hash = s.hash AND \
s.scan_instance_id = c.scan_instance_id"
qvars = [instanceId]
if eventType != "ALL":
qry = qry + " AND c.type = ?"
qvars.append(eventType)
qry = qry + " ORDER BY c.data"
#print "QRY: " + qry
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching result events: " +
e.args[0])
# Obtain a unique list of elements
def scanResultEventUnique(self, instanceId, eventType='ALL'):
qry = "SELECT DISTINCT data, type FROM tbl_scan_results \
WHERE scan_instance_id = ?"
qvars = [instanceId]
if eventType != "ALL":
qry = qry + " AND type = ?"
qvars.append(eventType)
qry = qry + " ORDER BY type, data"
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching unique result events: " +
e.args[0])
# Get scan logs
def scanLogs(self, instanceId):
qry = "SELECT generated AS generated, component, \
type, message FROM tbl_scan_log WHERE scan_instance_id = ? \
ORDER BY generated DESC"
qvars = [instanceId]
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching result events: " +
e.args[0])
# Delete a scan instance
def scanInstanceDelete(self, instanceId):
qry1 = "DELETE FROM tbl_scan_instance WHERE guid = ?"
qry2 = "DELETE FROM tbl_scan_config WHERE scan_instance_id = ?"
qry3 = "DELETE FROM tbl_scan_results WHERE scan_instance_id = ?"
qvars = [instanceId]
try:
self.dbh.execute(qry1, qvars)
self.dbh.execute(qry2, qvars)
self.dbh.execute(qry3, qvars)
self.conn.commit()
except sqlite3.Error as e:
sf.error("SQL error encountered when deleting scan: " +
e.args[0])
# Store the default configuration
def configSet(self, optMap=dict()):
qry = "REPLACE INTO tbl_config (scope, opt, val) VALUES (?, ?, ?)"
for opt in optMap.keys():
# Module option
if ":" in opt:
parts = opt.split(':')
qvals = [ parts[0], parts[1], optMap[opt] ]
else:
# Global option
qvals = [ "GLOBAL", opt, optMap[opt] ]
try:
self.dbh.execute(qry, qvals)
except sqlite3.Error as e:
sf.error("SQL error encountered when storing config, aborting: " +
e.args[0])
self.conn.commit()
# Retreive the config from the database
def configGet(self):
qry = "SELECT scope, opt, val FROM tbl_config"
try:
retval = dict()
self.dbh.execute(qry)
for [scope, opt, val] in self.dbh.fetchall():
if scope == "GLOBAL":
retval[opt] = val
else:
retval[scope + ":" + opt] = val
return retval
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching configuration: " + e.args[0])
# Reset the config to default (clear it from the DB and let the hard-coded
# settings in the code take effect.)
def configClear(self):
qry = "DELETE from tbl_config"
try:
self.dbh.execute(qry)
self.conn.commit()
except sqlite3.Error as e:
sf.error("Unable to clear configuration from the database: " + e.args[0])
# Store a configuration value for a scan
def scanConfigSet(self, id, optMap=dict()):
qry = "REPLACE INTO tbl_scan_config \
(scan_instance_id, component, opt, val) VALUES (?, ?, ?, ?)"
for opt in optMap.keys():
# Module option
if ":" in opt:
parts = opt.split(':')
qvals = [ id, parts[0], parts[1], optMap[opt] ]
else:
# Global option
qvals = [ id, "GLOBAL", opt, optMap[opt] ]
try:
self.dbh.execute(qry, qvals)
except sqlite3.Error as e:
sf.error("SQL error encountered when storing config, aborting: " +
e.args[0])
self.conn.commit()
# Retreive configuration data for a scan component
def scanConfigGet(self, instanceId):
qry = "SELECT component, opt, val FROM tbl_scan_config \
WHERE scan_instance_id = ? ORDER BY component, opt"
qvars = [instanceId]
try:
retval = dict()
self.dbh.execute(qry, qvars)
for [component, opt, val] in self.dbh.fetchall():
if component == "GLOBAL":
retval[opt] = val
else:
retval[component + ":" + opt] = val
return retval
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching configuration: " + e.args[0])
# Store an event
# eventData is a SpiderFootEvent object with the following variables:
# - eventType: the event, e.g. URL_FORM, RAW_DATA, etc.
# - generated: time the event occurred
# - confidence: how sure are we of this data's validity, 0-100
# - visibility: how 'visible' was this data, 0-100
# - risk: how much risk does this data represent, 0-100
# - module: module that generated the event
# - data: the actual data, i.e. a URL, port number, webpage content, etc.
# - sourceEventHash: hash of the event that triggered this event
# And getHash() will return the event hash.
def scanEventStore(self, instanceId, sfEvent):
qry = "INSERT INTO tbl_scan_results \
(scan_instance_id, hash, type, generated, confidence, \
visibility, risk, module, data, source_event_hash) \
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
qvals = [ instanceId, sfEvent.getHash(), sfEvent.eventType, sfEvent.generated,
sfEvent.confidence, sfEvent.visibility, sfEvent.risk,
sfEvent.module, sfEvent.data, sfEvent.sourceEventHash ]
#print "STORING: " + str(qvals)
try:
self.dbh.execute(qry, qvals)
self.conn.commit()
return None
except sqlite3.Error as e:
sf.fatal("SQL error encountered when storing event data (" + str(self.dbh) + ": " +
e.args[0])
# List of all previously run scans
def scanInstanceList(self):
# SQLite doesn't support OUTER JOINs, so we need a work-around that
# does a UNION of scans with results and scans without results to
# get a complete listing.
qry = "SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, COUNT(r.type) \
FROM tbl_scan_instance i, tbl_scan_results r WHERE i.guid = r.scan_instance_id \
GROUP BY i.guid \
UNION ALL \
SELECT i.guid, i.name, i.seed_target, ROUND(i.created/1000), \
ROUND(i.started)/1000 as started, ROUND(i.ended)/1000, i.status, '0' \
FROM tbl_scan_instance i WHERE i.guid NOT IN ( \
SELECT distinct scan_instance_id FROM tbl_scan_results) \
ORDER BY started DESC"
try:
self.dbh.execute(qry)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching scan list: " + e.args[0])
# History of data from the scan
def scanResultHistory(self, instanceId):
qry = "SELECT STRFTIME('%H:%M %w', ROUND(generated/1000), 'unixepoch') AS hourmin, \
type, COUNT(*) FROM tbl_scan_results \
WHERE scan_instance_id = ? GROUP BY hourmin, type"
qvars = [instanceId]
try:
self.dbh.execute(qry, qvars)
return self.dbh.fetchall()
except sqlite3.Error as e:
sf.error("SQL error encountered when fetching scan history: " + e.args[0])