def rerunscan(self, id):
    """Rerun a scan.

    A new scan is started with the name, target and enabled modules of the
    existing scan, using a snapshot of the current configuration.

    Args:
        id (str): scan ID

    Returns:
        str: rendered error page when the scan cannot be re-run

    Raises:
        HTTPRedirect: redirect to info page for new scan
    """
    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    dbh = SpiderFootDb(cfg)

    info = dbh.scanInstanceGet(id)
    if not info:
        return self.error("Invalid scan ID.")

    scanname = info[0]
    scantarget = info[1]

    scanconfig = dbh.scanConfigGet(id)
    if not scanconfig:
        return self.error(f"Error loading config from scan: {id}")

    modlist = scanconfig['_modulesenabled'].split(',')
    # The stdout storage module is stripped from the list before re-running.
    if "sfp__stor_stdout" in modlist:
        modlist.remove("sfp__stor_stdout")

    targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
    if not targetType:
        # It must then be a name, as a re-run scan should always have a clean
        # target. Put quotes around the target value and try to determine the
        # target type again.
        targetType = SpiderFootHelpers.targetTypeFromString(f'"{scantarget}"')

    if not targetType:
        # Starting a scan without a target type would never create the scan
        # instance, leaving the wait loop below spinning forever.
        return self.error("Invalid target type. Could not recognize it as a target SpiderFoot supports.")

    if targetType not in ["HUMAN_NAME", "BITCOIN_ADDRESS"]:
        scantarget = scantarget.lower()

    # Start running a new scan
    scanId = SpiderFootHelpers.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except Exception as e:
        self.log.error(f"[-] Scan [{scanId}] failed: {e}")
        return self.error(f"[-] Scan [{scanId}] failed: {e}")

    # Wait until the scan has initialized
    while dbh.scanInstanceGet(scanId) is None:
        self.log.info("Waiting for the scan to initialize...")
        time.sleep(1)

    raise cherrypy.HTTPRedirect(f"{self.docroot}/scaninfo?id={scanId}", status=302)
def rerunscanmulti(self, ids):
    """Rerun scans.

    Each listed scan is restarted in turn with its stored name, target and
    enabled modules, using a snapshot of the current configuration.

    Args:
        ids (str): comma separated list of scan IDs

    Returns:
        str: rendered scan list page, or an error page if any scan cannot
            be re-run (scans already restarted are not rolled back)
    """
    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    dbh = SpiderFootDb(cfg)

    for scan_id in ids.split(","):
        info = dbh.scanInstanceGet(scan_id)
        if not info:
            return self.error("Invalid scan ID.")

        scanconfig = dbh.scanConfigGet(scan_id)
        scanname = info[0]
        scantarget = info[1]

        # Truthiness check also covers a missing (None) config, which
        # len() would reject with a TypeError.
        if not scanconfig:
            return self.error("Something went wrong internally.")

        modlist = scanconfig['_modulesenabled'].split(',')
        if "sfp__stor_stdout" in modlist:
            modlist.remove("sfp__stor_stdout")

        targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
        if not targetType:
            # Same handling as the single-scan rerun: an unrecognised target
            # is retried wrapped in quotes, on the assumption it is a name.
            targetType = SpiderFootHelpers.targetTypeFromString(f'"{scantarget}"')

        if not targetType:
            return self.error("Invalid target type. Could not recognize it as a target SpiderFoot supports.")

        # Start running a new scan
        scanId = SpiderFootHelpers.genScanInstanceId()
        try:
            p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
            p.daemon = True
            p.start()
        except Exception as e:
            self.log.error(f"[-] Scan [{scanId}] failed: {e}")
            return self.error(f"[-] Scan [{scanId}] failed: {e}")

        # Wait until the scan has initialized
        while dbh.scanInstanceGet(scanId) is None:
            self.log.info("Waiting for the scan to initialize...")
            time.sleep(1)

    templ = Template(filename='spiderfoot/templates/scanlist.tmpl', lookup=self.lookup)
    return templ.render(rerunscans=True, docroot=self.docroot, pageid="SCANLIST", version=__version__)
def test_scanInstanceGet_argument_instanceId_of_invalid_type_should_raise_TypeError(self):
    """
    Test scanInstanceGet(self, instanceId)

    Every non-string instance ID is expected to be rejected with TypeError.
    """
    database = SpiderFootDb(self.default_options, False)

    # None, an empty list, an empty dict and the integer zero.
    for bad_value in (None, [], {}, 0):
        with self.subTest(invalid_type=bad_value), self.assertRaises(TypeError):
            database.scanInstanceGet(bad_value)
def scanvizmulti(self, ids, gexf="1"):
    """Export entities results from multiple scans in GEXF format.

    Args:
        ids (str): comma separated scan IDs
        gexf (str): "0" selects the (not yet implemented) non-GEXF output

    Returns:
        string: GEXF data, or None when no IDs are given or gexf is "0"
    """
    dbh = SpiderFootDb(self.config)
    events = []
    targets = []

    if not ids:
        return None

    for scan_id in ids.split(','):
        events = events + dbh.scanResultEvent(scan_id, filterFp=True)
        instance = dbh.scanInstanceGet(scan_id)
        # Unknown scans contribute no root node.
        if instance:
            targets.append(instance[1])

    if gexf == "0":
        # Not implemented yet
        return None

    response_headers = cherrypy.response.headers
    response_headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
    response_headers['Content-Type'] = "application/gexf"
    response_headers['Pragma'] = "no-cache"

    return SpiderFootHelpers.buildGraphGexf(targets, "SpiderFoot Export", events)
def scanviz(self, id, gexf="0"):
    """Export entities from scan results for visualising.

    Args:
        id (str): scan ID
        gexf (str): "0" returns the JSON graph; any other value returns a
            GEXF attachment

    Returns:
        string: graph data, or None when the ID is empty or the scan is unknown
    """
    if not id:
        return None

    dbh = SpiderFootDb(self.config)
    events = dbh.scanResultEvent(id, filterFp=True)
    instance = dbh.scanInstanceGet(id)

    if not instance:
        return None

    roots = [instance[1]]

    if gexf != "0":
        response_headers = cherrypy.response.headers
        response_headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        response_headers['Content-Type'] = "application/gexf"
        response_headers['Pragma'] = "no-cache"
        return SpiderFootHelpers.buildGraphGexf(roots, "SpiderFoot Export", events)

    return SpiderFootHelpers.buildGraphJson(roots, events)
def scansummary(self, id, by):
    """Summary of scan results.

    Args:
        id (str): scan ID
        by: grouping passed through to the database summary query

    Returns:
        list: one entry per summary row (ROOT rows excluded); empty when
            either database lookup raises
    """
    summary = []
    dbh = SpiderFootDb(self.config)

    try:
        rows = dbh.scanResultSummary(id, by)
    except Exception:
        return summary

    try:
        instance = dbh.scanInstanceGet(id)
    except Exception:
        return summary

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    for row in rows:
        if row[0] != "ROOT":
            last_seen = time.strftime(timestamp_format, time.localtime(row[2]))
            # The scan status (instance[5]) is repeated on every row.
            summary.append([row[0], row[1], last_seen, row[3], row[4], instance[5]])

    return summary
def scandelete(self, id):
    """Delete scan(s).

    All scans are validated first; nothing is deleted unless every listed
    scan exists and none of them is running.

    Args:
        id (str): comma separated list of scan IDs

    Returns:
        str: JSON error response, or an empty body on success
    """
    if not id:
        return self.jsonify_error('404', "No scan specified")

    dbh = SpiderFootDb(self.config)
    ids = id.split(',')

    for scan_id in ids:
        res = dbh.scanInstanceGet(scan_id)
        # Report the individual offending scan, not the whole ID list.
        if not res:
            return self.jsonify_error('404', f"Scan {scan_id} does not exist")

        if res[5] in ["RUNNING", "STARTING", "STARTED"]:
            return self.jsonify_error('400', f"Scan {scan_id} is {res[5]}. You cannot delete running scans.")

    for scan_id in ids:
        dbh.scanInstanceDelete(scan_id)

    return b""
def scandelete(self, id, confirm=None):
    """Delete a scan.

    Without ``confirm`` a confirmation page is rendered; with it the scan
    is deleted.

    Args:
        id (str): scan ID
        confirm (str): specify any value (except None) to confirm deletion of the scan

    Returns:
        JSON-encoded bytes for clients accepting ``application/json``,
        otherwise a rendered HTML page

    Raises:
        HTTPRedirect: redirect to the root page after a confirmed deletion
            requested by a non-JSON client
    """
    def _wants_json():
        # A request without an Accept header yields None from .get();
        # fall back to an empty string so the membership test cannot raise.
        headers = cherrypy.request.headers
        return bool(headers) and 'application/json' in (headers.get('Accept') or '')

    def _json_reply(status, message):
        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
        return json.dumps([status, message]).encode('utf-8')

    dbh = SpiderFootDb(self.config)
    res = dbh.scanInstanceGet(id)

    if res is None:
        if _wants_json():
            return _json_reply("ERROR", "Scan ID not found.")
        return self.error("Scan ID not found.")

    if confirm:
        dbh.scanInstanceDelete(id)
        if _wants_json():
            return _json_reply("SUCCESS", "")
        raise cherrypy.HTTPRedirect("/")

    templ = Template(filename='dyn/scandelete.tmpl', lookup=self.lookup)
    return templ.render(id=id, name=str(res[0]), names=list(), ids=list(),
                        pageid="SCANLIST", docroot=self.docroot)
def scaneventresultexportmulti(self, ids, dialect="excel"):
    """Get scan event result data in CSV format for multiple scans.

    Args:
        ids (str): comma separated list of scan IDs
        dialect (str): csv module dialect name used by the writer

    Returns:
        string: results in CSV format (UTF-8 encoded)
    """
    dbh = SpiderFootDb(self.config)
    scans_by_id = {}
    events = []

    for scan_id in ids.split(','):
        scans_by_id[scan_id] = dbh.scanInstanceGet(scan_id)
        events = events + dbh.scanResultEvent(scan_id)

    buffer = StringIO()
    writer = csv.writer(buffer, dialect=dialect)
    writer.writerow(["Scan Name", "Updated", "Type", "Module", "Source", "F/P", "Data"])

    for event in events:
        if event[4] != "ROOT":
            updated = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(event[0]))
            # Strip the URL marker tags from the event data.
            cleaned = str(event[1]).replace("<SFURL>", "").replace("</SFURL>", "")
            # event[12] is used as the key into the per-scan lookup.
            scan_name = scans_by_id[event[12]][0]
            writer.writerow([scan_name, updated, str(event[4]), str(event[3]),
                             str(event[2]), event[13], cleaned])

    response_headers = cherrypy.response.headers
    response_headers['Content-Disposition'] = "attachment; filename=SpiderFoot.csv"
    response_headers['Content-Type'] = "application/csv"
    response_headers['Pragma'] = "no-cache"

    return buffer.getvalue().encode('utf-8')
def stopscan(self, id):
    """Stop a scan.

    All scans are validated first; an abort is requested only when every
    listed scan exists and is currently RUNNING.

    Args:
        id (str): comma separated list of scan IDs

    Returns:
        str: JSON error response, or an empty body on success
    """
    if not id:
        return self.jsonify_error('404', "No scan specified")

    dbh = SpiderFootDb(self.config)
    ids = id.split(',')

    for scan_id in ids:
        res = dbh.scanInstanceGet(scan_id)
        # Messages name the individual offending scan, not the whole ID list.
        if not res:
            return self.jsonify_error('404', f"Scan {scan_id} does not exist")

        scan_status = res[5]

        if scan_status == "FINISHED":
            return self.jsonify_error('400', f"Scan {scan_id} has already finished.")

        if scan_status == "ABORTED":
            return self.jsonify_error('400', f"Scan {scan_id} has already aborted.")

        if scan_status != "RUNNING":
            return self.jsonify_error(
                '400',
                f"The running scan is currently in the state '{scan_status}', please try again later or restart SpiderFoot."
            )

    for scan_id in ids:
        dbh.scanInstanceSet(scan_id, status="ABORT-REQUESTED")

    return b""
def stopscanmulti(self, ids):
    """Request an abort for several scans.

    Args:
        ids (str): comma separated list of scan IDs

    Returns:
        str: rendered error page when a scan ID is unknown

    Raises:
        HTTPRedirect: redirect to the root page once all IDs were processed

    Note:
        Unnecessary for now given that only one simultaneous scan is permitted
    """
    dbh = SpiderFootDb(self.config)
    # NOTE: messages are collected here but not surfaced to the caller.
    error = []

    for scan_id in ids.split(","):
        instance = dbh.scanInstanceGet(scan_id)
        if not instance:
            return self.error("Invalid scan ID: %s" % scan_id)

        name = str(instance[0])
        status = instance[5]

        if status == "FINISHED":
            error.append("Scan '%s' is in a finished state. <a href='/scandelete?id=%s&confirm=1'>Maybe you want to delete it instead?</a>" % (name, scan_id))
        elif status == "ABORTED":
            error.append("Scan '" + name + "' is already aborted.")
        else:
            dbh.scanInstanceSet(scan_id, status="ABORT-REQUESTED")

    raise cherrypy.HTTPRedirect("/")
def scansummary(self, id, by):
    """Summary of scan results, serialised as JSON.

    Args:
        id (str): scan ID
        by: grouping passed through to the database summary query

    Returns:
        bytes: UTF-8 encoded JSON list with one entry per summary row (ROOT
            rows excluded); an empty JSON list when either lookup raises
    """
    cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

    def as_json(payload):
        return json.dumps(payload).encode('utf-8')

    summary = []
    dbh = SpiderFootDb(self.config)

    try:
        rows = dbh.scanResultSummary(id, by)
    except Exception:
        return as_json(summary)

    try:
        instance = dbh.scanInstanceGet(id)
    except Exception:
        return as_json(summary)

    for row in rows:
        if row[0] != "ROOT":
            last_seen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[2]))
            # The scan status (instance[5]) is repeated on every row.
            summary.append([row[0], row[1], last_seen, row[3], row[4], instance[5]])

    return as_json(summary)
def scandeletemulti(self, ids, confirm=None):
    """Delete multiple scans.

    Without ``confirm`` a confirmation page listing the scan names is
    rendered; with it all listed scans are deleted.

    Args:
        ids (str): comma separated list of scan IDs
        confirm: any truthy value confirms the deletion

    Returns:
        str: rendered confirmation page, or an error page when a scan is
            unknown or still running

    Raises:
        HTTPRedirect: redirect to the root page after a confirmed deletion
    """
    dbh = SpiderFootDb(self.config)
    scan_ids = ids.split(',')
    names = list()

    for scan_id in scan_ids:
        res = dbh.scanInstanceGet(scan_id)
        # Validate before touching res: indexing a missing scan (None)
        # would raise TypeError instead of producing the error page.
        if res is None:
            return self.error("Scan ID not found (" + scan_id + ").")

        if res[5] in ["RUNNING", "STARTING", "STARTED"]:
            return self.error("You cannot delete running scans.")

        names.append(str(res[0]))

    if confirm:
        for scan_id in scan_ids:
            dbh.scanInstanceDelete(scan_id)
        raise cherrypy.HTTPRedirect("/")

    templ = Template(filename='dyn/scandelete.tmpl', lookup=self.lookup)
    return templ.render(id=None, name=None, ids=scan_ids, names=names,
                        pageid="SCANLIST", docroot=self.docroot)
def test_scanInstanceGet_should_return_scan_info(self):
    """
    Test scanInstanceGet(self, instanceId)

    A freshly created scan instance is expected to come back as six fields:
    name, target, three float values and the status 'CREATED'.
    """
    database = SpiderFootDb(self.default_options, False)

    instance_id = "example instance id"
    scan_name = "example scan name"
    scan_target = "example scan target"

    database.scanInstanceCreate(instance_id, scan_name, scan_target)
    record = database.scanInstanceGet(instance_id)

    self.assertEqual(len(record), 6)

    # Name and target are echoed back unchanged.
    for index, expected in ((0, scan_name), (1, scan_target)):
        self.assertIsInstance(record[index], str)
        self.assertEqual(record[index], expected)

    for index in (2, 3, 4):
        self.assertIsInstance(record[index], float)

    self.assertIsInstance(record[5], str)
    self.assertEqual(record[5], 'CREATED')
def resultsetfp(self, id, resultids, fp):
    """Set a bunch of results (hashes) as false positive.

    Args:
        id (str): scan ID
        resultids (str): JSON encoded list of result IDs
        fp (str): 0 or 1

    Returns:
        str: set false positive status as JSON, a two element list of
            status ("SUCCESS", "WARNING" or "ERROR") and message
    """
    cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

    def _reply(status, message):
        return json.dumps([status, message]).encode('utf-8')

    dbh = SpiderFootDb(self.config)

    if fp not in ["0", "1"]:
        return _reply("ERROR", "No FP flag set or not set correctly.")

    try:
        ids = json.loads(resultids)
    except (TypeError, ValueError):
        # TypeError: resultids is not str/bytes; ValueError: malformed JSON.
        return _reply("ERROR", "No IDs supplied.")

    # Cannot set FPs if a scan is not completed
    status = dbh.scanInstanceGet(id)
    if not status:
        # The response is already labelled as JSON above, so the error is
        # reported as JSON too rather than as an HTML error page.
        return _reply("ERROR", f"Invalid scan ID: {id}")

    if status[5] not in ["ABORTED", "FINISHED", "ERROR-FAILED"]:
        return _reply("WARNING", "Scan must be in a finished state when setting False Positives.")

    # Make sure the user doesn't set something as non-FP when the
    # parent is set as an FP.
    if fp == "0":
        data = dbh.scanElementSourcesDirect(id, ids)
        for row in data:
            if str(row[14]) == "1":
                return _reply(
                    "WARNING",
                    f"Cannot unset element {id} as False Positive if a parent element is still False Positive."
                )

    # Set all the children as FPs too.. it's only logical afterall, right?
    childs = dbh.scanElementChildrenAll(id, ids)
    allIds = ids + childs

    ret = dbh.scanResultsUpdateFP(id, allIds, fp)
    if ret:
        return _reply("SUCCESS", "")

    return _reply("ERROR", "Exception encountered.")
def scanexportjsonmulti(self, ids):
    """Get scan event result data in JSON format for multiple scans.

    Args:
        ids (str): comma separated list of scan IDs

    Returns:
        string: results as a UTF-8 encoded JSON list of event records
    """
    dbh = SpiderFootDb(self.config)
    records = []
    scan_name = ""
    requested_ids = ids.split(',')

    for scan_id in requested_ids:
        scan = dbh.scanInstanceGet(scan_id)
        # Unknown scans are skipped silently.
        if scan is None:
            continue

        scan_name = scan[0]

        for row in dbh.scanResultEvent(scan_id):
            last_seen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[0]))
            data = str(row[1]).replace("<SFURL>", "").replace("</SFURL>", "")
            source = str(row[2])
            module = str(row[3])
            kind = row[4]
            is_fp = row[13]

            if kind != "ROOT":
                records.append({
                    "data": data,
                    "event_type": kind,
                    "module": module,
                    "source_data": source,
                    "false_positive": is_fp,
                    "last_seen": last_seen,
                    "scan_name": scan_name,
                    "scan_target": scan[1]
                })

    # A scan-specific file name is only used for a single, known scan.
    single_named_scan = len(requested_ids) <= 1 and scan_name != ""
    fname = scan_name + "-SpiderFoot.json" if single_named_scan else "SpiderFoot.json"

    response_headers = cherrypy.response.headers
    response_headers['Content-Disposition'] = "attachment; filename=" + fname
    response_headers['Content-Type'] = "application/json; charset=utf-8"
    response_headers['Pragma'] = "no-cache"

    return json.dumps(records).encode('utf-8')
def scanviz(self, id, gexf="0"):
    """Export entities from scan results for visualising.

    Args:
        id (str): scan ID
        gexf (str): "0" returns the JSON graph; any other value returns a
            GEXF attachment

    Returns:
        string: graph data, or None when the scan is unknown
    """
    dbh = SpiderFootDb(self.config)
    sf = SpiderFoot(self.config)
    data = dbh.scanResultEvent(id, filterFp=True)
    scan = dbh.scanInstanceGet(id)

    # An unknown scan ID yields no instance; indexing it would raise.
    if not scan:
        return None

    root = scan[1]

    if gexf != "0":
        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        cherrypy.response.headers['Content-Type'] = "application/gexf"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return sf.buildGraphGexf([root], "SpiderFoot Export", data)

    return sf.buildGraphJson([root], data)
def scaninfo(self, id):
    """Render the information page for a selected scan.

    Args:
        id (str): scan id

    Returns:
        str: rendered scan info page, or an error page for an unknown ID
    """
    instance = SpiderFootDb(self.config).scanInstanceGet(id)
    if instance is None:
        return self.error("Scan ID not found.")

    # The scan name is HTML-escaped before being handed to the template.
    page = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup, input_encoding='utf-8')
    return page.render(
        id=id,
        name=html.escape(instance[0]),
        status=instance[5],
        docroot=self.docroot,
        pageid="SCANLIST",
    )
def scanvizmulti(self, ids, gexf="1"):
    """Export entities results from multiple scans in GEXF format.

    Args:
        ids (str): comma separated list of scan IDs
        gexf (str): "0" selects the (not yet implemented) non-GEXF output

    Returns:
        string: GEXF data, or None when gexf is "0"
    """
    dbh = SpiderFootDb(self.config)
    sf = SpiderFoot(self.config)
    data = list()
    roots = list()

    for scan_id in ids.split(','):
        data = data + dbh.scanResultEvent(scan_id, filterFp=True)
        scan = dbh.scanInstanceGet(scan_id)
        # An unknown scan ID yields no instance; skip it rather than
        # raising on the index.
        if scan:
            roots.append(scan[1])

    if gexf != "0":
        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        cherrypy.response.headers['Content-Type'] = "application/gexf"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return sf.buildGraphGexf(roots, "SpiderFoot Export", data)

    # Not implemented yet
    return None
def scanopts(self, id):
    """Configuration used for a scan.

    Args:
        id: scan ID

    Returns:
        dict: 'meta', 'config' and 'configdesc' entries for the scan; an
            empty dict when the scan is unknown
    """
    dbh = SpiderFootDb(self.config)
    result = {}

    meta = dbh.scanInstanceGet(id)
    if not meta:
        return result

    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # A zero timestamp is shown as "Not yet".
    started = "Not yet"
    if meta[3] != 0:
        started = time.strftime(timestamp_format, time.localtime(meta[3]))

    finished = "Not yet"
    if meta[4] != 0:
        finished = time.strftime(timestamp_format, time.localtime(meta[4]))

    result['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]
    result['config'] = dbh.scanConfigGet(id)
    result['configdesc'] = {}

    for key in list(result['config'].keys()):
        if ':' in key:
            # Keys containing a colon are split into module name and option.
            [modName, modOpt] = key.split(':')
            if modName not in list(self.config['__modules__'].keys()):
                continue

            option_descriptions = self.config['__modules__'][modName]['optdescs']
            if modOpt not in list(option_descriptions.keys()):
                continue

            result['configdesc'][key] = option_descriptions[modOpt]
        else:
            globaloptdescs = self.config['__globaloptdescs__']
            if globaloptdescs:
                result['configdesc'][key] = globaloptdescs.get(key, f"{key} (legacy)")

    return result
def scanstatus(self, id):
    """Basic information about a scan.

    Args:
        id (str): scan ID

    Returns:
        list: name, target, created, started, ended and status of the scan;
            an empty list when the scan is unknown
    """
    record = SpiderFootDb(self.config).scanInstanceGet(id)
    if not record:
        return []

    def formatted(timestamp):
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))

    created = formatted(record[2])
    started = formatted(record[3])
    ended = formatted(record[4])

    return [record[0], record[1], created, started, ended, record[5]]
def scanstatus(self, id):
    """Basic information about a scan, serialised as JSON.

    Args:
        id (str): scan ID

    Returns:
        bytes: UTF-8 encoded JSON list of name, target, created, started,
            ended and status; an empty JSON list when the scan is unknown
    """
    cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

    record = SpiderFootDb(self.config).scanInstanceGet(id)

    if record:
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        created = time.strftime(timestamp_format, time.localtime(record[2]))
        started = time.strftime(timestamp_format, time.localtime(record[3]))
        ended = time.strftime(timestamp_format, time.localtime(record[4]))
        payload = [record[0], record[1], created, started, ended, record[5]]
    else:
        payload = []

    return json.dumps(payload).encode('utf-8')
def stopscan(self, id):
    """Stop a scan.

    An abort is requested only for a scan that is currently RUNNING.

    Args:
        id (str): scan ID

    Returns:
        JSON-encoded bytes for clients accepting ``application/json``,
        otherwise a rendered HTML error page

    Raises:
        HTTPRedirect: redirect to the root page after a successful request
            from a non-JSON client
    """
    def _wants_json():
        # A request without an Accept header yields None from .get();
        # fall back to an empty string so the membership test cannot raise.
        headers = cherrypy.request.headers
        return bool(headers) and 'application/json' in (headers.get('Accept') or '')

    def _json_reply(status, message):
        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
        return json.dumps([status, message]).encode('utf-8')

    dbh = SpiderFootDb(self.config)
    scaninfo = dbh.scanInstanceGet(id)

    if not scaninfo:
        if _wants_json():
            return _json_reply("ERROR", "Invalid scan ID.")
        return self.error("Invalid scan ID.")

    scanstatus = scaninfo[5]

    if scanstatus == "ABORTED":
        if _wants_json():
            return _json_reply("ERROR", "Scan already aborted.")
        return self.error("The scan is already aborted.")

    if scanstatus != "RUNNING":
        if _wants_json():
            return _json_reply("ERROR", "Scan in an invalid state for stopping.")
        return self.error("The running scan is currently in the state '%s', please try again later or restart SpiderFoot." % scanstatus)

    dbh.scanInstanceSet(id, status="ABORT-REQUESTED")

    if _wants_json():
        return _json_reply("SUCCESS", "")

    raise cherrypy.HTTPRedirect("/")
def scanopts(self, id):
    """Configuration used for a scan, serialised as JSON.

    Args:
        id: scan ID

    Returns:
        bytes: UTF-8 encoded JSON object with 'config', 'configdesc' and
            'meta' entries; an empty JSON list when the scan is unknown
    """
    dbh = SpiderFootDb(self.config)

    # Check the scan exists before processing its configuration.
    meta = dbh.scanInstanceGet(id)
    if not meta:
        return json.dumps([]).encode('utf-8')

    ret = dict()
    ret['config'] = dbh.scanConfigGet(id)
    ret['configdesc'] = dict()

    globaloptdescs = self.config.get('__globaloptdescs__') or {}
    modules = self.config.get('__modules__') or {}

    for key in list(ret['config'].keys()):
        if ':' not in key:
            # Options with no current description fall back to a
            # placeholder instead of raising KeyError.
            ret['configdesc'][key] = globaloptdescs.get(key, f"{key} (legacy)")
            continue

        # Split on the first colon only, so extra colons cannot break
        # the unpacking.
        modName, _, modOpt = key.partition(':')
        optdescs = modules.get(modName, {}).get('optdescs', {})
        if modOpt in optdescs:
            ret['configdesc'][key] = optdescs[modOpt]

    # A zero timestamp is shown as "Not yet".
    if meta[3] != 0:
        started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[3]))
    else:
        started = "Not yet"

    if meta[4] != 0:
        finished = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[4]))
    else:
        finished = "Not yet"

    ret['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]

    return json.dumps(ret).encode('utf-8')
def clonescan(self, id):
    """Clone an existing scan (pre-selected options in the newscan page).

    Args:
        id (str): scan ID to clone

    Returns:
        str: rendered new-scan page pre-filled from the existing scan, or
            an error page when the scan cannot be loaded
    """
    dbh = SpiderFootDb(self.config)
    types = dbh.eventTypes()
    info = dbh.scanInstanceGet(id)

    if not info:
        return self.error("Invalid scan ID.")

    scanconfig = dbh.scanConfigGet(id)
    scanname = info[0]
    scantarget = info[1]

    # Truthiness also covers a missing (None) config, which len() rejects.
    if scanname == "" or scantarget == "" or not scanconfig:
        return self.error("Something went wrong internally.")

    targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
    if targetType is None:
        # It must be a name, so wrap quotes around it. The quotes are
        # written as HTML entities - NOTE(review): assumes the template
        # emits scantarget into HTML markup unescaped; confirm.
        scantarget = "&quot;" + scantarget + "&quot;"

    modlist = scanconfig['_modulesenabled'].split(',')

    templ = Template(filename='spiderfoot/templates/newscan.tmpl', lookup=self.lookup)
    return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                        modules=self.config['__modules__'], selectedmods=modlist,
                        scanname=str(scanname), scantarget=str(scantarget),
                        version=__version__)
def start_scan(sfConfig, sfModules, args):
    """Validate command-line options, build the module list and run a scan.

    The function either lists modules/types and exits, rejects an invalid
    option combination and exits, or starts a scanner process and polls the
    database once per second until the scan reaches a terminal status. Every
    path leaves through sys.exit(); the trailing ``return None`` follows a
    loop that has no ``break``.

    Sets the module-level globals ``dbh`` and ``scanId``.

    Args:
        sfConfig (dict): SpiderFoot configuration; ``__modules__`` and
            ``__logging`` are read here
        sfModules (dict): module name -> module info ('descr' and
            'provides' are read here)
        args: parsed command-line arguments (attributes read: modules,
            types, s, t, m, x, r, H, D, o, f, F, n, S, debug)
    """
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    # Listing mode: print available modules (names containing "__" are
    # skipped) and exit.
    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25} {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    # Listing mode: print available event types and exit.
    if args.types:
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45} {1}'.format(t, types[t])))
        sys.exit(0)

    # Reject invalid option combinations before doing any work.
    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = sf.targetType(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    # The quotes were only needed for type detection.
    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        # (repeats until a pass adds no new modules)
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    # Output-related command-line options are written into the
    # sfp__stor_stdout module options held in sfConfig.
    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        # With -x the module list is rebuilt from scratch: modules that
        # consume the target type...
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides', []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    # The two storage modules are always appended.
    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    # Run the scan
    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Opening bracket of the JSON output; the closing one is printed when
    # the scan ends.
    if args.o == "json":
        print("[", end='')

    # Start running a new scan
    scanName = target
    scanId = sf.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner, args=(scanName, scanId, target, targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # If field headers weren't disabled, print them
    if not args.H and args.o != "json":
        # NOTE(review): delim is only assigned for -D, or for an output
        # format of "tab", None or "csv" - confirm no other format reaches here.
        if args.D:
            delim = args.D
        else:
            if args.o in ["tab", None]:
                delim = "\t"

            if args.o == "csv":
                delim = ","

        if not args.r:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}'.format("Source", delim, "Type", delim, "Data"))
        else:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Source Data", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim, "Data"))

    # Poll once per second until the scan instance exists and reports a
    # terminal status, then exit.
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return None
class SpiderFootScanner():
    """SpiderFootScanner object.

    Validates the scan parameters, records the scan instance and its
    configuration in the database, loads the requested modules and then
    dispatches events between them until every module is idle.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    # Class-level defaults. The mutable containers are re-created per
    # instance in __init__ so that two scanners never share state.
    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    __moduleList = list()
    __target = None
    __moduleInstances = dict()
    __modconfig = dict()
    __scanName = None

    def __init__(self, scanName: str, scanId: str, targetValue: str,
                 targetType: str, moduleList: list, globalOpts: dict,
                 start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        # Work on a private copy so the caller's options are never mutated.
        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        # Per-instance containers (the class-level ones would be shared).
        self.__moduleInstances = dict()
        self.__modconfig = dict()

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # scanId was validated as a non-empty string above, so it is used
        # directly as the unique ID for this scan in the back-end DB.
        self.__scanId = scanId

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}") from None

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            proxy_port = int(self.__config.get('_socks3port') or 0)

            # Fall back to the conventional port for the proxy type.
            if not proxy_port:
                if proxy_type in ['4', '5']:
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Set up the Internet TLD list.
        # If the cached does not exist or has expired, reload it from scratch.
        tld_data = self.__sf.cacheGet("internet_tlds",
                                      self.__config['_internettlds_cache'])
        if tld_data is None:
            tld_data = self.__sf.optValueToData(self.__config['_internettlds'])
            if tld_data is None:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Could not update TLD list"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError("Could not update TLD list")
            self.__sf.cachePut("internet_tlds", tld_data)

        self.__config['_internettlds'] = tld_data.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        self.__sharedThreadPool = SpiderFootThreadPool(
            threads=self.__config.get("_maxthreads", 3),
            name='sharedThreadPool')

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        """Unique ID of the scan."""
        return self.__scanId

    @property
    def status(self) -> str:
        """Most recent status set via __setStatus()."""
        return self.__status

    def __setStatus(self, status: str, started: float = None,
                    ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        The status is stored on the instance and written to the database.

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING", "STARTING", "STARTED", "RUNNING",
                "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self) -> None:
        """Start running a scan.

        Loads and configures every requested module, emits the ROOT and
        initial target events, then hands over to waitForThreads(). All
        exceptions are handled here: the scan status is updated and the
        database handle is closed on every exit path.

        Raises:
            AssertionError: Never actually raised.
        """
        failed = True

        try:
            self.__setStatus("STARTING", time.time() * 1000, None)
            self.__sf.status(
                f"Scan [{self.__scanId}] for '{self.__target.targetValue}' initiated."
            )

            self.eventQueue = queue.Queue()

            self.__sharedThreadPool.start()

            # moduleList = list of modules the user wants to run
            self.__sf.debug(f"Loading {len(self.__moduleList)} modules ...")
            for modName in self.__moduleList:
                if not modName:
                    continue

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    mod = getattr(module, modName)()
                    mod.__name__ = modName
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    continue

                # Set up the module options, scan ID, database handle and listeners
                try:
                    # Configuration is a combined global config with module-specific options
                    self.__modconfig[modName] = deepcopy(
                        self.__config['__modules__'][modName]['opts'])
                    for opt in list(self.__config.keys()):
                        self.__modconfig[modName][opt] = deepcopy(
                            self.__config[opt])

                    # clear any listener relationships from the past
                    mod.clearListeners()
                    mod.setScanId(self.__scanId)
                    mod.setSharedThreadPool(self.__sharedThreadPool)
                    mod.setDbh(self.__dbh)
                    mod.setup(self.__sf, self.__modconfig[modName])
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    mod.errorState = True
                    continue

                # Override the module's local socket module to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    try:
                        mod._updateSocket(socket)
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} socket setup failed: {e}")
                        continue

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    try:
                        mod.setOutputFilter(self.__config['__outputfilter'])
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} output filter setup failed: {e}"
                        )
                        continue

                # Give modules a chance to 'enrich' the original target with aliases of that target.
                try:
                    newTarget = mod.enrichTarget(self.__target)
                    if newTarget is not None:
                        self.__target = newTarget
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} target enrichment failed: {e}")
                    continue

                # Register the target with the module
                try:
                    mod.setTarget(self.__target)
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} failed to set target '{self.__target}': {e}"
                    )
                    continue

                # Set up the outgoing event queue
                try:
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} event queue setup failed: {e}")
                    continue

                self.__moduleInstances[modName] = mod
                self.__sf.status(f"{modName} module loaded.")

            self.__sf.debug(
                f"Scan [{self.__scanId}] loaded {len(self.__moduleInstances)} modules."
            )

            if not self.__moduleInstances:
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                # The database handle is closed by the finally clause below.
                return

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            psMod.outgoingEventQueue = self.eventQueue
            psMod.incomingEventQueue = queue.Queue()

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType,
                                         self.__targetValue, "SpiderFoot UI",
                                         rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
            if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                raise AssertionError("ABORT-REQUESTED")

            # start threads
            self.waitForThreads()
            failed = False

        except (KeyboardInterrupt, AssertionError):
            self.__sf.status(f"Scan [{self.__scanId}] aborted.")
            self.__setStatus("ABORTED", None, time.time() * 1000)

        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            # Plain string concatenation: a stray unary "+" in front of
            # repr() raises TypeError and hides the original failure.
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                + "Please report this as a bug: "
                + repr(traceback.format_exception(exc_type, exc_value,
                                                  exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        finally:
            if not failed:
                self.__setStatus("FINISHED", None, time.time() * 1000)
                self.runCorrelations()
                self.__sf.status(f"Scan [{self.__scanId}] completed.")

            self.__dbh.close()

    def runCorrelations(self) -> None:
        """Run correlation rules."""
        self.__sf.status(
            f"Running {len(self.__config['__correlationrules__'])} correlation rules."
        )
        # Map each rule ID to its raw YAML definition.
        ruleset = dict()
        for rule in self.__config['__correlationrules__']:
            ruleset[rule['id']] = rule['rawYaml']
        corr = SpiderFootCorrelator(self.__dbh, ruleset, self.__scanId)
        corr.run_correlations()

    def waitForThreads(self) -> None:
        """Wait for threads.

        Starts every loaded module, then forwards each event taken from
        the shared event queue to the modules watching its type, until
        all modules are idle. Modules are always told to stop and the
        shared thread pool is shut down on exit.

        Raises:
            TypeError: queue tried to process a malformed event
            AssertionError: scan halted for some reason
        """
        counter = 0

        try:
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()
            # Number of "everything is idle" rounds still to run; each
            # round sends 'FINISHED' to every healthy module.
            final_passes = 3

            # watch for newly-generated events
            while True:

                # log status of threads every 10 iterations
                log_status = counter % 10 == 0
                counter += 1

                if log_status:
                    scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
                    if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                        raise AssertionError("ABORT-REQUESTED")

                try:
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            if final_passes < 1:
                                break
                            # Trigger module.finished()
                            for mod in self.__moduleInstances.values():
                                if not mod.errorState and mod.incomingEventQueue is not None:
                                    mod.incomingEventQueue.put('FINISHED')
                            sleep(.1)
                            while not self.threadsFinished(log_status):
                                log_status = counter % 100 == 0
                                counter += 1
                                sleep(.01)
                            final_passes -= 1

                    else:
                        # save on CPU
                        sleep(.1)
                    continue

                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    if not mod.errorState and mod.incomingEventQueue is not None:
                        watchedEvents = mod.watchedEvents()
                        if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                            mod.incomingEventQueue.put(deepcopy(sfEvent))

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True
            self.__sharedThreadPool.shutdown(wait=True)

    def threadsFinished(self, log_status: bool = False) -> bool:
        """Check if all threads are complete.

        Args:
            log_status (bool): print thread queue status to debug log

        Returns:
            bool: True if all threads are finished
        """
        if self.eventQueue is None:
            return True

        # Pending event count per module, largest first. A module whose
        # attributes cannot be read is flagged as errored (best effort).
        modules_waiting = dict()
        for m in self.__moduleInstances.values():
            try:
                if m.incomingEventQueue is not None:
                    modules_waiting[m.__name__] = m.incomingEventQueue.qsize()
            except Exception:
                with suppress(Exception):
                    m.errorState = True
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)

        modules_running = []
        for m in self.__moduleInstances.values():
            try:
                if m.running:
                    modules_running.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        modules_errored = []
        for m in self.__moduleInstances.values():
            try:
                if m.errorState:
                    modules_errored.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        # Errored modules will never consume their events: drain and
        # detach their queues so they do not block completion.
        for mod in self.__moduleInstances.values():
            if mod.errorState and mod.incomingEventQueue is not None:
                self.__sf.debug(
                    f"Clearing and unsetting incomingEventQueue for errored module {mod.__name__}."
                )
                with suppress(Exception):
                    while 1:
                        mod.incomingEventQueue.get_nowait()
                mod.incomingEventQueue = None

        # NOTE(review): queues_empty is a list, so "not queues_empty" is
        # only true when no module has an incoming queue at all. This may
        # have been meant as "not all(queues_empty)"; left unchanged
        # because draining non-empty queues here could drop pending events.
        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                # Best effort: the queue may be None or already empty.
                with suppress(Exception):
                    while True:
                        mod.incomingEventQueue.get_nowait()

        if log_status:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if not events_queued:
                events_queued = 'None'
            self.__sf.debug(
                f"Events queued: {sum([m[-1] for m in modules_waiting]):,} ({events_queued})"
            )
            if modules_running:
                self.__sf.debug(
                    f"Modules running: {len(modules_running):,} ({', '.join(modules_running)})"
                )
            if modules_errored:
                self.__sf.debug(
                    f"Modules errored: {len(modules_errored):,} ({', '.join(modules_errored)})"
                )

        if all(queues_empty) and not modules_running:
            return True
        return False
def start_scan(sfConfig: dict, sfModules: dict, args, loggingQueue) -> None:
    """Start scan

    Validates the command line arguments, works out which modules to
    enable, launches the scanner in a child process and polls the
    database until the scan reaches a terminal status. This function
    does not return normally: every path ends in sys.exit().

    Args:
        sfConfig (dict): SpiderFoot config options
        sfModules (dict): modules
        args (argparse.Namespace): command line args
        loggingQueue (Queue): main SpiderFoot logging queue
    """
    log = logging.getLogger(f"spiderfoot.{__name__}")

    # Module-level globals; presumably read by the SIGINT handler
    # registered below (handle_abort) -- confirm against its definition.
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = SpiderFootHelpers.targetTypeFromString(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m and not args.u:
        log.warning(
            "You didn't specify any modules, types or use case, so all modules will be enabled."
        )
        for m in list(sfModules.keys()):
            # Skip internal modules (names containing a double underscore).
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)

    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        # Pass a list, as every other modulesProducing() call does; a raw
        # comma-separated string would be matched by substring.
        modlist = sf.modulesProducing(args.t.split(","))
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Select modules if the user selected usercase
    if args.u:
        # Make the first Letter Uppercase
        usecase = args.u[0].upper() + args.u[1:]
        for mod in sfConfig['__modules__']:
            if usecase == 'All' or usecase in sfConfig['__modules__'][mod]['group']:
                # Avoid duplicates when -u is combined with -t or -m.
                if mod not in modlist:
                    modlist.append(mod)

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    # Built as {r[1]: r[0]} for every row returned by eventTypes().
    types = {r[1]: r[0] for r in typedata}

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        if args.o not in ["tab", "csv", "json"]:
            log.error(
                "Invalid output format selected. Must be 'tab', 'csv' or 'json'."
            )
            sys.exit(-1)
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        # Strict mode: keep only modules that consume the target type
        # directly AND provide at least one of the requested types.
        rtypes = args.t.split(",")
        modlist = list()
        for mod in sf.modulesConsuming([targetType]):
            provides = sfModules[mod].get('provides')
            if not provides:
                continue
            if mod not in modlist and any(r in provides for r in rtypes):
                modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    # Storage modules are always required; do not add them twice if the
    # selection above already included them.
    for stor_mod in ("sfp__stor_db", "sfp__stor_stdout"):
        if stor_mod not in modlist:
            modlist.append(stor_mod)

    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        print("[", end='')
    elif not args.H:
        # -D is only accepted together with csv (validated above).
        if args.o == "csv":
            delim = args.D if args.D else ","
        else:
            delim = "\t"

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = SpiderFootHelpers.genScanInstanceId()
    try:
        p = mp.Process(target=startSpiderFootScanner,
                       args=(loggingQueue, scanName, scanId, target,
                             targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            # If the scanner process has already exited and (after a
            # re-check, to close the race with a late write) there is
            # still no scan instance, none will ever appear.
            if not p.is_alive() and not dbh.scanInstanceGet(scanId):
                log.error(
                    f"Scan [{scanId}] failed: scanner process exited before the scan was initialized."
                )
                if args.o == "json":
                    print("]")
                sys.exit(-1)
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)
if delim != "\t": print(delim.join(["Source", "Type", "Data"])) else: print('{0:30}{1}{2:45}{3}{4}'.format( "Source", delim, "Type", delim, "Data")) else: if delim != "\t": print(delim.join(["Source", "Type", "Source Data", "Data"])) else: print('{0:30}{1}{2:45}{3}{4}{5}{6}'.format( "Source", delim, "Type", delim, "Source Data", delim, "Data")) while True: info = dbh.scanInstanceGet(scanId) if not info: time.sleep(1) continue if info[5] in [ "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED" ]: if sfConfig['__logging']: log.info(f"Scan completed with status {info[5]}") if args.o == "json": print("]") sys.exit(0) time.sleep(1) sys.exit(0) # Start the web server so you can start looking at results
def startscan(self, scanname, scantarget, modulelist, typelist, usecase):
    """Initiate a scan

    Modules are chosen from the first non-empty selector, in this order:
    modulelist, typelist, usecase.

    Args:
        scanname (str): scan name
        scantarget (str): scan target
        modulelist (str): comma separated list of module names; any
            'module_' prefix is stripped
        typelist (str): comma separated list of event types; any 'type_'
            prefix is stripped
        usecase (str): 'all', or a value matched against each module's 'cats'

    Returns:
        JSON-encoded bytes when the client accepts application/json,
        otherwise the rendered error page on failure.

    Raises:
        HTTPRedirect: redirect to the info page of the new scan (non-JSON clients)
    """
    # Swap the globalscantable for the database handler
    dbh = SpiderFootDb(self.config)

    # Snapshot the current configuration to be used by the scan
    cfg = deepcopy(self.config)
    modlist = list()
    sf = SpiderFoot(cfg)
    targetType = None

    # A missing Accept header yields None, which cannot be searched with
    # "in"; treat it as an empty string.
    req_headers = cherrypy.request.headers
    wants_json = bool(req_headers) and 'application/json' in (req_headers.get('Accept') or '')

    def fail(json_message, html_message):
        # Report an error in the representation the client asked for.
        if wants_json:
            cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
            return json.dumps(["ERROR", json_message]).encode('utf-8')
        return self.error(html_message)

    [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

    if scanname == "" or scantarget == "":
        return fail("Incorrect usage: scan name or target was not specified.",
                    "Invalid request: scan name or target was not specified.")

    if typelist == "" and modulelist == "" and usecase == "":
        return fail("Incorrect usage: no modules specified for scan.",
                    "Invalid request: no modules specified for scan.")

    # User selected modules
    if modulelist != "":
        modlist = modulelist.replace('module_', '').split(',')

    # User selected types
    if len(modlist) == 0 and typelist != "":
        typesx = typelist.replace('type_', '').split(',')

        # 1. Find all modules that produce the requested types
        modlist = sf.modulesProducing(typesx)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # User selected a use case
    if len(modlist) == 0 and usecase != "":
        for mod in self.config['__modules__']:
            if usecase == 'all' or usecase in self.config['__modules__'][mod]['cats']:
                modlist.append(mod)

    # Add our mandatory storage module..
    if "sfp__stor_db" not in modlist:
        modlist.append("sfp__stor_db")
    modlist.sort()

    targetType = sf.targetType(scantarget)
    if targetType is None:
        return fail("Unrecognised target type.",
                    "Invalid target type. Could not recognize it as a human name, IP address, IP subnet, ASN, domain name or host name.")

    # Delete the stdout module in case it crept in
    if "sfp__stor_stdout" in modlist:
        modlist.remove("sfp__stor_stdout")

    # Normalise the target: names keep their case but lose their quotes,
    # everything else is lower-cased.
    if targetType in ["HUMAN_NAME", "USERNAME"]:
        scantarget = scantarget.replace("\"", "")
    else:
        scantarget = scantarget.lower()

    # Start running a new scan
    scanId = sf.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except Exception as e:
        self.log.error(f"[-] Scan [{scanId}] failed: {e}")
        return self.error(f"[-] Scan [{scanId}] failed: {e}")

    # Wait until the scan has initialized
    # Check the database for the scan status results
    while dbh.scanInstanceGet(scanId) is None:
        # If the scanner process is gone and (after a re-check, to close
        # the race with a late write) the scan still does not exist, it
        # never will: report the failure instead of waiting forever.
        if not p.is_alive() and dbh.scanInstanceGet(scanId) is None:
            self.log.error(f"[-] Scan [{scanId}] failed: scanner process exited before the scan was initialized")
            return fail(f"Scan [{scanId}] failed to initialize.",
                        f"[-] Scan [{scanId}] failed: scanner process exited before the scan was initialized")
        self.log.info("Waiting for the scan to initialize...")
        time.sleep(1)

    if wants_json:
        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
        return json.dumps(["SUCCESS", scanId]).encode('utf-8')

    # Honour a configured document root when one exists; otherwise this
    # is the same "/scaninfo?id=..." URL as before.
    raise cherrypy.HTTPRedirect(f"{getattr(self, 'docroot', '')}/scaninfo?id={scanId}")