def test_cache_get_should_return_a_string(self):
    """Round-trip a string through cachePut() and cacheGet().

    Stores a value under a label, reads it back using the configured
    'cacheperiod' option (0 when the option is absent) as the timeout,
    and checks that the very same string comes back.
    """
    sf = SpiderFoot(dict())

    cache_label = 'test-cache-label'
    cached_value = 'test-cache-data'
    sf.cachePut(cache_label, cached_value)

    timeout_hrs = sf.opts.get('cacheperiod', 0)
    fetched = sf.cacheGet(cache_label, timeout_hrs)

    self.assertIsInstance(fetched, str)
    self.assertEqual(cached_value, fetched)
class SpiderFootScanner():
    """SpiderFootScanner object.

    Loads the requested modules, wires them together through event queues,
    seeds the scan with the ROOT/target events and tracks the scan status
    in the database handle created from the supplied options.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    # The three containers below are created per instance in __init__.
    # They must not be mutable class attributes: __startScan mutates them
    # in place, which would leak state between scanner instances.
    __moduleList = None
    __target = None
    __moduleInstances = None
    __modconfig = None
    __scanName = None

    def __init__(self, scanName: str, scanId: str, targetValue: str, targetType: str, moduleList: list, globalOpts: dict, start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        # Fresh containers for this instance (see the note on the class attributes).
        self.__moduleList = list()
        self.__moduleInstances = dict()
        self.__modconfig = dict()

        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        # Work on a private copy so the caller's options are never modified.
        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create a unique ID for this scan in the back-end DB.
        # scanId was validated as non-blank above, so the else branch is
        # only a defensive fallback.
        if scanId:
            self.__scanId = scanId
        else:
            self.__scanId = SpiderFootHelpers.genScanInstanceId()

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}") from None

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            # A missing/zero port falls back to a per-protocol default.
            proxy_port = int(self.__config.get('_socks3port') or 0)

            if not proxy_port:
                if proxy_type in ['4', '5']:
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Set up the Internet TLD list.
        # If the cached does not exist or has expired, reload it from scratch.
        tld_data = self.__sf.cacheGet("internet_tlds",
                                      self.__config['_internettlds_cache'])
        if tld_data is None:
            tld_data = self.__sf.optValueToData(self.__config['_internettlds'])
            if tld_data is None:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Could not update TLD list"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError("Could not update TLD list")
            self.__sf.cachePut("internet_tlds", tld_data)

        self.__config['_internettlds'] = tld_data.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        self.__sharedThreadPool = SpiderFootThreadPool(
            threads=self.__config.get("_maxthreads", 3),
            name='sharedThreadPool')

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        """Unique ID of the scan."""
        return self.__scanId

    @property
    def status(self) -> str:
        """Most recent status set for the scan."""
        return self.__status

    def __setStatus(self, status: str, started: float = None, ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        The status is stored on the instance and written to the database.

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING",
                "STARTING",
                "STARTED",
                "RUNNING",
                "ABORT-REQUESTED",
                "ABORTED",
                "ABORTING",
                "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self) -> None:
        """Start running a scan.

        Loads and configures every requested module, emits the ROOT and
        target events, then blocks in waitForThreads() until the scan ends.
        Every failure is handled here: the scan status is set to ABORTED or
        ERROR-FAILED and the database handle is closed in all cases.

        Raises:
            AssertionError: Never actually raised; it is used internally to
                signal an abort and is caught within this method.
        """
        # Only flipped to False once waitForThreads() returns normally.
        failed = True

        try:
            self.__setStatus("STARTING", time.time() * 1000, None)
            self.__sf.status(
                f"Scan [{self.__scanId}] for '{self.__target.targetValue}' initiated."
            )

            self.eventQueue = queue.Queue()

            self.__sharedThreadPool.start()

            # moduleList = list of modules the user wants to run
            self.__sf.debug(f"Loading {len(self.__moduleList)} modules ...")
            for modName in self.__moduleList:
                if not modName:
                    continue

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    mod = getattr(module, modName)()
                    mod.__name__ = modName
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    continue

                # Set up the module options, scan ID, database handle and listeners
                try:
                    # Configuration is a combined global config with module-specific options
                    self.__modconfig[modName] = deepcopy(
                        self.__config['__modules__'][modName]['opts'])
                    for opt in list(self.__config.keys()):
                        self.__modconfig[modName][opt] = deepcopy(
                            self.__config[opt])

                    # clear any listener relationships from the past
                    mod.clearListeners()
                    mod.setScanId(self.__scanId)
                    mod.setSharedThreadPool(self.__sharedThreadPool)
                    mod.setDbh(self.__dbh)
                    mod.setup(self.__sf, self.__modconfig[modName])
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    mod.errorState = True
                    continue

                # Override the module's local socket module to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    try:
                        mod._updateSocket(socket)
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} socket setup failed: {e}")
                        continue

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    try:
                        mod.setOutputFilter(self.__config['__outputfilter'])
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} output filter setup failed: {e}"
                        )
                        continue

                # Give modules a chance to 'enrich' the original target with aliases of that target.
                try:
                    newTarget = mod.enrichTarget(self.__target)
                    if newTarget is not None:
                        self.__target = newTarget
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} target enrichment failed: {e}")
                    continue

                # Register the target with the module
                try:
                    mod.setTarget(self.__target)
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} failed to set target '{self.__target}': {e}"
                    )
                    continue

                # Set up the outgoing event queue
                try:
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} event queue setup failed: {e}")
                    continue

                # Only modules that passed every setup step are kept.
                self.__moduleInstances[modName] = mod
                self.__sf.status(f"{modName} module loaded.")

            self.__sf.debug(
                f"Scan [{self.__scanId}] loaded {len(self.__moduleInstances)} modules."
            )

            if not self.__moduleInstances:
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                self.__dbh.close()
                return

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            psMod.outgoingEventQueue = self.eventQueue
            psMod.incomingEventQueue = queue.Queue()

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType,
                                         self.__targetValue, "SpiderFoot UI",
                                         rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
            if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                raise AssertionError("ABORT-REQUESTED")

            # start threads
            self.waitForThreads()
            failed = False

        except (KeyboardInterrupt, AssertionError):
            self.__sf.status(f"Scan [{self.__scanId}] aborted.")
            self.__setStatus("ABORTED", None, time.time() * 1000)

        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            # Plain string concatenation: a stray unary '+' in front of
            # repr() raises TypeError inside this handler, which hides the
            # real error and skips the ERROR-FAILED status update below.
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                + "Please report this as a bug: "
                + repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        finally:
            if not failed:
                self.__setStatus("FINISHED", None, time.time() * 1000)
                self.runCorrelations()
                self.__sf.status(f"Scan [{self.__scanId}] completed.")

            self.__dbh.close()

    def runCorrelations(self) -> None:
        """Run correlation rules.

        Builds a rule-id to raw-YAML mapping from the '__correlationrules__'
        option and runs it through SpiderFootCorrelator for this scan.
        """
        self.__sf.status(
            f"Running {len(self.__config['__correlationrules__'])} correlation rules."
        )
        ruleset = dict()
        for rule in self.__config['__correlationrules__']:
            ruleset[rule['id']] = rule['rawYaml']
        corr = SpiderFootCorrelator(self.__dbh, ruleset, self.__scanId)
        corr.run_correlations()

    def waitForThreads(self) -> None:
        """Wait for threads.

        Starts every loaded module, then forwards each event taken from the
        shared event queue to the modules watching that event type until all
        modules are idle. On the way out, all modules are told to stop and
        the shared thread pool is shut down.

        Raises:
            TypeError: queue tried to process a malformed event
            AssertionError: scan halted for some reason
        """
        counter = 0

        try:
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()
            # Number of 'FINISHED' rounds still to send before leaving the loop.
            final_passes = 3

            # watch for newly-generated events
            while True:

                # log status of threads every 10 iterations
                log_status = counter % 10 == 0
                counter += 1

                # The abort check shares the logging cadence.
                if log_status:
                    scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
                    if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                        raise AssertionError("ABORT-REQUESTED")

                try:
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            if final_passes < 1:
                                break
                            # Trigger module.finished()
                            for mod in self.__moduleInstances.values():
                                if not mod.errorState and mod.incomingEventQueue is not None:
                                    mod.incomingEventQueue.put('FINISHED')
                            sleep(.1)
                            while not self.threadsFinished(log_status):
                                log_status = counter % 100 == 0
                                counter += 1
                                sleep(.01)
                            final_passes -= 1

                    else:
                        # save on CPU
                        sleep(.1)
                    continue

                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    if not mod.errorState and mod.incomingEventQueue is not None:
                        watchedEvents = mod.watchedEvents()
                        if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                            # Each module gets its own copy of the event.
                            mod.incomingEventQueue.put(deepcopy(sfEvent))

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True
            self.__sharedThreadPool.shutdown(wait=True)

    def threadsFinished(self, log_status: bool = False) -> bool:
        """Check if all threads are complete.

        A module whose attributes cannot be read is flagged with
        errorState; errored modules have their incoming queue drained and
        set to None.

        Args:
            log_status (bool): print thread queue status to debug log

        Returns:
            bool: True if all threads are finished
        """
        if self.eventQueue is None:
            return True

        modules_waiting = dict()
        for m in self.__moduleInstances.values():
            try:
                if m.incomingEventQueue is not None:
                    modules_waiting[m.__name__] = m.incomingEventQueue.qsize()
            except Exception:
                with suppress(Exception):
                    m.errorState = True
        # List of (module name, queue size) pairs, largest queue first.
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)

        modules_running = []
        for m in self.__moduleInstances.values():
            try:
                if m.running:
                    modules_running.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        modules_errored = []
        for m in self.__moduleInstances.values():
            try:
                if m.errorState:
                    modules_errored.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        for mod in self.__moduleInstances.values():
            if mod.errorState and mod.incomingEventQueue is not None:
                self.__sf.debug(
                    f"Clearing and unsetting incomingEventQueue for errored module {mod.__name__}."
                )
                # Drain until queue.Empty (or any other error) stops the loop.
                with suppress(Exception):
                    while 1:
                        mod.incomingEventQueue.get_nowait()
                mod.incomingEventQueue = None

        # NOTE(review): queues_empty is a list, so `not queues_empty` is only
        # True when no module reported a queue at all. If the intent was
        # "some queue still holds events" this would be `not all(...)`;
        # left unchanged because draining live queues could drop events
        # that are still in flight - TODO confirm intent.
        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                # Best effort: stops on queue.Empty or a queue set to None.
                with suppress(Exception):
                    while True:
                        mod.incomingEventQueue.get_nowait()

        if log_status:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if not events_queued:
                events_queued = 'None'
            self.__sf.debug(
                f"Events queued: {sum([m[-1] for m in modules_waiting]):,} ({events_queued})"
            )
            if modules_running:
                self.__sf.debug(
                    f"Modules running: {len(modules_running):,} ({', '.join(modules_running)})"
                )
            if modules_errored:
                self.__sf.debug(
                    f"Modules errored: {len(modules_errored):,} ({', '.join(modules_errored)})"
                )

        return all(queues_empty) and not modules_running
class SpiderFootScanner():
    """SpiderFootScanner object.

    Loads the requested modules, seeds the scan with the ROOT/target events
    and tracks the scan status in the database handle created from the
    supplied options.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    # The three containers below are created per instance in __init__.
    # They must not be mutable class attributes: __startScan mutates them
    # in place, which would leak state between scanner instances.
    __moduleList = None
    __target = None
    __moduleInstances = None
    __modconfig = None
    __scanName = None

    def __init__(self, scanName: str, scanId: str, targetValue: str, targetType: str, moduleList: list, globalOpts: dict, start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        # Fresh containers for this instance (see the note on the class attributes).
        self.__moduleList = list()
        self.__moduleInstances = dict()
        self.__modconfig = dict()

        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        # Work on a private copy so the caller's options are never modified.
        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create a unique ID for this scan in the back-end DB.
        # scanId was validated as non-blank above, so the else branch is
        # only a defensive fallback.
        if scanId:
            self.__scanId = scanId
        else:
            self.__scanId = SpiderFootHelpers.genScanInstanceId()

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}")

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            # A missing/zero port falls back to a per-protocol default.
            proxy_port = int(self.__config.get('_socks3port') or 0)

            if not proxy_port:
                if proxy_type in ('4', '5'):
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Get internet TLDs
        tlddata = self.__sf.cacheGet("internet_tlds",
                                     self.__config['_internettlds_cache'])

        # If it wasn't loadable from cache, load it from scratch.
        # On a cache miss the option holds whatever optValueToData()
        # returned (not split here); on a hit it becomes a list of lines.
        if tlddata is None:
            self.__config['_internettlds'] = self.__sf.optValueToData(
                self.__config['_internettlds'])
            self.__sf.cachePut("internet_tlds", self.__config['_internettlds'])
        else:
            self.__config["_internettlds"] = tlddata.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        """Unique ID of the scan."""
        return self.__scanId

    @property
    def status(self) -> str:
        """Most recent status set for the scan."""
        return self.__status

    def __setStatus(self, status: str, started: float = None, ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        The status is stored on the instance and written to the database.

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING",
                "STARTING",
                "STARTED",
                "RUNNING",
                "ABORT-REQUESTED",
                "ABORTED",
                "ABORTING",
                "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self, threaded: bool = True) -> None:
        """Start running a scan.

        Loads and configures every requested module, emits the ROOT and
        target events and, in threaded mode, blocks in waitForThreads().
        Any exception raised during the scan is logged and recorded as
        ERROR-FAILED; the database handle is closed afterwards.

        Args:
            threaded (bool): whether to thread modules
        """
        aborted = False

        self.__setStatus("STARTING", time.time() * 1000, None)
        self.__sf.status(f"Scan [{self.__scanId}] initiated.")

        if threaded:
            self.eventQueue = queue.Queue()

        try:
            # moduleList = list of modules the user wants to run
            for modName in self.__moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.__modconfig[modName] = deepcopy(
                    self.__config['__modules__'][modName]['opts'])
                for opt in list(self.__config.keys()):
                    self.__modconfig[modName][opt] = deepcopy(
                        self.__config[opt])

                # clear any listener relationships from the past
                mod.clearListeners()
                mod.setup(self.__sf, self.__modconfig[modName])
                mod.setDbh(self.__dbh)
                mod.setScanId(self.__scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(self.__target)
                if newTarget is not None:
                    self.__target = newTarget
                self.__moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    mod.setOutputFilter(self.__config['__outputfilter'])

                # Register the target with the module
                mod.setTarget(self.__target)

                if threaded:
                    # Set up the outgoing event queue
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()

                self.__sf.status(modName + " module loaded.")

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            if not threaded:
                # Register listener modules and then start all modules sequentially
                for module in list(self.__moduleInstances.values()):
                    for listenerModule in list(
                            self.__moduleInstances.values()):
                        # Careful not to register twice or you will get duplicate events
                        if listenerModule in module._listenerModules:
                            continue
                        # Note the absence of a check for whether a module can register
                        # to itself. That is intentional because some modules will
                        # act on their own notifications (e.g. sfp_dns)!
                        if listenerModule.watchedEvents() is not None:
                            module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            if threaded:
                psMod.outgoingEventQueue = self.eventQueue
                psMod.incomingEventQueue = queue.Queue()
            else:
                for mod in list(self.__moduleInstances.values()):
                    if mod.watchedEvents() is not None:
                        psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType,
                                         self.__targetValue, "SpiderFoot UI",
                                         rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            for mod in list(self.__moduleInstances.values()):
                if mod.checkForStop():
                    self.__setStatus('ABORTING')
                    aborted = True
                    break

            # start threads
            if threaded and not aborted:
                self.waitForThreads()

            if aborted:
                self.__sf.status(f"Scan [{self.__scanId}] aborted.")
                self.__setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
                self.__setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                + "Please report this as a bug: "
                + repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        self.__dbh.close()

    def waitForThreads(self) -> None:
        """Dispatch queued events to the modules until all of them are idle.

        Starts every loaded module, then forwards each event taken from the
        shared event queue to the modules watching that event type.
        KeyboardInterrupt and AssertionError are logged as an abort and not
        re-raised. On the way out, all modules are told to stop.

        Raises:
            TypeError: queue tried to process a malformed event
        """
        counter = 0

        try:
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()

            # watch for newly-generated events
            while True:

                # log status of threads every 100 iterations
                log_status = counter % 100 == 0
                counter += 1

                try:
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            break
                    else:
                        # save on CPU
                        sleep(.01)
                    continue

                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    watchedEvents = mod.watchedEvents()
                    if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                        # Each module gets its own copy of the event.
                        mod.incomingEventQueue.put(deepcopy(sfEvent))

        except (KeyboardInterrupt, AssertionError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] aborted, {e}.")

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True

    def threadsFinished(self, log_status: bool = False) -> bool:
        """Check if all threads are complete.

        Args:
            log_status (bool): log the largest non-empty module queues
                while modules are still running

        Returns:
            bool: True if every module queue is empty and no module is running
        """
        if self.eventQueue is None:
            return True

        modules_waiting = {
            m.__name__: m.incomingEventQueue.qsize()
            for m in self.__moduleInstances.values()
        }
        # List of (module name, queue size) pairs, largest queue first.
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)
        modules_running = [
            m.__name__ for m in self.__moduleInstances.values() if m.running
        ]
        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        # NOTE(review): queues_empty is a list, so `not queues_empty` is only
        # True when there are no module queues at all. If the intent was
        # "some queue still holds events" this would be `not all(...)`;
        # left unchanged because draining live queues could drop events
        # that are still in flight - TODO confirm intent.
        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                # Best effort: the loop ends when get_nowait() raises
                # (queue.Empty once the queue has been drained).
                try:
                    while True:
                        mod.incomingEventQueue.get_nowait()
                except Exception:
                    pass

        if log_status and modules_running:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if events_queued:
                self.__sf.info(f"Events queued: {events_queued}")

        return all(queues_empty) and not modules_running
class SpiderFootScanner():
    """Run a single SpiderFoot scan.

    Constructing an instance validates the arguments, stores them in
    ``temp`` and immediately runs the scan through ``startScan()``.
    """

    # Temporary storage for the constructor arguments; startScan() copies
    # them onto the instance.
    temp = None

    def __init__(self, scanName, scanTarget, targetType, scanId, moduleList, globalOpts, moduleOpts):
        """Initialize SpiderFootScanner object and immediately start a scan
        of the specified target.

        Args:
            scanName (str): name of the scan
            scanTarget (str): scan target
            targetType (str): scan target type
            scanId (str): scan identifier
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            moduleOpts (dict): unused

        Returns:
            None

        Raises:
            TypeError: scanName, scanTarget, scanId, moduleList or
                globalOpts has the wrong type
        """
        if not isinstance(scanName, str):
            raise TypeError("scanName is %s; expected str()" % type(scanName))
        if not isinstance(scanTarget, str):
            raise TypeError("scanTarget is %s; expected str()" % type(scanTarget))
        if not isinstance(scanId, str):
            raise TypeError("scanId is %s; expected str()" % type(scanId))
        if not isinstance(moduleList, list):
            raise TypeError("moduleList is %s; expected list()" % type(moduleList))
        if not isinstance(globalOpts, dict):
            raise TypeError("globalOpts is %s; expected dict()" % type(globalOpts))

        self.temp = {
            'config': deepcopy(globalOpts),
            'targetValue': scanTarget,
            'targetType': targetType,
            'moduleList': moduleList,
            'scanName': scanName,
            'scanId': scanId,
        }

        self.startScan()

    def setStatus(self, status, started=None, ended=None):
        """Set the status of the currently running scan (if any).

        The status is stored on the instance and written to the database
        through ``dbh.scanInstanceSet``.

        Args:
            status (str): scan status ("RUNNING", "STARTING", "STARTED",
                "ABORT-REQUESTED", "ABORTED", "FINISHED", "ERROR-FAILED")
            started: start time, passed through to the database unchanged
                (callers in this class pass ``time.time() * 1000``)
            ended: end time, passed through to the database unchanged

        Returns:
            None
        """
        self.status = status
        self.dbh.scanInstanceSet(self.scanId, started, ended, status)

    def run(self):
        """Start running a scan."""
        self.startScan()

    def getId(self):
        """Return the scan ID, or None if the scan has not been set up yet."""
        return getattr(self, 'scanId', None)

    def startScan(self):
        """Start running a scan.

        Creates the scan instance in the database, builds the target, applies
        the proxy/DNS/user-agent/TLD options, loads and wires up the modules,
        then fires the ROOT and initial target events. The final status
        (FINISHED, ABORTED or ERROR-FAILED) is recorded and the database
        handle is closed before returning.
        """
        self.moduleInstances = dict()
        self.sf = SpiderFoot(self.temp['config'])
        self.config = deepcopy(self.temp['config'])
        self.dbh = SpiderFootDb(self.temp['config'])
        self.targetValue = self.temp['targetValue']
        self.targetType = self.temp['targetType']
        self.moduleList = self.temp['moduleList']
        self.modconfig = dict()
        self.scanName = self.temp['scanName']
        self.scanId = self.temp['scanId']
        aborted = False
        self.sf.setDbh(self.dbh)

        # Create a unique ID for this scan and create it in the back-end DB.
        self.sf.setGUID(self.scanId)
        self.dbh.scanInstanceCreate(self.scanId, self.scanName, self.targetValue)
        self.setStatus("STARTING", time.time() * 1000, None)

        # Create our target
        try:
            target = SpiderFootTarget(self.targetValue, self.targetType)
        except BaseException as e:
            self.sf.status("Scan [%s] failed: %s" % (self.scanId, e))
            self.setStatus("ERROR-FAILED", None, time.time() * 1000)
            # Every other exit path closes the handle; do the same here so a
            # bad target does not leak the database connection.
            self.dbh.close()
            return None

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        self.dbh.scanConfigSet(self.scanId, self.sf.configSerialize(deepcopy(self.config)))

        self.sf.status("Scan [" + self.scanId + "] initiated.")

        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = self.config['_socks4user'] or ''
                socksPassword = self.config['_socks5pwd'] or ''

                # Credentials are only embedded when both parts are present
                creds = ""
                if socksUsername and socksPassword:
                    creds = socksUsername + ":" + socksPassword + "@"
                proxy = creds + socksAddr + ":" + str(socksPort)

                if self.config['_socks1type'] == '4':
                    proxy = 'socks4://' + proxy
                elif self.config['_socks1type'] == '5':
                    proxy = 'socks5://' + proxy
                elif self.config['_socks1type'] == 'HTTP':
                    proxy = 'http://' + proxy
                elif self.config['_socks1type'] == 'TOR':
                    proxy = 'socks5h://' + proxy

                # NOTE(review): this debug line writes the proxy password to
                # the log in clear text.
                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")

                self.sf.updateSocket(proxy)
            else:
                self.sf.revertSocket()

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(
                self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds",
                                       self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(
                    self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            for modName in self.moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(), locals(), [modName])
                except ImportError:
                    self.sf.error("Failed to load module: " + modName, False)
                    continue

                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.modconfig[modName] = deepcopy(self.config['__modules__'][modName]['opts'])
                for opt, value in self.config.items():
                    self.modconfig[modName][opt] = deepcopy(value)

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.modconfig[modName])
                mod.setDbh(self.dbh)
                mod.setScanId(self.scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(target)
                if newTarget is not None:
                    target = newTarget
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.config['__outputfilter']:
                    mod.setOutputFilter(self.config['__outputfilter'])

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in list(self.moduleInstances.values()):
                # Register the target with the module
                module.setTarget(target)

                for listenerModule in list(self.moduleInstances.values()):
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(target)
            psMod.setDbh(self.dbh)
            psMod.clearListeners()
            for mod in list(self.moduleInstances.values()):
                if mod.watchedEvents() is not None:
                    psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.targetType, self.targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.targetType == 'INTERNET_NAME':
                if self.sf.isDomain(self.targetValue, self.config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME', self.targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            for module in list(self.moduleInstances.values()):
                if module.checkForStop():
                    self.setStatus('ABORTING')
                    aborted = True
                    break

            if aborted:
                self.sf.status("Scan [" + self.scanId + "] aborted.")
                self.setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.sf.status("Scan [" + self.scanId + "] completed.")
                self.setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.scanId + "] failed: " + str(e))
            self.setStatus("ERROR-FAILED", None, time.time() * 1000)

        self.dbh.close()
class SpiderFootScanner:
    """Run one scan at a time and expose its status.

    ``startScan()`` loads the requested modules, wires them together as
    listeners, starts them sequentially and records the outcome in the
    database.
    """

    # name -> module instance while a scan is running, otherwise None
    moduleInstances = None
    # last known status of the scan
    status = "UNKNOWN"
    # ID of the scan owned by this object (set by startScan)
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        """Store the scan parameters; the scan itself starts in startScan().

        Args:
            name: name of the scan
            target: scan target
            moduleList: names of the modules to run
            globalOpts: scan options (deep-copied)
            moduleOpts: unused
        """
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

    def scanStatus(self, id):
        """Return the status of scan ``id``, or "UNKNOWN" if it is not ours."""
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    def stopScan(self, id):
        """Ask every loaded module to stop scanning.

        The id argument is unnecessary for now given that only one
        simultaneous scan is permitted; a call for a different ID, or when
        no modules are loaded, does nothing.
        """
        if id != self.myId:
            return None

        # Identity test: an object with a permissive __eq__ must not be
        # mistaken for "no modules loaded".
        if self.moduleInstances is None:
            return None

        for mod in self.moduleInstances.values():
            mod.stopScanning()

    def startScan(self):
        """Start running a scan.

        Creates the scan instance in the database, applies the proxy, DNS,
        user-agent and TLD options, loads the modules, registers them as
        listeners of each other and starts them one after another. The final
        status (FINISHED, ABORTED or ERROR-FAILED) is written to the
        database before the handle is closed.
        """
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        scanId = dbh.scanInstanceGenGUID(self.target)
        self.config['__guid__'] = scanId
        self.sf.setScanId(scanId)
        self.myId = scanId
        dbh.scanInstanceCreate(scanId, self.name, self.target)
        dbh.scanInstanceSet(scanId, time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(scanId, self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + scanId + "] initiated.")

        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                if self.config['_socks1type'] == '5':
                    socksType = socks.PROXY_TYPE_SOCKS5
                    # credentials are only read for SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']
                if self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP

                # NOTE(review): this debug line writes the proxy password to
                # the log in clear text.
                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort,
                                      socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the
                # SOCKS ones
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo

                self.sf.updateSocket(socket)

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds", self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(), [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp__stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options.
                # The module's own 'opts' dict is extended in place (no copy).
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(scanId, status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(scanId, rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(scanId, status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + scanId + "] aborted.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + scanId + "] completed.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + scanId + "] failed: " + str(e))
            dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
class SpiderFootScanner():
    """SpiderFootScanner object.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    # The containers below are created per instance in __init__; a
    # class-level list()/dict() would be shared (and mutated) by every
    # scanner object.
    __moduleList = None
    __target = None
    __moduleInstances = None
    __modconfig = None
    __scanName = None

    def __init__(self, scanName, scanId, targetValue, targetType, moduleList, globalOpts, start=True):
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        # Validate every argument before opening the database so that bad
        # input cannot leave an open handle behind.
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__config = deepcopy(globalOpts)
        self.__scanName = scanName
        self.__scanId = scanId
        self.__targetValue = targetValue
        self.__targetType = targetType
        self.__moduleList = moduleList
        self.__moduleInstances = dict()
        self.__modconfig = dict()

        self.__dbh = SpiderFootDb(self.__config)
        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create the scan in the back-end DB under the caller-supplied ID.
        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            # The object is never returned to the caller on this path, so
            # release the database handle here.
            self.__dbh.close()
            raise ValueError(f"Invalid target: {e}") from e

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a SOCKS server was specified, set it up
        if self.__config['_socks1type']:
            socksAddr = self.__config['_socks2addr']
            socksPort = int(self.__config['_socks3port'])
            socksUsername = self.__config['_socks4user'] or ''
            socksPassword = self.__config['_socks5pwd'] or ''

            proxy = f"{socksAddr}:{socksPort}"

            if socksUsername or socksPassword:
                proxy = "%s:%s@%s" % (socksUsername, socksPassword, proxy)

            if self.__config['_socks1type'] == '4':
                proxy = 'socks4://' + proxy
            elif self.__config['_socks1type'] == '5':
                proxy = 'socks5://' + proxy
            elif self.__config['_socks1type'] == 'HTTP':
                proxy = 'http://' + proxy
            elif self.__config['_socks1type'] == 'TOR':
                proxy = 'socks5h://' + proxy
            else:
                raise ValueError(
                    f"Invalid SOCKS proxy type: {self.__config['_socks1type']}"
                )

            # NOTE(review): this debug line writes the proxy password to the
            # log in clear text.
            self.__sf.debug(
                f"SOCKS: {socksAddr}:{socksPort} ({socksUsername}:{socksPassword})"
            )

            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Get internet TLDs
        tlddata = self.__sf.cacheGet("internet_tlds",
                                     self.__config['_internettlds_cache'])

        # If it wasn't loadable from cache, load it from scratch
        if tlddata is None:
            self.__config['_internettlds'] = self.__sf.optValueToData(
                self.__config['_internettlds'])
            self.__sf.cachePut("internet_tlds", self.__config['_internettlds'])
        else:
            self.__config["_internettlds"] = tlddata.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        if start:
            self.__startScan()

    @property
    def scanId(self):
        """Unique identifier for this scan"""
        return self.__scanId

    @property
    def status(self):
        """Status of this scan"""
        return self.__status

    def __setStatus(self, status, started=None, ended=None):
        """Set the status of the currently running scan (if any).

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Returns:
            None

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING", "STARTING", "STARTED", "RUNNING",
                "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self):
        """Start running a scan.

        Loads and configures the modules, registers them as listeners of
        each other, fires the ROOT and initial target events and records the
        final status (FINISHED, ABORTED or ERROR-FAILED). The database handle
        is closed before returning.
        """
        aborted = False

        self.__setStatus("STARTING", time.time() * 1000, None)
        self.__sf.status(f"Scan [{self.__scanId}] initiated.")

        try:
            # moduleList = list of modules the user wants to run
            for modName in self.__moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error("Failed to load module: " + modName, False)
                    continue

                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.__modconfig[modName] = deepcopy(
                    self.__config['__modules__'][modName]['opts'])
                for opt, value in self.__config.items():
                    self.__modconfig[modName][opt] = deepcopy(value)

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.__sf, self.__modconfig[modName])
                mod.setDbh(self.__dbh)
                mod.setScanId(self.__scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(self.__target)
                if newTarget is not None:
                    self.__target = newTarget
                self.__moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    mod.setOutputFilter(self.__config['__outputfilter'])

                self.__sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in list(self.__moduleInstances.values()):
                # Register the target with the module
                module.setTarget(self.__target)

                for listenerModule in list(self.__moduleInstances.values()):
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            for mod in list(self.__moduleInstances.values()):
                if mod.watchedEvents() is not None:
                    psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType,
                                         self.__targetValue, "SpiderFoot UI",
                                         rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            for module in list(self.__moduleInstances.values()):
                if module.checkForStop():
                    self.__setStatus('ABORTING')
                    aborted = True
                    break

            if aborted:
                self.__sf.status(f"Scan [{self.__scanId}] aborted.")
                self.__setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
                self.__setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan. "
                + "Please report this as a bug: " + repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)), False)
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        self.__dbh.close()
class SpiderFootScanner:
    """Run one scan at a time and expose its status.

    ``startScan()`` loads the requested modules, wires them together as
    listeners, starts them sequentially and records the outcome in the
    database.
    """

    # name -> module instance while a scan is running, otherwise None
    moduleInstances = None
    # last known status of the scan
    status = "UNKNOWN"
    # ID of the scan owned by this object (set by startScan)
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        """Store the scan parameters; the scan itself starts in startScan().

        Args:
            name: name of the scan
            target: scan target
            moduleList: names of the modules to run
            globalOpts: scan options (deep-copied)
            moduleOpts: unused
        """
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

    def scanStatus(self, id):
        """Return the status of scan ``id``, or "UNKNOWN" if it is not ours."""
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    def stopScan(self, id):
        """Ask every loaded module to stop scanning.

        The id argument is unnecessary for now given that only one
        simultaneous scan is permitted; a call for a different ID, or when
        no modules are loaded, does nothing.
        """
        if id != self.myId:
            return None

        # Identity test: an object with a permissive __eq__ must not be
        # mistaken for "no modules loaded".
        if self.moduleInstances is None:
            return None

        for mod in self.moduleInstances.values():
            mod.stopScanning()

    def startScan(self):
        """Start running a scan.

        Creates the scan instance in the database, applies the proxy, DNS,
        user-agent and TLD options, loads the modules, registers them as
        listeners of each other and starts them one after another. The final
        status (FINISHED, ABORTED or ERROR-FAILED) is written to the
        database before the handle is closed.
        """
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        scanId = dbh.scanInstanceGenGUID(self.target)
        self.config['__guid__'] = scanId
        self.sf.setScanId(scanId)
        self.myId = scanId
        dbh.scanInstanceCreate(scanId, self.name, self.target)
        dbh.scanInstanceSet(scanId, time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(scanId, self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + scanId + "] initiated.")

        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                if self.config['_socks1type'] == '5':
                    socksType = socks.PROXY_TYPE_SOCKS5
                    # credentials are only read for SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']
                if self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP

                # NOTE(review): this debug line writes the proxy password to
                # the log in clear text.
                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort,
                                      socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the
                # SOCKS ones
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo

                self.sf.updateSocket(socket)

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(
                self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds",
                                       self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(
                    self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(),
                                    [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # A bit hacky: we pass the database object as part of the config. This
                # object should only be used by the internal SpiderFoot modules writing
                # to the database, which at present is only sfp__stor_db.
                # Individual modules cannot create their own SpiderFootDb instance or
                # we'll get database locking issues, so it all goes through this.
                self.config['__sfdb__'] = dbh

                # Set up the module
                # Configuration is a combined global config with module-specific options.
                # The module's own 'opts' dict is extended in place (no copy).
                modConfig = self.config['__modules__'][modName]['opts']
                for opt in self.config.keys():
                    modConfig[opt] = self.config[opt]

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(scanId, status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target,
                                        "SpiderFoot UI")
            dbh.scanEventStore(scanId, rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(scanId, status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + scanId + "] aborted.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + scanId + "] completed.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + scanId + "] failed: " + str(e))
            dbh.scanInstanceSet(scanId, None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)