示例#1
0
    def rerunscan(self, id):
        """Rerun a scan

        Args:
            id (str): scan ID

        Returns:
            None

        Raises:
            HTTPRedirect: redirect to info page for new scan
        """

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modlist = list()
        dbh = SpiderFootDb(cfg)
        info = dbh.scanInstanceGet(id)

        if not info:
            return self.error("Invalid scan ID.")

        scanname = info[0]
        scantarget = info[1]

        scanconfig = dbh.scanConfigGet(id)
        if not scanconfig:
            return self.error(f"Error loading config from scan: {id}")

        modlist = scanconfig['_modulesenabled'].split(',')
        if "sfp__stor_stdout" in modlist:
            modlist.remove("sfp__stor_stdout")

        targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
        if not targetType:
            # It must then be a name, as a re-run scan should always have a clean
            # target. Put quotes around the target value and try to determine the
            # target type again.
            targetType = SpiderFootHelpers.targetTypeFromString(f'"{scantarget}"')

        if targetType not in ["HUMAN_NAME", "BITCOIN_ADDRESS"]:
            scantarget = scantarget.lower()

        # Start running a new scan
        scanId = SpiderFootHelpers.genScanInstanceId()
        try:
            p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
            p.daemon = True
            p.start()
        except Exception as e:
            self.log.error(f"[-] Scan [{scanId}] failed: {e}")
            return self.error(f"[-] Scan [{scanId}] failed: {e}")

        # Wait until the scan has initialized
        while dbh.scanInstanceGet(scanId) is None:
            self.log.info("Waiting for the scan to initialize...")
            time.sleep(1)

        raise cherrypy.HTTPRedirect(f"{self.docroot}/scaninfo?id={scanId}", status=302)
示例#2
0
    def rerunscanmulti(self, ids):
        """Rerun scans

        Args:
            ids (str): comma separated list of scan IDs

        Returns:
            None
        """

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modlist = list()
        dbh = SpiderFootDb(cfg)

        for id in ids.split(","):
            info = dbh.scanInstanceGet(id)
            if not info:
                return self.error("Invalid scan ID.")

            scanconfig = dbh.scanConfigGet(id)
            scanname = info[0]
            scantarget = info[1]
            targetType = None

            if len(scanconfig) == 0:
                return self.error("Something went wrong internally.")

            modlist = scanconfig['_modulesenabled'].split(',')
            if "sfp__stor_stdout" in modlist:
                modlist.remove("sfp__stor_stdout")

            targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
            if targetType is None:
                # Should never be triggered for a re-run scan..
                return self.error("Invalid target type. Could not recognize it as a target SpiderFoot supports.")

            # Start running a new scan
            scanId = SpiderFootHelpers.genScanInstanceId()
            try:
                p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
                p.daemon = True
                p.start()
            except Exception as e:
                self.log.error(f"[-] Scan [{scanId}] failed: {e}")
                return self.error(f"[-] Scan [{scanId}] failed: {e}")

            # Wait until the scan has initialized
            while dbh.scanInstanceGet(scanId) is None:
                self.log.info("Waiting for the scan to initialize...")
                time.sleep(1)

        templ = Template(filename='spiderfoot/templates/scanlist.tmpl', lookup=self.lookup)
        return templ.render(rerunscans=True, docroot=self.docroot, pageid="SCANLIST", version=__version__)
    def test_scanInstanceGet_argument_instanceId_of_invalid_type_should_raise_TypeError(self):
        """
        Test scanInstanceGet(self, instanceId)
        """
        sfdb = SpiderFootDb(self.default_options, False)

        invalid_types = [None, list(), dict(), int()]
        for invalid_type in invalid_types:
            with self.subTest(invalid_type=invalid_type):
                with self.assertRaises(TypeError):
                    sfdb.scanInstanceGet(invalid_type)
示例#4
0
    def scanvizmulti(self, ids, gexf="1"):
        """Export entities results from multiple scans in GEXF format

        Args:
            ids (str): scan IDs
            gexf (str): TBD

        Returns:
            string: GEXF data
        """
        dbh = SpiderFootDb(self.config)
        data = list()
        roots = list()

        if not ids:
            return None

        for id in ids.split(','):
            data = data + dbh.scanResultEvent(id, filterFp=True)
            scan = dbh.scanInstanceGet(id)
            if scan:
                roots.append(scan[1])

        if gexf == "0":
            # Not implemented yet
            return None

        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        cherrypy.response.headers['Content-Type'] = "application/gexf"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return SpiderFootHelpers.buildGraphGexf(roots, "SpiderFoot Export", data)
示例#5
0
    def scanviz(self, id, gexf="0"):
        """Export entities from scan results for visualising

        Args:
            id (str): scan ID
            gexf (str): TBD

        Returns:
            string: GEXF data
        """

        if not id:
            return None

        dbh = SpiderFootDb(self.config)
        data = dbh.scanResultEvent(id, filterFp=True)
        scan = dbh.scanInstanceGet(id)

        if not scan:
            return None

        root = scan[1]

        if gexf == "0":
            return SpiderFootHelpers.buildGraphJson([root], data)

        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
        cherrypy.response.headers['Content-Type'] = "application/gexf"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return SpiderFootHelpers.buildGraphGexf([root], "SpiderFoot Export", data)
示例#6
0
    def scansummary(self, id, by):
        """Summary of scan results.

        Args:
            id (str): scan ID
            by: TBD

        Returns:
            str: scan summary as JSON
        """
        retdata = []

        dbh = SpiderFootDb(self.config)

        try:
            scandata = dbh.scanResultSummary(id, by)
        except Exception:
            return retdata

        try:
            statusdata = dbh.scanInstanceGet(id)
        except Exception:
            return retdata

        for row in scandata:
            if row[0] == "ROOT":
                continue
            lastseen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[2]))
            retdata.append([row[0], row[1], lastseen, row[3], row[4], statusdata[5]])

        return retdata
示例#7
0
    def scandelete(self, id):
        """Delete scan(s)

        Args:
            id (str): comma separated list of scan IDs

        Returns:
            str: JSON response
        """
        if not id:
            return self.jsonify_error('404', "No scan specified")

        dbh = SpiderFootDb(self.config)
        ids = id.split(',')

        for scan_id in ids:
            res = dbh.scanInstanceGet(scan_id)
            if not res:
                return self.jsonify_error('404', f"Scan {id} does not exist")

            if res[5] in ["RUNNING", "STARTING", "STARTED"]:
                return self.jsonify_error('400', f"Scan {id} is {res[5]}. You cannot delete running scans.")

        for scan_id in ids:
            dbh.scanInstanceDelete(scan_id)

        return b""
示例#8
0
    def scandelete(self, id, confirm=None):
        """Delete a scan

        Args:
            id (str): scan ID
            confirm (str): specify any value (except None) to confirm deletion of the scan
        """

        dbh = SpiderFootDb(self.config)
        res = dbh.scanInstanceGet(id)

        if res is None:
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Scan ID not found."]).encode('utf-8')

            return self.error("Scan ID not found.")

        if confirm:
            dbh.scanInstanceDelete(id)

            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["SUCCESS", ""]).encode('utf-8')

            raise cherrypy.HTTPRedirect("/")

        templ = Template(filename='dyn/scandelete.tmpl', lookup=self.lookup)
        return templ.render(id=id, name=str(res[0]),
                            names=list(), ids=list(),
                            pageid="SCANLIST", docroot=self.docroot)
示例#9
0
    def scaneventresultexportmulti(self, ids, dialect="excel"):
        """Get scan event result data in CSV format for multiple scans

        Args:
            ids (str): comma separated list of scan IDs
            dialect (str): TBD

        Returns:
            string: results in CSV format
        """

        dbh = SpiderFootDb(self.config)
        scaninfo = dict()
        data = list()
        for id in ids.split(','):
            scaninfo[id] = dbh.scanInstanceGet(id)
            data = data + dbh.scanResultEvent(id)

        fileobj = StringIO()
        parser = csv.writer(fileobj, dialect=dialect)
        parser.writerow(["Scan Name", "Updated", "Type", "Module", "Source", "F/P", "Data"])
        for row in data:
            if row[4] == "ROOT":
                continue
            lastseen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[0]))
            datafield = str(row[1]).replace("<SFURL>", "").replace("</SFURL>", "")
            parser.writerow([scaninfo[row[12]][0], lastseen, str(row[4]), str(row[3]),
                            str(row[2]), row[13], datafield])
        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.csv"
        cherrypy.response.headers['Content-Type'] = "application/csv"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return fileobj.getvalue().encode('utf-8')
示例#10
0
    def stopscan(self, id):
        """Stop a scan

        Args:
            id (str): comma separated list of scan IDs

        Returns:
            str: JSON response
        """
        if not id:
            return self.jsonify_error('404', "No scan specified")

        dbh = SpiderFootDb(self.config)
        ids = id.split(',')

        for scan_id in ids:
            res = dbh.scanInstanceGet(scan_id)
            if not res:
                return self.jsonify_error('404', f"Scan {id} does not exist")

            scan_status = res[5]

            if scan_status == "FINISHED":
                return self.jsonify_error('400', f"Scan {id} has already finished.")

            if scan_status == "ABORTED":
                return self.jsonify_error('400', f"Scan {id} has already aborted.")

            if scan_status != "RUNNING":
                return self.jsonify_error('400', f"The running scan is currently in the state '{scan_status}', please try again later or restart SpiderFoot.")

        for scan_id in ids:
            dbh.scanInstanceSet(scan_id, status="ABORT-REQUESTED")

        return b""
示例#11
0
    def stopscanmulti(self, ids):
        """Stop a scan

        Args:
            ids (str): comma separated list of scan IDs

        Note:
            Unnecessary for now given that only one simultaneous scan is permitted
        """

        dbh = SpiderFootDb(self.config)
        error = list()

        for id in ids.split(","):
            scaninfo = dbh.scanInstanceGet(id)

            if not scaninfo:
                return self.error("Invalid scan ID: %s" % id)

            scanname = str(scaninfo[0])
            scanstatus = scaninfo[5]

            if scanstatus == "FINISHED":
                error.append("Scan '%s' is in a finished state. <a href='/scandelete?id=%s&confirm=1'>Maybe you want to delete it instead?</a>" % (scanname, id))
                continue

            if scanstatus == "ABORTED":
                error.append("Scan '" + scanname + "' is already aborted.")
                continue

            dbh.scanInstanceSet(id, status="ABORT-REQUESTED")

        raise cherrypy.HTTPRedirect("/")
示例#12
0
    def scansummary(self, id, by):
        """Summary of scan results

        Args:
            id (str): scan ID
            by: TBD
        """

        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

        retdata = []

        dbh = SpiderFootDb(self.config)

        try:
            scandata = dbh.scanResultSummary(id, by)
        except Exception:
            return json.dumps(retdata).encode('utf-8')

        try:
            statusdata = dbh.scanInstanceGet(id)
        except Exception:
            return json.dumps(retdata).encode('utf-8')

        for row in scandata:
            if row[0] == "ROOT":
                continue
            lastseen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[2]))
            retdata.append([row[0], row[1], lastseen, row[3], row[4], statusdata[5]])
        return json.dumps(retdata).encode('utf-8')
示例#13
0
    def scandeletemulti(self, ids, confirm=None):
        """Delete a scan

        Args:
            ids (str): comma separated list of scan IDs
            confirm: TBD
        """

        dbh = SpiderFootDb(self.config)
        names = list()

        for id in ids.split(','):
            res = dbh.scanInstanceGet(id)
            names.append(str(res[0]))
            if res is None:
                return self.error("Scan ID not found (" + id + ").")

            if res[5] in ["RUNNING", "STARTING", "STARTED"]:
                return self.error("You cannot delete running scans.")

        if confirm:
            for id in ids.split(','):
                dbh.scanInstanceDelete(id)
            raise cherrypy.HTTPRedirect("/")

        templ = Template(filename='dyn/scandelete.tmpl', lookup=self.lookup)
        return templ.render(id=None, name=None, ids=ids.split(','), names=names,
                            pageid="SCANLIST", docroot=self.docroot)
示例#14
0
    def test_scanInstanceGet_should_return_scan_info(self):
        """
        Test scanInstanceGet(self, instanceId)
        """
        sfdb = SpiderFootDb(self.default_options, False)

        instance_id = "example instance id"
        scan_name = "example scan name"
        scan_target = "example scan target"

        sfdb.scanInstanceCreate(instance_id, scan_name, scan_target)

        scan_instance_get = sfdb.scanInstanceGet(instance_id)

        self.assertEqual(len(scan_instance_get), 6)

        self.assertIsInstance(scan_instance_get[0], str)
        self.assertEqual(scan_instance_get[0], scan_name)

        self.assertIsInstance(scan_instance_get[1], str)
        self.assertEqual(scan_instance_get[1], scan_target)

        self.assertIsInstance(scan_instance_get[2], float)

        self.assertIsInstance(scan_instance_get[3], float)

        self.assertIsInstance(scan_instance_get[4], float)

        self.assertIsInstance(scan_instance_get[5], str)
        self.assertEqual(scan_instance_get[5], 'CREATED')
示例#15
0
    def resultsetfp(self, id, resultids, fp):
        """Set a bunch of results (hashes) as false positive

        Args:
            id (str): scan ID
            resultids (str): comma separated list of result IDs
            fp (str): 0 or 1

        Returns:
            str: set false positive status as JSON
        """

        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

        dbh = SpiderFootDb(self.config)

        if fp not in ["0", "1"]:
            return json.dumps(["ERROR", "No FP flag set or not set correctly."]).encode('utf-8')

        try:
            ids = json.loads(resultids)
        except Exception:
            return json.dumps(["ERROR", "No IDs supplied."]).encode('utf-8')

        # Cannot set FPs if a scan is not completed
        status = dbh.scanInstanceGet(id)
        if not status:
            return self.error("Invalid scan ID: %s" % id)

        if status[5] not in ["ABORTED", "FINISHED", "ERROR-FAILED"]:
            return json.dumps([
                "WARNING",
                "Scan must be in a finished state when setting False Positives."
            ]).encode('utf-8')

        # Make sure the user doesn't set something as non-FP when the
        # parent is set as an FP.
        if fp == "0":
            data = dbh.scanElementSourcesDirect(id, ids)
            for row in data:
                if str(row[14]) == "1":
                    return json.dumps([
                        "WARNING",
                        f"Cannot unset element {id} as False Positive if a parent element is still False Positive."
                    ]).encode('utf-8')

        # Set all the children as FPs too.. it's only logical afterall, right?
        childs = dbh.scanElementChildrenAll(id, ids)
        allIds = ids + childs

        ret = dbh.scanResultsUpdateFP(id, allIds, fp)
        if ret:
            return json.dumps(["SUCCESS", ""]).encode('utf-8')

        return json.dumps(["ERROR", "Exception encountered."]).encode('utf-8')
示例#16
0
    def scanexportjsonmulti(self, ids):
        """Get scan event result data in JSON format for multiple scans

        Args:
            ids (str): comma separated list of scan IDs

        Returns:
            string: results in CSV format
        """

        dbh = SpiderFootDb(self.config)
        scaninfo = list()
        scan_name = ""

        for id in ids.split(','):
            scan = dbh.scanInstanceGet(id)

            if scan is None:
                continue

            scan_name = scan[0]

            for row in dbh.scanResultEvent(id):
                lastseen = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(row[0]))
                event_data = str(row[1]).replace("<SFURL>", "").replace("</SFURL>", "")
                source_data = str(row[2])
                source_module = str(row[3])
                event_type = row[4]
                false_positive = row[13]

                if event_type == "ROOT":
                    continue

                scaninfo.append({
                    "data": event_data,
                    "event_type": event_type,
                    "module": source_module,
                    "source_data": source_data,
                    "false_positive": false_positive,
                    "last_seen": lastseen,
                    "scan_name": scan_name,
                    "scan_target": scan[1]
                })

        if len(ids.split(',')) > 1 or scan_name == "":
            fname = "SpiderFoot.json"
        else:
            fname = scan_name + "-SpiderFoot.json"

        cherrypy.response.headers['Content-Disposition'] = "attachment; filename=" + fname
        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
        cherrypy.response.headers['Pragma'] = "no-cache"
        return json.dumps(scaninfo).encode('utf-8')
示例#17
0
 def scanviz(self, id, gexf="0"):
     dbh = SpiderFootDb(self.config)
     sf = SpiderFoot(self.config)
     data = dbh.scanResultEvent(id, filterFp=True)
     scan = dbh.scanInstanceGet(id)
     root = scan[1]
     if gexf != "0":
         cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
         cherrypy.response.headers['Content-Type'] = "application/gexf"
         cherrypy.response.headers['Pragma'] = "no-cache"
         return sf.buildGraphGexf([root], "SpiderFoot Export", data)
     else:
         return sf.buildGraphJson([root], data)
示例#18
0
    def scaninfo(self, id):
        """Information about a selected scan

        Args:
            id (str): scan id
        """
        dbh = SpiderFootDb(self.config)
        res = dbh.scanInstanceGet(id)
        if res is None:
            return self.error("Scan ID not found.")

        templ = Template(filename='dyn/scaninfo.tmpl', lookup=self.lookup, input_encoding='utf-8')
        return templ.render(id=id, name=html.escape(res[0]), status=res[5], docroot=self.docroot,
                            pageid="SCANLIST")
示例#19
0
    def scanvizmulti(self, ids, gexf="1"):
        dbh = SpiderFootDb(self.config)
        sf = SpiderFoot(self.config)
        data = list()
        roots = list()
        for id in ids.split(','):
            data = data + dbh.scanResultEvent(id, filterFp=True)
            roots.append(dbh.scanInstanceGet(id)[1])

        if gexf != "0":
            cherrypy.response.headers['Content-Disposition'] = "attachment; filename=SpiderFoot.gexf"
            cherrypy.response.headers['Content-Type'] = "application/gexf"
            cherrypy.response.headers['Pragma'] = "no-cache"
            return sf.buildGraphGexf(roots, "SpiderFoot Export", data)
        else:
            # Not implemented yet
            return None
示例#20
0
    def scanopts(self, id):
        """Configuration used for a scan

        Args:
            id: scan ID

        Returns:
            str: options as JSON string
        """
        dbh = SpiderFootDb(self.config)
        ret = dict()

        meta = dbh.scanInstanceGet(id)
        if not meta:
            return ret

        if meta[3] != 0:
            started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[3]))
        else:
            started = "Not yet"

        if meta[4] != 0:
            finished = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[4]))
        else:
            finished = "Not yet"

        ret['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]
        ret['config'] = dbh.scanConfigGet(id)
        ret['configdesc'] = dict()
        for key in list(ret['config'].keys()):
            if ':' not in key:
                globaloptdescs = self.config['__globaloptdescs__']
                if globaloptdescs:
                    ret['configdesc'][key] = globaloptdescs.get(key, f"{key} (legacy)")
            else:
                [modName, modOpt] = key.split(':')
                if modName not in list(self.config['__modules__'].keys()):
                    continue

                if modOpt not in list(self.config['__modules__'][modName]['optdescs'].keys()):
                    continue

                ret['configdesc'][key] = self.config['__modules__'][modName]['optdescs'][modOpt]

        return ret
示例#21
0
    def scanstatus(self, id):
        """Basic information about a scan.

        Args:
            id (str): scan ID

        Returns:
            str: scan status as JSON
        """
        dbh = SpiderFootDb(self.config)
        data = dbh.scanInstanceGet(id)

        if not data:
            return []

        created = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[2]))
        started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[3]))
        ended = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[4]))

        return [data[0], data[1], created, started, ended, data[5]]
示例#22
0
    def scanstatus(self, id):
        """Basic information about a scan

        Args:
            id (str): scan ID
        """

        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

        dbh = SpiderFootDb(self.config)
        data = dbh.scanInstanceGet(id)

        if not data:
            return json.dumps([]).encode('utf-8')

        created = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[2]))
        started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[3]))
        ended = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(data[4]))

        retdata = [data[0], data[1], created, started, ended, data[5]]
        return json.dumps(retdata).encode('utf-8')
示例#23
0
    def stopscan(self, id):
        """Stop a scan.

        Args:
            id (str): scan ID
        """

        dbh = SpiderFootDb(self.config)
        scaninfo = dbh.scanInstanceGet(id)

        if not scaninfo:
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Invalid scan ID."]).encode('utf-8')

            return self.error("Invalid scan ID.")

        scanstatus = scaninfo[5]

        if scanstatus == "ABORTED":
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Scan already aborted."]).encode('utf-8')

            return self.error("The scan is already aborted.")

        if not scanstatus == "RUNNING":
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Scan in an invalid state for stopping."]).encode('utf-8')

            return self.error("The running scan is currently in the state '%s', please try again later or restart SpiderFoot." % scanstatus)

        dbh.scanInstanceSet(id, status="ABORT-REQUESTED")

        if 'application/json' in cherrypy.request.headers.get('Accept'):
            cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
            return json.dumps(["SUCCESS", ""]).encode('utf-8')

        raise cherrypy.HTTPRedirect("/")
示例#24
0
    def scanopts(self, id):
        """Configuration used for a scan

        Args:
            id: scan ID
        """

        ret = dict()
        dbh = SpiderFootDb(self.config)
        ret['config'] = dbh.scanConfigGet(id)
        ret['configdesc'] = dict()
        for key in list(ret['config'].keys()):
            if ':' not in key:
                ret['configdesc'][key] = self.config['__globaloptdescs__'][key]
            else:
                [modName, modOpt] = key.split(':')
                if modName not in list(self.config['__modules__'].keys()):
                    continue

                if modOpt not in list(self.config['__modules__'][modName]['optdescs'].keys()):
                    continue

                ret['configdesc'][key] = self.config['__modules__'][modName]['optdescs'][modOpt]

        meta = dbh.scanInstanceGet(id)
        if not meta:
            return json.dumps([]).encode('utf-8')
        if meta[3] != 0:
            started = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[3]))
        else:
            started = "Not yet"

        if meta[4] != 0:
            finished = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(meta[4]))
        else:
            finished = "Not yet"
        ret['meta'] = [meta[0], meta[1], meta[2], started, finished, meta[5]]

        return json.dumps(ret).encode('utf-8')
示例#25
0
    def clonescan(self, id):
        """
        Clone an existing scan (pre-selected options in the newscan page)

        Args:
            id (str): scan ID to clone

        Returns:
            None
        """

        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()
        info = dbh.scanInstanceGet(id)

        if not info:
            return self.error("Invalid scan ID.")

        scanconfig = dbh.scanConfigGet(id)
        scanname = info[0]
        scantarget = info[1]
        targetType = None

        if scanname == "" or scantarget == "" or len(scanconfig) == 0:
            return self.error("Something went wrong internally.")

        targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
        if targetType is None:
            # It must be a name, so wrap quotes around it
            scantarget = "&quot;" + scantarget + "&quot;"

        modlist = scanconfig['_modulesenabled'].split(',')

        templ = Template(filename='spiderfoot/templates/newscan.tmpl', lookup=self.lookup)
        return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                            modules=self.config['__modules__'], selectedmods=modlist,
                            scanname=str(scanname),
                            scantarget=str(scantarget), version=__version__)
示例#26
0
文件: sf.py 项目: ncolyer/spiderfoot
def start_scan(sfConfig, sfModules, args):
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    if args.types:
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45}  {1}'.format(t, types[t])))
        sys.exit(0)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = sf.targetType(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides',
                                           []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    # Run the scan
    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")
    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    if args.o == "json":
        print("[", end='')

    # Start running a new scan
    scanName = target
    scanId = sf.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner,
                       args=(scanName, scanId, target, targetType, modlist,
                             cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # If field headers weren't disabled, print them
    if not args.H and args.o != "json":
        if args.D:
            delim = args.D
        else:
            if args.o in ["tab", None]:
                delim = "\t"

            if args.o == "csv":
                delim = ","

        if not args.r:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}'.format("Source", delim, "Type",
                                                     delim, "Data"))
        else:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Source Data", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data"))

    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return None
示例#27
0
class SpiderFootScanner():
    """SpiderFootScanner object.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    __moduleList = list()
    __target = None
    __moduleInstances = dict()
    __modconfig = dict()
    __scanName = None

    def __init__(self,
                 scanName: str,
                 scanId: str,
                 targetValue: str,
                 targetType: str,
                 moduleList: list,
                 globalOpts: dict,
                 start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList
        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create a unique ID for this scan in the back-end DB.
        if scanId:
            self.__scanId = scanId
        else:
            self.__scanId = SpiderFootHelpers.genScanInstanceId()

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}") from None

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            proxy_port = int(self.__config.get('_socks3port') or 0)

            if not proxy_port:
                if proxy_type in ['4', '5']:
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Set up the Internet TLD list.
        # If the cached does not exist or has expired, reload it from scratch.
        tld_data = self.__sf.cacheGet("internet_tlds",
                                      self.__config['_internettlds_cache'])
        if tld_data is None:
            tld_data = self.__sf.optValueToData(self.__config['_internettlds'])
            if tld_data is None:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Could not update TLD list"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError("Could not update TLD list")
            self.__sf.cachePut("internet_tlds", tld_data)

        self.__config['_internettlds'] = tld_data.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        self.__sharedThreadPool = SpiderFootThreadPool(
            threads=self.__config.get("_maxthreads", 3),
            name='sharedThreadPool')

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        return self.__scanId

    @property
    def status(self) -> str:
        return self.__status

    def __setStatus(self,
                    status: str,
                    started: float = None,
                    ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING", "STARTING", "STARTED", "RUNNING",
                "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self) -> None:
        """Start running a scan.

        Raises:
            AssertionError: Never actually raised.
        """
        failed = True

        try:
            self.__setStatus("STARTING", time.time() * 1000, None)
            self.__sf.status(
                f"Scan [{self.__scanId}] for '{self.__target.targetValue}' initiated."
            )

            self.eventQueue = queue.Queue()

            self.__sharedThreadPool.start()

            # moduleList = list of modules the user wants to run
            self.__sf.debug(f"Loading {len(self.__moduleList)} modules ...")
            for modName in self.__moduleList:
                if not modName:
                    continue

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    mod = getattr(module, modName)()
                    mod.__name__ = modName
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    continue

                # Set up the module options, scan ID, database handle and listeners
                try:
                    # Configuration is a combined global config with module-specific options
                    self.__modconfig[modName] = deepcopy(
                        self.__config['__modules__'][modName]['opts'])
                    for opt in list(self.__config.keys()):
                        self.__modconfig[modName][opt] = deepcopy(
                            self.__config[opt])

                    # clear any listener relationships from the past
                    mod.clearListeners()
                    mod.setScanId(self.__scanId)
                    mod.setSharedThreadPool(self.__sharedThreadPool)
                    mod.setDbh(self.__dbh)
                    mod.setup(self.__sf, self.__modconfig[modName])
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    mod.errorState = True
                    continue

                # Override the module's local socket module to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    try:
                        mod._updateSocket(socket)
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} socket setup failed: {e}")
                        continue

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    try:
                        mod.setOutputFilter(self.__config['__outputfilter'])
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} output filter setup failed: {e}"
                        )
                        continue

                # Give modules a chance to 'enrich' the original target with aliases of that target.
                try:
                    newTarget = mod.enrichTarget(self.__target)
                    if newTarget is not None:
                        self.__target = newTarget
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} target enrichment failed: {e}")
                    continue

                # Register the target with the module
                try:
                    mod.setTarget(self.__target)
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} failed to set target '{self.__target}': {e}"
                    )
                    continue

                # Set up the outgoing event queue
                try:
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} event queue setup failed: {e}")
                    continue

                self.__moduleInstances[modName] = mod
                self.__sf.status(f"{modName} module loaded.")

            self.__sf.debug(
                f"Scan [{self.__scanId}] loaded {len(self.__moduleInstances)} modules."
            )

            if not self.__moduleInstances:
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                self.__dbh.close()
                return

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            psMod.outgoingEventQueue = self.eventQueue
            psMod.incomingEventQueue = queue.Queue()

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
            if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                raise AssertionError("ABORT-REQUESTED")

            # start threads
            self.waitForThreads()
            failed = False

        except (KeyboardInterrupt, AssertionError):
            self.__sf.status(f"Scan [{self.__scanId}] aborted.")
            self.__setStatus("ABORTED", None, time.time() * 1000)

        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                + "Please report this as a bug: " + +repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        finally:
            if not failed:
                self.__setStatus("FINISHED", None, time.time() * 1000)
                self.runCorrelations()
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
            self.__dbh.close()

    def runCorrelations(self) -> None:
        """Run correlation rules."""

        self.__sf.status(
            f"Running {len(self.__config['__correlationrules__'])} correlation rules."
        )
        ruleset = dict()
        for rule in self.__config['__correlationrules__']:
            ruleset[rule['id']] = rule['rawYaml']
        corr = SpiderFootCorrelator(self.__dbh, ruleset, self.__scanId)
        corr.run_correlations()

    def waitForThreads(self) -> None:
        """Wait for threads.

        Raises:
            TypeError: queue tried to process a malformed event
            AssertionError: scan halted for some reason
        """

        counter = 0

        try:
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()
            final_passes = 3

            # watch for newly-generated events
            while True:

                # log status of threads every 10 iterations
                log_status = counter % 10 == 0
                counter += 1

                if log_status:
                    scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
                    if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                        raise AssertionError("ABORT-REQUESTED")

                try:
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            if final_passes < 1:
                                break
                            # Trigger module.finished()
                            for mod in self.__moduleInstances.values():
                                if not mod.errorState and mod.incomingEventQueue is not None:
                                    mod.incomingEventQueue.put('FINISHED')
                            sleep(.1)
                            while not self.threadsFinished(log_status):
                                log_status = counter % 100 == 0
                                counter += 1
                                sleep(.01)
                            final_passes -= 1

                    else:
                        # save on CPU
                        sleep(.1)
                    continue

                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    if not mod.errorState and mod.incomingEventQueue is not None:
                        watchedEvents = mod.watchedEvents()
                        if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                            mod.incomingEventQueue.put(deepcopy(sfEvent))

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True
            self.__sharedThreadPool.shutdown(wait=True)

    def threadsFinished(self, log_status: bool = False) -> bool:
        """Check if all threads are complete.

        Args:
            log_status (bool): print thread queue status to debug log

        Returns:
            bool: True if all threads are finished
        """
        if self.eventQueue is None:
            return True

        modules_waiting = dict()
        for m in self.__moduleInstances.values():
            try:
                if m.incomingEventQueue is not None:
                    modules_waiting[m.__name__] = m.incomingEventQueue.qsize()
            except Exception:
                with suppress(Exception):
                    m.errorState = True
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)

        modules_running = []
        for m in self.__moduleInstances.values():
            try:
                if m.running:
                    modules_running.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        modules_errored = []
        for m in self.__moduleInstances.values():
            try:
                if m.errorState:
                    modules_errored.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        for mod in self.__moduleInstances.values():
            if mod.errorState and mod.incomingEventQueue is not None:
                self.__sf.debug(
                    f"Clearing and unsetting incomingEventQueue for errored module {mod.__name__}."
                )
                with suppress(Exception):
                    while 1:
                        mod.incomingEventQueue.get_nowait()
                mod.incomingEventQueue = None

        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                try:
                    while True:
                        mod.incomingEventQueue.get_nowait()
                except Exception:
                    pass

        if log_status:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if not events_queued:
                events_queued = 'None'
            self.__sf.debug(
                f"Events queued: {sum([m[-1] for m in modules_waiting]):,} ({events_queued})"
            )
            if modules_running:
                self.__sf.debug(
                    f"Modules running: {len(modules_running):,} ({', '.join(modules_running)})"
                )
            if modules_errored:
                self.__sf.debug(
                    f"Modules errored: {len(modules_errored):,} ({', '.join(modules_errored)})"
                )

        if all(queues_empty) and not modules_running:
            return True
        return False
示例#28
0
def start_scan(sfConfig: dict, sfModules: dict, args, loggingQueue) -> None:
    """Start scan

    Args:
        sfConfig (dict): SpiderFoot config options
        sfModules (dict): modules
        args (argparse.Namespace): command line args
        loggingQueue (Queue): main SpiderFoot logging queue
    """
    log = logging.getLogger(f"spiderfoot.{__name__}")

    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = SpiderFootHelpers.targetTypeFromString(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m and not args.u:
        log.warning(
            "You didn't specify any modules, types or use case, so all modules will be enabled."
        )
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Select modules if the user selected usercase
    if args.u:
        usecase = args.u[0].upper() + args.u[
            1:]  # Make the first Letter Uppercase
        for mod in sfConfig['__modules__']:
            if usecase == 'All' or usecase in sfConfig['__modules__'][mod][
                    'group']:
                modlist.append(mod)

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        if args.o not in ["tab", "csv", "json"]:
            log.error(
                "Invalid output format selected. Must be 'tab', 'csv' or 'json'."
            )
            sys.exit(-1)
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides',
                                           []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        print("[", end='')
    elif not args.H:
        delim = "\t"

        if args.o == "tab":
            delim = "\t"

        if args.o == "csv":
            if args.D:
                delim = args.D
            else:
                delim = ","

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = SpiderFootHelpers.genScanInstanceId()
    try:
        p = mp.Process(target=startSpiderFootScanner,
                       args=(loggingQueue, scanName, scanId, target,
                             targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return
示例#29
0
                if delim != "\t":
                    print(delim.join(["Source", "Type", "Data"]))
                else:
                    print('{0:30}{1}{2:45}{3}{4}'.format(
                        "Source", delim, "Type", delim, "Data"))
            else:
                if delim != "\t":
                    print(delim.join(["Source", "Type", "Source Data",
                                      "Data"]))
                else:
                    print('{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                        "Source", delim, "Type", delim, "Source Data", delim,
                        "Data"))

        while True:
            info = dbh.scanInstanceGet(scanId)
            if not info:
                time.sleep(1)
                continue
            if info[5] in [
                    "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
            ]:
                if sfConfig['__logging']:
                    log.info(f"Scan completed with status {info[5]}")
                if args.o == "json":
                    print("]")
                sys.exit(0)
            time.sleep(1)
        sys.exit(0)

    # Start the web server so you can start looking at results
示例#30
0
    def startscan(self, scanname, scantarget, modulelist, typelist, usecase):
        """Initiate a scan

        Args:
            scanname (str): scan name
            scantarget (str): scan target
            modulelist (str): TBD
            typelist (str): TBD
            usecase (str): TBD
        """

        # Swap the globalscantable for the database handler
        dbh = SpiderFootDb(self.config)

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        modlist = list()
        sf = SpiderFoot(cfg)
        targetType = None
        [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

        if scanname == "" or scantarget == "":
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Incorrect usage: scan name or target was not specified."]).encode('utf-8')

            return self.error("Invalid request: scan name or target was not specified.")

        if typelist == "" and modulelist == "" and usecase == "":
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Incorrect usage: no modules specified for scan."]).encode('utf-8')

            return self.error("Invalid request: no modules specified for scan.")

        # User selected modules
        if modulelist != "":
            modlist = modulelist.replace('module_', '').split(',')

        # User selected types
        if len(modlist) == 0 and typelist != "":
            typesx = typelist.replace('type_', '').split(',')

            # 1. Find all modules that produce the requested types
            modlist = sf.modulesProducing(typesx)
            newmods = deepcopy(modlist)
            newmodcpy = deepcopy(newmods)

            # 2. For each type those modules consume, get modules producing
            while len(newmodcpy) > 0:
                for etype in sf.eventsToModules(newmodcpy):
                    xmods = sf.modulesProducing([etype])
                    for mod in xmods:
                        if mod not in modlist:
                            modlist.append(mod)
                            newmods.append(mod)
                newmodcpy = deepcopy(newmods)
                newmods = list()

        # User selected a use case
        if len(modlist) == 0 and usecase != "":
            for mod in self.config['__modules__']:
                if usecase == 'all' or usecase in self.config['__modules__'][mod]['cats']:
                    modlist.append(mod)

        # Add our mandatory storage module..
        if "sfp__stor_db" not in modlist:
            modlist.append("sfp__stor_db")
        modlist.sort()

        targetType = sf.targetType(scantarget)
        if targetType is None:
            if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", "Unrecognised target type."]).encode('utf-8')

            return self.error("Invalid target type. Could not recognize it as a human name, IP address, IP subnet, ASN, domain name or host name.")

        # Delete the stdout module in case it crept in
        if "sfp__stor_stdout" in modlist:
            modlist.remove("sfp__stor_stdout")

        # Start running a new scan
        if targetType in ["HUMAN_NAME", "USERNAME"]:
            scantarget = scantarget.replace("\"", "")
        else:
            scantarget = scantarget.lower()

        # Start running a new scan
        scanId = sf.genScanInstanceId()
        try:
            p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
            p.daemon = True
            p.start()
        except BaseException as e:
            self.log.error(f"[-] Scan [{scanId}] failed: {e}")
            return self.error(f"[-] Scan [{scanId}] failed: {e}")

        # Wait until the scan has initialized
        # Check the database for the scan status results
        while dbh.scanInstanceGet(scanId) is None:
            self.log.info("Waiting for the scan to initialize...")
            time.sleep(1)

        if cherrypy.request.headers and 'application/json' in cherrypy.request.headers.get('Accept'):
            cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
            return json.dumps(["SUCCESS", scanId]).encode('utf-8')

        raise cherrypy.HTTPRedirect(f"/scaninfo?id={scanId}")