def test_genScanInstanceId_should_return_a_string(self):
    """Check that genScanInstanceId() hands back a str instance."""
    self.assertIsInstance(SpiderFootHelpers.genScanInstanceId(), str)
示例#2
0
    def __init__(self,
                 scanName,
                 scanId,
                 targetValue,
                 targetType,
                 moduleList,
                 globalOpts,
                 start=True):
        """Initialize SpiderFootScanner object.

        Validates the arguments, creates the scan instance and stores the
        scan configuration through the database handle, applies the proxy,
        DNS resolver, user agent and TLD options, and finally starts the
        scan when ``start`` is true.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        # Work on a private copy: the config is modified further down
        # (e.g. '_modulesenabled', '_useragent', '_internettlds').
        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # scanId was validated above as a non-empty str, so it is always
        # used as given; no fallback ID needs to be generated here.
        self.__scanId = scanId

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            # Chain the original error so its traceback is kept.
            raise ValueError(f"Invalid target: {e}") from e

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            proxy_port = int(self.__config.get('_socks3port') or 0)

            # No port configured: fall back to a per-type default.
            # proxy_type is one of '4', '5', 'HTTP' or 'TOR' at this point,
            # anything else was rejected above.
            if not proxy_port:
                if proxy_type in ['4', '5']:
                    proxy_port = 1080
                elif proxy_type == 'HTTP':
                    proxy_port = 8080
                elif proxy_type == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            # NOTE(review): when credentials are set, this debug message
            # contains the proxy username and password.
            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Get internet TLDs
        tlddata = self.__sf.cacheGet("internet_tlds",
                                     self.__config['_internettlds_cache'])

        # If it wasn't loadable from cache, load it from scratch
        if tlddata is None:
            self.__config['_internettlds'] = self.__sf.optValueToData(
                self.__config['_internettlds'])
            self.__sf.cachePut("internet_tlds", self.__config['_internettlds'])
        else:
            self.__config["_internettlds"] = tlddata.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        self.__sharedThreadPool = SpiderFootThreadPool(
            threads=self.__config.get("_maxthreads", 3),
            name='sharedThreadPool')

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()
示例#3
0
def start_scan(sfConfig, sfModules, args):
    """Start scan

    Validates the command line arguments, determines the target type and
    the modules to enable, configures the sfp__stor_stdout module options,
    launches the scan in a separate process and then polls the database
    once per second until the scan reaches a final status.

    This function does not return to its caller: it leaves through
    sys.exit(), with 0 once a final status is seen and -1 on errors.

    Args:
        sfConfig (dict): SpiderFoot config options
        sfModules (dict): modules
        args (argparse.Namespace): command line args
    """
    # Kept at module level so code outside this function can reach them
    # (presumably the SIGINT handler installed below - verify).
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = SpiderFootHelpers.targetTypeFromString(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    # The quotes were only needed for target type detection.
    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        # Module names containing "__" are left out of the default set.
        modlist = [m for m in sfModules if "__" not in m]

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        # NOTE(review): args.t is passed through unsplit here, whereas it is
        # split on commas further down and the other modulesProducing()
        # call passes a list - confirm this is intended.
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            # Next round only looks at the modules added in this round.
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        # Strict mode: start over and keep only the modules that consume
        # the target type and also provide one of the requested types.
        modlist = list()
        rtypes = args.t.split(",")
        for mod in sf.modulesConsuming([targetType]):
            provides = sfModules[mod]['provides']
            if not provides:
                continue
            if mod not in modlist and any(r in provides for r in rtypes):
                modlist.append(mod)

    if not modlist:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    cfg['_debug'] = bool(args.debug)

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        print("[", end='')
    elif not args.H:
        # Tab is the delimiter unless csv output was requested.
        delim = "\t"
        if args.o == "csv":
            delim = args.D if args.D else ","

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = SpiderFootHelpers.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner,
                       args=(scanName, scanId, target, targetType, modlist,
                             cfg))
        p.daemon = True
        p.start()
    except Exception as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    final_statuses = ["ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"]
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            # The scan instance does not exist yet. If the scan process is
            # already gone it never will, so stop instead of polling
            # forever. The instance is looked up once more because the
            # process may have created it right before exiting.
            if not p.is_alive() and not dbh.scanInstanceGet(scanId):
                log.error(
                    f"Scan [{scanId}] failed: scan process exited before the scan was initialized"
                )
                if args.o == "json":
                    print("]")
                sys.exit(-1)
            continue
        if info[5] in final_statuses:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)
示例#4
0
    def startscan(self, scanname, scantarget, modulelist, typelist, usecase):
        """Initiate a scan

        The module list is taken from ``modulelist`` if given, otherwise
        derived from ``typelist``, otherwise from ``usecase``. The scan is
        launched in a separate process and this method waits until the scan
        instance shows up in the database.

        Args:
            scanname (str): scan name
            scantarget (str): scan target
            modulelist (str): comma separated list of modules to use
            typelist (str): selected modules based on produced event data types
            usecase (str): selected module group (passive, investigate, footprint, all)

        Returns:
            str: start scan status as JSON (UTF-8 encoded) when the client
                accepts application/json; on failure for other clients, the
                result of self.error()

        Raises:
            HTTPRedirect: redirect to new scan info page
        """
        def wants_json():
            """Tell whether the request's Accept header mentions application/json."""
            accept = cherrypy.request.headers.get('Accept')
            return bool(accept and 'application/json' in accept)

        def fail(json_message, page_message):
            """Build the error response in the form the client asked for."""
            if wants_json():
                cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
                return json.dumps(["ERROR", json_message]).encode('utf-8')

            return self.error(page_message)

        [scanname, scantarget] = self.cleanUserInput([scanname, scantarget])

        if not scanname:
            return fail("Incorrect usage: scan name was not specified.",
                        "Invalid request: scan name was not specified.")

        if not scantarget:
            return fail("Incorrect usage: scan target was not specified.",
                        "Invalid request: scan target was not specified.")

        if not typelist and not modulelist and not usecase:
            return fail("Incorrect usage: no modules specified for scan.",
                        "Invalid request: no modules specified for scan.")

        targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
        if targetType is None:
            return fail("Unrecognised target type.",
                        "Invalid target type. Could not recognize it as a target SpiderFoot supports.")

        # Swap the globalscantable for the database handler
        dbh = SpiderFootDb(self.config)

        # Snapshot the current configuration to be used by the scan
        cfg = deepcopy(self.config)
        sf = SpiderFoot(cfg)

        modlist = list()

        # User selected modules
        if modulelist:
            modlist = modulelist.replace('module_', '').split(',')

        # User selected types
        if not modlist and typelist:
            typesx = typelist.replace('type_', '').split(',')

            # 1. Find all modules that produce the requested types
            modlist = sf.modulesProducing(typesx)
            newmods = deepcopy(modlist)
            newmodcpy = deepcopy(newmods)

            # 2. For each type those modules consume, get modules producing
            while len(newmodcpy) > 0:
                for etype in sf.eventsToModules(newmodcpy):
                    xmods = sf.modulesProducing([etype])
                    for mod in xmods:
                        if mod not in modlist:
                            modlist.append(mod)
                            newmods.append(mod)
                # Next round only looks at the modules added in this round.
                newmodcpy = deepcopy(newmods)
                newmods = list()

        # User selected a use case
        if not modlist and usecase:
            for mod in self.config['__modules__']:
                if usecase == 'all' or usecase in self.config['__modules__'][mod]['group']:
                    modlist.append(mod)

        # If we somehow got all the way through to here and still don't have any modules selected
        if not modlist:
            return fail("Incorrect usage: no modules specified for scan.",
                        "Invalid request: no modules specified for scan.")

        # Add our mandatory storage module
        if "sfp__stor_db" not in modlist:
            modlist.append("sfp__stor_db")
        modlist.sort()

        # Delete the stdout module in case it crept in
        if "sfp__stor_stdout" in modlist:
            modlist.remove("sfp__stor_stdout")

        # Normalize the target: quotes are stripped for these target types,
        # every other target is lower-cased.
        if targetType in ["HUMAN_NAME", "USERNAME", "BITCOIN_ADDRESS"]:
            scantarget = scantarget.replace("\"", "")
        else:
            scantarget = scantarget.lower()

        # Start running a new scan
        scanId = SpiderFootHelpers.genScanInstanceId()
        try:
            p = mp.Process(target=SpiderFootScanner, args=(scanname, scanId, scantarget, targetType, modlist, cfg))
            p.daemon = True
            p.start()
        except Exception as e:
            self.log.error(f"[-] Scan [{scanId}] failed: {e}")
            return self.error(f"[-] Scan [{scanId}] failed: {e}")

        # Wait until the scan has initialized
        # Check the database for the scan status results
        while dbh.scanInstanceGet(scanId) is None:
            if not p.is_alive():
                # The scan process is gone. Look once more, because it may
                # have created the scan instance right before exiting; if it
                # did not, waiting any longer would block this request
                # forever.
                if dbh.scanInstanceGet(scanId) is None:
                    self.log.error(f"[-] Scan [{scanId}] failed: scan process exited before the scan was initialized")
                    return fail(f"Scan [{scanId}] failed: scan process exited before the scan was initialized.",
                                f"[-] Scan [{scanId}] failed: scan process exited before the scan was initialized")
                break

            self.log.info("Waiting for the scan to initialize...")
            time.sleep(1)

        if wants_json():
            cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"
            return json.dumps(["SUCCESS", scanId]).encode('utf-8')

        raise cherrypy.HTTPRedirect(f"{self.docroot}/scaninfo?id={scanId}")