    def __init__(self, *args, **kwargs):
        # Sets up the QRadar query settings for this instance: the API client class,
        # the AQL query text, optional relative time ranges, the mapping of event
        # fields to observable types, and the event time field options.
        # All positional and keyword arguments (including any 'api_class') are
        # forwarded unchanged to the parent initializer.
        super().__init__(*args, **kwargs)

        # the API class we use to communicate with QRadar
        # this can also be a unit testing class
        self.api_class = QRadarAPIClient
        if 'api_class' in kwargs:
            self.api_class = kwargs['api_class']

        # load the AQL query for this instance
        # the path comes from the 'aql_path' option and is passed through abs_path()
        with open(abs_path(self.config['aql_path']), 'r') as fp:
            self.aql_query = fp.read()

        # each query can specify its own range
        # NOTE(review): the create_timedelta(...) calls below are cut off in this view
        # and each one appears twice -- confirm against the full file
        if 'relative_duration_before' in self.config:
            self.relative_duration_before = create_timedelta(
            self.relative_duration_before = create_timedelta(

        if 'relative_duration_after' in self.config:
            self.relative_duration_after = create_timedelta(
            self.relative_duration_after = create_timedelta(

        # load the observable mapping for this query
        # NOTE that the keys (event field names) are case sensitive
        self.observable_mapping = {
        }  # key = event field name, value = observable_type
        # every option whose name starts with 'map_' contributes one mapping entry
        for key in self.config.keys():
            if key.startswith('map_'):
                # the option value is split on '=' into the event field name and the observable type
                # NOTE(review): split('=', 2) can yield three parts, which would not unpack into
                # two names -- confirm whether a maxsplit of 1 was intended
                # NOTE(review): the next lines look truncated in this view (unclosed list and a
                # dangling message string) -- confirm against the full file
                event_field, observable_type = [
                    _.strip() for _ in self.config[key].split('=', 2)
                if observable_type not in VALID_OBSERVABLE_TYPES:
                        f"invalid observable type specified for observable mapping "
                        f"{key} in {self}: {observable_type}")

                self.observable_mapping[event_field] = observable_type

        # the configuration can specify what field should be used as the event time
        # by default this is disabled, in which case the observables are non-temporal
        self.time_event_field = self.config.get('time_event_field', None)

        # the format of the time can also be specified in strptime syntax
        # the special value TIMESTAMP indicates a unix timestamp (this is the default)
        # the special value TIMESTAMP_MILLISECONDS indicates a unix timestamp in milliseconds
        self.time_event_field_format = self.config.get(
            'time_event_field_format', 'TIMESTAMP')

        # are we delaying QRadar correlational queries?
        # the delay is only set when the global [qradar] section has a 'correlation_delay' option
        self.correlation_delay = None
        # NOTE(review): the create_timedelta(...) call below is cut off in this view
        if 'correlation_delay' in saq.CONFIG['qradar']:
            self.correlation_delay = create_timedelta(
    def __init__(self, *args, **kwargs):
        # Sets up the ELK search settings for this instance: the relative time window,
        # the search URI, the result limit and the optional cluster prefix.
        # Global values come from saq.CONFIG['elk']; self.config can override
        # the time window, the result limit and the cluster.
        super().__init__(*args, **kwargs)

        # for relative time searches, how far back and forward do we go?
        # NOTE(review): the create_timedelta(...) calls below are cut off in this view
        # -- confirm the default values against the full file
        self.earliest_timedelta = create_timedelta(
        if 'relative_duration_before' in self.config:
            self.earliest_timedelta = create_timedelta(

        self.latest_timedelta = create_timedelta(
        if 'relative_duration_after' in self.config:
            self.latest_timedelta = create_timedelta(

        # format the elk search uri with the username and password if it's specified
        # NOTE(review): the credentialed format(...) call is cut off here and the second
        # assignment presumably belongs to a missing else: branch -- confirm
        if saq.CONFIG['elk']['username'] and saq.CONFIG['elk']['password']:
            # using urlencoding in case username or password has funky characters
            self.elk_uri = 'https://{}:{}@{}'.format(
            self.elk_uri = 'https://{}'.format(saq.CONFIG['elk']['uri'])

        # make sure it ends with /
        if not self.elk_uri.endswith('/'):
            self.elk_uri += '/'

        # the maximum number of results we would want
        # the global [elk] value is used unless this module's own config overrides it
        self.max_result_count = saq.CONFIG['elk'].getint('max_result_count')
        if 'max_result_count' in self.config:
            self.max_result_count = self.config.getint('max_result_count')

        # if we've specified a cluster in the global config then we prefix our index with that cluster
        self.cluster = ''  # by default we don't specify the cluster at all
        if saq.CONFIG['elk']['cluster']:
            self.cluster = saq.CONFIG['elk']['cluster']

        # we can also specify the cluster for this specific module
        # (this takes precedence over the global setting above)
        if 'cluster' in self.config:
            self.cluster = self.config['cluster']
    def load_from_ini(self, path):
        """Loads the settings for the hunt from an ini formatted file. This function must return the
           ConfigParser object used to load the settings.

           The file must contain a [rule] section with the options enabled, name, description,
           type, frequency and tags (a comma separated list).

           Raises KeyError if the [rule] section or one of the options read by subscript is
           missing. A missing or unreadable file is skipped silently by ConfigParser.read(),
           so it also surfaces as a KeyError on the [rule] section."""
        config = configparser.ConfigParser()
        config.optionxform = str  # preserve case when reading option names
        # actually parse the file; without this the parser stays empty and the
        # [rule] lookup below can never succeed
        config.read(path)

        section_rule = config['rule']

        self.enabled = section_rule.getboolean('enabled')
        self.name = section_rule['name']
        self.description = section_rule['description']
        self.type = section_rule['type']
        self.frequency = create_timedelta(section_rule['frequency'])

        # strip each entry first so that blank entries ("a, ,b" or a trailing comma)
        # do not end up as empty tags
        stripped_tags = (tag.strip() for tag in section_rule['tags'].split(','))
        self.tags = [tag for tag in stripped_tags if tag]

        return config
    def load_from_ini(self, path, *args, **kwargs):
        # Extends the parent load_from_ini() with the query-specific settings of the hunt:
        # time ranges, search options, the observable mapping, temporal fields and
        # directives. Returns the config object obtained from the parent implementation.
        # Raises ValueError for an unknown observable type or directive.
        config = super().load_from_ini(path, *args, **kwargs)

        rule_section = config['rule']
        # if we don't specify a time range then it defaults to whatever the frequency is
        self.time_range = rule_section.get('time_range', fallback=None)
        # NOTE(review): an else: seems to be missing here -- as written the frequency is
        # also passed to create_timedelta() when no time_range is configured
        if self.time_range is None:
            self.time_range = self.frequency
            self.time_range = create_timedelta(self.time_range)

        # optional upper bound; stays None when the option is not present
        self.max_time_range = rule_section.get('max_time_range', fallback=None)
        if self.max_time_range is not None:
            self.max_time_range = create_timedelta(self.max_time_range)

        self.full_coverage = rule_section.getboolean('full_coverage')
        self.group_by = rule_section.get('group_by', fallback=None)
        # 'search' is read by subscript, so it is a required option
        self.search_query_path = rule_section['search']
        self.use_index_time = rule_section.getboolean('use_index_time')

        # NOTE(review): the next two statements are cut off in this view (their fallback
        # arguments are missing) -- confirm against the full file
        self.max_result_count =  rule_section.getint('max_result_count', 

        self.query_timeout = rule_section.get('query_timeout',

        # self.offset is only assigned here when the option is present
        if 'offset' in rule_section:
            self.offset = create_timedelta(rule_section['offset'])

        # [observable_mapping]: option name -> observable type (validated below)
        observable_mapping_section = config['observable_mapping']
        self.observable_mapping = {}
        for key, value in observable_mapping_section.items():
            if value not in VALID_OBSERVABLE_TYPES:
                raise ValueError(f"invalid observable type {value}")

            self.observable_mapping[key] = value

        # [temporal_fields]: option name -> boolean flag
        temporal_fields_section = config['temporal_fields']
        self.temporal_fields = {}
        for key in temporal_fields_section.keys():
            self.temporal_fields[key] = temporal_fields_section.getboolean(key)

        # [directives]: option name -> comma separated list of directives
        directives_section = config['directives']
        self.directives = {}
        self.directive_options = {}

        for key, value in directives_section.items():
            # NOTE(review): nothing visible in this view appends to this list -- confirm
            # whether an append of the validated directive is missing
            self.directives[key] = []
            directives = [_.strip() for _ in value.split(',')]
            for directive in directives:
                # does this directive have any options? these are : delimited
                if ':' in directive:
                    options = directive.split(':')
                    # the first element is the directive name, the rest are its options
                    directive = options.pop(0)
                    self.directive_options[directive] = {}
                    for option in options:
                        # option_name=option_value
                        option_name, option_value = option.split('=', 1)
                        # NOTE(review): the dict above is created under the directive name but
                        # written here under the config key -- this raises KeyError unless both
                        # are equal; confirm which key is intended
                        self.directive_options[key][option_name] = option_value
                if directive not in VALID_DIRECTIVES:
                    raise ValueError(f"invalid directive {directive}")


        # load the query text from the file named by the 'search' option
        self.query = self.load_query_from_file(self.search_query_path)
        return config
    def execute_analysis(self, observable):
        # Collects pcap data for the given observable through the Solera API, merges the
        # downloaded pcap files with mergecap and attaches the result to the analysis.
        # Both visible exit paths return True; on failure the error text is stored in
        # analysis.error.
        # NOTE(review): several statements in this method are cut off in this view (a missing
        # try:, unclosed calls and dict literals) -- confirm the details against the full file
        analysis = self.create_analysis(observable)

        # where are we putting the pcap?
        pcap_dir = os.path.join(self.root.storage_dir, 'pcap', observable.id)
        pcap_zip_path = os.path.join(pcap_dir, f'{observable.id}.zip')

        bpf_filter = None

        # NOTE the bpf filter doesn't seem to have any effect

        # figure out what our filter should be based on the type of observable passed in
        # NOTE(review): query and src are only assigned inside these branches, so any other
        # observable type would leave them unbound -- presumably the module is only
        # invoked for these three types; confirm
        if observable.type == F_IPV4:
            src = observable.value
            src_port = dst = dst_port = None
            bpf_filter = f'(host {src})'
            query = [f'ipv4_address="{src}"']
        elif observable.type == F_IPV4_CONVERSATION:
            src, dst = parse_ipv4_conversation(observable.value)
            src_port = dst_port = None
            bpf_filter = f'(host {src} and host {dst})'
            query = [f'ipv4_initiator="{src}"', f'ipv4_responder="{dst}"']
        elif observable.type == F_IPV4_FULL_CONVERSATION:
            src, src_port, dst, dst_port = parse_ipv4_full_conversation(
            bpf_filter = f'((host {src} and port {src_port}) and (host {dst} and port {dst_port}))'
            query = [
                f'ipv4_initiator="{src}"', f'port_initiator="{src_port}"',
                f'ipv4_responder="{dst}"', f'port_responder="{dst_port}"'

        # ace stores everything in UTC -- solera either always uses some globally defined timezone
        # or it uses a timezone specified for the user (not sure)
        # in either case, translate the time to the timezone specified in the config
        # the observable's own time is preferred; otherwise the event time of the root is used
        extraction_time = observable.time if observable.time is not None else self.root.event_time
        start_time = extraction_time - create_timedelta(
        end_time = extraction_time + create_timedelta(

        start_time = start_time.astimezone(
        end_time = end_time.astimezone(pytz.timezone(self.config['timezone']))

        # the times are formatted without any timezone suffix
        start_time = start_time.strftime('%Y-%m-%dT%H:%M:%S')
        end_time = end_time.strftime('%Y-%m-%dT%H:%M:%S')

            f"collecting pcap from {observable} into {pcap_dir} "
            f"start time {start_time} end time {end_time} query {query} bpf_filter {bpf_filter}"

            from SoleraConnector import SoleraConnector
            c = SoleraConnector(self.config['username'],
                                self.config['api_key'], self.config['ipv4'])

            # NOTE the appliances={} in the query part of the URL is not documented but seems to be required
            # build a comma separated list of all appliance ids reported by the API
            result = c.callAPI('GET', '/cmc_settings/appliances')
            appliance_ids = ','.join(
                [str(_['Appliance']['id']) for _ in result['result']])

            result = c.callAPI(
                    'timespan': {
                        'start': start_time,
                        'end': end_time
                    'query': query,
                    'name': '{}.pcap'.format(str(uuid.uuid4())),
                    #'download': {
                    #'type': 3 },
                    #'filter': bpf_filter,

            # the result comes back as a zip file of pcaps (one per sensor)
            with zipfile.ZipFile(pcap_zip_path) as fp:

                # remove the zip file once we've extracted
            except Exception as e:
                logging.error(f"unable to delete {pcap_zip_path}: {e}")

            # check that there is a pcap_dir
            # NOTE(review): this tests the length of the path string, not whether the
            # directory exists or has any files in it -- confirm the intent
            if len(pcap_dir) > 0:
                # build command with correct pcap-ng files
                pcap_path = os.path.join(pcap_dir, 'merged.pcap')
                command = ['mergecap', '-w', pcap_path]
                    os.path.join(pcap_dir, i) for i in os.listdir(pcap_dir))

                # merge all pcaps in pcap_dir to merged_pcap.pcapng
                # NOTE(review): the output path built above is named merged.pcap
                p = Popen(command, stdout=PIPE, stderr=PIPE)
                _stdout, _stderr = p.communicate()

                # NOTE(review): the comments below mention 72 bytes, but the check only covers
                # sizes 92 and 0 -- confirm whether 72 should be included as well
                if os.path.getsize(pcap_path) in [92, 0]:
                    # for pcap-ng (the default), a size of 72 bytes means the pcap is empty of content
                    # also, a file of 0 means the pcap data was missing entirely
                    # merging 2 or more empty (either 0 or 72 bytes) pcap-ng files gives a pcap of size 92 bytes
                    # so we remove those
                    logging.debug(f"removing empty pcap file {pcap_path}")
                    except Exception as e:
                            f"unable to remove empty pcap file {pcap_path}: {e}"
                    # add it as an observable to the analysis
                    pcap_file = analysis.add_observable(

            return True

        # any failure is logged and recorded on the analysis; True is still returned
        except Exception as e:
            logging.error(f"unable to extract pcap from {observable}: {e}")
            analysis.error = str(e)
            return True
    def __init__(self, config_section, *args, **kwargs):
        """Initializes the state kept by an analysis module and reads its settings.

           config_section is the name (str) of the section in saq.CONFIG that holds the
           settings of this module. All other arguments are forwarded unchanged to the
           parent initializer.

           Optional settings read from the section: threaded, threaded_execution_frequency,
           priority and observation_grouping_time_range.

           Raises AssertionError if config_section is not a str. A section name that does not
           exist in saq.CONFIG raises the lookup error of that object (KeyError for a
           ConfigParser)."""
        super().__init__(*args, **kwargs)
        assert isinstance(config_section, str)

        # the section in the configuration that applies to this analysis module
        # this has to be loaded before any of the settings below are read from it
        # (leaving it as None made every lookup below fail with a TypeError)
        self.config_section = config_section
        self.config = saq.CONFIG[config_section]

        # a reference to the RootAnalysis object we're analyzing
        self.root = None

        # a reference to the engine this module is running out of
        self.engine = None

        # the actual semaphore to use
        self.semaphore = None

        # we'll keep track of the Analysis and Observable objects we've generated
        # this is useful for cleanup routines
        self.generated_analysis = []
        self.generated_observables = []

        # observables that are excluded from being analyzed by this module
        self.observable_exclusions = {} # key = o_type, value = [] of o_value

        # observables that are excluded from being generated by this module
        self.expected_observables = {} # key = o_type, value = set(o_value)

        # something might try to cancel an analysis execution
        self.cancel_analysis_flag = False

        # sometimes a module can depend on another service that is failing
        # when that happens we can trigger "cooldown periods" where we skip executing this module until some time
        # has elapsed

        # the time at which the cooldown expires (None if no cooldown is in effect)
        self.cooldown_timeout = None

        # the files that are currently being watched, indexed by path
        self.watched_files = {} # key = path, value = WatchedFile

        # set to true if this is a threaded module (defaults to False when not configured)
        self.is_threaded = self.config.getboolean('threaded', fallback=False)

        # how often self.execute_threaded is called from the self.execute_threaded_loop
        # (defaults to 1 when not configured)
        self.threaded_execution_frequency = self.config.getint(
            'threaded_execution_frequency', fallback=1)

        # the actual thread performing the work
        self.threaded_execution_thread = None

        # event to signal the thread can stop
        self.threaded_execution_stop_event = None

        # the priority of the analysis module
        # lower priority scores go first
        # higher priority scores go last
        self.priority = self.config.getint('priority', fallback=10)

        # the next time we check to see if any files we are watching have changed
        # this is automatically checked and updated every time this module is used to analyze something
        self.next_check_watched_files = None

        # is this a module that groups analysis of duplicate values by time?
        # stays None unless the option is configured
        self.observation_grouping_time_range = None
        if 'observation_grouping_time_range' in self.config:
            self.observation_grouping_time_range = create_timedelta(self.config['observation_grouping_time_range'])