Example #1
def add_httplog(fqdn, db, c):
    for fqdn_part in iterate_fqdn_parts(fqdn):
        execute_with_retry(db, c, """
INSERT INTO httplog ( host, numconnections, firstconnectdate ) 
VALUES ( LOWER(%s), 1, UNIX_TIMESTAMP(NOW()) )
ON DUPLICATE KEY UPDATE numconnections = numconnections + 1""", ( fqdn_part, ))

    db.commit()
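
Note: every example on this page calls a helper named execute_with_retry whose implementation is not shown here. From the call sites one can infer its rough shape: it takes a database connection and cursor, either a single SQL statement with one parameter tuple or a list of statements with a matching list of parameter tuples (Example #5), an optional commit flag, and an optional attempts count (Example #18), and it retries on the MySQL deadlock and lock-wait-timeout errors (codes 1213 and 1205, as checked in Example #6). The code below is only a minimal sketch under those assumptions, not the project's actual implementation:

import logging
import time

import pymysql

def execute_with_retry(db, c, sql, params=None, attempts=3, commit=False):
    # accept a single statement or a list of statements (see Example #5)
    sql_list = sql if isinstance(sql, list) else [sql]
    params_list = params if isinstance(params, list) else [params or tuple()]

    for attempt in range(attempts):
        try:
            result = None
            for _sql, _params in zip(sql_list, params_list):
                c.execute(_sql, _params)
                # the call sites use the return value as either an insert id
                # (Example #4) or an affected row count (Example #10)
                result = c.lastrowid if c.lastrowid else c.rowcount
            if commit:
                db.commit()
            return result
        except pymysql.err.OperationalError as e:
            # 1213 = deadlock, 1205 = lock wait timeout (see Example #6)
            if e.args[0] not in (1213, 1205) or attempt == attempts - 1:
                raise
            logging.warning("deadlock detected -- retrying (attempt %d)", attempt + 1)
            db.rollback()
            time.sleep(0.1 * (attempt + 1))
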
Example #2
    def execute_workload_cleanup(self, db, c):
        # look up all the work that is currently completed
        # a completed work item has no entries in the work_distribution table with a status of 'READY'
        c.execute(
            """
SELECT 
    i.id, 
    i.work
FROM 
    incoming_workload i JOIN work_distribution w ON i.id = w.work_id
    JOIN incoming_workload_type t ON i.type_id = t.id
WHERE
    t.id = %s
GROUP BY 
    i.id, i.work
HAVING
    SUM(IF(w.status = 'READY', 1, 0)) = 0""", (self.workload_type_id, ))

        submission_count = 0
        for work_id, submission_blob in c:
            submission_count += 1
            logging.debug(f"completed work item {work_id}")

            submission = None

            try:
                submission = pickle.loads(submission_blob)
            except Exception as e:
                logging.error(
                    f"unable to un-pickle submission blob for id {work_id}: {e}"
                )

            # clear any files that back the submission
            if submission and submission.files:
                try:
                    target_dir = os.path.join(self.incoming_dir,
                                              submission.uuid)
                    shutil.rmtree(target_dir)
                    logging.debug(f"deleted incoming dir {target_dir}")
                except Exception as e:
                    logging.error(
                        f"unable to delete directory {target_dir}: {e}")

            # we finally clear the database entry for this workload item
            execute_with_retry(db,
                               c,
                               "DELETE FROM incoming_workload WHERE id = %s",
                               (work_id, ),
                               commit=True)

        return submission_count
Example #3
 def _t2():
     with get_db_connection() as db:
         c = db.cursor()
         lock_user0.wait(5)
         # acquire lock on user1
         execute_with_retry(
             db, c,
             "UPDATE users SET email = 'user1@_t2' WHERE username = '******'"
         )
         lock_user1.set()
         # this will block waiting for lock on user0
         execute_with_retry(
             db, c,
             "UPDATE users SET email = 'user0@_t2' WHERE username = '******'"
         )
         db.commit()
Example #4
    def schedule_submission(self, submission, db, c):

        # we don't really need to change the file paths that are stored in the Submission object
        # we just remember where we've moved them to (later)

        try:
            # add this as a workload item to the database queue
            work_id = execute_with_retry(db,
                                         c,
                                         self.insert_workload, (submission, ),
                                         commit=True)
            assert isinstance(work_id, int)

            logging.info(
                f"scheduled {submission.description} mode {submission.analysis_mode}"
            )

        except Exception as e:
            # something went wrong -- delete our incoming directory if we created one
            target_dir = self.get_submission_target_dir(submission)
            if os.path.exists(target_dir):
                try:
                    shutil.rmtree(target_dir)
                except Exception as e:
                    logging.error("unable to delete directory {}: {}".format(
                        target_dir, e))

            raise e

        self.submission_count += 1
Example #5
    def test_execute_with_retry(self, db, c):
        # simple single statement transaction
        execute_with_retry(db, c, ['SELECT 1'], [tuple()])
        db.commit()

        # multi statement transaction
        _uuid = str(uuid.uuid4())
        _lock_uuid = str(uuid.uuid4())
        execute_with_retry(db, c, [
            'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
            'UPDATE locks SET lock_uuid = %s WHERE uuid = %s',
            'DELETE FROM locks WHERE uuid = %s',
        ], [
            (_uuid, ),
            (_lock_uuid, _uuid),
            (_uuid, ),
        ])
        db.commit()
Example #6
 def _t2():
     _uuid = str(uuid.uuid4())
     _lock_uuid = str(uuid.uuid4())
     try:
         with get_db_connection() as db:
             c = db.cursor()
             execute_with_retry(
                 db, c, "UPDATE locks SET lock_owner = 'whatever'")
             # wait for signal to continue
             time.sleep(2)
             execute_with_retry(
                 db, c,
                 "INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )",
                 (_uuid, ))
             db.commit()
     except pymysql.err.OperationalError as e:
         if e.args[0] == 1213 or e.args[0] == 1205:
             deadlock_event.set()
Example #7
def update_content_metadata(sha256_content, node, file_name, db, c):
    return execute_with_retry(
        db,
        c,
        """
INSERT INTO cloudphish_content_metadata ( sha256_content, node, name ) VALUES ( UNHEX(%s), %s, %s )
ON DUPLICATE KEY UPDATE node = %s, name = %s""",
        (sha256_content, node, file_name, node, file_name),
        commit=True)
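
A short usage sketch for update_content_metadata. The content, node name, and file name below are hypothetical, and db and c are assumed to be an open pymysql connection and cursor as in the surrounding examples:

import hashlib

# hypothetical values for illustration only
content = b"example file contents"
sha256_content = hashlib.sha256(content).hexdigest()  # hex digest, matching UNHEX(%s) above

# db, c: open connection and cursor (e.g. from get_db_connection())
update_content_metadata(sha256_content, 'node1.example.com', 'invoice.pdf', db, c)
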
Example #8
    def save_persistent_data(self, key_name, key_value=None):
        """Creates a new persistent key with the given value recorded. The key must not already exist."""
        if key_value is not None:
            key_value = pickle.dumps(key_value)

        with get_db_connection() as db:
            c = db.cursor()
            execute_with_retry(
                db,
                c,
                """
INSERT INTO persistence ( 
    source_id, 
    uuid,
    value
) VALUES ( %s, %s, %s )
ON DUPLICATE KEY UPDATE last_update = CURRENT_TIMESTAMP""",
                (self.persistence_source.id, key_name, key_value),
                commit=True)
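
For context only, a hypothetical counterpart that reads a persisted value back out of the same table might look like the sketch below. It is inferred from the columns used above (source_id, uuid, value) and is not code from this project:

    def load_persistent_data(self, key_name):
        """Hypothetical sketch: returns the stored value for key_name, or None if missing."""
        with get_db_connection() as db:
            c = db.cursor()
            c.execute(
                "SELECT value FROM persistence WHERE source_id = %s AND uuid = %s",
                (self.persistence_source.id, key_name))
            row = c.fetchone()
            # value is NULL when the key was saved without a value
            if row is None or row[0] is None:
                return None
            return pickle.loads(row[0])
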
Example #9
    def test_execute_with_retry_commit(self):
        _uuid = str(uuid.uuid4())
        _lock_uuid = str(uuid.uuid4())
        disable_cached_db_connections()

        # simple insert statement with commit option
        with get_db_connection() as db:
            c = db.cursor()
            execute_with_retry(
                db,
                c,
                'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                (_uuid, ),
                commit=True)

        # check it on another connection
        with get_db_connection() as db:
            c = db.cursor()
            c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid, ))
            self.assertIsNotNone(c.fetchone())

        _uuid = str(uuid.uuid4())
        _lock_uuid = str(uuid.uuid4())

        # and then this one should fail since we did not commit it
        with get_db_connection() as db:
            c = db.cursor()
            execute_with_retry(
                db,
                c,
                'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                (_uuid, ),
                commit=False)

        with get_db_connection() as db:
            c = db.cursor()
            c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid, ))
            self.assertIsNone(c.fetchone())

        enable_cached_db_connections()
Example #10
def clear_alert():
    url, sha256_url = _get_url_and_hash()

    row_count = 0
    with get_db_connection() as db:
        c = db.cursor()
        row_count = execute_with_retry(db, c, """UPDATE cloudphish_analysis_results SET result = 'CLEAR' 
                                                 WHERE sha256_url = UNHEX(%s)""", 
                                      (sha256_url,), commit=True)

    logging.info("request to clear cloudphish alert for {} row_count {}".format(url if url else sha256_url, row_count))

    response = make_response(json.dumps({'result': 'OK', 'row_count': row_count}))
    response.mimetype = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response, 200
Example #11
def _create_analysis(url, reprocess, alertable, **kwargs):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(alertable, bool)
    assert isinstance(kwargs, dict)

    sha256_url = hash_url(url)
    new_entry = False

    try:
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            execute_with_retry(
                c,
                """INSERT INTO analysis_results ( sha256_url ) VALUES ( UNHEX(%s) )""",
                (sha256_url, ))
            db.commit()
            new_entry = True
    except pymysql.err.IntegrityError as e:
        # timing issue -- created as we were getting ready to create
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        if e.args[0] != 1062:
            raise e

        logging.debug("entry for {} already created".format(url))

    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        # if we didn't just create this then we update the status of the existing entry
        # we don't need to do this if we just created it because
        if reprocess or not new_entry:
            execute_with_retry(
                c,
                """UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                (STATUS_NEW, sha256_url))

        try:
            execute_with_retry(
                c,
                """INSERT INTO workload ( sha256_url, url, alertable, details ) VALUES ( UNHEX(%s), %s, %s, %s )""",
                (sha256_url, url, alertable, pickle.dumps(kwargs)))
        except pymysql.err.IntegrityError as e:
            # timing issue -- created as we were getting ready to create
            # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
            if e.args[0] != 1062:
                raise e

            logging.debug("analysis request for {} already exists".format(url))

        db.commit()

    return get_cached_analysis(url)
Example #12
def update_cloudphish_result(sha256_url,
                             http_result_code=None,
                             http_message=None,
                             sha256_content=None,
                             result=None,
                             status=None,
                             db=None,
                             c=None):

    sql = []
    params = []

    if http_result_code is not None:
        sql.append('http_result_code = %s')
        params.append(http_result_code)

    if http_message is not None:
        sql.append('http_message = %s')
        params.append(http_message[:256])

    if sha256_content is not None:
        sql.append('sha256_content = UNHEX(%s)')
        params.append(sha256_content)

    if result is not None:
        sql.append('result = %s')
        params.append(result)

    if status is not None:
        sql.append('status = %s')
        params.append(status)

    if not sql:
        logging.warning(
            "update_cloudphish_result called for {} but nothing was passed in to update?"
            .format(sha256_url))
        return

    params.append(sha256_url)

    sql = "UPDATE cloudphish_analysis_results SET {} WHERE sha256_url = UNHEX(%s)".format(
        ', '.join(sql))
    logging.debug("executing cloudphish update {}".format(sql, params))
    return execute_with_retry(db, c, sql, tuple(params), commit=True)
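
For illustration, a call such as the one below (all values hypothetical, db and c assumed to be an open connection and cursor) would cause the function above to build and execute the statement shown in the trailing comment:

update_cloudphish_result(
    'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
    http_result_code=200,
    result='CLEAR',
    db=db,
    c=c)

# SQL assembled by update_cloudphish_result for this call:
#   UPDATE cloudphish_analysis_results SET http_result_code = %s, result = %s WHERE sha256_url = UNHEX(%s)
# executed with parameters (200, 'CLEAR', <the sha256_url hex string>)
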
Example #13
    def execute(self, db, c):
        # first we get a list of all the distinct analysis modes available in the work queue
        c.execute(
            """
SELECT DISTINCT(incoming_workload.mode)
FROM
    incoming_workload JOIN work_distribution ON incoming_workload.id = work_distribution.work_id
WHERE
    incoming_workload.type_id = %s
    AND work_distribution.group_id = %s
    AND work_distribution.status = 'READY'
""", (
                self.workload_type_id,
                self.group_id,
            ))
        available_modes = c.fetchall()
        db.commit()

        # if we get nothing from this query then no work is available for this group
        if not available_modes:
            if saq.UNIT_TESTING:
                logging.debug("no work available for {}".format(self))
            return NO_WORK_AVAILABLE

        # flatten this out to a list of analysis modes
        available_modes = [_[0] for _ in available_modes]

        # given this list of modes that need remote targets, see what is currently available
        with get_db_connection(self.database) as node_db:
            node_c = node_db.cursor()
            sql = """
SELECT
    nodes.id, 
    nodes.name, 
    nodes.location, 
    nodes.any_mode,
    nodes.last_update,
    node_modes.analysis_mode,
    COUNT(workload.id) AS 'WORKLOAD_COUNT'
FROM
    nodes LEFT JOIN node_modes ON nodes.id = node_modes.node_id
    LEFT JOIN workload ON nodes.id = workload.node_id
WHERE
    nodes.company_id = %s
    AND nodes.is_local = 0
    AND TIMESTAMPDIFF(SECOND, nodes.last_update, NOW()) <= %s
    AND ( nodes.any_mode OR node_modes.analysis_mode in ( {} ) )
GROUP BY
    nodes.id,
    nodes.name,
    nodes.location,
    nodes.any_mode,
    nodes.last_update,
    node_modes.analysis_mode
ORDER BY
    WORKLOAD_COUNT ASC,
    nodes.last_update ASC
""".format(','.join(['%s' for _ in available_modes]))
            params = [self.company_id, self.node_status_update_frequency * 2]
            params.extend(available_modes)
            node_c.execute(sql, tuple(params))
            node_status = node_c.fetchall()

        if not node_status:
            logging.warning(
                "no remote nodes are avaiable for all analysis modes {} for {}"
                .format(','.join(available_modes), self))

            if not self.full_delivery:
                # if this node group is NOT in full_delivery mode and there are no nodes available at all
                # then we just clear out the work queue for this group
                # if this isn't done then the work will pile up waiting for a node to come online
                execute_with_retry(
                    db,
                    c,
                    "UPDATE work_distribution SET status = 'ERROR' WHERE group_id = %s",
                    (self.group_id, ),
                    commit=True)

            return NO_NODES_AVAILABLE

        # now figure out what analysis modes are actually available for processing
        analysis_mode_mapping = {
        }  # key = analysis_mode, value = [ RemoteNode ]
        any_mode_nodes = []  # list of nodes with any_mode set to True

        for node_id, name, location, any_mode, last_update, analysis_mode, workload_count in node_status:
            remote_node = RemoteNode(node_id, name, location, any_mode,
                                     last_update, analysis_mode,
                                     workload_count)
            if any_mode:
                any_mode_nodes.append(remote_node)

            if analysis_mode:
                if analysis_mode not in analysis_mode_mapping:
                    analysis_mode_mapping[analysis_mode] = []

                analysis_mode_mapping[analysis_mode].append(remote_node)

        # now we trim our list of analysis modes down to what is available
        # if we don't have a node that supports any mode
        if not any_mode_nodes:
            available_modes = [
                m for m in available_modes
                if m in analysis_mode_mapping.keys()
            ]
            logging.debug(
                "available_modes = {} after checking available nodes".format(
                    available_modes))

        if not available_modes:
            logging.debug(
                "no nodes are available that support the available analysis modes"
            )
            return NO_NODES_AVAILABLE

        # now we get the next things to submit from the database that have an analysis mode that is currently
        # available to be submitted to

        sql = """
SELECT 
    incoming_workload.id,
    incoming_workload.mode,
    incoming_workload.work
FROM
    incoming_workload JOIN work_distribution ON incoming_workload.id = work_distribution.work_id
WHERE
    incoming_workload.type_id = %s
    AND work_distribution.group_id = %s
    AND incoming_workload.mode IN ( {} )
    AND work_distribution.status = 'READY'
ORDER BY
    incoming_workload.id ASC
LIMIT %s""".format(','.join(['%s' for _ in available_modes]))
        params = [self.workload_type_id, self.group_id]
        params.extend(available_modes)
        params.append(self.batch_size)

        c.execute(sql, tuple(params))
        work_batch = c.fetchall()
        db.commit()

        logging.info("submitting {} items".format(len(work_batch)))

        # simple flag that gets set if ANY submission is successful
        submission_success = False

        # we should have a small list of things to submit to remote nodes for this group
        for work_id, analysis_mode, submission_blob in work_batch:
            # first make sure we can un-pickle this
            try:
                submission = pickle.loads(submission_blob)
            except Exception as e:
                execute_with_retry(
                    db,
                    c,
                    """UPDATE work_distribution SET status = 'ERROR' 
                                             WHERE group_id = %s AND work_id = %s""",
                    (self.group_id, work_id),
                    commit=True)
                logging.error(
                    "unable to un-pickle submission blob for id {}: {}".format(
                        work_id, e))
                continue

            # simple flag to remember if we failed to send
            submission_failed = False

            # the result of the submission (we pass to Submission.success later)
            submission_result = None

            self.coverage_counter += self.coverage
            if self.coverage_counter < 100:
                # we'll be skipping this one
                logging.debug(
                    "skipping work id {} for group {} due to coverage constraints"
                    .format(work_id, self.name))
            else:
                # otherwise we try to submit it
                self.coverage_counter -= 100

                # sort the list of RemoteNode objects by the workload_count
                available_targets = any_mode_nodes[:]
                if analysis_mode in analysis_mode_mapping:
                    available_targets.extend(
                        analysis_mode_mapping[analysis_mode])

                target = sorted(available_targets,
                                key=lambda n: n.workload_count)
                target = target[0]

                # attempt the send
                try:
                    submission_result = target.submit(submission)
                    logging.info("{} got submission result {} for {}".format(
                        self, submission_result, submission))
                    submission_success = True
                except Exception as e:
                    log_function = logging.warning
                    if not self.full_delivery:
                        log_function = logging.warning
                    else:
                        if not isinstance(e, urllib3.exceptions.MaxRetryError) \
                        and not isinstance(e, urllib3.exceptions.NewConnectionError) \
                        and not isinstance(e, requests.exceptions.ConnectionError):
                            # if it's not a connection issue then report it
                            #report_exception()
                            pass

                    log_function(
                        "unable to submit work item {} to {} via group {}: {}".
                        format(submission, target, self, e))

                    # if we are in full delivery mode then we need to try this one again later
                    if self.full_delivery and (isinstance(e, urllib3.exceptions.MaxRetryError) \
                                          or isinstance(e, urllib3.exceptions.NewConnectionError) \
                                          or isinstance(e, requests.exceptions.ConnectionError)):
                        continue

                    # otherwise we consider it a failure
                    submission_failed = True
                    execute_with_retry(
                        db,
                        c,
                        """UPDATE work_distribution SET status = 'ERROR' 
                                                 WHERE group_id = %s AND work_id = %s""",
                        (self.group_id, work_id),
                        commit=True)

            # if we skipped it or we sent it, then we're done with it
            if not submission_failed:
                execute_with_retry(
                    db,
                    c,
                    """UPDATE work_distribution SET status = 'COMPLETED' 
                                             WHERE group_id = %s AND work_id = %s""",
                    (self.group_id, work_id),
                    commit=True)

            if submission_failed:
                try:
                    submission.fail(self)
                except Exception as e:
                    logging.error(f"call to {submission}.fail() failed: {e}")
                    report_exception()
            else:
                try:
                    submission.success(self, submission_result)
                except Exception as e:
                    logging.error(
                        f"call to {submission}.success() failed: {e}")
                    report_exception()

        if submission_success:
            return WORK_SUBMITTED

        return NO_WORK_SUBMITTED
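
The RemoteNode class used above is not shown on this page. From the call sites one can infer its shape: it is built positionally from the node query columns, exposes workload_count for sorting, and has a submit() method that delivers a Submission to the remote node. A minimal sketch of that shape follows; the field names and the submit() body are inferred, not the project's actual code:

import dataclasses
import datetime

@dataclasses.dataclass
class RemoteNode:
    # field order matches the positional construction in the example above
    node_id: int
    name: str
    location: str
    any_mode: bool
    last_update: datetime.datetime
    analysis_mode: str
    workload_count: int

    def submit(self, submission):
        # illustrative placeholder: the real implementation presumably sends the
        # submission to the node at self.location and returns the remote result
        raise NotImplementedError()
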
Example #14
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry("""DELETE FROM cloudphish_analysis_results 
                              WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""", 
                          (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate our analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # so first we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date ) 
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid),
                            (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return that one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now add it to the workload for processing

    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details, # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
Example #15
    def execute_post_analysis(self):
        import saq.database

        # if we are already an Alert AND we have a disposition...
        if isinstance(
                self.root,
                saq.database.Alert) and self.root.id and self.root.disposition:

            # keep track of the observables we've already updated in hal
            _updated_observables = set()  # of md5 hash hexdigest

            # did we already set a disposition for this alert before?
            previous_disposition = None
            if self.state and 'previous_disposition' in self.state:
                previous_disposition = self.state['previous_disposition']
                logging.debug(
                    "loaded previous disposition of {} for {}".format(
                        previous_disposition, self))

            new_disposition = self.root.disposition

            # if the disposition didn't change then we don't care
            if previous_disposition == new_disposition:
                logging.debug(
                    "same disposition {} == {} - not updating".format(
                        new_disposition, self.root.disposition))
                return

            with get_db_connection('hal9000') as db:
                c = db.cursor()

                update_count = 0

                # update counts for all observables
                for observable in self.root.all_observables:
                    md5_hasher = md5()
                    md5_hasher.update(
                        observable.type.encode('utf-8', errors='ignore'))
                    md5_hasher.update(
                        observable.value.encode('utf-8', errors='ignore'))
                    id = md5_hasher.hexdigest()

                    # keep track of the ones we've already updated
                    # we only update any single observable value ONCE for each alert
                    if id in _updated_observables:
                        continue

                    _updated_observables.add(id)

                    # we have three major groups of dispositions: IGNORE, MAL and BENIGN
                    # if we've changed state from what we were previously then we want to "undo" what we did previously

                    if previous_disposition is None or previous_disposition in IGNORE_ALERT_DISPOSITIONS:
                        if new_disposition in MAL_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                INSERT INTO observables (id, mal_count)
                                VALUES (UNHEX(%s), 1)
                                ON DUPLICATE KEY
                                UPDATE total_count = total_count + 1, mal_count = mal_count + 1
                                """, (id, ))
                        elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                INSERT INTO observables (id)
                                VALUES (UNHEX(%s))
                                ON DUPLICATE KEY
                                UPDATE total_count = total_count + 1
                                """, (id, ))
                    elif previous_disposition in BENIGN_ALERT_DISPOSITIONS:
                        if new_disposition in MAL_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                UPDATE observables
                                SET mal_count = mal_count + 1
                                WHERE id = UNHEX(%s)
                                """, (id, ))
                        elif new_disposition in IGNORE_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                UPDATE observables
                                SET total_count = total_count - 1
                                WHERE id = UNHEX(%s) AND total_count > 0
                                """, (id, ))
                    elif previous_disposition in MAL_ALERT_DISPOSITIONS:
                        if new_disposition in BENIGN_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                UPDATE observables
                                SET mal_count = mal_count - 1 
                                WHERE id = UNHEX(%s) AND mal_count > 0
                                """, (id, ))
                        elif new_disposition in IGNORE_ALERT_DISPOSITIONS:
                            execute_with_retry(
                                c, """
                                UPDATE observables
                                SET total_count = total_count - 1, mal_count = mal_count - 1
                                WHERE id = UNHEX(%s) AND total_count > 0 AND mal_count > 0
                                """, (id, ))

                    update_count += 1

                db.commit()

            # remember what our disposition was
            self.state = {}
            self.state['previous_disposition'] = self.root.disposition

            logging.debug(
                "updated {} observables in hal9000".format(update_count))
            return

        # if we're not in the database AND we're not going to be an Alert...
        elif not self.root.has_detections:
            # sanity check
            if not hasattr(self, 'hal9000_observables'):
                logging.error("missing hal9000_observables property")
                return

            with get_db_connection('hal9000') as db:
                c = db.cursor()

                # record appearance of all hal9000 observables
                for id in self.root.hal9000_observables:
                    execute_with_retry(
                        c, """
                                       INSERT INTO observables (id)
                                       VALUES (UNHEX(%s))
                                       ON DUPLICATE KEY
                                       UPDATE total_count = total_count + 1""",
                        (id, ))

                db.commit()

            return

        # otherwise we don't care
        logging.debug(
            "{} is not an alert or does not have a disposition".format(self))
        return
Example #16
    def execute_post_analysis(self):
        import saq.database
        self.initialize_state({
            STATE_KEY_ID_TRACKING:
            {},  # key = return value of _compute_hal9000_md5, value = { } (see below) 
            STATE_KEY_PREVIOUS_DISPOSITION: None
        })

        # start tracking what we do with all the observables
        for observable in self.root.all_observables:
            hal9000_id = _compute_hal9000_md5(observable)
            if hal9000_id not in self.state[STATE_KEY_ID_TRACKING]:
                # we keep track of how we modified the total count and the malicious count for each observable
                # (we record what we ADDED to the value so that we can undo it later if the disposition changes)
                self.state[STATE_KEY_ID_TRACKING][hal9000_id] = {
                    'id': observable.id,
                    KEY_TOTAL_COUNT: None,
                    KEY_MAL_COUNT: None
                }

        if self.root.analysis_mode != ANALYSIS_MODE_CORRELATION:
            # TODO check to see if this analysis mode has cleanup set to True
            # really what we want to do is see if we can possibly end up in a different analysis mode
            with get_db_connection('hal9000') as db:
                c = db.cursor()

                placeholder_clause = ','.join([
                    '(UNHEX(%s))'
                    for _ in self.state[STATE_KEY_ID_TRACKING].keys()
                ])
                parameters = tuple(self.state[STATE_KEY_ID_TRACKING].keys())

                # record appearance of all hal9000 observables
                execute_with_retry(db,
                                   c,
                                   f"""
                                   INSERT INTO observables (id)
                                   VALUES {placeholder_clause}
                                   ON DUPLICATE KEY
                                   UPDATE total_count = total_count + 1""",
                                   parameters,
                                   commit=True)

            return True  # all we do here
            # we don't really need to record any more state here because
            # we expect this entire analysis to get deleted

        # are we an alert with a disposition?
        new_disposition = None

        with get_db_connection() as db:
            c = db.cursor()
            c.execute("SELECT disposition FROM alerts WHERE uuid = %s",
                      (self.root.uuid, ))
            result = c.fetchone()
            db.commit()

            if result:
                new_disposition = result[0]

        if new_disposition is None:
            return False  # no alert or no disposition -- check again later

        # did we already set a disposition for this alert before?
        previous_disposition = self.state[STATE_KEY_PREVIOUS_DISPOSITION]
        logging.debug("loaded previous disposition of {} for {}".format(
            previous_disposition, self))

        # if the disposition didn't change then we don't care
        if previous_disposition == new_disposition:
            logging.debug("same disposition {} == {} - not updating".format(
                previous_disposition, new_disposition))
            return False  # check again later

        all_sql = []  # list of SQL commands to execute
        all_parameters = [
        ]  # list of SQL parameter tuples for the SQL commands

        # if we've changed state from what we were previously then we want to undo what we did previously
        total_count_parameters = []
        mal_count_parameters = []
        for hal9000_id, value in self.state[STATE_KEY_ID_TRACKING].items():
            if self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_TOTAL_COUNT] is not None:
                total_count_parameters.append(hal9000_id)
            if self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_MAL_COUNT] is not None:
                mal_count_parameters.append(hal9000_id)

        if total_count_parameters:
            placeholder_clause = ','.join(
                ['UNHEX(%s)' for _ in total_count_parameters])
            all_sql.append(f"""
                UPDATE observables SET total_count = IF(total_count > 0, total_count - 1, 0)
                WHERE id IN ( {placeholder_clause} )""")
            all_parameters.append(tuple(total_count_parameters))

        if mal_count_parameters:
            placeholder_clause = ','.join(
                ['UNHEX(%s)' for _ in mal_count_parameters])
            all_sql.append(f"""
                UPDATE observables SET mal_count = IF(mal_count > 0, mal_count - 1, 0)
                WHERE id IN ( {placeholder_clause} )""")
            all_parameters.append(tuple(mal_count_parameters))

        # we have three major groups of dispositions: IGNORE, MAL and BENIGN
        placeholder_clause = ','.join([
            '(UNHEX(%s), 1)' for _ in self.state[STATE_KEY_ID_TRACKING].keys()
        ])
        parameters = tuple(self.state[STATE_KEY_ID_TRACKING].keys())

        if new_disposition in MAL_ALERT_DISPOSITIONS:
            placeholder_clause = ','.join([
                '(UNHEX(%s), 1)'
                for _ in self.state[STATE_KEY_ID_TRACKING].keys()
            ])
            all_sql.append(f"""
                INSERT INTO observables (id, mal_count)
                VALUES {placeholder_clause}
                ON DUPLICATE KEY
                UPDATE total_count = total_count + 1, mal_count = mal_count + 1 """
                           )
            all_parameters.append(parameters)

        elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
            placeholder_clause = ','.join([
                '(UNHEX(%s))'
                for _ in self.state[STATE_KEY_ID_TRACKING].keys()
            ])
            all_sql.append(f"""
                INSERT INTO observables (id)
                VALUES {placeholder_clause}
                ON DUPLICATE KEY
                UPDATE total_count = total_count + 1 """)
            all_parameters.append(parameters)

        with get_db_connection('hal9000') as db:
            c = db.cursor()
            execute_with_retry(db, c, all_sql, all_parameters, commit=True)

        # remember what we did so we can undo it later if we need to
        for hal9000_id in self.state[STATE_KEY_ID_TRACKING].keys():
            if new_disposition in MAL_ALERT_DISPOSITIONS:
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_TOTAL_COUNT] = 1
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_MAL_COUNT] = 1
            elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_TOTAL_COUNT] = 1
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_MAL_COUNT] = None
            else:
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_TOTAL_COUNT] = None
                self.state[STATE_KEY_ID_TRACKING][hal9000_id][
                    KEY_MAL_COUNT] = None

        # remember what our disposition was
        self.state[STATE_KEY_PREVIOUS_DISPOSITION] = new_disposition
        return False  # check again later
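
The _compute_hal9000_md5 helper used above is not shown on this page, but Example #15 performs what appears to be the same computation inline; the following sketch is inferred from that code and is not the project's source:

from hashlib import md5

def _compute_hal9000_md5(observable):
    # mirrors the inline computation in Example #15: hash the observable type
    # followed by its value and return the hex digest
    hasher = md5()
    hasher.update(observable.type.encode('utf-8', errors='ignore'))
    hasher.update(observable.value.encode('utf-8', errors='ignore'))
    return hasher.hexdigest()
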
Example #17
    def execute(self, db, c):

        if self.test_mode == TEST_MODE_STARTUP:
            next_submission = None
        elif self.test_mode == TEST_MODE_SINGLE_SUBMISSION and self.submission_count > 0:
            next_submission = None
        else:
            next_submission = self.get_next_submission()

        # did we not get anything to submit?
        if next_submission is None:
            if self.service_is_debug:
                return

            # wait until we check again (defaults to 1 second, passed in on constructor)
            self.service_shutdown_event.wait(self.collection_frequency)
            return

        if not isinstance(next_submission, Submission):
            logging.critical(
                "get_next_submission() must return an object derived from Submission"
            )

        # we COPY the files over to another directory for transfer
        # we'll DELETE them later if we are able to copy them all and then insert the entry into the database
        target_dir = None
        if next_submission.files:
            target_dir = os.path.join(self.incoming_dir, next_submission.uuid)
            if os.path.exists(target_dir):
                logging.error(
                    "target directory {} already exists".format(target_dir))
            else:
                try:
                    os.mkdir(target_dir)
                    for f in next_submission.files:
                        # this could be a tuple of (source_file, target_name)
                        if isinstance(f, tuple):
                            f = f[0]

                        target_path = os.path.join(target_dir,
                                                   os.path.basename(f))
                        # TODO use hard links instead of copies to reduce I/O
                        shutil.copy2(f, target_path)
                        logging.debug("copied file from {} to {}".format(
                            f, target_path))
                except Exception as e:
                    logging.error("I/O error moving files into {}: {}".format(
                        target_dir, e))
                    report_exception()

        # we don't really need to change the file paths that are stored in the Submission object
        # we just remember where we've moved them to (later)

        try:
            # add this as a workload item to the database queue
            work_id = execute_with_retry(db,
                                         c,
                                         self.insert_workload,
                                         (next_submission, ),
                                         commit=True)
            assert isinstance(work_id, int)

            logging.info("scheduled {} mode {}".format(
                next_submission.description, next_submission.analysis_mode))

        except Exception as e:
            # something went wrong -- delete our incoming directory if we created one
            if target_dir:
                try:
                    shutil.rmtree(target_dir)
                except Exception as e:
                    logging.error("unable to delete directory {}: {}".format(
                        target_dir, e))

            raise e

        # all is well -- delete the files we've copied into our incoming directory
        if self.delete_files:
            for f in next_submission.files:
                # this could be a tuple of (source_file, target_name)
                if isinstance(f, tuple):
                    f = f[0]

                try:
                    os.remove(f)
                except Exception as e:
                    logging.error("unable to delete file {}: {}".format(f, e))

        self.submission_count += 1
Example #18
File: ace.py Project: code4days/ACE
    def collect(self):
        # allow persistence to load
        while not self.shutdown and not self.collection_shutdown and self.incomplete_analysis:
            try:
                logging.debug("adding persisted workload item {}".format(
                    self.incomplete_analysis[0]))
                self.work_queue.put(self.incomplete_analysis[0],
                                    block=not saq.SINGLE_THREADED,
                                    timeout=1)
                self.incomplete_analysis.pop(0)
            except Full:
                if not saq.SINGLE_THREADED:
                    continue

        if self.shutdown or self.collection_shutdown:
            return

        # grab the workload from the database
        with get_db_connection() as db:
            c = db.cursor()

            # how many items on the workload stack have already been acquired by this node?
            c.execute("SELECT COUNT(*) FROM workload WHERE node = %s",
                      (saq.SAQ_NODE, ))
            row = c.fetchone()
            assigned_count = row[0]

            if assigned_count:
                logging.debug(
                    "{} work items are currently assigned to {}".format(
                        assigned_count, saq.SAQ_NODE))

            # if there is nothing currently assigned then go ahead and assign some
            # (there is some sql trickery in here to do subselect magic in MySQL)

            if assigned_count < self.analysis_pool_size:

                sql = """
                UPDATE 
                    workload 
                SET 
                    node = %s 
                WHERE id IN ( 
                    SELECT id FROM (
                        SELECT 
                            w.id 
                        FROM 
                            workload w JOIN alerts a ON a.id = w.alert_id
                        WHERE 
                            w.node IS NULL 
                            AND a.location = %s
                        ORDER BY
                            w.id DESC
                        LIMIT %s ) as t)"""

                # the number of assigned work items should equal our analysis_pool_size
                execute_with_retry(c,
                                   sql,
                                   (saq.SAQ_NODE, saq.SAQ_NODE,
                                    self.analysis_pool_size - assigned_count),
                                   attempts=10)
                db.commit()

                if c.rowcount != -1 and c.rowcount is not None:
                    if c.rowcount:
                        logging.debug("assigned {} work items to {}".format(
                            c.rowcount, saq.SAQ_NODE))

            # what we've done so far is marked specific alerts as acquired by this node
            # now we'll actually go *get* them, add them to the workload, and remove them from the database
            # we go ahead and remove the item from the database *before* we're able to execute the analysis

            sql = "SELECT w.id, a.id, a.uuid, a.storage_dir FROM workload w JOIN alerts a ON w.alert_id = a.id WHERE w.node = %s"
            c.execute(sql, (saq.SAQ_NODE, ))

            # we'll keep a list of these so we can remove them later
            assigned_workload_ids = []  # of workload_id

            for workload_id, alert_id, uuid, storage_dir in c:
                logging.debug(
                    "got workload {} alert {} uuid {} storage_dir {}".format(
                        workload_id, alert_id, uuid, storage_dir))

                # make sure this alert is still around
                if not os.path.exists(storage_dir):
                    logging.warning("invalid or missing storage_dir {}".format(
                        storage_dir))
                    continue

                # add this alert to the workload
                self.add_work_item(AnalysisRequest(uuid, storage_dir,
                                                   alert_id))
                assigned_workload_ids.append(workload_id)

            for workload_id in assigned_workload_ids:
                logging.debug("deleting workload_id {}".format(workload_id))
                c.execute("DELETE FROM workload WHERE id = %s",
                          (workload_id, ))

            db.commit()
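
The "sql trickery" mentioned in the comment above works around two MySQL restrictions: an UPDATE may not read from its own target table in a subquery (error 1093), and LIMIT is not allowed directly inside an IN (...) subquery. Wrapping the inner SELECT in a derived table, as the workload UPDATE does, avoids both. A minimal illustration with a hypothetical table t:

# rejected by MySQL (reads the UPDATE target in a subquery and uses LIMIT inside IN):
#   UPDATE t SET flag = 1
#   WHERE id IN ( SELECT id FROM t WHERE flag = 0 LIMIT 5 )
#
# accepted: the subquery is wrapped in a derived table that MySQL materializes first
#   UPDATE t SET flag = 1
#   WHERE id IN ( SELECT id FROM ( SELECT id FROM t WHERE flag = 0 LIMIT 5 ) AS tmp )
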