Example #1
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry("""DELETE FROM cloudphish_analysis_results 
                              WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""", 
                          (sha256_url,), commit=True)

    # if we get to this point it means that the earlier lookup in cloudphish_analysis_results
    # came back empty, OR we just cleared the existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate our analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # so first we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date ) 
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid),
                            (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return that one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add it to the workload for processing

    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details, # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
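The race handling above (insert first, then treat a duplicate-key error as a sign that another request beat us to it) is a general pattern for deduplicating concurrent submissions without taking an explicit lock. Below is a minimal standalone sketch of the same idea, assuming a plain pymysql connection and a hypothetical items table with a unique key on sha256; the table and column names are illustrative, not taken from the example above.

import pymysql

def insert_or_get_existing(db, sha256, uuid_value):
    """Try to claim the row; on a duplicate-key error, return whoever already owns it."""
    try:
        with db.cursor() as c:
            c.execute("INSERT INTO items ( sha256, uuid ) VALUES ( UNHEX(%s), %s )",
                      (sha256, uuid_value))
        db.commit()
        return uuid_value
    except pymysql.err.IntegrityError as e:
        # MySQL error 1062 is "Duplicate entry"; anything else is a real failure
        if e.args[0] != 1062:
            raise
        db.rollback()
        with db.cursor() as c:
            c.execute("SELECT uuid FROM items WHERE sha256 = UNHEX(%s)", (sha256,))
            row = c.fetchone()
        return row[0] if row else None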
Example #2
def submit():

    if KEY_ANALYSIS not in request.values:
        abort(
            Response(
                "missing {} field (see documentation)".format(KEY_ANALYSIS),
                400))

    r = json.loads(request.values[KEY_ANALYSIS])

    # the specified company needs to match the company of this node
    # TODO eventually we'll have a single node that serves API to all configured companies

    if KEY_COMPANY_NAME in r and r[KEY_COMPANY_NAME] != saq.CONFIG['global']['company_name']:
        abort(Response("wrong company {} (are you sending to the correct system?)".format(
            r[KEY_COMPANY_NAME]), 400))

    if KEY_DESCRIPTION not in r:
        abort(
            Response("missing {} field in submission".format(KEY_DESCRIPTION),
                     400))

    root = RootAnalysis()
    root.uuid = str(uuid.uuid4())

    # does the engine use a different drive for the workload?
    analysis_mode = r.get(KEY_ANALYSIS_MODE, saq.CONFIG['engine']['default_analysis_mode'])
    if analysis_mode != ANALYSIS_MODE_CORRELATION:
        root.storage_dir = workload_storage_dir(root.uuid)
    else:
        root.storage_dir = storage_dir_from_uuid(root.uuid)

    root.initialize_storage()

    try:

        root.analysis_mode = analysis_mode
        root.company_id = saq.CONFIG['global'].getint('company_id')
        root.tool = r.get(KEY_TOOL, 'api')
        root.tool_instance = r.get(KEY_TOOL_INSTANCE, 'api({})'.format(request.remote_addr))
        root.alert_type = r.get(KEY_TYPE, saq.CONFIG['api']['default_alert_type'])
        root.description = r[KEY_DESCRIPTION]
        root.event_time = LOCAL_TIMEZONE.localize(datetime.datetime.now())
        if KEY_EVENT_TIME in r:
            try:
                root.event_time = parse_event_time(r[KEY_EVENT_TIME])
            except ValueError as e:
                abort(
                    Response(
                        "invalid event time format for {} (use {} format)".
                        format(r[KEY_EVENT_TIME],
                               event_time_format_json_tz), 400))

        root.details = r[KEY_DETAILS] if KEY_DETAILS in r else {}

        # go ahead and allocate storage
        # XXX use temp dir instead...

        if KEY_TAGS in r:
            for tag in r[KEY_TAGS]:
                root.add_tag(tag)

        # add the observables
        if KEY_OBSERVABLES in r:
            for o in r[KEY_OBSERVABLES]:
                # check for required fields
                for field in [KEY_O_TYPE, KEY_O_VALUE]:
                    if field not in o:
                        abort(
                            Response(
                                "an observable is missing the {} field".format(
                                    field), 400))

                o_type = o[KEY_O_TYPE]
                o_value = o[KEY_O_VALUE]
                o_time = None
                if KEY_O_TIME in o:
                    try:
                        o_time = parse_event_time(o[KEY_O_TIME])
                    except ValueError:
                        abort(
                            Response(
                                "an observable has an invalid time format {} (use {} format)"
                                .format(o[KEY_O_TIME],
                                        event_time_format_json_tz), 400))

                observable = root.add_observable(o_type,
                                                 o_value,
                                                 o_time=o_time)

                if KEY_O_TAGS in o:
                    for tag in o[KEY_O_TAGS]:
                        observable.add_tag(tag)

                if KEY_O_DIRECTIVES in o:
                    for directive in o[KEY_O_DIRECTIVES]:
                        # is this a valid directive?
                        if directive not in VALID_DIRECTIVES:
                            abort(
                                Response(
                                    "observable {} has invalid directive {} (choose from {})"
                                    .format('{}:{}'.format(o_type,
                                                           o_value), directive,
                                            ','.join(VALID_DIRECTIVES)), 400))

                        observable.add_directive(directive)

                if KEY_O_LIMITED_ANALYSIS in o:
                    for module_name in o[KEY_O_LIMITED_ANALYSIS]:
                        observable.limit_analysis(module_name)

        # save the files to disk and add them as observables of type file
        for f in request.files.getlist('file'):
            logging.debug("recording file {}".format(f.filename))
            #temp_dir = tempfile.mkdtemp(dir=saq.CONFIG.get('api', 'incoming_dir'))
            #_path = os.path.join(temp_dir, secure_filename(f.filename))
            try:
                #if os.path.exists(_path):
                #logging.error("duplicate file name {}".format(_path))
                #abort(400)

                #logging.debug("saving file to {}".format(_path))
                #try:
                #f.save(_path)
                #except Exception as e:
                #logging.error("unable to save file to {}: {}".format(_path, e))
                #abort(400)

                full_path = os.path.join(root.storage_dir, f.filename)

                try:
                    dest_dir = os.path.dirname(full_path)
                    if not os.path.isdir(dest_dir):
                        try:
                            os.makedirs(dest_dir)
                        except Exception as e:
                            logging.error(
                                "unable to create directory {}: {}".format(
                                    dest_dir, e))
                            abort(400)

                    logging.debug("saving file {}".format(full_path))
                    f.save(full_path)

                    # add this as a F_FILE type observable
                    root.add_observable(
                        F_FILE,
                        os.path.relpath(full_path, start=root.storage_dir))

                except Exception as e:
                    logging.error("unable to save uploaded file {} to {} for root {}: {}".format(
                        f.filename, full_path, root, e))
                    abort(400)

            except Exception as e:
                logging.error("unable to deal with file {}: {}".format(f, e))
                report_exception()
                abort(400)

            #finally:
            #try:
            #shutil.rmtree(temp_dir)
            #except Exception as e:
            #logging.error("unable to delete temp dir {}: {}".format(temp_dir, e))

        try:
            if not root.save():
                logging.error("unable to save analysis")
                abort(
                    Response(
                        "an error occured trying to save the alert - review the logs",
                        400))

            # if we received a submission for correlation mode then we go ahead and add it to the database
            if root.analysis_mode == ANALYSIS_MODE_CORRELATION:
                ALERT(root)

            # add this analysis to the workload
            root.schedule()

        except Exception as e:
            logging.error("unable to sync to database: {}".format(e))
            report_exception()
            abort(
                Response(
                    "an error occured trying to save the alert - review the logs",
                    400))

        return json_result({'result': {'uuid': root.uuid}})

    except Exception as e:
        logging.error("error processing submit: {}".format(e))
        report_exception()

        try:
            if os.path.isdir(root.storage_dir):
                logging.info("removing failed submit dir {}".format(
                    root.storage_dir))
                shutil.rmtree(root.storage_dir)
        except Exception as e2:
            logging.error("unable to delete failed submit dir {}: {}".format(
                root.storage_dir, e2))

        raise e
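For reference, submit() reads the analysis document from a single form field (request.values[KEY_ANALYSIS]) as a JSON string and picks up uploaded files from the 'file' key. The sketch below shows what a client call might look like using the requests library; the endpoint URL, the 'analysis' form-field name, and every JSON key are assumptions based on the KEY_* constants referenced above, not values confirmed by this code.

import json
import requests

# hypothetical payload; field names mirror the KEY_* constants referenced in submit()
payload = {
    'description': 'manual submission',                # required (KEY_DESCRIPTION)
    'analysis_mode': 'analysis',                       # optional (KEY_ANALYSIS_MODE)
    'observables': [
        {'type': 'ipv4', 'value': '10.0.0.1'},         # each needs at least a type and a value
    ],
}

# hypothetical URL and form-field name
response = requests.post(
    'https://ace.local/api/analysis/submit',
    data={'analysis': json.dumps(payload)},
    files=[('file', ('sample.txt', b'hello world'))],
    verify=False)
print(response.json())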
Example #3
    def process(self, binary_path):
        logging.debug("processing {0}".format(binary_path))
        analysis_start_time = datetime.datetime.now()

        # load the JSON acquired from Carbon Black
        try:
            with open('{0}.json'.format(binary_path), 'r') as fp:
                binary_json = json.load(fp)
        except Exception as e:
            logging.error(
                "unable to parse JSON from Carbon Black for {}: {}".format(
                    binary_path, str(e)))
            return

        # we have to copy the file into the new storage directory for it to be analyzed
        # we use the file name that Carbon Black saw on the endpoint
        try:
            file_name = binary_json['observed_filename'][-1]
        except Exception as e:
            logging.error(
                "cannot determine file name for {}".format(binary_path))
            file_name = 'unknown'

        # we need to figure out if this is a path from a Windows machine or a Unix machine
        # so we count the number of backslashes and forward slashes
        # it's a hack but it should work 99.9% of the time
        if file_name.count('\\') > file_name.count('/'):
            logging.debug("{0} appears to be a windows path".format(file_name))
            file_name = ntpath.basename(file_name)
        else:
            logging.debug("{0} appears to be a unix path".format(file_name))
            file_name = os.path.basename(file_name)

        # figure out when this binary arrived at the carbon black server
        # sometimes the time does not have the .%fZ at the end for some reason
        time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        if '.' in binary_json['server_added_timestamp']:
            time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        event_time = datetime.datetime.strptime(
            binary_json['server_added_timestamp'],
            time_stamp_format).replace(tzinfo=pytz.utc)
        event_time = pytz.timezone('US/Eastern').normalize(event_time)

        # create the root analysis object
        root = RootAnalysis()
        # set all of the properties individually
        # XXX fix me
        # it looks like the construction logic doesn't quite work here:
        # when loading from the arguments to the constructor, the internal
        # variables with leading underscores get set rather than the properties
        # representing the database columns. it was designed that way to allow the
        # JSON stuff to work correctly, so I'll need to revisit that later
        root.tool = 'ACE - Carbon Black Binary Analysis'
        root.tool_instance = socket.gethostname()
        root.alert_type = 'carbon_black_binary'
        root.description = 'Carbon Black binary {0}'.format(file_name)
        root.event_time = event_time
        root.details = binary_json

        # XXX database.Alert does not automatically create this
        root.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3],
                                        root.uuid)

        root.initialize_storage()

        logging.debug("using storage directory {0} for {1}".format(
            root.storage_dir, binary_path))
        dest_path = os.path.join(root.storage_dir, file_name)

        try:
            shutil.copy(binary_path, dest_path)
        except Exception as e:
            logging.error("unable to copy {0} to {1}: {2}".format(
                binary_path, dest_path, str(e)))
            report_exception()
            return

        # note that the path is relative to the storage directory
        root.add_observable(F_FILE, file_name)

        # the endpoints are stored as an array of host names optionally appended with a pipe and count
        # (presumably the number of times that executable has executed on that host)
        for endpoint in binary_json['endpoint']:
            if '|' in endpoint:
                endpoint = endpoint[:endpoint.index('|')]

            root.add_observable(F_HOSTNAME, endpoint)

        for file_path in binary_json['observed_filename']:
            root.add_observable(F_FILE_PATH, file_path)

        # now analyze the file
        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {0}: {1}".format(
                binary_path, str(e)))
            report_exception()

        logging.info("completed {0} analysis time {1}".format(
            binary_path,
            datetime.datetime.now() - analysis_start_time))
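Two details of process() are worth isolating: the slash-counting heuristic that decides whether the observed filename is a Windows or Unix path, and the tolerance for Carbon Black timestamps that may or may not carry fractional seconds. A minimal sketch of both, assuming only the standard library plus pytz (the helper names are mine, not from the example):

import datetime
import ntpath
import os.path
import pytz

def endpoint_basename(observed_path):
    """Guess the path flavor by counting separators, then take the basename."""
    if observed_path.count('\\') > observed_path.count('/'):
        return ntpath.basename(observed_path)    # looks like a Windows path
    return os.path.basename(observed_path)       # otherwise treat it as a Unix path

def parse_cb_timestamp(value):
    """Carbon Black timestamps sometimes lack the fractional .%f component."""
    fmt = "%Y-%m-%dT%H:%M:%S.%fZ" if '.' in value else "%Y-%m-%dT%H:%M:%SZ"
    utc_time = datetime.datetime.strptime(value, fmt).replace(tzinfo=pytz.utc)
    return pytz.timezone('US/Eastern').normalize(utc_time)

print(endpoint_basename(r'c:\windows\system32\cmd.exe'))   # cmd.exe
print(parse_cb_timestamp('2016-01-01T12:00:00Z'))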
Example #4
    def post_smtp_analysis(self, root):
        from saq.modules.email import EmailAnalysis, SMTPStreamAnalysis, \
                                      BrotexSMTPPackageAnalysis, \
                                      KEY_ENVELOPES_MAIL_FROM, KEY_ENVELOPES_RCPT_TO

        # get the paths to the email scanning system
        #email_scanner_dir = saq.CONFIG['engine_email_scanner']['collection_dir']
        email_scanner_dir = self.collection_dir

        # create a new analysis root for each email analysis we found
        for analysis in root.all_analysis:
            if not isinstance(analysis, EmailAnalysis) or not analysis.email:
                continue

            env_mail_from = None
            env_rcpt_to = None
            connection_id = None

            # the observable for this EmailAnalysis will be a file
            email_file = analysis.observable
            if email_file.type != F_FILE:
                logging.warning(
                    "the observable for {} should be F_FILE but it is {}".
                    format(analysis, email_file.type))
            else:
                # this will be either an rfc822 file generated by the SMTPStreamAnalysis module
                # (which will have the envelope information)
                # OR it is a "broken stream" file, which does not
                stream_analysis = [
                    a for a in root.all_analysis
                    if isinstance(a, SMTPStreamAnalysis)
                    and email_file in a.observables
                ]
                if len(stream_analysis) > 1:
                    logging.error("there should not be more than one of these")
                elif len(stream_analysis) == 1:
                    stream_analysis = stream_analysis[0]
                    logging.debug(
                        "detected stream analysis for {}".format(email_file))
                    # get the MAIL FROM and RCPT TO from this
                    if not analysis.env_mail_from:
                        if email_file.value in stream_analysis.envelopes:
                            analysis.env_mail_from = stream_analysis.envelopes[
                                email_file.value][KEY_ENVELOPES_MAIL_FROM]
                    if not analysis.env_rcpt_to:
                        if email_file.value in stream_analysis.envelopes:
                            analysis.env_rcpt_to = stream_analysis.envelopes[
                                email_file.value][KEY_ENVELOPES_RCPT_TO]

                    # get the original brotex package file that the stream came from
                    stream_package = stream_analysis.observable
                    # get the BrotexSMTPPackageAnalysis for this stream package so we can get the connection id
                    package_analysis = [
                        a for a in root.all_analysis
                        if isinstance(a, BrotexSMTPPackageAnalysis)
                        and stream_package in a.observables
                    ]
                    if len(package_analysis) > 1:
                        logging.error(
                            "there should not be more than one of these!")
                    elif len(package_analysis) == 1:
                        package_analysis = package_analysis[0]
                        connection_id = package_analysis.connection_id

                # if we could not find the stream, we will want to find the brotex smtp package so we can have the connection id
                package_analysis = [
                    a for a in root.all_analysis
                    if isinstance(a, BrotexSMTPPackageAnalysis)
                    and email_file in a.observables
                ]
                if len(package_analysis) > 1:
                    logging.error(
                        "there should not be more than one of these!")
                elif len(package_analysis) == 1:
                    package_analysis = package_analysis[0]
                    connection_id = package_analysis.connection_id

            subroot = RootAnalysis()
            subroot.company_name = root.company_name
            subroot.tool = root.tool
            subroot.tool_instance = root.tool_instance
            subroot.alert_type = root.alert_type
            subroot.description = 'Brotex SMTP Stream Detection - '

            if analysis.decoded_subject:
                subroot.description += '{} '.format(analysis.decoded_subject)
            elif analysis.subject:
                subroot.description += '{} '.format(analysis.subject)
            else:
                subroot.description += '(no subject) '
                if analysis.env_mail_from:
                    subroot.description += 'From {} '.format(
                        normalize_email_address(analysis.env_mail_from))
                elif analysis.mail_from:
                    subroot.description += 'From {} '.format(
                        normalize_email_address(analysis.mail_from))
                if analysis.env_rcpt_to:
                    if len(analysis.env_rcpt_to) == 1:
                        subroot.description += 'To {} '.format(
                            analysis.env_rcpt_to[0])
                    else:
                        subroot.description += 'To ({} recipients) '.format(
                            len(analysis.env_rcpt_to))
                elif analysis.mail_to:
                    if isinstance(analysis.mail_to,
                                  list):  # XXX I think this *has* to be a list
                        if len(analysis.mail_to) == 1:
                            subroot.description += 'To {} '.format(
                                analysis.mail_to[0])
                        else:
                            subroot.description += 'To ({} recipients) '.format(
                                len(analysis.mail_to))
                    else:
                        subroot.description += 'To {} '.format(
                            analysis.mail_to)

            subroot.event_time = root.event_time
            subroot.details = analysis.details
            subroot.details['connection_id'] = connection_id
            subroot.uuid = str(uuid.uuid4())

            # we use a temporary directory while we process the file
            subroot.storage_dir = os.path.join(email_scanner_dir,
                                               subroot.uuid[0:3], subroot.uuid)

            subroot.initialize_storage()

            # copy the original file
            src_path = os.path.join(root.storage_dir,
                                    analysis.observable.value)
            dest_path = os.path.join(subroot.storage_dir,
                                     analysis.observable.value)

            subroot.add_observable(
                F_FILE, os.path.relpath(dest_path, start=subroot.storage_dir))

            # so the EmailAnalysis that will trigger on the RFC822 file (or whatever you have)
            # will *not* have the envelope headers
            # so we do that here in the main alert
            env_mail_from = None
            if analysis.env_mail_from:
                # this is to handle this: <*****@*****.**> SIZE=80280
                # XXX assuming there can be no spaces in an email address
                env_mail_from = analysis.env_mail_from.split(' ', 1)
                env_mail_from = env_mail_from[0]

                # is this not the empty indicator?
                if env_mail_from != '<>':
                    env_mail_from = normalize_email_address(env_mail_from)
                    subroot.add_observable(F_EMAIL_ADDRESS, env_mail_from)

            if analysis.env_rcpt_to:
                for address in analysis.env_rcpt_to:
                    address = normalize_email_address(address)
                    if address:
                        subroot.add_observable(F_EMAIL_ADDRESS, address)
                        if env_mail_from:
                            subroot.add_observable(
                                F_EMAIL_CONVERSATION,
                                create_email_conversation(
                                    env_mail_from, address))

            try:
                subroot.save()
            except Exception as e:
                logging.error("unable to save {}: {}".format(alert, e))
                report_exception()
                continue

            # TODO also add the stream and update any envelopment headers and stuff

            try:
                logging.debug("copying {} to {}".format(src_path, dest_path))
                shutil.copy(src_path, dest_path)
            except Exception as e:
                logging.error("unable to copy {} to {}: {}".format(
                    src_path, dest_path, e))
                report_exception()
                continue

            # submit the path to the database of the email scanner for analysis
            try:
                submit_sql_work_item('EMAIL', subroot.storage_dir)
            except Exception as e:
                logging.error("unable to add work item: {}".format(e))
                report_exception()
                continue
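The envelope handling near the end of post_smtp_analysis strips SMTP parameters such as SIZE=... from the MAIL FROM value and skips the null sender. Below is a small sketch of that normalization in isolation; normalize_email_address is stubbed here because its real implementation is not shown (the stub just lowercases and strips angle brackets, which is an assumption):

def normalize_email_address(value):
    # stand-in for the real helper used above, which is not shown in these examples
    return value.strip().strip('<>').lower()

def envelope_sender(env_mail_from):
    """Return a normalized MAIL FROM address, or None for the null sender '<>'."""
    # e.g. '<sender@example.com> SIZE=80280' keeps only the address portion
    address = env_mail_from.split(' ', 1)[0]
    if address == '<>':
        return None
    return normalize_email_address(address)

print(envelope_sender('<Sender@Example.com> SIZE=80280'))   # sender@example.com
print(envelope_sender('<>'))                                # None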
Example #5
    def post_http_analysis(self, root):

        from saq.modules.http import BrotexHTTPPackageAnalysis, \
                                     KEY_TIME, \
                                     KEY_SRC_IP, \
                                     KEY_SRC_PORT, \
                                     KEY_DEST_IP, \
                                     KEY_DEST_PORT, \
                                     KEY_METHOD, \
                                     KEY_HOST, \
                                     KEY_URI, \
                                     KEY_REFERRER, \
                                     KEY_USER_AGENT, \
                                     KEY_STATUS_CODE, \
                                     KEY_FILES

        # get the paths to the http scanning system
        #http_scanner_dir = saq.CONFIG['engine_http_scanner']['collection_dir']
        http_scanner_dir = self.collection_dir

        analysis = None
        for a in root.all_analysis:
            if isinstance(a, BrotexHTTPPackageAnalysis) and a.requests:
                analysis = a
                break

        # analysis will be None if the request was whitelisted
        if analysis:
            for request in analysis.requests:
                subroot = RootAnalysis()
                subroot.company_name = root.company_name
                subroot.tool = root.tool
                subroot.tool_instance = root.tool_instance
                subroot.alert_type = root.alert_type
                subroot.description = "Brotex HTTP Stream Detection - "
                if request[KEY_HOST]:
                    subroot.description += " {} ".format(request[KEY_HOST])

                if request[KEY_DEST_IP]:
                    subroot.description += " ({}) ".format(
                        request[KEY_DEST_IP])

                if request[KEY_URI]:
                    # don't want to show all the fragments and query params
                    try:
                        parts = urlparse(request[KEY_URI])
                        subroot.description += parts.path
                    except Exception as e:
                        logging.warning("unable to parse {}: {}".format(
                            request[KEY_URI], e))
                        subroot.description += request[KEY_URI]

                subroot.event_time = root.event_time
                subroot.details = request
                subroot.uuid = str(uuid.uuid4())

                # we use a temporary directory while we process the file
                subroot.storage_dir = os.path.join(http_scanner_dir,
                                                   subroot.uuid[0:3],
                                                   subroot.uuid)

                subroot.initialize_storage()

                if request[KEY_SRC_IP]:
                    subroot.add_observable(F_IPV4, request[KEY_SRC_IP])

                if request[KEY_DEST_IP]:
                    subroot.add_observable(F_IPV4, request[KEY_DEST_IP])

                if request[KEY_SRC_IP] and request[KEY_DEST_IP]:
                    subroot.add_observable(
                        F_IPV4_CONVERSATION,
                        create_ipv4_conversation(request[KEY_SRC_IP],
                                                 request[KEY_DEST_IP]))

                if request[KEY_HOST]:
                    subroot.add_observable(F_FQDN, request[KEY_HOST])

                if request[KEY_URI]:
                    subroot.add_observable(F_URL, request[KEY_URI])

                if request[KEY_REFERRER]:
                    subroot.add_observable(F_URL, request[KEY_REFERRER])

                for file_path in request[KEY_FILES]:
                    src_path = os.path.join(root.storage_dir, file_path)
                    dest_path = os.path.join(subroot.storage_dir,
                                             os.path.basename(file_path))
                    try:
                        shutil.copy(src_path, dest_path)
                    except Exception as e:
                        logging.error("unable to copy {} to {}: {}".format(
                            src_path, dest_path, e))
                        report_exception()

                    subroot.add_observable(
                        F_FILE,
                        os.path.basename(file_path))  # already relative

                try:
                    subroot.save()
                except Exception as e:
                    logging.error("unable to save {}: {}".format(alert, e))
                    report_exception()
                    continue

                # submit the path to the database of the http scanner for analysis
                try:
                    submit_sql_work_item(
                        'HTTP', subroot.storage_dir)  # XXX hard coded constant
                except:
                    # failure is already logged inside the call
                    continue
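The description logic in post_http_analysis deliberately appends only the path component of the requested URI so that query parameters and fragments do not clutter alert titles. A minimal sketch of that trimming, assuming Python 3's urllib.parse:

from urllib.parse import urlparse

def short_uri(uri):
    """Keep only the path so query strings and fragments stay out of the description."""
    try:
        return urlparse(uri).path
    except Exception:
        # fall back to the raw value if the URI cannot be parsed
        return uri

print(short_uri('http://example.com/download/update.exe?id=12345#frag'))   # /download/update.exe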
Example #6
    def process(self, process):
        logging.debug("processing json")
        analysis_start_time = datetime.datetime.now()

        try:
            file_path = process['cmdline'].split('"')[-2]
        except:
            logging.error("cannot determine file path for {}".format(
                process['cmdline']))
            file_path = 'unknown'

        try:
            file_name = file_path.split('\\')[-1]
        except:
            logging.error(
                "cannot determine file name for {}".format(file_path))
            file_name = 'unknown'

        # figure out when this process started according to carbon black
        # sometimes the time does not have the .%fZ at the end for some reason
        time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        if '.' in process['start']:
            time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        event_time = datetime.datetime.strptime(
            process['start'], time_stamp_format).replace(tzinfo=pytz.utc)
        event_time = pytz.timezone('US/Eastern').normalize(event_time)

        # create the root analysis object
        root = RootAnalysis()
        # set all of the properties individually
        # XXX fix me
        # it looks like the construction logic doesn't quite work here:
        # when loading from the arguments to the constructor, the internal
        # variables with leading underscores get set rather than the properties
        # representing the database columns. it was designed that way to allow the
        # JSON stuff to work correctly, so I'll need to revisit that later
        root.tool = 'ACE - Carbon Black Internet Office File Analysis'
        root.tool_instance = socket.gethostname()
        root.alert_type = 'carbon_black_internet_office_file'
        root.description = 'Carbon Black Internet Office File {0}'.format(
            file_name)
        root.event_time = event_time
        root.details = process

        # XXX database.Alert does not automatically create this
        root.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3],
                                        root.uuid)
        root.initialize_storage()

        # note that the path is relative to the storage directory
        fl_observable = root.add_observable(
            F_FILE_LOCATION,
            create_file_location(process['hostname'], file_path))
        if fl_observable: fl_observable.add_directive(DIRECTIVE_COLLECT_FILE)
        root.add_observable(F_FILE_PATH, file_path)
        root.add_observable(F_FILE_NAME, file_name)
        root.add_observable(F_HOSTNAME, process['hostname'])

        # now analyze the file
        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(
                process['id'], e))
            report_exception()

        logging.info("completed {} analysis time {}".format(
            process['id'],
            datetime.datetime.now() - analysis_start_time))
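The file path extraction at the top of this process() assumes the office document is the last double-quoted token in the Carbon Black cmdline, and that the file name is whatever follows the last backslash. A small sketch of that parsing against a hypothetical command line (the cmdline shown is illustrative only):

def extract_office_target(cmdline):
    """Pull the quoted document path and its file name out of a process command line."""
    try:
        # the target document is assumed to be the last double-quoted argument
        file_path = cmdline.split('"')[-2]
    except IndexError:
        file_path = 'unknown'

    file_name = file_path.split('\\')[-1]
    return file_path, file_name

cmdline = '"C:\\Program Files\\Microsoft Office\\WINWORD.EXE" /n "C:\\Users\\bob\\Downloads\\invoice.docm"'
print(extract_office_target(cmdline))   # ('C:\\Users\\bob\\Downloads\\invoice.docm', 'invoice.docm')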
Example #7
    def process(self, work_item):
        url, alertable, details = work_item
        # any other result means we should process it
        logging.info("processing url {} (alertable {})".format(url, alertable))
        #logging.debug("details = {}".format(details))

        sha256_url = hash_url(url)

        # create or update our analysis entry
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            c.execute(
                """UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                (STATUS_ANALYZING, sha256_url))
            db.commit()

        root = RootAnalysis()
        # create a temporary storage directory for this work
        root.tool = 'ACE - Cloudphish'
        root.tool_instance = self.location
        root.alert_type = 'cloudphish'
        root.description = 'ACE Cloudphish Detection - {}'.format(url)
        root.event_time = datetime.datetime.now()
        root.uuid = str(uuid.uuid4())
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:2],
                                        root.uuid)
        root.initialize_storage()

        if 'i' in details:
            root.company_name = details['i']

        if 'd' in details:
            root.company_id = details['d']

        root.details = {
            KEY_DETAILS_URL: url,
            KEY_DETAILS_SHA256_URL: sha256_url,
            KEY_DETAILS_ALERTABLE: alertable,
            KEY_DETAILS_CONTEXT: details,
        }

        url_observable = root.add_observable(F_URL, url)
        if url_observable is None:
            logging.error("request for invalid url received: {}".format(url))
            return

        url_observable.add_directive(DIRECTIVE_CRAWL)

        # the "details context" can also contain observables
        for key in root.details[KEY_DETAILS_CONTEXT].keys():
            if key in VALID_OBSERVABLE_TYPES:
                root.add_observable(key,
                                    root.details[KEY_DETAILS_CONTEXT][key])

        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(url, e))
            report_exception()

            with get_db_connection('cloudphish') as db:
                c = db.cursor()
                c.execute(
                    """UPDATE analysis_results SET 
                                 result = %s,
                                 status = %s,
                                 http_result_code = NULL,
                                 http_message = NULL,
                                 sha256_content = NULL
                             WHERE sha256_url = UNHEX(%s)""",
                    (SCAN_RESULT_ERROR, STATUS_ANALYZED, sha256_url))
                db.commit()
                return
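hash_url itself never appears in these examples, but the UNHEX(%s) parameters and the sha256_url column strongly suggest it returns a hex SHA-256 digest of the URL. A guess at its shape, offered purely as an assumption (the real implementation may normalize the URL first):

import hashlib

def hash_url(url):
    # assumed behavior: hex SHA-256 of the UTF-8 encoded URL, suitable for MySQL UNHEX()
    return hashlib.sha256(url.encode('utf-8', errors='replace')).hexdigest()

print(hash_url('http://example.com/'))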