Example #1
    def get_summary_from_keys(self,
                              keys,
                              cl_engine=forge.get_classification(),
                              user_classification=None):
        out = {
            "tags": [],
            "attack_matrix": [],
            "heuristics": {
                "info": [],
                "suspicious": [],
                "malicious": []
            },
            "classification": cl_engine.UNRESTRICTED,
            "filtered": False
        }
        done_map = {"heuristics": set(), "attack": set(), "tags": set()}

        if len(keys) == 0:
            return out

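        # Keys ending in ".e" point to empty results, so skip them; the first
        # 64 characters of every result key are the file's sha256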
        keys = [x for x in list(keys) if not x.endswith(".e")]
        file_keys = list(set([x[:64] for x in keys]))
        try:
            items = self.result.multiget(keys, as_obj=False)
        except MultiKeyError as e:
            # Generate partial summaries even if results are missing
            log.warning(
                f"Trying to generate summary but we are missing result(s): {str(e.keys)}"
            )
            items = e.partial_output
            out['missing_results'] = e.keys
        try:
            files = self.file.multiget(file_keys, as_obj=False)
        except MultiKeyError as e:
            # Generate partial summaries even if files are missing
            log.warning(
                f"Trying to generate summary but we are missing file(s): {str(e.keys)}"
            )
            files = e.partial_output
            out['missing_files'] = e.keys

        for key, item in items.items():
            for section in item.get('result', {}).get('sections', []):
                file_classification = files.get(key[:64], {}).get(
                    'classification', section['classification'])
                if user_classification:
                    if not cl_engine.is_accessible(user_classification,
                                                   section['classification']):
                        out["filtered"] = True
                        continue
                    if not cl_engine.is_accessible(user_classification,
                                                   file_classification):
                        out["filtered"] = True
                        continue

                out["classification"] = cl_engine.max_classification(
                    out["classification"], section['classification'])
                out["classification"] = cl_engine.max_classification(
                    out["classification"], file_classification)

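                # Sections default to the "info" bucket; a heuristic score
                # promotes them to "suspicious" or "malicious" below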
                h_type = "info"

                if section.get('heuristic', False):
                    # Get the heuristics data
                    if section['heuristic']['score'] < 100:
                        h_type = "info"
                    elif section['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    cache_key = f"{section['heuristic']['heur_id']}_{key}"
                    if cache_key not in done_map['heuristics']:
                        out['heuristics'][h_type].append({
                            'heur_id': section['heuristic']['heur_id'],
                            'name': section['heuristic']['name'],
                            'key': key
                        })
                        done_map['heuristics'].add(cache_key)

                    for attack in section['heuristic'].get('attack', []):
                        # Get attack matrix data
                        attack_id = attack['attack_id']

                        cache_key = f"{attack_id}_{key}"
                        if cache_key not in done_map['attack']:
                            out['attack_matrix'].append({
                                "key": key,
                                "attack_id": attack_id,
                                "h_type": h_type,
                                "name": attack['pattern'],
                                "categories": attack['categories']
                            })
                            done_map['attack'].add(cache_key)

                # Get tagging data
                for tag_type, tags in flatten(section.get('tags', {})).items():
                    if tags is not None:
                        for tag in tags:
                            cache_key = f"{tag_type}_{tag}_{key}"

                            if cache_key not in done_map['tags']:
                                out['tags'].append({
                                    'type': tag_type,
                                    'h_type': h_type,
                                    'short_type': tag_type.rsplit(".", 1)[-1],
                                    'value': tag,
                                    'key': key
                                })
                                done_map['tags'].add(cache_key)

        return out
Example #2
import shutil

from flask import request

from assemblyline.common.dict_utils import flatten
from assemblyline_ui.api.base import api_login, make_api_response, make_subapi_blueprint
from assemblyline_ui.config import STORAGE, TEMP_SUBMIT_DIR
from assemblyline_ui.helper.service import ui_to_submission_params
from assemblyline_ui.helper.submission import safe_download, FileTooBigException, InvalidUrlException, ForbiddenLocation
from assemblyline_ui.helper.user import check_submission_quota, get_default_user_settings
from assemblyline.common import forge
from assemblyline.common.uid import get_random_id
from assemblyline.odm.messages.submission import Submission
from assemblyline_core.submission_client import SubmissionClient, SubmissionException

Classification = forge.get_classification()
config = forge.get_config()

SUB_API = 'submit'
submit_api = make_subapi_blueprint(SUB_API, api_version=4)
submit_api._doc = "Submit files to the system"


# noinspection PyUnusedLocal
@submit_api.route("/dynamic/<sha256>/", methods=["GET"])
@api_login(required_priv=['W'], allow_readonly=False)
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis
    
    Variables:
Example #3
    def delete_submission_tree(self,
                               sid,
                               cl_engine=forge.get_classification(),
                               cleanup=True,
                               transport=None):
        submission = self.submission.get(sid, as_obj=False)
        if not submission:
            return

        # Gather file list
        errors = submission['errors']
        results = submission["results"]
        files = set()
        fix_classification_files = set()
        supp_map = {}

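        # Error and result keys are prefixed with the file's sha256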
        temp_files = [x[:64] for x in errors]
        temp_files.extend([x[:64] for x in results])
        temp_files = set(temp_files)

        # Inspect each file to see if it is reused by other submissions
        for temp in temp_files:
            # Hunt for supplementary files
            supp_list = set()
            for res in self.result.stream_search(
                    f"id:{temp}* AND response.supplementary.sha256:*",
                    fl="id",
                    as_obj=False):
                if res['id'] in results:
                    result = self.result.get(res['id'], as_obj=False)
                    for supp in result['response']['supplementary']:
                        supp_list.add(supp['sha256'])

            # Check if we delete or update classification
            if self.submission.search(f"errors:{temp}* OR results:{temp}*",
                                      rows=0,
                                      as_obj=False)["total"] < 2:
                files.add(temp)
                files = files.union(supp_list)
            else:
                fix_classification_files.add(temp)
                supp_map[temp] = supp_list

        # Filter results and errors
        errors = [x for x in errors if x[:64] in files]
        results = [x for x in results if x[:64] in files]

        # Delete files, errors, results that were only used once
        for e in errors:
            self.error.delete(e)
        for r in results:
            if r.endswith(".e"):
                self.emptyresult.delete(r)
            else:
                self.result.delete(r)
        for f in files:
            self.file.delete(f)
            if transport:
                transport.delete(f)

        if fix_classification_files and cleanup:
            # Fix classification for the files that remain in the system
            for f in fix_classification_files:
                cur_file = self.file.get(f, as_obj=False)
                if cur_file:
                    # Find possible classification for the file in the system
                    query = f"NOT id:{sid} AND (files.sha256:{f} OR results:{f}* OR errors:{f}*)"
                    classifications = list(
                        self.submission.facet('classification',
                                              query=query).keys())

                    if len(classifications) > 0:
                        new_file_class = classifications[0]
                    else:
                        new_file_class = cl_engine.UNRESTRICTED

                    for c in classifications:
                        new_file_class = cl_engine.min_classification(
                            new_file_class, c)

                    # Find the results for that file and alter them if the new classification does not match
                    for item in self.result.stream_search(
                            f"id:{f}*", fl="classification,id", as_obj=False):
                        new_class = cl_engine.max_classification(
                            item.get('classification', cl_engine.UNRESTRICTED),
                            new_file_class)
                        if item.get('classification',
                                    cl_engine.UNRESTRICTED) != new_class:
                            parts = cl_engine.get_access_control_parts(
                                new_class)
                            update_params = [(Collection.UPDATE_SET,
                                              'classification', new_class)]
                            update_params.extend([(Collection.UPDATE_SET, k, v)
                                                  for k, v in parts.items()])
                            self.result.update(item['id'], update_params)

                    # Alter the file classification if the new classification does not match
                    if cur_file['classification'] != new_file_class:
                        parts = cl_engine.get_access_control_parts(
                            new_file_class)
                        update_params = [(Collection.UPDATE_SET,
                                          'classification', new_file_class)]
                        update_params.extend([(Collection.UPDATE_SET, k, v)
                                              for k, v in parts.items()])
                        self.file.update(f, update_params)

                    # Fix associated supplementary files
                    for supp in supp_map.get(f, set()):
                        cur_supp = self.file.get(supp, as_obj=False)
                        if cur_supp:
                            if cur_supp['classification'] != new_file_class:
                                parts = cl_engine.get_access_control_parts(
                                    new_file_class)
                                update_params = [
                                    (Collection.UPDATE_SET, 'classification',
                                     new_file_class)
                                ]
                                update_params.extend([
                                    (Collection.UPDATE_SET, k, v)
                                    for k, v in parts.items()
                                ])
                                self.file.update(supp, update_params)

        # Delete the submission and cached trees and summaries
        self.submission.delete(sid)
        for t in [
                x['id'] for x in self.submission_tree.stream_search(
                    f"id:{sid}*", fl="id", as_obj=False)
        ]:
            self.submission_tree.delete(t)
        for s in [
                x['id'] for x in self.submission_summary.stream_search(
                    f"id:{sid}*", fl="id", as_obj=False)
        ]:
            self.submission_summary.delete(s)
Example #4
    def get_or_create_file_tree(self,
                                submission,
                                max_depth,
                                cl_engine=forge.get_classification(),
                                user_classification=None):
        if user_classification is not None:
            user_classification = cl_engine.normalize_classification(
                user_classification, long_format=False)
            cache_key = f"{submission['sid']}_{user_classification}"
            for illegal_char in [" ", ":", "/"]:
                cache_key = cache_key.replace(illegal_char, "")
        else:
            cache_key = submission['sid']

        if isinstance(submission, Model):
            submission = submission.as_primitives()

        num_files = len(list(set([x[:64] for x in submission['results']])))
        max_score = submission['max_score']

        cached_tree = self.submission_tree.get_if_exists(cache_key,
                                                         as_obj=False)
        if cached_tree:
            tree = json.loads(cached_tree['tree'])
            if self._is_valid_tree(tree, num_files, max_score):
                return {
                    "tree": tree,
                    "classification": cached_tree['classification'],
                    "filtered": cached_tree['filtered'],
                    "partial": False
                }

        partial = False
        files = {}
        scores = {}
        missing_files = []
        file_hashes = [x[:64] for x in submission['results']]
        file_hashes.extend([x[:64] for x in submission['errors']])
        file_hashes.extend([f['sha256'] for f in submission['files']])
        try:
            temp_file_data_map = self.file.multiget(list(set(file_hashes)),
                                                    as_dictionary=True,
                                                    as_obj=False)
        except MultiKeyError as e:
            log.warning(
                f"Trying to generate file tree but we are missing file(s): {str(e.keys)}"
            )
            temp_file_data_map = e.partial_output
            missing_files = e.keys
            partial = True
        forbidden_files = set()

        max_classification = cl_engine.UNRESTRICTED
        file_data_map = {}
        for key, value in temp_file_data_map.items():
            if user_classification and not cl_engine.is_accessible(
                    user_classification, value['classification']):
                partial = True
                forbidden_files.add(key)
                continue
            file_data_map[key] = value
            max_classification = cl_engine.max_classification(
                max_classification, value['classification'])

        try:
            results_data = self.result.multiget(
                [x for x in submission['results'] if not x.endswith(".e")],
                as_obj=False)
        except MultiKeyError as e:
            log.warning(
                f"Trying to generate file tree but we are missing result(s): {str(e.keys)}"
            )
            results_data = e.partial_output
            partial = True

        for key, item in results_data.items():
            sha256 = key[:64]

            # Get scores
            if sha256 not in scores:
                scores[sha256] = 0
            scores[sha256] += item["result"]["score"]

            # Get files
            extracted = item['response']['extracted']
            if len(extracted) == 0:
                continue
            if sha256 not in files:
                files[sha256] = []
            files[sha256].extend(extracted)

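        # Cache of every sha256 already placed in the tree; children seen before
        # are marked truncated instead of being expanded again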
        tree_cache = []

        def recurse_tree(child_p, placeholder, parents_p, lvl=0):
            if lvl == max_depth + 1:
                # Enforce depth protection while building the tree
                return

            c_sha256 = child_p['sha256']
            c_name = child_p['name']
            if c_sha256 in placeholder:
                placeholder[c_sha256]['name'].append(c_name)
            else:
                children_list = {}
                truncated = False
                child_list = files.get(c_sha256, [])
                for new_child in child_list:
                    if new_child['sha256'] in tree_cache:
                        truncated = True
                        continue
                    tree_cache.append(new_child['sha256'])

                    if new_child['sha256'] not in parents_p:
                        recurse_tree(new_child, children_list,
                                     parents_p + [c_sha256], lvl + 1)

                try:
                    placeholder[c_sha256] = {
                        "name": [c_name],
                        "type": file_data_map[c_sha256]['type'],
                        "sha256": file_data_map[c_sha256]['sha256'],
                        "size": file_data_map[c_sha256]['size'],
                        "children": children_list,
                        "truncated": truncated,
                        "score": scores.get(c_sha256, 0),
                    }
                except KeyError:
                    if c_sha256 not in forbidden_files and c_sha256 not in missing_files:
                        file_data_map[c_sha256] = self.file.get(c_sha256,
                                                                as_obj=False)
                        placeholder[c_sha256] = {
                            "name": [c_name],
                            "type": file_data_map[c_sha256]['type'],
                            "sha256": file_data_map[c_sha256]['sha256'],
                            "size": file_data_map[c_sha256]['size'],
                            "children": children_list,
                            "truncated": truncated,
                            "score": scores.get(c_sha256, 0),
                        }

        tree = {}
        for f in submission['files']:
            sha256 = f['sha256']
            name = f['name']

            if sha256 in tree:
                tree[sha256]['name'].append(name)
            else:
                parents = [sha256]
                children = {}
                c_list = files.get(sha256, [])
                for child in c_list:
                    tree_cache.append(child['sha256'])
                    recurse_tree(child, children, parents)

                try:
                    tree[sha256] = {
                        "name": [name],
                        "children": children,
                        "type": file_data_map[sha256]['type'],
                        "sha256": file_data_map[sha256]['sha256'],
                        "size": file_data_map[sha256]['size'],
                        "truncated": False,
                        "score": scores.get(sha256, 0),
                    }
                except KeyError:
                    if sha256 not in forbidden_files and sha256 not in missing_files:
                        file_data_map[sha256] = self.file.get(sha256,
                                                              as_obj=False)
                        tree[sha256] = {
                            "name": [name],
                            "children": children,
                            "type": file_data_map[sha256]['type'],
                            "sha256": file_data_map[sha256]['sha256'],
                            "size": file_data_map[sha256]['size'],
                            "truncated": False,
                            "score": scores.get(sha256, 0),
                        }

        if not partial:
            cached_tree = {
                'expiry_ts': now_as_iso(days_until_archive * 24 * 60 * 60),
                'tree': json.dumps(tree),
                'classification': max_classification,
                'filtered': len(forbidden_files) > 0
            }

            self.submission_tree.save(cache_key, cached_tree)

        return {
            'tree': tree,
            'classification': max_classification,
            'filtered': len(forbidden_files) > 0,
            'partial': partial
        }
Example #5
import os
import random
import tempfile

from assemblyline.common import forge
from assemblyline.common.dict_utils import flatten
from assemblyline.common.hexdump import hexdump
from assemblyline_v4_service.common.base import ServiceBase
from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic

# DO NOT IMPORT IN YOUR SERVICE. These are just for creating randomized results.
from assemblyline.odm.randomizer import get_random_phrase, get_random_ip, get_random_host, get_random_tags
# DO NOT LIST BODY FORMATS LIKE THIS. This is again for the data randomizer.
FORMAT_LIST = [BODY_FORMAT.TEXT, BODY_FORMAT.MEMORY_DUMP]

cl_engine = forge.get_classification()


class ResultSample(ServiceBase):
    def __init__(self, config=None):
        super(ResultSample, self).__init__(config)

    def start(self):
        # ==================================================================
        # On Startup actions:
        #   Your service might have to do some warming up on startup to make things faster

        self.log.info(f"start() from {self.service_attributes.name} service called")

    def execute(self, request):
        # ==================================================================
Example #6
    def delete_submission_tree_bulk(self,
                                    sid,
                                    cl_engine=forge.get_classification(),
                                    cleanup=True,
                                    transport=None):
        submission = self.submission.get(sid, as_obj=False)
        if not submission:
            return

        # Create plans
        s_plan = self.submission.get_bulk_plan()
        st_plan = self.submission_tree.get_bulk_plan()
        ss_plan = self.submission_summary.get_bulk_plan()
        e_plan = self.error.get_bulk_plan()
        er_plan = self.emptyresult.get_bulk_plan()
        r_plan = self.result.get_bulk_plan()
        f_plan = self.file.get_bulk_plan()

        # Add delete operation for submission and cache
        s_plan.add_delete_operation(sid)
        for x in self.submission_tree.stream_search(f"id:{sid}*",
                                                    fl="id,_index",
                                                    as_obj=False):
            st_plan.add_delete_operation(x['id'], index=x['_index'])
        for x in self.submission_summary.stream_search(f"id:{sid}*",
                                                       fl="id,_index",
                                                       as_obj=False):
            ss_plan.add_delete_operation(x['id'], index=x['_index'])

        # Gather file list
        errors = submission['errors']
        results = submission["results"]
        files = set()
        fix_classification_files = set()
        supp_map = {}

        temp_files = [x[:64] for x in errors]
        temp_files.extend([x[:64] for x in results])
        temp_files = set(temp_files)

        # Inspect each file to see if it is reused by other submissions
        for temp in temp_files:
            # Hunt for supplementary files
            supp_list = set()
            for res in self.result.stream_search(
                    f"id:{temp}* AND response.supplementary.sha256:*",
                    fl="id",
                    as_obj=False):
                if res['id'] in results:
                    result = self.result.get(res['id'], as_obj=False)
                    for supp in result['response']['supplementary']:
                        supp_list.add(supp['sha256'])

            # Check if we delete or update classification
            if self.submission.search(f"errors:{temp}* OR results:{temp}*",
                                      rows=0,
                                      as_obj=False)["total"] < 2:
                files.add(temp)
                files = files.union(supp_list)
            else:
                fix_classification_files.add(temp)
                supp_map[temp] = supp_list

        # Filter results and errors
        errors = [x for x in errors if x[:64] in files]
        results = [x for x in results if x[:64] in files]

        # Delete files, errors, results that were only used once
        for e in errors:
            e_plan.add_delete_operation(e)
        for r in results:
            if r.endswith(".e"):
                er_plan.add_delete_operation(r)
            else:
                r_plan.add_delete_operation(r)
        for f in files:
            f_plan.add_delete_operation(f)
            if transport:
                transport.delete(f)

        if fix_classification_files and cleanup:
            # Fix classification for the files that remain in the system
            for f in fix_classification_files:
                cur_file = self.file.get(f, as_obj=False)
                if cur_file:
                    # Find possible classification for the file in the system
                    query = f"NOT id:{sid} AND (files.sha256:{f} OR results:{f}* OR errors:{f}*)"
                    classifications = list(
                        self.submission.facet('classification',
                                              query=query).keys())

                    if len(classifications) > 0:
                        new_file_class = classifications[0]
                    else:
                        new_file_class = cl_engine.UNRESTRICTED

                    for c in classifications:
                        new_file_class = cl_engine.min_classification(
                            new_file_class, c)

                    # Find the results for that file and alter them if the new classification does not match
                    for item in self.result.stream_search(
                            f"id:{f}*",
                            fl="classification,id,_index",
                            as_obj=False):
                        new_class = cl_engine.max_classification(
                            item.get('classification', cl_engine.UNRESTRICTED),
                            new_file_class)
                        if item.get('classification',
                                    cl_engine.UNRESTRICTED) != new_class:
                            data = cl_engine.get_access_control_parts(
                                new_class)
                            data['classification'] = new_class
                            r_plan.add_update_operation(item['id'],
                                                        data,
                                                        index=item['_index'])

                    # Alter the file classification if the new classification does not match
                    if cur_file['classification'] != new_file_class:
                        data = cl_engine.get_access_control_parts(
                            new_file_class)
                        data['classification'] = new_file_class
                        f_plan.add_update_operation(f, data)
                    # Fix associated supplementary files
                    for supp in supp_map.get(f, set()):
                        cur_supp = self.file.get(supp, as_obj=False)
                        if cur_supp:
                            if cur_supp['classification'] != new_file_class:
                                data = cl_engine.get_access_control_parts(
                                    new_file_class)
                                data['classification'] = new_file_class
                                f_plan.add_update_operation(supp, data)

        # Proceed with plan
        self.multi_index_bulk(
            [s_plan, st_plan, ss_plan, e_plan, er_plan, r_plan, f_plan])
Example #7
    def try_run(self):
        try:
            self.service_class = load_module_by_path(SERVICE_PATH)
        except ValueError:
            raise
        except Exception:
            LOG.error(
                "Could not find service in path. Check your environment variables."
            )
            raise

        self.load_service_manifest()

        if not os.path.isfile(FILE_PATH):
            LOG.info(f"File not found: {FILE_PATH}")
            return

        self.file_dir = os.path.dirname(FILE_PATH)

        # Get filename and working dir
        file_name = os.path.basename(FILE_PATH)
        working_dir = os.path.join(
            self.file_dir,
            f'{os.path.basename(FILE_PATH)}_{SERVICE_NAME.lower()}')

        # Start service
        self.service.start_service()

        # Identify the file
        file_info = self.identify.fileinfo(FILE_PATH)
        if file_info['type'] == "archive/cart" or file_info['magic'] == "custom: archive/cart":
            # This is a CaRT archive, un-CaRT it and recreate the file info object
            original_temp = os.path.join(tempfile.gettempdir(),
                                         file_info['sha256'])
            with open(FILE_PATH, 'rb') as ifile, open(original_temp,
                                                      'wb') as ofile:
                unpack_stream(ifile, ofile)

            file_info = self.identify.fileinfo(original_temp)
            target_file = os.path.join(tempfile.gettempdir(),
                                       file_info['sha256'])
            shutil.move(original_temp, target_file)
            LOG.info(
                f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing"
            )

        else:
            # It is not a CaRT archive, move the file to the right place to be processed
            target_file = os.path.join(tempfile.gettempdir(),
                                       file_info['sha256'])
            shutil.copyfile(FILE_PATH, target_file)

        # Create service processing task
        service_task = ServiceTask(
            dict(
                sid=get_random_id(),
                metadata={},
                service_name=SERVICE_NAME,
                service_config=self.submission_params,
                fileinfo=dict(
                    magic=file_info['magic'],
                    md5=file_info['md5'],
                    mime=file_info['mime'],
                    sha1=file_info['sha1'],
                    sha256=file_info['sha256'],
                    size=file_info['size'],
                    type=file_info['type'],
                ),
                filename=file_name,
                min_classification=forge.get_classification().UNRESTRICTED,
                max_files=501,  # TODO: get the actual value
                ttl=3600))

        LOG.info(f"Starting task with SID: {service_task.sid}")

        # Set the working directory to a directory with same parent as input file
        if os.path.isdir(working_dir):
            shutil.rmtree(working_dir)
        if not os.path.isdir(working_dir):
            os.makedirs(os.path.join(working_dir, 'working_directory'))

        self.service.handle_task(service_task)

        # Move the result.json and extracted/supplementary files to the working directory
        source = os.path.join(tempfile.gettempdir(), 'working_directory')
        if not os.path.exists(source):
            os.makedirs(source)

        files = os.listdir(source)
        for f in files:
            shutil.move(os.path.join(source, f),
                        os.path.join(working_dir, 'working_directory'))

        # Cleanup files from the original directory created by the service base
        shutil.rmtree(source)

        result_json = os.path.join(
            tempfile.gettempdir(),
            f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

        if not os.path.exists(result_json):
            raise Exception("A service error occurred and no result json was found.")

        # Validate the generated result
        with open(result_json, 'r') as fh:
            try:
                result = json.load(fh)
                result.pop('temp_submission_data', None)
                for file in result['response']['extracted'] + result['response']['supplementary']:
                    file.pop('path', None)

                # Load heuristics
                heuristics = get_heuristics()

                # Transform heuristics and calculate score
                total_score = 0
                for section in result['result']['sections']:
                    # Ignore tag and sig safe flags since we have no connection to the safelist
                    section.pop('zeroize_on_tag_safe', None)
                    section.pop('zeroize_on_sig_safe', None)

                    if section['heuristic']:
                        heur_id = section['heuristic']['heur_id']

                        try:
                            section['heuristic'], new_tags = HeuristicHandler().service_heuristic_to_result_heuristic(
                                section['heuristic'], heuristics)
                            for tag in new_tags:
                                section['tags'].setdefault(tag[0], [])
                                if tag[1] not in section['tags'][tag[0]]:
                                    section['tags'][tag[0]].append(tag[1])
                            total_score += section['heuristic']['score']
                        except InvalidHeuristicException:
                            section['heuristic'] = None

                        # Only sections that kept a valid heuristic get the resolved name
                        if section['heuristic']:
                            section['heuristic']['name'] = heuristics[heur_id]['name']
                result['result']['score'] = total_score

                # Add timestamps for creation, archive and expiry
                result['created'] = now_as_iso()
                result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
                result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 *
                                                 60)

                result = Result(result)

                # Print the result on console if in debug mode
                if args.debug:
                    f"{SERVICE_NAME.upper()}-RESULT".center(60, '-')
                    for line in pprint.pformat(
                            result.result.as_primitives()).split('\n'):
                        LOG.debug(line)
            except Exception as e:
                LOG.error(f"Invalid result created: {str(e)}")

        LOG.info(
            f"Cleaning up file used for temporary processing: {target_file}")
        os.unlink(target_file)

        LOG.info(
            f"Moving {result_json} to the working directory: {working_dir}/result.json"
        )
        shutil.move(result_json, os.path.join(working_dir, 'result.json'))

        LOG.info(
            f"Successfully completed task. Output directory: {working_dir}")
Example #8
    def __init__(self,
                 config=None,
                 datastore=None,
                 redis=None,
                 redis_persist=None):
        super().__init__('assemblyline.scaler',
                         config=config,
                         datastore=datastore,
                         redis=redis,
                         redis_persist=redis_persist)

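        # Queues, tables and event channels used to track service timeouts,
        # errors, status and configuration changes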
        self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE,
                                               host=self.redis_persist)
        self.error_count_lock = threading.Lock()
        self.error_count: dict[str, list[float]] = {}
        self.status_table = ExpiringHash(SERVICE_STATE_HASH,
                                         host=self.redis,
                                         ttl=30 * 60)
        self.service_event_sender = EventSender('changes.services',
                                                host=self.redis)
        self.service_change_watcher = EventWatcher(
            self.redis, deserializer=ServiceChange.deserialize)
        self.service_change_watcher.register('changes.services.*',
                                             self._handle_service_change_event)

        core_env: dict[str, str] = {}
        # If we have privileged services, we must be able to pass the necessary environment variables for them to
        # function properly.
        with open('/etc/assemblyline/config.yml', 'r') as config_yml:
            raw_config = config_yml.read()
        for secret in re.findall(r'\${\w+}', raw_config) + ['UI_SERVER']:
            env_name = secret.strip("${}")
            core_env[env_name] = os.environ[env_name]

        labels = {
            'app': 'assemblyline',
            'section': 'service',
            'privilege': 'service'
        }

        if self.config.core.scaler.additional_labels:
            labels.update({
                k: v for k, v in (_l.split("=") for _l in self.config.core.scaler.additional_labels)
            })

        if KUBERNETES_AL_CONFIG:
            self.log.info(
                f"Loading Kubernetes cluster interface on namespace: {NAMESPACE}"
            )
            self.controller = KubernetesController(
                logger=self.log,
                prefix='alsvc_',
                labels=labels,
                namespace=NAMESPACE,
                priority='al-service-priority',
                cpu_reservation=self.config.services.cpu_reservation,
                log_level=self.config.logging.log_level,
                core_env=core_env)
            # If we know where to find it, mount the classification into the service containers
            if CLASSIFICATION_CONFIGMAP:
                self.controller.config_mount(
                    'classification-config',
                    config_map=CLASSIFICATION_CONFIGMAP,
                    key=CLASSIFICATION_CONFIGMAP_KEY,
                    target_path='/etc/assemblyline/classification.yml')
            if CONFIGURATION_CONFIGMAP:
                self.controller.core_config_mount(
                    'assemblyline-config',
                    config_map=CONFIGURATION_CONFIGMAP,
                    key=CONFIGURATION_CONFIGMAP_KEY,
                    target_path='/etc/assemblyline/config.yml')
        else:
            self.log.info("Loading Docker cluster interface.")
            self.controller = DockerController(
                logger=self.log,
                prefix=NAMESPACE,
                labels=labels,
                log_level=self.config.logging.log_level,
                core_env=core_env)
            self._service_stage_hash.delete()

            if DOCKER_CONFIGURATION_PATH and DOCKER_CONFIGURATION_VOLUME:
                self.controller.core_mounts.append(
                    (DOCKER_CONFIGURATION_VOLUME, '/etc/assemblyline/'))

                with open(
                        os.path.join(DOCKER_CONFIGURATION_PATH, 'config.yml'),
                        'w') as handle:
                    yaml.dump(self.config.as_primitives(), handle)

                with open(
                        os.path.join(DOCKER_CONFIGURATION_PATH,
                                     'classification.yml'), 'w') as handle:
                    yaml.dump(get_classification().original_definition, handle)

            # If we know where to find it, mount the classification into the service containers
            if CLASSIFICATION_HOST_PATH:
                self.controller.global_mounts.append(
                    (CLASSIFICATION_HOST_PATH,
                     '/etc/assemblyline/classification.yml'))

        # Information about services
        self.profiles: dict[str, ServiceProfile] = {}
        self.profiles_lock = threading.RLock()

        # Prepare a single threaded scheduler
        self.state = collection.Collection(
            period=self.config.core.metrics.export_interval)
        self.stopping = threading.Event()
        self.main_loop_exit = threading.Event()

        # Load the APM connection if any
        self.apm_client = None
        if self.config.core.metrics.apm_server.server_url:
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="scaler")
Example #9
import functools

from assemblyline.common import version
from assemblyline.common.logformat import AL_LOG_FORMAT
from assemblyline.common import forge, log as al_log
from assemblyline.remote.datatypes.hash import Hash
from assemblyline.remote.datatypes.queues.comms import CommsQueue
from assemblyline.remote.datatypes.set import ExpiringSet
from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker

config = forge.get_config()

#################################################################
# Configuration

CLASSIFICATION = forge.get_classification()

ALLOW_RAW_DOWNLOADS = config.ui.allow_raw_downloads
APP_ID = "https://%s" % config.ui.fqdn
APP_NAME = "Assemblyline"
AUDIT = config.ui.audit

SECRET_KEY = config.ui.secret_key
DEBUG = config.ui.debug
DOWNLOAD_ENCODING = config.ui.download_encoding
MAX_CLASSIFICATION = CLASSIFICATION.UNRESTRICTED
ORGANISATION = config.system.organisation
SYSTEM_TYPE = config.system.type

BUILD_MASTER = version.FRAMEWORK_VERSION
BUILD_LOWER = version.SYSTEM_VERSION
Example #10
def create_menu(user, path):
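    # Build the user's navigation menu; entries are flagged active based on the
    # current path, and the pseudo-group "ALL" is prepended to the user's groups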
    user['groups'].insert(0, "ALL")

    submission_submenu = [{
        "class": "dropdown-header",
        "active": False,
        "link": None,
        "title": "Personal"
    }, {
        "class": "",
        "active": (path == "/submissions.html?user=%s" % user['uname']),
        "link": "/submissions.html?user=%s" % user['uname'],
        "title": "My Submissions"
    }, {
        "class": "divider",
        "active": False,
        "link": None,
        "title": None
    }, {
        "class": "dropdown-header",
        "active": False,
        "link": None,
        "title": "Groups"
    }]

    submission_submenu.extend([{
        "class": "",
        "active": (path == "/submissions.html?group=%s" % x),
        "link": "/submissions.html?group=%s" % x,
        "title": x
    } for x in user['groups']])

    help_submenu = [{
        "class": "dropdown-header",
        "active": False,
        "link": None,
        "title": "Documentation"
    }, {
        "class": "",
        "active": path.startswith("/api_doc.html"),
        "link": "/api_doc.html",
        "title": "API Documentation"
    }]

    if forge.get_classification().enforce:
        help_submenu.extend([{
            "class": "",
            "active": path.startswith("/classification_help.html"),
            "link": "/classification_help.html",
            "title": "Classification Help"
        }])

    if not config.ui.read_only:
        help_submenu.extend([{
            "class": "",
            "active": path.startswith("/configuration.html"),
            "link": "/configuration.html",
            "title": "Configuration Settings"
        }])

    help_submenu.extend([{
        "class": "",
        "active": path.startswith("/search_help.html"),
        "link": "/search_help.html",
        "title": "Search Help"
    }])

    if not config.ui.read_only:
        help_submenu.extend([{
            "class": "",
            "active": path.startswith("/services.html"),
            "link": "/services.html",
            "title": "Service Listing"
        }, {
            "class": "divider",
            "active": False,
            "link": None,
            "title": None
        }, {
            "class": "dropdown-header",
            "active": False,
            "link": None,
            "title": "Heuristics"
        }, {
            "class": "",
            "active": path.startswith("/heuristics.html"),
            "link": "/heuristics.html",
            "title": "Malware Heuristics"
        }, {
            "class": "divider",
            "active": False,
            "link": None,
            "title": None
        }, {
            "class": "dropdown-header",
            "active": False,
            "link": None,
            "title": "Statistics"
        }, {
            "class": "",
            "active": path.startswith("/heuristics_stats.html"),
            "link": "/heuristics_stats.html",
            "title": "Heuristic Statistics"
        }, {
            "class": "",
            "active": path.startswith("/signature_statistics.html"),
            "link": "/signature_statistics.html",
            "title": "Signature Statistics"
        }])

    alerting_submenu = [{
        "class": "",
        "active": path.startswith("/alerts.html"),
        "link": "/alerts.html",
        "title": "View Alerts",
        "has_submenu": False
    }, {
        "class": "",
        "active": path.startswith("/workflows.html"),
        "link": "/workflows.html",
        "title": "Workflow filters",
        "has_submenu": False
    }]

    menu = [{
        "class": "",
        "active": path.split("?")[0] == "/" or path.startswith("/submit.html"),
        "link": "/submit.html",
        "title": "Submit",
        "has_submenu": False
    }, {
        "class": "",
        "active": path.startswith("/submissions.html"),
        "link": "#",
        "title": "Submissions",
        "has_submenu": True,
        "submenu": submission_submenu
    }, {
        "class": "",
        "active": path.startswith("/alerts.html") or path.startswith("/workflows.html"),
        "link": "#",
        "title": "Alerts",
        "has_submenu": True,
        "submenu": alerting_submenu
    }]

    if not config.ui.read_only:
        if 'admin' in user['type'] or 'signature_manager' in user['type']:
            signature_submenu = [{
                "class": "",
                "active": path.startswith("/signatures.html"),
                "link": "/signatures.html",
                "title": "Signature management",
                "has_submenu": False
            }, {
                "class": "",
                "active": path.startswith("/source_management.html"),
                "link": "/source_management.html",
                "title": "Source management",
                "has_submenu": False
            }]

            menu.append({
                "class": "",
                "active": path.startswith("/signatures.html")
                or path.startswith("/source_management.html"),
                "link": "#",
                "title": "Signatures",
                "has_submenu": True,
                "submenu": signature_submenu
            })
        else:
            menu.append({
                "class": "",
                "active": path.startswith("/signatures.html"),
                "link": "/signatures.html",
                "title": "Signatures",
                "has_submenu": False
            })

    search_submenu = [
        {
            "class": "",
            "active": path.startswith("/search.html")
            and ("search_scope=all" in path or "search_scope" not in path),
            "link": "/search.html",
            "title": "All indexes",
            "has_submenu": False
        },
        {
            "class": "divider",
            "active": False,
            "link": None,
            "title": None
        },
        {
            "class": "dropdown-header",
            "active": False,
            "link": None,
            "title": "Specific indexes"
        },
    ]

    for idx in ["Alert", "File", "Result", "Signature", "Submission"]:
        search_submenu.append({
            "class": "",
            "active": path.startswith("/search.html") and f"search_scope={idx.lower()}" in path,
            "link": f"/search.html?search_scope={idx.lower()}",
            "title": f"{idx} Index",
            "has_submenu": False
        })

    menu.extend([{
        "class": "",
        "active": path.startswith("/search.html"),
        "link": "/search.html",
        "title": "Search",
        "has_submenu": True,
        "submenu": search_submenu
    }, {
        "class": "",
        "active": path.startswith("/api_doc.html")
        or path.startswith("/classification_help.html")
        or path.startswith("/configuration.html")
        or path.startswith("/heuristics.html")
        or path.startswith("/heuristics_stats.html")
        or path.startswith("/signature_statistics.html")
        or path.startswith("/search_help.html")
        or path.startswith("/services.html"),
        "link": "#",
        "title": "Help",
        "has_submenu": True,
        "submenu": help_submenu
    }])

    return menu
Example #11
    def __init__(self, *args, yml_config=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.engine = forge.get_classification(yml_config=yml_config)
def test_dispatch_file(clean_redis):
    service_queue = lambda name: get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=[
        'submission', 'result', 'service', 'error', 'file', 'filescore'
    ])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(
        sid,
        SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)
    print('==== first dispatch')
    # Submit a file, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash,
                          type='unknown',
                          magic='a',
                          md5=get_random_hash(32),
                          mime='a',
                          sha1=get_random_hash(40),
                          size=10),
        'depth': 0,
        'max_files': 5
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2
    # assert len(mq) == 4

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_recoverable(file_hash, 'extract')

    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    # assert len(mq) == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1
    def __init__(self,
                 datastore=None,
                 logger=None,
                 classification=None,
                 redis=None,
                 persistent_redis=None,
                 metrics_name='ingester',
                 config=None):
        super().__init__('assemblyline.ingester',
                         logger,
                         redis=redis,
                         redis_persist=persistent_redis,
                         datastore=datastore,
                         config=config)

        # Cache the user groups
        self.cache_lock = threading.RLock()
        self._user_groups = {}
        self._user_groups_reset = time.time() // HOUR_IN_SECONDS
        self.cache = {}
        self.notification_queues = {}
        self.whitelisted = {}
        self.whitelisted_lock = threading.RLock()

        # Module path parameters are fixed at start time. Changing these involves a restart
        self.is_low_priority = load_module_by_path(
            self.config.core.ingester.is_low_priority)
        self.get_whitelist_verdict = load_module_by_path(
            self.config.core.ingester.get_whitelist_verdict)
        self.whitelist = load_module_by_path(
            self.config.core.ingester.whitelist)

        # Constants are loaded based on a non-constant path, so this has to be done at init rather than at load
        constants = forge.get_constants(self.config)
        self.priority_value: dict[str, int] = constants.PRIORITIES
        self.priority_range: dict[str, Tuple[int, int]] = constants.PRIORITY_RANGES
        self.threshold_value: dict[str, int] = constants.PRIORITY_THRESHOLDS

        # Classification engine
        self.ce = classification or forge.get_classification()

        # Metrics gathering factory
        self.counter = MetricsFactory(metrics_type='ingester',
                                      schema=Metrics,
                                      redis=self.redis,
                                      config=self.config,
                                      name=metrics_name)

        # State. The submissions in progress are stored in Redis in order to
        # persist this state and recover in case we crash.
        self.scanning = Hash('m-scanning-table', self.redis_persist)

        # Input. The dispatcher creates a record when any submission completes.
        self.complete_queue = NamedQueue(COMPLETE_QUEUE_NAME, self.redis)

        # Input. An external process places submission requests on this queue.
        self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.redis_persist)

        # Output. Duplicate our input traffic into this queue so it may be cloned by other systems
        self.traffic_queue = CommsQueue('submissions', self.redis)

        # Internal. Unique requests are placed in and processed from this queue.
        self.unique_queue = PriorityQueue('m-unique', self.redis_persist)

        # Internal, delay queue for retrying
        self.retry_queue = PriorityQueue('m-retry', self.redis_persist)

        # Internal, timeout watch queue
        self.timeout_queue: PriorityQueue[str] = PriorityQueue(
            'm-timeout', self.redis)

        # Internal, queue for processing duplicates
        #   When a duplicate file is detected (same cache key => same file, and same
        #   submission parameters) the file won't be ingested normally, but instead a reference
        #   will be written to a duplicate queue. Whenever a file is finished, in the complete
        #   method, not only is the original ingestion finalized, but all entries in the duplicate queue
        #   are finalized as well. This has the effect that all concurrent ingestion of the same file
        #   are 'merged' into a single submission to the system.
        self.duplicate_queue = MultiQueue(self.redis_persist)

        # Output. submissions that should have alerts generated
        self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.redis_persist)

        # Utility object to help submit tasks to dispatching
        self.submit_client = SubmissionClient(datastore=self.datastore,
                                              redis=self.redis)

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="ingester")
        else:
            self.apm_client = None