def _append_section(self, section: ResultSection) -> None:
    """Append a flattened dict snapshot of ``section`` to the section list.

    The heuristic is converted with ``get_heuristic_primitives`` and the tags
    are passed through ``unflatten``; every other field is copied from the
    section unchanged. The resulting dict is appended to
    ``self._flattened_sections``.
    """
    flattened_sections = self._flattened_sections
    snapshot = {
        'body': section.body,
        'classification': section.classification,
        'body_format': section.body_format,
        'depth': section.depth,
        'heuristic': get_heuristic_primitives(section.heuristic),
        'tags': unflatten(section.tags),
        'title_text': section.title_text,
        'zeroize_on_tag_safe': section.zeroize_on_tag_safe,
        'auto_collapse': section.auto_collapse,
    }
    flattened_sections.append(snapshot)
示例#2
0
 def _append_section(self, section: ResultSection) -> None:
     """Append a flattened dict snapshot of ``section`` to the section list.

     Tags are passed through ``unflatten``; the heuristic and all remaining
     fields are copied from the section unchanged. The resulting dict is
     appended to ``self._flattened_sections``.
     """
     flattened_sections = self._flattened_sections
     snapshot = {
         'body': section.body,
         'classification': section.classification,
         'body_format': section.body_format,
         'depth': section.depth,
         'heuristic': section.heuristic,
         'tags': unflatten(section.tags),
         'title_text': section.title_text,
     }
     flattened_sections.append(snapshot)
def test_result_for_service(datastore, login_session):
    """Fetch one randomly chosen result via the API and verify its key.

    A key is picked at random from ``file_res_list``; its first 64 characters
    are used as the file hash and its second dot-separated component as the
    service name. The first result returned by the API has its section tags
    rebuilt into a nested dict before being loaded into a ``Result`` whose
    computed key must be one of the known keys.
    """
    _, session, host = login_session

    picked_key = random.choice(file_res_list)
    rand_hash = picked_key[:64]
    service_name = picked_key.split('.')[1]

    url = f"{host}/api/v4/file/result/{rand_hash}/{service_name}/"
    api_data = get_api_data(session, url)
    first_result = api_data['results'][0]

    # The API returns tags as a list; convert them back to the nested form
    # that the Result model is built from.
    for section in first_result['result']['sections']:
        tag_dict = tag_list_to_dict(section['tags'])
        section['tags'] = unflatten(tag_dict)

    loaded = Result(first_result)
    assert loaded.build_key() in file_res_list
示例#4
0
def test_dict_flatten():
    """Check that ``flatten`` and ``unflatten`` round-trip a nested dict.

    Also checks that the flattened keys are the dotted paths of the leaves,
    in insertion order.
    """
    # NOTE: the "b.d" leaf is a one-element set literal ({2}), not the int 2.
    nested = {"a": {"b": {"c": 1}}, "b": {"d": {2}}}
    expected_keys = ["a.b.c", "b.d"]

    flat = flatten(nested)
    rebuilt = unflatten(flat)
    assert nested == rebuilt
    assert list(flat.keys()) == expected_keys
示例#5
0
 def validate_tags(tag_map):
     """Return ``tag_map`` after a round trip through the ``Tagging`` model.

     The flat map is unflattened, built into a ``Tagging`` object with
     ``construct_safe`` (whose second return value is ignored here), dumped
     back to primitives with null values stripped, and flattened again.
     """
     nested_tags = unflatten(tag_map)
     tagging, _ = construct_safe(Tagging, nested_tags)
     primitives = tagging.as_primitives(strip_null=True)
     return flatten(primitives)
示例#6
0
    def _handle_task_result(self, exec_time: int, task: ServiceTask,
                            result: Dict[str, Any], client_id, service_name,
                            freshen: bool, metric_factory):
        """Post-process a service result and hand it to the dispatcher.

        Steps, in order:
          1. Optionally (``freshen``) refresh the datastore entry of every
             extracted/supplementary file; if any of them is unknown to the
             datastore or absent from the filestore, return early.
          2. Resolve section heuristics and compute the total score.
          3. Stamp the result with creation/archive/expiry timestamps.
          4. Reduce the temporary submission data to new/changed keys that
             are not over the configured size limit.
          5. Safelist and validate the tags of every section.
          6. Build the ``Result`` object, report it through
             ``self.dispatch_client.service_finished``, bump a metric, log
             completion and mark the client as idle in the status table.

        ``result`` is modified in place throughout (sections, score,
        timestamps, and the ``temp_submission_data`` key is popped).

        Args:
            exec_time: Only used in the completion log message, where it is
                printed with an ``ms`` suffix when truthy.
            task: Task the result belongs to; ``ttl``, ``sid`` and
                ``temporary_submission_data`` are read from it.
            result: Raw result dictionary sent by the service.
            client_id: Identifier used in the log line and as the status
                table key.
            service_name: Name of the service; upper-cased to prefix
                heuristic IDs.
            freshen: When true, run step 1 above.
            metric_factory: Object whose ``increment`` method is called with
                ``'scored'`` or ``'not_scored'``.

        Returns:
            A list of sha256 hashes of missing files when ``freshen`` is set
            and at least one file is missing; otherwise ``None``.
        """
        def freshen_file(file_info_list, item):
            # Returns True when the file is missing (no datastore record or
            # not present in the filestore), False once it has been freshened.
            # archive_ts / expiry_ts are closed over from the enclosing scope;
            # they are assigned below, before this helper is ever called.
            file_info = file_info_list.get(item['sha256'], None)
            if file_info is None or not self.filestore.exists(item['sha256']):
                return True
            else:
                file_info['archive_ts'] = archive_ts
                file_info['expiry_ts'] = expiry_ts
                file_info['classification'] = item['classification']
                self.datastore.save_or_freshen_file(
                    item['sha256'],
                    file_info,
                    file_info['expiry_ts'],
                    file_info['classification'],
                    is_section_image=item.get('is_section_image', False))
            return False

        # Configured number of days converted to seconds for now_as_iso
        # (presumably an offset from the current time -- TODO confirm).
        archive_ts = now_as_iso(self.config.datastore.ilm.days_until_archive *
                                24 * 60 * 60)
        # A falsy TTL means the result gets no expiry timestamp.
        if task.ttl:
            expiry_ts = now_as_iso(task.ttl * 24 * 60 * 60)
        else:
            expiry_ts = None

        # Check if all files are in the filestore
        if freshen:
            missing_files = []
            # De-duplicated hashes of every extracted and supplementary file
            hashes = list(
                set([
                    f['sha256'] for f in result['response']['extracted'] +
                    result['response']['supplementary']
                ]))
            file_infos = self.datastore.file.multiget(hashes,
                                                      as_obj=False,
                                                      error_on_missing=False)

            with elasticapm.capture_span(
                    name="handle_task_result.freshen_files",
                    span_type="tasking_client"):
                # Freshen the files concurrently; leaving the executor's
                # ``with`` block waits for all submitted calls to finish.
                # Futures are keyed by sha256, so a hash listed more than once
                # keeps only its last future.
                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=5) as executor:
                    res = {
                        f['sha256']: executor.submit(freshen_file, file_infos,
                                                     f)
                        for f in result['response']['extracted'] +
                        result['response']['supplementary']
                    }
                for k, v in res.items():
                    if v.result():
                        missing_files.append(k)

            # Stop here and report the missing hashes to the caller; nothing
            # below runs and the result is not saved.
            if missing_files:
                return missing_files

        # Add scores to the heuristics, if any section set a heuristic
        with elasticapm.capture_span(
                name="handle_task_result.process_heuristics",
                span_type="tasking_client"):
            total_score = 0
            for section in result['result']['sections']:
                # Removed from the section; defaults to True when not provided
                zeroize_on_sig_safe = section.pop('zeroize_on_sig_safe', True)
                # Tags are kept flat until the tag-processing step further down
                section['tags'] = flatten(section['tags'])
                if section.get('heuristic'):
                    # Prefix the heuristic ID with the upper-cased service name
                    heur_id = f"{service_name.upper()}.{str(section['heuristic']['heur_id'])}"
                    section['heuristic']['heur_id'] = heur_id
                    try:
                        section[
                            'heuristic'], new_tags = self.heuristic_handler.service_heuristic_to_result_heuristic(
                                section['heuristic'], self.heuristics,
                                zeroize_on_sig_safe)
                        # Merge the (tag name, value) pairs returned by the
                        # handler into the section tags without duplicates
                        for tag in new_tags:
                            section['tags'].setdefault(tag[0], [])
                            if tag[1] not in section['tags'][tag[0]]:
                                section['tags'][tag[0]].append(tag[1])
                        total_score += section['heuristic']['score']
                    except InvalidHeuristicException:
                        # An invalid heuristic is dropped and adds no score
                        section['heuristic'] = None

        # Update the total score of the result
        result['result']['score'] = total_score

        # Add timestamps for creation, archive and expiry
        result['created'] = now_as_iso()
        result['archive_ts'] = archive_ts
        result['expiry_ts'] = expiry_ts

        # Pop the temporary submission data
        temp_submission_data = result.pop('temp_submission_data', None)
        if temp_submission_data:
            # Values the task was handed when it was dispatched
            old_submission_data = {
                row.name: row.value
                for row in task.temporary_submission_data
            }
            # Keep only keys that are new or whose value changed
            temp_submission_data = {
                k: v
                for k, v in temp_submission_data.items()
                if k not in old_submission_data or v != old_submission_data[k]
            }
            # Keys whose stringified value exceeds the configured maximum,
            # mapped to that stringified length
            big_temp_data = {
                k: len(str(v))
                for k, v in temp_submission_data.items()
                if len(str(v)) > self.config.submission.max_temp_data_length
            }
            if big_temp_data:
                big_data_sizes = [f"{k}={v}" for k, v in big_temp_data.items()]
                self.log.warning(
                    f"[{task.sid}] The following temporary submission keys where ignored because they are "
                    "bigger then the maximum data size allowed "
                    f"[{self.config.submission.max_temp_data_length}]: {' | '.join(big_data_sizes)}"
                )
                # Drop the oversized entries
                temp_submission_data = {
                    k: v
                    for k, v in temp_submission_data.items()
                    if k not in big_temp_data
                }

        # Process the tag values
        with elasticapm.capture_span(name="handle_task_result.process_tags",
                                     span_type="tasking_client"):
            for section in result['result']['sections']:
                # Perform tag safelisting
                tags, safelisted_tags = self.tag_safelister.get_validated_tag_map(
                    section['tags'])
                section['tags'] = unflatten(tags)
                section['safelisted_tags'] = safelisted_tags

                # Build the Tagging model from the nested tags; ``dropped``
                # holds whatever construct_safe rejected (logged below)
                section['tags'], dropped = construct_safe(
                    Tagging, section.get('tags', {}))

                # Set section score to zero and lower total score if service is set to zeroize score
                # and all tags were safelisted
                if section.pop('zeroize_on_tag_safe', False) and \
                        section.get('heuristic') and \
                        len(tags) == 0 and \
                        len(safelisted_tags) != 0:
                    result['result']['score'] -= section['heuristic']['score']
                    section['heuristic']['score'] = 0

                if dropped:
                    self.log.warning(
                        f"[{task.sid}] Invalid tag data from {service_name}: {dropped}"
                    )

        # Convert the dict into the Result model and report it to the
        # dispatcher together with the filtered temporary submission data
        result = Result(result)
        result_key = result.build_key(
            service_tool_version=result.response.service_tool_version,
            task=task)
        self.dispatch_client.service_finished(task.sid, result_key, result,
                                              temp_submission_data)

        # Metrics
        if result.result.score > 0:
            metric_factory.increment('scored')
        else:
            metric_factory.increment('not_scored')

        self.log.info(
            f"[{task.sid}] {client_id} - {service_name} "
            f"successfully completed task {f' in {exec_time}ms' if exec_time else ''}"
        )

        # Mark the client as idle; the third element is the current time plus
        # five seconds (presumably a status expiry/timeout -- TODO confirm)
        self.status_table.set(
            client_id, (service_name, ServiceStatus.Idle, time.time() + 5))
示例#7
0
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding     => Type of encoding use for the resulting file
    name         => Name of the file to download
    password     => Password of the archive when the zip encoding is used
    sid          => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.",
                                 404)

    if not (user and Classification.is_accessible(
            user['classification'], file_obj['classification'])):
        return make_api_response({},
                                 "You are not allowed to download this file.",
                                 403)

    params = load_user_settings(user)

    # Only the final path component of the requested name is used
    name = request.args.get('name', sha256) or sha256
    name = os.path.basename(name)
    name = safe_str(name)

    sid = request.args.get('sid', None) or None
    submission = {}
    file_metadata = {}
    if sid is not None:
        submission = STORAGE.submission.get(sid, as_obj=False)
        if submission is None:
            submission = {}

        # An unknown sid leaves ``submission`` empty: skip the metadata
        # instead of failing on the missing 'classification' key.
        if submission and Classification.is_accessible(
                user['classification'], submission['classification']):
            file_metadata.update(unflatten(submission['metadata']))

    if Classification.enforce:
        # Without a submission, fall back to the file's own classification
        submission_classification = submission.get(
            'classification', file_obj['classification'])
        file_metadata['classification'] = Classification.max_classification(
            submission_classification, file_obj['classification'])

    # Request arguments take precedence over the user's saved settings
    encoding = request.args.get('encoding', params['download_encoding'])
    password = request.args.get('password', params['default_zip_password'])

    if encoding not in FILE_DOWNLOAD_ENCODINGS:
        return make_api_response(
            {},
            f"{encoding.upper()} is not in the valid encoding types: {FILE_DOWNLOAD_ENCODINGS}",
            403)

    if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
        return make_api_response(
            {}, "RAW file download has been disabled by administrators.", 403)

    if encoding == "zip":
        if not ALLOW_ZIP_DOWNLOADS:
            return make_api_response(
                {},
                "PROTECTED file download has been disabled by administrators.",
                403)
        elif not password:
            return make_api_response(
                {}, "No password given or retrieved from user's settings.",
                403)

    download_dir = None
    target_path = None

    # Create a temporary download location
    if encoding == 'zip':
        # The archive member is named after the file on disk, so the download
        # goes into a private directory under the requested name.
        download_dir = tempfile.mkdtemp()
        download_path = os.path.join(download_dir, name)
    else:
        # mkstemp returns an already-open OS-level descriptor; close it right
        # away so it is not leaked on every request.
        fd, download_path = tempfile.mkstemp()
        os.close(fd)

    try:
        downloaded_from = FILESTORE.download(sha256, download_path)

        if not downloaded_from:
            return make_api_response(
                {}, "The file was not found in the system.", 404)

        # Encode file
        if encoding == 'raw':
            target_path = download_path
        elif encoding == 'zip':
            name += '.zip'
            target_path = os.path.join(download_dir, name)
            # NOTE(review): the password is passed as a command-line argument
            # and may therefore be visible to other local processes.
            subprocess.run([
                'zip', '-j', '--password', password, target_path,
                download_path
            ],
                           capture_output=True)
        else:
            target_path, name = encode_file(download_path, name,
                                            file_metadata)

        # The file is opened before the cleanup below unlinks its path
        return stream_file_response(open(target_path, 'rb'), name,
                                    os.path.getsize(target_path))

    finally:
        # Cleanup: files first, then the (now empty) zip directory
        if target_path and os.path.exists(target_path):
            os.unlink(target_path)
        if download_path and os.path.exists(download_path):
            os.unlink(download_path)
        if download_dir and os.path.exists(download_dir):
            os.rmdir(download_dir)
示例#8
0
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding     => Type of encoding use for the resulting file
    name         => Name of the file to download
    sid          => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if not (user and Classification.is_accessible(user['classification'], file_obj['classification'])):
        return make_api_response({}, "You are not allowed to download this file.", 403)

    params = load_user_settings(user)

    # Only the final path component of the requested name is used
    name = request.args.get('name', sha256) or sha256
    name = os.path.basename(name)
    name = safe_str(name)

    sid = request.args.get('sid', None) or None
    submission = {}
    submission_meta = {}
    if sid is not None:
        submission = STORAGE.submission.get(sid, as_obj=False)
        if submission is None:
            submission = {}

        # Collect every hash tied to the submission: the first submitted file plus the
        # 64-character hash prefix of each error and result key. An unknown sid (or a
        # submission without files) yields an empty list and is rejected below instead
        # of raising an IndexError.
        submitted_files = submission.get('files', [])
        hash_list = [submitted_files[0].get('sha256', None)] if submitted_files else []
        hash_list.extend(x[:64] for x in submission.get('errors', []))
        hash_list.extend(x[:64] for x in submission.get('results', []))

        if sha256 not in hash_list:
            return make_api_response({}, f"File {sha256} is not associated to submission {sid}.", 403)

        if Classification.is_accessible(user['classification'], submission['classification']):
            submission_meta.update(unflatten(submission['metadata']))

    if Classification.enforce:
        # Without a submission, fall back to the file's own classification
        submission_classification = submission.get('classification', file_obj['classification'])
        submission_meta['classification'] = Classification.max_classification(submission_classification,
                                                                              file_obj['classification'])

    encoding = request.args.get('encoding', params['download_encoding'])
    if encoding not in ['raw', 'cart']:
        return make_api_response({}, f"{encoding.upper()} is not in the valid encoding types: [raw, cart]", 403)

    if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
        return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

    # mkstemp returns an already-open OS-level descriptor; close it right away so it
    # is not leaked on every request.
    fd, download_path = tempfile.mkstemp()
    os.close(fd)

    target_path = None
    try:
        with forge.get_filestore() as f_transport:
            downloaded_from = f_transport.download(sha256, download_path)

        if not downloaded_from:
            return make_api_response({}, "The file was not found in the system.", 404)

        if encoding == 'raw':
            target_path = download_path
        else:
            target_path, name = encode_file(download_path, name, submission_meta)

        # The file is opened before the cleanup below unlinks its path
        return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))
    finally:
        # Cleanup: encoded output first, then the raw download (same path for 'raw')
        if target_path and os.path.exists(target_path):
            os.unlink(target_path)
        if download_path and os.path.exists(download_path):
            os.unlink(download_path)