示例#1
0
def test_submit_binary(datastore, login_session):
    """Upload a random binary file through the submit API and check it is queued.

    A random phrase is written to a temporary file and posted to
    ``/api/v4/submit/`` as the multipart ``bin`` field, with the submission
    parameters carried in the ``json`` form field.  The response must report
    the payload's SHA-256 and the requested file name, and the task popped
    from ``sq`` must carry the sid returned by the API.
    """
    _, session, host = login_session

    # Empty the queue first so the pop at the end can only return the task
    # produced by this request.
    sq.delete()

    payload = get_random_phrase(wmin=30, wmax=75).encode()
    expected_sha256 = hashlib.sha256(payload).hexdigest()
    json_data = {
        'name': 'text.txt',
        'metadata': {'test': 'test_submit_binary'}
    }
    form = {'json': json.dumps(json_data)}

    fd, temp_path = tempfile.mkstemp()
    try:
        # mkstemp hands back an open descriptor; wrap it so it is closed
        # once the payload has been written.
        with os.fdopen(fd, 'wb') as fh:
            fh.write(payload)

        with open(temp_path, 'rb') as fh:
            resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST", data=form,
                                files={'bin': fh}, headers={})

        assert isinstance(resp['sid'], str)
        for f in resp['files']:
            assert f['sha256'] == expected_sha256
            assert f['name'] == json_data['name']

        msg = SubmissionTask(sq.pop(blocking=False))
        assert msg.submission.sid == resp['sid']

    finally:
        # Best-effort cleanup: a temp file that cannot be removed must not
        # mask the real test outcome, but only filesystem errors are ignored.
        try:
            os.unlink(temp_path)
        except OSError:
            pass
示例#2
0
def test_resubmit(datastore, login_session):
    """Resubmit the shared ``submission`` and check the resulting task.

    The API must answer with a new sid, a description starting with
    'Resubmit', and only files that belonged to the original submission.
    The task popped from ``sq`` must carry the new sid.
    """
    _, session, host = login_session

    sq.delete()
    original_hashes = [entry.sha256 for entry in submission.files]
    url = f"{host}/api/v4/submit/resubmit/{submission.sid}/"
    resp = get_api_data(session, url)

    description = resp['params']['description']
    assert description.startswith('Resubmit')
    assert resp['sid'] != submission.sid
    for entry in resp['files']:
        assert entry['sha256'] in original_hashes

    queued = SubmissionTask(sq.pop(blocking=False))
    assert queued.submission.sid == resp['sid']
示例#3
0
def test_resubmit_dynamic(datastore, login_session):
    """Resubmit one file of the shared ``submission`` for dynamic analysis.

    The response must describe a resubmission for 'Dynamic Analysis', list
    only the requested file, select the 'Dynamic Analysis' service and carry
    a new sid.  The task popped from ``sq`` must carry that sid.
    """
    _, session, host = login_session

    sq.delete()
    # The first 64 characters of a result entry are used as the file hash
    # (presumably result keys are prefixed with the file's sha256 - confirm).
    target_hash = random.choice(submission.results)[:64]
    url = f"{host}/api/v4/submit/dynamic/{target_hash}/"
    resp = get_api_data(session, url)

    params = resp['params']
    assert params['description'].startswith('Resubmit')
    assert params['description'].endswith('Dynamic Analysis')
    assert resp['sid'] != submission.sid
    for entry in resp['files']:
        assert entry['sha256'] == target_hash
    assert 'Dynamic Analysis' in params['services']['selected']

    queued = SubmissionTask(sq.pop(blocking=False))
    assert queued.submission.sid == resp['sid']
示例#4
0
def test_submit_url(datastore, login_session):
    """Submit a file by URL and check that a matching task is queued.

    The request body is a JSON document with the URL, the desired file name
    and some metadata.  Every file in the response must carry the requested
    name, and the task popped from ``sq`` must carry the returned sid.
    """
    _, session, host = login_session

    sq.delete()
    request_body = {
        'url': 'https://www.cyber.gc.ca/en/theme-gcwu-fegc/assets/wmms.svg',
        'name': 'wmms.svg',
        'metadata': {'test': 'test_submit_url'}
    }
    encoded_body = json.dumps(request_body)
    resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST", data=encoded_body)

    assert isinstance(resp['sid'], str)
    for entry in resp['files']:
        assert entry['name'] == request_body['name']

    queued = SubmissionTask(sq.pop(blocking=False))
    assert queued.submission.sid == resp['sid']
示例#5
0
def test_submit_hash(datastore, login_session):
    """Submit a file by sha256 and check that a matching task is queued.

    The hash is taken from a random result entry of the shared
    ``submission``.  Every file in the response must carry that hash and the
    requested name, and the task popped from ``sq`` must carry the returned
    sid.
    """
    _, session, host = login_session

    sq.delete()
    request_body = {
        'sha256': random.choice(submission.results)[:64],
        'name': 'random_hash.txt',
        'metadata': {'test': 'test_submit_hash'}
    }
    encoded_body = json.dumps(request_body)
    resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST", data=encoded_body)

    assert isinstance(resp['sid'], str)
    for entry in resp['files']:
        assert entry['sha256'] == request_body['sha256']
        assert entry['name'] == request_body['name']

    queued = SubmissionTask(sq.pop(blocking=False))
    assert queued.submission.sid == resp['sid']
示例#6
0
    def dispatch_submission(self,
                            submission: Submission,
                            completed_queue: str = None):
        """Queue a submission for the dispatching system.

        The submission and the optional ``completed_queue`` value are wrapped
        in a SubmissionTask, converted to primitives and pushed onto
        ``self.submission_queue``.

        Note:
            You probably actually want to use the SubmissionTool

        Prerequisites:
            - submission should already be saved in the datastore
            - files should already be in the datastore and filestore
        """
        task = SubmissionTask({
            'submission': submission,
            'completed_queue': completed_queue,
        })
        self.submission_queue.push(task.as_primitives())
def test_dispatch_submission(clean_redis):
    """Dispatch a single-file submission, fail every service, dispatch again.

    The first dispatch must emit a depth-0 file task for the submitted file.
    Once every scheduled service has been marked as a non-recoverable failure
    for that file, a second dispatch must leave the stored submission in the
    'completed' state with one error per service.
    """
    ds = MockDatastore(
        collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)

    # Register the file and force its sha256 to match the key it is stored under.
    ds.file.save(file_hash, random_model_obj(models.file.File))
    ds.file.get(file_hash).sha256 = file_hash

    # Random submission reduced to exactly one known file.
    submission = random_model_obj(models.submission.Submission)
    submission.sid = 'first-submission'
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))

    disp = Dispatcher(ds,
                      redis=clean_redis,
                      redis_persist=clean_redis,
                      logger=logging)

    # First dispatch: the submitted file should show up on the file queue.
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    queued_file = FileTask(disp.file_queue.pop())
    assert queued_file.sid == submission.sid
    assert queued_file.file_info.sha256 == file_hash
    assert queued_file.depth == 0
    assert queued_file.file_info.type == ds.file.get(file_hash).type

    # Record a terminal failure for every service scheduled for this file.
    dispatch_hash = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dispatch_hash.fail_nonrecoverable(file_hash, service_name, 'error-code')

    # Second dispatch: nothing is left to run, so the submission is finalized.
    disp.dispatch_submission(task)
    expected_errors = ['error-code'] * len(disp.scheduler.services)
    assert ds.submission.get(submission.sid).state == 'completed'
    assert ds.submission.get(submission.sid).errors == expected_errors
    def try_run(self):
        """Pop messages off the dispatcher's submission queue and dispatch them.

        Runs for as long as ``self.running`` is true.  A message is either a
        complete submission task (it contains a ``'submission'`` key) or a
        nudge that only carries a ``'sid'``; for a nudge the task is looked up
        in ``self.dispatcher.active_submissions``.  Any other message is
        logged as corrupted and skipped.  An exception raised during an
        iteration is logged and the loop continues with the next message.
        """
        queue = self.dispatcher.submission_queue
        # Reference points for the cpu/busy time counters below.
        cpu_mark = time.process_time()
        time_mark = time.time()

        while self.running:
            try:
                self.heartbeat()
                # Report the time elapsed since the marks were last set.  The
                # marks are reset right after the pop below, so the time spent
                # waiting inside pop() is not included in these counters.
                self.dispatcher.counter.increment_execution_time(
                    'cpu_seconds',
                    time.process_time() - cpu_mark)
                self.dispatcher.counter.increment_execution_time(
                    'busy_seconds',
                    time.time() - time_mark)

                message = queue.pop(timeout=1)

                cpu_mark = time.process_time()
                time_mark = time.time()

                # Nothing was popped; go back to the top of the loop.
                if not message:
                    continue

                # Start of process dispatcher transaction
                if self.apm_client:
                    self.apm_client.begin_transaction(
                        'Process dispatcher message')

                # This is probably a complete task
                if 'submission' in message:
                    task = SubmissionTask(message)
                    if self.apm_client:
                        elasticapm.tag(sid=task.submission.sid)

                # This is just a sid nudge, this submission should already be running
                elif 'sid' in message:
                    active_task = self.dispatcher.active_submissions.get(
                        message['sid'])
                    if self.apm_client:
                        elasticapm.tag(sid=message['sid'])
                    if active_task is None:
                        self.log.warning(
                            f"[{message['sid']}] Dispatcher was nudged for inactive submission."
                        )
                        # End of process dispatcher transaction (inactive submission)
                        if self.apm_client:
                            self.apm_client.end_transaction(
                                'submission_message', 'inactive')
                        continue

                    task = SubmissionTask(active_task)

                else:
                    self.log.error(
                        f'Corrupted submission message in dispatcher {message}'
                    )
                    # End of process dispatcher transaction (corrupted message)
                    if self.apm_client:
                        self.apm_client.end_transaction(
                            'submission_message', 'corrupted')
                    continue

                self.dispatcher.dispatch_submission(task)

                # End of process dispatcher transaction (success)
                if self.apm_client:
                    self.apm_client.end_transaction('submission_message',
                                                    'success')

            except Exception as error:
                self.log.exception(error)
                # End of process dispatcher transaction (exception)
                # NOTE(review): this handler also runs when heartbeat() or
                # queue.pop() raised, i.e. before begin_transaction was called
                # in this iteration - confirm end_transaction tolerates that.
                if self.apm_client:
                    self.apm_client.end_transaction('submission_message',
                                                    'exception')
def test_dispatch_file(clean_redis):
    """Drive one file through every scheduling stage with repeated dispatch_file calls.

    The same file task is dispatched six times.  Between calls the dispatch
    hash is updated to mark services as finished or failed, and the test
    checks which service queues receive work next:

    1. extract and wrench are queued;
    2. an identical call queues them a second time;
    3. after a recoverable extract failure, extract is queued again;
    4. with extract finished and wrench failed, av-a, av-b and
       frankenstrings are queued;
    5. with those done, xerox is queued;
    6. with xerox finished, one message lands on the submission queue.
    """
    def service_queue(name):
        # Queue the dispatcher pushes work onto for the named service.
        return get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=[
        'submission', 'result', 'service', 'error', 'file', 'filescore'
    ])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(
        sid,
        SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)

    def clear_service_queues():
        # Empty every service queue so each stage only sees its own dispatches.
        for name in disp.scheduler.services:
            service_queue(name).delete()

    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash,
                          type='unknown',
                          magic='a',
                          md5=get_random_hash(32),
                          mime='a',
                          sha1=get_random_hash(40),
                          size=10),
        'depth': 0,
        'max_files': 5,
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2

    # Record a recoverable failure for extract and make sure the file gets
    # pushed into that service queue again
    print('==== third dispatch')
    clear_service_queues()
    dh.fail_recoverable(file_hash, 'extract')

    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    clear_service_queues()
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    clear_service_queues()
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    clear_service_queues()
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1
def test_dispatch_extracted(clean_redis):
    """Check that a file extracted during processing is dispatched and counted.

    A one-file submission is dispatched and all services are finished for that
    file, but a second (extracted) file is then added to the dispatch hash.
    Re-dispatching the submission must emit a depth-1 file task for the
    extracted file.  Once its services are finished too, a final dispatch
    must complete the submission with no errors and one result per service
    per file.
    """
    # Fake datastore holding the two files involved
    ds = MockDatastore(
        collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for sha in (file_hash, second_file_hash):
        ds.file.save(sha, random_model_obj(models.file.File))
        ds.file.get(sha).sha256 = sha

    # Fake submission that initially only knows about the first file
    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    # Dispatcher under test
    disp = Dispatcher(ds,
                      redis=clean_redis,
                      redis_persist=clean_redis,
                      logger=logging)

    # Launch the submission
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    # Check that the right values were sent to the file queue
    root_task = FileTask(disp.file_queue.pop(timeout=1))
    assert root_task.sid == submission.sid
    assert root_task.file_info.sha256 == file_hash
    assert root_task.depth == 0
    assert root_task.file_info.type == ds.file.get(file_hash).type

    # Finish every service for the first file
    dispatch_hash = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dispatch_hash.finish(file_hash, service_name, 'error-code', 0, 'U')

    # ...but one of the services extracted a second file
    dispatch_hash.add_file(second_file_hash, 10, file_hash)

    # Meanwhile, dispatch_submission is called again on the submission
    disp.dispatch_submission(task)

    # It should notice the unprocessed file and emit a new file dispatch
    # message for it, at depth 1: the first layer of extracted files
    raw_task = disp.file_queue.pop(timeout=1)
    assert raw_task is not None
    extracted_task = FileTask(raw_task)
    assert extracted_task.sid == submission.sid
    assert extracted_task.file_info.sha256 == second_file_hash
    assert extracted_task.depth == 1
    assert extracted_task.file_info.type == ds.file.get(second_file_hash).type

    # Finish every service for the second file
    for service_name in disp.scheduler.services.keys():
        dispatch_hash.finish(second_file_hash, service_name, 'error-code', 0, 'U')

    # Now the submission should come out finished
    disp.dispatch_submission(task)
    stored = ds.submission.get(submission.sid)
    assert stored.state == 'completed'
    assert stored.errors == []
    assert len(stored.results) == 2 * len(disp.scheduler.services)