def test_finish_missing_file(client, dispatch_client, heuristics):
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)
    fs = forge.get_filestore()

    result: Result = random_minimal_obj(Result)
    while not result.response.extracted:
        result: Result = random_model_obj(Result)
        result.response.extracted = [
            x for x in result.response.extracted if not fs.exists(x.sha256)
        ]
    missing = {
        x.sha256
        for x in result.response.extracted if not fs.exists(x.sha256)
    }
    missing |= {
        x.sha256
        for x in result.response.supplementary if not fs.exists(x.sha256)
    }

    message = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': True
    }
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert resp.json['api_response']['success'] is False
    assert set(resp.json['api_response']['missing_files']) == missing
示例#2
0
def get_file_ascii(sha256, **kwargs):
    """
    Return the ascii values for a file where ascii chars are replaced by DOTs.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments:
    None

    Data Block:
    None

    Result example:
    <THE ASCII FILE>
    """

    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({}, "This file was not found in the system.", 404)

        return make_api_response(data.translate(FILTER_ASCII).decode())
    else:
        return make_api_response({}, "You are not allowed to view this file.", 403)
示例#3
0
 def __init__(self,
              datastore: AssemblylineDatastore = None,
              filestore: FileStore = None,
              config=None,
              redis=None,
              redis_persist=None,
              identify=None):
     self.log = logging.getLogger('assemblyline.tasking_client')
     self.config = config or forge.CachedObject(forge.get_config)
     self.datastore = datastore or forge.get_datastore(self.config)
     self.dispatch_client = DispatchClient(self.datastore,
                                           redis=redis,
                                           redis_persist=redis_persist)
     self.event_sender = EventSender('changes.services', redis)
     self.filestore = filestore or forge.get_filestore(self.config)
     self.heuristic_handler = HeuristicHandler(self.datastore)
     self.heuristics = {
         h.heur_id: h
         for h in self.datastore.list_all_heuristics()
     }
     self.status_table = ExpiringHash(SERVICE_STATE_HASH,
                                      ttl=60 * 30,
                                      host=redis)
     self.tag_safelister = forge.CachedObject(forge.get_tag_safelister,
                                              kwargs=dict(
                                                  log=self.log,
                                                  config=config,
                                                  datastore=self.datastore),
                                              refresh=300)
     if identify:
         self.cleanup = False
     else:
         self.cleanup = True
     self.identify = identify or forge.get_identify(
         config=self.config, datastore=self.datastore, use_cache=True)
示例#4
0
    def __init__(self, shutdown_timeout: int = SHUTDOWN_SECONDS_LIMIT):
        super(RunPrivilegedService,
              self).__init__(f'assemblyline.service.{SERVICE_NAME}',
                             shutdown_timeout=shutdown_timeout)

        self.client_id = os.environ.get('HOSTNAME', 'dev-service')

        self.redis = get_client(
            host=self.config.core.redis.nonpersistent.host,
            port=self.config.core.redis.nonpersistent.port,
            private=False,
        )

        self.redis_persist = get_client(
            host=self.config.core.redis.persistent.host,
            port=self.config.core.redis.persistent.port,
            private=False,
        )

        self.tasking_client = TaskingClient(redis=self.redis,
                                            redis_persist=self.redis_persist)
        self.tasking_dir = os.environ.get('TASKING_DIR', tempfile.gettempdir())

        self.filestore = forge.get_filestore()

        self.service = None
        self.service_config = {}
        self.service_name = None
        self.service_tool_version = None

        self.status = STATUSES.INITIALIZING
        self.metric_factory = None

        self.log.setLevel(LOG_LEVEL)
def test_upload_file_bad_hash(client, file_datastore):
    fs = forge.get_filestore()

    file_size = 10003
    file_data = b'x' * file_size
    file_hash = hashlib.sha256(file_data).hexdigest()
    bad_hash = '0000' + file_hash[4:]

    fs.delete(file_hash)
    fs.delete(bad_hash)

    file_headers = dict(headers)
    file_headers['sha256'] = bad_hash
    file_headers['classification'] = 'U'
    file_headers['ttl'] = 1
    file_headers['Content-Type'] = 'application/octet-stream'

    try:
        response = client.put('/api/v1/file/',
                              headers=file_headers,
                              data=file_data)
        assert response.status_code in range(400, 500)
        assert not fs.exists(file_hash)
        assert not fs.exists(bad_hash)
        assert file_datastore.save_or_freshen_file.call_count == 0
    finally:
        fs.delete(file_hash)
        fs.delete(bad_hash)
示例#6
0
def get_file_hex(sha256, **kwargs):
    """
    Returns the file hex representation
    
    Variables: 
    sha256       => A resource locator for the file (sha256)
    
    Arguments: 
    None
    
    Data Block:
    None

    API call example:
    /api/v4/file/hex/123456...654321/

    Result example:
    <THE FILE HEX REPRESENTATION>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)
    
    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({}, "This file was not found in the system.", 404)

        return make_api_response(hexdump(data))
    else:
        return make_api_response({}, "You are not allowed to view this file.", 403)
示例#7
0
def perform_check():
    # If the service is privileged, test connectivity to core
    if environ.get('PRIVILEGED', 'false').lower() == 'true':
        forge.get_datastore()
        forge.get_filestore(connection_attempts=1)
        forge.get_service_queue(service=environ['AL_SERVICE_NAME'])
    else:
        # Otherwise, perform a test for service-server availability
        if not requests.get(f"{environ['SERVICE_API_HOST']}/healthz/live").ok:
            raise Exception('Unable to reach service-server')
    # If running with an updater, check for availability. Make sure test doesn't run on the actual updater.
    if environ.get('updates_host') and not environ['HOSTNAME'].startswith(
            environ['updates_host']):
        if not requests.get(
                f"http://{environ['updates_host']}:{environ['updates_port']}/healthz/live"
        ).ok:
            raise Exception('Unable to reach local update server')
    exit()
示例#8
0
def submission_delete_tree(key, logger):
    try:
        with forge.get_filestore() as f_transport:
            DATASTORE.delete_submission_tree(key, transport=f_transport)
    except Exception as e:
        logger.error(e)
        return "DELETE", "submission", key, False, isinstance(logger, PrintLogger)

    return "deleted", "submission", key, True, isinstance(logger, PrintLogger)
示例#9
0
    def __init__(self, datastore: AssemblylineDatastore = None, filestore: FileStore = None,
                 config=None, redis=None):
        self.log = logging.getLogger('assemblyline.submission_client')
        self.config = config or forge.CachedObject(forge.get_config)
        self.datastore = datastore or forge.get_datastore(self.config)
        self.filestore = filestore or forge.get_filestore(self.config)
        self.redis = redis

        # A client for interacting with the dispatcher
        self.dispatcher = DispatchClient(datastore, redis)
示例#10
0
def resubmit_submission_for_analysis(sid, *args, **kwargs):
    """
    Resubmit a submission for analysis with the exact same parameters as before

    Variables:
    sid         => Submission ID to re-submit

    Arguments:
    None

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(sid, as_obj=False)

    if submission:
        if not Classification.is_accessible(user['classification'],
                                            submission['classification']):
            return make_api_response(
                "",
                "You are not allowed to re-submit a submission that you don't have access to",
                403)

        submission_params = submission['params']
        submission_params['classification'] = submission['classification']
    else:
        return make_api_response({},
                                 "Submission %s does not exists." % sid,
                                 status_code=404)

    submission_params['submitter'] = user['uname']
    submission_params['description'] = "Resubmit %s for analysis" % ", ".join(
        [x['name'] for x in submission["files"]])

    try:
        submission_obj = Submission({
            "files": submission["files"],
            "params": submission_params
        })
    except (ValueError, KeyError) as e:
        return make_api_response("", err=str(e), status_code=400)

    with forge.get_filestore() as f_transport:
        try:
            submit_result = SubmissionClient(
                datastore=STORAGE, filestore=f_transport,
                config=config).submit(submission_obj)
        except SubmissionException as e:
            return make_api_response("", err=str(e), status_code=400)

    return make_api_response(submit_result.as_primitives())
示例#11
0
def create_extra_data(log=None, ds=None, fs=None):
    ds = ds or forge.get_datastore()
    fs = fs or forge.get_filestore()

    log.info("\nCreating 10 Submissions...")
    submissions = []
    for _ in range(10):
        s = create_submission(ds, fs, log=log)
        submissions.append(s)

    log.info("\nCreating 50 Alerts...")
    create_alerts(ds, submission_list=submissions, log=log)
示例#12
0
def format_result(r):
    try:
        title = r['result']['sections'][0]['title_text']
        if title.startswith('Result exceeded max size.'):
            sha256 = r['response']['supplementary'][-1][1]
            with forge.get_filestore() as transport:
                oversized = json.loads(transport.get(sha256))
            oversized['oversized'] = True
            return oversized
    except Exception:  # pylint:disable=W0702
        pass

    return r
示例#13
0
def get_file_strings(sha256, **kwargs):
    """
    Return all strings in a given file

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments:
    len       => Minimum length for a string

    Data Block:
    None

    Result example:
    <THE LIST OF STRINGS>
    """
    user = kwargs['user']
    hlen = request.args.get('len', "6")
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if file_obj['size'] > API_MAX_SIZE:
        return make_api_response(
            {}, "This file is too big to be seen through this API.", 403)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.",
                                 404)

    if user and Classification.is_accessible(user['classification'],
                                             file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({},
                                     "This file was not found in the system.",
                                     404)

        # Ascii strings (we use decode with replace on to create delimiters)
        pattern = "[\x1f-\x7e]{%s,}" % hlen
        string_list = re.findall(pattern, data.decode("ascii",
                                                      errors="replace"))

        # UTF-16 strings
        string_list += re.findall(pattern,
                                  data.decode("utf-16", errors="replace"))

        return make_api_response("\n".join(string_list))
    else:
        return make_api_response({}, "You are not allowed to view this file.",
                                 403)
    def __init__(self, force_ilm=False):
        self.config = forge.get_config()
        if force_ilm:
            self.config.datastore.ilm.enabled = True

        super().__init__('assemblyline.expiry',
                         shutdown_timeout=self.config.core.expiry.sleep_time +
                         5)
        self.datastore = forge.get_datastore(config=self.config,
                                             archive_access=True)
        self.hot_datastore = forge.get_datastore(config=self.config,
                                                 archive_access=False)
        self.filestore = forge.get_filestore(config=self.config)
        self.cachestore = FileStore(*self.config.filestore.cache)
        self.expirable_collections = []
        self.archiveable_collections = []
        self.counter = MetricsFactory('expiry', Metrics)
        self.counter_archive = MetricsFactory('archive', Metrics)

        if self.config.datastore.ilm.enabled:
            self.fs_hashmap = {
                'file': self.archive_filestore_delete,
                'cached_file': self.archive_cachestore_delete
            }
        else:
            self.fs_hashmap = {
                'file': self.filestore_delete,
                'cached_file': self.cachestore_delete
            }

        for name, definition in self.datastore.ds.get_models().items():
            if hasattr(definition, 'archive_ts'):
                self.archiveable_collections.append(
                    getattr(self.datastore, name))
            if hasattr(definition, 'expiry_ts'):
                self.expirable_collections.append(getattr(
                    self.datastore, name))

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="expiry")
        else:
            self.apm_client = None
def create_extra_data(log=None, ds=None, fs=None):
    ds = ds or forge.get_datastore()
    fs = fs or forge.get_filestore()

    log.info("\nCreating 10 Submissions...")
    submissions = []
    for _ in range(10):
        s = create_submission(ds, fs, log=log)
        submissions.append(s)

    log.info("\nCreating 50 Alerts...")
    create_alerts(ds, submission_list=submissions, log=log)

    log.info("\nGenerating statistics for signatures and heuristics...")
    ds.calculate_signature_stats()
    ds.calculate_heuristic_stats()
示例#16
0
    def __init__(self, datastore=None, filestore=None):
        super().__init__('assemblyline.randomservice')
        self.config = forge.get_config()
        self.datastore = datastore or forge.get_datastore()
        self.filestore = filestore or forge.get_filestore()
        self.client_id = get_random_id()
        self.service_state_hash = ExpiringHash(SERVICE_STATE_HASH, ttl=30 * 60)

        self.counters = {
            n: MetricsFactory('service', Metrics, name=n, config=self.config)
            for n in self.datastore.service_delta.keys()
        }
        self.queues = [
            forge.get_service_queue(name)
            for name in self.datastore.service_delta.keys()
        ]
        self.dispatch_client = DispatchClient(self.datastore)
        self.service_info = CachedObject(self.datastore.list_all_services,
                                         kwargs={'as_obj': False})
def test_download_file(client, file_datastore):
    # Put the file in place
    fs = forge.get_filestore()
    file_size = 12345
    fs.put('test_file', b'x' * file_size)
    try:
        response = client.get('/api/v1/file/test_file/', headers=headers)
        assert response.status_code == 200
        assert response.data == (b'x' * file_size)
    finally:
        fs.delete('test_file')

    # Try getting it again where the datastore thinks its there but its missing from the filestore
    response = client.get('/api/v1/file/test_file/', headers=headers)
    assert response.status_code == 404

    # Have the datastore say it doesn't exist
    file_datastore.file.get.return_value = None
    response = client.get('/api/v1/file/test_file/', headers=headers)
    assert response.status_code == 404
    def __init__(self,
                 datastore: AssemblylineDatastore = None,
                 filestore: FileStore = None,
                 config=None,
                 redis=None,
                 identify=None):
        self.log = logging.getLogger('assemblyline.submission_client')
        self.config = config or forge.CachedObject(forge.get_config)
        self.datastore = datastore or forge.get_datastore(self.config)
        self.filestore = filestore or forge.get_filestore(self.config)
        self.redis = redis
        if identify:
            self.cleanup = False
        else:
            self.cleanup = True
        self.identify = identify or forge.get_identify(
            config=self.config, datastore=self.datastore, use_cache=True)

        # A client for interacting with the dispatcher
        self.dispatcher = DispatchClient(datastore, redis)
示例#19
0
def delete_submission(sid, **kwargs):
    """
    Delete a submission as well as all related
    files, results and errors
    
    Variables:
    sid         => Submission ID to be deleted
    
    Arguments: 
    None
    
    Data Block:
    None
    
    Result example:
    {success: true}
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(sid, as_obj=False)

    if not submission:
        return make_api_response("",
                                 f"There are not submission with sid: {sid}",
                                 404)

    if Classification.is_accessible(user['classification'], submission['classification']) \
            and (submission['params']['submitter'] == user['uname'] or 'admin' in user['type']):
        with forge.get_filestore() as f_transport:
            STORAGE.delete_submission_tree_bulk(sid,
                                                Classification,
                                                transport=f_transport)
        STORAGE.submission.commit()
        return make_api_response({"success": True})
    else:
        return make_api_response(
            "", "Your are not allowed to delete this submission.", 403)
示例#20
0
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part conatins the file binary, uses the name 'bin' and includes a filename

        Note 3:
            The ingest API uses the user's default settings to submit files to the system
            unless these settings are overridden in the 'params' field. Although, there are
            exceptions to that rule. Fields deep_scan, ignore_filtering, ignore_cache are
            resetted to False because the lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
     //REQUIRED VALUES: One of the following
     "sha256": "1234...CDEF"         # SHA256 hash of the file
     "url": "http://...",            # Url to fetch the file from

     //OPTIONAL VALUES
     "name": "file.exe",             # Name of the file

     "metadata": {                   # Submission Metadata
         "key": val,                    # Key/Value pair for metadata parameters
         },

     "params": {                     # Submission parameters
         "key": val,                    # Key/Value pair for params that differ from the user's defaults
         },                                 # DEFAULT: /api/v3/user/submission_params/<user>/

     "generate_alert": False,        # Generate an alert in our alerting system or not
     "notification_queue": None,     # Name of the notification queue
     "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODDED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None
    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            if 'multipart/form-data' in request.content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in request.content_type:
                data = request.json
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                name = data.get(
                    "name", None) or sha256 or os.path.basename(url) or None
            else:
                return make_api_response({}, "Invalid content type", 400)

            if not data:
                return make_api_response({}, "Missing data block", 400)

            # Get notification queue parameters
            notification_queue = data.get('notification_queue', None)
            notification_threshold = data.get('notification_threshold', None)
            if not isinstance(notification_threshold,
                              int) and notification_threshold:
                return make_api_response(
                    {}, "notification_threshold should be and int", 400)

            # Get generate alert parameter
            generate_alert = data.get('generate_alert', False)
            if not isinstance(generate_alert, bool):
                return make_api_response({},
                                         "generate_alert should be a boolean",
                                         400)

            # Get file name
            if not name:
                return make_api_response({}, "Filename missing", 400)

            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            original_file = out_file = os.path.join(out_dir, name)

            # Load file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response(
                            {}, "SHA256 does not exist in our datastore", 404)
                else:
                    if url:
                        if not config.ui.allow_url_submissions:
                            return make_api_response(
                                {},
                                "URL submissions are disabled in this system",
                                400)

                        try:
                            safe_download(url, out_file)
                            extra_meta['submitted_url'] = url
                        except FileTooBigException:
                            return make_api_response(
                                {}, "File too big to be scanned.", 400)
                        except InvalidUrlException:
                            return make_api_response(
                                {}, "Url provided is invalid.", 400)
                        except ForbiddenLocation:
                            return make_api_response(
                                {}, "Hostname in this URL cannot be resolved.",
                                400)
                    else:
                        return make_api_response(
                            {},
                            "Missing file to scan. No binary, sha256 or url provided.",
                            400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            # Load default user params
            s_params = ui_to_submission_params(
                STORAGE.user_settings.get(user['uname'], as_obj=False))
            if not s_params:
                s_params = get_default_user_settings(user)

            # Reset dangerous user settings to safe values
            s_params.update({
                'deep_scan': False,
                "priority": 150,
                "ignore_cache": False,
                "ignore_dynamic_recursion_prevention": False,
                "ignore_filtering": False,
                "type": "INGEST"
            })

            # Apply provided params
            s_params.update(data.get("params", {}))

            # Override final parameters
            s_params.update({
                'generate_alert':
                generate_alert,
                'max_extracted':
                config.core.ingester.default_max_extracted,
                'max_supplementary':
                config.core.ingester.default_max_supplementary,
                'priority':
                min(s_params.get("priority", 150),
                    config.ui.ingest_max_priority),
                'submitter':
                user['uname']
            })

            # Calculate file digest
            fileinfo = identify.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get(
                    'ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response("", err=msg, status_code=400)
            elif fileinfo['size'] == 0:
                return make_api_response("",
                                         err="File empty. Ingestion failed",
                                         status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(out_file, fileinfo)
            if extracted_path:
                out_file = extracted_path

            # Save the file to the filestore if needs be
            sha256 = fileinfo['sha256']
            if not f_transport.exists(sha256):
                f_transport.upload(out_file, sha256, location='far')

            # Freshen file object
            expiry = now_as_iso(s_params['ttl'] * 24 * 60 *
                                60) if s_params.get('ttl', None) else None
            STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                         s_params['classification'])

            # Setup notification queue if needed
            if notification_queue:
                notification_params = {
                    "queue": notification_queue,
                    "threshold": notification_threshold
                }
            else:
                notification_params = {}

            # Load metadata, setup some default values if they are missing and append the cart metadata
            ingest_id = get_random_id()
            metadata = flatten(data.get("metadata", {}))
            metadata['ingest_id'] = ingest_id
            metadata['type'] = s_params['type']
            name = al_meta.pop('name', name)
            metadata.update(al_meta)
            if 'ts' not in metadata:
                metadata['ts'] = now_as_iso()
            metadata.update(extra_meta)

            # Set description if it does not exists
            s_params['description'] = s_params[
                'description'] or f"[{s_params['type']}] Inspection of file: {name}"

            # Create submission object
            try:
                submission_obj = Submission({
                    "sid":
                    ingest_id,
                    "files": [{
                        'name': name,
                        'sha256': sha256,
                        'size': fileinfo['size']
                    }],
                    "notification":
                    notification_params,
                    "metadata":
                    metadata,
                    "params":
                    s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Send submission object for processing
            ingest.push(submission_obj.as_primitives())
            return make_api_response({"ingest_id": ingest_id})

        finally:
            # Cleanup files on disk
            try:
                if original_file and os.path.exists(original_file):
                    os.unlink(original_file)
            except Exception:
                pass

            try:
                if extracted_path and os.path.exists(extracted_path):
                    os.unlink(extracted_path)
            except Exception:
                pass

            try:
                if os.path.exists(out_dir):
                    shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
示例#21
0
def fs():
    return forge.get_filestore(connection_attempts=1)
示例#22
0
 def filestore():
     try:
         return forge.get_filestore(config, connection_attempts=1)
     except ConnectionError as err:
         pytest.skip(str(err))
RATE_LIMITER = Counters(prefix="quota", host=redis, track_counters=True)

# End of Configuration
#################################################################

#################################################################
# Prepare loggers
config.logging.log_to_console = config.logging.log_to_console or DEBUG
al_log.init_logging('svc', config=config)

LOGGER = logging.getLogger('assemblyline.svc')

LOGGER.debug('Logger ready!')

# End of prepare logger
#################################################################

#################################################################
# Global instances

STORAGE = forge.get_datastore(config=config)
FILESTORE = forge.get_filestore(config=config)
LOCK = threading.Lock()
TASKING_CLIENT = TaskingClient(datastore=STORAGE,
                               filestore=FILESTORE,
                               redis=redis,
                               redis_persist=redis_persist)
SAFELIST_CLIENT = SafelistClient(datastore=STORAGE)
# End global
#################################################################
示例#24
0
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.
    
    Variables: 
    sha256       => A resource locator for the file (sha256)
    
    Arguments (optional):
    encoding     => Type of encoding use for the resulting file
    name         => Name of the file to download
    sid          => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        params = load_user_settings(user)
    
        name = request.args.get('name', sha256) or sha256
        name = os.path.basename(name)
        name = safe_str(name)

        sid = request.args.get('sid', None) or None
        submission = {}
        submission_meta = {}
        if sid is not None:
            submission = STORAGE.submission.get(sid, as_obj=False)
            if submission is None:
                submission = {}
            hash_list = [submission.get('files', [])[0].get('sha256', None)]
            hash_list.extend([x[:64] for x in submission.get('errors', [])])
            hash_list.extend([x[:64] for x in submission.get('results', [])])

            if sha256 not in hash_list:
                return make_api_response({}, f"File {sha256} is not associated to submission {sid}.", 403)

            if Classification.is_accessible(user['classification'], submission['classification']):
                submission_meta.update(unflatten(submission['metadata']))

        if Classification.enforce:
            submission_classification = submission.get('classification', file_obj['classification'])
            submission_meta['classification'] = Classification.max_classification(submission_classification,
                                                                                  file_obj['classification'])

        encoding = request.args.get('encoding', params['download_encoding'])
        if encoding not in ['raw', 'cart']:
            return make_api_response({}, f"{encoding.upper()} is not in the valid encoding types: [raw, cart]", 403)

        if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

        _, download_path = tempfile.mkstemp()
        try:
            with forge.get_filestore() as f_transport:
                downloaded_from = f_transport.download(sha256, download_path)

            if not downloaded_from:
                return make_api_response({}, "The file was not found in the system.", 404)

            if encoding == 'raw':
                target_path = download_path
            else:
                target_path, name = encode_file(download_path, name, submission_meta)

            try:
                return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))
            finally:
                if target_path:
                    if os.path.exists(target_path):
                        os.unlink(target_path)
        finally:
            if download_path:
                if os.path.exists(download_path):
                    os.unlink(download_path)
    else:
        return make_api_response({}, "You are not allowed to download this file.", 403)
def fs(config):
    return forge.get_filestore(config)
示例#26
0
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis
    
    Variables:
    sha256         => Resource locator (SHA256)
    
    Arguments (Optional): 
    copy_sid    => Mimic the attributes of this SID.
    name        => Name of the file for the submission
    
    Data Block:
    None
    
    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    copy_sid = request.args.get('copy_sid', None)
    name = request.args.get('name', sha256)

    if copy_sid:
        submission = STORAGE.submission.get(copy_sid, as_obj=False)
    else:
        submission = None

    if submission:
        if not Classification.is_accessible(user['classification'],
                                            submission['classification']):
            return make_api_response(
                "",
                "You are not allowed to re-submit a submission that you don't have access to",
                403)

        submission_params = submission['params']
        submission_params['classification'] = submission['classification']

    else:
        submission_params = ui_to_submission_params(
            STORAGE.user_settings.get(user['uname'], as_obj=False))

    with forge.get_filestore() as f_transport:
        if not f_transport.exists(sha256):
            return make_api_response(
                {},
                "File %s cannot be found on the server therefore it cannot be resubmitted."
                % sha256,
                status_code=404)

        files = [{'name': name, 'sha256': sha256}]

        submission_params['submitter'] = user['uname']
        if 'priority' not in submission_params:
            submission_params['priority'] = 500
        submission_params[
            'description'] = "Resubmit %s for Dynamic Analysis" % name
        if "Dynamic Analysis" not in submission_params['services']['selected']:
            submission_params['services']['selected'].append(
                "Dynamic Analysis")

        try:
            submission_obj = Submission({
                "files": files,
                "params": submission_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        try:
            submit_result = SubmissionClient(
                datastore=STORAGE, filestore=f_transport,
                config=config).submit(submission_obj)
        except SubmissionException as e:
            return make_api_response("", err=str(e), status_code=400)

    return make_api_response(submit_result.as_primitives())
示例#27
0
def submit(**kwargs):
    """
    Submit a single file, sha256 or url for analysis

        Note 1:
            If you are submitting a sh256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                - The last part conatins the file binary, uses the name 'bin' and includes a filename

    Variables:
    None
    
    Arguments: 
    None
    
    Data Block (SHA256 or URL):
    {
      // REQUIRED: One of the two following
      "sha256": "123...DEF",      # SHA256 hash of the file already in the datastore
      "url": "http://...",        # Url to fetch the file from

      // OPTIONAL VALUES
      "name": "file.exe",         # Name of the file to scan otherwise the sha256 or base file of the url

      "metadata": {               # Submission metadata
        "key": val,                 # Key/Value pair metadata values
      },

      "params": {                 # Submission parameters
        "key": val,                 # Key/Value pair for params that different then defaults
      },                            # Default params can be fetch at /api/v3/user/submission_params/<user>/
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"metadata": {"hello": "world"}}
    --0b34a3c50d3c02dd804a172329a0b2aa               <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODDED>

    --0b34a3c50d3c02dd804a172329a0b2aa--             <-- End of HTTP transmission


    Result example:
    <Submission message object as a json dictionary>
    """
    user = kwargs['user']
    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())

    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            if 'multipart/form-data' in request.content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in request.content_type:
                data = request.json
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                name = data.get(
                    "name", None) or sha256 or os.path.basename(url) or None
            else:
                return make_api_response({}, "Invalid content type", 400)

            if data is None:
                return make_api_response({}, "Missing data block", 400)

            if not name:
                return make_api_response({}, "Filename missing", 400)

            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            # Create task object
            if "ui_params" in data:
                s_params = ui_to_submission_params(data['ui_params'])
            else:
                s_params = ui_to_submission_params(
                    STORAGE.user_settings.get(user['uname'], as_obj=False))

            if not s_params:
                s_params = get_default_user_settings(user)

            s_params.update(data.get("params", {}))
            if 'groups' not in s_params:
                s_params['groups'] = user['groups']

            s_params['quota_item'] = True
            s_params['submitter'] = user['uname']
            if not s_params['description']:
                s_params['description'] = "Inspection of file: %s" % name

            if not Classification.is_accessible(user['classification'],
                                                s_params['classification']):
                return make_api_response(
                    {}, "You cannot start a scan with higher "
                    "classification then you're allowed to see", 400)

            # Prepare the output directory
            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            out_file = os.path.join(out_dir, name)

            # Get the output file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response(
                            {}, "SHA256 does not exist in our datastore", 404)
                else:
                    if url:
                        if not config.ui.allow_url_submissions:
                            return make_api_response(
                                {},
                                "URL submissions are disabled in this system",
                                400)

                        try:
                            safe_download(url, out_file)
                            extra_meta['submitted_url'] = url
                        except FileTooBigException:
                            return make_api_response(
                                {}, "File too big to be scanned.", 400)
                        except InvalidUrlException:
                            return make_api_response(
                                {}, "Url provided is invalid.", 400)
                        except ForbiddenLocation:
                            return make_api_response(
                                {}, "Hostname in this URL cannot be resolved.",
                                400)
                    else:
                        return make_api_response(
                            {},
                            "Missing file to scan. No binary, sha256 or url provided.",
                            400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            try:
                metadata = flatten(data.get('metadata', {}))
                metadata.update(extra_meta)

                submission_obj = Submission({
                    "files": [],
                    "metadata": metadata,
                    "params": s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Submit the task to the system
            try:
                result = SubmissionClient(datastore=STORAGE,
                                          filestore=f_transport,
                                          config=config).submit(
                                              submission_obj,
                                              local_files=[out_file],
                                              cleanup=False)
            except SubmissionException as e:
                return make_api_response("", err=str(e), status_code=400)

            return make_api_response(result.as_primitives())

        finally:
            try:
                # noinspection PyUnboundLocalVariable
                os.unlink(out_file)
            except Exception:
                pass

            try:
                shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
示例#28
0
def create_bundle(sid, working_dir=WORK_DIR):
    with forge.get_datastore() as datastore:
        temp_bundle_file = f"bundle_{get_random_id()}"
        current_working_dir = os.path.join(working_dir, temp_bundle_file)
        try:
            submission = datastore.submission.get(sid, as_obj=False)
            if submission is None:
                raise SubmissionNotFound(
                    "Can't find submission %s, skipping." % sid)
            else:
                target_file = os.path.join(working_dir,
                                           f"{temp_bundle_file}.tgz")

                try:
                    os.makedirs(current_working_dir)
                except Exception as e:
                    if isinstance(PermissionError, e):
                        raise
                    pass

                # Create file information data
                file_tree = datastore.get_or_create_file_tree(
                    submission, config.submission.max_extraction_depth)['tree']
                flatten_tree = list(
                    set(
                        recursive_flatten_tree(file_tree) +
                        [r[:64] for r in submission.get("results", [])]))
                file_infos, _ = get_file_infos(copy(flatten_tree), datastore)

                # Add bundling metadata
                if 'bundle.source' not in submission['metadata']:
                    submission['metadata']['bundle.source'] = config.ui.fqdn
                if Classification.enforce and 'bundle.classification' not in submission[
                        'metadata']:
                    submission['metadata'][
                        'bundle.classification'] = submission['classification']

                data = {
                    'submission':
                    submission,
                    'files': {
                        "list": flatten_tree,
                        "tree": file_tree,
                        "infos": file_infos
                    },
                    'results':
                    get_results(submission.get("results", []), file_infos,
                                datastore),
                    'errors':
                    get_errors(submission.get("errors", []), datastore)
                }

                # Save result files
                with open(os.path.join(current_working_dir, "results.json"),
                          "w") as fp:
                    json.dump(data, fp)

                # Download all related files
                with forge.get_filestore() as filestore:
                    for sha256 in flatten_tree:
                        try:
                            filestore.download(
                                sha256,
                                os.path.join(current_working_dir, sha256))
                        except FileStoreException:
                            pass

                # Create the bundle
                subprocess.check_call("tar czf %s *" % target_file,
                                      shell=True,
                                      cwd=current_working_dir)

                return target_file

        except Exception as e:
            raise BundlingException(
                "Could not bundle submission '%s'. [%s: %s]" %
                (sid, type(e).__name__, str(e)))
        finally:
            if current_working_dir:
                subprocess.check_call(["rm", "-rf", current_working_dir])
示例#29
0
def start_ui_submission(ui_sid, **kwargs):
    """
    Start UI submission.
    
    Starts processing after files where uploaded to the server. 
    
    Variables:
    ui_sid     => UUID for the current UI file upload
    
    Arguments: 
    None
    
    Data Block (REQUIRED):
    Dictionary of UI specific user settings
    
    Result example:
    {
     'started': True,                    # Has the submission started processing?
     'sid' : "c7668cfa-...-c4132285142e" # Submission ID
    }
    """
    user = kwargs['user']

    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    ui_params = request.json
    ui_params['groups'] = kwargs['user']['groups']
    ui_params['quota_item'] = True
    ui_params['submitter'] = user['uname']

    if not Classification.is_accessible(user['classification'],
                                        ui_params['classification']):
        return make_api_response({
            "started": False,
            "sid": None
        }, "You cannot start a scan with higher "
                                 "classification then you're allowed to see",
                                 403)

    request_files = []
    request_dirs = []
    fnames = []
    try:
        flist = glob.glob(TEMP_DIR + ui_sid + "*")
        if len(flist) > 0:
            # Generate file list
            for fpath in flist:
                request_dirs.append(fpath)
                files = os.listdir(fpath)
                for myfile in files:
                    request_files.append(os.path.join(fpath, myfile))
                    if myfile not in fnames:
                        fnames.append(myfile)

            if not ui_params['description']:
                ui_params['description'] = "Inspection of file%s: %s" % (
                    {
                        True: "s",
                        False: ""
                    }[len(fnames) > 1], ", ".join(fnames))

            # Submit to dispatcher
            try:
                submission_obj = Submission({
                    "files": [],
                    "params":
                    ui_to_submission_params(ui_params)
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            with forge.get_filestore() as f_transport:
                try:
                    result = SubmissionClient(datastore=STORAGE,
                                              filestore=f_transport,
                                              config=config).submit(
                                                  submission_obj,
                                                  local_files=request_files,
                                                  cleanup=False)
                except SubmissionException as e:
                    return make_api_response("", err=str(e), status_code=400)

            return make_api_response({"started": True, "sid": result.sid})
        else:
            return make_api_response({
                "started": False,
                "sid": None
            }, "No files where found for ID %s. "
                                     "Try again..." % ui_sid, 404)
    finally:
        # Remove files
        for myfile in request_files:
            try:
                os.unlink(myfile)
            except Exception:
                pass

        # Remove dirs
        for fpath in request_dirs:
            try:
                os.rmdir(fpath)
            except Exception:
                pass
示例#30
0
def import_bundle(path,
                  working_dir=WORK_DIR,
                  min_classification=Classification.UNRESTRICTED,
                  allow_incomplete=False):
    with forge.get_datastore() as datastore:
        current_working_dir = os.path.join(working_dir, get_random_id())
        res_file = os.path.join(current_working_dir, "results.json")
        try:
            os.makedirs(current_working_dir)
        except Exception:
            pass

        # Extract  the bundle
        try:
            subprocess.check_call(
                ["tar", "-zxf", path, "-C", current_working_dir])
        except subprocess.CalledProcessError:
            raise BundlingException(
                "Bundle decompression failed. Not a valid bundle...")

        with open(res_file, 'rb') as fh:
            data = json.load(fh)

        submission = data['submission']
        results = data['results']
        files = data['files']
        errors = data['errors']

        try:
            sid = submission['sid']
            # Check if we have all the service results
            for res_key in submission['results']:
                if res_key not in results['results'].keys(
                ) and not allow_incomplete:
                    raise IncompleteBundle(
                        "Incomplete results in bundle. Skipping %s..." % sid)

            # Check if we have all files
            for sha256 in list(set([x[:64] for x in submission['results']])):
                if sha256 not in files['infos'].keys(
                ) and not allow_incomplete:
                    raise IncompleteBundle(
                        "Incomplete files in bundle. Skipping %s..." % sid)

            # Check if we all errors
            for err_key in submission['errors']:
                if err_key not in errors['errors'].keys(
                ) and not allow_incomplete:
                    raise IncompleteBundle(
                        "Incomplete errors in bundle. Skipping %s..." % sid)

            if datastore.submission.get(sid, as_obj=False):
                raise SubmissionAlreadyExist("Submission %s already exists." %
                                             sid)

            # Make sure bundle's submission meets minimum classification and save the submission
            submission['classification'] = Classification.max_classification(
                submission['classification'], min_classification)
            submission.update(
                Classification.get_access_control_parts(
                    submission['classification']))
            datastore.submission.save(sid, submission)

            # Make sure files meet minimum classification and save the files
            with forge.get_filestore() as filestore:
                for f, f_data in files['infos'].items():
                    f_classification = Classification.max_classification(
                        f_data['classification'], min_classification)
                    datastore.save_or_freshen_file(f,
                                                   f_data,
                                                   f_data['expiry_ts'],
                                                   f_classification,
                                                   cl_engine=Classification)
                    try:
                        filestore.upload(os.path.join(current_working_dir, f),
                                         f)
                    except IOError:
                        pass

            # Make sure results meet minimum classification and save the results
            for key, res in results['results'].items():
                if key.endswith(".e"):
                    datastore.emptyresult.save(key,
                                               {"expiry_ts": res['expiry_ts']})
                else:
                    res['classification'] = Classification.max_classification(
                        res['classification'], min_classification)
                    datastore.result.save(key, res)

            # Make sure errors meet minimum classification and save the errors
            for ekey, err in errors['errors'].items():
                datastore.error.save(ekey, err)

        finally:
            # Perform working dir cleanup
            try:
                os.remove(path)
            except Exception:
                pass

            try:
                shutil.rmtree(current_working_dir, ignore_errors=True)
            except Exception:
                pass