def test_finish_missing_file(client, dispatch_client, heuristics):
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)
    fs = forge.get_filestore()

    result: Result = random_minimal_obj(Result)
    while not result.response.extracted:
        result: Result = random_model_obj(Result)
    result.response.extracted = [x for x in result.response.extracted if not fs.exists(x.sha256)]

    missing = {x.sha256 for x in result.response.extracted if not fs.exists(x.sha256)}
    missing |= {x.sha256 for x in result.response.supplementary if not fs.exists(x.sha256)}

    message = {'task': task.as_primitives(), 'result': result.as_primitives(), 'freshen': True}
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert resp.json['api_response']['success'] is False
    assert set(resp.json['api_response']['missing_files']) == missing
def get_file_ascii(sha256, **kwargs):
    """
    Return the ascii values for a file where non-ascii chars are replaced by DOTs.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments:
    None

    Data Block:
    None

    Result example:
    <THE ASCII FILE>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({}, "This file was not found in the system.", 404)

        return make_api_response(data.translate(FILTER_ASCII).decode())
    else:
        return make_api_response({}, "You are not allowed to view this file.", 403)
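# A minimal sketch of how the FILTER_ASCII translate table above could be built.
# The exact definition is an assumption: bytes.translate() needs a 256-entry
# table, and the docstring says non-printable bytes come out as dots.
FILTER_ASCII = bytes(b if 0x20 <= b <= 0x7e or b in (0x09, 0x0a, 0x0d) else ord('.')
                     for b in range(256))

assert b'MZ\x90\x00'.translate(FILTER_ASCII) == b'MZ..'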
def __init__(self, datastore: AssemblylineDatastore = None, filestore: FileStore = None,
             config=None, redis=None, redis_persist=None, identify=None):
    self.log = logging.getLogger('assemblyline.tasking_client')
    self.config = config or forge.CachedObject(forge.get_config)
    self.datastore = datastore or forge.get_datastore(self.config)
    self.dispatch_client = DispatchClient(self.datastore, redis=redis, redis_persist=redis_persist)
    self.event_sender = EventSender('changes.services', redis)
    self.filestore = filestore or forge.get_filestore(self.config)
    self.heuristic_handler = HeuristicHandler(self.datastore)
    self.heuristics = {h.heur_id: h for h in self.datastore.list_all_heuristics()}
    self.status_table = ExpiringHash(SERVICE_STATE_HASH, ttl=60 * 30, host=redis)
    self.tag_safelister = forge.CachedObject(forge.get_tag_safelister, kwargs=dict(
        log=self.log, config=config, datastore=self.datastore), refresh=300)

    # Only clean up the identify instance if this client created it
    self.cleanup = not identify
    self.identify = identify or forge.get_identify(config=self.config, datastore=self.datastore, use_cache=True)
def __init__(self, shutdown_timeout: int = SHUTDOWN_SECONDS_LIMIT):
    super(RunPrivilegedService, self).__init__(f'assemblyline.service.{SERVICE_NAME}',
                                               shutdown_timeout=shutdown_timeout)

    self.client_id = os.environ.get('HOSTNAME', 'dev-service')

    self.redis = get_client(
        host=self.config.core.redis.nonpersistent.host,
        port=self.config.core.redis.nonpersistent.port,
        private=False,
    )

    self.redis_persist = get_client(
        host=self.config.core.redis.persistent.host,
        port=self.config.core.redis.persistent.port,
        private=False,
    )

    self.tasking_client = TaskingClient(redis=self.redis, redis_persist=self.redis_persist)
    self.tasking_dir = os.environ.get('TASKING_DIR', tempfile.gettempdir())
    self.filestore = forge.get_filestore()

    self.service = None
    self.service_config = {}
    self.service_name = None
    self.service_tool_version = None

    self.status = STATUSES.INITIALIZING
    self.metric_factory = None

    self.log.setLevel(LOG_LEVEL)
def test_upload_file_bad_hash(client, file_datastore):
    fs = forge.get_filestore()
    file_size = 10003
    file_data = b'x' * file_size
    file_hash = hashlib.sha256(file_data).hexdigest()
    bad_hash = '0000' + file_hash[4:]
    fs.delete(file_hash)
    fs.delete(bad_hash)

    file_headers = dict(headers)
    file_headers['sha256'] = bad_hash
    file_headers['classification'] = 'U'
    file_headers['ttl'] = 1
    file_headers['Content-Type'] = 'application/octet-stream'

    try:
        response = client.put('/api/v1/file/', headers=file_headers, data=file_data)
        assert response.status_code in range(400, 500)
        assert not fs.exists(file_hash)
        assert not fs.exists(bad_hash)
        assert file_datastore.save_or_freshen_file.call_count == 0
    finally:
        fs.delete(file_hash)
        fs.delete(bad_hash)
def get_file_hex(sha256, **kwargs):
    """
    Returns the file hex representation

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/file/hex/123456...654321/

    Result example:
    <THE FILE HEX REPRESENTATION>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({}, "This file was not found in the system.", 404)

        return make_api_response(hexdump(data))
    else:
        return make_api_response({}, "You are not allowed to view this file.", 403)
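# Illustrative only: a minimal hexdump in the classic offset/hex/ascii layout.
# The hexdump() helper imported by the API above may format its output differently.
def simple_hexdump(data: bytes, width: int = 16) -> str:
    lines = []
    for offset in range(0, len(data), width):
        chunk = data[offset:offset + width]
        hex_part = ' '.join(f'{b:02x}' for b in chunk)
        ascii_part = ''.join(chr(b) if 0x20 <= b <= 0x7e else '.' for b in chunk)
        lines.append(f'{offset:08x}  {hex_part:<{width * 3}} {ascii_part}')
    return '\n'.join(lines)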
def perform_check():
    # If the service is privileged, test connectivity to core
    if environ.get('PRIVILEGED', 'false').lower() == 'true':
        forge.get_datastore()
        forge.get_filestore(connection_attempts=1)
        forge.get_service_queue(service=environ['AL_SERVICE_NAME'])
    else:
        # Otherwise, perform a test for service-server availability
        if not requests.get(f"{environ['SERVICE_API_HOST']}/healthz/live").ok:
            raise Exception('Unable to reach service-server')

    # If running with an updater, check for availability. Make sure the test doesn't run on the actual updater.
    if environ.get('updates_host') and not environ['HOSTNAME'].startswith(environ['updates_host']):
        if not requests.get(f"http://{environ['updates_host']}:{environ['updates_port']}/healthz/live").ok:
            raise Exception('Unable to reach local update server')
    exit()
def submission_delete_tree(key, logger):
    try:
        with forge.get_filestore() as f_transport:
            DATASTORE.delete_submission_tree(key, transport=f_transport)
    except Exception as e:
        logger.error(e)
        return "DELETE", "submission", key, False, isinstance(logger, PrintLogger)

    return "deleted", "submission", key, True, isinstance(logger, PrintLogger)
def __init__(self, datastore: AssemblylineDatastore = None, filestore: FileStore = None,
             config=None, redis=None):
    self.log = logging.getLogger('assemblyline.submission_client')
    self.config = config or forge.CachedObject(forge.get_config)
    self.datastore = datastore or forge.get_datastore(self.config)
    self.filestore = filestore or forge.get_filestore(self.config)
    self.redis = redis

    # A client for interacting with the dispatcher; use the resolved datastore
    # so a bare SubmissionClient() still gets a working dispatch client
    self.dispatcher = DispatchClient(self.datastore, redis)
def resubmit_submission_for_analysis(sid, *args, **kwargs):
    """
    Resubmit a submission for analysis with the exact same parameters as before

    Variables:
    sid         => Submission ID to re-submit

    Arguments:
    None

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(sid, as_obj=False)

    if submission:
        if not Classification.is_accessible(user['classification'], submission['classification']):
            return make_api_response("",
                                     "You are not allowed to re-submit a submission that you don't have access to",
                                     403)

        submission_params = submission['params']
        submission_params['classification'] = submission['classification']
    else:
        return make_api_response({}, "Submission %s does not exist." % sid, status_code=404)

    submission_params['submitter'] = user['uname']
    submission_params['description'] = "Resubmit %s for analysis" % ", ".join([x['name'] for x in submission["files"]])

    try:
        submission_obj = Submission({
            "files": submission["files"],
            "params": submission_params
        })
    except (ValueError, KeyError) as e:
        return make_api_response("", err=str(e), status_code=400)

    with forge.get_filestore() as f_transport:
        try:
            submit_result = SubmissionClient(datastore=STORAGE, filestore=f_transport,
                                             config=config).submit(submission_obj)
        except SubmissionException as e:
            return make_api_response("", err=str(e), status_code=400)

    return make_api_response(submit_result.as_primitives())
def create_extra_data(log=None, ds=None, fs=None):
    ds = ds or forge.get_datastore()
    fs = fs or forge.get_filestore()
    # Fall back to a module logger so the log.info calls below don't crash when no logger is passed in
    log = log or logging.getLogger(__name__)

    log.info("\nCreating 10 Submissions...")
    submissions = []
    for _ in range(10):
        s = create_submission(ds, fs, log=log)
        submissions.append(s)

    log.info("\nCreating 50 Alerts...")
    create_alerts(ds, submission_list=submissions, log=log)
def format_result(r):
    try:
        title = r['result']['sections'][0]['title_text']
        if title.startswith('Result exceeded max size.'):
            sha256 = r['response']['supplementary'][-1][1]
            with forge.get_filestore() as transport:
                oversized = json.loads(transport.get(sha256))

            oversized['oversized'] = True
            return oversized
    except Exception:  # pylint:disable=W0702
        pass

    return r
def get_file_strings(sha256, **kwargs):
    """
    Return all strings in a given file

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments:
    len          => Minimum length for a string

    Data Block:
    None

    Result example:
    <THE LIST OF STRINGS>
    """
    user = kwargs['user']
    hlen = request.args.get('len', "6")
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    # Check for existence before dereferencing the file record
    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if file_obj['size'] > API_MAX_SIZE:
        return make_api_response({}, "This file is too big to be seen through this API.", 403)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        with forge.get_filestore() as f_transport:
            data = f_transport.get(sha256)

        if not data:
            return make_api_response({}, "This file was not found in the system.", 404)

        # Ascii strings (we use decode with replace on to create delimiters)
        pattern = "[\x1f-\x7e]{%s,}" % hlen
        string_list = re.findall(pattern, data.decode("ascii", errors="replace"))

        # UTF-16 strings
        string_list += re.findall(pattern, data.decode("utf-16", errors="replace"))

        return make_api_response("\n".join(string_list))
    else:
        return make_api_response({}, "You are not allowed to view this file.", 403)
def __init__(self, force_ilm=False):
    self.config = forge.get_config()
    if force_ilm:
        self.config.datastore.ilm.enabled = True

    super().__init__('assemblyline.expiry', shutdown_timeout=self.config.core.expiry.sleep_time + 5)
    self.datastore = forge.get_datastore(config=self.config, archive_access=True)
    self.hot_datastore = forge.get_datastore(config=self.config, archive_access=False)
    self.filestore = forge.get_filestore(config=self.config)
    self.cachestore = FileStore(*self.config.filestore.cache)
    self.expirable_collections = []
    self.archiveable_collections = []
    self.counter = MetricsFactory('expiry', Metrics)
    self.counter_archive = MetricsFactory('archive', Metrics)

    if self.config.datastore.ilm.enabled:
        self.fs_hashmap = {
            'file': self.archive_filestore_delete,
            'cached_file': self.archive_cachestore_delete
        }
    else:
        self.fs_hashmap = {
            'file': self.filestore_delete,
            'cached_file': self.cachestore_delete
        }

    for name, definition in self.datastore.ds.get_models().items():
        if hasattr(definition, 'archive_ts'):
            self.archiveable_collections.append(getattr(self.datastore, name))
        if hasattr(definition, 'expiry_ts'):
            self.expirable_collections.append(getattr(self.datastore, name))

    if self.config.core.metrics.apm_server.server_url is not None:
        self.log.info(f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}")
        elasticapm.instrument()
        self.apm_client = elasticapm.Client(server_url=self.config.core.metrics.apm_server.server_url,
                                            service_name="expiry")
    else:
        self.apm_client = None
def create_extra_data(log=None, ds=None, fs=None):
    ds = ds or forge.get_datastore()
    fs = fs or forge.get_filestore()
    # Fall back to a module logger so the log.info calls below don't crash when no logger is passed in
    log = log or logging.getLogger(__name__)

    log.info("\nCreating 10 Submissions...")
    submissions = []
    for _ in range(10):
        s = create_submission(ds, fs, log=log)
        submissions.append(s)

    log.info("\nCreating 50 Alerts...")
    create_alerts(ds, submission_list=submissions, log=log)

    log.info("\nGenerating statistics for signatures and heuristics...")
    ds.calculate_signature_stats()
    ds.calculate_heuristic_stats()
def __init__(self, datastore=None, filestore=None):
    super().__init__('assemblyline.randomservice')
    self.config = forge.get_config()
    self.datastore = datastore or forge.get_datastore()
    self.filestore = filestore or forge.get_filestore()
    self.client_id = get_random_id()
    self.service_state_hash = ExpiringHash(SERVICE_STATE_HASH, ttl=30 * 60)

    self.counters = {n: MetricsFactory('service', Metrics, name=n, config=self.config)
                     for n in self.datastore.service_delta.keys()}
    self.queues = [forge.get_service_queue(name) for name in self.datastore.service_delta.keys()]
    self.dispatch_client = DispatchClient(self.datastore)
    self.service_info = CachedObject(self.datastore.list_all_services, kwargs={'as_obj': False})
def test_download_file(client, file_datastore):
    # Put the file in place
    fs = forge.get_filestore()
    file_size = 12345
    fs.put('test_file', b'x' * file_size)

    try:
        response = client.get('/api/v1/file/test_file/', headers=headers)
        assert response.status_code == 200
        assert response.data == (b'x' * file_size)
    finally:
        fs.delete('test_file')

    # Try getting it again where the datastore thinks it's there but it's missing from the filestore
    response = client.get('/api/v1/file/test_file/', headers=headers)
    assert response.status_code == 404

    # Have the datastore say it doesn't exist
    file_datastore.file.get.return_value = None
    response = client.get('/api/v1/file/test_file/', headers=headers)
    assert response.status_code == 404
def __init__(self, datastore: AssemblylineDatastore = None, filestore: FileStore = None,
             config=None, redis=None, identify=None):
    self.log = logging.getLogger('assemblyline.submission_client')
    self.config = config or forge.CachedObject(forge.get_config)
    self.datastore = datastore or forge.get_datastore(self.config)
    self.filestore = filestore or forge.get_filestore(self.config)
    self.redis = redis

    # Only clean up the identify instance if this client created it
    self.cleanup = not identify
    self.identify = identify or forge.get_identify(config=self.config, datastore=self.datastore, use_cache=True)

    # A client for interacting with the dispatcher; use the resolved datastore
    # so a bare SubmissionClient() still gets a working dispatch client
    self.dispatcher = DispatchClient(self.datastore, redis)
def delete_submission(sid, **kwargs):
    """
    Delete a submission as well as all related files, results and errors

    Variables:
    sid         => Submission ID to be deleted

    Arguments:
    None

    Data Block:
    None

    Result example:
    {success: true}
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(sid, as_obj=False)

    if not submission:
        return make_api_response("", f"There is no submission with sid: {sid}", 404)

    if Classification.is_accessible(user['classification'], submission['classification']) \
            and (submission['params']['submitter'] == user['uname'] or 'admin' in user['type']):
        with forge.get_filestore() as f_transport:
            STORAGE.delete_submission_tree_bulk(sid, Classification, transport=f_transport)
        STORAGE.submission.commit()
        return make_api_response({"success": True})
    else:
        return make_api_response("", "You are not allowed to delete this submission.", 403)
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

    Note 1:
        If you are submitting a sha256 or a URL, you must use the application/json encoding and
        one of sha256 or url parameters must be included in the data block.

    Note 2:
        If you are submitting a file directly, you have to use multipart/form-data encoding; this
        was done to reduce the memory footprint and speed up file transfers.
        ** Read the documentation of the mime multipart standard if your library does not support it **

        The multipart/form-data for sending binary has two parts:
            - The first part contains a JSON dump of the optional params and uses the name 'json'
            - The last part contains the file binary, uses the name 'bin' and includes a filename

    Note 3:
        The ingest API uses the user's default settings to submit files to the system
        unless these settings are overridden in the 'params' field. There are exceptions
        to that rule, however: fields deep_scan, ignore_filtering and ignore_cache are reset
        to False because they lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
     //REQUIRED VALUES: One of the following
     "sha256": "1234...CDEF"         # SHA256 hash of the file
     "url": "http://...",            # Url to fetch the file from

     //OPTIONAL VALUES
     "name": "file.exe",             # Name of the file

     "metadata": {                   # Submission Metadata
         "key": val,                 # Key/Value pair for metadata parameters
     },

     "params": {                     # Submission parameters
         "key": val,                 # Key/Value pair for params that differ from the user's defaults
     },                              # DEFAULT: /api/v3/user/submission_params/<user>/

     "generate_alert": False,        # Generate an alert in our alerting system or not
     "notification_queue": None,     # Name of the notification queue
     "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"     <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--            <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None

    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            if 'multipart/form-data' in request.content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in request.content_type:
                data = request.json
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                name = data.get("name", None) or sha256 or os.path.basename(url) or None
            else:
                return make_api_response({}, "Invalid content type", 400)

            if not data:
                return make_api_response({}, "Missing data block", 400)

            # Get notification queue parameters
            notification_queue = data.get('notification_queue', None)
            notification_threshold = data.get('notification_threshold', None)
            if not isinstance(notification_threshold, int) and notification_threshold:
                return make_api_response({}, "notification_threshold should be an int", 400)

            # Get generate alert parameter
            generate_alert = data.get('generate_alert', False)
            if not isinstance(generate_alert, bool):
                return make_api_response({}, "generate_alert should be a boolean", 400)

            # Get file name
            if not name:
                return make_api_response({}, "Filename missing", 400)

            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            original_file = out_file = os.path.join(out_dir, name)

            # Load file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response({}, "SHA256 does not exist in our datastore", 404)
                elif url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response({}, "URL submissions are disabled in this system", 400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({}, "File too big to be scanned.", 400)
                    except InvalidUrlException:
                        return make_api_response({}, "Url provided is invalid.", 400)
                    except ForbiddenLocation:
                        return make_api_response({}, "Hostname in this URL cannot be resolved.", 400)
                else:
                    return make_api_response({}, "Missing file to scan. No binary, sha256 or url provided.", 400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            # Load default user params
            s_params = ui_to_submission_params(STORAGE.user_settings.get(user['uname'], as_obj=False))
            if not s_params:
                s_params = get_default_user_settings(user)

            # Reset dangerous user settings to safe values
            s_params.update({
                'deep_scan': False,
                "priority": 150,
                "ignore_cache": False,
                "ignore_dynamic_recursion_prevention": False,
                "ignore_filtering": False,
                "type": "INGEST"
            })

            # Apply provided params
            s_params.update(data.get("params", {}))

            # Override final parameters
            s_params.update({
                'generate_alert': generate_alert,
                'max_extracted': config.core.ingester.default_max_extracted,
                'max_supplementary': config.core.ingester.default_max_supplementary,
                'priority': min(s_params.get("priority", 150), config.ui.ingest_max_priority),
                'submitter': user['uname']
            })

            # Calculate file digest
            fileinfo = identify.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get('ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response("", err=msg, status_code=400)
            elif fileinfo['size'] == 0:
                return make_api_response("", err="File empty. Ingestion failed", status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(out_file, fileinfo)
            if extracted_path:
                out_file = extracted_path

            # Save the file to the filestore if needs be
            sha256 = fileinfo['sha256']
            if not f_transport.exists(sha256):
                f_transport.upload(out_file, sha256, location='far')

            # Freshen file object
            expiry = now_as_iso(s_params['ttl'] * 24 * 60 * 60) if s_params.get('ttl', None) else None
            STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry, s_params['classification'])

            # Setup notification queue if needed
            if notification_queue:
                notification_params = {
                    "queue": notification_queue,
                    "threshold": notification_threshold
                }
            else:
                notification_params = {}

            # Load metadata, set up some default values if they are missing and append the cart metadata
            ingest_id = get_random_id()
            metadata = flatten(data.get("metadata", {}))
            metadata['ingest_id'] = ingest_id
            metadata['type'] = s_params['type']
            name = al_meta.pop('name', name)
            metadata.update(al_meta)
            if 'ts' not in metadata:
                metadata['ts'] = now_as_iso()
            metadata.update(extra_meta)

            # Set description if it does not exist
            s_params['description'] = s_params['description'] or f"[{s_params['type']}] Inspection of file: {name}"

            # Create submission object
            try:
                submission_obj = Submission({
                    "sid": ingest_id,
                    "files": [{'name': name, 'sha256': sha256, 'size': fileinfo['size']}],
                    "notification": notification_params,
                    "metadata": metadata,
                    "params": s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Send submission object for processing
            ingest.push(submission_obj.as_primitives())
            return make_api_response({"ingest_id": ingest_id})
        finally:
            # Cleanup files on disk
            try:
                if original_file and os.path.exists(original_file):
                    os.unlink(original_file)
            except Exception:
                pass

            try:
                if extracted_path and os.path.exists(extracted_path):
                    os.unlink(extracted_path)
            except Exception:
                pass

            try:
                if os.path.exists(out_dir):
                    shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
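# A client-side sketch of the multipart contract documented above. The endpoint
# path, HTTP method and auth headers are assumptions; the part names 'json' and
# 'bin' come straight from the docstring.
import json
import os
import requests

def ingest_file(path, api='https://localhost/api/v4/ingest/', auth_headers=None):
    with open(path, 'rb') as fh:
        parts = {
            'json': (None, json.dumps({'generate_alert': True,
                                       'params': {'priority': 100}}), 'application/json'),
            'bin': (os.path.basename(path), fh, 'application/octet-stream'),
        }
        resp = requests.post(api, headers=auth_headers, files=parts)
    resp.raise_for_status()
    return resp.json()['api_response']['ingest_id']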
def fs():
    return forge.get_filestore(connection_attempts=1)
def filestore():
    try:
        return forge.get_filestore(config, connection_attempts=1)
    except ConnectionError as err:
        pytest.skip(str(err))
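# The fixtures above read like pytest fixture bodies with their decorators
# stripped; a sketch of how such a fixture is presumably wired up and consumed.
# The round-trip test is hypothetical, but put/get/exists/delete all appear in
# the filestore tests elsewhere in this section.
import pytest

@pytest.fixture()
def filestore(config):
    try:
        return forge.get_filestore(config, connection_attempts=1)
    except ConnectionError as err:
        pytest.skip(str(err))

def test_filestore_round_trip(filestore):
    filestore.put('smoke_test', b'hello')
    try:
        assert filestore.exists('smoke_test')
        assert filestore.get('smoke_test') == b'hello'
    finally:
        filestore.delete('smoke_test')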
RATE_LIMITER = Counters(prefix="quota", host=redis, track_counters=True)

# End of Configuration
#################################################################

#################################################################
# Prepare loggers
config.logging.log_to_console = config.logging.log_to_console or DEBUG
al_log.init_logging('svc', config=config)

LOGGER = logging.getLogger('assemblyline.svc')

LOGGER.debug('Logger ready!')
# End of prepare logger
#################################################################

#################################################################
# Global instances
STORAGE = forge.get_datastore(config=config)
FILESTORE = forge.get_filestore(config=config)
LOCK = threading.Lock()
TASKING_CLIENT = TaskingClient(datastore=STORAGE, filestore=FILESTORE,
                               redis=redis, redis_persist=redis_persist)
SAFELIST_CLIENT = SafelistClient(datastore=STORAGE)
# End global
#################################################################
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method.
    This api will force the browser into download mode.

    Variables:
    sha256       => A resource locator for the file (sha256)

    Arguments (optional):
    encoding     => Type of encoding use for the resulting file
    name         => Name of the file to download
    sid          => Submission ID where the file is from

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        params = load_user_settings(user)

        name = request.args.get('name', sha256) or sha256
        name = os.path.basename(name)
        name = safe_str(name)

        sid = request.args.get('sid', None) or None
        submission = {}
        submission_meta = {}
        if sid is not None:
            submission = STORAGE.submission.get(sid, as_obj=False)
            if submission is None:
                submission = {}
            hash_list = [submission.get('files', [])[0].get('sha256', None)]
            hash_list.extend([x[:64] for x in submission.get('errors', [])])
            hash_list.extend([x[:64] for x in submission.get('results', [])])

            if sha256 not in hash_list:
                return make_api_response({}, f"File {sha256} is not associated to submission {sid}.", 403)

            if Classification.is_accessible(user['classification'], submission['classification']):
                submission_meta.update(unflatten(submission['metadata']))

        if Classification.enforce:
            submission_classification = submission.get('classification', file_obj['classification'])
            submission_meta['classification'] = Classification.max_classification(submission_classification,
                                                                                  file_obj['classification'])

        encoding = request.args.get('encoding', params['download_encoding'])
        if encoding not in ['raw', 'cart']:
            return make_api_response({}, f"{encoding.upper()} is not in the valid encoding types: [raw, cart]", 403)

        if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

        _, download_path = tempfile.mkstemp()
        try:
            with forge.get_filestore() as f_transport:
                downloaded_from = f_transport.download(sha256, download_path)

            if not downloaded_from:
                return make_api_response({}, "The file was not found in the system.", 404)

            if encoding == 'raw':
                target_path = download_path
            else:
                target_path, name = encode_file(download_path, name, submission_meta)

            try:
                return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))
            finally:
                if target_path and os.path.exists(target_path):
                    os.unlink(target_path)
        finally:
            if download_path and os.path.exists(download_path):
                os.unlink(download_path)
    else:
        return make_api_response({}, "You are not allowed to download this file.", 403)
def fs(config):
    return forge.get_filestore(config)
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis

    Variables:
    sha256         => Resource locator (SHA256)

    Arguments (Optional):
    copy_sid    => Mimic the attributes of this SID.
    name        => Name of the file for the submission

    Data Block:
    None

    Result example:
    # Submission message object as a json dictionary
    """
    user = kwargs['user']
    copy_sid = request.args.get('copy_sid', None)
    name = request.args.get('name', sha256)

    if copy_sid:
        submission = STORAGE.submission.get(copy_sid, as_obj=False)
    else:
        submission = None

    if submission:
        if not Classification.is_accessible(user['classification'], submission['classification']):
            return make_api_response("",
                                     "You are not allowed to re-submit a submission that you don't have access to",
                                     403)

        submission_params = submission['params']
        submission_params['classification'] = submission['classification']
    else:
        submission_params = ui_to_submission_params(STORAGE.user_settings.get(user['uname'], as_obj=False))

    with forge.get_filestore() as f_transport:
        if not f_transport.exists(sha256):
            return make_api_response({},
                                     "File %s cannot be found on the server therefore it cannot be resubmitted."
                                     % sha256,
                                     status_code=404)

        files = [{'name': name, 'sha256': sha256}]

        submission_params['submitter'] = user['uname']
        if 'priority' not in submission_params:
            submission_params['priority'] = 500
        submission_params['description'] = "Resubmit %s for Dynamic Analysis" % name
        if "Dynamic Analysis" not in submission_params['services']['selected']:
            submission_params['services']['selected'].append("Dynamic Analysis")

        try:
            submission_obj = Submission({
                "files": files,
                "params": submission_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response("", err=str(e), status_code=400)

        try:
            submit_result = SubmissionClient(datastore=STORAGE, filestore=f_transport,
                                             config=config).submit(submission_obj)
        except SubmissionException as e:
            return make_api_response("", err=str(e), status_code=400)

    return make_api_response(submit_result.as_primitives())
def submit(**kwargs):
    """
    Submit a single file, sha256 or url for analysis

    Note 1:
        If you are submitting a sha256 or a URL, you must use the application/json encoding and
        one of sha256 or url parameters must be included in the data block.

    Note 2:
        If you are submitting a file directly, you have to use multipart/form-data encoding; this
        was done to reduce the memory footprint and speed up file transfers.
        ** Read the documentation of the mime multipart standard if your library does not support it **

        The multipart/form-data for sending binary has two parts:
            - The first part contains a JSON dump of the optional params and uses the name 'json'
            - The last part contains the file binary, uses the name 'bin' and includes a filename

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
      // REQUIRED: One of the two following
      "sha256": "123...DEF",    # SHA256 hash of the file already in the datastore
      "url": "http://...",      # Url to fetch the file from

      // OPTIONAL VALUES
      "name": "file.exe",       # Name of the file to scan otherwise the sha256 or base file of the url

      "metadata": {             # Submission metadata
        "key": val,             # Key/Value pair metadata values
      },

      "params": {               # Submission parameters
        "key": val,             # Key/Value pair for params that differ from the defaults
      },                        # Default params can be fetched at /api/v3/user/submission_params/<user>/
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"     <-- JSON data blob part (only previous optional values valid)

    {"metadata": {"hello": "world"}}
    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--            <-- End of HTTP transmission

    Result example:
    <Submission message object as a json dictionary>
    """
    user = kwargs['user']

    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())

    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            if 'multipart/form-data' in request.content_type:
                if 'json' in request.values:
                    data = json.loads(request.values['json'])
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in request.content_type:
                data = request.json
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                name = data.get("name", None) or sha256 or os.path.basename(url) or None
            else:
                return make_api_response({}, "Invalid content type", 400)

            if data is None:
                return make_api_response({}, "Missing data block", 400)

            if not name:
                return make_api_response({}, "Filename missing", 400)

            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            # Create task object
            if "ui_params" in data:
                s_params = ui_to_submission_params(data['ui_params'])
            else:
                s_params = ui_to_submission_params(STORAGE.user_settings.get(user['uname'], as_obj=False))

            if not s_params:
                s_params = get_default_user_settings(user)

            s_params.update(data.get("params", {}))
            if 'groups' not in s_params:
                s_params['groups'] = user['groups']

            s_params['quota_item'] = True
            s_params['submitter'] = user['uname']
            if not s_params['description']:
                s_params['description'] = "Inspection of file: %s" % name

            if not Classification.is_accessible(user['classification'], s_params['classification']):
                return make_api_response({}, "You cannot start a scan with a higher "
                                             "classification than you're allowed to see", 400)

            # Prepare the output directory
            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            out_file = os.path.join(out_dir, name)

            # Get the output file
            extra_meta = {}
            if not binary:
                if sha256:
                    if f_transport.exists(sha256):
                        f_transport.download(sha256, out_file)
                    else:
                        return make_api_response({}, "SHA256 does not exist in our datastore", 404)
                elif url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response({}, "URL submissions are disabled in this system", 400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({}, "File too big to be scanned.", 400)
                    except InvalidUrlException:
                        return make_api_response({}, "Url provided is invalid.", 400)
                    except ForbiddenLocation:
                        return make_api_response({}, "Hostname in this URL cannot be resolved.", 400)
                else:
                    return make_api_response({}, "Missing file to scan. No binary, sha256 or url provided.", 400)
            else:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())

            try:
                metadata = flatten(data.get('metadata', {}))
                metadata.update(extra_meta)
                submission_obj = Submission({
                    "files": [],
                    "metadata": metadata,
                    "params": s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Submit the task to the system
            try:
                result = SubmissionClient(datastore=STORAGE, filestore=f_transport,
                                          config=config).submit(submission_obj,
                                                                local_files=[out_file], cleanup=False)
            except SubmissionException as e:
                return make_api_response("", err=str(e), status_code=400)

            return make_api_response(result.as_primitives())
        finally:
            try:
                # noinspection PyUnboundLocalVariable
                os.unlink(out_file)
            except Exception:
                pass

            try:
                shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
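# A sketch of the application/json flavour of the submit API above, re-scanning
# a file already in the datastore by hash. The endpoint path, HTTP method and
# auth headers are assumptions; the body fields mirror the docstring.
import requests

def submit_by_hash(sha256, api='https://localhost/api/v4/submit/', auth_headers=None):
    body = {
        'sha256': sha256,
        'name': 'sample.bin',
        'metadata': {'source': 'manual-resubmit'},
    }
    resp = requests.post(api, headers=auth_headers, json=body)
    resp.raise_for_status()
    # The api_response envelope carries the full submission message, keyed by 'sid'
    return resp.json()['api_response']['sid']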
def create_bundle(sid, working_dir=WORK_DIR):
    with forge.get_datastore() as datastore:
        temp_bundle_file = f"bundle_{get_random_id()}"
        current_working_dir = os.path.join(working_dir, temp_bundle_file)
        try:
            submission = datastore.submission.get(sid, as_obj=False)

            if submission is None:
                raise SubmissionNotFound("Can't find submission %s, skipping." % sid)
            else:
                target_file = os.path.join(working_dir, f"{temp_bundle_file}.tgz")

                try:
                    os.makedirs(current_working_dir)
                except Exception as e:
                    if isinstance(e, PermissionError):
                        raise

                # Create file information data
                file_tree = datastore.get_or_create_file_tree(submission,
                                                              config.submission.max_extraction_depth)['tree']
                flatten_tree = list(set(recursive_flatten_tree(file_tree) +
                                        [r[:64] for r in submission.get("results", [])]))
                file_infos, _ = get_file_infos(copy(flatten_tree), datastore)

                # Add bundling metadata
                if 'bundle.source' not in submission['metadata']:
                    submission['metadata']['bundle.source'] = config.ui.fqdn
                if Classification.enforce and 'bundle.classification' not in submission['metadata']:
                    submission['metadata']['bundle.classification'] = submission['classification']

                data = {
                    'submission': submission,
                    'files': {"list": flatten_tree, "tree": file_tree, "infos": file_infos},
                    'results': get_results(submission.get("results", []), file_infos, datastore),
                    'errors': get_errors(submission.get("errors", []), datastore)
                }

                # Save result files
                with open(os.path.join(current_working_dir, "results.json"), "w") as fp:
                    json.dump(data, fp)

                # Download all related files
                with forge.get_filestore() as filestore:
                    for sha256 in flatten_tree:
                        try:
                            filestore.download(sha256, os.path.join(current_working_dir, sha256))
                        except FileStoreException:
                            pass

                # Create the bundle
                subprocess.check_call("tar czf %s *" % target_file, shell=True, cwd=current_working_dir)

                return target_file
        except Exception as e:
            raise BundlingException("Could not bundle submission '%s'. [%s: %s]"
                                    % (sid, type(e).__name__, str(e)))
        finally:
            if current_working_dir:
                subprocess.check_call(["rm", "-rf", current_working_dir])
def start_ui_submission(ui_sid, **kwargs):
    """
    Start UI submission.
    Starts processing after files were uploaded to the server.

    Variables:
    ui_sid     => UUID for the current UI file upload

    Arguments:
    None

    Data Block (REQUIRED):
    Dictionary of UI specific user settings

    Result example:
    {
     'started': True,                    # Has the submission started processing?
     'sid': "c7668cfa-...-c4132285142e"  # Submission ID
    }
    """
    user = kwargs['user']

    quota_error = check_submission_quota(user)
    if quota_error:
        return make_api_response("", quota_error, 503)

    ui_params = request.json
    ui_params['groups'] = kwargs['user']['groups']
    ui_params['quota_item'] = True
    ui_params['submitter'] = user['uname']

    if not Classification.is_accessible(user['classification'], ui_params['classification']):
        return make_api_response({"started": False, "sid": None},
                                 "You cannot start a scan with a higher "
                                 "classification than you're allowed to see", 403)

    request_files = []
    request_dirs = []
    fnames = []
    try:
        flist = glob.glob(TEMP_DIR + ui_sid + "*")
        if len(flist) > 0:
            # Generate file list
            for fpath in flist:
                request_dirs.append(fpath)
                files = os.listdir(fpath)
                for myfile in files:
                    request_files.append(os.path.join(fpath, myfile))
                    if myfile not in fnames:
                        fnames.append(myfile)

            if not ui_params['description']:
                ui_params['description'] = "Inspection of file%s: %s" % (
                    "s" if len(fnames) > 1 else "", ", ".join(fnames))

            # Submit to dispatcher
            try:
                submission_obj = Submission({
                    "files": [],
                    "params": ui_to_submission_params(ui_params)
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            with forge.get_filestore() as f_transport:
                try:
                    result = SubmissionClient(datastore=STORAGE, filestore=f_transport,
                                              config=config).submit(submission_obj,
                                                                    local_files=request_files, cleanup=False)
                except SubmissionException as e:
                    return make_api_response("", err=str(e), status_code=400)

            return make_api_response({"started": True, "sid": result.sid})
        else:
            return make_api_response({"started": False, "sid": None},
                                     "No files were found for ID %s. Try again..." % ui_sid, 404)
    finally:
        # Remove files
        for myfile in request_files:
            try:
                os.unlink(myfile)
            except Exception:
                pass

        # Remove dirs
        for fpath in request_dirs:
            try:
                os.rmdir(fpath)
            except Exception:
                pass
def import_bundle(path, working_dir=WORK_DIR, min_classification=Classification.UNRESTRICTED,
                  allow_incomplete=False):
    with forge.get_datastore() as datastore:
        current_working_dir = os.path.join(working_dir, get_random_id())
        res_file = os.path.join(current_working_dir, "results.json")
        try:
            os.makedirs(current_working_dir)
        except Exception:
            pass

        # Extract the bundle
        try:
            subprocess.check_call(["tar", "-zxf", path, "-C", current_working_dir])
        except subprocess.CalledProcessError:
            raise BundlingException("Bundle decompression failed. Not a valid bundle...")

        with open(res_file, 'rb') as fh:
            data = json.load(fh)

        submission = data['submission']
        results = data['results']
        files = data['files']
        errors = data['errors']

        try:
            sid = submission['sid']

            # Check if we have all the service results
            for res_key in submission['results']:
                if res_key not in results['results'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete results in bundle. Skipping %s..." % sid)

            # Check if we have all the files
            for sha256 in list(set([x[:64] for x in submission['results']])):
                if sha256 not in files['infos'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete files in bundle. Skipping %s..." % sid)

            # Check if we have all the errors
            for err_key in submission['errors']:
                if err_key not in errors['errors'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete errors in bundle. Skipping %s..." % sid)

            if datastore.submission.get(sid, as_obj=False):
                raise SubmissionAlreadyExist("Submission %s already exists." % sid)

            # Make sure the bundle's submission meets the minimum classification and save the submission
            submission['classification'] = Classification.max_classification(submission['classification'],
                                                                             min_classification)
            submission.update(Classification.get_access_control_parts(submission['classification']))
            datastore.submission.save(sid, submission)

            # Make sure files meet the minimum classification and save the files
            with forge.get_filestore() as filestore:
                for f, f_data in files['infos'].items():
                    f_classification = Classification.max_classification(f_data['classification'],
                                                                         min_classification)
                    datastore.save_or_freshen_file(f, f_data, f_data['expiry_ts'], f_classification,
                                                   cl_engine=Classification)
                    try:
                        filestore.upload(os.path.join(current_working_dir, f), f)
                    except IOError:
                        pass

            # Make sure results meet the minimum classification and save the results
            for key, res in results['results'].items():
                if key.endswith(".e"):
                    datastore.emptyresult.save(key, {"expiry_ts": res['expiry_ts']})
                else:
                    res['classification'] = Classification.max_classification(res['classification'],
                                                                              min_classification)
                    datastore.result.save(key, res)

            # Make sure errors meet the minimum classification and save the errors
            for ekey, err in errors['errors'].items():
                datastore.error.save(ekey, err)
        finally:
            # Perform working dir cleanup
            try:
                os.remove(path)
            except Exception:
                pass

            try:
                shutil.rmtree(current_working_dir, ignore_errors=True)
            except Exception:
                pass
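# A usage sketch tying the two helpers above together: export a submission from
# one instance and replay it into another. The sid value is hypothetical.
bundle_path = create_bundle("c7668cfa-...-c4132285142e")
try:
    import_bundle(bundle_path, min_classification=Classification.UNRESTRICTED)
except SubmissionAlreadyExist:
    pass  # this sid was already imported on the destination instance
# Note: import_bundle deletes the bundle file itself in its finally block.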