def test_rc4_override(self):
    rc4_key = b"Test Da Key !"
    tmp_header = {'name': 'hello.txt'}
    tmp_footer = {'rc4_key': rc4_key.decode()}
    plaintext = b'0123456789' * 100
    pt_stream = BytesIO(plaintext)
    ct_stream = BytesIO()
    cart.pack_stream(pt_stream, ct_stream,
                     optional_header=tmp_header, optional_footer=tmp_footer,
                     arc4_key_override=rc4_key)
    crypt_text = ct_stream.getvalue()

    # Unpacking with the default key must fail when a custom key was used
    ct_stream = BytesIO(crypt_text)
    pt_stream = BytesIO()
    with self.assertRaises(cart.InvalidARC4KeyException):
        cart.unpack_stream(ct_stream, pt_stream)

    # Supplying the same override key recovers the header and footer
    ct_stream = BytesIO(crypt_text)
    pt_stream = BytesIO()
    (header, footer) = cart.unpack_stream(ct_stream, pt_stream, arc4_key_override=rc4_key)
    self.assertEqual(header, tmp_header)
    self.assertEqual(footer, tmp_footer)
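# For reference, the same override round-trip outside the test harness. A minimal
# sketch reusing only the API exercised above (pack_stream, unpack_stream,
# arc4_key_override); the key, filename and payload values are illustrative.
def _arc4_override_example():
    from io import BytesIO
    import cart

    key = b"Test Da Key !"
    packed = BytesIO()
    cart.pack_stream(BytesIO(b'0123456789'), packed,
                     optional_header={'name': 'hello.txt'},
                     arc4_key_override=key)

    # The same key must be supplied on unpack; using the default key would
    # raise cart.InvalidARC4KeyException, as the test above asserts.
    plain = BytesIO()
    header, footer = cart.unpack_stream(BytesIO(packed.getvalue()), plain,
                                        arc4_key_override=key)
    return header, footer, plain.getvalue()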
def test_not_a_cart(self):
    fake_cart = b'0123456789' * 1000
    ct_stream = BytesIO(fake_cart)
    ot_stream = BytesIO()
    with self.assertRaises(cart.InvalidCARTException):
        cart.unpack_stream(ct_stream, ot_stream)
def test_not_a_cart(self):
    from cStringIO import StringIO
    fake_cart = '0123456789' * 1000
    ct_stream = StringIO(fake_cart)
    ot_stream = StringIO()
    try:
        cart.unpack_stream(ct_stream, ot_stream)
    except TypeError as e:
        self.assertEqual(str(e), "This is not valid CaRT file")
    else:
        self.fail("unpack_stream should have raised TypeError for non-CaRT data")
def decode_file(original_path, fileinfo):
    extracted_path = None
    hdr = {}
    with open(original_path, 'rb') as original_file:
        if is_cart(original_file.read(256)):
            original_file.seek(0)

            extracted_fd, extracted_path = tempfile.mkstemp()
            extracted_file = os.fdopen(extracted_fd, 'wb')

            cart_extracted = False
            try:
                hdr, _ = unpack_stream(original_file, extracted_file)
                cart_extracted = True
            except Exception:
                extracted_path = None
                hdr = {}
                fileinfo['type'] = 'corrupted/cart'
            finally:
                extracted_file.close()

            if cart_extracted:
                fileinfo = identify.fileinfo(extracted_path)

    return extracted_path, fileinfo, hdr
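# A hypothetical call site for decode_file(); the path and the starting
# fileinfo dict are illustrative, not taken from the original code.
def _decode_file_example():
    fileinfo = {'type': 'archive/cart'}
    extracted_path, fileinfo, hdr = decode_file('/tmp/sample.cart', fileinfo)
    if extracted_path is not None:
        print('Extracted to %s, original name: %s' % (extracted_path, hdr.get('name')))
    elif fileinfo['type'] == 'corrupted/cart':
        print('CaRT header found, but the file could not be unpacked')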
def test_empty(self):
    """ Empty input stream, empty opt header, empty opt footer, no digests. """
    empty_stream = BytesIO()
    output_stream = BytesIO()
    header = footer = {}

    # Pack with empty everything
    cart.pack_stream(empty_stream, output_stream, header, footer, auto_digests=())
    packed_text = output_stream.getvalue()
    self.assert_valid_mandatory_header(packed_text)

    # Now test unpacking the result.
    packed_stream = BytesIO(packed_text)
    plain_stream = BytesIO()
    (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
    plain_text = plain_stream.getvalue()

    self.assertEqual(opt_header, {})
    self.assertEqual(opt_footer, {})
    self.assertEqual(len(plain_text), 0)
def test_simple(self):
    plaintext = b'0123456789' * 10000000
    pt_stream = BytesIO(plaintext)
    ct_stream = BytesIO()
    cart.pack_stream(pt_stream, ct_stream, {'name': 'hello.txt'}, {'digest': 'done'})
    crypt_text = ct_stream.getvalue()

    ct_stream = BytesIO(crypt_text)
    pt_stream = BytesIO()

    # Write the packed stream to disk so get_metadata_only() can read it back
    temp_file = tempfile.mkstemp()[1]
    with open(temp_file, 'wb') as f:
        f.write(ct_stream.getvalue())

    (header, footer) = cart.unpack_stream(ct_stream, pt_stream)
    inline_metadata = {}
    if header:
        inline_metadata.update(header)
    if footer:
        inline_metadata.update(footer)

    plaintext_prime = pt_stream.getvalue()
    self.assertEqual(plaintext_prime, plaintext)

    metadata = cart.get_metadata_only(temp_file)
    self.assertEqual(metadata, inline_metadata)
    self.assertTrue(cart.is_cart(crypt_text))
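# As the test above verifies, header and footer metadata can be read without
# unpacking the payload. A sketch, assuming a CaRT file already on disk
# ('/tmp/hello.cart' is an illustrative path):
def _metadata_example():
    import cart
    meta = cart.get_metadata_only('/tmp/hello.cart')
    # For a file packed as above, this merges the optional header and footer
    # (including any digests the packer added to the footer).
    print(meta.get('name'), meta.get('digest'))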
def test_download_cart(datastore, login_session):
    _, session, host = login_session

    rand_hash = random.choice(file_res_list)[:64]
    resp = get_api_data(session, f"{host}/api/v4/file/download/{rand_hash}/?encoding=cart", raw=True)
    assert resp.startswith(b'CART')

    out = BytesIO()
    unpack_stream(BytesIO(resp), out)
    out.flush()
    out.seek(0)
    dl_hash = out.read().decode()

    assert dl_hash == rand_hash
def test_rc4_override(self):
    from cStringIO import StringIO
    rc4_key = "Test Da Key !"
    tmp_header = {'name': 'hello.txt'}
    tmp_footer = {'rc4_key': rc4_key}
    plaintext = '0123456789' * 100
    pt_stream = StringIO(plaintext)
    ct_stream = StringIO()
    cart.pack_stream(pt_stream, ct_stream,
                     optional_header=tmp_header, optional_footer=tmp_footer,
                     arc4_key_override=rc4_key)
    crypt_text = ct_stream.getvalue()

    # Unpacking with the default key must fail when a custom key was used
    ct_stream = StringIO(crypt_text)
    pt_stream = StringIO()
    try:
        cart.unpack_stream(ct_stream, pt_stream)
    except ValueError as e:
        self.assertEqual(str(e), "Invalid ARC4 Key, could not unpack header")
    else:
        self.fail("unpack_stream should have rejected the default ARC4 key")
def decode_file(original_path, fileinfo):
    extracted_path = None
    original_name = None
    al_meta = {}

    if fileinfo['tag'] in NEUTERED_FORMAT:
        from cart import unpack_stream

        extracted_fd, extracted_path = tempfile.mkstemp()
        extracted_file = os.fdopen(extracted_fd, 'wb')
        original_file = open(original_path, 'rb')  # unpack_stream expects a binary stream
        try:
            hdr, _ = unpack_stream(original_file, extracted_file)
            original_name = hdr.get('name', os.path.basename(original_path))
            al_meta = hdr.get("al", {}).get("meta", {})
        finally:
            original_file.close()
            extracted_file.close()

        fileinfo = identify.fileinfo(extracted_path)
        if original_name:
            fileinfo['path'] = original_name

    return extracted_path, original_name, fileinfo, al_meta
def test_large(self):
    """ 128MB stream, empty opt header, empty opt footer, default digests. """
    test_text = b'0' * 1024 * 1024 * 128
    in_stream = BytesIO(test_text)
    output_stream = BytesIO()
    test_header = {}
    test_footer = {}

    # Pack with empty header and footer
    cart.pack_stream(in_stream, output_stream, test_header, test_footer)
    packed_text = output_stream.getvalue()
    self.assert_valid_mandatory_header(packed_text)

    # Now test unpacking the result.
    packed_stream = BytesIO(packed_text)
    plain_stream = BytesIO()
    (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
    plain_text = plain_stream.getvalue()

    self.assertEqual(test_header, opt_header)
    self.assertEqual(test_footer, opt_footer)
    self.assertEqual(test_text, plain_text)
def test_small(self):
    """ 1 byte stream, 1 element opt header, 1 element opt footer, default digests. """
    test_text = b'a'
    in_stream = BytesIO(test_text)
    output_stream = BytesIO()
    test_header = {'testkey': 'testvalue'}
    test_footer = {'complete': 'yes'}

    # Pack with a one-element header and footer
    cart.pack_stream(in_stream, output_stream, test_header, test_footer)
    packed_text = output_stream.getvalue()
    self.assert_valid_mandatory_header(packed_text)

    # Now test unpacking the result.
    packed_stream = BytesIO(packed_text)
    plain_stream = BytesIO()
    (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
    plain_text = plain_stream.getvalue()

    self.assertEqual(test_header, opt_header)
    self.assertEqual(test_footer, opt_footer)
    self.assertEqual(test_text, plain_text)
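# The empty/large/small tests above share the same pack/unpack round-trip. As a
# sketch, the pattern could be factored into a helper (the name 'roundtrip' is
# hypothetical, not part of the cart API):
def roundtrip(data, header, footer, **kwargs):
    """Pack data with the given optional header/footer, then unpack it back."""
    from io import BytesIO
    import cart

    packed, plain = BytesIO(), BytesIO()
    cart.pack_stream(BytesIO(data), packed, header, footer, **kwargs)
    packed.seek(0)
    opt_header, opt_footer = cart.unpack_stream(packed, plain)
    return opt_header, opt_footer, plain.getvalue()

# e.g. opt_header, opt_footer, plain = roundtrip(b'a', {'testkey': 'testvalue'}, {'complete': 'yes'})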
def try_run(self):
    try:
        self.service_class = load_module_by_path(SERVICE_PATH)
    except ValueError:
        raise
    except Exception:
        LOG.error("Could not find service in path. Check your environment variables.")
        raise

    self.load_service_manifest()

    if not os.path.isfile(FILE_PATH):
        LOG.info(f"File not found: {FILE_PATH}")
        return

    self.file_dir = os.path.dirname(FILE_PATH)

    # Get filename and working dir
    file_name = os.path.basename(FILE_PATH)
    working_dir = os.path.join(self.file_dir, f'{os.path.basename(FILE_PATH)}_{SERVICE_NAME.lower()}')

    # Start service
    self.service.start_service()

    # Identify the file
    file_info = identify.fileinfo(FILE_PATH)
    if file_info['type'] == "archive/cart":
        # This is a CaRT file: un-CaRT it and recreate the file info object
        original_temp = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        with open(FILE_PATH, 'rb') as ifile, open(original_temp, 'wb') as ofile:
            unpack_stream(ifile, ofile)

        file_info = identify.fileinfo(original_temp)
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.move(original_temp, target_file)
        LOG.info(f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing")
    else:
        # It's not a CaRT, move the file to the right place to be processed
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.copyfile(FILE_PATH, target_file)

    # Create service processing task
    service_task = ServiceTask(dict(
        sid=get_random_id(),
        metadata={},
        service_name=SERVICE_NAME,
        service_config=self.submission_params,
        fileinfo=dict(
            magic=file_info['magic'],
            md5=file_info['md5'],
            mime=file_info['mime'],
            sha1=file_info['sha1'],
            sha256=file_info['sha256'],
            size=file_info['size'],
            type=file_info['type'],
        ),
        filename=file_name,
        min_classification=forge.get_classification().UNRESTRICTED,
        max_files=501,  # TODO: get the actual value
        ttl=3600,
    ))

    LOG.info(f"Starting task with SID: {service_task.sid}")

    # Set the working directory to a directory with same parent as input file
    if os.path.isdir(working_dir):
        shutil.rmtree(working_dir)
    if not os.path.isdir(working_dir):
        os.makedirs(os.path.join(working_dir, 'working_directory'))

    self.service.handle_task(service_task)

    # Move the result.json and extracted/supplementary files to the working directory
    source = os.path.join(tempfile.gettempdir(), 'working_directory')
    if not os.path.exists(source):
        os.makedirs(source)

    files = os.listdir(source)
    for f in files:
        shutil.move(os.path.join(source, f), os.path.join(working_dir, 'working_directory'))

    # Cleanup files from the original directory created by the service base
    shutil.rmtree(source)

    result_json = os.path.join(tempfile.gettempdir(),
                               f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

    if not os.path.exists(result_json):
        raise Exception("A service error occurred and no result json was found.")

    # Validate the generated result
    with open(result_json, 'r') as fh:
        try:
            result = json.load(fh)
            result.pop('temp_submission_data', None)
            for file in result['response']['extracted'] + result['response']['supplementary']:
                file.pop('path', None)

            # Load heuristics
            heuristics = get_heuristics()

            # Transform heuristics and calculate score
            total_score = 0
            for section in result['result']['sections']:
                if section['heuristic']:
                    heur_id = section['heuristic']['heur_id']
                    try:
                        section['heuristic'] = service_heuristic_to_result_heuristic(
                            section['heuristic'], heuristics)
                        total_score += section['heuristic']['score']
                    except InvalidHeuristicException:
                        section['heuristic'] = None
                    # Only set the name if the heuristic survived the transform
                    if section['heuristic']:
                        section['heuristic']['name'] = heuristics[heur_id]['name']
            result['result']['score'] = total_score

            # Add timestamps for creation, archive and expiry
            result['created'] = now_as_iso()
            result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
            result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 * 60)

            result = Result(result)

            # Print the result on console if in debug mode
            if args.debug:
                LOG.debug(f"{SERVICE_NAME.upper()}-RESULT".center(60, '-'))
                for line in pprint.pformat(result.result.as_primitives()).split('\n'):
                    LOG.debug(line)
        except Exception as e:
            LOG.error(f"Invalid result created: {str(e)}")

    LOG.info(f"Cleaning up file used for temporary processing: {target_file}")
    os.unlink(target_file)

    LOG.info(f"Moving {result_json} to the working directory: {working_dir}/result.json")
    shutil.move(result_json, os.path.join(working_dir, 'result.json'))

    LOG.info(f"Successfully completed task. Output directory: {working_dir}")
def import_bundle(path, working_dir=WORK_DIR, min_classification=Classification.UNRESTRICTED,
                  allow_incomplete=False):
    with forge.get_datastore(archive_access=True) as datastore:
        current_working_dir = os.path.join(working_dir, get_random_id())
        res_file = os.path.join(current_working_dir, "results.json")
        try:
            os.makedirs(current_working_dir)
        except Exception:
            pass

        with open(path, 'rb') as original_file:
            if is_cart(original_file.read(256)):
                original_file.seek(0)

                extracted_fd, extracted_path = tempfile.mkstemp()
                extracted_file = os.fdopen(extracted_fd, 'wb')

                try:
                    hdr, _ = unpack_stream(original_file, extracted_file)
                    if hdr.get('al', {}).get('type', 'unknown') != BUNDLE_TYPE:
                        raise BundlingException(f"Not a valid CaRTed bundle, should be of type: {BUNDLE_TYPE}")
                finally:
                    extracted_file.close()
            else:
                extracted_path = path

        # Extract the bundle
        try:
            subprocess.check_call(["tar", "-zxf", extracted_path, "-C", current_working_dir])
        except subprocess.CalledProcessError:
            raise BundlingException("Bundle decompression failed. Not a valid bundle...")

        with open(res_file, 'rb') as fh:
            data = json.load(fh)

        submission = data['submission']
        results = data['results']
        files = data['files']
        errors = data['errors']

        try:
            sid = submission['sid']

            # Check if we have all the service results
            for res_key in submission['results']:
                if res_key not in results['results'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete results in bundle. Skipping %s..." % sid)

            # Check if we have all the files
            for sha256 in list(set([x[:64] for x in submission['results']])):
                if sha256 not in files['infos'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete files in bundle. Skipping %s..." % sid)

            # Check if we have all the errors
            for err_key in submission['errors']:
                if err_key not in errors['errors'].keys() and not allow_incomplete:
                    raise IncompleteBundle("Incomplete errors in bundle. Skipping %s..." % sid)

            if datastore.submission.get(sid, as_obj=False):
                raise SubmissionAlreadyExist("Submission %s already exists." % sid)

            # Make sure the bundle's submission meets the minimum classification and save the submission
            submission['classification'] = Classification.max_classification(submission['classification'],
                                                                             min_classification)
            submission.update(Classification.get_access_control_parts(submission['classification']))
            datastore.submission.save(sid, submission)

            # Make sure files meet minimum classification and save the files
            with forge.get_filestore() as filestore:
                for f, f_data in files['infos'].items():
                    f_classification = Classification.max_classification(f_data['classification'],
                                                                         min_classification)
                    datastore.save_or_freshen_file(f, f_data, f_data['expiry_ts'], f_classification,
                                                   cl_engine=Classification)
                    try:
                        filestore.upload(os.path.join(current_working_dir, f), f)
                    except IOError:
                        pass

            # Make sure results meet minimum classification and save the results
            for key, res in results['results'].items():
                if key.endswith(".e"):
                    datastore.emptyresult.save(key, {"expiry_ts": res['expiry_ts']})
                else:
                    res['classification'] = Classification.max_classification(res['classification'],
                                                                              min_classification)
                    datastore.result.save(key, res)

            # Make sure errors meet minimum classification and save the errors
            for ekey, err in errors['errors'].items():
                datastore.error.save(ekey, err)

            return submission
        finally:
            try:
                os.remove(extracted_path)
            except Exception:
                pass

            try:
                os.remove(path)
            except Exception:
                pass

            try:
                shutil.rmtree(current_working_dir, ignore_errors=True)
            except Exception:
                pass
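# A hypothetical invocation of import_bundle(); the bundle path is illustrative.
# On success the bundle's submission record is returned; IncompleteBundle,
# SubmissionAlreadyExist or BundlingException are raised otherwise.
def _import_bundle_example():
    submission = import_bundle('/tmp/submission.bundle', allow_incomplete=True)
    print("Imported submission: %s" % submission['sid'])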
class TestCart(unittest.TestCase):
    def setUp(self):
        self.MANDATORY_HEADER_SIZE = struct.calcsize(cart.MANDATORY_HEADER_FMT)

    def tearDown(self):
        pass

    def assert_valid_mandatory_header(self, packed):
        if not len(packed) >= self.MANDATORY_HEADER_SIZE:
            raise AssertionError('Not enough bytes for mandatory header.')

        # Unpack the header
        (magic, version, reserved, arc4_key, opt_hlen) = \
            struct.unpack(cart.MANDATORY_HEADER_FMT, packed[:self.MANDATORY_HEADER_SIZE])

        self.assertEqual(magic, 'CART')
        self.assertEqual(version, 1)
        self.assertEqual(reserved, 0)
        self.assertEqual(arc4_key, cart.DEFAULT_ARC4_KEY)
        # self.assertGreaterEqual(opt_hlen, 0)

    def test_empty(self):
        """ Empty input stream, empty opt header, empty opt footer, no digests. """
        empty_stream = StringIO()
        output_stream = StringIO()
        header = footer = {}

        # Pack with empty everything
        cart.pack_stream(empty_stream, output_stream, header, footer, auto_digests=())
        packed_text = output_stream.getvalue()
        self.assert_valid_mandatory_header(packed_text)

        # Now test unpacking the result.
        packed_stream = StringIO(packed_text)
        plain_stream = StringIO()
        (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
        plain_text = plain_stream.getvalue()

        self.assertEqual(opt_header, {})
        self.assertEqual(opt_footer, {})
        self.assertEqual(len(plain_text), 0)

    def test_small(self):
        """ 1 byte stream, 1 element opt header, 1 element opt footer, default digests. """
        test_text = 'a'
        in_stream = StringIO(test_text)
        output_stream = StringIO()
        test_header = {'testkey': 'testvalue'}
        test_footer = {'complete': 'yes'}

        # Pack with a one-element header and footer
        cart.pack_stream(in_stream, output_stream, test_header, test_footer)
        packed_text = output_stream.getvalue()
        self.assert_valid_mandatory_header(packed_text)

        # Now test unpacking the result.
        packed_stream = StringIO(packed_text)
        plain_stream = StringIO()
        (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
        plain_text = plain_stream.getvalue()

        self.assertEqual(test_header, opt_header)
        self.assertEqual(test_footer, opt_footer)
        self.assertEqual(test_text, plain_text)

    def test_large(self):
        """ 128MB stream, empty opt header, empty opt footer, default digests. """
        test_text = '0' * 1024 * 1024 * 128
        in_stream = StringIO(test_text)
        output_stream = StringIO()
        test_header = {}
        test_footer = {}

        # Pack with empty header and footer
        cart.pack_stream(in_stream, output_stream, test_header, test_footer)
        packed_text = output_stream.getvalue()
        self.assert_valid_mandatory_header(packed_text)

        # Now test unpacking the result.
        packed_stream = StringIO(packed_text)
        plain_stream = StringIO()
        (opt_header, opt_footer) = cart.unpack_stream(packed_stream, plain_stream)
        plain_text = plain_stream.getvalue()

        self.assertEqual(test_header, opt_header)
        self.assertEqual(test_footer, opt_footer)
        self.assertEqual(test_text, plain_text)

    def test_simple(self):
        from cStringIO import StringIO
        plaintext = '0123456789' * 10000000
        pt_stream = StringIO(plaintext)
        ct_stream = StringIO()
        cart.pack_stream(pt_stream, ct_stream, {'name': 'hello.txt'}, {'digest': 'done'})
        crypt_text = ct_stream.getvalue()

        ct_stream = StringIO(crypt_text)
        pt_stream = StringIO()

        temp_file = tempfile.mkstemp()[1]
        with open(temp_file, 'wb') as f:
            f.write(ct_stream.getvalue())

        (header, footer) = cart.unpack_stream(ct_stream, pt_stream)
        inline_metadata = {}
        if header:
            inline_metadata.update(header)
        if footer:
            inline_metadata.update(footer)

        plaintext_prime = pt_stream.getvalue()
        self.assertEqual(plaintext_prime, plaintext)

        metadata = cart.get_metadata_only(temp_file)
        self.assertEqual(metadata, inline_metadata)
        self.assertTrue(cart.is_cart(crypt_text))

    def test_rc4_override(self):
        from cStringIO import StringIO
        rc4_key = "Test Da Key !"
        tmp_header = {'name': 'hello.txt'}
        tmp_footer = {'rc4_key': rc4_key}
        plaintext = '0123456789' * 100
        pt_stream = StringIO(plaintext)
        ct_stream = StringIO()
        cart.pack_stream(pt_stream, ct_stream,
                         optional_header=tmp_header, optional_footer=tmp_footer,
                         arc4_key_override=rc4_key)
        crypt_text = ct_stream.getvalue()

        # Unpacking with the default key must fail when a custom key was used
        ct_stream = StringIO(crypt_text)
        pt_stream = StringIO()
        try:
            cart.unpack_stream(ct_stream, pt_stream)
        except ValueError as e:
            self.assertEqual(str(e), "Invalid ARC4 Key, could not unpack header")
        else:
            self.fail("unpack_stream should have rejected the default ARC4 key")

        # Supplying the same override key recovers the header and footer
        ct_stream = StringIO(crypt_text)
        pt_stream = StringIO()
        (header, footer) = cart.unpack_stream(ct_stream, pt_stream, arc4_key_override=rc4_key)
        self.assertEqual(header, tmp_header)
        self.assertEqual(footer, tmp_footer)
def import_bundle(path, working_dir=WORK_DIR, min_classification=Classification.UNRESTRICTED,
                  allow_incomplete=False, rescan_services=None, exist_ok=False, cleanup=True,
                  identify=None):
    with forge.get_datastore(archive_access=True) as datastore:
        current_working_dir = os.path.join(working_dir, get_random_id())
        res_file = os.path.join(current_working_dir, "results.json")
        try:
            os.makedirs(current_working_dir)
        except Exception:
            pass

        with open(path, 'rb') as original_file:
            if is_cart(original_file.read(256)):
                original_file.seek(0)

                extracted_fd, extracted_path = tempfile.mkstemp()
                extracted_file = os.fdopen(extracted_fd, 'wb')

                try:
                    hdr, _ = unpack_stream(original_file, extracted_file)
                    if hdr.get('al', {}).get('type', 'unknown') != BUNDLE_TYPE:
                        raise BundlingException(f"Not a valid CaRTed bundle, should be of type: {BUNDLE_TYPE}")
                finally:
                    extracted_file.close()
            else:
                extracted_path = path

        # Extract the bundle
        try:
            subprocess.check_call(["tar", "-zxf", extracted_path, "-C", current_working_dir])
        except subprocess.CalledProcessError:
            raise BundlingException("Bundle decompression failed. Not a valid bundle...")

        with open(res_file, 'rb') as fh:
            data = json.load(fh)

        alert = data.get('alert', None)
        submission = data.get('submission', None)

        try:
            if submission:
                sid = submission['sid']

                # Load results, files and errors
                results = data.get('results', None)
                files = data.get('files', None)
                errors = data.get('errors', None)

                # Check if we have all the service results
                for res_key in submission['results']:
                    if results is None or (res_key not in results['results'].keys() and not allow_incomplete):
                        raise IncompleteBundle("Incomplete results in bundle. Skipping %s..." % sid)

                # Check if we have all the files
                for sha256 in list(set([x[:64] for x in submission['results']])):
                    if files is None or (sha256 not in files['infos'].keys() and not allow_incomplete):
                        raise IncompleteBundle("Incomplete files in bundle. Skipping %s..." % sid)

                # Check if we have all the errors
                for err_key in submission['errors']:
                    if errors is None or (err_key not in errors['errors'].keys() and not allow_incomplete):
                        raise IncompleteBundle("Incomplete errors in bundle. Skipping %s..." % sid)

                # Check if the submission does not already exist
                if not datastore.submission.exists(sid):
                    # Make sure the bundle's submission meets the minimum classification and save the submission
                    submission['classification'] = Classification.max_classification(submission['classification'],
                                                                                     min_classification)
                    submission.setdefault('metadata', {})
                    submission['metadata']['bundle.loaded'] = now_as_iso()
                    submission['metadata'].pop('replay', None)
                    submission.update(Classification.get_access_control_parts(submission['classification']))

                    if not rescan_services:
                        # Save the submission in the system
                        datastore.submission.save(sid, submission)

                    # Make sure files meet minimum classification and save the files
                    with forge.get_filestore() as filestore:
                        for f, f_data in files['infos'].items():
                            f_classification = Classification.max_classification(f_data['classification'],
                                                                                 min_classification)
                            datastore.save_or_freshen_file(f, f_data, f_data['expiry_ts'], f_classification,
                                                           cl_engine=Classification)
                            try:
                                filestore.upload(os.path.join(current_working_dir, f), f)
                            except IOError:
                                pass

                        # Make sure results meet minimum classification and save the results
                        for key, res in results['results'].items():
                            if key.endswith(".e"):
                                datastore.emptyresult.save(key, {"expiry_ts": res['expiry_ts']})
                            else:
                                res['classification'] = Classification.max_classification(res['classification'],
                                                                                          min_classification)
                                datastore.result.save(key, res)

                        # Make sure errors meet minimum classification and save the errors
                        for ekey, err in errors['errors'].items():
                            datastore.error.save(ekey, err)

                        # Start the rescan
                        if rescan_services and SubmissionClient:
                            extracted_file_infos = {
                                k: {vk: v[vk] for vk in ['magic', 'md5', 'mime', 'sha1',
                                                         'sha256', 'size', 'type']}
                                for k, v in files['infos'].items() if k in files['list']
                            }
                            with SubmissionClient(datastore=datastore, filestore=filestore,
                                                  config=config, identify=identify) as sc:
                                sc.rescan(submission, results['results'], extracted_file_infos,
                                          files['tree'], list(errors['errors'].keys()), rescan_services)
                elif not exist_ok:
                    raise SubmissionAlreadyExist("Submission %s already exists." % sid)

            # Save the alert if present and it does not already exist
            if alert and not datastore.alert.exists(alert['alert_id']):
                alert['classification'] = Classification.max_classification(alert['classification'],
                                                                            min_classification)
                alert.setdefault('metadata', {})
                alert['metadata']['bundle.loaded'] = now_as_iso()
                alert['metadata'].pop('replay', None)
                alert['workflows_completed'] = False
                datastore.alert.save(alert['alert_id'], alert)

            return submission
        finally:
            if extracted_path != path and os.path.exists(extracted_path):
                os.remove(extracted_path)

            if cleanup and os.path.exists(path):
                os.remove(path)

            if os.path.exists(current_working_dir):
                shutil.rmtree(current_working_dir, ignore_errors=True)
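# A hypothetical invocation of the extended signature above; the bundle path and
# the service names passed to rescan_services are illustrative assumptions.
def _import_bundle_rescan_example():
    submission = import_bundle('/tmp/submission.bundle',
                               rescan_services=['Extract', 'YARA'],  # illustrative service names
                               exist_ok=True,   # do not fail if the submission already exists
                               cleanup=False)   # keep the original bundle file on disk
    if submission:
        print("Imported submission: %s" % submission['sid'])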