def initialize(self):
    """Extend `initialize`.

    Works out what sort of request we have and how to parse it. Streaming
    may not actually be required in which case it will not be used.
    """
    self.bytes_read = 0
    self.content_length = 0
    self.content_type = ''
    self.data = bytes()
    self.error = None
    self.stream = None
    if self.request.headers and 'Content-Encoding' in self.request.headers:
        gen_log.warning("Unsupported Content-Encoding: %s",
                        self.request.headers['Content-Encoding'])
        return
    if self.request.headers and 'Content-Type' in self.request.headers:
        self.content_length = int(
            self.request.headers['Content-Length']
        ) if 'Content-Length' in self.request.headers else 0
        self.content_type = self.request.headers['Content-Type']
        if self.content_type.startswith("application/x-www-form-urlencoded"):
            return
        elif self.content_type.startswith("multipart/form-data"):
            # If we have a POST that is multipart/form-data we will stream any
            # file content to disk. This will prevent excessive RAM usage.
            # Clearly we will need to keep tabs on the overall data size or
            # someone could still use too much RAM!
            self.stream = self.Stream()
            boundary = None
            fields = self.content_type.split(";")
            for field in fields:
                k, _, v = field.strip().partition("=")
                if k == "boundary" and v:
                    boundary = bytes(v, 'utf8')
            if not boundary:
                raise error.SnakeError('Content boundary not found')
            if boundary.startswith(b'"') and boundary.endswith(b'"'):
                boundary = boundary[1:-1]
            self.stream.boundary = boundary
            self.stream.working_dir = tempfile.TemporaryDirectory(
                dir=path.abspath(path.expanduser(
                    snake_config['cache_dir'])))
        else:
            self.error = error.SnakeError(
                'Unsupported Content-Type: %s' % self.content_type)
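# A minimal, self-contained sketch (not part of the handler) of the boundary
# extraction performed above, assuming a typical multipart Content-Type
# header; the example header value is illustrative only.
def _extract_boundary(content_type):
    """Return the multipart boundary as bytes, stripping optional quotes."""
    boundary = None
    for field in content_type.split(";"):
        k, _, v = field.strip().partition("=")
        if k == "boundary" and v:
            boundary = bytes(v, 'utf8')
    if boundary and boundary.startswith(b'"') and boundary.endswith(b'"'):
        boundary = boundary[1:-1]
    return boundary

assert _extract_boundary('multipart/form-data; boundary="abc123"') == b'abc123'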
async def unzip_file_python(file_path, file_name, output_dir, protected=False, password=None):
    """Unzip file using ZipFile.

    Uses ZipFile to extract a file from a zip into a given directory. It
    will handle password protected zips and if no password is presented
    then it will loop through a list of passwords stored in the snake
    configuration.

    Note:
        Only zips with a single file are supported.

    Args:
        file_path (str): The path to the zipped file.
        file_name (str): The name of the file to extract from the zip.
        output_dir (str): The directory to extract the file to.
        protected (bool, optional): Is the zip password protected. Defaults to False.
        password (str, optional): The password for the zip. Defaults to None.

    Returns:
        str: The path of the extracted file.

    Raises:
        RuntimeError: For any error that is not related to a bad password.
        SnakeError: When extraction of the file has failed.
    """
    zip_file = zipfile.ZipFile(file_path)
    new_path = None
    if protected:
        if password:
            try:
                new_path = zip_file.extract(file_name, output_dir,
                                            bytes(password, 'utf-8'))
            except RuntimeError as err:
                if 'Bad password' not in str(err):
                    raise
        else:
            for passwd in snake_config['zip_passwords']:
                try:
                    new_path = zip_file.extract(file_name, output_dir,
                                                bytes(passwd, 'utf-8'))
                except RuntimeError as err:
                    if 'Bad password' not in str(err):
                        raise
                if new_path:
                    break
        if not new_path:
            raise error.SnakeError('ZipError: incorrect password')
    else:
        new_path = zip_file.extract(file_name, output_dir, None)
    return new_path
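# A hedged usage sketch (not from the source): extracting a known member from
# a password-protected zip with unzip_file_python; the paths and password
# below are hypothetical.
async def _example_unzip_python():
    new_path = await unzip_file_python('/tmp/sample.zip', 'sample.bin',
                                       '/tmp/out', protected=True,
                                       password='infected')
    print(new_path)  # ZipFile.extract returns the path of the extracted member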
def test_snake_error():
    """Test the class SnakeError"""
    with pytest.raises(TypeError):
        error.SnakeError()  # pylint: disable=no-value-for-parameter
    err = error.SnakeError('hello')
    assert 'hello' in err.message
    assert err.status_code is None
    assert err.payload is None
    err = error.SnakeError('hello', 500)
    assert 'hello' in err.message
    assert err.status_code == 500
    assert err.payload is None
    err = error.SnakeError('hello', 500, 'extra')
    assert 'hello' in err.message
    assert err.status_code == 500
    assert err.payload == 'extra'
async def unzip_file(file_path, password=None):
    """Unzip a file.

    Unzips a file using unzip or ZipFile. For speed reasons if unzip is
    installed it will be used in favour of the ZipFile library. It will
    extract the file to the same directory as that of the zip file.

    Note:
        The zip file must contain only one file.

    Args:
        file_path (str): The zip file to unzip.
        password (str): The password for the zip. Defaults to None.

    Returns:
        str: The path to the extracted file.

    Raises:
        SnakeError: When the zip file contains more than one file.
            When the extraction fails.
    """
    zip_file = zipfile.ZipFile(file_path)
    info_list = zip_file.infolist()
    if len(info_list) != 1:
        raise error.SnakeError(
            'ZipError: only one file is allowed in the container')
    i = info_list[0]
    working_dir = os.path.dirname(file_path)
    new_path = None
    protected = i.flag_bits & 0x1
    # NOTE: ZipFile is slow as balls so we outsource to unzip if installed
    outsource = shutil.which('unzip')
    if outsource:
        new_path = await unzip_file_unix(file_path, i.filename, working_dir,
                                         protected, password)
    else:
        new_path = await unzip_file_python(file_path, i.filename, working_dir,
                                           protected, password)
    if not new_path:
        raise error.SnakeError('ZipError: failed to extract file')
    return new_path
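# A minimal sketch of calling unzip_file; '/tmp/sample.zip' stands in for a
# hypothetical single-file archive. The unzip binary is used when it is on
# PATH, the ZipFile fallback otherwise.
import asyncio

async def _example_unzip():
    # Raises SnakeError if the zip holds more than one file or extraction fails
    return await unzip_file('/tmp/sample.zip', password='infected')

# asyncio.run(_example_unzip())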
async def store_file(sha256_digest, file_path, file_type, data):
    """Store a file to disk.

    Uses file storage to store the new file to disk. Upon success insert
    the metadata into the database.

    Args:
        sha256_digest (str): The hash of the file to store.
        file_path (str): The location of the file to move into the store.
        file_type (:obj:`FileType`): The type of the file being stored.
        data (:obj:`CommandSchema`): The metadata for the file.

    Returns:
        :obj:`CommandSchema`: The updated document metadata.

    Raises:
        SnakeError: When the metadata cannot be inserted into the database.
    """
    # Save the file to the 'filedb' and add it to the database
    file_storage = utils.FileStorage()
    file_storage.create(file_path, sha256_digest)
    if not file_storage.save(move=True):
        raise error.SnakeError("Failed to store file on disk")
    data.update(file_storage.to_dict())
    data['name'] = strip_extensions(data['name'])
    data['timestamp'] = datetime.utcnow()
    data = schema.FileSchema().dump(data)
    data['file_type'] = file_type  # load_only=True
    document = await db.async_file_collection.insert(data)
    if not document:
        file_storage.delete()
        raise error.SnakeError("Failed to insert document")
    document = await db.async_file_collection.select(file_storage.sha256_digest)
    # Run any autoruns, if allowed
    await execute_autoruns(sha256_digest, file_type, file_storage.mime)
    return document
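# A hedged sketch of a store_file call; the digest, path and metadata fields
# are hypothetical, and enums.FileType.FILE is assumed from the scale
# commands elsewhere in this module set.
async def _example_store():
    metadata = {'name': 'sample.bin', 'description': 'uploaded via example'}
    return await store_file(
        'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
        '/tmp/upload/sample.bin', enums.FileType.FILE, metadata)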
def extract(self, args, file, opts):
    samples = []
    with tempfile.TemporaryDirectory(dir=path.abspath(
            path.expanduser(config.snake_config['cache_dir']))) as temp_dir:
        # Extract the samples
        proc = subprocess.run(
            [self.binwalk_path, file.file_path, '-e', '-C', temp_dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        # NOTE: CompletedProcess is always truthy, so check the return code
        if proc.returncode:
            raise error.CommandError(
                "failed to successfully extract from sample")
        # Get file name
        document = db.file_collection.select(file.sha256_digest)
        if not document:
            raise error.SnakeError("failed to get sample's metadata")
        # There will be one output directory containing files with the offsets as names
        contents = os.listdir(temp_dir)
        if not contents:
            return []
        directory = path.join(temp_dir, contents[0])
        for i in os.listdir(directory):
            file_path = path.join(directory, i)
            name = '{}.{}'.format(document['name'], i)
            file_schema = schema.FileSchema().load({
                'name': name,
                'description': 'extracted with binwalk'
            })
            new_file = fs.FileStorage()
            new_file.create(file_path)
            new_document = submitter.submit(file_schema, enums.FileType.FILE,
                                            new_file, file, NAME)
            new_document = schema.FileSchema().dump(schema.FileSchema().load(
                new_document))  # Required to clean the above
            samples += [new_document]
    return samples
def load_scale_config(self, scale_name):
    """Load a scale configuration from file.

    This loads the scale configuration files based on the scale name
    passed. It will load the base config along with the etc configuration
    if present.

    Args:
        scale_name (str): The name of the scale to load the configuration for.

    Raises:
        SnakeError: When the external configuration file fails to load.
    """
    self.scale_configs[scale_name] = {}
    # Load base if we need one
    config_path = pkg_resources.resource_filename(
        "snake.scales.{}".format(scale_name), "{}.conf".format(scale_name))
    if path.exists(config_path):
        with open(config_path, 'rb') as stream:
            base_config = yaml.safe_load(stream)
            self.scale_configs[scale_name].update(base_config)
    # Try and load from etc config
    etc_conf = path.join(
        path.abspath(path.expanduser(constants.ETC_DIR)), "scales",
        "{}.conf".format(scale_name))
    if path.exists(etc_conf):
        try:
            etc_config = {}
            with open(etc_conf, 'rb') as stream:
                etc_config = yaml.safe_load(stream)
            if etc_config is None:  # The config file is empty, this is fine
                etc_config = {}
            self.scale_configs[scale_name].update(etc_config)
        except Exception as err:
            raise error.SnakeError(
                'failed to load config: {}: {} - {}'.format(
                    etc_conf, err.__class__, err))
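# A small, runnable illustration of the override order above: etc values are
# applied after the base config, so they win. The keys are hypothetical.
base_config = {'rules_path': '/opt/rules', 'timeout': 30}
etc_config = {'rules_path': '/srv/rules'}
merged = dict(base_config)
merged.update(etc_config)  # dict.update applies the etc config last
assert merged == {'rules_path': '/srv/rules', 'timeout': 30}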
def binary_carver(self, args, file, opts):
    sample = {}
    with tempfile.TemporaryDirectory(dir=path.abspath(
            path.expanduser(config.snake_config['cache_dir']))) as temp_dir:
        # Try and carve
        file_path = r2_bin_carver.carve(file.file_path, temp_dir,
                                        args['offset'], args['size'],
                                        args['magic_bytes'])
        if not file_path:
            raise error.CommandError('failed to carve binary')
        if args['patch']:
            if not r2_bin_carver.patch(file_path):
                raise error.CommandError(
                    'failed to patch binary, not a valid pe file')
        # Get file name
        document = db.file_collection.select(file.sha256_digest)
        if not document:
            raise error.SnakeError("failed to get sample's metadata")
        # Create schema and save
        name = '{}.{}'.format(document['name'], args['offset'])
        file_schema = schema.FileSchema().load({
            'name': name,
            'description': 'extracted with radare2 script r2_bin_carver.py'
        })
        new_file = fs.FileStorage()
        new_file.create(file_path)
        sample = submitter.submit(file_schema, enums.FileType.FILE, new_file,
                                  file, NAME)
        sample = schema.FileSchema().dump(schema.FileSchema().load(
            sample))  # Required to clean the above
    return sample
def data_received(self, chunk):  # pylint: disable=too-many-branches, too-many-statements
    if self.error:
        raise self.error  # pylint: disable=raising-bad-type
    self.bytes_read += len(chunk)
    if len(self.data) > 104857600:  # Ensure that someone is not trying to fill RAM, 100MB
        raise error.SnakeError('Content-Length too large (truncated)')
    if self.stream:  # Cache files to disk
        chunk = self.stream.tail + chunk
        chunk_len = len(chunk)
        i = 0
        while i < chunk_len:
            if self.stream.state == 0:  # Find start of header
                soh = chunk.find(b'--' + self.stream.boundary, i)
                if soh != -1:
                    self.data += chunk[soh:soh + len(self.stream.boundary) + 4]
                    i = soh + len(self.stream.boundary) + 4
                    self.stream.state = 1
                    continue
            elif self.stream.state == 1:  # Find end of header
                eoh = chunk.find(b'\r\n\r\n', i)
                if eoh != -1:
                    self.stream.header += chunk[i:eoh + 4]
                    i = eoh + 4
                    if b'filename=' in self.stream.header:  # We have a file
                        self.stream.state = 2
                    else:
                        self.stream.state = 3
                    self.data += self.stream.header
                    self.stream.header = bytes()
                    continue
            elif self.stream.state == 2:  # Handle file based content
                soh = chunk.find(b'--' + self.stream.boundary, i)
                if soh != -1:
                    f_path = path.join(self.stream.working_dir.name,
                                       str(self.stream.file_count))
                    with open(f_path, 'a+b') as f:
                        f.write(chunk[i:soh - 2])  # -2 drops the extra '\r\n'
                    self.data += bytes(f_path + '\r\n', 'utf-8')
                    self.stream.file_count += 1
                    i = soh
                    self.stream.state = 0
                    continue
            elif self.stream.state == 3:  # Handle all other content
                soh = chunk.find(b'--' + self.stream.boundary, i)
                if soh != -1:
                    self.data += chunk[i:soh]
                    i = soh
                    self.stream.state = 0
                    continue
            # Handle the overlapping tail
            if i + TAIL_SIZE < chunk_len:
                if self.stream.state == 2:
                    f_path = path.join(self.stream.working_dir.name,
                                       str(self.stream.file_count))
                    with open(f_path, 'a+b') as f:
                        f.write(chunk[i:chunk_len - TAIL_SIZE])
                elif self.stream.state == 1:
                    self.stream.header += chunk[i:chunk_len - TAIL_SIZE]
                else:
                    self.data += chunk[i:chunk_len - TAIL_SIZE]
                self.stream.tail = chunk[chunk_len - TAIL_SIZE:]
                i += chunk_len
            else:
                self.stream.tail = chunk[i:]
                i += chunk_len
    else:  # Otherwise be normal
        self.data += chunk
    if self.bytes_read >= self.content_length:  # Finished, parse the new content
        httputil.parse_body_arguments(self.content_type, self.data,
                                      self.request.body_arguments,
                                      self.request.files, headers=None)
        for k, v in self.request.body_arguments.items():
            self.request.arguments.setdefault(k, []).extend(v)
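# A small sketch of why the overlapping tail is kept above: a boundary can be
# split across two chunks, so the last TAIL_SIZE bytes of each chunk are
# carried forward and re-scanned together with the next chunk. Values are
# illustrative.
boundary = b'--BOUND'
first = b'AAAA--BO'   # the boundary starts at the end of this chunk...
second = b'UND\r\n'   # ...and finishes at the start of the next
assert boundary not in first and boundary not in second
assert boundary in first[-(len(boundary) - 1):] + second  # visible once rejoined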
def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
    raise error.SnakeError('error')
async def queue_command(data):
    """Queue commands for execution.

    This will queue commands for execution on the celery workers.

    Note:
        The returned command schema will reflect the status of the queued
        command.

    Args:
        data (:obj:`CommandSchema`): The command to queue for execution.

    Returns:
        :obj:`CommandSchema`: The command schema with updates.
    """
    # The latest execution always wins, thus we replace the current one in the db
    document = await db.async_command_collection.select(
        data['sha256_digest'], data['scale'], data['command'], data['args'])
    if document:
        if 'status' in document and document['status'] == enums.Status.RUNNING:
            return schema.CommandSchema().dump(
                schema.CommandSchema().load(document))
        else:
            _output_id = None
            if '_output_id' in document:
                _output_id = document['_output_id']
            data['timestamp'] = datetime.utcnow()
            data = schema.CommandSchema().dump(data)
            await db.async_command_collection.replace(
                data['sha256_digest'], data['scale'], data['command'],
                data['args'], data)
            # NOTE: We delete after the replace to try and prevent concurrent
            # reads to a file while it is being deleted
            if _output_id:
                await db.async_command_output_collection.delete(_output_id)
    else:
        # Save the command, this will be in a pending state
        data['timestamp'] = datetime.utcnow()
        data = schema.CommandSchema().dump(data)
        await db.async_command_collection.insert(data)
    data = schema.CommandSchema().load(data)
    if data['asynchronous'] is True:
        celery.execute_command.apply_async(
            args=[data],
            time_limit=data['timeout'] + 30,
            soft_time_limit=data['timeout'])
    else:
        task = celery.execute_command.apply_async(
            args=[data],
            time_limit=data['timeout'] + 30,
            soft_time_limit=data['timeout'])
        result = await celery.wait_for_task(task)
        if not task.successful():
            document = await db.async_command_collection.select(
                data['sha256_digest'], data['scale'], data['command'],
                data['args'])
            _output_id = None
            if '_output_id' in document:
                _output_id = document['_output_id']
            _new_output_id = await db.async_command_output_collection.put(
                document['command'],
                b"{'error': 'worker failed please check log'}")
            document['_output_id'] = _new_output_id
            document['status'] = enums.Status.FAILED
            await db.async_command_collection.update(
                document['sha256_digest'], document['scale'],
                document['command'], data['args'], document)
            if _output_id:
                await db.async_command_output_collection.delete(_output_id)
            raise error.SnakeError(result)
    return await db.async_command_collection.select(
        data['sha256_digest'], data['scale'], data['command'], data['args'])
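# A hedged sketch of queueing a synchronous command; the field values are
# hypothetical but mirror the keys queue_command reads above (CommandSchema
# may require further fields).
async def _example_queue():
    command = {
        'sha256_digest': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
        'scale': 'strings',
        'command': 'all_strings',
        'args': {},
        'asynchronous': False,
        'timeout': 600,
    }
    return await queue_command(command)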
async def unzip_file_unix(file_path, file_name, output_dir, protected=False, password=None):  # pylint: disable=too-many-branches
    """Unzip file using unzip.

    Uses the unzip binary to extract a file from a zip into a given
    directory. It will handle password protected zips and if no password is
    presented then it will loop through a list of passwords stored in the
    snake configuration.

    Note:
        Only zips with a single file are supported.

    Args:
        file_path (str): The path to the zipped file.
        file_name (str): The name of the file to extract from the zip.
        output_dir (str): The directory to extract the file to.
        protected (bool, optional): Is the zip password protected. Defaults to False.
        password (str, optional): The password for the zip. Defaults to None.

    Returns:
        str: The path of the extracted file.

    Raises:
        SnakeError: When extraction of the file has failed.
    """
    err = ''
    new_path = None
    if protected:
        if password:
            proc = await asyncio.create_subprocess_exec(
                *["unzip", "-P", bytes(password, "utf-8"), "-j", file_path,
                  file_name, "-d", output_dir],
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE)
            _stdout, stderr = await proc.communicate()
            if not proc.returncode:
                # NOTE: We flatten dirs so we must strip dirs from file_name if present
                new_path = os.path.join(output_dir, file_name.split('/')[-1])
            else:
                err = str(stderr, encoding='utf-8')
        else:
            for passwd in snake_config['zip_passwords']:
                proc = await asyncio.create_subprocess_exec(
                    *["unzip", "-P", bytes(passwd, "utf-8"), "-j", file_path,
                      file_name, "-d", output_dir],
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE)
                _stdout, stderr = await proc.communicate()
                if not proc.returncode:
                    # NOTE: We flatten dirs so we must strip dirs from file_name if present
                    new_path = os.path.join(output_dir, file_name.split('/')[-1])
                else:
                    err = str(stderr, encoding='utf-8')
                if new_path:
                    break
    else:
        proc = await asyncio.create_subprocess_exec(
            *["unzip", "-j", file_path, file_name, "-d", output_dir],
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE)
        _stdout, stderr = await proc.communicate()
        if not proc.returncode:
            # NOTE: We flatten dirs so we must strip dirs from file_name if present
            new_path = os.path.join(output_dir, file_name.split('/')[-1])
        else:
            err = str(stderr, encoding='utf-8')
    if not new_path:
        if 'incorrect password' in err:
            raise error.SnakeError('ZipError: incorrect password')
        else:
            raise error.SnakeError('ZipError: {}'.format(err))
    return new_path
def submit(file_schema, file_type, file, parent, scale_name):  # pylint: disable=too-many-branches
    """Submit a new file to Snake.

    This is used generally by the command component of scales to submit a
    new file into snake.

    Args:
        file_schema (dict): The metadata for the new file.
        file_type (:obj:`FileType`): The type of the new file.
        file (:obj:`FileStorage`): The new file to submit.
        parent (:obj:`FileStorage`): The file the new file was derived from.
        scale_name (str): The name of the submitting scale.
    """
    # We need to be safe here so instance check the above
    if not isinstance(file_schema, dict):
        raise TypeError("file_schema must be of type dict")
    if not isinstance(file, fs.FileStorage):
        raise TypeError("file must be of type FileStorage")
    if not isinstance(parent, fs.FileStorage):
        raise TypeError("parent must be of type FileStorage")
    # If the hashes are the same, just stop
    if file.sha256_digest == parent.sha256_digest:
        return db.file_collection.select(file.sha256_digest)
    # Create submission type
    submission_type = 'scale:{}'.format(scale_name)
    # Check if the file to submit is already in Snake, if not lets add it
    document = db.file_collection.select(file.sha256_digest)
    if not document:
        # Validate
        data = schema.FileSchema().dump(schema.FileSchema().load(file_schema))
        # Save the file
        if not file.save(move=True):
            raise error.SnakeError(
                "could not save new file to disk for hash {}".format(
                    file.sha256_digest))
        data.update(file.to_dict())
        # NOTE: Don't set the parent we will do this later, so blank them out
        # if the scale tried to be smart
        data['children'] = {}
        data['parents'] = {}
        data['submission_type'] = submission_type
        data['timestamp'] = datetime.utcnow()
        data = schema.FileSchema().dump(data)
        data['file_type'] = file_type  # load_only=True
        # Save
        db.file_collection.insert(data)
    # Update the parent child relationships
    document = db.file_collection.select(file.sha256_digest)
    if document:
        # HACK: This is needed to get submission_type of parent
        p = db.file_collection.select(parent.sha256_digest)
        # Check if the parent and type already exist
        if 'parents' not in document:
            document['parents'] = {}
        if parent.sha256_digest in document['parents']:
            if submission_type in document['parents'][parent.sha256_digest]:
                return document
            else:
                document['parents'][parent.sha256_digest] += [p["submission_type"]]
        else:
            document['parents'][parent.sha256_digest] = [p["submission_type"]]
        # Validate
        document = schema.FileSchema().dump(schema.FileSchema().load(document))
        # Update
        db.file_collection.update(file.sha256_digest, document)
        # Update the parents children
        document = db.file_collection.select(parent.sha256_digest)
        if not document:
            # Parent does not exist, it has been deleted, don't update it
            return db.file_collection.select(file.sha256_digest)
        if 'children' not in document:
            document['children'] = {}
        if file.sha256_digest in document['children']:
            if submission_type in document['children'][file.sha256_digest]:
                return db.file_collection.select(file.sha256_digest)
            else:
                document['children'][file.sha256_digest] += [submission_type]
        else:
            document['children'][file.sha256_digest] = [submission_type]
        # Validate
        document = schema.FileSchema().dump(schema.FileSchema().load(document))
        # Update
        db.file_collection.update(parent.sha256_digest, document)
    else:
        raise error.SnakeError(
            "could not submit new file for hash {}".format(file.sha256_digest))
    return db.file_collection.select(file.sha256_digest)
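# A hedged sketch of how a scale command might call submit, mirroring the
# binwalk and binary_carver commands above; the names and paths are
# hypothetical and parent_file stands for the FileStorage of the sample
# being analysed.
def _example_submit(parent_file):
    file_schema = schema.FileSchema().load({
        'name': 'carved.bin',
        'description': 'extracted by an example scale'
    })
    new_file = fs.FileStorage()
    new_file.create('/tmp/carved.bin')
    return submit(file_schema, enums.FileType.FILE, new_file, parent_file,
                  'example_scale')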