class FileSchema(Schema):
    """The file schema.

    This is the schema for the file document stored within the mongo
    database.
    """

    not_blank = marshmallow.validate.Length(min=1, error='Field cannot be blank')

    _id = fields.ObjectId(load_only=True)
    file_type = fields.Enum(required=True, type=enums.FileType, missing=enums.FileType.FILE)
    name = fields.Str(required=True, validate=not_blank)
    sha256_digest = fields.Str()
    description = fields.Str()
    tags = fields.Str()
    magic = fields.Str()
    mime = fields.Str()
    size = fields.Int()
    timestamp = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
    submission_type = fields.Str(validate=not_blank, default="unknown")
    parents = fields.Dict(values=fields.List(fields.Str(validate=not_blank)),
                          keys=fields.Str(validate=not_blank),
                          default={})
    children = fields.Dict(values=fields.List(fields.Str(validate=not_blank)),
                           keys=fields.Str(validate=not_blank),
                           default={})
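# Illustrative sketch (not part of the codebase): a round trip through the
# schema above. The field values are invented placeholders; load()/dump()
# semantics follow the marshmallow-style Schema base class used throughout.
raw = {
    'file_type': 'file',
    'name': 'suspicious.exe',
    'sha256_digest': '<placeholder digest>',
    'timestamp': '2018-01-01T12:00:00.000000',
}
data = FileSchema().load(raw)   # validates and applies load-time defaults ('missing=...')
doc = FileSchema().dump(data)   # applies dump-time 'default's; '_id' is load_only and never dumped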
class CommandSchema(Schema):
    """The command schema.

    This is the base schema for the command document stored within the mongo
    database.

    Note:
        Scales are allowed to embed additional information into this document
        but it will be ignored.
    """

    _id = fields.ObjectId(load_only=True)
    _output_id = fields.ObjectId(load_only=True, missing=None)  # GridFS
    sha256_digest = fields.Str(required=True)
    scale = fields.Str(required=True)
    command = fields.Str(required=True)
    args = fields.Dict(default={}, missing={})
    asynchronous = fields.Boolean(default=False)
    timeout = fields.Int(default=600)
    format = fields.Str(type=enums.Format, missing=enums.Format.JSON)
    output = fields.Raw(dump_only=True, default=None, missing=None)
    status = fields.Str(type=enums.Status, missing=enums.Status.PENDING, default=enums.Status.PENDING)
    timestamp = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
    start_time = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
    end_time = fields.DateTime("%Y-%m-%dT%H:%M:%S.%f")
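# Sketch: the smallest command document CommandSchema will accept. Only the
# three required fields are supplied; args, format and status fall back to
# their 'missing' values on load, while 'default' values such as timeout=600
# apply on dump. The digest is a placeholder.
cmd = CommandSchema().load({
    'sha256_digest': '<placeholder digest>',
    'scale': 'strings',
    'command': 'all_strings',
})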
class CommandsSchema(schema.Schema):
    """Extends `Schema`.

    Defines the valid schema for a POST request.
    """

    args = fields.Dict(required=False, default={}, missing={})
    command = fields.Str(required=True)
    format = fields.Str(type=enums.Format, missing=enums.Format.JSON)
    sha256_digests = fields.List(fields.Str(), required=True)
    scale = fields.Str(required=True)
    timeout = fields.Int(required=False)
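# Sketch: a POST body that CommandsSchema would accept, fanning one command
# out over several samples. The digests are placeholders; validate() returns
# an empty dict when the body is well formed.
body = {
    'command': 'all_strings',
    'scale': 'strings',
    'sha256_digests': ['<digest 1>', '<digest 2>'],
    'timeout': 300,
}
errors = CommandsSchema().validate(body)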
class StoreHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""

    @tornadoparser.use_args({
        # filter[field]: str
        'file_type': fields.Enum(type=enums.FileType, required=False, missing=None),
        'from': fields.Int(required=False, missing=0),
        'limit': fields.Int(required=False, missing=10),
        'operator': fields.Str(required=False, missing='and'),
        'order': fields.Int(required=False, missing=-1),
        'sort': fields.Str(required=False, missing=None)
    })
    async def get(self, data):
        documents = []
        filter_ = self.create_filter(self.request.arguments, data['operator'])
        if filter_:
            filter_ = {'$and': [filter_]}
            if data['file_type']:
                filter_['$and'] += [{'file_type': data['file_type']}]
        elif data['file_type']:
            filter_ = {'file_type': data['file_type']}
        # NOTE: With async (motor) there is no count() on the cursor, so we
        # have to work around that
        total = await db.async_file_collection.db.files.count_documents(
            filter_ if filter_ else {})
        cursor = db.async_file_collection.select_all(
            filter_, data['order'], data['sort'], data['limit'], data['from'])
        while await cursor.fetch_next:
            documents += [cursor.next_object()]
        documents = schema.FileSchema(many=True).dump(
            schema.FileSchema(many=True).load(documents))
        self.jsonify({'samples': documents, 'total': total})
        self.finish()
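# Sketch: paging through the store from a client. The '/store' route and base
# URL are assumptions; the query parameter names match the use_args spec above.
import requests
resp = requests.get('http://127.0.0.1:5000/store',
                    params={'from': 0, 'limit': 10, 'order': -1, 'file_type': 'file'})
page = resp.json()  # {'samples': [...], 'total': <count for paging>}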
class FilesHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""

    @tornadoparser.use_args({
        'limit': fields.Str(required=False),
        'operator': fields.Str(required=False, missing='and'),
        'order': fields.Int(required=False, missing=-1),
        'sort': fields.Str(required=False)
    })
    async def get(self, data):
        documents = []
        sort = None
        if 'sort' in data.keys():
            sort = data['sort']
        filter_ = self.create_filter(self.request.arguments, data['operator'])
        if filter_:
            filter_ = {'$and': [{'file_type': enums.FileType.FILE}, filter_]}
        else:
            filter_ = {'file_type': enums.FileType.FILE}
        cursor = db.async_file_collection.select_all(filter_, data['order'], sort)
        index = 0
        while await cursor.fetch_next:
            if 'limit' in data.keys():
                if index >= int(data['limit']):
                    break
                index += 1
            documents += [cursor.next_object()]
        documents = schema.FileSchema(many=True).dump(
            schema.FileSchema(many=True).load(documents))
        self.jsonify({'files': documents})
        self.finish()
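# Sketch: the files endpoint pins file_type to FILE and ANDs in any
# filter[field] arguments that create_filter() lifts from the query string.
# The '/files' route, base URL and the 'mime' filter key are assumptions.
import requests
resp = requests.get('http://127.0.0.1:5000/files',
                    params={'filter[mime]': 'application/x-dosexec', 'limit': '5'})
files = resp.json()['files']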
class Commands(scale.Commands):
    def check(self):
        strings = shutil.which('strings')
        if not strings:
            raise error.CommandWarning("Binary 'strings' not found")
        return

    @scale.command({
        'info': 'This function will return strings found within the file'
    })
    def all_strings(self, args, file, opts):
        return str(subprocess.check_output(["strings", file.file_path]),
                   encoding="utf-8").split('\n')

    @staticmethod
    def all_strings_plaintext(json):
        return '\n'.join(json)

    # (name, pattern) pairs that only need a 'matched and long enough' test;
    # UNIX_PATH_REGEX needs extra validation and is handled separately below
    SIMPLE_RULES = (
        ('IPV4_REGEX', regex.IPV4_REGEX),
        ('IPV6_REGEX', regex.IPV6_REGEX),
        ('EMAIL_REGEX', regex.EMAIL_REGEX),
        ('URL_REGEX', regex.URL_REGEX),
        ('DOMAIN_REGEX', regex.DOMAIN_REGEX),
        ('WINDOWS_PATH_REGEX', regex.WINDOWS_PATH_REGEX),
        ('MAC_REGEX', regex.MAC_REGEX),
        ('DATE1_REGEX', regex.DATE1_REGEX),
        ('DATE2_REGEX', regex.DATE2_REGEX),
        ('DATE3_REGEX', regex.DATE3_REGEX),
    )

    @scale.command({
        'args': {
            'min_length': fields.Int(default=5)
        },
        'info': 'This function will return interesting strings found within the file'
    })
    def interesting(self, args, file, opts):
        strings = str(subprocess.check_output(["strings", file.file_path]),
                      encoding="utf-8").split('\n')
        min_length = args['min_length']
        output = []
        for string in strings:
            rules = []
            for name, pattern in self.SIMPLE_RULES:
                match = pattern.search(string)
                if match and len(match.group()) > min_length:
                    rules += [name]
            match = regex.UNIX_PATH_REGEX.search(string)
            if match:
                valid_path = False
                match_str = match.group()
                if len(match_str) <= min_length:
                    # too short to be interesting; skip this string
                    continue
                if ((match_str.startswith("'") and match_str.endswith("'")) or
                        (match_str.startswith('"') and match_str.endswith('"'))):
                    # quoted paths are taken as-is
                    valid_path = True
                elif any(char in SPECIAL_CHARS for char in match_str):
                    # special characters must be backslash-escaped for the
                    # path to count
                    valid_path = True
                    for i in SPECIAL_CHARS:
                        if i in match_str:
                            index = match_str.index(i)
                            if index > 0 and match_str[index - 1] != "\\":
                                valid_path = False
                else:
                    valid_path = True
                if valid_path:
                    rules += ['UNIX_PATH_REGEX']
            if rules:
                output += ['{} ({})'.format(string, ', '.join(rules))]
        return output

    @staticmethod
    def interesting_plaintext(json):
        return '\n'.join(json)
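# Self-contained sketch of the rule-tagging idea used by `interesting` above.
# The two patterns are simplified stand-ins, not the scale's real regex module.
import re

RULES = [
    ('IPV4_REGEX', re.compile(r'(?:\d{1,3}\.){3}\d{1,3}')),
    ('URL_REGEX', re.compile(r'https?://[^\s]+')),
]

def tag(line, min_length=5):
    hits = []
    for name, pattern in RULES:
        match = pattern.search(line)
        if match and len(match.group()) > min_length:
            hits.append(name)
    return '{} ({})'.format(line, ', '.join(hits)) if hits else None

print(tag('GET http://203.0.113.7/payload.bin'))
# -> GET http://203.0.113.7/payload.bin (IPV4_REGEX, URL_REGEX)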
class Interface(scale.Interface):
    def check(self):
        if CUCKOO_API is None or CUCKOO_API == '':
            raise error.InterfaceError("config variable 'cuckoo_api' has not been set")

    @scale.pull({'info': 'summary of scores for the sample'})
    def info(self, args, file, opts):
        try:
            j = requests.get(CUCKOO_API + '/files/view/sha256/' + file.sha256_digest,
                             verify=VERIFY).json()
        except requests.exceptions.RequestException:
            raise error.InterfaceError("failed to connect to Cuckoo")
        if 'sample' not in j:
            raise error.InterfaceWarning("file has never been submitted to Cuckoo")
        s_id = j['sample']['id']
        r = requests.get(CUCKOO_API + '/tasks/list', verify=VERIFY)
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No reports, sample must be pending/running", "pending"
        j = r.json()
        output = []
        for t in j['tasks']:
            if t['sample_id'] == s_id:
                r = requests.get(CUCKOO_API + '/tasks/report/' + str(t['id']), verify=VERIFY)
                if r.status_code == requests.codes.ok:  # pylint: disable=no-member
                    j = r.json()
                    output += [{
                        'score': j['info']['score'],
                        'name': j['info']['machine']['name']
                    }]
        if not output:
            raise error.InterfaceWarning("no information available!")
        return {'info': output}

    def info_markdown(self, json):
        output = md.table_header(('Machine', 'Score'))
        for j in json['info']:
            score = j['score']
            if score > 5:
                s = "%red " + str(score) + " %"
            elif score > 3:
                s = "%yellow " + str(score) + " %"
            else:
                s = str(score)
            output += md.table_row((j['name'], s))
        return output

    @scale.pull({
        'args': {
            'id': fields.Str(required=True)
        },
        'info': 'view report summary'
    })
    def report(self, args, file, opts):
        # TODO: Hash match!
        try:
            r = requests.get(CUCKOO_API + '/tasks/report/' + args['id'], verify=VERIFY)
        except requests.exceptions.RequestException:
            raise error.InterfaceError("failed to connect to Cuckoo")
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No task for given id"
        j = r.json()
        output = {
            'score': j['info']['score'],
            'platform': j['info']['platform'],
            'analysis': {
                'category': j['info']['category'],
                'started': j['info']['started'],
                'ended': j['info']['ended'],
                'duration': j['info']['duration']
            },
            'machine': {
                'name': j['info']['machine']['name'],
                'manager': j['info']['machine']['manager']
            },
            'signatures': [{
                'severity': x['severity'],
                'description': x['description']
            } for x in j['signatures']]
        }
        return output

    def report_markdown(self, json):
        output = md.h4('General')
        output += md.paragraph(md.bold('Score: ') + str(json['score']))
        output += md.cr()
        output += md.paragraph(md.bold('Platform: ') + json['platform'])
        output += md.h4('Analysis')
        output += md.table_header(('Category', 'Started', 'Ended', 'Duration'))
        output += md.table_row((json['analysis']['category'],
                                str(json['analysis']['started']),
                                str(json['analysis']['ended']),
                                str(json['analysis']['duration'])))
        output += md.h4('Machines')
        output += md.table_header(('Name', 'Manager'))
        output += md.table_row((json['machine']['name'], json['machine']['manager']))
        output += md.h4('Signatures')
        output += md.table_header(('Severity', 'Description'))
        for s in json['signatures']:
            if s['severity'] > 2:
                output += md.table_row(('%red ' + str(s['severity']) + ' %', s['description']))
            elif s['severity'] > 1:
                output += md.table_row(('%orange ' + str(s['severity']) + ' %', s['description']))
            else:
                output += md.table_row(('%blue ' + str(s['severity']) + ' %', s['description']))
        return output

    @scale.pull({'info': 'view reports for sample'})
    def reports(self, args, file, opts):
        try:
            j = requests.get(CUCKOO_API + '/files/view/sha256/' + file.sha256_digest,
                             verify=VERIFY).json()
        except requests.exceptions.RequestException:
            raise error.InterfaceError("failed to connect to Cuckoo")
        if 'sample' not in j:
            raise error.InterfaceWarning("file has never been submitted to Cuckoo")
        s_id = j['sample']['id']
        r = requests.get(CUCKOO_API + '/tasks/list', verify=VERIFY)
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            return "No reports, sample must be pending/running", "pending"
        j = r.json()
        output = {'reports': []}
        for t in j['tasks']:
            if t['sample_id'] == s_id:
                output['reports'] += [{
                    'id': str(t['id']),
                    'url': config.scale_configs['cuckoo']['cuckoo_url'] + str(t['id']),
                    'timestamp': str(t['added_on']),
                    'status': str(t['status'])
                }]
        return output

    def reports_markdown(self, json):
        output = md.table_header(('ID', 'URL', 'Timestamp', 'Status'))
        for r in json['reports']:
            output += md.table_row((r['id'], r['url'], r['timestamp'], r['status']))
        return output

    @scale.push({
        'args': {
            'machine': fields.Str(required=False),
            'priority': fields.Int(required=False),
            'timeout': fields.Int(required=False)
        },
        'info': 'submit sample to cuckoo'
    })
    def submit(self, args, file, opts):
        document = db.file_collection.select(file.sha256_digest)
        with open(file.file_path, "rb") as f:
            try:
                r = requests.post(CUCKOO_API + '/tasks/create/file',
                                  files={"file": (document['name'], f)},
                                  verify=VERIFY)
            except requests.exceptions.RequestException:
                raise error.InterfaceError("failed to connect to Cuckoo")
        if not r.status_code == requests.codes.ok:  # pylint: disable=no-member
            raise error.InterfaceError('failed to submit sample to Cuckoo')
        j = r.json()
        if not j["task_id"]:
            raise error.InterfaceError('failed to submit sample to Cuckoo')
        return j
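# Sketch: the request/error pattern the interface repeats, factored into a
# tiny helper for illustration only. CUCKOO_API and VERIFY are the module
# config values assumed above; this helper is not part of the scale.
def cuckoo_get(path):
    try:
        r = requests.get(CUCKOO_API + path, verify=VERIFY)
    except requests.exceptions.RequestException:
        raise error.InterfaceError("failed to connect to Cuckoo")
    if r.status_code != requests.codes.ok:  # pylint: disable=no-member
        return None
    return r.json()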
class CommandHandler(snake_handler.SnakeHandler):
    """Extends `SnakeHandler`."""

    @tornadoparser.use_args({
        # 'args': fields.Dict(required=False, default={}, missing={}),
        'command': fields.Str(required=True),
        'format': fields.Str(type=enums.Format, missing=enums.Format.JSON),
        'output': fields.Bool(required=False, default=True, missing=True),
        'scale': fields.Str(required=True),
        'sha256_digest': fields.Str(required=True)
    })
    async def get(self, data):
        # NOTE: Tornado/Marshmallow does not like Dict in args, so we have to
        # parse it manually
        # TODO: Use marshmallow validation
        if 'args' in self.request.arguments and self.request.arguments['args']:
            data['args'] = json.loads(self.request.arguments['args'][0])
        else:
            data['args'] = {}
        document = await db.async_command_collection.select(
            data['sha256_digest'], data['scale'], data['command'], data['args'])
        if not document:
            self.write_warning("no output for given data", 404, data)
            self.finish()
            return
        if document['status'] == enums.Status.ERROR:
            self.write_warning("%s" % document['output'], 404, data)
            self.finish()
            return
        document = schema.CommandSchema().load(document)
        output = None
        if document['_output_id']:
            output = await db.async_command_output_collection.get(document['_output_id'])
        try:
            scale = scale_manager.get_scale(data['scale'])
            commands = scale_manager.get_component(scale, enums.ScaleComponent.COMMANDS)
            if data['output']:
                document['output'] = commands.snake.format(
                    data['format'], document['command'], output)
            document['format'] = data['format']
        except (SnakeError, TypeError) as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return
        document = schema.CommandSchema().dump(document)
        self.jsonify({'command': document})
        self.finish()

    @tornadoparser.use_args({
        'args': fields.Dict(required=False, default={}, missing={}),
        'asynchronous': fields.Bool(required=False),
        'command': fields.Str(required=True),
        'format': fields.Str(type=enums.Format, missing=enums.Format.JSON),
        'scale': fields.Str(required=True),
        'sha256_digest': fields.Str(required=True),
        'timeout': fields.Int(required=False)
    })
    async def post(self, data):
        # Check that there is a file for this hash
        document = await db.async_file_collection.select(data['sha256_digest'])
        if not document:
            self.write_warning("no sample for given data", 404, data)
            self.finish()
            return
        # Check scale support
        try:
            scale = scale_manager.get_scale(data['scale'], document['file_type'])
            commands = scale_manager.get_component(scale, enums.ScaleComponent.COMMANDS)
            cmd = commands.snake.command(data['command'])
        except SnakeError as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return
        # Validate arguments so as not to waste the user's time; yes, this is
        # also done on execution
        result, args = validate_args(cmd, data['args'])
        if not result:
            self.write_warning(args, 422, data)
            self.finish()
            return
        data['args'] = args
        # Queue command
        try:
            document = await route_support.queue_command(data)
        except SnakeError as err:
            self.write_warning("%s" % err, 500, data)
            self.finish()
            return
        document = schema.CommandSchema().load(document)
        output = None
        if document['_output_id']:
            output = await db.async_command_output_collection.get(document['_output_id'])
        try:
            document['output'] = commands.snake.format(
                data['format'], document['command'], output)
            document['format'] = data['format']
        except SnakeError as err:
            self.write_warning("%s" % err, 404, data)
            self.finish()
            return
        # Dump and finish
        document = schema.CommandSchema().dump(document)
        self.jsonify({"command": document})
        self.finish()
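# Sketch: driving the command endpoint from a client. The '/command' route and
# host are assumptions; parameter names match the use_args specs above. On
# GET, 'args' is sent as a JSON-encoded string because the handler parses it
# manually.
import json
import requests

BASE = 'http://127.0.0.1:5000/command'
requests.post(BASE, json={'sha256_digest': '<digest>', 'scale': 'strings',
                          'command': 'interesting', 'args': {'min_length': 8}})
resp = requests.get(BASE, params={'sha256_digest': '<digest>',
                                  'scale': 'strings',
                                  'command': 'interesting',
                                  'args': json.dumps({'min_length': 8})})
print(resp.json()['command']['output'])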