def scan(
    self,
    content: bytes,
    payload_meta: Optional[PayloadMeta] = None,
    request_meta: Optional[RequestMeta] = None,
    add_start_dispatch: Optional[List[str]] = None,
    add_start_deep_dispatch: Optional[List[str]] = None,
    ratelimit: Optional[str] = None,
) -> StoqResponse:
    """
    Wrapper for `scan_payload` that creates a `Payload` object from bytes

    :param content: Raw bytes to be scanned
    :param payload_meta: Metadata pertaining to originating source
    :param request_meta: Metadata pertaining to the originating request
    :param add_start_dispatch: Force first round of scanning to use specified plugins
    :param add_start_deep_dispatch: Force second round of scanning to use specified plugins
    :param ratelimit: Rate limit calls to scan

    :return: Complete scan results
    :rtype: StoqResponse
    """
    payload_meta = PayloadMeta() if payload_meta is None else payload_meta
    payload = Payload(content, payload_meta)
    return self.scan_payload(
        payload, request_meta, add_start_dispatch, add_start_deep_dispatch
    )
async def scan(
    self,
    content: bytes,
    payload_meta: Optional[PayloadMeta] = None,
    request_meta: Optional[RequestMeta] = None,
    add_start_dispatch: Optional[List[str]] = None,
    ratelimit: Optional[str] = None,
) -> StoqResponse:
    """
    Wrapper for `scan_request` that creates a `Payload` object from bytes

    :param content: Raw bytes to be scanned
    :param payload_meta: Metadata pertaining to originating source
    :param request_meta: Metadata pertaining to the originating request
    :param add_start_dispatch: Force first round of scanning to use specified plugins
    :param ratelimit: Rate limit calls to scan
    """
    self.log.debug(
        f'Content received ({len(content)} bytes): '
        f'PayloadMeta: {helpers.dumps(payload_meta, indent=0)}, '
        f'RequestMeta: {helpers.dumps(request_meta, indent=0)}'
    )
    payload_meta = payload_meta or PayloadMeta()
    payload = Payload(content, payload_meta)
    request_meta = request_meta or RequestMeta()
    request = Request(payloads=[payload], request_meta=request_meta)
    return await self.scan_request(request, add_start_dispatch)
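# A minimal usage sketch (an assumption, not from the source) showing how the
# async `scan` wrapper above might be driven end to end. `always_dispatch`
# and the plugin name 'example_worker' are illustrative configuration.
import asyncio

from stoq import PayloadMeta, RequestMeta, Stoq

async def main() -> None:
    s = Stoq(always_dispatch=['example_worker'])
    response = await s.scan(
        b'example content',
        payload_meta=PayloadMeta(should_archive=False),
        request_meta=RequestMeta(archive_payloads=False),
    )
    print(response)  # StoqResponse serializes to JSON via str()

asyncio.run(main())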
async def scan(
    self, payload: Payload, request: Request
) -> Optional[WorkerResponse]:
    if self.RAISE_EXCEPTION:
        raise Exception('Test exception please ignore')
    extracted_payload_content = self.EXTRACTED_PAYLOAD or b'Lorem ipsum'
    extracted_payload = ExtractedPayload(
        extracted_payload_content,
        PayloadMeta(
            should_scan=self.SHOULD_SCAN, dispatch_to=self.EXTRACTED_DISPATCH_TO
        ),
    )
    wr = WorkerResponse({"valuable_insight": "wow"}, extracted=[extracted_payload])
    if self.RETURN_ERRORS:
        wr.errors.append(
            Error(
                plugin_name="simple_worker",
                error="Test error please ignore",
                payload_id=payload.results.payload_id,
            )
        )
    if self.ADDITIONAL_DISPATCH_TO:
        wr.dispatch_to.extend(self.ADDITIONAL_DISPATCH_TO)
    return wr
async def ingest(self, queue: Queue) -> None:
    consumer = AIOKafkaConsumer(
        self.topic,
        group_id=self.group,
        auto_offset_reset='earliest',
        bootstrap_servers=self.servers,
        heartbeat_interval_ms=self.heartbeat_interval_ms,
        session_timeout_ms=self.session_timeout_ms,
        loop=get_event_loop(),
    )
    await consumer.start()
    self.log.info(f'Monitoring {self.topic} topic for messages...')
    async for message in consumer:
        msg = json.loads(message.value)
        if msg.get('_is_payload'):
            # This message is a payload that was placed on the queue
            # from the kafka-queue archiver plugin
            extra_data = msg['_payload_meta']
            extra_data['request_meta'] = msg['_request_meta']
            meta = PayloadMeta(extra_data=extra_data)
            payload = Payload(content=b64decode(msg['_content']), payload_meta=meta)
            await queue.put(payload)
        else:
            await queue.put(msg)
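# Sketch of the message shape the consumer above expects, inferred from its
# parsing logic rather than taken from the kafka-queue archiver itself; field
# values are illustrative.
import json
from base64 import b64encode

payload_message = json.dumps(
    {
        '_is_payload': True,                           # routes into the Payload branch
        '_content': b64encode(b'raw bytes').decode(),  # b64decoded by the consumer
        '_payload_meta': {'filename': 'example.bin'},  # becomes PayloadMeta.extra_data
        '_request_meta': {'source': 'example'},        # nested under extra_data['request_meta']
    }
)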
def get(self, task: ArchiverResponse) -> Optional[Payload]:
    """
    Retrieve archived payload from MongoDB
    """
    self._connect_gridfs()
    result = self.gridfs_db.get(task.results['_id'])
    if result:
        # Read the GridFS file contents before wrapping them in a Payload
        payload = result.read()
        return Payload(payload, PayloadMeta(extra_data=task.results))
def scan(self, payload: Payload, request_meta: RequestMeta) -> Optional[WorkerResponse]:
    if self.RAISE_EXCEPTION:
        raise Exception('Test exception please ignore')
    if self.DISPATCH_TO:
        dispatch_meta = PayloadMeta(dispatch_to=self.DISPATCH_TO)
        p = ExtractedPayload(b'Lorem ipsum', dispatch_meta)
    else:
        p = ExtractedPayload(b'Lorem ipsum')
    wr = WorkerResponse({'valuable_insight': 'wow'}, extracted=[p])
    if self.RETURN_ERRORS:
        wr.errors += ['Test error please ignore']
    return wr
async def get(self, task: ArchiverResponse) -> Payload:
    """
    Retrieve archived payload from Azure Blob Storage
    """
    blob_client: BlobClient = BlobClient.from_connection_string(
        conn_str=self.conn_str,
        container_name=task.results['container_name'],
        blob_name=task.results['blob_name'],
    )
    stream = await blob_client.download_blob()
    # readall() is a coroutine on the aio StorageStreamDownloader and must
    # be awaited before the client is closed
    content = await stream.readall()
    await blob_client.close()
    meta = PayloadMeta(task.results)
    return Payload(content, meta)
def get(self, task: ArchiverResponse) -> Payload:
    """
    Retrieve archived payload from S3
    """
    if not self.client:
        self._get_client()
    meta = PayloadMeta(
        extra_data={'bucket': task.results['bucket'], 'path': task.results['path']}
    )
    content = self.client.get_object(
        Bucket=task.results['bucket'], Key=task.results['path']
    )['Body']
    return Payload(content.read(), meta)
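# A hedged usage sketch (assumption, not from the source): what a retrieval
# task for the S3 archiver above might look like. `ArchiverResponse.results`
# carries the bucket and key recorded at archive time; the plugin handle
# `s3_archiver` and the values shown are illustrative.
from stoq.data_classes import ArchiverResponse

task = ArchiverResponse(results={'bucket': 'stoq-archive', 'path': 'de/ad/deadbeef'})
# payload = s3_archiver.get(task)  # -> Payload with bucket/path in extra_data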
async def test_dont_dest_archive_payload(self):
    s = Stoq(base_dir=utils.get_data_dir(), dest_archivers=['dummy_archiver'])
    dummy_archiver = s.load_plugin('dummy_archiver')
    dummy_archiver.archive = asynctest.create_autospec(
        dummy_archiver.archive, return_value=None
    )
    response = await s.scan(
        self.generic_content,
        payload_meta=PayloadMeta(should_archive=False),
        add_start_dispatch=['extract_payload'],
        request_meta=RequestMeta(archive_payloads=True),
    )
    dummy_archiver.archive.assert_awaited_once()
    self.assertNotIn('dummy_archiver', response.results[0].plugins_run['archivers'])
    self.assertIn('dummy_archiver', response.results[1].plugins_run['archivers'])
def ingest(self, queue: Queue) -> None:
    consumer = KafkaConsumer(
        self.topic,
        group_id=self.group,
        auto_offset_reset='earliest',
        bootstrap_servers=self.servers,
    )
    print(f'Monitoring {self.topic} topic for messages...')
    for message in consumer:
        msg = json.loads(message.value)
        if msg.get('_is_payload'):
            # This message is a payload that was placed on the queue
            # from the kafka-queue archiver plugin
            meta = PayloadMeta(extra_data=msg['_request_meta'])
            payload = Payload(content=msg['_content'], payload_meta=meta)
            queue.put(payload)
        else:
            queue.put(msg)
def get(self, task: ArchiverResponse) -> Payload:
    """
    Retrieve archived payload from GCS
    """
    meta = PayloadMeta(
        extra_data={
            'bucket': task.results['archive_bucket'],
            'path': task.results['path'],
            'project_id': task.results['project_id'],
        }
    )
    client = Client(project=task.results['project_id'])
    bucket = client.get_bucket(task.results['archive_bucket'])
    blob = Blob(task.results['path'], bucket)
    content = BytesIO()
    blob.download_to_file(content)
    content.seek(0)
    return Payload(content.read(), meta)
async def ingest(self, queue: Queue) -> None:
    self.log.info(f'Monitoring redis queue {self.redis_queue}')
    while True:
        msg = self.conn.blpop(self.redis_queue, timeout=0)
        if not msg:
            # Sleep asynchronously so the event loop is not blocked
            # (assumes `import asyncio`)
            await asyncio.sleep(0.1)
            continue
        data = msg[1].decode()
        payload = self.conn.get(f'{data}_buf')
        meta = self.conn.get(f'{data}_meta')
        if meta and payload:
            meta = json.loads(meta.decode())
            await queue.put(
                Payload(payload, payload_meta=PayloadMeta(extra_data=meta))
            )
            # The buffer and metadata keys are derived from the queue entry
            # (`data`), so delete those once the payload has been enqueued
            self.conn.delete(f'{data}_buf')
            self.conn.delete(f'{data}_meta')
        else:
            await queue.put(json.loads(data))
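# Producer-side sketch inferred from the consumer above (an assumption, not
# taken from the source): store the payload bytes and JSON metadata under
# `<key>_buf` / `<key>_meta`, then push the key onto the monitored queue.
# The queue name 'stoq' stands in for whatever `self.redis_queue` is set to.
import json

import redis

conn = redis.Redis()
key = 'example-id'
conn.set(f'{key}_buf', b'raw payload bytes')             # read back as Payload content
conn.set(f'{key}_meta', json.dumps({'source': 'demo'}))  # becomes PayloadMeta.extra_data
conn.rpush('stoq', key)                                  # picked up by blpop in ingest()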
def get(self, task: ArchiverResponse) -> Payload:
    """
    Retrieve archived payload from GCS
    """
    meta = PayloadMeta(
        extra_data={
            'bucketId': task.results['bucketId'],
            'objectId': task.results['objectId'],
            'projectId': task.results['projectId'],
        }
    )
    count = 0
    client = Client(project=task.results['projectId'])
    while count < self.max_retries:
        try:
            bucket = client.get_bucket(task.results['bucketId'])
            blob = Blob(task.results['objectId'], bucket)
            content = BytesIO()
            blob.download_to_file(content)
            break
        except (
            InvalidResponse,
            GoogleAPICallError,
            InternalServerError,
            SSLError,
        ) as e:
            # Increment before checking the limit so the final failure raises
            # instead of leaving `content` unbound after the loop
            count += 1
            if count >= self.max_retries:
                raise StoqPluginException(
                    f'Failed to download {task.results["bucketId"]}/'
                    f'{task.results["objectId"]} from GCS: {str(e)}'
                )
            sleep(randrange(0, 4))
    content.seek(0)
    data = content.read()
    if self.use_encryption:
        data = self._decrypt(data)
    return Payload(data, meta)
def test_payloadmeta_to_str(self):
    response = PayloadMeta()
    response_str = str(response)
    response_dict = json.loads(response_str)
    self.assertIsInstance(response_str, str)
    self.assertIsInstance(response_dict, dict)
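# A small illustrative sketch (not from the source) pulling together the
# PayloadMeta options exercised elsewhere in this section: should_scan,
# should_archive, dispatch_to, and free-form extra_data. 'example_worker'
# is a hypothetical plugin name.
from stoq import PayloadMeta

meta = PayloadMeta(
    should_scan=True,                       # allow workers to scan the payload
    should_archive=False,                   # skip destination archivers
    dispatch_to=['example_worker'],         # force dispatch to a specific worker
    extra_data={'filename': 'sample.bin'},  # arbitrary JSON-serializable metadata
)
print(str(meta))  # serializes to JSON, as the test above asserts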
def get(self, task: ArchiverResponse) -> Optional[Payload]:
    if self.RAISE_EXCEPTION:
        raise Exception('Test exception please ignore')
    return Payload(self.PAYLOAD, PayloadMeta(extra_data=task.results))
async def test_reconstruct_all_subresponses(self):
    # Construct a fake stoq_response as if it were generated from a file
    # A.zip that contains two files, B.txt and C.zip, where C.zip contains D.txt
    results = [
        Payload(content=b'', payload_id='A.zip', payload_meta=PayloadMeta()),
        Payload(
            content=b'',
            payload_id='B.txt',
            payload_meta=PayloadMeta(),
            extracted_from='A.zip',
            extracted_by='fake',
        ),
        Payload(
            content=b'',
            payload_id='C.zip',
            payload_meta=PayloadMeta(),
            extracted_from='A.zip',
            extracted_by='fake',
        ),
        Payload(
            content=b'',
            payload_id='D.txt',
            payload_meta=PayloadMeta(),
            extracted_from='C.zip',
            extracted_by='fake',
        ),
    ]
    request = Request(request_meta=RequestMeta(extra_data={'check': 'me'}))
    payload_count = 1
    for result in results:
        result.results.workers['fake'] = f'result-{payload_count}'
        result.results.plugins_run['workers'].append('fake')
        request.payloads.append(result)
        payload_count += 1
    initial_response = StoqResponse(request)
    s = Stoq(base_dir=utils.get_data_dir(), decorators=['simple_decorator'])
    all_subresponses = [
        r async for r in s.reconstruct_all_subresponses(initial_response)
    ]
    # We expect there to be four "artificial" responses generated, one for
    # each payload as the root.
    self.assertEqual(len(all_subresponses), 4)
    # We expect the first response to have all 4 payloads, the second response
    # to have just the second payload, the third response to have the third
    # and fourth payloads, and the fourth response to have just the fourth payload
    self.assertEqual(
        [len(stoq_response.results) for stoq_response in all_subresponses],
        [4, 1, 2, 1],
    )
    self.assertEqual(
        [
            stoq_response.results[0].workers['fake']
            for stoq_response in all_subresponses
        ],
        ['result-1', 'result-2', 'result-3', 'result-4'],
    )
    self.assertTrue(
        all(
            'simple_decorator' in stoq_response.decorators
            for stoq_response in all_subresponses
        )
    )
    # Assert that they all have the same scan ID
    self.assertEqual(
        len({stoq_response.scan_id for stoq_response in all_subresponses}), 1
    )
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    pe = self._get_pe_file(payload.content)
    imports = self._get_imports(pe)
    exports = self._get_exports(pe)
    version_info = self._get_version_info(pe)
    certs = self._get_certs(pe)
    sections = self._get_section_info(pe)
    resources = self._get_resource_info(pe)
    rich_header = self._get_rich_header_hash(pe)
    imphash = self._get_imphash(pe)
    compile_time = self._get_compile_time(pe)
    tls_callbacks = self._get_tls_callbacks(pe)
    image_base = self._get_image_base(pe)
    entry_point = self._get_entry_point(pe)
    debug_info = self._get_debug_info(pe)
    is_packed = self._is_packed(pe)
    is_exe = self._is_exe(pe)
    is_dll = self._is_dll(pe)
    is_driver = self._is_driver(pe)
    is_suspicious = self._is_suspicious(pe)
    is_valid = self._is_valid(pe)

    results: Dict = {}
    extracted: List[ExtractedPayload] = []
    if imports:
        results['imports'] = imports
    if exports:
        results['exports'] = exports
    if version_info:
        results['version_info'] = version_info
    if certs:
        results['certificates'] = []
        for (cert_data, content) in certs:
            results['certificates'].append(cert_data)
            if content:
                cert_data['filename'] = bytes(cert_data['sha256'], 'ascii')
                extracted.append(
                    ExtractedPayload(
                        content=content,
                        payload_meta=PayloadMeta(extra_data=cert_data),
                    )
                )
    if sections:
        results['sections'] = sections
    if resources:
        results['resources'] = []
        for (rsrc_data, content) in resources:
            results['resources'].append(rsrc_data)
            if content:
                rsrc_data['filename'] = rsrc_data['name']
                extracted.append(
                    ExtractedPayload(
                        content=content,
                        payload_meta=PayloadMeta(extra_data=rsrc_data),
                    )
                )
    if rich_header:
        results['rich_header'] = rich_header
    if imphash:
        results['imphash'] = imphash
    if tls_callbacks:
        results['tls_callbacks'] = tls_callbacks
    if debug_info:
        results['debug_info'] = debug_info
    if is_packed:
        results['is_packed'] = is_packed
    if is_exe:
        results['is_exe'] = is_exe
    if is_dll:
        results['is_dll'] = is_dll
    if is_driver:
        results['is_driver'] = is_driver
    if is_suspicious:
        results['is_suspicious'] = is_suspicious
    if is_valid:
        results['is_valid'] = is_valid
    results['compile_time_epoch'] = compile_time[0]
    results['compile_time'] = compile_time[1]
    results['image_base'] = image_base
    results['entrypoint'] = entry_point
    pe.close()
    return WorkerResponse(results=results, extracted=extracted)