def set_process_schema(client: DgraphClient, engagement: bool = False):
    """Install the Process node predicates on the target Dgraph cluster.

    :param client: Dgraph client connected to the cluster to alter.
    :param engagement: if True, also install engagement-graph-only
        predicates (``risks`` and the unstable ``process_guid``).
    """
    # NOTE: `arguments` previously declared two separate @index directives
    # (`@index(fulltext) @index(trigram)`); Dgraph requires a single @index
    # directive listing all tokenizers, so they are combined here.
    schema = """
        node_key: string @upsert @index(hash) .
        process_id: int @index(int) .
        created_timestamp: int @index(int) .
        asset_id: string @index(hash) .
        terminate_time: int @index(int) .
        image_name: string @index(exact, hash, trigram, fulltext) .
        process_name: string @index(exact, hash, trigram, fulltext) .
        arguments: string @index(fulltext, trigram) .
        bin_file: uid @reverse .
        children: [uid] @reverse .
        created_files: [uid] @reverse .
        deleted_files: [uid] @reverse .
        read_files: [uid] @reverse .
        wrote_files: [uid] @reverse .
        created_connections: [uid] @reverse .
        bound_connections: [uid] @reverse .
    """

    if engagement:
        schema += "\n"
        schema += "risks: [uid] @reverse ."
        # unstable
        schema += """
        process_guid: string @index(exact, hash, trigram, fulltext) .
        """

    op = pydgraph.Operation(schema=schema)
    client.alter(op)
def set_file_schema(client: DgraphClient, engagement: bool = False) -> None:
    """Install the File node predicates on the target Dgraph cluster.

    :param client: Dgraph client connected to the cluster to alter.
    :param engagement: if True, also install the engagement-only
        ``risks`` predicate.
    """
    predicates = """
        node_key: string @upsert @index(hash) .
        file_name: string @index(exact, hash, trigram, fulltext) .
        asset_id: string @index(exact, hash, trigram, fulltext) .
        file_path: string @index(exact, hash, trigram, fulltext) .
        file_extension: string @index(exact, hash, trigram, fulltext) .
        file_mime_type: string @index(exact, hash, trigram, fulltext) .
        file_size: int @index(int) .
        file_version: string @index(exact, hash, trigram, fulltext) .
        file_description: string @index(exact, hash, trigram, fulltext) .
        file_product: string @index(exact, hash, trigram, fulltext) .
        file_company: string @index(exact, hash, trigram, fulltext) .
        file_directory: string @index(exact, hash, trigram, fulltext) .
        file_inode: int @index(int) .
        file_hard_links: string @index(exact, hash, trigram, fulltext) .
        signed: bool @index(bool) .
        signed_status: string @index(exact, hash, trigram, fulltext) .
        md5_hash: string @index(exact, hash, trigram, fulltext) .
        sha1_hash: string @index(exact, hash, trigram, fulltext) .
        sha256_hash: string @index(exact, hash, trigram, fulltext) .
    """

    if engagement:
        # Engagement graphs additionally track attached risks.
        predicates += "\n" + "risks: uid @reverse ."

    client.alter(pydgraph.Operation(schema=predicates))
def create_process_schema(eg_client: DgraphClient):
    """Install the minimal Process schema on the engagement graph cluster."""
    predicate_lines = [
        'node_key: string @index(hash) .',
        'engagement_key: string @index(hash) .',
        'children: uid @reverse .',
        'pid: int @index(int) .',
    ]
    # Join with newlines (plus a trailing newline) to form the schema text.
    schema = '\n'.join(predicate_lines) + '\n'
    eg_client.alter(pydgraph.Operation(schema=schema))
def __init__(self, id: str):
    """Bind this instance to an engagement and open graph cluster clients."""
    # Identifier of the engagement this object operates on.
    self.engagement_key = id
    # Client for the engagement graph cluster (alpha0 node).
    self.eg_client: DgraphClient = DgraphClient(
        DgraphClientStub("alpha0.engagementgraphcluster.grapl:9080"))
    # Client for the master graph cluster (alpha0 node).
    self.mg_client: DgraphClient = DgraphClient(
        DgraphClientStub("alpha0.mastergraphcluster.grapl:9080"))
    # Copies nodes from the master graph into this engagement's graph.
    self.node_copier: NodeCopier = NodeCopier(id, self.mg_client, self.eg_client)
def copy_node(
    src_client: DgraphClient,
    dst_client: DgraphClient,
    node_key: str,
    init_node: Optional[Dict[str, Any]] = None,
) -> Any:
    """Fetch the node with `node_key` from `src_client`, merge `init_node`
    over it, and upsert the result into `dst_client`.

    Raises if no node matches or the match lacks a `dgraph.type`.
    """
    if not init_node:
        init_node = dict()
    assert init_node is not None

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}"))
            {{
                uid,
                dgraph.type,
                expand(_all_),
            }}
        }}
    """

    txn = src_client.txn(read_only=True)
    try:
        matches = json.loads(txn.query(query).json)["q0"]
    finally:
        txn.discard()

    if not matches:
        raise Exception("ERROR: Can not find res")
    if not matches[0].get("dgraph.type"):
        raise Exception("ERROR: Can not find res dgraph.type")

    # init_node's values take precedence over the fetched node's.
    merged = {**matches[0], **init_node}
    return upsert(dst_client, merged)
def _upsert(client: DgraphClient, node_dict: Dict[str, Property]) -> str:
    """Insert-or-update the node keyed by `node_dict['node_key']`.

    Returns the node's uid: the existing uid if a match was found,
    otherwise the uid Dgraph assigned to the new blank node.
    """
    # Drop any stale uid and mark the payload as a blank node by default.
    node_dict.pop('uid', None)
    node_dict['uid'] = '_:blank-0'
    node_key = node_dict['node_key']

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}"))
            {{
                uid,
                expand(_forward_)
            }}
        }}
    """

    txn = client.txn(read_only=False)
    try:
        existing = json.loads(txn.query(query).json)['q0']
        if existing:
            # Reuse the stored uid so the mutation updates in place.
            node_dict['uid'] = existing[0]['uid']
        commit = txn.mutate(set_obj=node_dict, commit_now=True)
        if existing:
            return str(existing[0]['uid'])
        return str(commit.uids['blank-0'])
    finally:
        txn.discard()
def upsert(client: DgraphClient, node_dict: Dict[str, Any]) -> Any:
    """Insert-or-update the node keyed by `node_dict['node_key']`.

    Returns the node's uid: the stored uid for an existing node, or the
    uid Dgraph assigned when creating a new one.
    """
    if node_dict.get("uid"):
        node_dict.pop("uid")
    node_dict["uid"] = "_:blank-0"
    node_key = node_dict["node_key"]

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}")) @cascade
            {{
                uid,
                dgraph.type,
            }}
        }}
    """

    txn = client.txn(read_only=False)
    try:
        res = json.loads(txn.query(query).json)["q0"]
        if res:
            node_dict["uid"] = res[0]["uid"]
            # Keep the stored uid/dgraph.type on the mutation payload.
            node_dict = {**node_dict, **res[0]}

        mut_res = txn.mutate(set_obj=node_dict, commit_now=True)

        # BUG FIX: the old `node_dict.get("uid") or mut_res.uids["blank-0"]`
        # always returned the truthy placeholder "_:blank-0" for brand-new
        # nodes. Return the real assigned uid from the mutation instead.
        if res:
            return node_dict["uid"]
        return mut_res.uids["blank-0"]
    finally:
        txn.discard()
def test_process_with_created_files(self) -> None:
    """A process with a created_files edge should be queryable via FileQuery."""
    # Given: a process with a pid 100 & process_name word.exe,
    local_client = DgraphClient(DgraphClientStub("localhost:9080"))
    created_timestamp = int(time.time())

    parent_process = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": created_timestamp,
    }  # type: Dict[str, Property]
    parent_process_view = upsert(
        local_client,
        "Process",
        ProcessView,
        "763ddbda-8812-4a07-acfe-83402b92379d",
        parent_process,
    )

    created_file = {
        "file_path": "/folder/file.txt",
        "created_timestamp": created_timestamp + 1000,
    }  # type: Dict[str, Property]
    created_file_view = upsert(
        local_client,
        "File",
        FileView,
        "575f103e-1a11-4650-9f1b-5b72e44dfec3",
        created_file,
    )

    # Link the process to the file it created.
    create_edge(
        local_client,
        parent_process_view.uid,
        "created_files",
        created_file_view.uid,
    )

    file_query = (
        FileQuery()
        .with_node_key(eq="575f103e-1a11-4650-9f1b-5b72e44dfec3")
        .with_file_path(eq="/folder/file.txt")
    )
    queried_process = (
        ProcessQuery()
        .with_node_key(eq="763ddbda-8812-4a07-acfe-83402b92379d")
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=created_timestamp)
        .with_created_files(file_query)
        .query_first(local_client)
    )

    assert queried_process
    assert queried_process.process_id == 100
    assert len(queried_process.created_files) == 1
    queried_file = queried_process.created_files[0]
    assert queried_file.file_path == "/folder/file.txt"
def _get_children(self, client: DgraphClient, eg=True) -> Optional[List[Process]]:
    """Return this process's children, or None if the node is not found.

    When `eg` is True the lookup is restricted to this engagement.
    """
    q_filter = (
        '@filter(eq(engagement_key, "{}"))'.format(self.engagement_key)
        if eg else ''
    )

    query = """{{
        q0(func: eq(node_key, "{}")) {} {{
            children {{
                uid,
                node_key,
                image_name,
                image_path,
            }}
        }}
    }}""".format(self.node_key, q_filter)

    proc_res = json.loads(client.query(query).json)['q0']
    if not proc_res:
        return None
    return [self.from_dict(child) for child in proc_res[0]['children']]
def _get_parent(self, client: DgraphClient, eg=True) -> Optional[Process]:
    """Return this process's parent (via the reverse `~children` edge),
    or None if the node is not found.

    When `eg` is True the lookup is restricted to this engagement.
    """
    q_filter = (
        '@filter(eq(engagement_key, "{}"))'.format(self.engagement_key)
        if eg else ''
    )

    query = """{{
        q0(func: eq(node_key, "{}")) {} {{
            ~children {{
                uid,
                node_key,
                image_name,
                image_path,
            }}
        }}
    }}""".format(self.node_key, q_filter)

    proc_res = json.loads(client.query(query).json)['q0']
    if not proc_res:
        return None

    parent = proc_res[0]['~children'][0]
    return Process(
        parent['node_key'],
        parent['uid'],
        self.engagement_key,
        parent.get('image_name', None),
        parent.get('image_path', None),
        self.engagement,
    )
def upsert(client: DgraphClient, node_dict: Dict[str, Any]) -> str:
    """Insert-or-update the node keyed by `node_dict['node_key']`.

    Returns the node's uid: the stored uid for an existing node, or the
    uid Dgraph assigned when creating a new one.
    """
    if node_dict.get('uid'):
        node_dict.pop('uid')
    node_dict['uid'] = '_:blank-0'
    node_key = node_dict['node_key']
    print(f"INFO: Upserting node: {node_dict}")

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}"))
            {{
                uid,
                dgraph.type,
            }}
        }}
    """

    txn = client.txn(read_only=False)
    try:
        res = json.loads(txn.query(query).json)['q0']
        if res:
            node_dict['uid'] = res[0]['uid']
            # Keep the stored uid/dgraph.type on the mutation payload.
            node_dict = {**node_dict, **res[0]}

        mut_res = txn.mutate(set_obj=node_dict, commit_now=True)

        # BUG FIX: `node_dict.get('uid') or mut_res.uids["blank-0"]` always
        # returned the truthy placeholder "_:blank-0" for brand-new nodes.
        # Return the real assigned uid from the mutation response instead.
        if res:
            return node_dict['uid']
        return mut_res.uids["blank-0"]
    finally:
        txn.discard()
def raw_node_from_uid(dgraph_client: DgraphClient, uid: str) -> Optional[Dict[str, Any]]:
    """Fetch the raw node with the given uid, or None if it does not exist.

    The returned dict's "node_type" is collapsed from Dgraph's list form
    to its first element when present.
    """
    query = f"""
        {{
            res(func: uid("{uid}"), first: 1) {{
                uid,
                expand(_all_),
                node_type: dgraph.type
            }}
        }}
    """

    txn = dgraph_client.txn(read_only=True, best_effort=False)
    try:
        res = json.loads(txn.query(query).json)["res"]
    finally:
        txn.discard()

    if not res:
        return None

    # The response may be a list of matches or a single object; normalize.
    node = res[0] if isinstance(res, list) else res
    if node.get("node_type"):
        node["node_type"] = node["node_type"][0]
    else:
        print(f"WARN: node_type missing from {uid} {res}")
    return cast(Dict[str, Any], node)
def get_count(
    self,
    dgraph_client: DgraphClient,
    first: Optional[int] = None,
) -> int:
    """Run this query in count mode and return the number of matches."""
    query_str = generate_query(
        query_name="res",
        binding_modifier="res",
        root=self,
        contains_node_key=None,
        first=first,
        count=True,
    )

    txn = dgraph_client.txn(read_only=True)
    try:
        raw_count = json.loads(txn.query(query_str).json)["res"]
    finally:
        txn.discard()

    if not raw_count:
        return 0
    # Dgraph may hand back the count wrapped in a list or a bare dict.
    if isinstance(raw_count, list):
        return int(raw_count[0].get("count", 0))
    if isinstance(raw_count, dict):
        return int(raw_count.get("count", 0))
    raise TypeError("raw_count is not list or dict")
def edge_in_lens(dg_client: DgraphClient, node_uid: str, edge_name: str,
                 lens_name: str) -> List[Dict[str, Any]]:
    """Return neighbors of `node_uid` along `edge_name` that are scoped to
    the lens named `lens_name`.
    """
    # edge_name is interpolated directly (GraphQL variables cannot bind
    # predicate names); node_uid and lens_name go through query variables.
    query = f"""
        query q0($node_uid: string, $lens_name: string)
        {{
            q0(func: uid($node_uid)) @cascade {{
                {edge_name} {{
                    uid,
                    node_key,
                    node_type: dgraph.type,
                    ~scope @filter(eq(lens, $lens_name)) {{
                        uid,
                        node_type: dgraph.type,
                    }}
                }}
            }}
        }}
    """

    bindings = {"$node_uid": node_uid, "$lens_name": lens_name}
    txn = dg_client.txn(read_only=True)
    try:
        response = json.loads(txn.query(query, variables=bindings).json)
        return response["q0"]
    finally:
        txn.discard()
def query(
    self,
    dgraph_client: DgraphClient,
    contains_node_key: Optional[str] = None,
    first: Optional[int] = 1000,
) -> List["NV"]:
    """Execute this query and return the matching views.

    `first` caps the result count (default 1000); when querying by a
    specific node_key at most one result is requested.
    """
    first = first or 1000
    if contains_node_key:
        # A node_key identifies at most one node.
        first = 1

    query_str = generate_query(
        query_name="res",
        binding_modifier="res",
        root=self,
        contains_node_key=contains_node_key,
        first=first,
    )

    txn = dgraph_client.txn(read_only=True)
    try:
        raw_views = json.loads(txn.query(query_str).json)["res"]
    except Exception as e:
        # Surface the generated query text alongside the failure.
        raise Exception(query_str, e)
    finally:
        txn.discard()

    if not raw_views:
        return []
    return [
        self.view_type.from_dict(dgraph_client, raw_view)
        for raw_view in raw_views
    ]
def test_process_query_view_parity(self, process_props: ProcessProps):
    """Every property written through upsert should round-trip through a
    ProcessQuery lookup unchanged (modulo dgraph string escaping)."""
    local_client = DgraphClient(DgraphClientStub("localhost:9080"))
    created_proc = get_or_create_process(self, local_client, process_props,)

    queried_proc = (
        ProcessQuery()
        .with_node_key(eq=created_proc.node_key)
        .query_first(local_client)
    )
    assert queried_proc

    assert process_props["node_key"] == queried_proc.node_key
    assert "Process" == queried_proc.get_node_type()
    assert process_props["process_id"] == queried_proc.get_process_id()
    # String fields are compared against their dgraph-escaped form.
    assert process_props["arguments"] == escape_dgraph_str(
        queried_proc.get_arguments()
    )
    assert (
        process_props["created_timestamp"] == queried_proc.get_created_timestamp()
    )
    assert None == queried_proc.get_asset()
    assert process_props["terminate_time"] == queried_proc.get_terminate_time()
    assert process_props["image_name"] == escape_dgraph_str(
        queried_proc.get_image_name()
    )
    assert process_props["process_name"] == escape_dgraph_str(
        queried_proc.get_process_name()
    )
def get_lens_risks(dg_client: DgraphClient, lens: str) -> List[Dict[str, Any]]:
    """Return the scope members (with their attached risks) of the lens
    named `lens`; empty list if the lens does not exist or has no scope.
    """
    query = """
        query q0($a: string)
        {
            q0(func: eq(lens, $a)) {
                uid,
                node_type: dgraph.type,
                node_key,
                lens,
                score,
                scope {
                    uid,
                    node_key,
                    node_type: dgraph.type
                    risks {
                        uid,
                        node_key,
                        analyzer_name,
                        node_type: dgraph.type,
                        risk_score
                    }
                }
            }
        }"""

    txn = dg_client.txn(read_only=True)
    try:
        variables = {"$a": lens}
        res = json.loads(txn.query(query, variables=variables).json)
        if not res["q0"]:
            return []
        # BUG FIX: Dgraph omits empty edges from its JSON response, so a
        # lens with no scope has no "scope" key; indexing it directly
        # raised KeyError. Fall back to an empty list.
        return res["q0"][0].get("scope", [])
    finally:
        txn.discard()
def expand_node_forward(dgraph_client: DgraphClient, node_key: str) -> Optional[Dict[str, Any]]:
    """Fetch the node with `node_key` expanded one hop forward.

    Returns the raw node dict, or None if no node matches — the old code
    indexed `res['res'][0]` unconditionally and raised IndexError on a
    miss, contradicting the Optional return type.
    """
    query = """
        query res($node_key: string)
        {
            res(func: eq(node_key, $node_key)) {
                uid,
                expand(_all_) {
                    uid,
                    expand(_all_),
                    node_type: dgraph.type
                }
                node_type: dgraph.type
            }
        }
    """

    txn = dgraph_client.txn(read_only=True)
    variables = {"$node_key": node_key}
    try:
        res = json.loads(txn.query(query, variables=variables).json)
    finally:
        txn.discard()

    matches = res['res']
    return matches[0] if matches else None
def copy_node(mgclient: DgraphClient, egclient: DgraphClient, node_key: str,
              init_node: Optional[Dict[str, Any]] = None) -> None:
    """Fetch the node with `node_key` from the master graph, merge
    `init_node` over it, and upsert it into the engagement graph.

    Raises if no node matches.
    """
    if not init_node:
        init_node = dict()
    assert init_node is not None

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}"))
            {{
                uid,
                expand(_all_),
                dgraph.type
            }}
        }}
    """

    txn = mgclient.txn(read_only=True)
    try:
        matches = json.loads(txn.query(query).json)['q0']
    finally:
        txn.discard()

    if not matches:
        raise Exception("ERROR: Can not find res")

    print(f"Copy query result: {matches}")
    # init_node's values take precedence over the fetched node's.
    merged = {**matches[0], **init_node}
    return upsert(egclient, merged)
def should_throttle(
    analyzer_name: str, root_node_key: str, dgraph_client: DgraphClient
) -> bool:
    """Return True when the node already has a risk attached by this
    analyzer (i.e. re-running it would be redundant).
    """
    query = """
        query q0($a: string, $n: string)
        {
            q0(func: eq(node_key, $n), first: 1) @cascade
            {
                uid,
                ~scope {
                    uid,
                    risks @filter(eq(analyzer_name, $a)) {
                        uid
                    }
                }
            }
        }
    """

    variables = {
        '$a': analyzer_name,
        '$n': root_node_key
    }

    # BUG FIX: the old code never discarded the read-only transaction,
    # leaking it; discard in a finally block.
    txn = dgraph_client.txn(read_only=True)
    try:
        res = json.loads(txn.query(query, variables=variables).json)
    finally:
        txn.discard()

    return bool(res['q0'])
def upsert(client: DgraphClient, node_key: str, props: Dict[str, str]):
    """Insert-or-update the node with `node_key`, merging `props` under any
    values already stored. Returns the node's uid.
    """
    query = """
        query q0($a: string)
        {
            q0(func: eq(node_key, $a))
            {
                uid,
                expand(_all_)
            }
        }
    """

    txn = client.txn(read_only=False)
    try:
        res = json.loads(txn.query(query, variables={'$a': node_key}).json)
        node = res['q0']
        if not node:
            node = props
        else:
            # TODO: Merge lists of properties together
            # Existing stored values (including 'uid') win over props.
            node = {**props, **node[0]}

        m_res = txn.mutate(set_obj=node, commit_now=True)
        # BUG FIX: when the node already existed the mutation carries a
        # real uid, so m_res.uids has no 'blank-0' entry and the old code
        # raised KeyError. Return the existing uid in that case.
        uid = node.get('uid') or m_res.uids['blank-0']
    finally:
        txn.discard()

    return uid
def test_process_query_view_miss(self, process_props: ProcessProps) -> None:
    """Negating every property with Not() must make the query miss the
    node that was just created."""
    local_client = DgraphClient(DgraphClientStub("localhost:9080"))
    created_proc = get_or_create_process(self, local_client, process_props)

    # Every negated property must actually be present on the created node.
    required = (
        created_proc.process_id,
        created_proc.arguments,
        created_proc.created_timestamp,
        created_proc.terminate_time,
        created_proc.image_name,
        created_proc.process_name,
    )
    assert all(prop is not None for prop in required)

    queried_proc = (
        ProcessQuery()
        .with_node_key(eq=created_proc.node_key)
        .with_process_id(eq=Not(created_proc.process_id))
        .with_arguments(eq=Not(created_proc.arguments))
        .with_created_timestamp(eq=Not(created_proc.created_timestamp))
        .with_terminate_time(eq=Not(created_proc.terminate_time))
        .with_image_name(eq=Not(created_proc.image_name))
        .with_process_name(eq=Not(created_proc.process_name))
        .query_first(local_client)
    )

    assert not queried_proc
def get_uid(client: DgraphClient, node_key: str) -> str:
    """Look up the uid of the node with `node_key`.

    Raises when the response is a list with no matches.
    """
    query = """
        query res($a: string)
        {
            res(func: eq(node_key, $a), first: 1) @cascade
            {
                uid,
            }
        }"""

    txn = client.txn(read_only=True)
    try:
        raw = json.loads(txn.query(query, variables={"$a": node_key}).json)
        result = raw["res"]
        # Dgraph may answer with a list of matches or a single object.
        if not isinstance(result, list):
            return str(result["uid"])
        if result:
            return str(result[0]["uid"])
        raise Exception(
            f"get_uid failed for node_key: {node_key} {raw}")
    finally:
        txn.discard()
def _upsert(client: DgraphClient, node_dict: Dict[str, Property]) -> str:
    """Insert-or-update the node keyed by `node_dict['node_key']`.

    Returns the node's uid: the existing uid if a match was found,
    otherwise the uid Dgraph assigned to the new blank node.
    """
    # Discard any stale uid and default to a blank-node placeholder.
    node_dict.pop("uid", None)
    node_dict["uid"] = "_:blank-0"
    node_key = node_dict["node_key"]

    query = f"""
        {{
            q0(func: eq(node_key, "{node_key}"))
            {{
                uid,
                expand(_all_)
            }}
        }}
    """

    txn = client.txn(read_only=False)
    try:
        existing = json.loads(txn.query(query).json)["q0"]
        if existing:
            # Reuse the stored uid so the mutation updates in place.
            node_dict["uid"] = existing[0]["uid"]
        commit = txn.mutate(set_obj=node_dict, commit_now=True)
        if existing:
            return str(existing[0]["uid"])
        return str(commit.uids["blank-0"])
    finally:
        txn.discard()
def get_lens_scope(dg_client: DgraphClient, lens: str) -> Dict[str, Any]:
    """Return the lens node named `lens` with its scope expanded, or an
    empty dict if no such lens exists."""
    query = """
        query q0($a: string)
        {
            q0(func: eq(lens, $a)) {
                uid,
                node_type: dgraph.type,
                node_key,
                lens,
                score,
                scope {
                    uid,
                    expand(_all_),
                    node_type: dgraph.type,
                }
            }
        }"""

    txn = dg_client.txn(read_only=True)
    try:
        response = json.loads(
            txn.query(query, variables={"$a": lens}).json
        )
        matches = response["q0"]
        return matches[0] if matches else {}
    finally:
        txn.discard()
def attach_risk(client: DgraphClient, node: Dict[str, Any],
                analyzer_name: str, risk_score: int):
    """Attach a risk (analyzer_name, risk_score) to `node`, unless a risk
    from the same analyzer is already attached (idempotent).
    """
    # BUG FIX: the old code created a transaction, then created a second
    # one a few lines later, leaking the first (only the second was
    # discarded). Create exactly one transaction.
    txn = client.txn(read_only=False)
    try:
        query = """
            query q0($a: string, $b: string)
            {
                n as var(func: eq(node_key, $a), first: 1) { uid }
                q0(func: uid(n), first: 1) {
                    uid,
                    risks @filter(
                        eq(analyzer_name, $b)
                    ) {
                        uid
                    }
                }
            }
        """
        variables = {
            '$a': node['node_key'],
            '$b': analyzer_name
        }

        res = json.loads(txn.query(query, variables=variables).json)

        # Already flagged by this analyzer — nothing to do.
        if res['q0'] and res['q0'][0].get('risks'):
            return

        mutation = {
            "uid": res['q0'][0]['uid'],
            "risks": {
                'analyzer_name': analyzer_name,
                'risk_score': risk_score
            }
        }
        print(f"mutation: {mutation}")

        txn.mutate(set_obj=mutation, commit_now=True)
    finally:
        txn.discard()
def test_with_wrote_files(self) -> None:
    """A process with a wrote_files edge should be queryable via FileQuery."""
    # Given: a process with a pid 100 & process_name word.exe,
    local_client = DgraphClient(DgraphClientStub("localhost:9080"))
    created_timestamp = int(time.time())

    proc_key = "test_with_wrote_files-8f0761fb-2ffe-4d4b-ab38-68e5489f56dc"
    file_key = "test_with_wrote_files-2325c49a-95b4-423f-96d0-99539fe03833"

    parent_process = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": created_timestamp,
    }  # type: Dict[str, Property]
    parent_process_view = upsert(
        local_client,
        "Process",
        ProcessView,
        proc_key,
        parent_process,
    )

    wrote_file = {
        "file_path": "/folder/file.txt",
        "created_timestamp": created_timestamp + 1000,
    }  # type: Dict[str, Property]
    wrote_file_view = upsert(
        local_client,
        "File",
        FileView,
        file_key,
        wrote_file,
    )

    # Link the process to the file it wrote.
    create_edge(
        local_client,
        parent_process_view.uid,
        "wrote_files",
        wrote_file_view.uid,
    )

    queried_process = (
        ProcessQuery()
        .with_node_key(eq=proc_key)
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=created_timestamp)
        .with_wrote_files(
            FileQuery()
            .with_node_key(eq=file_key)
            .with_file_path(eq="/folder/file.txt")
        )
        .query_first(local_client)
    )

    assert queried_process
    assert queried_process.node_key == proc_key
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"

    assert len(queried_process.wrote_files) == 1
    assert queried_process.wrote_files[0].node_key == file_key
    assert queried_process.wrote_files[0].file_path == "/folder/file.txt"
def analyzer(graph: Subgraph, sender: Connection):
    """Run `_analyzer` against the master graph; on any failure, report the
    error and send None through `sender` so the consumer can unblock."""
    try:
        print('analyzing')
        dgraph_client = DgraphClient(DgraphClientStub('db.mastergraph:9080'))
        _analyzer(dgraph_client, graph, sender)
    except Exception as e:
        # Best-effort: never let an analyzer crash propagate; signal the
        # failure downstream instead.
        print('analyzer failed: {}'.format(e))
        sender.send(None)
def test_with_bin_file(self) -> None:
    """A process with a bin_file edge should be queryable via FileQuery."""
    # Given: a process with a pid 100 & process_name word.exe,
    local_client = DgraphClient(DgraphClientStub("localhost:9080"))
    created_timestamp = int(time.time())

    parent_process = {
        "process_id": 100,
        "process_name": "word.exe",
        "created_timestamp": created_timestamp,
    }  # type: Dict[str, Property]
    parent_process_view = upsert(
        local_client,
        "Process",
        ProcessView,
        "635952af-87f3-4a2a-a65d-3f1859db9525",
        parent_process,
    )

    bin_file = {
        "file_path": "/folder/file.txt",
        "created_timestamp": created_timestamp + 1000,
    }  # type: Dict[str, Property]
    bin_file_view = upsert(
        local_client,
        "File",
        FileView,
        "9f16e0c9-33c0-4d18-9878-ef686373570b",
        bin_file,
    )

    create_edge(
        local_client,
        parent_process_view.uid,
        "bin_file",
        bin_file_view.uid,
    )

    queried_process = (
        ProcessQuery()
        .with_node_key(eq="635952af-87f3-4a2a-a65d-3f1859db9525")
        .with_process_id(eq=100)
        .with_process_name(contains="word")
        .with_created_timestamp(eq=created_timestamp)
        .with_bin_file(
            FileQuery()
            .with_node_key(eq="9f16e0c9-33c0-4d18-9878-ef686373570b")
            .with_file_path(eq="/folder/file.txt")
        )
        .query_first(local_client)
    )

    assert queried_process
    # BUG FIX: the old `assert "635952af-..."` asserted a non-empty string
    # literal (always true); compare the queried node_key instead.
    assert queried_process.node_key == "635952af-87f3-4a2a-a65d-3f1859db9525"
    assert queried_process.process_id == 100
    assert queried_process.process_name == "word.exe"
    assert queried_process.created_timestamp == created_timestamp

    bin_file = queried_process.bin_file
    assert bin_file.node_key == "9f16e0c9-33c0-4d18-9878-ef686373570b"
    assert bin_file.file_path == "/folder/file.txt"
def lambda_handler(events: Any, context: Any) -> None:
    """Lambda entry point: for each S3-delivered record, download the named
    analyzer from S3 and execute it against the record's subgraph in a
    child process, emitting any hits back to S3.

    :param events: Lambda event batch; each record carries an S3 pointer
        to a JSON message with "key" (analyzer S3 key) and "subgraph"
        (serialized SubgraphView proto).  # assumes this shape — TODO confirm
    :param context: Lambda context (only printed here).
    """
    # Parse sns message
    print("handling")
    print(events)
    print(context)

    # One client stub per configured master-graph alpha node.
    alpha_names = os.environ["MG_ALPHAS"].split(",")

    client_stubs = [DgraphClientStub("{}:9080".format(name)) for name in alpha_names]
    client = DgraphClient(*client_stubs)

    s3 = get_s3_client()
    for event in events["Records"]:
        # When deployed (non-local), records arrive wrapped in an SQS body.
        if not IS_LOCAL:
            event = json.loads(event['body'])['Records'][0]

        data = parse_s3_event(s3, event)
        message = json.loads(data)

        print(f'Executing Analyzer: {message["key"]}')
        # Fetch the analyzer source from the analyzers bucket.
        analyzer = download_s3_file(s3, f"{os.environ['BUCKET_PREFIX']}-analyzers-bucket", message["key"])
        # Analyzer name is the second-to-last path segment of the key.
        analyzer_name = message["key"].split("/")[-2]

        subgraph = SubgraphView.from_proto(client, bytes(message["subgraph"]))

        # TODO: Validate signature of S3 file
        print(f'event {event}')
        # One-way pipe: child writes results, this process reads them.
        rx, tx = Pipe(duplex=False)  # type: Tuple[Connection, Connection]

        # Run the analyzer in an isolated child process.
        p = Process(target=execute_file, args=(analyzer_name, analyzer, subgraph, tx, ''))
        p.start()

        # Poll the pipe every 5 seconds until the child reports completion.
        t = 0
        while True:
            p_res = rx.poll(timeout=5)
            if not p_res:
                t += 1
                print(f"Polled {analyzer_name} for {t * 5} seconds without result")
                continue
            result = rx.recv()  # type: Optional[Any]

            if isinstance(result, ExecutionComplete):
                print("execution complete")
                break

            # emit any hits to an S3 bucket
            if isinstance(result, ExecutionHit):
                print(f"emitting event for {analyzer_name} {result.analyzer_name} {result.root_node_key}")
                emit_event(s3, result)
                update_msg_cache(analyzer, result.root_node_key, message['key'])
                update_hit_cache(analyzer_name, result.root_node_key)

            # An ExecutionFailed result aborts processing of this batch.
            assert not isinstance(
                result, ExecutionFailed
            ), f"Analyzer {analyzer_name} failed."

        p.join()