def _setup_params(form: dict, schema: dict, is_external: bool) -> dict:
    logger.debug("Setting up parameters")

    params: Dict[str, Any] = {}

    if is_external:
        # External parameters are passed as form fields.
        for param in schema["params"]:
            if param["name"] in request.form:
                params[param["name"]] = request.form[param["name"]]

        logger.info(f"ExternalDataSource params received {params}")
    else:
        for param in schema["params"]:
            # Save the files, keeping track of which parameter they represent.
            if param["name"] in request.files:
                params[param["name"]] = tempfile.NamedTemporaryFile()
                request.files[param["name"]].save(params[param["name"]].name)
                params[param["name"]].seek(0)

        logger.info(f"Saved uploaded files {params}")

    logger.debug("Set up parameters")

    return params
def _make_edges(self, source_graph: nx.Graph) -> None:
    logger.info("Grouping Edges by type")

    sorted_edges = sorted(
        source_graph.edges(data=True, keys=True), key=lambda edge: edge[3]["edge_name"]
    )
    edges_by_type = itertools.groupby(sorted_edges, key=lambda edge: edge[3]["edge_name"])

    for edge_type, edges in edges_by_type:
        # Remove white spaces.
        edge_type = edge_type.replace(" ", "_")

        cypher_edges = list(map(self._edge_as_cypher, edges))

        logger.debug(f"Inserting {len(cypher_edges)} {edge_type} edges into Neo4J")

        for i in range(0, len(cypher_edges), self.batch_size):
            start = i
            end = i + self.batch_size

            cypher = f"UNWIND [{', '.join(cypher_edges[start:end])}] as row\n"
            cypher += "MATCH (src {_key: row.src}), (dst {_key: row.dst})"
            cypher += f" CREATE (src)-[:`{edge_type}`]->(dst)"

            with self.neo4j.session() as session:
                session.write_transaction(lambda tx: tx.run(cypher))

            logger.debug(f"Finished batch {i+1} ({start} -> {end})")
def adhoc():
    """Allows for ad-hoc transformation of generic JSON data based on one of two CIM models:

    1. The Beagle CIM Model (defined in `constants.py`)
    2. The OSSEM Model (defined in https://github.com/Cyb3rWard0g/OSSEM)
    """
    valid_cim_formats = ["beagle"]

    data = request.get_json()

    events = data["data"]
    cim_format = data.get("cim", "beagle")

    if str(cim_format).lower() not in valid_cim_formats:
        return jsonify({"message": f"cim_format must be one of {valid_cim_formats}"})

    if not isinstance(events, list):
        events = [events]

    logger.info("Beginning ad-hoc graphing request")

    g = JSONData(events).to_graph(consolidate_edges=True)

    logger.info("Completed ad-hoc graphing request")

    return jsonify({"data": NetworkX.graph_to_json(g)})
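# Illustrative client sketch (not part of the application) showing how a caller could
# hit the ad-hoc endpoint above. The host and the "/api/adhoc" route are assumptions
# (the route decorator is not shown here); the JSON body matches what adhoc() reads.
def _example_adhoc_request():
    import requests  # hypothetical client-side dependency

    sample_events = [{"event_type": "process", "process_image": "cmd.exe"}]  # made-up event

    resp = requests.post(
        "http://localhost:8000/api/adhoc",  # assumed host and route
        json={"data": sample_events, "cim": "beagle"},
    )

    # On success the endpoint returns {"data": <graph as JSON>}.
    return resp.json()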
def run(self) -> List[Node]:
    """Generates the list of nodes from the datasource.

    This method kicks off a producer/consumer queue. The producer grabs events
    one by one from the datasource by iterating over the events from the `events`
    generator. Each event is then sent to the :py:meth:`transformer` function to be
    transformed into one or more `Node` objects.

    Returns
    -------
    List[Node]
        All Nodes created from the data source.
    """
    logger.debug("Launching transformer")

    threads: List[Thread] = []

    producer_thread = Thread(target=self._producer_thread)
    producer_thread.start()
    threads.append(producer_thread)
    self.errors[producer_thread] = []

    logger.debug("Started producer thread")

    # Always keep at least one consumer, even if _THREAD_COUNT is 1.
    consumer_count = _THREAD_COUNT - 1
    if consumer_count == 0:
        consumer_count = 1

    for _ in range(consumer_count):
        t = Thread(target=self._consumer_thread)
        self.errors[t] = []
        t.start()
        threads.append(t)

    logger.debug(f"Started {consumer_count} consumer threads")

    # Wait for the producer to finish, then for the queue to drain.
    producer_thread.join()
    self._queue.join()

    # Stop the consumer threads.
    for _ in range(consumer_count):
        self._queue.put(_SENTINEL)

    for thread in threads:
        thread.join()

    logger.info(f"Finished processing of events, created {len(self.nodes)} nodes.")

    if any(len(x) > 0 for x in self.errors.values()):
        logger.warning("Parsing finished with errors.")
        logger.debug(self.errors)

    return self.nodes
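# Minimal standalone sketch (under stated assumptions) of the producer/consumer
# pattern run() is built on: a producer fills a queue, several consumers drain it,
# and sentinel values shut the consumers down once the queue has been joined. This
# is an illustration only; the real work lives in _producer_thread/_consumer_thread.
def _sketch_producer_consumer(events, transform, consumer_count: int = 2):
    import queue
    from threading import Thread

    q: "queue.Queue" = queue.Queue()
    results: list = []
    sentinel = object()

    def producer():
        for event in events:
            q.put(event)

    def consumer():
        while True:
            item = q.get()
            if item is sentinel:
                q.task_done()
                break
            results.extend(transform(item))  # e.g. one event -> one or more nodes
            q.task_done()

    consumers = [Thread(target=consumer) for _ in range(consumer_count)]
    for c in consumers:
        c.start()

    prod = Thread(target=producer)
    prod.start()

    prod.join()  # wait for the producer to finish
    q.join()     # wait for every queued event to be processed

    for _ in consumers:  # one sentinel per consumer so each loop exits
        q.put(sentinel)
    for c in consumers:
        c.join()

    return results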
def _add_to_exiting_graph(
    existing_backend: Backend,
    datasource_cls: Type[DataSource],
    transformer_cls: Type[Transformer],
    params: Dict[str, Any],
    is_external: bool,
) -> Tuple[dict, bool]:
    try:
        # Set up parameters for the datasource class.
        datasource_params = (
            # Use file names if we are referencing temporary files.
            {param_name: tempfile.name for param_name, tempfile in params.items()}
            if not is_external
            else params
        )

        # Create the datasource.
        datasource = datasource_cls(**datasource_params)  # type: ignore

        # Create the transformer.
        transformer = datasource.to_transformer(transformer_cls)

        # Create the nodes.
        nodes = transformer.run()

        # Add the nodes to the existing backend.
        G = existing_backend.add_nodes(nodes)
    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")

        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Clean up temporary files.
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as cleanup_error:
                logger.critical(f"Failure to clean up temporary files after error {cleanup_error}")

        return {"message": str(e)}, False

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files.
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even produced a graph.
    # This will be on the G attribute for any class subclassing NetworkX.
    if existing_backend.is_empty():
        return {"message": "Graph generation resulted in 0 nodes."}, False

    return {"graph": G, "backend": existing_backend}, True
def _get_rdpcap(self):
    # Lazily import scapy and cache the rdpcap function on the instance; without the
    # cache, later calls skip the import branch and would raise a NameError.
    if not self._imported_scapy:
        logger.info("Loading Scapy")

        from scapy.all import rdpcap

        logger.info("Scapy Loaded")

        self._imported_scapy = True
        self._rdpcap = rdpcap

    return self._rdpcap
def __init__(
    self, metadata: dict = {}, consolidate_edges: bool = False, *args, **kwargs
) -> None:
    self.metadata = metadata
    self.consolidate_edges = consolidate_edges
    self.G = nx.MultiDiGraph(metadata=metadata)

    super().__init__(*args, **kwargs)

    logger.info("Initialized NetworkX Backend")
def __init__(self, anonymize: bool = False, render: bool = False, *args, **kwargs) -> None:
    super().__init__(*args, **kwargs)

    self.anonymize = anonymize
    self.render = render

    logger.info("Initialized Graphistry Backend")

    self.key = self._get_key()
    if self.key is None:
        raise RuntimeError(
            "Please set the graphistry API key in either the GRAPHISTRY_API_KEY"
            " or BEAGLE__GRAPHISTRY__API_KEY environment variables"
        )
def graph(self) -> None:
    logger.info("Generating graph using NetworkX")
    nx_graph = super().graph()

    logger.info("Migrating graph to Neo4j")

    logger.info(f"Inserting nodes into Neo4J in batches of {self.batch_size}")
    self._make_nodes(nx_graph)

    logger.info(f"Inserting edges into Neo4J in batches of {self.batch_size}")
    self._make_edges(nx_graph)

    logger.info("All data inserted into Neo4J")
def graph(self) -> str:
    logger.info("Generating graph using NetworkX")
    nx_graph = super().graph()

    logger.info("Migrating graph to Neo4j")

    logger.info(f"Inserting nodes into Neo4J in batches of {self.batch_size}")
    self._make_nodes(nx_graph)

    logger.info(f"Inserting edges into Neo4J in batches of {self.batch_size}")
    self._make_edges(nx_graph)

    logger.info("All data inserted into Neo4J")

    return self.uri.replace("bolt", "http")
def graph(self) -> nx.MultiDiGraph:
    """Generates the MultiDiGraph. Places the nodes in the Graph.

    Returns
    -------
    nx.MultiDiGraph
        The generated NetworkX object.
    """
    logger.info("Beginning graph generation.")

    # De-duplicate nodes.
    self.nodes = dedup_nodes(self.nodes)

    for node in self.nodes:
        # Insert the node into the graph.
        # This also takes care of edges.
        self.insert_node(node, hash(node))

    logger.info("Completed graph generation.")
    logger.info(
        f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
    )

    return self.G
def __init__(
    self,
    uri: str = Config.get("neo4j", "host"),
    username: str = Config.get("neo4j", "username"),
    password: str = Config.get("neo4j", "password"),
    clear_database: bool = False,
    *args,
    **kwargs,
):
    logger.info(f"Connecting to neo4j server at {uri}")

    self.neo4j = GraphDatabase.driver(uri, auth=(username, password))

    super().__init__(*args, **kwargs)

    logger.info("Initialized Neo4j Backend")

    self.batch_size = int(Config.get("neo4j", "batch_size"))
    self.uri = uri

    if clear_database:
        logger.info("Wiping database")
        with self.neo4j.session() as session:
            session.write_transaction(lambda tx: tx.run("MATCH (n) DETACH DELETE n"))
def _make_nodes(self, source_graph: nx.Graph) -> None:
    logger.info("Grouping Nodes by type")

    # Group nodes by class.
    sorted_nodes = sorted(
        [node["data"] for _, node in source_graph.nodes(data=True)],
        key=lambda node: node.__name__,
        reverse=True,
    )
    nodes_by_type = itertools.groupby(sorted_nodes, key=lambda node: node.__name__)

    for node_type, nodes in nodes_by_type:
        # Remove white spaces.
        node_type = node_type.replace(" ", "_")

        self._create_constraint(node_type)

        cypher_nodes = list(map(self._node_as_cypher, nodes))

        logger.debug(f"Inserting {len(cypher_nodes)} {node_type} nodes into Neo4J")

        for i in range(0, len(cypher_nodes), self.batch_size):
            start = i
            end = i + self.batch_size

            cypher = f"UNWIND [{', '.join(cypher_nodes[start:end])}] as row\n"
            cypher += f"CREATE (node:{node_type} {{_key: row._key}}) SET node = row"

            with self.neo4j.session() as session:
                session.write_transaction(lambda tx: tx.run(cypher))

            logger.debug(f"Finished batch {i+1} ({start} -> {end})")
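# For reference, a single batch generated by _make_nodes() is a Cypher statement of
# roughly this shape (the row values here are made-up samples; _node_as_cypher
# produces the real ones):
#
#   UNWIND [{_key: '1234', process_image: 'cmd.exe'}, {_key: '5678', process_image: 'explorer.exe'}] as row
#   CREATE (node:Process {_key: row._key}) SET node = row
#
# The edge batches built in _make_edges() then MATCH on those _key properties before
# creating each relationship.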
def add_nodes(self, nodes: List[Node]) -> nx.MultiDiGraph:
    logger.info("Appending nodes into existing graph.")

    nodes = dedup_nodes(nodes)
    for node in nodes:
        self.insert_node(node, hash(node))

    logger.info("Completed appending nodes to the graph.")
    logger.info(
        f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
    )

    return self.G
def graph(self) -> nx.MultiDiGraph:
    """Generates the MultiDiGraph. Places the nodes in the Graph.

    Returns
    -------
    nx.MultiDiGraph
        The generated NetworkX object.
    """
    logger.info("Beginning graph generation.")

    for node in self.nodes:
        node_id = hash(node)
        self.insert_node(node, node_id)

    logger.info("Completed graph generation.")
    logger.info(
        f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
    )

    return self.G
def __init__(
    self,
    host: str = Config.get("dgraph", "host"),
    batch_size: int = int(Config.get("dgraph", "batch_size")),
    wipe_db: bool = False,
    *args,
    **kwargs,
):
    logger.info(f"Connecting to Dgraph server at {host}")

    client_stub = pydgraph.DgraphClientStub(host)
    self.dgraph = pydgraph.DgraphClient(client_stub)

    super().__init__(*args, **kwargs)

    if wipe_db:
        logger.info("Wiping existing database due to wipe_db=True")
        self.dgraph.alter(pydgraph.Operation(drop_all=True))

    # Keep the host around (graph() returns it) and respect the configured batch size
    # instead of hard-coding it.
    self.host = host
    self.batch_size = batch_size

    logger.info("Initialized Dgraph Backend")
def _save_graph_to_db(backend: NetworkX, category: str, graph_id: int = None) -> dict:
    """Saves a graph to the database, optionally forcing an overwrite of an existing graph.

    Parameters
    ----------
    backend : NetworkX
        The NetworkX object to save.
    category : str
        The category to file the graph under.
    graph_id : int, optional
        The ID of an existing graph to overwrite.

    Returns
    -------
    dict
        JSON to return to the client with the graph's ID and path.
    """
    # Take the SHA256 of the contents of the graph.
    contents_hash = hashlib.sha256(
        json.dumps(backend.to_json(), sort_keys=True).encode("utf-8")
    ).hexdigest()

    # See if we have previously generated this *exact* graph.
    existing = Graph.query.filter_by(meta=backend.metadata, sha256=contents_hash).first()

    if existing:
        logger.info(f"Graph previously generated with id {existing.id}")
        return {"id": existing.id, "self": f"/{existing.category}/{existing.id}"}

    dest_folder = category.replace(" ", "_").lower()

    # Set up the storage directory.
    dest_path = f"{Config.get('storage', 'dir')}/{dest_folder}/{contents_hash}.json"
    os.makedirs(f"{Config.get('storage', 'dir')}/{dest_folder}", exist_ok=True)

    json.dump(backend.to_json(), open(dest_path, "w"))

    if graph_id:
        db_entry = Graph.query.filter_by(id=graph_id).first()

        # Point the existing entry at the new contents.
        db_entry.file_path = f"{contents_hash}.json"
        db_entry.sha256 = contents_hash
        # NOTE: The old file is not deleted.
    else:
        db_entry = Graph(
            sha256=contents_hash,
            meta=backend.metadata,
            comment=request.form.get("comment", None),
            category=dest_folder,  # Categories use the lowercase name!
            file_path=f"{contents_hash}.json",
        )

        # Add the new entry.
        db.session.add(db_entry)

    db.session.commit()

    logger.info(f"Added graph to database with id={db_entry.id}")
    logger.info(f"Saved graph to {dest_path}")

    return {"id": db_entry.id, "self": f"/{dest_folder}/{db_entry.id}"}
def graph(self):
    """Pushes the nodes and edges into DGraph."""
    logger.info("Generating base graph using NetworkX")
    nx_graph = super().graph()

    logger.info("Migrating graph to DGraph")

    logger.info("Setting up schema")
    self.setup_schema()
    logger.info("Created schema")

    uids_to_nodes: Dict[str, int] = {}
    nodes_to_uids: Dict[int, str] = {}

    current_id = 0

    def _node_to_dgraph_dict(node: Node) -> dict:
        # Prefix every attribute with the node type; nested dicts are JSON-encoded.
        return {
            f"{node.__name__.lower().replace(' ', '_')}.{k}": (
                json.dumps(v) if isinstance(v, dict) else v
            )
            for k, v in node.to_dict().items()
            if v
        }

    logger.info("Inserting nodes")

    all_nodes = [node["data"] for _, node in nx_graph.nodes(data=True)]

    for i in range(0, len(all_nodes), self.batch_size):
        txn = self.dgraph.txn()

        # Build one mutation payload per batch so earlier batches are not re-inserted.
        nodes_txn = []

        for node in all_nodes[i : i + self.batch_size]:
            # Remove spaces, lowercase and escape.
            node_data = _node_to_dgraph_dict(node)
            node_data["uid"] = f"_:node_{current_id}"
            uids_to_nodes[node_data["uid"]] = hash(node)
            current_id += 1

            node_data["type"] = node.__name__.lower().replace(" ", "_")

            nodes_txn.append(node_data)

        assigned = txn.mutate(set_obj=nodes_txn)

        for uid, assigned_uid in assigned.uids.items():
            nodes_to_uids[uids_to_nodes[f"_:{uid}"]] = assigned_uid

        txn.commit()

        logger.info(
            f"Inserted nodes batch {i} -> {i+self.batch_size}, Total UIDs: {len(nodes_to_uids.keys())} UIDs"
        )

    logger.info("Inserting edges")

    # Materialize the edge view so it can be sliced into batches.
    all_edges = list(nx_graph.edges(data=True, keys=True))

    for i in range(0, len(all_edges), self.batch_size):
        edge_nquads = ""

        for edge in all_edges[i : i + self.batch_size]:
            edge_nquads += (
                f"<{nodes_to_uids[edge[0]]}> "
                f"<{edge[2].lower().replace(' ', '_')}> "
                f"<{nodes_to_uids[edge[1]]}> .\n"
            )

        txn = self.dgraph.txn()
        assigned = txn.mutate(set_nquads=edge_nquads)

        logger.info(
            f"Inserted edges batch {i} -> {i+self.batch_size}, got back {len(assigned.context.keys)} UIDs"
        )

        txn.commit()

    return self.host
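# For reference, each edge batch above is sent to Dgraph as RDF N-Quads of roughly
# this shape, where the UIDs are whatever Dgraph assigned to the source and
# destination nodes and the predicates are sample edge names:
#
#   <0x2711> <launched> <0x2712> .
#   <0x2712> <connected_to> <0x2713> .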
def __init__(self, *args, **kwargs) -> None:
    super().__init__(*args, **kwargs)

    logger.info("Created PCAP Transformer")
def __init__(self, *args, **kwargs) -> None:
    super().__init__(*args, **kwargs)

    logger.info("Created Windows EVTX Transformer.")
def new():
    """Generate a new graph using the supplied DataSource, Transformer, and the
    parameters passed to the DataSource.

    At minimum, the user must supply the following form parameters:
        1. datasource
        2. transformer
        3. comment
        4. backend (optional; defaults to NetworkX)

    Outside of that, the user must supply at **minimum** the parameters marked by the
    datasource as required.

    * Use the /api/datasources endpoint to see which ones these are.
    * Programmatically, these are any parameters without a default value.

    Failure to supply either the minimum three or the required parameters for that
    datasource returns a 400 status code with the missing parameters in the
    'message' field.

    If any part of the graph creation yields an error, a 500 HTTP code is returned
    with the Python exception as a string in the 'message' field.

    If the graph is successfully created, the user is returned a dictionary with the
    ID of the graph and the URI path to view it in the *beagle web interface*.

    For example:

    >>> { id: 1, self: /fireeye_hx/1 }

    Returns
    -------
    dict
        {id: integer, self: string}
    """
    # Returns a tuple of (dict, bool).
    resp, success = _validate_params(form=request.form, files=request.files)

    # If validation failed, return the error message.
    if not success:
        return make_response(jsonify(resp), 400)

    datasource_cls: Type[DataSource] = resp["datasource"]
    transformer_cls: Type[Transformer] = resp["transformer"]
    backend_cls: Type[Backend] = resp["backend"]

    datasource_schema = resp["schema"]

    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.
    is_external = issubclass(datasource_cls, ExternalDataSource)

    logger.info(
        f"Received upload request for datasource=<{datasource_cls.__name__}>, "
        f"transformer=<{transformer_cls.__name__}>, backend=<{backend_cls.__name__}>"
    )

    logger.info("Transforming data to a graph.")

    params = _setup_params(form=request.form, schema=datasource_schema, is_external=is_external)

    resp, success = _create_graph(
        datasource_cls=datasource_cls,
        transformer_cls=transformer_cls,
        backend_cls=backend_cls,
        params=params,
        is_external=is_external,
    )

    if not success:
        return make_response(jsonify(resp), 400)

    G = resp["graph"]

    # If the backend is NetworkX, save the graph.
    # Otherwise, redirect the user to wherever the graph was sent (if possible).
    if backend_cls.__name__ == "NetworkX":
        response = _save_graph_to_db(backend=resp["backend"], category=datasource_cls.category)
        response = jsonify(response)
    else:
        logger.debug(G)
        response = jsonify({"resp": G})

    return response
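# Illustrative client sketch (not part of the application) for the /new endpoint
# above. The host, route prefix, datasource/transformer identifiers, and the file
# parameter name are assumptions; the form fields mirror the docstring's required
# parameters.
def _example_new_request():
    import requests  # hypothetical client-side dependency

    with open("triage.mans", "rb") as triage_file:  # sample upload
        resp = requests.post(
            "http://localhost:8000/api/new",  # assumed host and route
            data={
                "datasource": "fireeye_hx",  # assumed datasource id
                "transformer": "FireEyeHXTransformer",  # assumed transformer id
                "comment": "example upload",
                "backend": "NetworkX",
            },
            # The file field name depends on the datasource's schema (see /api/datasources).
            files={"triage": triage_file},
        )

    # On success the response looks like {"id": 1, "self": "/fireeye_hx/1"}.
    return resp.json()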
def add(graph_id: int):
    """Add data to an existing NetworkX-based graph.

    Parameters
    ----------
    graph_id : int
        The graph ID to add to.
    """
    graph_obj = Graph.query.filter_by(id=graph_id).first()
    if not graph_obj:
        return make_response(jsonify({"message": "Graph not found"}), 404)

    # Validate the parameters.
    # Returns a tuple of (dict, bool).
    resp, success = _validate_params(form=request.form, files=request.files)

    # If validation failed, return the error message.
    if not success:
        return make_response(jsonify(resp), 400)

    datasource_cls: Type[DataSource] = resp["datasource"]
    transformer_cls: Type[Transformer] = resp["transformer"]
    backend_cls: Type[Backend] = resp["backend"]

    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.
    is_external = issubclass(datasource_cls, ExternalDataSource)

    # Only NetworkX for now.
    if backend_cls.__name__ != "NetworkX":
        logger.info("Cannot append to non-NetworkX graphs for now.")
        return make_response(jsonify({"message": "Can only add to NetworkX Graphs for now."}), 400)

    # Cast to NetworkX.
    backend_cls = cast(Type[NetworkX], backend_cls)

    datasource_schema = resp["schema"]

    logger.info(
        f"Received add data request for existing graph=<{graph_id}>, "
        f"datasource=<{datasource_cls.__name__}>, "
        f"transformer=<{transformer_cls.__name__}>, backend=<{backend_cls.__name__}>"
    )

    params = _setup_params(form=request.form, schema=datasource_schema, is_external=is_external)

    # NOTE: This will all need to change to support non-NetworkX backends.

    # Get the existing graph as JSON.
    dest_path = f"{Config.get('storage', 'dir')}/{graph_obj.category}/{graph_obj.file_path}"
    json_data = json.load(open(dest_path, "r"))

    # Make a dummy backend instance.
    backend_instance = backend_cls(nodes=[], consolidate_edges=True)

    existing_graph = backend_cls.from_json(json_data)

    # Set the graph.
    backend_instance.G = existing_graph

    resp, success = _add_to_exiting_graph(
        existing_backend=backend_instance,
        datasource_cls=datasource_cls,
        transformer_cls=transformer_cls,
        params=params,
        is_external=is_external,
    )

    if not success:
        return make_response(jsonify(resp), 400)

    # Save the updated graph object to disk.
    resp = _save_graph_to_db(
        backend=backend_instance,
        # Use the existing category.
        category=graph_obj.category,
        # Overwrite the existing graph ID.
        graph_id=graph_obj.id,
    )

    return make_response(jsonify(resp), 200)
def __init__(self, *args, **kwargs) -> None:
    super().__init__(*args, **kwargs)

    logger.info("Created FireEyeHX Transformer.")
def new():
    """Generate a new graph using the supplied DataSource, Transformer, and the
    parameters passed to the DataSource.

    At minimum, the user must supply the following form parameters:
        1. datasource
        2. transformer
        3. comment
        4. backend (optional; defaults to NetworkX)

    Outside of that, the user must supply at **minimum** the parameters marked by the
    datasource as required.

    * Use the /api/datasources endpoint to see which ones these are.
    * Programmatically, these are any parameters without a default value.

    Failure to supply either the minimum three or the required parameters for that
    datasource returns a 400 status code with the missing parameters in the
    'message' field.

    If any part of the graph creation yields an error, a 500 HTTP code is returned
    with the Python exception as a string in the 'message' field.

    If the graph is successfully created, the user is returned a dictionary with the
    ID of the graph and the URI path to view it in the *beagle web interface*.

    For example:

    >>> { id: 1, self: /fireeye_hx/1 }

    Returns
    -------
    dict
        {id: integer, self: string}
    """
    # Verify we have the basic parameters.
    missing_params = []
    for param in ["datasource", "transformer", "comment"]:
        if param not in request.form:
            missing_params.append(param)

    if len(missing_params) > 0:
        logger.debug(f"Request to /new missing parameters: {missing_params}")
        return make_response(jsonify({"message": f"Missing parameters {missing_params}"}), 400)

    # Get the requested datasource, transformer, and backend.
    requested_datasource = request.form["datasource"]
    requested_transformer = request.form["transformer"]
    requested_backend = request.form.get("backend", "NetworkX")

    datasource_schema = next(
        filter(lambda entry: entry["id"] == requested_datasource, SCHEMA["datasources"]), None
    )

    if datasource_schema is None:
        logger.debug(f"User requested a non-existent data source {requested_datasource}")
        return make_response(
            jsonify(
                {
                    "message": f"Requested datasource '{requested_datasource}' is invalid, "
                    + "please use /api/datasources to find a list of valid datasources"
                }
            ),
            400,
        )

    logger.info(
        f"Received upload request for datasource=<{requested_datasource}>, "
        f"transformer=<{requested_transformer}>, backend=<{requested_backend}>"
    )

    datasource_cls = DATASOURCES[requested_datasource]
    transformer_cls = TRANSFORMERS[requested_transformer]
    backend_class = BACKENDS[requested_backend]

    required_parameters = datasource_schema["params"]

    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.
    is_external = issubclass(datasource_cls, ExternalDataSource)

    # Make sure the user provided all required parameters for the datasource.
    datasource_missing_params = []
    for param in required_parameters:
        # Skip optional parameters.
        if param["required"] is False:
            continue

        if is_external and param["name"] not in request.form:
            datasource_missing_params.append(param["name"])

        if not is_external and param["name"] not in request.files:
            datasource_missing_params.append(param["name"])

    if len(datasource_missing_params) > 0:
        logger.debug(
            f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
        )
        return make_response(
            jsonify(
                {
                    "message": f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
                }
            ),
            400,
        )

    logger.info("Transforming data to a graph.")

    logger.debug("Setting up parameters")

    params = {}
    if is_external:
        # External parameters are passed as form fields.
        for param in datasource_schema["params"]:
            if param["name"] in request.form:
                params[param["name"]] = request.form[param["name"]]

        logger.info(f"ExternalDataSource params received {params}")
    else:
        for param in datasource_schema["params"]:
            # Save the files, keeping track of which parameter they represent.
            if param["name"] in request.files:
                params[param["name"]] = tempfile.NamedTemporaryFile()
                request.files[param["name"]].save(params[param["name"]].name)
                params[param["name"]].seek(0)

        logger.info(f"Saved uploaded files {params}")

    logger.debug("Set up parameters")

    try:
        # Create the datasource.
        datasource = datasource_cls(
            # Give file paths instead of file-like objects when not an external source.
            **(
                {param_name: tempfile.name for param_name, tempfile in params.items()}
                if not is_external
                else params
            )
        )

        transformer = datasource.to_transformer(transformer_cls)

        graph = backend_class(
            metadata=datasource.metadata(), nodes=transformer.run(), consolidate_edges=True
        )

        # Make the graph.
        G = graph.graph()
    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")

        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Clean up temporary files.
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as cleanup_error:
                logger.critical(f"Failure to clean up temporary files after error {cleanup_error}")

        response = make_response(jsonify({"message": str(e)}), 500)
        response.headers.add("Access-Control-Allow-Origin", "*")
        return response

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files.
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even produced a graph.
    # This will be on the G attribute for any class subclassing NetworkX.
    if graph.is_empty():
        return make_response(jsonify({"message": "Graph generation resulted in 0 nodes."}), 400)

    # If the backend is NetworkX, save the graph.
    # Otherwise, redirect the user to wherever the graph was sent (if possible).
    if backend_class.__name__ == "NetworkX":
        # Take the SHA256 of the contents of the graph.
        contents_hash = hashlib.sha256(
            json.dumps(graph.to_json(), sort_keys=True).encode("utf-8")
        ).hexdigest()

        # See if we have previously generated this *exact* graph.
        existing = Graph.query.filter_by(meta=graph.metadata, sha256=contents_hash).first()

        if existing:
            logger.info(f"Graph previously generated with id {existing.id}")
            response = jsonify({"id": existing.id, "self": f"/{existing.category}/{existing.id}"})
            response.headers.add("Access-Control-Allow-Origin", "*")
            return response

        dest_folder = datasource_cls.category.replace(" ", "_").lower()

        # Set up the storage directory.
        dest_path = f"{Config.get('storage', 'dir')}/{dest_folder}/{contents_hash}.json"
        os.makedirs(f"{Config.get('storage', 'dir')}/{dest_folder}", exist_ok=True)

        db_entry = Graph(
            sha256=contents_hash,
            meta=graph.metadata,
            comment=request.form.get("comment", None),
            category=dest_folder,  # Categories use the lowercase name!
            file_path=f"{contents_hash}.json",
        )

        db.session.add(db_entry)
        db.session.commit()

        logger.info(f"Added graph to database with id={db_entry.id}")

        json.dump(graph.to_json(), open(dest_path, "w"))

        logger.info(f"Saved graph to {dest_path}")

        response = jsonify({"id": db_entry.id, "self": f"/{dest_folder}/{db_entry.id}"})
    else:
        logger.debug(G)
        response = jsonify({"resp": G})

    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
def __init__(self, *args, **kwargs) -> None:
    super().__init__(*args, **kwargs)

    logger.info("Created DARPA Transparent Computing Transformer.")
def events(self) -> Generator[dict, None, None]:
    reader = self._get_rdpcap()

    from scapy.all import Ether, IP, TCP, DNS, UDP, Packet
    from scapy.layers.http import HTTPRequest

    logger.info("Reading PCAP File")
    pcap = reader(self.pcap_file)

    layers_data = {
        Ether: {
            "src_mac": lambda layer: layer.fields["src"],
            "dst_mac": lambda layer: layer.fields["dst"],
        },
        IP: {
            "src_ip": lambda layer: layer.fields["src"],
            "dst_ip": lambda layer: layer.fields["dst"],
            # Returns the protocol as a human readable string.
            "protocol": lambda layer: layer.get_field("proto")
            .i2s[layer.fields["proto"]]
            .upper(),
        },
        UDP: {
            "dport": lambda layer: layer.fields["dport"],
            "sport": lambda layer: layer.fields["sport"],
        },
        TCP: {
            "sport": lambda layer: layer.fields["sport"],
            "dport": lambda layer: layer.fields["dport"],
        },
        DNS: {"dns": self._parse_dns_request},
        HTTPRequest: {
            "http_method": lambda layer: layer.fields["Method"].decode(),
            "uri": lambda layer: layer.fields["Path"].decode(),
            "http_dest": lambda layer: layer.fields.get("Host", b"").decode(),
        },
    }

    packet_type = "Ether"

    for packet in pcap:
        packet = cast(Packet, packet)

        payload = packet.build()
        if packet.haslayer(IP):
            payload = packet[IP].build()

        packet_data = {
            "payload": "".join(
                c
                # Replace null bytes.
                for c in payload.decode(encoding="ascii", errors="ignore").replace("\x00", ".")
                # Remove unicode control characters.
                if unicodedata.category(c) not in {"Cc", "Cf", "Cs", "Co", "Cn"}
            ),
            "timestamp": int(packet.time),
        }

        for layer_name, config in layers_data.items():
            if not packet.haslayer(layer_name):
                continue

            packet_type = layer_name.__name__

            layer = packet[layer_name]
            for name, processor in config.items():
                output = processor(layer)

                # Allows the processor to output multiple values.
                if isinstance(output, dict):
                    packet_data.update(output)
                else:
                    packet_data[name] = output

        packet_data["event_type"] = packet_type

        yield packet_data
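# For reference, a single event yielded by events() for a TCP/IP packet looks roughly
# like the dictionary below (sample values; the keys come from the layers_data
# processors above):
#
#   {
#       "event_type": "TCP",
#       "timestamp": 1554747000,
#       "payload": "GET / HTTP/1.1....",
#       "src_mac": "00:0c:29:aa:bb:cc",
#       "dst_mac": "00:50:56:dd:ee:ff",
#       "src_ip": "192.168.1.10",
#       "dst_ip": "93.184.216.34",
#       "protocol": "TCP",
#       "sport": 49152,
#       "dport": 80,
#   }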
def graph(self) -> nx.MultiDiGraph:
    """Generates the MultiDiGraph. Places the nodes in the Graph.

    Returns
    -------
    nx.MultiDiGraph
        The generated NetworkX object.
    """
    logger.info("Beginning graph generation.")

    # De-duplicate nodes.
    self.nodes = dedup_nodes(self.nodes)

    for node in self.nodes:
        # Insert the node into the graph.
        self.insert_node(node, hash(node))

        # Add in all the edges for this node.
        for edge_dict in node.edges:
            for dest_node, edge_data in edge_dict.items():
                default_edge_name = edge_data.__name__

                edge_instances = [
                    {"edge_name": entry.pop("edge_name", default_edge_name), "data": entry}
                    for entry in edge_data._events
                ]

                if len(edge_instances) == 0:
                    edge_instances = [{"edge_name": default_edge_name}]

                # Sort by name so groupby sees each edge name as one contiguous run.
                edge_instances = sorted(edge_instances, key=lambda e: e["edge_name"])

                for edge_name, instances in groupby(edge_instances, key=lambda e: e["edge_name"]):
                    self.insert_edges(
                        u=node,  # Source node.
                        v=dest_node,  # Destination node.
                        edge_name=edge_name,
                        # All instances of edges between u->v with this name; only the data.
                        instances=[e.get("data", None) for e in instances],
                    )

    logger.info("Completed graph generation.")
    logger.info(
        f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
    )

    return self.G