Example #1
def _setup_params(form: dict, schema: dict, is_external: bool) -> dict:
    logger.debug("Setting up parameters")

    params: Dict[str, Any] = {}

    if is_external:
        # External parameters arrive as plain form fields.
        for param in schema["params"]:
            if param["name"] in form:
                params[param["name"]] = form[param["name"]]

        logger.info(f"ExternalDataSource params received {params}")

    else:
        for param in schema["params"]:
            # Save the files, keep track of which parameter they represent
            if param["name"] in request.files:
                params[param["name"]] = tempfile.NamedTemporaryFile()
                request.files[param["name"]].save(params[param["name"]].name)
                params[param["name"]].seek(0)

        logger.info(f"Saved uploaded files {params}")

    logger.debug("Set up parameters")

    return params
Example #2
    def _make_edges(self, source_graph: nx.MultiDiGraph) -> None:

        logger.info("Grouping Edges by type")

        sorted_edges = sorted(source_graph.edges(data=True, keys=True),
                              key=lambda edge: edge[3]["edge_name"])

        edges_by_type = itertools.groupby(
            sorted_edges, key=lambda edge: edge[3]["edge_name"])

        for edge_type, edges in edges_by_type:

            # Strip spaces so the edge type is a valid Cypher relationship name.
            edge_type = edge_type.replace(" ", "_")

            cypher_edges = list(map(self._edge_as_cypher, edges))

            logger.debug(
                f"Inserting {len(cypher_edges)} {edge_type} edges into Neo4J")

            for i in range(0, len(cypher_edges), self.batch_size):

                start = i
                end = i + self.batch_size

                cypher = f"UNWIND [{', '.join(cypher_edges[start: end])}] as row\n"
                cypher += "MATCH (src {_key: row.src}), (dst {_key: row.dst})"
                cypher += f" CREATE (src)-[:`{edge_type}`]->(dst)"

                with self.neo4j.session() as session:
                    session.write_transaction(lambda tx: tx.run(cypher))

                logger.debug(f"Finished batch {i+1} ({start} -> {end})")
Example #3
def adhoc():
    """Allows for ad-hoc transformation of generic JSON Data based on one of two CIM models:

    1. The Beagle CIM Model (defined in `constants.py`)
    2. The OSSEM Model (defined in https://github.com/Cyb3rWard0g/OSSEM)
    """

    valid_cim_formats = ["beagle"]
    data = request.get_json()
    events = data["data"]
    cim_format = data.get("cim", "beagle")

    if str(cim_format).lower() not in valid_cim_formats:
        response = jsonify({"message": f"cim_format must be one of {valid_cim_formats}"})

        return response

    if not isinstance(events, list):
        events = [events]

    logger.info(f"Beginning ad-hoc graphing request")

    g = JSONData(events).to_graph(consolidate_edges=True)

    logger.info(f"Completed ad-hoc graphing request")

    return jsonify({"data": NetworkX.graph_to_json(g)})
Example #4
    def run(self) -> List[Node]:
        """Generates the list of nodes from the datasource.

        This method kicks off a producer/consumer queue. The producer grabs events
        one by one from the datasource by iterating over the events from the `events`
        generator. Each event is then sent to the :py:meth:`transformer` function to be
        transformed into one or more `Node` objects.

        Returns
        -------
        List[Node]
            All Nodes created from the data source.
        """

        logger.debug("Launching transformer")

        threads: List[Thread] = []

        producer_thread = Thread(target=self._producer_thread)
        producer_thread.start()
        threads.append(producer_thread)
        self.errors[producer_thread] = []

        logger.debug("Started producer thread")

        # Reserve one thread for the producer, but always keep at least one consumer.
        consumer_count = max(_THREAD_COUNT - 1, 1)

        for i in range(consumer_count):
            t = Thread(target=self._consumer_thread)
            self.errors[t] = []
            t.start()
            threads.append(t)

        logger.debug(f"Started {_THREAD_COUNT-1} consumer threads")

        # Wait for the producer to finish
        producer_thread.join()
        self._queue.join()

        # Stop the threads
        for i in range(consumer_count):
            self._queue.put(_SENTINEL)

        for thread in threads:
            thread.join()

        logger.info(
            f"Finished processing of events, created {len(self.nodes)} nodes.")

        if any(len(x) > 0 for x in self.errors.values()):
            logger.warning("Parsing finished with errors.")
            logger.debug(self.errors)

        return self.nodes
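
The `_producer_thread` and `_consumer_thread` helpers are referenced but not shown. Below is a minimal sketch of how such a pair could look under this sentinel protocol; the bodies, `self.datasource.events()`, and `self.transform()` are assumptions rather than the project's actual code:

from threading import current_thread

def _producer_thread(self) -> None:
    # Assumed: push every raw event from the datasource onto the shared queue.
    for event in self.datasource.events():
        self._queue.put(event)

def _consumer_thread(self) -> None:
    # Assumed: pull events until the sentinel arrives, transform each one,
    # and collect the resulting nodes. task_done() is called for every get()
    # so that self._queue.join() in run() can return.
    while True:
        event = self._queue.get()
        if event is _SENTINEL:
            self._queue.task_done()
            return
        try:
            self.nodes.extend(self.transform(event))  # assumed per-event hook
        except Exception as e:
            self.errors[current_thread()].append(e)
        finally:
            self._queue.task_done()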
Example #5
def _add_to_existing_graph(
    existing_backend: Backend,
    datasource_cls: Type[DataSource],
    transformer_cls: Type[Transformer],
    params: Dict[str, Any],
    is_external: bool,
) -> Tuple[dict, bool]:
    try:
        # Set up parameters for datasource class
        datasource_params = (
            # Use the temporary file paths when params hold uploaded files.
            {param_name: _tempfile.name for param_name, _tempfile in params.items()}
            if not is_external
            else params
        )
        # Create the datasource
        datasource = datasource_cls(**datasource_params)  # type: ignore
        # Create transformer
        transformer = datasource.to_transformer(transformer_cls)

        # Create the nodes
        nodes = transformer.run()

        # Create the backend
        G = existing_backend.add_nodes(nodes)

    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")
        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Clean up temporary files
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as cleanup_error:
                logger.critical(f"Failure to clean up temporary files after error {cleanup_error}")

        # Report the failure; without this return, `G` below would be unbound.
        return {"message": str(e)}, False

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even had a graph.
    # This will be on the G attribute for any class subclassing NetworkX
    if existing_backend.is_empty():
        return {"message": f"Graph generation resulted in 0 nodes."}, False

    return {"graph": G, "backend": existing_backend}, True
Example #6
    def _get_rdpcap(self):

        if not self._imported_scapy:
            logger.info("Loading Scapy")
            from scapy.all import rdpcap

            logger.info("Scapy Loaded")

            # Cache the function; otherwise `rdpcap` would be unbound on
            # any call after the first.
            self._rdpcap = rdpcap
            self._imported_scapy = True

        return self._rdpcap
Example #7
    def __init__(self,
                 metadata: Optional[dict] = None,
                 consolidate_edges: bool = False,
                 *args,
                 **kwargs) -> None:

        # Default to None to avoid the shared-mutable-default-argument pitfall.
        self.metadata = metadata or {}
        self.consolidate_edges = consolidate_edges
        self.G = nx.MultiDiGraph(metadata=self.metadata)
        super().__init__(*args, **kwargs)

        logger.info("Initialized NetworkX Backend")
Example #8
    def __init__(self, anonymize: bool = False, render: bool = False, *args, **kwargs) -> None:

        super().__init__(*args, **kwargs)

        self.anonymize = anonymize
        self.render = render

        logger.info("Initialized Graphistry Backend")

        self.key = self._get_key()
        if self.key is None:
            raise RuntimeError(
                "Please set the graphistry API key in either the GRAPHISTRY_API_KEY"
                + " or BEAGLE__GRAPHISTRY__API_KEY environment variables"
            )
Example #9
    def graph(self) -> None:

        logger.info(f"Generating graph using NetworkX")

        nx_graph = super().graph()

        logger.info(f"Migrating graph to Neo4j")

        logger.info(f"Inserting nodes into Neo4J in batches of {self.batch_size}")

        self._make_nodes(nx_graph)

        logger.info(f"Inserting edges into Neo4J in batches of {self.batch_size}")

        self._make_edges(nx_graph)

        logger.info("All data inserted into Neo4J")
Example #10
    def graph(self) -> str:

        logger.info(f"Generating graph using NetworkX")

        nx_graph = super().graph()

        logger.info(f"Migrating graph to Neo4j")

        logger.info(
            f"Inserting nodes into Neo4J in batches of {self.batch_size}")

        self._make_nodes(nx_graph)

        logger.info(
            f"Inserting edges into Neo4J in batches of {self.batch_size}")

        self._make_edges(nx_graph)

        logger.info("All data inserted into Neo4J")
        return self.uri.replace("bolt", "http")
Example #11
    def graph(self) -> nx.MultiDiGraph:
        """Generates the MultiDiGraph.

        Places the nodes in the Graph.

        Returns
        -------
        nx.MultiDiGraph
            The generated NetworkX object.
        """

        logger.info("Beginning graph generation.")

        # De-duplicate nodes.
        self.nodes = dedup_nodes(self.nodes)

        for node in self.nodes:
            # Insert the node into the graph.
            # This also takes care of edges.
            self.insert_node(node, hash(node))

        logger.info("Completed graph generation.")
        logger.info(
            f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
        )

        return self.G
Example #12
    def __init__(
        self,
        uri: str = Config.get("neo4j", "host"),
        username: str = Config.get("neo4j", "username"),
        password: str = Config.get("neo4j", "password"),
        clear_database: bool = False,
        *args,
        **kwargs,
    ):

        logger.info(f"Connecting to neo4j server at {uri}")

        self.neo4j = GraphDatabase.driver(uri, auth=(username, password))

        super().__init__(*args, **kwargs)

        logger.info("Initialized Neo4j Backend")
        self.batch_size = int(Config.get("neo4j", "batch_size"))
        self.uri = uri

        if clear_database:
            logger.info("Wiping database")
            with self.neo4j.session() as session:
                session.write_transaction(
                    lambda tx: tx.run("MATCH (n) DETACH DELETE n"))
Example #13
    def _make_nodes(self, source_graph: nx.Graph) -> None:

        logger.info("Grouping Nodes by type")

        # Group nodes by class
        sorted_nodes = sorted(
            [node["data"] for _, node in source_graph.nodes(data=True)],
            key=lambda node: node.__name__,
            reverse=True,
        )

        nodes_by_type = itertools.groupby(sorted_nodes,
                                          key=lambda node: node.__name__)

        for node_type, nodes in nodes_by_type:

            # Strip spaces so the node type is a valid Cypher label.
            node_type = node_type.replace(" ", "_")

            self._create_constraint(node_type)

            cypher_nodes = list(map(self._node_as_cypher, nodes))

            logger.debug(
                f"Inserting {len(cypher_nodes)} {node_type} nodes into Neo4J")

            for i in range(0, len(cypher_nodes), self.batch_size):

                start = i
                end = i + self.batch_size

                cypher = f"UNWIND [{', '.join(cypher_nodes[start: end])}] as row\n"

                cypher += f"CREATE (node:{node_type} {{_key: row._key}}) SET node = row"

                with self.neo4j.session() as session:
                    session.write_transaction(lambda tx: tx.run(cypher))

                logger.debug(f"Finished batch {i+1} ({start} -> {end})")
Example #14
    def add_nodes(self, nodes: List[Node]) -> nx.MultiDiGraph:
        logger.info("Appending nodes into existing graph.")
        nodes = dedup_nodes(nodes)
        for node in nodes:
            self.insert_node(node, hash(node))

        logger.info("Completed appending nodes graph.")
        logger.info(
            f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
        )
        return self.G
Example #15
    def graph(self) -> nx.MultiDiGraph:
        """Generates the MultiDiGraph.

        Places the nodes in the Graph.

        Returns
        -------
        nx.MultiDiGraph
            The generated NetworkX object.
        """

        logger.info("Beginning graph generation.")

        for node in self.nodes:
            node_id = hash(node)
            self.insert_node(node, node_id)

        logger.info("Completed graph generation.")
        logger.info(
            f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
        )

        return self.G
Example #16
    def __init__(
        self,
        host: str = Config.get("dgraph", "host"),
        batch_size: int = int(Config.get("dgraph", "batch_size")),
        wipe_db: bool = False,
        *args,
        **kwargs,
    ):

        logger.info(f"Connecting to Dgraph server at {host}")

        client_stub = pydgraph.DgraphClientStub(host)

        self.dgraph = pydgraph.DgraphClient(client_stub)

        super().__init__(*args, **kwargs)

        if wipe_db:
            logger.info("Wiping existing database due to wipe_db=True")
            self.dgraph.alter(pydgraph.Operation(drop_all=True))

        # Honor the configured batch size; keep the host so graph() can return it.
        self.batch_size = batch_size
        self.host = host
        logger.info("Initialized Dgraph Backend")
Example #17
def _save_graph_to_db(backend: NetworkX, category: str, graph_id: Optional[int] = None) -> dict:
    """Saves a graph to the database, optionally forcing an overwrite of an existing graph.

    Parameters
    ----------
    backend : NetworkX
        The NetworkX object to save
    category : str
        The graph category; its lowercased form becomes the storage subfolder.
    graph_id : Optional[int]
        The ID of an existing graph entry to overwrite, if any.

    Returns
    -------
    dict
        JSON to return to client with ID and path.
    """
    # Take the SHA256 of the contents of the graph.
    contents_hash = hashlib.sha256(
        json.dumps(backend.to_json(), sort_keys=True).encode("utf-8")
    ).hexdigest()

    # See if we have previously generated this *exact* graph.
    existing = Graph.query.filter_by(meta=backend.metadata, sha256=contents_hash).first()

    if existing:
        logger.info(f"Graph previously generated with id {existing.id}")
        return {"id": existing.id, "self": f"/{existing.category}/{existing.id}"}

    dest_folder = category.replace(" ", "_").lower()

    # Set up the storage directory.
    dest_path = f"{Config.get('storage', 'dir')}/{dest_folder}/{contents_hash}.json"
    os.makedirs(f"{Config.get('storage', 'dir')}/{dest_folder}", exist_ok=True)

    with open(dest_path, "w") as outfile:
        json.dump(backend.to_json(), outfile)

    if graph_id:
        db_entry = Graph.query.filter_by(id=graph_id).first()
        # set the new hash.
        db_entry.file_path = f"{contents_hash}.json"
        db_entry.sha256 = contents_hash
        # NOTE: Old path is not deleted.

    else:
        db_entry = Graph(
            sha256=contents_hash,
            meta=backend.metadata,
            comment=request.form.get("comment", None),
            category=dest_folder,  # Categories use the lower name!
            file_path=f"{contents_hash}.json",
        )
        # Add new entry
        db.session.add(db_entry)

    db.session.commit()

    logger.info(f"Added graph to database with id={db_entry.id}")

    logger.info(f"Saved graph to {dest_path}")

    return {"id": db_entry.id, "self": f"/{dest_folder}/{db_entry.id}"}
Example #18
    def graph(self):
        """Pushes the nodes and edges into DGraph."""

        logger.info(f"Generating base graph using NetworkX")

        nx_graph = super().graph()

        logger.info(f"Migrating graph to DGraph")

        logger.info(f"Setting up schema")

        self.setup_schema()

        logger.info(f"Created schema")

        uids_to_nodes: Dict[str, int] = {}
        nodes_to_uids: Dict[int, str] = {}

        current_id = 0

        def _node_to_dgraph_dict(node: Node) -> dict:
            return {
                f"{node.__name__.lower().replace(' ', '_')}.{k}": (
                    json.dumps(v) if isinstance(v, dict) else v
                )
                for k, v in node.to_dict().items()
                if v
            }

        logger.info(f"Inserting nodes")

        all_nodes = [node["data"] for _, node in nx_graph.nodes(data=True)]
        for i in range(0, len(all_nodes), self.batch_size):
            # One transaction and one accumulated payload per batch.
            txn = self.dgraph.txn()
            nodes_txn = []

            for node in all_nodes[i : i + self.batch_size]:

                # Remove spaces, lowercase and escape
                node_data = _node_to_dgraph_dict(node)

                node_data["uid"] = f"_:node_{current_id}"

                uids_to_nodes[node_data["uid"]] = hash(node)
                current_id += 1
                node_data["type"] = node.__name__.lower().replace(" ", "_")

                nodes_txn.append(node_data)

            assigned = txn.mutate(set_obj=nodes_txn)

            for uid, assigned_uid in assigned.uids.items():
                nodes_to_uids[uids_to_nodes[f"_:{uid}"]] = assigned_uid

            txn.commit()
            logger.info(
                f"Inserted nodes batch {i} -> {i + self.batch_size}, total UIDs: {len(nodes_to_uids)}"
            )

        logger.info(f"Inserting edges")

        all_edges = list(nx_graph.edges(data=True, keys=True))
        for i in range(0, len(all_edges), self.batch_size):
            edge_nquads = ""
            for edge in all_edges[i : i + self.batch_size]:

                edge_nquads += f"<{nodes_to_uids[edge[0]]}> <{edge[2].lower().replace(' ', '_')}> <{nodes_to_uids[edge[1]]}> .\n"

            txn = self.dgraph.txn()

            assigned = txn.mutate(set_nquads=edge_nquads)

            logger.info(
                f"Inserted edges batch {i} -> {i+self.batch_size}, got back {len(assigned.context.keys)} UIDs"
            )

            txn.commit()

        return self.host
Example #19
    def __init__(self, *args, **kwargs) -> None:

        super().__init__(*args, **kwargs)

        logger.info("Created PCAP Transformer")
Example #20
    def __init__(self, *args, **kwargs) -> None:

        super().__init__(*args, **kwargs)

        logger.info("Created Windows EVTX Transformer.")
Example #21
def new():
    """Generate a new graph using the supplied DataSource, Transformer, and the parameters
    passed to the DataSource.

    At minimum, the user must supply the following form parameters:
        1. datasource
        2. transformer
        3. comment
        4. backend

    Outside of that, the user must supply at **minimum** the parameters marked by
    the datasource as required.
        * Use the /api/datasources endpoint to see which ones these are.
        * Programmatically, these are any parameters without a default value.

    Failure to supply either the minimum parameters or the required parameters for that
    datasource returns a 400 status code with the missing parameters in the 'message' field.

    If any part of the graph creation yields an error, a 500 HTTP code is returned with the
    python exception as a string in the 'message' field.

    If the graph is successfully created, the user is returned a dictionary with the ID of the
    graph and the URI path for viewing it in the *beagle web interface*.

    For example:

    >>> {
        id: 1,
        self: /fireeye_hx/1
    }

    Returns
    -------
    dict
        {id: integer, self: string}
    """

    # Returns a tuple of (dict, bool).
    resp, success = _validate_params(form=request.form, files=request.files)

    # If false, return error message
    if not success:
        return make_response(jsonify(resp), 400)

    datasource_cls: Type[DataSource] = resp["datasource"]
    transformer_cls: Type[Transformer] = resp["transformer"]
    backend_cls: Type[Backend] = resp["backend"]
    datasource_schema = resp["schema"]
    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.

    is_external = issubclass(datasource_cls, ExternalDataSource)

    logger.info(
        f"Received upload request for datasource=<{datasource_cls.__name__}>, "
        + f"transformer=<{transformer_cls.__name__}>, backend=<{backend_cls.__name__}>"
    )

    logger.info("Transforming data to a graph.")

    params = _setup_params(form=request.form, schema=datasource_schema, is_external=is_external)

    resp, success = _create_graph(
        datasource_cls=datasource_cls,
        transformer_cls=transformer_cls,
        backend_cls=backend_cls,
        params=params,
        is_external=is_external,
    )

    if not success:
        return make_response(jsonify(resp), 400)

    G = resp["graph"]

    # If the backend is NetworkX, save the graph.
    # Otherwise, redirect the user to wherever they sent it (if possible)
    if backend_cls.__name__ == "NetworkX":
        response = _save_graph_to_db(backend=resp["backend"], category=datasource_cls.category)
        response = jsonify(response)
    else:
        logger.debug(G)
        response = jsonify({"resp": G})

    return response
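
A minimal sketch of exercising this endpoint from a client. The host, route, datasource/transformer IDs, and the file parameter name are all assumptions for illustration; use /api/datasources to discover the real required parameters:

import requests

with open("hx.mans", "rb") as triage:  # hypothetical triage archive
    resp = requests.post(
        "http://localhost:8000/api/new",  # hypothetical host and route
        data={
            "datasource": "fireeye_hx",
            "transformer": "FireEyeHXTransformer",
            "comment": "triage from host X",
            # "backend" may be omitted; it defaults to NetworkX.
        },
        files={"triage": triage},
    )
print(resp.json())  # e.g. {"id": 1, "self": "/fireeye_hx/1"}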
Example #22
def add(graph_id: int):
    """Add data to an existing NetworkX based graph.

    Parameters
    ----------
    graph_id : int
        The graph ID to add to.
    """

    graph_obj = Graph.query.filter_by(id=graph_id).first()
    if not graph_obj:
        return make_response(jsonify({"message": "Graph not found"}), 404)

    # Validate the parameters are valid.
    # Returns a tuple of (dict, bool).
    resp, success = _validate_params(form=request.form, files=request.files)

    # If false, return error message
    if not success:
        return make_response(jsonify(resp), 400)

    datasource_cls: Type[DataSource] = resp["datasource"]
    transformer_cls: Type[Transformer] = resp["transformer"]
    backend_cls: Type[Backend] = resp["backend"]

    is_external = issubclass(datasource_cls, ExternalDataSource)

    # Only NetworkX for now.
    if backend_cls.__name__ != "NetworkX":
        logger.info("Cannot append to non NetworkX graphs for now.")
        return make_response(jsonify({"message": "Can only add to NetworkX Graphs for now."}), 400)

    # Cast to NetworkX
    backend_cls = cast(Type[NetworkX], backend_cls)

    datasource_schema = resp["schema"]
    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.

    logger.info(
        f"Received add data request for existing graph=<{graph_id}>, "
        + f"datasource=<{datasource_cls.__name__}>, "
        + f"transformer=<{transformer_cls.__name__}>, backend=<{backend_cls.__name__}>"
    )

    params = _setup_params(form=request.form, schema=datasource_schema, is_external=is_external)

    # NOTE: This will all need to change to support non-NetworkX backends.

    # Get the existing graph as JSON
    dest_path = f"{Config.get('storage', 'dir')}/{graph_obj.category}/{graph_obj.file_path}"
    with open(dest_path, "r") as infile:
        json_data = json.load(infile)

    # Make a dummy backend instance
    backend_instance = backend_cls(nodes=[], consolidate_edges=True)
    existing_graph = backend_cls.from_json(json_data)

    # Set the graph
    backend_instance.G = existing_graph

    resp, success = _add_to_existing_graph(
        existing_backend=backend_instance,
        datasource_cls=datasource_cls,
        transformer_cls=transformer_cls,
        params=params,
        is_external=is_external,
    )

    if not success:
        return make_response(jsonify(resp), 400)

    # Save the existing graph object to disk.
    resp = _save_graph_to_db(
        backend=backend_instance,
        # Use the existing category.
        category=graph_obj.category,
        # Graph ID
        graph_id=graph_obj.id,
    )
    return make_response(jsonify(resp), 200)
Example #23
    def __init__(self, *args, **kwargs) -> None:

        super().__init__(*args, **kwargs)

        logger.info("Created FireEyeHX Transformer.")
Example #24
def new():
    """Generate a new graph using the supplied DataSource, Transformer, and the parameters
    passed to the DataSource.

    At minimum, the user must supply the following form parameters:
        1. datasource
        2. transformer
        3. comment
        4. backend

    Outside of that, the user must supply at **minimum** the parameters marked by
    the datasource as required.
        * Use the /api/datasources endpoint to see which ones these are.
        * Programmatically, these are any parameters without a default value.

    Failure to supply either the minimum parameters or the required parameters for that
    datasource returns a 400 status code with the missing parameters in the 'message' field.

    If any part of the graph creation yields an error, a 500 HTTP code is returned with the
    python exception as a string in the 'message' field.

    If the graph is successfully created, the user is returned a dictionary with the ID of the
    graph and the URI path for viewing it in the *beagle web interface*.
    and the URI path to viewing it in the *beagle web interface*.

    For example:

    >>> {
        id: 1,
        self: /fireeye_hx/1
    }

    Returns
    -------
    dict
        {id: integer, self: string}
    """

    # Verify we have the basic parameters.
    missing_params = []
    for param in ["datasource", "transformer", "comment"]:
        if param not in request.form:
            missing_params.append(param)

    if len(missing_params) > 0:
        logger.debug(f"Request to /new missing parameters: {missing_params}")
        return make_response(
            jsonify({"message": f"Missing parameters {missing_params}"}), 400)

    # Get the requested datasource, transformer, and backend from the form.
    requested_datasource = request.form["datasource"]
    requested_transformer = request.form["transformer"]
    requested_backend = request.form.get("backend", "NetworkX")

    datasource_schema = next(
        filter(lambda entry: entry["id"] == requested_datasource,
               SCHEMA["datasources"]), None)

    if datasource_schema is None:
        logger.debug(
            f"User requested a non-existent data source {requested_datasource}"
        )
        return make_response(
            jsonify({
                "message":
                f"Requested datasource '{requested_datasource}' is invalid, " +
                "please use /api/datasources to find a list of valid datasources"
            }),
            400,
        )

    logger.info(
        f"Received upload request for datasource=<{requested_datasource}>, " +
        f"transformer=<{requested_transformer}>, backend=<{requested_backend}>"
    )

    datasource_cls = DATASOURCES[requested_datasource]
    transformer_cls = TRANSFORMERS[requested_transformer]
    backend_class = BACKENDS[requested_backend]

    required_parameters = datasource_schema["params"]

    # If this class extends the ExternalDataSource class, we know that the parameters
    # represent strings, and not files.
    is_external = issubclass(datasource_cls, ExternalDataSource)

    # Make sure the user provided all required parameters for the datasource.
    datasource_missing_params = []
    for param in required_parameters:
        # Skip optional parameters.
        if param["required"] is False:
            continue
        if is_external and param["name"] not in request.form:
            datasource_missing_params.append(param["name"])

        if not is_external and param["name"] not in request.files:
            datasource_missing_params.append(param["name"])

    if len(datasource_missing_params) > 0:
        logger.debug(
            f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
        )
        return make_response(
            jsonify({
                "message":
                f"Missing datasource {'form' if is_external else 'files'} params {datasource_missing_params}"
            }),
            400,
        )

    logger.info("Transforming data to a graph.")

    logger.debug("Setting up parameters")
    params = {}

    if is_external:
        # External parameters arrive as plain form fields.
        for param in datasource_schema["params"]:
            if param["name"] in request.form:
                params[param["name"]] = request.form[param["name"]]

        logger.info(f"ExternalDataSource params received {params}")

    else:
        for param in datasource_schema["params"]:
            # Save the files, keep track of which parameter they represent
            if param["name"] in request.files:
                params[param["name"]] = tempfile.NamedTemporaryFile()
                request.files[param["name"]].save(params[param["name"]].name)
                params[param["name"]].seek(0)

        logger.info(f"Saved uploaded files {params}")

    logger.debug("Set up parameters")

    try:
        # Create the datasource
        datasource = datasource_cls(
            # Give file paths instead of file-like objects when not external source.
            **({
                param_name: _tempfile.name
                for param_name, _tempfile in params.items()
            } if not is_external else params))
        transformer = datasource.to_transformer(transformer_cls)
        graph = backend_class(metadata=datasource.metadata(),
                              nodes=transformer.run(),
                              consolidate_edges=True)
        # Make the graph
        G = graph.graph()

    except Exception as e:
        logger.critical(f"Failure to generate graph {e}")
        import traceback

        logger.debug(f"{traceback.format_exc()}")

        if not is_external:
            # Clean up temporary files
            try:
                for _tempfile in params.values():
                    _tempfile.close()
            except Exception as e:
                logger.critical(
                    f"Failure to clean up temporary files after error {e}")

        response = make_response(jsonify({"message": str(e)}), 500)
        response.headers.add("Access-Control-Allow-Origin", "*")
        return response

    logger.info("Cleaning up tempfiles")

    if not is_external:
        # Clean up temporary files
        for _tempfile in params.values():
            _tempfile.close()

    logger.info("Finished generating graph")

    # Check if we even had a graph.
    # This will be on the G attribute for any class subclassing NetworkX
    if graph.is_empty():
        return make_response(
            jsonify({"message": "Graph generation resulted in 0 nodes."}),
            400)

    # If the backend is NetworkX, save the graph.
    # Otherwise, redirect the user to wherever they sent it (if possible)
    if backend_class.__name__ == "NetworkX":

        # Take the SHA256 of the contents of the graph.
        contents_hash = hashlib.sha256(
            json.dumps(graph.to_json(),
                       sort_keys=True).encode("utf-8")).hexdigest()

        # See if we have previously generated this *exact* graph.
        existing = Graph.query.filter_by(meta=graph.metadata,
                                         sha256=contents_hash).first()

        if existing:
            logger.info(f"Graph previously generated with id {existing.id}")
            response = jsonify({
                "id": existing.id,
                "self": f"/{existing.category}/{existing.id}"
            })
            response.headers.add("Access-Control-Allow-Origin", "*")
            return response

        dest_folder = datasource_cls.category.replace(" ", "_").lower()
        # Set up the storage directory.
        dest_path = f"{Config.get('storage', 'dir')}/{dest_folder}/{contents_hash}.json"
        os.makedirs(f"{Config.get('storage', 'dir')}/{dest_folder}",
                    exist_ok=True)

        db_entry = Graph(
            sha256=contents_hash,
            meta=graph.metadata,
            comment=request.form.get("comment", None),
            category=dest_folder,  # Categories use the lower name!
            file_path=f"{contents_hash}.json",
        )

        db.session.add(db_entry)
        db.session.commit()

        logger.info(f"Added graph to database with id={db_entry.id}")

        with open(dest_path, "w") as outfile:
            json.dump(graph.to_json(), outfile)

        logger.info(f"Saved graph to {dest_path}")

        response = jsonify({
            "id": db_entry.id,
            "self": f"/{dest_folder}/{db_entry.id}"
        })
    else:
        logger.debug(G)
        response = jsonify({"resp": G})

    response.headers.add("Access-Control-Allow-Origin", "*")
    return response
Example #25
    def __init__(self, *args, **kwargs) -> None:

        super().__init__(*args, **kwargs)

        logger.info("Created Darpa Transperant Computing Transformer.")
Example #26
    def events(self) -> Generator[dict, None, None]:
        reader = self._get_rdpcap()

        from scapy.all import Ether, IP, TCP, DNS, UDP, Packet
        from scapy.layers.http import HTTPRequest

        logger.info("Reading PCAP File")

        pcap = reader(self.pcap_file)

        layers_data = {
            Ether: {
                "src_mac": lambda layer: layer.fields["src"],
                "dst_mac": lambda layer: layer.fields["dst"],
            },
            IP: {
                "src_ip": lambda layer: layer.fields["src"],
                "dst_ip": lambda layer: layer.fields["dst"],
                # returns protocol as a human readable string.
                "protocol": lambda layer: layer.get_field("proto")
                .i2s[layer.fields["proto"]]
                .upper(),
            },
            UDP: {
                "dport": lambda layer: layer.fields["dport"],
                "sport": lambda layer: layer.fields["sport"],
            },
            TCP: {
                "sport": lambda layer: layer.fields["sport"],
                "dport": lambda layer: layer.fields["dport"],
            },
            DNS: {"dns": self._parse_dns_request},
            HTTPRequest: {
                "http_method": lambda layer: layer.fields["Method"].decode(),
                "uri": lambda layer: layer.fields["Path"].decode(),
                "http_dest": lambda layer: layer.fields.get("Host", b"").decode(),
            },
        }

        packet_type = "Ether"
        for packet in pcap:

            packet = cast(Packet, packet)

            payload = packet.build()
            if packet.haslayer(IP):
                payload = packet[IP].build()

            packet_data = {
                "payload": "".join(
                    c
                    for c in payload.decode(encoding="ascii", errors="ignore").replace(
                        "\x00", "."
                    )  # replace null bytes
                    # Remove unicode control characters
                    if unicodedata.category(c) not in {"Cc", "Cf", "Cs", "Co", "Cn"}
                ),
                "timestamp": int(packet.time),
            }

            for layer_name, config in layers_data.items():

                if not packet.haslayer(layer_name):
                    continue

                packet_type = layer_name.__name__

                layer = packet[layer_name]

                for name, processor in config.items():

                    output = processor(layer)

                    # Allows the processor to output multiple values.
                    if isinstance(output, dict):
                        packet_data.update(output)
                    else:
                        packet_data[name] = output

            packet_data["event_type"] = packet_type

            yield packet_data
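
The `layers_data` table above is a small dispatch pattern: each processor maps a field name to an extractor, and dict-valued outputs (like the DNS processor's) are merged wholesale so a single processor can emit several fields at once. A self-contained sketch of just that merge logic, with stand-in processors:

packet_data: dict = {}

config = {
    "sport": lambda layer: layer["sport"],
    "dns": lambda layer: {"qname": layer["qname"], "qtype": layer["qtype"]},
}
layer = {"sport": 53, "qname": "example.com", "qtype": "A"}

for name, processor in config.items():
    output = processor(layer)
    if isinstance(output, dict):
        packet_data.update(output)  # multi-field output merges in directly
    else:
        packet_data[name] = output

assert packet_data == {"sport": 53, "qname": "example.com", "qtype": "A"}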
Example #27
    def graph(self) -> nx.MultiDiGraph:
        """Generates the MultiDiGraph.

        Places the nodes in the Graph.

        Returns
        -------
        nx.MultiDiGraph
            The generated NetworkX object.
        """

        logger.info("Beginning graph generation.")

        # De-duplicate nodes.
        self.nodes = dedup_nodes(self.nodes)

        for node in self.nodes:

            # Insert the node into the graph.
            self.insert_node(node, hash(node))

            # Add in all the edges for this node.
            for edge_dict in node.edges:
                for dest_node, edge_data in edge_dict.items():

                    default_edge_name = edge_data.__name__

                    edge_instances = [{
                        "edge_name": entry.pop("edge_name", default_edge_name),
                        "data": entry,
                    } for entry in edge_data._events]

                    if len(edge_instances) == 0:
                        edge_instances = [{"edge_name": default_edge_name}]

                    # Sort by name
                    edge_instances = sorted(edge_instances,
                                            key=lambda e: e["edge_name"])

                    for edge_name, instances in groupby(
                            edge_instances, key=lambda e: e["edge_name"]):

                        self.insert_edges(
                            u=node,  # Source node
                            v=dest_node,  # Dest node
                            edge_name=edge_name,
                            # Only this group's instances of u->v edges;
                            # keep just their data payloads.
                            instances=[
                                e.get("data", None) for e in instances
                            ],
                        )

        logger.info("Completed graph generation.")
        logger.info(
            f"Graph contains {len(self.G.nodes())} nodes and {len(self.G.edges())} edges."
        )

        return self.G