Exemplo n.º 1
0
def compute_ppi_graph(
    protein_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[PPIGraphConfig] = None,
) -> nx.Graph:
    """
    Build a protein-protein interaction (PPI) graph from a list of protein IDs.

    Every protein becomes a node carrying a ``protein_id`` attribute equal to its
    identifier. The graph itself stores ``protein_list``, an empty ``sources`` list
    and the ``ncbi_taxon_id`` read from the parsed config. Graph and node annotation
    functions (when given) run before edges are computed; edge annotation functions
    (when given) run afterwards.

    :param protein_list: List of protein identifiers
    :type protein_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph metadata, defaults to None
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node metadata, defaults to None
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge metadata, defaults to None
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config object specifying additional parameters for STRING and BIOGRID API calls.
        A default ``PPIGraphConfig`` is used when None.
    :type config: PPIGraphConfig, optional
    :return: ``nx.Graph`` of PPI network
    :rtype: nx.Graph
    """
    # Fall back to the default config when none is supplied, then parse it.
    config = parse_kwargs_from_config(
        PPIGraphConfig() if config is None else config
    )

    # Graph-level metadata is attached as keyword attributes of the new graph.
    ppi_graph = nx.Graph(
        protein_list=protein_list,
        sources=[],
        ncbi_taxon_id=config.ncbi_taxon_id,
    )
    ppi_graph.add_nodes_from(protein_list)
    log.debug(f"Added {len(protein_list)} nodes to graph")

    # Each node records its own identifier under "protein_id".
    nx.set_node_attributes(
        ppi_graph,
        {protein: protein for protein in protein_list},
        "protein_id",
    )

    # Graph metadata first, then node metadata; both precede edge construction.
    pre_edge_stages = (
        (graph_annotation_funcs, annotate_graph_metadata),
        (node_annotation_funcs, annotate_node_metadata),
    )
    for funcs, annotate in pre_edge_stages:
        if funcs is not None:
            ppi_graph = annotate(ppi_graph, funcs)

    # Edges are always computed; their annotation is optional.
    ppi_graph = compute_edges(ppi_graph, edge_construction_funcs)
    if edge_annotation_funcs is not None:
        ppi_graph = annotate_edge_metadata(ppi_graph, edge_annotation_funcs)

    return ppi_graph
Exemplo n.º 2
0
def construct_rna_graph(
    dotbracket: Optional[str],
    sequence: Optional[str],
    edge_construction_funcs: List[Callable],
    edge_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    graph_annotation_funcs: Optional[List[Callable]] = None,
) -> nx.Graph:
    """
    Constructs an RNA secondary structure graph from dotbracket notation and/or a sequence.

    Nodes are the integer positions ``0..n-1``, where ``n`` is the length of ``sequence``
    when a non-empty sequence is given and the length of ``dotbracket`` otherwise. When a
    dotbracket string is supplied, it is stored in ``G.graph["dotbracket"]`` and each node
    gets a ``dotbracket_symbol`` attribute. When a sequence is supplied, it is stored in
    ``G.graph["sequence"]`` and each node gets ``nucleotide`` and ``color`` attributes.

    :param dotbracket: Dotbracket notation representation of secondary structure
    :type dotbracket: str, optional
    :param sequence: Corresponding sequence RNA bases
    :type sequence: str, optional
    :param edge_construction_funcs: List of edge construction functions (required).
    :type edge_construction_funcs: List[Callable]
    :param edge_annotation_funcs: List of edge metadata annotation functions. Defaults to None.
    :type edge_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of node metadata annotation functions. Defaults to None.
    :type node_annotation_funcs: List[Callable], optional
    :param graph_annotation_funcs: List of graph metadata annotation functions. Defaults to None.
    :type graph_annotation_funcs: List[Callable], optional
    :raises TypeError: If no non-empty ``sequence`` is given and ``dotbracket`` is None,
        so the number of nodes cannot be determined.
    :return: nx.Graph of RNA secondary structure
    :rtype: nx.Graph
    """
    # Work out the node count. A non-empty sequence takes precedence; otherwise
    # the dotbracket string (possibly empty) determines it.
    if sequence:
        n_nodes = len(sequence)
    elif dotbracket is not None:
        n_nodes = len(dotbracket)
    else:
        # Previously this case fell through to ``len(None)`` and raised an
        # opaque TypeError. The exception type is kept so existing handlers
        # still match; only the message is made actionable.
        raise TypeError(
            "construct_rna_graph requires at least one of `dotbracket` or "
            "`sequence` to be provided."
        )

    G = nx.Graph()

    # Build node IDs first.
    node_ids = list(range(n_nodes))

    # Check sequence and dotbracket lengths match
    if dotbracket and sequence:
        validate_lengths(dotbracket, sequence)

    # Add nodes
    G.add_nodes_from(node_ids)
    log.debug(f"Added {len(node_ids)} nodes")

    # Add dotbracket symbol if dotbracket is provided
    if dotbracket:
        validate_dotbracket(dotbracket)
        G.graph["dotbracket"] = dotbracket

        nx.set_node_attributes(
            G,
            dict(zip(node_ids, dotbracket)),
            "dotbracket_symbol",
        )

    # Add nucleotide base info if sequence is provided
    if sequence:
        validate_rna_sequence(sequence)
        G.graph["sequence"] = sequence
        nx.set_node_attributes(G, dict(zip(node_ids, sequence)), "nucleotide")
        # Per-node color is looked up from the base-to-color table.
        colors = [RNA_BASE_COLORS[base] for base in sequence]
        nx.set_node_attributes(G, dict(zip(node_ids, colors)), "color")

    # Annotate additional graph metadata
    if graph_annotation_funcs is not None:
        G = annotate_graph_metadata(G, graph_annotation_funcs)

    # Annotate additional node metadata
    if node_annotation_funcs is not None:
        G = annotate_node_metadata(G, node_annotation_funcs)

    # Add edges
    G = compute_edges(G, edge_construction_funcs)

    # Annotate additional edge metadata
    if edge_annotation_funcs is not None:
        G = annotate_edge_metadata(G, edge_annotation_funcs)

    return G
Exemplo n.º 3
0
def construct_graph(
    config: Optional[ProteinGraphConfig] = None,
    pdb_path: Optional[str] = None,
    pdb_code: Optional[str] = None,
    chain_selection: str = "all",
    df_processing_funcs: Optional[List[Callable]] = None,
    edge_construction_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    graph_annotation_funcs: Optional[List[Callable]] = None,
) -> nx.Graph:
    """
    Constructs protein structure graph from a pdb_code or pdb_path. Users can provide a ProteinGraphConfig object.

    However, config parameters can be overridden by passing arguments directly to the function:
    any ``*_funcs`` argument that is not None replaces the corresponding attribute on ``config``;
    arguments left as None keep whatever the config already holds. The config object is updated
    in place and stored on the returned graph under ``g.graph["config"]``.

    :param config: ProteinGraphConfig object. If None, defaults to config in graphein.protein.config
    :type config: graphein.protein.config.ProteinGraphConfig, optional
    :param pdb_path: Path to pdb_file to build graph from
    :type pdb_path: str, optional
    :param pdb_code: 4-character PDB accession pdb_code to build graph from. When omitted and
        ``pdb_path`` is given, a name is derived from the file name instead.
    :type pdb_code: str, optional
    :param chain_selection: String of polypeptide chains to include in graph. E.g "ABDF" or "all"
    :type chain_selection: str, optional
    :param df_processing_funcs: List of dataframe processing functions
    :type df_processing_funcs: List[Callable], optional
    :param edge_construction_funcs: List of edge construction functions
    :type edge_construction_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of edge annotation functions
    :type edge_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of node annotation functions
    :type node_annotation_funcs: List[Callable], optional
    :param graph_annotation_funcs: List of graph annotation functions
    :type graph_annotation_funcs: List[Callable], optional
    :return: Protein Structure Graph
    :rtype: nx.Graph
    """

    # If no config is provided, use default
    if config is None:
        config = ProteinGraphConfig()

    # Get name from pdb_file if no pdb_code is provided
    if pdb_path and (pdb_code is None):
        pdb_code = get_protein_name_from_filename(pdb_path)

    # Explicitly passed function lists override the config's values. The
    # previous logic only used an argument when the config attribute was None,
    # which silently ignored explicit arguments whenever the config already
    # carried a value, contradicting the documented contract above.
    # NOTE: this mutates the caller's config object in place.
    overrides = {
        "protein_df_processing_functions": df_processing_funcs,
        "edge_construction_functions": edge_construction_funcs,
        "node_metadata_functions": node_annotation_funcs,
        "graph_metadata_functions": graph_annotation_funcs,
        "edge_metadata_functions": edge_annotation_funcs,
    }
    for attr_name, funcs in overrides.items():
        if funcs is not None:
            setattr(config, attr_name, funcs)

    raw_df = read_pdb_to_dataframe(
        pdb_path,
        pdb_code,
        verbose=config.verbose,
        granularity=config.granularity,
    )
    # NOTE(review): config.protein_df_processing_functions is not passed to
    # process_dataframe here; confirm whether that helper is expected to
    # receive the processing functions.
    protein_df = process_dataframe(
        raw_df,
        chain_selection=chain_selection,
        granularity=config.granularity,
    )

    # Initialise graph with metadata
    g = initialise_graph_with_metadata(
        protein_df=protein_df,
        raw_pdb_df=raw_df.df["ATOM"],
        pdb_id=pdb_code,
        granularity=config.granularity,
    )
    # Add nodes to graph
    g = add_nodes_to_graph(g)

    # Add config to graph
    g.graph["config"] = config

    # Annotate additional node metadata
    if config.node_metadata_functions is not None:
        g = annotate_node_metadata(g, config.node_metadata_functions)

    # Compute graph edges
    g = compute_edges(
        g,
        funcs=config.edge_construction_functions,
        get_contacts_config=None,
    )

    # Annotate additional graph metadata (runs after edges are computed)
    if config.graph_metadata_functions is not None:
        g = annotate_graph_metadata(g, config.graph_metadata_functions)

    # Annotate additional edge metadata
    if config.edge_metadata_functions is not None:
        g = annotate_edge_metadata(g, config.edge_metadata_functions)

    return g
Exemplo n.º 4
0
def compute_grn_graph(
    gene_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[GRNGraphConfig] = None,
) -> nx.Graph:
    """
    Build a directed Gene Regulatory Network (GRN) graph from a list of gene IDs.

    Every gene becomes a node carrying a ``gene_id`` attribute equal to its identifier.
    The graph itself stores ``gene_list`` and an empty ``sources`` list. Graph and node
    annotation functions (when given) run before edges are computed; edge annotation
    functions (when given) run afterwards.

    :param gene_list: List of gene identifiers
    :type gene_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph metadata, defaults to None
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node metadata, defaults to None
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge metadata, defaults to None
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config specifying additional parameters for STRING and BIOGRID, defaults to None.
        A default ``GRNGraphConfig`` is used when None.
    :type config: graphein.grn.GRNGraphConfig, optional
    :return: Directed graph (an ``nx.DiGraph`` instance) of the gene regulatory network
    :rtype: nx.Graph
    """
    # Use the default config when the caller supplies none, then parse it.
    base_config = GRNGraphConfig() if config is None else config
    config = parse_kwargs_from_config(base_config)

    # The network is *directed*; graph-level metadata goes in as keyword attributes.
    # NOTE(review): the original carried a commented-out
    # `ncbi_taxon_id=config.ncbi_taxon_id` attribute here; the parsed config
    # is not read anywhere else in this function.
    grn = nx.DiGraph(gene_list=gene_list, sources=[])
    grn.add_nodes_from(gene_list)
    log.debug(f"Added {len(gene_list)} nodes to graph")

    # Each node records its own identifier under "gene_id".
    gene_ids = {gene: gene for gene in gene_list}
    nx.set_node_attributes(grn, gene_ids, "gene_id")

    # Optional graph-level annotation.
    if graph_annotation_funcs is not None:
        grn = annotate_graph_metadata(grn, graph_annotation_funcs)

    # Optional node-level annotation.
    if node_annotation_funcs is not None:
        grn = annotate_node_metadata(grn, node_annotation_funcs)

    # Edges are always computed from the supplied construction functions.
    grn = compute_edges(grn, edge_construction_funcs)

    # Optional edge-level annotation, applied once the edges exist.
    if edge_annotation_funcs is not None:
        grn = annotate_edge_metadata(grn, edge_annotation_funcs)

    return grn