Exemplo n.º 1
0
    def update_network(self):
        """Rebuild the subnetwork shown around the current center nodes.

        Starting from ``self._center_nodes``, the neighbourhood is expanded
        ``self._nhops`` times. On every hop each frontier node contributes at
        most ``self._n_max_neighbors`` neighbours whose connecting edge weight
        is strictly greater than ``self._edge_threshold``. The collected nodes
        and the edges of ``self._network`` between them are copied into a new
        graph, ``self._hidden_nodes`` are removed, the result is passed
        through ``self._propagate`` and, if an explorer is attached, handed to
        ``self._nx_explorer.change_graph``.

        Does nothing when there are no center nodes.
        """
        if self._center_nodes == []:
            return

        subnet = network.Graph()
        to_add = self._center_nodes[:]
        frontier = self._center_nodes[:]
        # Nodes whose neighbours were already collected; expanding them again
        # cannot add anything new.
        expanded = set()
        for _ in range(self._nhops):
            next_frontier = []
            for i in frontier:
                if i in expanded:
                    continue
                expanded.add(i)
                # NOTE(review): candidates are ordered by node id (descending),
                # not by edge weight -- confirm this is the intended ranking
                # for the n_max_neighbors cut-off.
                neig = sorted([
                    x for x in self._network.neighbors(i) if
                    self._network.edge[i][x]['weight'] > self._edge_threshold
                ],
                              reverse=True)
                if len(neig) > self._n_max_neighbors:
                    neig = neig[:self._n_max_neighbors]
                to_add.extend(neig)
                next_frontier.extend(neig)
            # The next hop must start from every node reached in this hop,
            # not only from the neighbours of the last node processed.
            frontier = next_frontier
        to_add = list(set(to_add))
        subnet.add_nodes_from([(x, self._network.node[x]) for x in to_add])
        nodes = subnet.nodes()
        # Pop nodes one by one and only link to nodes still pending, so each
        # undirected edge is added exactly once.
        while nodes:
            i = nodes.pop()
            subnet.node[i] = self._network.node[i]
            neig = [x for x in self._network.neighbors(i) if x in nodes]
            subnet.add_weighted_edges_from([(i, x, w) for x, w in zip(
                neig, [self._network.edge[i][y]['weight'] for y in neig])])

        subnet.remove_nodes_from(self._hidden_nodes)
        subnet = self._propagate(subnet)

        if self._nx_explorer is not None:
            self._nx_explorer.change_graph(subnet)
Exemplo n.º 2
0
def to_single_mode(net, mode_mask, conn_mask, weighting):
    """
    Project a two-mode network onto one of its modes.

    Args:
        net: the two-mode network to project
        mode_mask (boolean array): marks the nodes that are kept and linked
        conn_mask (boolean array): marks the nodes through which links are made
        weighting (int): index into `Weighting` selecting the edge-weight
            normalization

    Returns:
        a new graph with one node per selected mode node; weighted edges are
        added only when `_filtered_edges` returns something other than None
    """
    projected = network.Graph()
    projected.add_nodes_from(range(mode_mask.sum()))

    filtered = _filtered_edges(net, mode_mask, conn_mask, weighting > 1)
    if filtered is None:
        # Nothing to connect: return the graph with isolated nodes only.
        return projected

    weighted = Weighting[weighting].func(filtered)
    coo = weighted.tocoo()
    triples = zip(coo.row, coo.col, coo.data)
    projected.add_weighted_edges_from(triples)
    return projected
    def generateGraph(self, N_changed=False):
        """Build a graph from ``self.matrix`` and publish it.

        Pairs whose distance is at most ``self.spinUpperThreshold`` become
        edges; when ``self.include_knn`` is set, each row's ``self.kNN``
        smallest entries are added as edges too. Depending on
        ``self.node_selection`` the graph is reduced to components with at
        least ``self.excludeLimit`` nodes, to the largest component, or kept
        whole. The result is stored in ``self.graph`` / ``self.graph_matrix``,
        the info labels are refreshed and ``self.sendSignals()`` is called.

        Args:
            N_changed: when true, ``self.node_selection`` is reset to
                ``NodeSelection.COMPONENTS`` before building.
        """
        self.error()
        matrix = None
        self.warning('')

        if N_changed:
            self.node_selection = NodeSelection.COMPONENTS

        if self.matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            self.sendSignals()
            return

        n_items = self.matrix.shape[0]

        # Estimate the edge count from the histogram before building anything.
        nEdgesEstimate = 2 * sum(y for x, y in zip(self.histogram.xData, self.histogram.yData)
                                 if x <= self.spinUpperThreshold)

        if nEdgesEstimate > 200000:
            self.graph = None
            self.error('Estimated number of edges is too high (%d).' % nEdgesEstimate)
        else:
            graph = network.Graph()
            graph.add_nodes_from(range(n_items))
            matrix = self.matrix

            if matrix is not None and matrix.row_items is not None:
                if isinstance(self.matrix.row_items, Orange.data.Table):
                    graph.set_items(self.matrix.row_items)
                else:
                    # Wrap plain labels into a table with one string meta.
                    data = [[str(x)] for x in self.matrix.row_items]
                    items = Orange.data.Table(Orange.data.Domain([], metas=[Orange.data.StringVariable('label')]), data)
                    graph.set_items(items)

            # set the threshold
            # set edges where distance is lower than threshold
            self.warning(0)
            if self.kNN >= n_items:
                self.warning(0, "kNN larger then supplied distance matrix dimension. Using k = %i" % (n_items - 1))

            def edges_from_distance_matrix(matrix, upper, knn):
                """Yield ``(i, j, distance)`` edge candidates.

                For every row ``i``: all ``j > i`` with ``matrix[i, j] <= upper``
                and, if ``knn`` is non-zero, the ``knn`` column indices with
                the smallest values in that row.
                """
                rows, cols = matrix.shape
                for i in range(rows):
                    for j in range(i + 1, cols):
                        if matrix[i, j] <= upper:
                            yield i, j, matrix[i, j]
                    if not knn:
                        continue
                    # NOTE(review): the row's own index is not excluded, so it
                    # can be among the k smallest and produce an (i, i) edge --
                    # confirm whether that is intended.
                    for j in np.argsort(matrix[i])[:knn]:
                        yield i, j, matrix[i, j]

            edge_list = edges_from_distance_matrix(
                self.matrix, self.spinUpperThreshold,
                min(self.kNN, n_items - 1) if self.include_knn else 0)
            if self.edge_weights == EdgeWeights.INVERSE:
                edge_list = list(edge_list)
                # max() raises ValueError on an empty sequence; with no edges
                # the value is never used, so any placeholder works.
                max_weight = max(d for u, v, d in edge_list) if edge_list else 0
                graph.add_edges_from((u, v, {'weight': max_weight - d})
                                     for u, v, d in edge_list)
            else:
                graph.add_edges_from((u, v, {'weight': d})
                                     for u, v, d in edge_list)
            matrix = None
            self.graph = None
            component = []
            # exclude unconnected
            if self.node_selection == NodeSelection.COMPONENTS:
                component = list(chain.from_iterable(x for x in network.nx.connected_components(graph)
                                                     if len(x) >= self.excludeLimit))
            # largest connected component only
            elif self.node_selection == NodeSelection.LARGEST_COMP:
                # A graph without nodes has no components; avoid max() on an
                # empty sequence.
                components = list(network.nx.connected_components(graph))
                component = max(components, key=len) if components else []
            else:
                self.graph = graph
            if len(component) > 1:
                if len(component) == graph.number_of_nodes():
                    self.graph = graph
                    matrix = self.matrix
                else:
                    self.graph = graph.subgraph(component)
                    matrix = self.matrix.submatrix(sorted(component))

        if matrix is not None:
            matrix.row_items = self.graph.items()
        self.graph_matrix = matrix

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" % n_items)
        if hasattr(self, "infob"):
            # Guard against an empty matrix (division by zero).
            self.infob.setText("Network nodes: %d (%3.1f%%)" % (self.pconnected,
                self.pconnected / float(n_items) * 100 if n_items else 0))
        if hasattr(self, "infoc"):
            self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
                self.nedges, self.nedges / float(self.pconnected)
                if self.pconnected else 0))

        self.warning(303)
        if self.pconnected > 1000 or self.nedges > 2000:
            self.warning(303, 'Large number of nodes/edges; performance will be hindered.')

        self.sendSignals()
        self.histogram.setRegion(0, self.spinUpperThreshold)
    def generateGraph(self, N_changed=False):
        """Build a graph from ``self.matrix`` and publish it.

        Pairs whose distance is at most ``self.spinUpperThreshold`` become
        edges; when ``self.include_knn`` is set, each row's ``self.kNN``
        smallest entries are added as edges too. Depending on
        ``self.node_selection`` the graph is reduced to components with at
        least ``self.excludeLimit`` nodes, to the largest component, or kept
        whole. The result is stored in ``self.graph`` / ``self.graph_matrix``,
        the info labels are refreshed and ``self.sendSignals()`` is called.

        Args:
            N_changed: when true, ``self.node_selection`` is reset to
                ``NodeSelection.COMPONENTS`` before building.
        """
        self.Error.clear()
        self.Warning.clear()
        matrix = None

        if N_changed:
            self.node_selection = NodeSelection.COMPONENTS

        if self.matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            self.sendSignals()
            return

        n_items = self.matrix.shape[0]

        # Estimate the edge count from the histogram before building anything.
        nEdgesEstimate = 2 * sum(
            y for x, y in zip(self.histogram.xData, self.histogram.yData)
            if x <= self.spinUpperThreshold)

        if nEdgesEstimate > 200000:
            self.graph = None
            self.Error.number_of_edges(nEdgesEstimate)
        else:
            graph = network.Graph()
            graph.add_nodes_from(range(n_items))
            matrix = self.matrix

            # ``items`` stays None when the matrix carries no row items, so
            # the checks below never touch an unbound name.
            items = None
            if matrix is not None and matrix.row_items is not None:
                row_items = self.matrix.row_items
                if isinstance(row_items, Table):
                    if self.matrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name] for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in self.matrix.row_items]
            if items is not None:
                if len(items) != n_items:
                    self.Warning.invalid_number_of_items()
                else:
                    if items and not isinstance(items, Table):
                        # Wrap plain labels into a table with one string meta.
                        items = Table(
                            Domain([], metas=[StringVariable('label')]),
                            items)
                    graph.set_items(items)

            # set the threshold
            # set edges where distance is lower than threshold
            self.Warning.kNN_too_large.clear()
            if self.kNN >= n_items:
                self.Warning.kNN_too_large(n_items - 1)

            def edges_from_distance_matrix(matrix, upper, knn):
                """Yield ``(i, j, distance)`` edge candidates.

                For every row ``i``: all ``j > i`` with ``matrix[i, j] <= upper``
                and, if ``knn`` is non-zero, the ``knn`` column indices with
                the smallest values in that row.
                """
                rows, cols = matrix.shape
                for i in range(rows):
                    for j in range(i + 1, cols):
                        if matrix[i, j] <= upper:
                            yield i, j, matrix[i, j]
                    if not knn:
                        continue
                    # NOTE(review): the row's own index is not excluded, so it
                    # can be among the k smallest and produce an (i, i) edge --
                    # confirm whether that is intended.
                    for j in np.argsort(matrix[i])[:knn]:
                        yield i, j, matrix[i, j]

            edge_list = edges_from_distance_matrix(
                self.matrix, self.spinUpperThreshold,
                min(self.kNN, n_items - 1) if self.include_knn else 0)
            if self.edge_weights == EdgeWeights.INVERSE:
                edge_list = list(edge_list)
                # max() raises ValueError on an empty sequence; with no edges
                # the value is never used, so any placeholder works.
                max_weight = (max(d for u, v, d in edge_list)
                              if edge_list else 0)
                graph.add_edges_from((u, v, {
                    'weight': max_weight - d
                }) for u, v, d in edge_list)
            else:
                graph.add_edges_from((u, v, {
                    'weight': d
                }) for u, v, d in edge_list)
            matrix = None
            self.graph = None
            component = []
            # exclude unconnected
            if self.node_selection == NodeSelection.COMPONENTS:
                component = list(
                    chain.from_iterable(
                        x for x in network.nx.connected_components(graph)
                        if len(x) >= self.excludeLimit))
            # largest connected component only
            elif self.node_selection == NodeSelection.LARGEST_COMP:
                # A graph without nodes has no components; avoid max() on an
                # empty sequence.
                components = list(network.nx.connected_components(graph))
                component = max(components, key=len) if components else []
            else:
                self.graph = graph
            if len(component) > 1:
                if len(component) == graph.number_of_nodes():
                    self.graph = graph
                    matrix = self.matrix
                else:
                    self.graph = graph.subgraph(component)
                    matrix = self.matrix.submatrix(sorted(component))

        if matrix is not None:
            matrix.row_items = self.graph.items()
        self.graph_matrix = matrix

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" % n_items)
        if hasattr(self, "infob"):
            # Guard against an empty matrix (division by zero).
            self.infob.setText("Network nodes: %d (%3.1f%%)" %
                               (self.pconnected, self.pconnected /
                                float(n_items) * 100 if n_items else 0))
        if hasattr(self, "infoc"):
            self.infoc.setText(
                "Network edges: %d (%.2f edges/node)" %
                (self.nedges, self.nedges /
                 float(self.pconnected) if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.sendSignals()
        self.histogram.setRegion(0, self.spinUpperThreshold)
Exemplo n.º 5
0
def extract_network(ppidb,
                    query,
                    geneinfo,
                    include_neighborhood=True,
                    min_score=None,
                    progress=None):
    """Build a graph of interactions from ``ppidb`` around the query keys.

    Args:
        ppidb: interaction database; ``synonyms(key)`` and ``edges(key)``
            (yielding ``(id1, id2, score)`` triples) are called on it.
        query: either a dict mapping database keys to query names, or an
            iterable of keys (each key then serves as its own query name).
        geneinfo: gene info store used to look up a symbol for a node via
            ``geneinfo.matcher.umatch(name)`` and ``geneinfo[match]``.
        include_neighborhood: when true, nodes directly connected to a query
            key (and passing ``min_score``) are added as well.
        min_score: if given, only edges with ``score >= min_score`` are used.
        progress: optional callable receiving a percentage (0-100) while
            edges are being added.

    Returns:
        The graph, with node ids ``0 .. n-1`` and an items table holding the
        query name, key, synonyms, symbol and a query-node flag per node.

    Raises:
        ValueError: if ``min_score`` is given for a ``ppi.BioGRID`` database.
    """
    if not isinstance(query, dict):
        query = {name: name for name in query}

    report_weights = True
    if isinstance(ppidb, ppi.BioGRID):
        # BioGRID scores are not comparable (they can be p values,
        # confidence scores, ..., i.e. whatever was reported in the source
        # publication)
        report_weights = False
        if min_score is not None:
            raise ValueError("min_score used with BioGrid")

    graph = network.Graph()
    # node ids in Orange.network.Graph need to be in [0 .. n-1]; every new
    # key looked up in this mapping is assigned the next free integer.
    nodeids = defaultdict(partial(next, count()))

    def gi_info(names):
        """Return the gene info entry for the first matching name, or None."""
        mapping = [(name, geneinfo.matcher.umatch(name)) for name in names]
        mapping = [(name, match) for name, match in mapping if match]
        entries = [(name, geneinfo[match]) for name, match in mapping]

        if len(entries) > 1:
            # try to resolve conflicts by prioritizing entries whose
            # symbol/gene_id/locus_tag exactly matches the synonym name.
            entries_ = [
                (name, entry) for name, entry in entries
                if name in [entry.gene_id, entry.symbol, entry.locus_tag]
            ]
            if len(entries_) == 1:
                entries = entries_

        if not entries:
            return None
        # Need to report multiple mappings
        return entries[0][1]

    def add_neighbor_node(nodeid, key):
        """Add a non-query node for ``key`` unless it is already present."""
        if nodeid in graph:
            return
        synonyms = ppidb.synonyms(key)
        entry = gi_info(synonyms)
        graph.add_node(nodeid,
                       key=key,
                       synonyms=synonyms,
                       symbol=entry.symbol if entry is not None else "")

    # Add query nodes.
    for key, query_name in query.items():
        nodeid = nodeids[key]
        synonyms = ppidb.synonyms(key)
        entry = gi_info(synonyms)
        graph.add_node(nodeid,
                       key=key,
                       synonyms=synonyms,
                       query_name=query_name,
                       symbol=entry.symbol if entry is not None else "")

    if include_neighborhood:
        # extend the set of nodes in the network with immediate neighbors
        edges_iter = (edge for key in query for edge in ppidb.edges(key))
        for id1, id2, score in edges_iter:
            if min_score is None or score >= min_score:
                nodeid1 = nodeids[id1]
                nodeid2 = nodeids[id2]
                add_neighbor_node(nodeid1, id1)
                add_neighbor_node(nodeid2, id2)

    # add edges between nodes
    for i, id1 in enumerate(nodeids.keys()):
        if progress is not None:
            progress(100.0 * i / len(nodeids))

        for _, id2, score in ppidb.edges(id1):
            # The membership test keeps the defaultdict from growing while
            # it is being iterated.
            if id2 in nodeids and (min_score is None or score >= min_score):
                nodeid1 = nodeids[id1]
                nodeid2 = nodeids[id2]
                assert nodeid1 in graph and nodeid2 in graph
                if score is not None and report_weights:
                    graph.add_edge(nodeid1, nodeid2, weight=score)
                else:
                    graph.add_edge(nodeid1, nodeid2)

    nodedomain = Orange.data.Domain(
        [],
        [],
        [
            Orange.data.StringVariable("Query name"),  # if applicable
            Orange.data.StringVariable("id"),  # ppidb primary key
            Orange.data.StringVariable("Synonyms"),  # ppidb synonyms
            Orange.data.StringVariable("Symbol"),  # ncbi gene name ??
            Orange.data.DiscreteVariable("source", values=["false", "true"])
        ],
    )
    N = len(graph.nodes())
    # Order rows by node id. The graph is keyed by node id, not by database
    # key, so looking ids up in ``nodeids`` would insert bogus entries.
    node_items = sorted(graph.node.items(), key=lambda t: t[0])

    meta = [[
        node.get("query_name", ""),
        node.get("key", ""), ", ".join(node.get("synonyms", [])),
        node.get("symbol", nodeid), (1 if "query_name" in node else 0)
    ] for nodeid, node in node_items]
    if not meta:
        # Keep a 2-D shape so the table can be built for an empty graph.
        meta = numpy.empty((0, len(nodedomain.metas)), dtype=object)

    nodeitems = Orange.data.Table.from_numpy(nodedomain, numpy.empty((N, 0)),
                                             numpy.empty((N, 0)),
                                             numpy.array(meta, dtype=object))

    graph.set_items(nodeitems)

    return graph