def ego_graph(self, radius=1, types=None, min_degree=None):
    '''Generate an undirected ego graph around the current entity.

    The graph is built from the undirected form of the shared network
    returned by ``network_data()``, centered on ``self.nx_node_id``.

    :param radius: radius or degree of the ego graph; defaults to 1
    :param types: node types to be included in the graph (e.g., restrict
        to people and organizations only); nodes without a ``type``
        attribute are dropped whenever this filter is given
    :param min_degree: optionally filter nodes in the generated ego
        graph by minimum degree (delegated to ``filter_graph``)
    :returns: the ego graph, or the result of ``filter_graph`` when
        ``min_degree`` is specified
    '''
    # convert the multidigraph to undirected to make it possible to find
    # all neighbors, not just outbound connections
    # (should be a way to get this from a digraph...)
    undirected_net = network_data().to_undirected()

    # filter the network *before* generating the ego graph
    # so we don't get disconnected nodes
    if types is not None:
        # collect first, then remove: deleting nodes while iterating over
        # the graph's node collection is only safe when nodes() happens
        # to return a list
        unwanted = [
            n for n in undirected_net.nodes()
            if 'type' not in undirected_net.node[n]
            or undirected_net.node[n]['type'] not in types
        ]
        undirected_net.remove_nodes_from(unwanted)

    eg = nx.ego_graph(undirected_net, self.nx_node_id, radius=radius)
    if min_degree is not None:
        return filter_graph(eg, min_degree=min_degree)
    return eg
def group_people_js(request, mode="egograph", output="full"):
    """Return Belfast Group network graph data as JSON, for use with
    :meth:`group_people`.

    For the egograph mode, the radius of the graph is read from the
    ``degree`` url parameter (1 or 2; anything else is clamped, and a
    non-integer value falls back to 1).  A two-degree graph is always
    filtered to nodes with a minimum degree of 5.

    When generating the node and link data, nodes are annotated with
    degree, in degree, out degree, betweenness centrality, and
    eigenvector centrality if available.  (In/out degree is only
    available for directed graphs.)

    :param mode: egograph: network information for a one- or two-degree
        egograph centered around the Belfast Group;
        groupsheet-model: alternate network graph based on the Group
        sheets themselves
    :param output: full: node and link data;
        adjacency: adjacency matrix, used for generating chord diagram
    :raises Http404: if ``mode`` or ``output`` is not one of the
        supported values
    """
    # reject unsupported options up front; otherwise ``graph`` or ``data``
    # would never be assigned and the view would fail with a server error
    if mode not in ("egograph", "groupsheet-model") or \
       output not in ("full", "adjacency"):
        from django.http import Http404
        raise Http404("Unsupported graph mode %r or output %r" % (mode, output))

    if mode == "egograph":
        extra_opts = {}
        try:
            degree = int(request.GET.get("degree", 1))
        except ValueError:
            # if a value is passed that can't be converted to int, fallback to 1
            degree = 1
        # currently only support 1 or 2 degree
        degree = max(1, min(degree, 2))
        # NOTE: degree 2 graph is large enough that it *must* be filtered
        # to be sensible and usable on the webpage;
        # by trial & error, I found a minimum degree of 5 to be reasonable
        if degree == 2:
            extra_opts["min_degree"] = 5

        belfast_group = RdfOrganization(network_data().copy(), BELFAST_GROUP_URI)
        graph = belfast_group.ego_graph(
            radius=degree, types=["Person", "Organization"], **extra_opts
        )
        # annotate nodes in graph with degree
        # FIXME: not a directional graph; in/out degree not available
        graph = annotate_graph(
            graph,
            fields=["degree", "in_degree", "out_degree",
                    "betweenness_centrality", "eigenvector_centrality"]
        )
    else:
        # mode == "groupsheet-model"
        graph = gexf.read_gexf(settings.GEXF_DATA["bg1"])
        graph = annotate_graph(
            graph,
            fields=["degree", "betweenness_centrality", "eigenvector_centrality"]
        )

    if output == "full":
        data = json_graph.node_link_data(graph)
    else:
        # output == "adjacency": adjacency matrix for generating chord diagram
        matrix = nx.convert_matrix.to_numpy_matrix(graph)
        # NOTE: this also works, but as of networx 1.9 requires scipy
        # matrix = nx.linalg.graphmatrix.adjacency_matrix(graph)
        data = matrix.tolist()

    return HttpResponse(json.dumps(data), content_type="application/json")
def connections(self, rdftype=None, resource=rdflib.resource.Resource):
    '''Find connected entities (direct neighbors in the network graph)
    along with their relationship terms (edge labels) and the combined
    weight of those relationships.

    Optionally, takes an RDF type to filter the entities (e.g., restrict
    only to People or Organizations), and a subclass of
    :class:`rdflib.resource.Resource` to initialize the entity as.

    :param rdftype: if specified, only include neighbors that have this
        rdf:type in the RDF data
    :param resource: class used to instantiate each connected entity;
        called as ``resource(graph, uriref)``
    :returns: list of ``(entity, (relationship labels, weight))`` tuples,
        sorted by weight so the strongest connections are listed first;
        an empty list if the current entity is not in the network
    '''
    network = network_data()
    graph = rdf_data()
    me = self.nx_node_id

    # return an empty *list* so the result type matches the normal case
    if me not in network:
        return []

    # this also works...
    # neighbors = network.neighbors(self.nx_node_id)
    connections = {}
    for node in self.ego_graph().nodes():
        # don't include the current person in their own connections
        if node == me:
            continue

        uriref = rdflib.URIRef(node)
        # if an rdf type was specified, filter out items that do not
        # match that type.
        if rdftype is not None and \
           (uriref, rdflib.RDF.type, rdftype) not in graph:
            continue

        rels = set()
        weight = 0
        # find any edges between this node and me;
        # include data to simplify accessing edge label.
        # use edges & labels from original multidigraph (both directions)
        all_edges = list(network.out_edges(node, data=True)) + \
            list(network.in_edges(node, data=True))
        for src, target, data in all_edges:
            if me in (src, target) and 'label' in data:
                # assume default weight of 1 if not set
                weight += data.get('weight', 1)
                rels.add(data['label'])

        # when both labels are present, only 'correspondedWith' is kept
        if 'knows' in rels and 'correspondedWith' in rels:
            rels.remove('knows')

        connections[resource(graph, uriref)] = (rels, weight)

    # sort by weight so strongest connections will be listed first
    return sorted(connections.items(), key=lambda x: x[1][1], reverse=True)
def gexf_content(request, mode):
    """Make network data available as GEXF files for download and use in
    tools like Gephi.

    :param mode: which graph to export:
        all: the full network from ``network_data()``;
        group-people: the filtered graph from ``_network_graph()``;
        groupsheets: the graph read from ``settings.GEXF_DATA["bg1"]``
    :returns: GEXF content as an attachment named
        ``belfastgroup-<mode>.gexf``
    :raises Http404: if ``mode`` is not one of the supported values
    """
    if mode == "all":
        graph = network_data()
    elif mode == "group-people":
        # filtered graph of people/places/organizations used for
        # first BG network graph displayed on the site
        # - same data used in :meth:`full_js`
        graph = _network_graph()
    elif mode == "groupsheets":
        graph = gexf.read_gexf(settings.GEXF_DATA["bg1"])
    else:
        # without this branch ``graph`` would be unbound below and the
        # request would fail with a server error instead of a 404
        from django.http import Http404
        raise Http404("Unsupported GEXF export mode %r" % (mode,))

    buf = StringIO()
    gexf.write_gexf(graph, buf)
    response = HttpResponse(buf.getvalue(), content_type="application/gexf+xml")
    response["Content-Disposition"] = "attachment; filename=belfastgroup-%s.gexf" % mode
    return response
def _network_graph(min_degree=1, **kwargs):
    """Build a filtered copy of the network graph.

    Nodes are first restricted to people, organizations, places and
    Belfast Group sheets.  Each group sheet is then replaced by an edge
    (weight 4) between its author and the Belfast Group, when the author
    is present in the graph.  Finally, nodes whose degree is below
    ``min_degree`` are removed.

    :param min_degree: minimum degree a node must have to be kept;
        defaults to 1
    :param kwargs: accepted for compatibility with callers; not used
    :returns: the filtered graph (the shared network is not modified)
    """
    graph = network_data().copy()   # don't modify the original network
    rdfgraph = rdf_data()

    # filter graph by type of node
    types = ["Person", "Organization", "Place", "BelfastGroupSheet"]

    # iterate over a snapshot of the nodes: the loop removes nodes, which
    # is not safe while iterating over the graph's own node collection
    for n in list(graph.nodes()):
        node_type = graph.node[n].get("type") if "type" in graph.node[n] else None
        if "type" not in graph.node[n] or node_type not in types:
            graph.remove_node(n)
            continue

        # use groupsheets to infer a connection between the author
        # of the groupsheet and the group itself
        # FIXME: this needs to be in data prep/clean, NOT here
        # TODO: should be handled in prep now; confirm and then remove this logic
        if node_type == "BelfastGroupSheet":
            sheet = RdfGroupSheet(rdfgraph, rdflib.URIRef(n))
            # FIXME: error handling when author is not in the graph?
            # should probably at least log this...
            author = sheet.author
            if author:
                author_id = unicode(author.identifier)
                if author_id in graph:
                    graph.add_edge(author_id, BELFAST_GROUP_URI, weight=4)

            # remove the groupsheet itself from the network, to avoid
            # cluttering up the graph with too much information
            # graph.add_edge(n, BELFAST_GROUP_URI, weight=5)
            graph.remove_node(n)

    # AFTER filtering by type, filter out by requested minimum degree.
    # Degree is checked as each node is visited, so earlier removals in
    # this pass affect the degree seen for later nodes.
    removed = 0
    for n in list(graph.nodes()):
        if graph.degree(n) < min_degree:
            removed += 1
            graph.remove_node(n)

    logger.info("removed %d nodes with degree less than %d", removed, min_degree)
    return graph