def gen_graph_from_mongo(self):
    '''
    Load graph structure from storage.

    Note that add_edge_list will not match vertex ids (str ids)
    in subsequent calls of the function.
    '''
    self.from_storage(find={'projection': {'material_id': 1, 'edges': 1}})
    sources = self.memory['material_id']
    destinations = self.memory['edges']
    self.memory = None  # clean up memory attribute
    print('loaded data structures')
    edge_list = [(sources[i], destinations[i][j])
                 for i in range(len(sources))
                 for j in range(len(destinations[i]))]
    print('generated edge list')
    sources = None  # clean up temporary data variables
    destinations = None
    graph = Graph(directed=False)
    graph.add_edge_list(edge_list, hashed=True, string_vals=True)
    return graph
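# --- Added example (not from the original codebase): a minimal sketch of the
# caveat noted in the docstring above. With hashed=True, add_edge_list returns
# a vertex property map from the new vertex indices back to the original
# string ids; a later call cannot reuse that mapping, so the same string id
# would hash to a fresh vertex. The string_vals keyword is from older
# graph-tool releases (newer ones use hash_type='string').
from graph_tool import Graph

g = Graph(directed=False)
ids = g.add_edge_list([('mp-1', 'mp-2'), ('mp-2', 'mp-3')],
                      hashed=True, string_vals=True)
print([ids[v] for v in g.vertices()])  # ['mp-1', 'mp-2', 'mp-3']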
def test_feasibility(g, weights):
    internal_g = from_gt(g, weights)
    edges = minimum_branching(internal_g, [0])
    tree = Graph(directed=True)
    tree.add_edge_list(edges)
    assert is_arborescence(tree)
def alignment_graph(lengths=[], pairings=[], alignments=[]):
    #print('making graph')
    g = Graph(directed=False)
    seq_index = g.new_vertex_property("int")
    time = g.new_vertex_property("int")
    #add vertices
    g.add_vertex(sum(lengths))
    seq_index.a = np.concatenate([np.repeat(i, l) for i, l in enumerate(lengths)])
    time.a = np.concatenate([np.arange(l) for l in lengths])
    #add edges (alignments)
    alignment_index = g.new_edge_property("int")
    segment_index = g.new_edge_property("int")
    for i, a in enumerate(alignments):
        if len(a) > 0:
            j, k = pairings[i]
            pairs = np.concatenate(a, axis=0)
            indicesJ = (np.arange(lengths[j]) + sum(lengths[:j]))[pairs.T[0]]
            indicesK = (np.arange(lengths[k]) + sum(lengths[:k]))[pairs.T[1]]
            # s/seg avoid shadowing the outer loop variables i/a
            seg_indices = np.concatenate([np.repeat(s, len(seg))
                                          for s, seg in enumerate(a)])
            g.add_edge_list(np.vstack([indicesJ, indicesK,
                                       np.repeat(i, len(pairs)),
                                       seg_indices]).T,
                            eprops=[alignment_index, segment_index])
    #g.add_edge_list([(b, a) for (a, b) in g.edges()])
    #print('created alignment graph', g)
    #g = prune_isolated_vertices(g)
    #print('pruned alignment graph', g)
    #g = transitive_closure(g)
    #graph_draw(g, output_size=(1000, 1000), output="results/casey_jones_bars.pdf")
    return g, seq_index, time, alignment_index, segment_index
def construct_motif_graph(graph_container, motif, vertex_maps=None):
    """Construct and return an undirected gt graph containing motif
    relationships.

    Note that graph_tool generates empty nodes to fill in the missing
    indices. For example, if we add edge (1, 2) to an empty graph, the
    graph will have 3 nodes: 0, 1, 2 and 1 edge (1, 2). For this reason,
    the returned `m_graph` usually has a large number of disconnected
    nodes.

    Parameters:
        graph_container - GraphContainer - Stores the original network
        motif - Motif - Motif in study

    Returns:
        m_graph - gt.Graph - Undirected graph for motif co-occurrence
    """
    if motif.anchors is None:
        print("Warning: Turning motif groups into cliques.")
    graph = graph_container.get_gt_graph()
    graph.set_directed(motif.gt_motif.is_directed())  # graph_tool.Graph
    m_graph = Graph(directed=False)
    if vertex_maps is None:
        m, c, vertex_maps = count_motif(graph, motif)
    for prop_list in vertex_maps:
        for prop in prop_list:
            edges = [i for i in motif.anchored_edges(graph, prop.get_array())]
            m_graph.add_edge_list(edges)
    return m_graph
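# --- Added demonstration (plain graph_tool, not from the original repo) of
# the docstring's point: add_edge_list implicitly creates every vertex index
# up to the largest one it encounters, so low indices appear as isolated nodes.
from graph_tool import Graph

g = Graph(directed=False)
g.add_edge_list([(1, 2)])
print(g.num_vertices(), g.num_edges())  # 3 1 -- vertex 0 exists but is isolated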
def gen_sub_graph_from_mongo(self, center, snn=1):
    '''
    Load graph structure from storage.

    Note that add_edge_list will not match vertex ids (str ids)
    in subsequent calls of the function.

    Args:
        center (str) mp-id of the center of the graph
        snn (int) the number of second nearest neighbors to expand to
    '''
    edge_list = []
    self.from_storage(find={'filter': {'material_id': center},
                            'projection': {'material_id': 1, 'edges': 1}})
    sources = self.memory['material_id'][0]
    destinations = self.memory['edges'][0]
    edge_list.extend([(sources, destinations[j])
                      for j in range(len(destinations))])
    for i in range(snn):
        self.from_storage(find={'filter': {'material_id': {'$in': destinations}},
                                'projection': {'material_id': 1, 'edges': 1}})
        sources = self.memory['material_id']
        destinations = self.memory['edges']
        edge_list.extend([(sources[i], destinations[i][j])
                          for i in range(len(sources))
                          for j in range(len(destinations[i]))])
        destinations = [destinations[i][j]
                        for i in range(len(sources))
                        for j in range(len(destinations[i]))]
    print('generated edge list')
    graph = Graph(directed=False)
    graph.add_edge_list(edge_list, hashed=True, string_vals=True)
    return graph
def test_feasibility(g, weights):
    edges = [(e[0], e[1], w) for e, w in zip(g.get_edges(), weights)]
    min_edges = find_minimum_branching(g.num_vertices(), edges, roots=[0])
    tree = Graph(directed=True)
    tree.add_edge_list(min_edges)
    assert is_arborescence(tree)
def graph_from_matrix(matrix, directed=False):
    g = Graph(directed=directed)
    g.add_vertex(len(matrix))
    weights = g.new_ep("float")
    edges = np.nonzero(matrix)
    edges = np.append(edges, [matrix[edges]], axis=0)
    g.add_edge_list(list(zip(*edges)), eprops=[weights])
    #graph_draw(g, output_size=(1000, 1000), output="results/structure.pdf")
    return g, weights
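# --- Added usage sketch for graph_from_matrix above, assuming numpy and
# graph_tool are already imported as in the snippet. One edge is created per
# nonzero entry, with the entry value stored as the edge weight.
import numpy as np

adjacency = np.array([[0.0, 1.5, 0.0],
                      [0.0, 0.0, 2.0],
                      [0.5, 0.0, 0.0]])
g, weights = graph_from_matrix(adjacency, directed=True)
print(g.num_edges())  # 3, one edge per nonzero entry
print(weights.a)      # [1.5 2.  0.5], in row-major order of the nonzeros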
def test_graphtool():
    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 0)])
    weight = g.new_edge_property('float')
    weight[g.edge(0, 1)] = 1
    weight[g.edge(1, 2)] = 2
    weight[g.edge(2, 3)] = 3
    weight[g.edge(3, 0)] = 4
    assert set(gt2edges_and_weights(g, weight)) == {
        (0, 1, 1),
        (1, 2, 2),
        (2, 3, 3),
        (3, 0, 4),
    }
class ZonedNetwork:
    def __init__(self, size: Tuple[int, int] = (10, 10),
                 field_size: Tuple[int, int] = (100, 100)):
        self.g = Graph(directed=True)
        self.n_zones = size[0] * size[1]
        self.fwidth = field_size[0]
        self.fheight = field_size[1]
        self.n_rows = size[0]
        self.n_cols = size[1]
        self.row_size: float = self.fheight / self.n_rows
        self.col_size: float = self.fwidth / self.n_cols
        self.g.add_vertex(self.n_zones)

    def get_zone(self, coords: Tuple):
        r = int(coords[1] / self.row_size)
        c = int(coords[0] / self.col_size)
        r = min(self.n_rows - 1, r)
        c = min(self.n_cols - 1, c)
        return self.g.vertex(r * self.n_cols + c)

    def add_passes(self, coords_pairs: List[Tuple]):
        pairs = [(self.get_zone((x1, y1)), self.get_zone((x2, y2)))
                 for x1, y1, x2, y2 in coords_pairs]
        return self.g.add_edge_list(pairs)

    def save(self, file: str):
        self.g.save(file, fmt='graphml')
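# --- Added usage sketch for ZonedNetwork; the pass coordinates are invented.
# Each pass is an (x1, y1, x2, y2) tuple in field coordinates, and an edge is
# added from the start zone to the end zone.
net = ZonedNetwork(size=(10, 10), field_size=(100, 100))
net.add_passes([
    (5.0, 5.0, 42.0, 13.0),    # zone (row 0, col 0) -> zone (row 1, col 4)
    (42.0, 13.0, 97.0, 88.0),  # zone (row 1, col 4) -> zone (row 8, col 9)
])
net.save('passes.graphml')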
def clean_up(g, seg_index):
    #plot_matrix(np.triu(adjacency_matrix(g)), "results/clean0.png")
    #graph_draw(g, output_size=(1000, 1000), output="results/clean_up0.pdf")
    seg_combos = get_segment_combos(g, seg_index)
    best = sorted(seg_combos.items(), key=lambda c: c[1], reverse=True)#[:200]
    #print(best)
    best = best[0][0]
    #print(best)
    #print(edges[:100])
    reduced = Graph(directed=False)
    reduced.add_vertex(len(g.get_vertices()))
    edges = g.get_edges([seg_index])
    edges = edges[np.where(np.isin(edges[:, 2], best))]
    reduced.add_edge_list(edges)
    #print(reduced)
    #plot_matrix(np.triu(adjacency_matrix(reduced)), "results/cleani2.png")
    #graph_draw(reduced, output_size=(1000, 1000), output="results/clean_up1.pdf")
    return reduced
def build_graph(m_codes, m_list):
    n_models, n_attributes = m_codes.shape
    g = Graph()
    v_map = {}
    names = g.new_vertex_property("object")

    v_atts = g.add_vertex(n_attributes)
    v_mods = g.add_vertex(n_models)
    v_imps = g.add_vertex(n_attributes)

    for v_idx, v in enumerate(v_atts):
        v_n = v_name(v_idx, kind="data")
        v_map[v_n] = int(v)
        names[v] = v_n

    for v_idx, v in enumerate(v_mods):
        v_n = v_name(v_idx, kind="model")
        v_map[v_n] = int(v)
        names[v] = v_n

        in_edges = ((d, v) for d in m_list[v_idx].desc_ids)
        out_edges = ((v, t) for t in m_list[v_idx].targ_ids)

        g.add_edge_list(in_edges)
        g.add_edge_list(out_edges)

    for v_idx, v in enumerate(v_imps):
        v_n = v_name(v_idx, kind="imputation")
        v_map[v_n] = int(v)
        names[v] = v_n

    g.vp.names = names
    g.v_map = v_map
    return g
def get_pagerank_values(self):
    start = time.time()
    logger.info('Started call to get_pagerank')
    g = Graph()
    vp = g.add_edge_list(self.__v.get_graph_edges(), hashed=True,
                         hash_type='int')
    logger.info('Delta time to build graph: {}s'.format(
        timedelta(seconds=(time.time() - start))))
    start = time.time()
    ranks = pagerank(g)
    logger.info('Delta time to compute pagerank: {}s'.format(
        timedelta(seconds=(time.time() - start))))
    for vertex in g.vertices():
        qid = vp[vertex]
        r = ranks[vertex]
        yield qid, r
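# --- Added consumption sketch (hypothetical caller) for get_pagerank_values
# above: the method yields (id, rank) pairs lazily; here they are collected
# and sorted to report the highest-ranked ids. `service` stands in for
# whatever object owns the method.
top_10 = sorted(service.get_pagerank_values(), key=lambda pair: pair[1],
                reverse=True)[:10]
for qid, rank in top_10:
    print(qid, rank)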
dict_map = pickle.load(handle)
print(dict_map)
print("Loading file...")  # original message: "Carregando arquivo..."
g = Graph(directed=False)
edgelist = []
with open(args.edge_list) as f:
    for line in f:
        if line:
            # wrap map() in list() so the edges survive repeated iteration
            edgelist.append(list(map(int, line.split())))
labels_vertices = g.add_edge_list(edgelist, hashed=True)
labels_vertices_str = g.new_vertex_property("string")
for v in g.vertices():
    labels_vertices_str[v] = str(labels_vertices[v])
labels_vertices_inv = mapeiaLabels(g, labels_vertices)
pos = sfdp_layout(g)
colors = trataCores(dict_map)
pos_new = trataPosicoes(g, pos, dict_map, labels_vertices_inv)
color = g.new_vertex_property("string")
for v in g.vertices():
    index = g.vertex_index[v]
class GeneralGraph():
    """
    General wrapper for graph-tool or networkx graphs to add edges and
    nodes according to constraints
    """

    def __init__(self, directed=True, verbose=1):
        self.graphtool = GRAPH_TOOL
        # Initialize graph
        if self.graphtool:
            self.graph = Graph(directed=directed)
            self.weight = self.graph.new_edge_property("float")
        else:
            if directed:
                print("directed graph")
                self.graph = nx.DiGraph()
            else:
                self.graph = nx.Graph()
        # set metaparameters
        self.time_logs = {}
        self.verbose = verbose

    def set_edge_costs(self, layer_classes=["resistance"], class_weights=[1],
                       **kwargs):
        """
        Initialize edge cost variables
        :param layer_classes: list of cost categories
        :param class_weights: list of weights for cost categories - must be
            of same shape as layer_classes (if None, then equal weighting)
        """
        class_weights = np.array(class_weights)
        # set different costs:
        self.cost_classes = layer_classes
        if self.graphtool:
            self.cost_props = [
                self.graph.new_edge_property("float")
                for _ in range(len(layer_classes))
            ]
        self.cost_weights = class_weights / np.sum(class_weights)
        if self.verbose:
            print(self.cost_classes, self.cost_weights)
        # save weighted instance for plotting
        self.instance = np.sum(
            np.moveaxis(self.cost_instance, 0, -1) * self.cost_weights,
            axis=2
        ) * self.hard_constraints

    def set_shift(self, start, dest, pylon_dist_min=3, pylon_dist_max=5,
                  max_angle=np.pi / 2, **kwargs):
        """
        Initialize shift variable by getting the donut values
        :param pylon_dist_min, pylon_dist_max: min and max distance of pylons
        :param max_angle: maximum angle of edges to the direction vector
            vec = dest - start
        """
        vec = dest - start
        if self.verbose:
            print("SHIFT:", pylon_dist_min, pylon_dist_max, vec, max_angle)
        self.shifts = get_half_donut(pylon_dist_min, pylon_dist_max, vec,
                                     angle_max=max_angle)
        self.shift_tuples = self.shifts

    def set_corridor(self, dist_surface, start_inds, dest_inds,
                     sample_func="mean", sample_method="simple",
                     factor_or_n_edges=1):
        # set new corridor
        corridor = (dist_surface > 0).astype(int)
        self.factor = factor_or_n_edges
        self.cost_rest = self.cost_instance * (
            self.hard_constraints > 0).astype(int) * corridor
        # downsample
        tic = time.time()
        if self.factor > 1:
            self.cost_rest = CostUtils.downsample(self.cost_rest, self.factor,
                                                  mode=sample_method,
                                                  func=sample_func)
        self.time_logs["downsample"] = round(time.time() - tic, 3)
        # repeat because of edge artifacts
        self.cost_rest = self.cost_rest * (
            self.hard_constraints > 0).astype(int) * corridor
        # add start and end TODO ugly
        self.cost_rest[:, dest_inds[0], dest_inds[1]] = \
            self.cost_instance[:, dest_inds[0], dest_inds[1]]
        self.cost_rest[:, start_inds[0], start_inds[1]] = \
            self.cost_instance[:, start_inds[0], start_inds[1]]

    def add_nodes(self, nodes):
        """
        Add vertices to the graph
        :param nodes: list of node names if networkx, integer if graphtool
        """
        tic = time.time()
        # add nodes to graph
        if self.graphtool:
            _ = self.graph.add_vertex(nodes)
            self.n_nodes = len(list(self.graph.vertices()))
        else:
            self.graph.add_nodes_from(np.arange(nodes))
            self.n_nodes = len(self.graph.nodes())
        # verbose
        if self.verbose:
            print("Added nodes:", nodes, "in time:", time.time() - tic)
        self.time_logs["add_nodes"] = round(time.time() - tic, 3)

    def add_edges(self):
        tic_function = time.time()
        n_edges = 0
        # kernels, posneg = ConstraintUtils.get_kernel(self.shifts,
        # self.shift_vals)
        # edge_array = []
        times_edge_list = []
        times_add_edges = []
        if self.verbose:
            print("n_neighbors:", len(self.shift_tuples))
        for i in range(len(self.shift_tuples)):
            tic_edges = time.time()
            # set cost rest if necessary (random graph)
            self.set_cost_rest()
            # compute shift and weights
            out = self._compute_edges(self.shift_tuples[i])
            # Error if -1 entries because graph-tool crashes with -1 nodes
            if np.any(out[:, :2].flatten() < 0):
                print(np.where(out[:, :2] < 0))
                raise RuntimeError
            n_edges += len(out)
            times_edge_list.append(round(time.time() - tic_edges, 3))
            # add edges to graph
            tic_graph = time.time()
            if self.graphtool:
                self.graph.add_edge_list(out, eprops=self.cost_props)
            else:
                nx_edge_list = [
                    (e[0], e[1], {"weight": np.sum(e[2:] * self.cost_weights)})
                    for e in out
                ]
                self.graph.add_edges_from(nx_edge_list)
            times_add_edges.append(round(time.time() - tic_graph, 3))
            # alternative: collect edges here and add all together
            # edge_array.append(out)
        # # alternative: add edges all in one go
        # tic_concat = time.time()
        # edge_lists_concat = np.concatenate(edge_array, axis=0)
        # self.time_logs["concatenate"] = round(time.time() - tic_concat, 3)
        # print("time for concatenate:", self.time_logs["concatenate"])
        # tic_graph = time.time()
        # self.graph.add_edge_list(edge_lists_concat, eprops=[self.weight])
        # self.time_logs["add_edges"] = round(
        #     (time.time() - tic_graph) / len(shifts), 3
        # )
        self.n_edges = len(list(self.graph.edges()))
        self._update_time_logs(times_add_edges, times_edge_list, tic_function)
        if self.verbose:
            print("DONE adding", n_edges, "edges:", time.time() - tic_function)

    def _update_time_logs(self, times_add_edges, times_edge_list,
                          tic_function):
        self.time_logs["add_edges"] = round(np.mean(times_add_edges), 3)
        self.time_logs["add_edges_times"] = times_add_edges
        self.time_logs["edge_list"] = round(np.mean(times_edge_list), 3)
        self.time_logs["edge_list_times"] = times_edge_list
        self.time_logs["add_all_edges"] = round(time.time() - tic_function, 3)
        if self.verbose:
            print("Done adding edges:", len(list(self.graph.edges())))

    def sum_costs(self):
        """
        Additive weighting of costs:
        take the individual edge costs, compute weighted sum --> self.weight
        """
        # add sum of all costs
        if not self.graphtool:
            return
        tic = time.time()
        summed_costs_arr = np.zeros(self.cost_props[0].get_array().shape)
        for i in range(len(self.cost_props)):
            prop = self.cost_props[i].get_array()
            summed_costs_arr += prop * self.cost_weights[i]
        self.weight.a = summed_costs_arr
        self.time_logs["sum_of_costs"] = round(time.time() - tic, 3)

    def remove_vertices(self, dist_surface, delete_padding=0):
        """
        Remove edges in a certain corridor (or all) to replace them by a
        refined surface
        @param dist_surface: a surface where each pixel value corresponds to
            the distance of the pixel to the shortest path
        @param delete_padding: define padding in which part of the corridor
            to delete vertices (cannot delete all because then the graph is
            unconnected)
        """
        tic = time.time()
        self.graph.clear_edges()
        self.graph.shrink_to_fit()
        self.time_logs["remove_edges"] = round(time.time() - tic, 3)

    def get_pareto(self, vary, source, dest, out_path=None, compare=[0, 1],
                   plot=1):
        """
        Arguments:
            vary: how many weights to explore,
                e.g. 3 --> each cost class can have weight 0, 0.5 or 1
            source, dest: as always the source and destination vertex
            out_path: where to save the pareto figure(s)
            compare: indices of cost classes to compare
        Returns:
            paths: all found paths
            pareto: the costs for each combination of weights
        """
        tic = time.time()
        # initialize lists
        pareto = list()
        paths = list()
        cost_sum = list()
        # get the edge costs
        cost_arrs = [cost.get_array() for cost in self.cost_props]
        # [self.cost_props[comp].get_array() for comp in compare]

        # get vary weights between 0 and 1
        var_weights = np.around(np.linspace(0, 1, vary), 2)

        # construct weights array
        if len(compare) == 2:
            weights = [[v, 1 - v] for v in var_weights]
        elif len(compare) == 3:
            weights = list()
            for w0 in var_weights:
                for w1 in var_weights[var_weights <= 1 - w0]:
                    weights.append([w0, w1, 1 - w0 - w1])
        else:
            raise ValueError("argument compare can only have length 2 or 3")

        # w_avail: keep weights of non-compare classes, get leftover amount
        w_avail = np.sum(np.asarray(self.cost_weights)[compare])

        # compute paths for each combination of weights
        for j in range(len(weights)):
            # option 2: np.zeros(len(cost_arrs)) + non_compare_weight
            w = self.cost_weights.copy()
            # replace the ones we want to compare
            w[compare] = np.array(weights[j]) * w_avail
            # weighted sum of edge costs
            self.weight.a = np.sum(
                [cost_arrs[i] * w[i] for i in range(len(cost_arrs))], axis=0)
            # get shortest path
            path, path_costs, _ = self.get_shortest_path(source, dest)
            # don't take cost_sum bc this is sum of original weighting
            pareto.append(np.sum(path_costs, axis=0)[compare])
            paths.append(path)
            # take overall sum of costs (unweighted) that this w leads to
            cost_sum.append(np.sum(path_costs))

        # print best weighting
        best_weight = np.argmin(cost_sum)
        w = self.cost_weights.copy()
        w[compare] = np.array(weights[best_weight]) * w_avail
        print("Best weights:", w, "with (unweighted) costs:",
              np.min(cost_sum))

        self.time_logs["pareto"] = round(time.time() - tic, 3)

        pareto = np.array(pareto)
        classes = [self.cost_classes[comp] for comp in compare]
        # Plotting
        if plot:
            if len(compare) == 2:
                plot_pareto_scatter_2d(pareto, weights, classes,
                                       cost_sum=cost_sum, out_path=out_path)
            elif len(compare) == 3:
                # plot_pareto_3d(pareto, weights, classes)
                plot_pareto_scatter_3d(pareto, weights, classes,
                                       cost_sum=cost_sum, out_path=out_path)
        return paths, weights, cost_sum

    def get_shortest_path(self, source, target):
        """
        Compute the shortest path from the source vertex to the target vertex
        """
        tic = time.time()
        # if source and target are given as indices:
        if self.graphtool:
            vertices_path, _ = shortest_path(self.graph, source, target,
                                             weights=self.weight,
                                             negative_weights=True)
        else:
            try:
                vertices_path = nx.dijkstra_path(self.graph, source, target)
            except nx.exception.NetworkXNoPath:
                return []
        self.time_logs["shortest_path"] = round(time.time() - tic, 3)
        return vertices_path

    def save_graph(self, OUT_PATH):
        """
        Save the graph in OUT_PATH
        """
        if self.graphtool:
            for i, cost_class in enumerate(self.cost_classes):
                self.graph.edge_properties[cost_class] = self.cost_props[i]
            self.graph.edge_properties["weight"] = self.weight
            self.graph.save(OUT_PATH + ".xml.gz")
        else:
            nx.write_weighted_edgelist(self.graph,
                                       OUT_PATH + '.weighted.edgelist')

    def load_graph(self, IN_PATH):
        """
        Retrieve graph from IN_PATH
        """
        if self.graphtool:
            self.g_prev = load_graph(IN_PATH + ".xml.gz")
            self.weight_prev = self.g_prev.ep.weight
            # weight = G2.ep.weight[G2.edge(66, 69)]
        else:
            self.g_prev = nx.read_edgelist(IN_PATH + '.weighted.edgelist',
                                           nodetype=int,
                                           data=(('weight', float), ))

    # -----------------------------------------------------------------------
    # INTERFACE

    def single_sp(self, **kwargs):
        """
        Function for full processing until shortest path
        """
        self.start_inds = kwargs["start_inds"]
        self.dest_inds = kwargs["dest_inds"]
        self.set_shift(self.start_inds, self.dest_inds, **kwargs)
        # self.set_corridor(
        #     np.ones(self.hard_constraints.shape) * 0.5,
        #     self.start_inds,
        #     self.dest_inds,
        #     factor_or_n_edges=1
        # )
        if self.verbose:
            print("1) Initialize shifts and instance (corridor)")
        self.set_edge_costs(**kwargs)
        # add vertices
        self.add_nodes()
        if self.verbose:
            print("2) Initialize distances to inf and predecessors")
        self.add_edges()
        if self.verbose:
            print("3) Compute source shortest path tree")
            print("number of vertices and edges:", self.n_nodes, self.n_edges)
        # weighted sum of all costs
        self.sum_costs()
        source_v, target_v = self.add_start_and_dest(self.start_inds,
                                                     self.dest_inds)
        # get actual best path
        path, path_costs, cost_sum = self.get_shortest_path(source_v,
                                                            target_v)
        if self.verbose:
            print("4) shortest path", cost_sum)
        return path, path_costs, cost_sum
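# --- Added standalone sketch (not using the class's own helpers) of the
# pattern GeneralGraph.add_edges relies on: graph_tool's add_edge_list accepts
# an array whose first two columns are vertex indices, filling one edge
# property map per remaining column via eprops.
import numpy as np
from graph_tool import Graph

g = Graph(directed=True)
g.add_vertex(4)
cost_a = g.new_edge_property("float")
cost_b = g.new_edge_property("float")
out = np.array([[0, 1, 0.2, 1.0],
                [1, 2, 0.5, 0.3]])
g.add_edge_list(out, eprops=[cost_a, cost_b])
print(cost_a.a, cost_b.a)  # [0.2 0.5] [1.  0.3]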
def phylomemetic_graph(steps, communities, min_size=3, max_size=50,
                       parent_limit=2, workers='auto', chunksize='auto',
                       method='fast', min_backwards_containment=0,
                       min_forward_containment=0):
    '''phylomemetic_graph

    Parameters
    ----------
    steps : :obj:`iter` of :obj:`int`
    communities : :obj:`iter` of :obj:`iter` of :obj:`int`
    min_size : :obj:`int`
    max_size : :obj:`int`
    parent_limit : :obj:`int`
    workers : :obj:`int`
    chunksize : :obj:`int`
    method : :obj:`str`
    min_backwards_containment : :obj:`float`
    min_forward_containment : :obj:`float`

    Returns
    -------
    g : :obj:`graph_tool.Graph`
    group_link_strength : :obj:`graph_tool.EdgePropertyMap`
    single_link_strength : :obj:`graph_tool.EdgePropertyMap`
    vertex_steps : :obj:`graph_tool.VertexPropertyMap`
    element_vertex_map : :obj:`dict`
    '''
    if workers == 'auto':
        workers = cpu_count() - 1

    communities_filt = []
    communities_lengths = []
    element_community_mappings = []
    for sequences in communities:
        s_filt = list(filter_by_size(sequences, min_size, max_size))
        communities_filt.append(s_filt)
        communities_lengths.append(len(s_filt))
        element_community_mappings.append(reverse_index(s_filt))

    community_vertex_maps = []
    communities_offsets = []
    cumsum_lengths = np.cumsum(communities_lengths)
    for length, count in zip(communities_lengths, cumsum_lengths):
        start = count - length
        end = count
        communities_offsets.append((start, end))
        community_vertex_maps.append(
            {c: v for c, v in zip(range(length), range(start, end))})

    n_communities = np.sum(communities_lengths)

    phylomemetic_links = []
    for i, (cps, cfs) in enumerate(window(communities_filt, 2)):
        n_cf = len(cfs)
        logger.info(f'Processing {i+1} of {len(communities)-1} periods')
        if chunksize == 'auto':
            chunksize_i = int(np.ceil((1 / workers) * n_cf))
        else:
            chunksize_i = chunksize

        with Pool(workers) as pool:
            phylomemetic_links.append(
                pool.map(
                    find_links,
                    zip(
                        cfs,
                        range(0, len(cfs)),
                        repeat(cps, n_cf),
                        repeat(communities_offsets[i], n_cf),
                        repeat(element_community_mappings[i], n_cf),
                        repeat(parent_limit, n_cf),
                    ),
                    chunksize=chunksize_i,
                ))
            pool.close()
            pool.join()

    g = Graph(directed=True)
    g.add_vertex(n_communities)

    group_link_strength = g.new_edge_property('float')
    single_link_strength = g.new_edge_property('float')

    phylomemetic_links = flatten(flatten(phylomemetic_links))
    g.add_edge_list(phylomemetic_links,
                    eprops=[group_link_strength, single_link_strength])

    element_vertex_map = reverse_index_communities(flatten(communities_filt))

    vertex_steps = g.new_vertex_property('int')
    for (start, end), step in zip(communities_offsets, steps):
        vertex_steps.a[start:end] = step

    return (g, group_link_strength, single_link_strength, vertex_steps,
            element_vertex_map)
class BiblioNetwork():
    "Bibliography network displayer"

    def __init__(self, filepath):
        self.filepath = filepath
        self.db = None
        self._auth_betw = None
        self._auth_betw_computed_from = 0
        self.layout_pos = None
        self.graph = None
        self.author_list = []

    @staticmethod
    def _split_authors(row):
        "Split the authors of the row"
        auth = row['Authors'].split(", ")
        auth = [", ".join(auth[2*i:2*i+2]) for i in range(int(len(auth)/2))]
        return auth

    def parse(self, nmb_to_import=None, delimiter=","):
        "Parse the database csv file"
        # import database
        self.db = pd.read_csv(self.filepath, delimiter, index_col=False,
                              nrows=nmb_to_import, encoding="ISO8859",
                              error_bad_lines=False, warn_bad_lines=True)
        self.db.reset_index()
        # separate authors
        self.db['Authors'] = self.db.apply(self._split_authors, axis=1)
        # Replace missing values
        self.db['Cited by'].fillna(0, inplace=True)
        # Update author list
        self.update_author_list()

    def clean(self, min_citations=10):
        "Remove entries with too few citations"
        len_bef = len(self.db)
        self.db.drop(self.db[self.db["Cited by"] < min_citations].index,
                     inplace=True)
        len_after = len(self.db)
        print(" Removed {} articles, {} remaining"
              .format(len_bef - len_after, len_after))
        self.update_author_list()
        self._auth_betw = None

    def remove_anterior(self, year):
        "Remove entries published up to the given year"
        len_bef = len(self.db)
        self.db.drop(self.db[self.db["Year"] <= year].index, inplace=True)
        len_after = len(self.db)
        print(" Removed {} articles, {} remaining"
              .format(len_bef - len_after, len_after))
        self.update_author_list()
        self._auth_betw = None

    def remove_posterior(self, year):
        "Remove entries published after the given year"
        len_bef = len(self.db)
        self.db.drop(self.db[self.db["Year"] > year].index, inplace=True)
        len_after = len(self.db)
        print(" Removed {} articles, {} remaining"
              .format(len_bef - len_after, len_after))
        self.update_author_list()
        self._auth_betw = None

    def update_author_list(self):
        "Update the author list from the database"
        auths = list(set(np.concatenate(self.db['Authors'].values)))
        self.author_list = np.sort(auths)

    @property
    def author_betweeness(self):
        "Compute author betweenness"
        # If already computed, just return it
        if self._auth_betw is not None and \
           self._auth_betw_computed_from == len(self.db):
            return self._auth_betw
        # else compute it
        self._auth_betw_computed_from = len(self.db)
        auth_betw = {auth: {} for auth in self.author_list}
        for auths in self.db['Authors']:
            # skip if only one author
            if len(auths) == 1:
                continue
            # Loop on author couples
            for i1, auth1 in enumerate(auths):
                for auth2 in auths[i1 + 1:]:
                    # create couple if necessary, or increment
                    if auth2 not in auth_betw[auth1].keys():
                        auth_betw[auth1][auth2] = 1
                    else:
                        auth_betw[auth1][auth2] += 1
                    if auth1 not in auth_betw[auth2].keys():
                        auth_betw[auth2][auth1] = 1
                    else:
                        auth_betw[auth2][auth1] += 1
        self._auth_betw = auth_betw
        return self._auth_betw

    @author_betweeness.setter
    def author_betweeness(self, val):
        raise Exception("You cannot change that")

    def get_total_citation(self):
        """Return the total number of citations for each author"""
        nmbcits = {}
        for _, art in self.db.iterrows():
            auths = art['Authors']
            nmbcit = int(art['Cited by'])
            for auth in auths:
                if auth in nmbcits.keys():
                    nmbcits[auth] += nmbcit
                else:
                    nmbcits[auth] = nmbcit
        return nmbcits

    def get_auth_nmb_of_art(self):
        """Return the number of articles for each author"""
        nmbart = {}
        for _, art in self.db.iterrows():
            auths = art['Authors']
            for auth in auths:
                if auth in nmbart.keys():
                    nmbart[auth] += 1
                else:
                    nmbart[auth] = 1
        return nmbart

    def _get_author_publication(self):
        auth2pub = {}
        for _, art in self.db.iterrows():
            for auth in art['Authors']:
                if auth in auth2pub.keys():
                    auth2pub[auth] += [art.name]
                else:
                    auth2pub[auth] = [art.name]
        return auth2pub

    def write_author_list(self, filepath):
        with open(filepath, "w") as f:
            data = ['{}: {}\n'.format(i, auth)
                    for i, auth in enumerate(self.author_list)]
            f.writelines(data)

    def make_article_graph(self, layout="arf"):
        """Make an article graph"""
        self.graph = Graph(directed=False)
        # add vertices
        self.graph.add_vertex(len(self.db))
        # add properties
        cb = self.graph.new_vertex_property("int", self.db['Cited by'].values)
        self.graph.vertex_properties['nmb_citation'] = cb
        # Add links
        auths = list(self.author_betweeness.keys())
        auth2ind = {auths[i]: i for i in range(len(auths))}
        auth2pub = self._get_author_publication()
        for _, pubs in auth2pub.items():
            if len(pubs) < 2:
                continue
            combis = itertools.combinations(pubs, 2)
            self.graph.add_edge_list(list(combis))
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph)
        elif layout == "sfpd":
            self.layout_pos = sfdp_layout(self.graph)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(self.graph)
        elif layout == "radial":
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind['Logan, B.E.'])
        else:
            raise ValueError()

    def make_author_graph(self, layout="arf"):
        """Make an author graph"""
        self.graph = Graph(directed=False)
        # add vertices
        auths = self.author_list
        self.graph.add_vertex(len(auths))
        # add links
        auth2ind = {auths[i]: i for i in range(len(auths))}
        abet = []
        authbet = copy.deepcopy(self.author_betweeness)
        for auth in auths:
            for col, weight in authbet[auth].items():
                if col == auth:
                    continue
                self.graph.add_edge(auth2ind[auth], auth2ind[col])
                del authbet[col][auth]  # ensure that edges are not doubled
                abet.append(weight)
        # add properties
        cb = self.graph.new_edge_property("int", abet)
        self.graph.edge_properties['weight'] = cb
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph,
                                         weight=self.graph.ep.weight,
                                         pos=self.layout_pos,
                                         max_iter=10000)
        elif layout == "sfpd":
            self.layout_pos = sfdp_layout(self.graph,
                                          eweight=self.graph.ep.weight,
                                          pos=self.layout_pos)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(
                self.graph, weight=self.graph.ep.weight, circular=True,
                pos=self.layout_pos)
        elif layout == "radial":
            nc = self.get_total_citation()
            main_auth_ind = np.argmax(list(nc.values()))
            main_auth = list(nc.keys())[main_auth_ind]
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind[main_auth])
        elif layout == "planar":
            self.layout_pos = planar_layout(self.graph)
        else:
            raise ValueError()

    def display_article_graph(self, out="graph.pdf", min_size=1, max_size=10,
                              indice=False):
        """Display an article graph

        One point per article; size and color correspond to the number of
        citations.
        """
        cb = np.log(np.array(self.graph.vp.nmb_citation.a) + 2)
        ms = cb / max(cb) * (max_size - min_size) + min_size
        ms = self.graph.new_vertex_property('float', ms)
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_size=ms,
                   vertex_fill_color=self.graph.vp.nmb_citation,
                   vcmap=plt.cm.viridis)

    def display_author_graph(self, out="graph.pdf", min_size=1, max_size=10,
                             indice=False):
        """Display an author graph"""
        auths = self.author_list
        nc = self.get_total_citation()
        nc = [int(nc[auth]) for auth in auths]
        na = self.get_auth_nmb_of_art()
        na = [int(na[auth]) for auth in auths]
        # normalize citation number
        nc = np.array(nc, dtype=float)
        nc /= np.max(nc)
        nc *= (max_size - min_size)
        nc += min_size
        # normalize edge width
        weight = np.array(self.graph.ep.weight.a, dtype=float)
        weight /= np.max(weight)
        weight *= (1 - 0.1)
        weight += 0.1
        # Get vertex display order
        vorder = np.argsort(nc)
        # Get indices
        if indice:
            text = range(len(vorder))
            textg = self.graph.new_vertex_property('string', text)
        else:
            textg = None
        # plot
        ncg = self.graph.new_vertex_property('float', nc)
        nag = self.graph.new_vertex_property('int', na)
        vorderg = self.graph.new_vertex_property('int', vorder)
        weightg = self.graph.new_edge_property('float', weight)
        self.graph.vp['nmb_citation'] = ncg
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_fill_color=nag, vertex_size=ncg,
                   edge_pen_width=weightg, vertex_text=textg,
                   vorder=vorderg, vertex_text_position=0,
                   vcmap=plt.cm.PuBu)
def test_find_minimum_branching(g, weights):
    edges = find_minimum_branching(g, weights=weights)
    tree = Graph(directed=True)
    tree.add_edge_list(edges)
    assert is_arborescence(tree)