def transform(self, graphs):
    """Transform a list of networkx graphs into a sparse matrix.

    When ``self.n_jobs == 1`` the work is done serially; otherwise a
    multiprocessing pool fans the graphs out in chunks of
    ``self.block_size`` and the per-chunk matrices are stacked at the end.

    Parameters
    ----------
    graphs : list[graphs]
        The input list of networkx graphs.

    Returns
    -------
    data_matrix : array-like, shape = [n_samples, n_features]
        Vector representation of input graphs.

    >>> # transforming the same graph (with different node-ids).
    >>> import networkx as nx
    >>> def get_path_graph(length=4):
    ...     g = nx.path_graph(length)
    ...     for n,d in g.nodes(data=True):
    ...         d['label'] = 'C'
    ...     for a,b,d in g.edges(data=True):
    ...         d['label'] = '1'
    ...     return g
    >>> g = get_path_graph(4)
    >>> g2 = get_path_graph(5)
    >>> g2.remove_node(0)
    >>> g[1][2]['label']='2'
    >>> g2[2][3]['label']='2'
    >>> v = Vectorizer()
    >>> def vec_to_hash(vec):
    ...     return hash(tuple(vec.data + vec.indices))
    >>> vec_to_hash(v.transform([g])) == vec_to_hash (v.transform([g2]))
    True
    """
    if self.n_jobs == 1:
        return self._transform_serial(graphs)
    if self.n_jobs == -1:
        pool = mp.Pool(mp.cpu_count())
    else:
        pool = mp.Pool(self.n_jobs)
    results = [apply_async(pool, self._transform_serial,
                           args=([subset_graphs]))
               for subset_graphs in chunks(graphs, self.block_size)]
    # Gather every chunk's matrix first and stack once at the end:
    # the previous pairwise vstack-in-a-loop re-copied the accumulated
    # matrix on each iteration (quadratic in total rows), and left
    # data_matrix unbound when there were no chunks at all.
    matrices = [p.get() for p in results]
    pool.close()
    pool.join()
    if len(matrices) == 1:
        # Preserve the single-chunk behavior: return the chunk's matrix
        # as-is without a format-converting vstack pass.
        return matrices[0]
    return vstack(matrices)
def transform(self, graphs):
    """Transform a list of networkx graphs into a sparse matrix.

    When ``self.n_jobs == 1`` the work is done serially; otherwise a
    multiprocessing pool fans the graphs out in chunks of
    ``self.block_size`` and the per-chunk matrices are stacked at the end.

    Parameters
    ----------
    graphs : list[graphs]
        The input list of networkx graphs.

    Returns
    -------
    data_matrix : array-like, shape = [n_samples, n_features]
        Vector representation of input graphs.

    >>> # transforming the same graph (with different node-ids).
    >>> import networkx as nx
    >>> def get_path_graph(length=4):
    ...     g = nx.path_graph(length)
    ...     for n,d in g.nodes(data=True):
    ...         d['label'] = 'C'
    ...     for a,b,d in g.edges(data=True):
    ...         d['label'] = '1'
    ...     return g
    >>> g = get_path_graph(4)
    >>> g2 = get_path_graph(5)
    >>> g2.remove_node(0)
    >>> g[1][2]['label']='2'
    >>> g2[2][3]['label']='2'
    >>> v = Vectorizer()
    >>> def vec_to_hash(vec):
    ...     return hash(tuple(vec.data + vec.indices))
    >>> vec_to_hash(v.transform([g])) == vec_to_hash (v.transform([g2]))
    True
    """
    if self.n_jobs == 1:
        return self._transform_serial(graphs)
    if self.n_jobs == -1:
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
    else:
        pool = multiprocessing.Pool(self.n_jobs)
    results = [
        apply_async(pool, self._transform_serial, args=([subset_graphs]))
        for subset_graphs in chunks(graphs, self.block_size)
    ]
    # Gather every chunk's matrix first and stack once at the end:
    # the previous pairwise vstack-in-a-loop re-copied the accumulated
    # matrix on each iteration (quadratic in total rows), and left
    # data_matrix unbound when there were no chunks at all.
    matrices = [p.get() for p in results]
    pool.close()
    pool.join()
    if len(matrices) == 1:
        # Preserve the single-chunk behavior: return the chunk's matrix
        # as-is without a format-converting vstack pass.
        return matrices[0]
    return vstack(matrices)
def vertex_transform(self, graphs):
    """Transform a list of networkx graphs into a list of sparse matrices.

    Each matrix has dimension n_nodes x n_features, i.e. each vertex is
    associated to a sparse vector that encodes the neighborhood of the
    vertex up to radius + distance.

    When ``self.n_jobs == 1`` the work is done serially; otherwise a
    multiprocessing pool fans the graphs out in chunks of
    ``self.block_size`` and the per-chunk results are concatenated in
    chunk order.

    Parameters
    ----------
    graphs : list[graphs]
        The input list of networkx graphs.

    Returns
    -------
    matrix_list : array-like, shape = [n_samples, [n_nodes, n_features]]
        Vector representation of each vertex in the input graphs.
    """
    if self.n_jobs == 1:
        return self._vertex_transform_serial(graphs)
    if self.n_jobs == -1:
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
    else:
        pool = multiprocessing.Pool(self.n_jobs)
    results = [
        apply_async(pool, self._vertex_transform_serial,
                    args=([subset_graphs]))
        for subset_graphs in chunks(graphs, self.block_size)
    ]
    # Concatenate per-chunk lists in submission order; the enumerate
    # index in the original loop was never used and has been dropped.
    matrix_list = []
    for p in results:
        matrix_list += p.get()
    pool.close()
    pool.join()
    return matrix_list
def vertex_transform(self, graphs):
    """Transform a list of networkx graphs into a list of sparse matrices.

    Each matrix has dimension n_nodes x n_features, i.e. each vertex is
    associated to a sparse vector that encodes the neighborhood of the
    vertex up to radius + distance.

    Parameters
    ----------
    graphs : list[graphs]
        The input list of networkx graphs.

    Returns
    -------
    matrix_list : array-like, shape = [n_samples, [n_nodes, n_features]]
        Vector representation of each vertex in the input graphs.
    """
    # Serial shortcut: no pool overhead for a single worker.
    if self.n_jobs == 1:
        return self._vertex_transform_serial(graphs)
    # -1 means "use every available core".
    worker_count = mp.cpu_count() if self.n_jobs == -1 else self.n_jobs
    pool = mp.Pool(worker_count)
    # Dispatch each batch of graphs to the pool asynchronously.
    pending = [apply_async(pool, self._vertex_transform_serial,
                           args=([graph_batch]))
               for graph_batch in chunks(graphs, self.block_size)]
    # Collect the per-batch lists in submission order.
    matrix_list = []
    for task in pending:
        matrix_list.extend(task.get())
    pool.close()
    pool.join()
    return matrix_list