def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel (imap_unordered), returning a list aligned with ``g_list``.

    Graphs are converted to directed form first when the dataset is
    undirected, since the direct-product construction works on directed
    graphs.
    """
    self._check_graphs(g_list + [g1])
    self._add_dummy_labels(g_list + [g1])
    if not self._ds_infos['directed']:  # convert to directed graphs.
        g1 = g1.to_directed()
        g_list = [G.to_directed() for G in g_list]

    # One result slot per graph; workers fill them in arbitrary order.
    kernel_list = [None] * len(g_list)

    # Select the pairwise worker for the chosen direct product graph method.
    if self._compute_method == 'exp':  # exponential series
        do_fun = self._wrapper_kernel_list_do_exp
    elif self._compute_method == 'geo':  # geometric series
        do_fun = self._wrapper_kernel_list_do_geo
    else:
        # Previously an unrecognized method left `do_fun` unbound and the
        # call below failed with an obscure UnboundLocalError; fail fast
        # with a clear message instead.
        raise ValueError(
            'Unknown compute method: %s.' % repr(self._compute_method))

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    itr = range(len(g_list))
    len_itr = len(g_list)
    parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                init_worker=_init_worker_list, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list): self._all_graphs_have_edges([g1] + g_list) # get shortest path graphs of g1 and each graph in g_list. g1 = getSPGraph(g1, edge_weight=self._edge_weight) pool = Pool(self._n_jobs) get_sp_graphs_fun = self._wrapper_get_sp_graphs itr = zip(g_list, range(0, len(g_list))) if len(g_list) < 100 * self._n_jobs: chunksize = int(len(g_list) / self._n_jobs) + 1 else: chunksize = 100 iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), desc='getting sp graphs', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) for i, g in iterator: g_list[i] = g pool.close() pool.join() # compute Gram matrix. kernel_list = [None] * len(g_list) def init_worker(g1_toshare, gl_toshare): global G_g1, G_gl G_g1 = g1_toshare G_gl = gl_toshare do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): var_to_assign[result[0]] = result[1] itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel. Shortest paths of all graphs are extracted first (in a
    process pool), then the pairwise kernels are computed from them.

    Fix: the verbosity check below used ``self.verbose`` while the rest
    of this method (and its siblings in this file) uses ``self._verbose``;
    unified on ``self._verbose``.
    """
    # get shortest paths of g1 and each graph in g_list.
    sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
    splist = [None] * len(g_list)
    pool = Pool(self._n_jobs)
    itr = zip(g_list, range(0, len(g_list)))
    # Heuristic chunk size: spread small inputs evenly, cap at 100.
    if len(g_list) < 100 * self._n_jobs:
        chunksize = int(len(g_list) / self._n_jobs) + 1
    else:
        chunksize = 100
    # get shortest paths of each graph in g_list.
    if self.__compute_method == 'trie':
        get_sps_fun = self._wrapper_get_sps_trie
    else:
        get_sps_fun = self._wrapper_get_sps_naive
    if self._verbose >= 2:  # was `self.verbose` — inconsistent attribute name.
        iterator = tqdm(pool.imap_unordered(get_sps_fun, itr, chunksize),
                        desc='getting shortest paths', file=sys.stdout)
    else:
        iterator = pool.imap_unordered(get_sps_fun, itr, chunksize)
    # Results arrive unordered; each worker returns (index, paths).
    for i, sp in iterator:
        splist[i] = sp
    pool.close()
    pool.join()

    # compute Gram matrix.
    kernel_list = [None] * len(g_list)

    def init_worker(sp1_toshare, spl_toshare, g1_toshare, gl_toshare):
        # Share data with worker processes through module globals.
        global G_sp1, G_spl, G_g1, G_gl
        G_sp1 = sp1_toshare
        G_spl = spl_toshare
        G_g1 = g1_toshare
        G_gl = gl_toshare

    if self.__compute_method == 'trie':
        do_fun = self.__wrapper_ssp_do_trie
    else:
        do_fun = self._wrapper_kernel_list_do

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    itr = range(len(g_list))
    len_itr = len(g_list)
    parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                init_worker=init_worker, glbv=(sp1, splist, g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='calculating kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel, via spectral decomposition of the adjacency matrices.

    Only the default case (uniform initial distribution ``p`` and uniform
    stopping distribution ``q``, both ``None``) is implemented; in the
    other branches the result list is returned still filled with ``None``.
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored. Only works for undirected graphs.')

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    if self._q is None:
        # precompute the spectral decomposition of each graph.
        # D1: eigenvalues, P1: eigenvectors of g1's (transposed) adjacency matrix.
        A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
        D1, P1 = np.linalg.eig(A1)
        P_list = []
        D_list = []
        if self._verbose >= 2:
            iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
        else:
            iterator = g_list
        for G in iterator:
            # don't normalize adjacency matrices if q is a uniform vector. Note
            # A actually is the transpose of the adjacency matrix.
            A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
            ew, ev = np.linalg.eig(A)
            D_list.append(ew)
            P_list.append(ev)  # @todo: parallel?

        if self._p is None:  # p is uniform distribution as default.
            # NOTE(review): q_T1 is a scalar 1/n while q_T_list holds (1, n)
            # row vectors — presumably the worker handles both forms; confirm.
            q_T1 = 1 / nx.number_of_nodes(g1)
            q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G))
                        for G in g_list]  # @todo: parallel?

            def init_worker(q_T1_toshare, P1_toshare, D1_toshare,
                            q_T_list_toshare, P_list_toshare, D_list_toshare):
                # Share decompositions with worker processes via module globals.
                global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list
                G_q_T1 = q_T1_toshare
                G_P1 = P1_toshare
                G_D1 = D1_toshare
                G_q_T_list = q_T_list_toshare
                G_P_list = P_list_toshare
                G_D_list = D_list_toshare

            do_fun = self._wrapper_kernel_list_do

            def func_assign(result, var_to_assign):
                # result == (index, kernel value)
                var_to_assign[result[0]] = result[1]

            itr = range(len(g_list))
            len_itr = len(g_list)
            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                        init_worker=init_worker,
                        glbv=(q_T1, P1, D1, q_T_list, P_list, D_list),
                        method='imap_unordered', n_jobs=self._n_jobs,
                        itr_desc='Computing kernels', verbose=self._verbose)

        else:  # @todo: non-uniform p not implemented.
            pass
    else:  # @todo: non-uniform q not implemented.
        pass

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list``,
    distributing the pairwise computations over a process pool.

    When tottering removal is enabled, ``g1`` is transformed in-process
    and the graphs of ``g_list`` are transformed in parallel first.
    """
    self._add_dummy_labels(g_list + [g1])

    if self._remove_totters:
        g1 = untotterTransformation(
            g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
        pool = Pool(self._n_jobs)
        index_itr = range(0, len(g_list))
        n_graphs = len(g_list)
        # Heuristic chunk size: spread small inputs evenly, cap at 100.
        if n_graphs < 100 * self._n_jobs:
            chunksize = int(n_graphs / self._n_jobs) + 1
        else:
            chunksize = 100
        remove_fun = self._wrapper_untotter
        progress = get_iters(
            pool.imap_unordered(remove_fun, index_itr, chunksize),
            desc='removing tottering', file=sys.stdout,
            length=n_graphs, verbose=(self._verbose >= 2))
        # Results arrive unordered; each worker returns (index, graph).
        for idx, graph in progress:
            g_list[idx] = graph
        pool.close()
        pool.join()

    # One result slot per graph; workers fill them in arbitrary order.
    kernel_list = [None] * len(g_list)

    def init_worker(g1_toshare, g_list_toshare):
        # Share the graphs with worker processes through module globals.
        global G_g1, G_g_list
        G_g1 = g1_toshare
        G_g_list = g_list_toshare

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    parallel_me(self._wrapper_kernel_list_do, func_assign, kernel_list,
                range(len(g_list)), len_itr=len(g_list),
                init_worker=init_worker, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list): self.__add_dummy_labels(g_list + [g1]) # get all canonical keys of all graphs before calculating kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys_1 = self.__get_canonkeys(g1) canonkeys_list = [[] for _ in range(len(g_list))] pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) if len(g_list) < 100 * self._n_jobs: chunksize = int(len(g_list) / self._n_jobs) + 1 else: chunksize = 100 get_fun = self._wrapper_get_canonkeys if self._verbose >= 2: iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize), desc='getting canonkeys', file=sys.stdout) else: iterator = pool.imap_unordered(get_fun, itr, chunksize) for i, ck in iterator: canonkeys_list[i] = ck pool.close() pool.join() # compute kernel list. kernel_list = [None] * len(g_list) def init_worker(ck_1_toshare, ck_list_toshare): global G_ck_1, G_ck_list G_ck_1 = ck_1_toshare G_ck_list = ck_list_toshare do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): var_to_assign[result[0]] = result[1] itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel. Currently implemented only for the default case where both
    initial (``p``) and stopping (``q``) distributions are uniform
    (``None``); otherwise the all-``None`` result list is returned.
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])

    # One result slot per graph, filled in arbitrary order by workers.
    kernel_list = [None] * len(g_list)

    # Relabel vertices with consecutive integers for convenient indexing;
    # original labels are preserved in the 'label_orignal' attribute.
    g1 = nx.convert_node_labels_to_integers(
        g1, first_label=0, label_attribute='label_orignal')
    progress = get_iters(g_list, desc='Reindex vertices', file=sys.stdout,
                         verbose=(self._verbose >= 2))
    g_list = [
        nx.convert_node_labels_to_integers(
            g, first_label=0, label_attribute='label_orignal')
        for g in progress
    ]  # @todo: parallel this.

    if self._p is not None or self._q is not None:
        # @todo: non-uniform p / q not implemented yet.
        return kernel_list

    def init_worker(g1_toshare, g_list_toshare):
        # Hand the graphs to worker processes through module globals.
        global G_g1, G_g_list
        G_g1 = g1_toshare
        G_g_list = g_list_toshare

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    parallel_me(self._wrapper_kernel_list_do, func_assign, kernel_list,
                range(len(g_list)), len_itr=len(g_list),
                init_worker=init_worker, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel. All paths of every graph are extracted up front (trading
    memory for speed on large datasets), then the pairwise kernels are
    computed from the cached paths.
    """
    self._add_dummy_labels(g_list + [g1])

    pool = Pool(self._n_jobs)
    indexed_graphs = zip(g_list, range(0, len(g_list)))
    n_graphs = len(g_list)
    # Heuristic chunk size: spread small inputs evenly, cap at 100.
    if n_graphs < 100 * self._n_jobs:
        chunksize = int(n_graphs / self._n_jobs) + 1
    else:
        chunksize = 100
    paths_g_list = [[] for _ in range(n_graphs)]

    # Choose the path-extraction routine matching the configuration.
    if self._k_func is None:
        paths_g1 = self._find_all_paths_until_length(g1)
        get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
    elif self._compute_method == 'trie':
        paths_g1 = self._find_all_path_as_trie(g1)
        get_ps_fun = self._wrapper_find_all_path_as_trie
    else:
        paths_g1 = self._find_all_paths_until_length(g1)
        get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)

    progress = get_iters(
        pool.imap_unordered(get_ps_fun, indexed_graphs, chunksize),
        desc='getting paths', file=sys.stdout,
        length=n_graphs, verbose=(self._verbose >= 2))
    # Results arrive unordered; each worker returns (index, paths).
    for idx, ps in progress:
        paths_g_list[idx] = ps
    pool.close()
    pool.join()

    # compute kernel list.
    kernel_list = [None] * n_graphs

    def init_worker(p1_toshare, plist_toshare):
        # Share path data with worker processes via module globals.
        global G_p1, G_plist
        G_p1 = p1_toshare
        G_plist = plist_toshare

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    parallel_me(self._wrapper_kernel_list_do, func_assign, kernel_list,
                range(n_graphs), len_itr=n_graphs,
                init_worker=init_worker, glbv=(paths_g1, paths_g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list`` in
    parallel, from precomputed (transposed) adjacency matrices.

    Only the default case (uniform ``p`` and ``q``, both ``None``) is
    implemented; in the other branches the result list is returned still
    filled with ``None``.
    """
    self._check_edge_weight(g_list + [g1], self._verbose)
    self._check_graphs(g_list + [g1])
    if self._verbose >= 2:
        import warnings
        warnings.warn('All labels are ignored.')

    # compute kernel list.
    kernel_list = [None] * len(g_list)

    if self._q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # A_wave_list actually contains the transposes of the adjacency matrices.
        A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
        iterator = get_iters(g_list, desc='compute adjacency matrices',
                             file=sys.stdout, verbose=(self._verbose >= 2))
        A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
                       for G in iterator]  # @todo: parallel?
        if self._p is None:  # p is uniform distribution as default.

            def init_worker(A_wave_1_toshare, A_wave_list_toshare):
                # Share the matrices with worker processes via module globals.
                global G_A_wave_1, G_A_wave_list
                G_A_wave_1 = A_wave_1_toshare
                G_A_wave_list = A_wave_list_toshare

            do_fun = self._wrapper_kernel_list_do

            def func_assign(result, var_to_assign):
                # result == (index, kernel value)
                var_to_assign[result[0]] = result[1]

            itr = range(len(g_list))
            len_itr = len(g_list)
            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                        init_worker=init_worker, glbv=(A_wave_1, A_wave_list),
                        method='imap_unordered', n_jobs=self._n_jobs,
                        itr_desc='Computing kernels', verbose=self._verbose)

        else:  # @todo: non-uniform p not implemented.
            pass
    else:  # @todo: non-uniform q not implemented.
        pass

    return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute kernels between ``g1`` and every graph in ``g_list``.

    Only the 'subtree' base kernel is parallelized; any other base kernel
    falls back to the serial implementation (with a warning when verbose).
    """
    self._add_dummy_node_labels(g_list + [g1])

    if self._base_kernel != 'subtree':
        # No parallel implementation for this base kernel; use serial path.
        if self._verbose >= 2:
            import warnings
            warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
        return self._compute_kernel_list_series(g1, g_list)

    # One result slot per graph; workers fill them in arbitrary order.
    results = [None] * len(g_list)

    def init_worker(g1_toshare, g_list_toshare):
        # Publish shared data to worker processes via module globals.
        global G_g1, G_g_list
        G_g1 = g1_toshare
        G_g_list = g_list_toshare

    def func_assign(result, var_to_assign):
        # result == (index, kernel value)
        var_to_assign[result[0]] = result[1]

    parallel_me(self._wrapper_kernel_list_do, func_assign, results,
                range(len(g_list)), len_itr=len(g_list),
                init_worker=init_worker, glbv=(g1, g_list),
                method='imap_unordered', n_jobs=self._n_jobs,
                itr_desc='Computing kernels', verbose=self._verbose)

    return results