import logging
import multiprocessing
import os
import pickle
import time

import numpy as np

# compute_geds and Timer are assumed to come from the gklearn library
# (gklearn.ged.util and gklearn.utils.timer respectively); save_dir is assumed
# to be defined elsewhere in the experiment module.
from gklearn.ged.util import compute_geds
from gklearn.utils.timer import Timer


def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial):

    save_file_suffix = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {
        'method': 'IPFP',  # use the IPFP heuristic.
        'initialization_method': 'RANDOM',  # or 'NODE', etc.
        # when bigger than 1, the method is considered mIPFP.
        'initial_solutions': int(num_solutions * 4),
        'edit_cost': 'CONSTANT',  # use CONSTANT cost.
        # the distance between non-symbolic node/edge labels is computed by euclidean distance.
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 0.25,
        # parallel threads. Does not work if mpg_options['parallel'] == False.
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
    }

    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = []
    options['edge_labels'] = []
    options['node_attrs'] = []
    options['edge_attrs'] = []
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(
            graphs, options=options, repeats=1, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime
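# A minimal driver sketch (not part of the original script) showing how the
# IPFP variant above might be swept over a grid of solution counts, cost ratios
# and trials. The `graphs_by_size` mapping and the parameter grids are
# hypothetical placeholders; the real experiment defines its own graphs and
# value ranges.
def run_ipfp_experiments(graphs_by_size, num_solutions_list, ratio_list, num_trials=5):
    results = {}
    for N, graphs in graphs_by_size.items():
        for num_solutions in num_solutions_list:
            for ratio in ratio_list:
                for trial in range(1, num_trials + 1):
                    # xp_compute_ged_matrix returns (None, None) when the
                    # corresponding result file already exists on disk.
                    ged_mat, runtime = xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial)
                    results[(N, num_solutions, ratio, trial)] = (ged_mat, runtime)
    return results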
# BIPARTITE variant: computes the GED matrix for a Dataset object, bounding the
# number of LSAPE solutions per pair.
def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):

    save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

    # Return if the file exists.
    if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
        return None, None

    """**2. Set parameters.**"""

    # Parameters for GED computation.
    ged_options = {
        'method': 'BIPARTITE',  # use the BIPARTITE heuristic.
        # 'initialization_method': 'RANDOM',  # or 'NODE', etc. (for GEDEnv)
        'lsape_model': 'ECBP',
        # the actual number of computed solutions might be smaller than the specified value.
        'max_num_solutions': max_num_solutions,
        'edit_cost': 'CONSTANT',  # use CONSTANT cost.
        'greedy_method': 'BASIC',
        # the distance between non-symbolic node/edge labels is computed by euclidean distance.
        'attr_distance': 'euclidean',
        'optimal': True,  # if True, the option --greedy-method has no effect.
        # parallel threads. Does not work if mpg_options['parallel'] == False.
        'threads': multiprocessing.cpu_count(),
        'centrality_method': 'NONE',
        'centrality_weight': 0.7,
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
    }

    edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
    # edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
    # pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

    options = ged_options.copy()
    options['edit_cost_constants'] = edit_cost_constants
    options['node_labels'] = dataset.node_labels
    options['edge_labels'] = dataset.edge_labels
    options['node_attrs'] = dataset.node_attrs
    options['edge_attrs'] = dataset.edge_attrs
    parallel = True  # if num_solutions == 1 else False

    """**5. Compute GED matrix.**"""
    ged_mat = 'error'
    runtime = 0
    try:
        time0 = time.time()
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(
            dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True)
        runtime = time.time() - time0
    except Exception as exp:
        print('An exception occurred when running this experiment:')
        LOG_FILENAME = save_dir + 'error.txt'
        logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    """**6. Get results.**"""
    with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(ged_mat, f)
    with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
        pickle.dump(runtime, f)

    return ged_mat, runtime
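# A small helper sketch (an assumption, not from the original script) for
# loading the pickled results written by the BIPARTITE variant above. The
# directory and parameter values must match whatever was used when the matrices
# were computed; any concrete values passed in are placeholders.
def load_ged_results(save_dir, ds_name, max_num_solutions, ratio, trial):
    suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
    # The GED matrix is stored as 'error' if the computation raised an exception.
    with open(save_dir + 'ged_matrix' + suffix + '.pkl', 'rb') as f:
        ged_mat = pickle.load(f)
    with open(save_dir + 'runtime' + suffix + '.pkl', 'rb') as f:
        runtime = pickle.load(f)
    return ged_mat, runtime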
# Method excerpt: `self` refers to the enclosing edit-cost optimizer object.
def __optimize_ecc_by_kernel_distances(self):
    # compute distances in feature space.
    dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
    dis_k_vec = []
    for i in range(len(dis_k_mat)):
        # for j in range(i, len(dis_k_mat)):
        for j in range(i + 1, len(dis_k_mat)):
            dis_k_vec.append(dis_k_mat[i, j])
    dis_k_vec = np.array(dis_k_vec)

    # init ged.
    if self._verbose >= 2:
        print('\ninitial:')
    time0 = time.time()
    graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
    self.__edit_cost_constants = self.__init_ecc
    options = self.__ged_options.copy()
    options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
    options['node_labels'] = self._dataset.node_labels
    options['edge_labels'] = self._dataset.edge_labels
    options['node_attrs'] = self._dataset.node_attrs
    options['edge_attrs'] = self._dataset.edge_attrs
    ged_vec_init, ged_mat, n_edit_operations = compute_geds(
        graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
    residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
    time_list = [time.time() - time0]
    edit_cost_list = [self.__init_ecc]
    nb_cost_mat = np.array(n_edit_operations)
    nb_cost_mat_list = [nb_cost_mat]
    if self._verbose >= 2:
        print('Current edit cost constants:', self.__edit_cost_constants)
        print('Residual list:', residual_list)

    # run iteration from initial edit costs.
    self.__converged = False
    itrs_without_update = 0
    self.__itrs = 0
    self.__num_updates_ecc = 0
    timer = Timer(self.__time_limit_in_sec)
    while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update):
        if self._verbose >= 2:
            print('\niteration', self.__itrs + 1)
        time0 = time.time()
        # "fit" GEDs to distances in feature space by tuning edit costs using the least squares method.
        # np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm',
        #          nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
        #          n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
        #          ged_mat=ged_mat)
        self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec)
        for i in range(len(self.__edit_cost_constants)):
            if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9:
                self.__edit_cost_constants[i] = 0
            if self.__edit_cost_constants[i] < 0:
                raise ValueError('The edit cost is negative.')
        # for i in range(len(self.__edit_cost_constants)):
        #     if self.__edit_cost_constants[i] < 0:
        #         self.__edit_cost_constants[i] = 0

        # compute new GEDs and numbers of edit operations.
        options = self.__ged_options.copy()  # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
        options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
        options['node_labels'] = self._dataset.node_labels
        options['edge_labels'] = self._dataset.edge_labels
        options['node_attrs'] = self._dataset.node_attrs
        options['edge_attrs'] = self._dataset.edge_attrs
        ged_vec, ged_mat, n_edit_operations = compute_geds(
            graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
        residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
        time_list.append(time.time() - time0)
        edit_cost_list.append(self.__edit_cost_constants)
        nb_cost_mat = np.array(n_edit_operations)
        nb_cost_mat_list.append(nb_cost_mat)

        # check convergence.
        ec_changed = False
        for i, cost in enumerate(self.__edit_cost_constants):
            if cost == 0:
                if edit_cost_list[-2][i] > self.__epsilon_ec:
                    ec_changed = True
                    break
            elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec:
                ec_changed = True
                break
            # if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
            #     ec_changed = True
            #     break
        residual_changed = False
        if residual_list[-1] == 0:
            if residual_list[-2] > self.__epsilon_residual:
                residual_changed = True
        elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual:
            residual_changed = True
        self.__converged = not (ec_changed or residual_changed)
        if self.__converged:
            itrs_without_update += 1
        else:
            itrs_without_update = 0
            self.__num_updates_ecc += 1

        # print current states.
        if self._verbose >= 2:
            print()
            print('-------------------------------------------------------------------------')
            print('States of iteration', self.__itrs + 1)
            print('-------------------------------------------------------------------------')
            # print('Time spent:', self.__runtime_optimize_ec)
            print('Total number of iterations for optimizing:', self.__itrs + 1)
            print('Total number of edit cost updates:', self.__num_updates_ecc)
            print('Has the optimization of edit costs converged:', self.__converged)
            print('Did edit costs change:', ec_changed)
            print('Did residual change:', residual_changed)
            print('Iterations without update:', itrs_without_update)
            print('Current edit cost constants:', self.__edit_cost_constants)
            print('Residual list:', residual_list)
            print('-------------------------------------------------------------------------')

        self.__itrs += 1
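# A minimal sketch of the "fit GEDs to kernel distances" step referenced above:
# given the matrix of edit-operation counts (one row per graph pair, one column
# per edit operation) and the vector of kernel-induced distances, find
# non-negative edit cost constants w such that nb_cost_mat @ w approximates
# dis_k_vec in the least-squares sense. This is an assumption-based stand-in
# for self.__update_ecc, which is not shown here and may impose additional
# constraints on the costs.
import numpy as np
from scipy.optimize import nnls


def fit_edit_cost_constants(nb_cost_mat, dis_k_vec):
    # nnls solves min ||A w - b||_2 subject to w >= 0.
    w, residual_norm = nnls(np.asarray(nb_cost_mat, dtype=float),
                            np.asarray(dis_k_vec, dtype=float))
    return w, residual_norm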