def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial):
    """Compute the GED matrix of ``graphs`` with the IPFP heuristic and pickle it.

    The result files are named after ``N``, ``num_solutions``, ``ratio``
    (two decimals) and ``trial``. If the GED matrix file for that
    combination already exists under ``save_dir``, nothing is computed.

    Args:
        graphs: Graphs handed unchanged to ``compute_geds``.
        N: Value used only to build the output file names.
        num_solutions: Four times this value is requested as
            ``initial_solutions``.
        ratio: Factor applied to the first three edit cost constants; the
            last three stay at 1.
        trial: Trial identifier, used only in the output file names.

    Returns:
        ``(None, None)`` when the GED matrix file already exists. Otherwise
        ``(ged_mat, runtime)``; if ``compute_geds`` raised, ``ged_mat`` is
        the string ``'error'`` and ``runtime`` is ``0``.

    Side effects:
        Writes ``ged_matrix<suffix>.pkl`` and ``runtime<suffix>.pkl`` into
        ``save_dir`` (also after a failure) and, on failure, logs the
        traceback to ``save_dir + 'error.txt'``.
    """
    save_file_suffix = '.{}.num_sols_{}.ratio_{:.2f}.trial_{}'.format(
        N, num_solutions, ratio, trial)

    # Skip combinations whose GED matrix has already been stored.
    ged_mat_path = save_dir + 'ged_matrix' + save_file_suffix + '.pkl'
    if os.path.isfile(ged_mat_path):
        return None, None

    # Everything passed to the GED computation, kept in one literal.
    options = {
        'method': 'IPFP',  # use the IPFP heuristic.
        'initialization_method': 'RANDOM',  # or 'NODE', etc.
        # when bigger than 1, then the method is considered mIPFP.
        'initial_solutions': int(num_solutions * 4),
        'edit_cost': 'CONSTANT',  # use CONSTANT cost.
        # the distance between non-symbolic node/edge labels is computed
        # by euclidean distance.
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 0.25,
        # parallel threads. Do not work if mpg_options['parallel'] = False.
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
        # first three constants scaled by ``ratio``, last three fixed at 1.
        'edit_cost_constants': [unit * ratio for unit in (1, 1, 1)] + [1, 1, 1],
        # no labels or attributes are taken into account here.
        'node_labels': [],
        'edge_labels': [],
        'node_attrs': [],
        'edge_attrs': [],
    }

    # Placeholders that end up on disk when the computation fails.
    ged_mat = 'error'
    runtime = 0
    try:
        started = time.time()
        _, ged_mat, _ = compute_geds(
            graphs, options=options, repeats=1, parallel=True, verbose=True)
        runtime = time.time() - started
    except Exception as exp:
        # Best effort: record the failure and still write the placeholders.
        print('An exception occured when running this experiment:')
        logging.basicConfig(filename=save_dir + 'error.txt',
                            level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    # Persist the matrix first, then the runtime.
    for stem, payload in (('ged_matrix', ged_mat), ('runtime', runtime)):
        with open(save_dir + stem + save_file_suffix + '.pkl', 'wb') as f:
            pickle.dump(payload, f)

    return ged_mat, runtime
def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
    """Compute the GED matrix of a dataset with the BIPARTITE heuristic and pickle it.

    The result files are named after ``ds_name``, ``max_num_solutions``,
    ``ratio`` (two decimals) and ``trial``. If the GED matrix file for that
    combination already exists under ``save_dir``, nothing is computed.

    Args:
        dataset: Object providing ``graphs``, ``node_labels``,
            ``edge_labels``, ``node_attrs`` and ``edge_attrs``.
        ds_name: Dataset name, used only to build the output file names.
        max_num_solutions: Forwarded as the ``max_num_solutions`` option.
        ratio: Factor applied to the first three edit cost constants; the
            last three stay at 1.
        trial: Trial identifier, used only in the output file names.

    Returns:
        ``(None, None)`` when the GED matrix file already exists. Otherwise
        ``(ged_mat, runtime)``; if ``compute_geds`` raised, ``ged_mat`` is
        the string ``'error'`` and ``runtime`` is ``0``.

    Side effects:
        Writes ``ged_matrix<suffix>.pkl`` and ``runtime<suffix>.pkl`` into
        ``save_dir`` (also after a failure) and, on failure, logs the
        traceback to ``save_dir + 'error.txt'``.
    """

    def result_path(stem):
        # All result files share the directory, the suffix and the extension.
        return save_dir + stem + save_file_suffix + '.pkl'

    save_file_suffix = '.{}.mnum_sols_{}.ratio_{:.2f}.trial_{}'.format(
        ds_name, max_num_solutions, ratio, trial)

    # Skip combinations whose GED matrix has already been stored.
    if os.path.isfile(result_path('ged_matrix')):
        return None, None

    # Everything passed to the GED computation, kept in one literal.
    options = {
        'method': 'BIPARTITE',  # use the BIPARTITE heuristic.
        'lsape_model': 'ECBP',
        # the actual number of computed solutions might be smaller than the
        # specified value.
        # NOTE(review): the previous comment here mentioned mIPFP, which
        # looks carried over from an IPFP variant - confirm.
        'max_num_solutions': max_num_solutions,
        'edit_cost': 'CONSTANT',  # use CONSTANT cost.
        'greedy_method': 'BASIC',
        # the distance between non-symbolic node/edge labels is computed
        # by euclidean distance.
        'attr_distance': 'euclidean',
        'optimal': True,  # if TRUE, the option --greedy-method has no effect
        # parallel threads. Do not work if mpg_options['parallel'] = False.
        'threads': multiprocessing.cpu_count(),
        'centrality_method': 'NONE',
        'centrality_weight': 0.7,
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
        # first three constants scaled by ``ratio``, last three fixed at 1.
        'edit_cost_constants': [unit * ratio for unit in (1, 1, 1)] + [1, 1, 1],
        # labels and attributes come straight from the dataset.
        'node_labels': dataset.node_labels,
        'edge_labels': dataset.edge_labels,
        'node_attrs': dataset.node_attrs,
        'edge_attrs': dataset.edge_attrs,
    }

    # Placeholders that end up on disk when the computation fails.
    ged_mat = 'error'
    runtime = 0
    try:
        started = time.time()
        _, ged_mat, _ = compute_geds(dataset.graphs,
                                     options=options,
                                     repeats=1,
                                     parallel=True,
                                     verbose=True)
        runtime = time.time() - started
    except Exception as exp:
        # Best effort: record the failure and still write the placeholders.
        print('An exception occured when running this experiment:')
        logging.basicConfig(filename=save_dir + 'error.txt',
                            level=logging.DEBUG)
        logging.exception(save_file_suffix)
        print(repr(exp))

    # Persist the matrix first, then the runtime.
    for stem, payload in (('ged_matrix', ged_mat), ('runtime', runtime)):
        with open(result_path(stem), 'wb') as f:
            pickle.dump(payload, f)

    return ged_mat, runtime
# Example #3
# 0
	def __optimize_ecc_by_kernel_distances(self):
		"""Fit the edit cost constants to the pairwise kernel distances.

		The strict upper triangle of the distance matrix returned by
		``self._graph_kernel.compute_distance_matrix()`` is flattened into a
		target vector. Starting from ``self.__init_ecc``, each iteration
		asks ``self.__update_ecc`` for new edit cost constants, recomputes
		the GEDs with ``compute_geds`` and records the residual, i.e. the
		Euclidean norm of (GED vector - target vector). The loop runs until
		``self.__termination_criterion_met`` returns a true value.

		An iteration counts as converged when neither the edit costs nor
		the residual changed, relative to the new value, by more than
		``self.__epsilon_ec`` / ``self.__epsilon_residual``.

		Updates ``self.__edit_cost_constants``, ``self.__converged``,
		``self.__itrs`` and ``self.__num_updates_ecc``; returns nothing.

		Raises:
			ValueError: if an updated edit cost is below ``-1e-9``.
		"""

		def compute_current_geds():
			# GED options completed with the current edit costs and the
			# label / attribute names of the dataset.
			settings = self.__ged_options.copy()
			settings['edit_cost_constants'] = self.__edit_cost_constants # @todo
			settings['node_labels'] = self._dataset.node_labels
			settings['edge_labels'] = self._dataset.edge_labels
			settings['node_attrs'] = self._dataset.node_attrs
			settings['edge_attrs'] = self._dataset.edge_attrs
			return compute_geds(graphs, options=settings, parallel=self.__parallel, verbose=(self._verbose > 1))

		def residual_of(ged_vector):
			# Euclidean norm of the gap between GEDs and kernel distances.
			return np.sqrt(np.sum(np.square(np.array(ged_vector) - dis_k_vec)))

		def changed_beyond(current, previous, tolerance):
			# Relative change w.r.t. the current value; a zero current value
			# cannot be a denominator, so the previous value is compared
			# against the tolerance directly.
			if current == 0:
				return bool(previous > tolerance)
			return bool(abs(current - previous) / current > tolerance)

		chatty = self._verbose >= 2
		rule = '-------------------------------------------------------------------------'

		# distances in feature space, strict upper triangle in row-major order.
		dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
		size = len(dis_k_mat)
		dis_k_vec = np.array([
			dis_k_mat[row, col]
			for row in range(size)
			for col in range(row + 1, size)
		])

		# initial GEDs, computed with the initial edit costs.
		if chatty:
			print('\ninitial:')
		tic = time.time()
		graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
		self.__edit_cost_constants = self.__init_ecc
		ged_vec_init, _, n_edit_operations = compute_current_geds()
		residual_list = [residual_of(ged_vec_init)]
		time_list = [time.time() - tic]
		edit_cost_list = [self.__init_ecc]
		nb_cost_mat = np.array(n_edit_operations)
		nb_cost_mat_list = [nb_cost_mat]
		if chatty:
			print('Current edit cost constants:', self.__edit_cost_constants)
			print('Residual list:', residual_list)

		# iterate, starting from the initial edit costs.
		self.__converged = False
		itrs_without_update = 0
		self.__itrs = 0
		self.__num_updates_ecc = 0
		timer = Timer(self.__time_limit_in_sec)
		while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update):
			if chatty:
				print('\niteration', self.__itrs + 1)
			tic = time.time()

			# fit the GEDs to the feature-space distances by tuning the edit costs.
			self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec)
			costs = self.__edit_cost_constants
			for idx, value in enumerate(costs):
				if -1e-9 <= value <= 1e-9:
					# snap numerical noise around zero to exactly zero.
					costs[idx] = 0
				elif value < 0:
					raise ValueError('The edit cost is negative.')

			# new GEDs and numbers of edit operations under the updated costs.
			ged_vec, _, n_edit_operations = compute_current_geds()
			residual_list.append(residual_of(ged_vec))
			time_list.append(time.time() - tic)
			edit_cost_list.append(self.__edit_cost_constants)
			nb_cost_mat = np.array(n_edit_operations)
			nb_cost_mat_list.append(nb_cost_mat)

			# convergence check against the previous iteration.
			previous_costs = edit_cost_list[-2]
			ec_changed = any(
				changed_beyond(cost, previous_costs[idx], self.__epsilon_ec)
				for idx, cost in enumerate(self.__edit_cost_constants)
			)
			residual_changed = changed_beyond(residual_list[-1], residual_list[-2], self.__epsilon_residual)
			self.__converged = not (ec_changed or residual_changed)
			if self.__converged:
				itrs_without_update += 1
			else:
				itrs_without_update = 0
				self.__num_updates_ecc += 1

			# report the state reached by this iteration.
			if chatty:
				print()
				print(rule)
				print('States of iteration', self.__itrs + 1)
				print(rule)
				print('Total number of iterations for optimizing:', self.__itrs + 1)
				print('Total number of updating edit costs:', self.__num_updates_ecc)
				print('Was optimization of edit costs converged:', self.__converged)
				print('Did edit costs change:', ec_changed)
				print('Did residual change:', residual_changed)
				print('Iterations without update:', itrs_without_update)
				print('Current edit cost constants:', self.__edit_cost_constants)
				print('Residual list:', residual_list)
				print(rule)

			self.__itrs += 1