def __init__(self, graph: GraphSignalGraph, obj: GraphSignalData,
             node2id: Optional[Mapping[object, int]] = None):
    """Graph signals should **ALWAYS** be instantiated through the method
    to_signal, which handles non-instantiation semantics."""
    self.graph = graph
    self.node2id = {v: i for i, v in enumerate(graph)} if node2id is None else node2id
    if backend.is_array(obj):
        if backend.length(graph) != backend.length(obj):
            raise Exception("Graph signal array dimensions " + str(backend.length(obj))
                            + " should be equal to graph nodes " + str(backend.length(graph)))
        self.np = backend.to_array(obj)
    elif obj is None:
        self.np = backend.repeat(1.0, len(graph))
    else:
        import numpy as np  # tensorflow does not support item assignment on eager tensors
        self.np = np.repeat(0.0, len(graph))
        for key, value in obj.items():
            self[key] = value
        # perform all element assignments with numpy, then potentially switch to tensorflow
        self.np = backend.to_array(self.np)
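# A minimal usage sketch (the networkx graph and values here are illustrative,
# not taken from the library): prefer the to_signal helper named in the
# docstring over direct construction.
#
#     import networkx as nx
#     graph = nx.Graph([("a", "b"), ("b", "c")])
#     signal = to_signal(graph, {"a": 1.0})  # nodes missing from the dict default to 0
#     assert signal["b"] == 0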
def to_numpy(self, scores: GraphSignalData, normalization: bool = False) \
        -> Union[Tuple[GraphSignal, GraphSignal], Tuple[BackendPrimitive, BackendPrimitive]]:
    if isinstance(scores, numbers.Number) and isinstance(self.known_scores, numbers.Number):
        return backend.to_array([self.known_scores]), backend.to_array([scores])
    elif isinstance(scores, GraphSignal):
        return to_signal(scores, self.known_scores).filter(exclude=self.exclude), \
               scores.normalized(normalization).filter(exclude=self.exclude)
    elif isinstance(self.known_scores, GraphSignal):
        return self.known_scores.filter(exclude=self.exclude), \
               to_signal(self.known_scores, scores).normalized(normalization).filter(exclude=self.exclude)
    else:
        if self.exclude is not None:
            raise Exception("Excluding specific nodes requires either scores "
                            "or known_scores to be a graph signal")
        scores = backend.self_normalize(backend.to_array(scores, copy_array=True)) \
            if normalization else backend.to_array(scores)
        return backend.to_array(self.known_scores), scores
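# Hedged call-pattern sketch (the measure and algorithm names are assumptions):
# a supervised measure that stores known_scores uses to_numpy to align them with
# freshly computed scores before comparison, e.g.
#
#     known, predicted = measure.to_numpy(algorithm.rank(graph, training))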
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    prev_dropout = kwargs.get("graph_dropout")
    kwargs["graph_dropout"] = 0  # disable dropout while comparing candidate rankers
    best_value = -float('inf')
    best_ranker = None
    fraction_of_training = self.fraction_of_training \
        if isinstance(self.fraction_of_training, Iterable) else [self.fraction_of_training]
    for ranker in self.rankers:
        values = list()
        for seed, fraction in enumerate(fraction_of_training):
            training, validation = split(backend_personalization, fraction, seed=seed)
            measure = self.measure(validation, training)
            values.append(measure.best_direction()
                          * measure.evaluate(ranker.rank(training, *args, **kwargs)))
        value = np.min(values)  # keep each ranker's worst split, i.e. select conservatively
        if value > best_value:
            best_value = value
            best_ranker = ranker
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    kwargs["graph_dropout"] = prev_dropout
    return best_ranker, personalization if self.combined_prediction else training
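# Sketch of how such a selection tuner is typically driven (the constructor name
# and arguments are assumptions for illustration); callers never invoke _tune
# directly, since rank() delegates to it:
#
#     tuner = AlgorithmSelection(rankers, measure=AUC, fraction_of_training=[0.5, 0.75])
#     scores = tuner.rank(graph, personalization)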
def __init__(self, graph: GraphSignalGraph, obj: GraphSignalData,
             node2id: Optional[Mapping[object, int]] = None):
    """Graph signals should **ALWAYS** be instantiated through the method
    to_signal, which handles non-instantiation semantics."""
    if node2id is not None:
        self.node2id = node2id
    elif hasattr(graph, "_pygrank_node2id"):  # obtained from preprocessing
        self.node2id = graph._pygrank_node2id
    elif hasattr(graph, "shape"):  # externally defined matrix-like type
        self.node2id = {i: i for i in range(graph.shape[0])}
    else:  # this is the case where it is an actual graph
        self.node2id = {v: i for i, v in enumerate(graph)}
    self.graph = graph
    graph_len = graph.shape[0] if hasattr(graph, "shape") else len(graph)
    if backend.is_array(obj):
        if graph_len != backend.length(obj):
            raise Exception("Graph signal array dimensions " + str(backend.length(obj))
                            + " should be equal to graph nodes " + str(graph_len))
        self._np = backend.to_array(obj)
    elif obj is None:
        self._np = backend.repeat(1.0, graph_len)
    else:
        import numpy as np  # tensorflow does not support item assignment on eager tensors
        self._np = np.repeat(0.0, graph_len)
        for key, value in obj.items():
            self[key] = value
        # perform all element assignments with numpy, then potentially switch to tensorflow
        self._np = backend.to_array(self._np)
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    training, validation = split(backend_personalization, self.fraction_of_training)
    measure = self.measure(validation, training)
    best_params = optimize(
        lambda params: -measure.best_direction()
                       * measure.evaluate(self._run(training, params, *args, **kwargs)),
        **self.optimize_args)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    # (do this with a gather in the split method)
    return self.ranker_generator(best_params), \
           personalization if self.combined_prediction else training
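# Illustrative construction (the ParameterTuner name is an assumption; the
# optimize_args keys mirror the optimize() calls elsewhere in this file): the
# lambda negates the measure so that minimizing it maximizes ranking quality.
#
#     tuner = ParameterTuner(ranker_generator, measure=AUC,
#                            optimize_args=dict(max_vals=[0.99], min_vals=[0.5]))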
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    training, validation = split(backend_personalization, self.fraction_of_training)
    measure = self.measure(validation, training)
    best_value = -float('inf')
    best_ranker = None
    for ranker in self.rankers:
        value = measure.best_direction() * measure.evaluate(ranker.rank(training, *args, **kwargs))
        if value > best_value:
            best_value = value
            best_ranker = ranker
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    return best_ranker, personalization if self.combined_prediction else training
def np(self, value):  # property setter paired with the np getter below
    self._np = backend.to_array(self.__compliant_value(value))
def np(self):  # property getter: expose stored values as an array of the active backend
    return backend.to_array(self._np)
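# Together with the setter above, this forms the np property of graph signals.
# A hedged round-trip example:
#
#     signal.np = signal.np * 2  # rescale all node values within the active backend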
def krylov2original(V, filterH, krylov_space_degree: int):
    # map a filtered Krylov-space representation back to the original node space
    if isinstance(V, (int, float)):
        V = backend.ones((krylov_space_degree, krylov_space_degree)) * V
    ret = V @ filterH
    return backend.to_array(ret[:, 0])
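# Why the first column: for an Arnoldi decomposition M V ~= V H built from a
# seed vector b, a filter h satisfies h(M) b ~= ||b|| V h(H) e1, so keeping
# column 0 of V @ filterH recovers the node-space result up to the seed norm.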
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    graph = personalization.graph
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(personalization, backend.to_array(personalization.np))
    measure_values = [None] * (self.num_parameters + self.autoregression)
    M = self.ranker_generator(measure_values).preprocessor(graph)
    training, validation = split(backend_personalization, 0.8)
    training1, training2 = split(training, 0.5)
    propagated = [training1.np, training2.np]
    measures = [self.measure(backend_personalization, training1),
                self.measure(backend_personalization, training2)]
    if self.basis == "krylov":
        # evaluate the measure on successive powers of M applied to the training signals
        for i in range(len(measure_values)):
            measure_values[i] = [measure(p) for p, measure in zip(propagated, measures)]
            propagated = [backend.conv(p, M) for p in propagated]
    else:
        # evaluate the measure on the columns of an orthonormal Arnoldi basis instead
        basis = [arnoldi_iteration(M, p, len(measure_values))[0] for p in propagated]
        for i in range(len(measure_values)):
            measure_values[i] = [float(measure(base[:, i])) for base, measure in zip(basis, measures)]
    measure_values = backend.to_primitive(measure_values)
    mean_value = backend.mean(measure_values, axis=0)
    measure_values = measure_values - mean_value
    best_parameters = measure_values
    measure_weights = [1] * measure_values.shape[1]
    if self.autoregression != 0:
        # fit an autoregressive window with a hand-rolled Adam optimizer (beta1=0.9,
        # beta2=0.999) so that later measure values are predicted from preceding ones
        window = backend.repeat(1. / self.autoregression, self.autoregression)
        beta1 = 0.9
        beta2 = 0.999
        beta1t = 1
        beta2t = 1
        rms = window * 0
        momentum = window * 0
        error = float('inf')
        while True:
            beta1t *= beta1
            beta2t *= beta2
            prev_error = error
            parameters = backend.copy(measure_values)
            for i in range(len(measure_values) - len(window) - 2, -1, -1):
                parameters[i, :] = backend.dot(window, measure_values[(i + 1):(i + len(window) + 1), :])
            errors = (parameters - measure_values) * measure_weights / backend.sum(measure_weights)
            for j in range(len(window)):
                gradient = 0
                for i in range(len(measure_values) - len(window) - 1):
                    gradient += backend.dot(measure_values[i + j + 1, :], errors[i, :])
                momentum[j] = beta1 * momentum[j] + (1 - beta1) * gradient
                rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                window[j] -= 0.01 * momentum[j] / (1 - beta1t) / ((rms[j] / (1 - beta2t)) ** 0.5 + 1.E-8)
            error = backend.mean(backend.abs(errors))
            if error == 0 or abs(error - prev_error) / error < 1.E-6:
                best_parameters = parameters
                break
    best_parameters = backend.mean(best_parameters[:self.num_parameters, :]
                                   * backend.to_primitive(measure_weights),
                                   axis=1) + backend.mean(mean_value)
    if self.tunable_offset is not None:
        div = backend.max(best_parameters)
        if div != 0:
            best_parameters /= div
        measure = self.tunable_offset(validation, training)
        base = basis[0] if self.basis != "krylov" else None
        best_offset = optimize(
            lambda params: -measure.best_direction() * measure(
                self._run(training,
                          [(best_parameters[i] + params[2]) * params[0] ** i + params[1]
                           for i in range(len(best_parameters))],
                          base, *args, **kwargs)),
            max_vals=[1, 0, 0], min_vals=[0, 0, 0],
            deviation_tol=0.005, parameter_tol=1, partitions=5, divide_range=2)
        best_parameters = [(best_parameters[i] + best_offset[2]) * best_offset[0] ** i + best_offset[1]
                           for i in range(len(best_parameters))]
    best_parameters = backend.to_primitive(best_parameters)
    if backend.sum(backend.abs(best_parameters)) != 0:
        best_parameters /= backend.mean(backend.abs(best_parameters))
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        best_parameters = [float(param) for param in best_parameters]  # backend-independent list
        backend.load_backend(previous_backend)
    if self.basis != "krylov":
        # TODO: make this unnecessary
        return Tautology(), self._run(personalization, best_parameters, *args, **kwargs)
    return self.ranker_generator(best_parameters), personalization
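# A minimal sketch of the autoregression target fitted above (hypothetical
# standalone numpy version, for intuition only): find a window w so that
# X[i] ~= w . X[i+1 : i+k+1] over rows i of the measure-value matrix X.
#
#     import numpy as np
#     def ar_predict(X, w):
#         k = len(w)
#         P = X.copy()
#         for i in range(len(X) - k - 2, -1, -1):
#             P[i, :] = w @ X[i + 1:i + k + 1, :]
#         return P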
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    total_params = list()
    for seed0 in range(self.cross_validate):
        fraction_of_training = self.fraction_of_training \
            if isinstance(self.fraction_of_training, Iterable) else [self.fraction_of_training]
        internal_training_list = list()
        validation_list = list()
        for seed, fraction in enumerate(fraction_of_training):
            training, validation = split(backend_personalization, fraction, seed0 + seed)
            internal_training = training
            if self.pre_diffuse is not None:
                internal_training = self.pre_diffuse(internal_training)
                validation = self.pre_diffuse(validation)
            internal_training_list.append(internal_training)
            validation_list.append(validation)

        def eval_params(params):  # renamed from eval to avoid shadowing the builtin
            val = 0
            for internal_training, validation in zip(internal_training_list, validation_list):
                measure = self.measure(validation,
                                       internal_training if internal_training != validation else None)
                val = val - measure.best_direction() * measure.evaluate(
                    self._run(internal_training, params, *args, **kwargs))
            return val / len(internal_training_list)

        best_params = self.optimizer(eval_params, **self.optimize_args)
        total_params.append(best_params)
    best_params = [0 for _ in best_params]
    best_squares = [0 for _ in best_params]
    best_means = [0 for _ in best_params]
    for params in total_params:
        for i in range(len(best_params)):
            best_params[i] = max(best_params[i], params[i])
            best_means[i] += params[i] / self.cross_validate
            best_squares[i] += params[i] ** 2 / self.cross_validate
    best_params = best_means  # the per-fold mean is what is actually returned
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    # (do this with a gather in the split method)
    self.last_params = best_params
    return self.ranker_generator(best_params), \
           personalization if self.combined_prediction else internal_training
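# Hedged design note: averaging best_params over cross_validate re-splits (the
# best_means accumulator) trades per-split optimality for parameter stability;
# best_params and best_squares also track the per-parameter max and second
# moment, but only the mean is returned.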