def gnn_accuracy(labels, predictions, nodes):
    if backend.backend_name() == "tensorflow":
        return _gnn_accuracy_tf(labels, predictions, nodes)
    elif backend.backend_name() == "pytorch":
        return _gnn_accuracy_torch(labels, predictions, nodes)
    raise Exception("GNN accuracy is supported only for tensorflow and pytorch backends")
def gnn_train(*args, **kwargs):
    if backend.backend_name() == "tensorflow":
        return _gnn_train_tf(*args, **kwargs)
    elif backend.backend_name() == "pytorch":
        return _gnn_train_torch(*args, **kwargs)
    raise Exception("GNN training is supported only for tensorflow and pytorch backends")
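# Hedged usage sketch (not library code): the two dispatchers above route to the
# implementation matching the active backend, so callers are expected to load a backend
# first. `backend.load_backend` and `backend.backend_name` are the functions already
# referenced above; the tensors passed in are illustrative assumptions.
#
#     backend.load_backend("pytorch")
#     acc = gnn_accuracy(labels, predictions, nodes)   # routes to _gnn_accuracy_torch
#     backend.load_backend("numpy")
#     gnn_accuracy(labels, predictions, nodes)         # raises: only tensorflow/pytorch supported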
def _idfier(*args, **kwargs):
    """
    Converts args and kwargs into a hashable string of object ids.
    """
    return "[" + ",".join(obj2id(arg) for arg in args) + "]" \
           + "{" + ",".join(v + ":" + obj2id(kwarg) for v, kwarg in kwargs.items()) + "}" \
           + backend.backend_name()
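# Hedged sketch of how the identifier above can serve as a memoization key; the cache
# dictionary and the `expensive_rank` callable below are illustrative assumptions and
# not part of the library.
#
#     _cache = dict()
#
#     def cached_call(*args, **kwargs):
#         key = _idfier(*args, **kwargs)   # same objects + same backend -> same key
#         if key not in _cache:
#             _cache[key] = expensive_rank(*args, **kwargs)
#         return _cache[key]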
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    prev_dropout = kwargs.get("graph_dropout")
    kwargs["graph_dropout"] = 0
    best_value = -float('inf')
    best_ranker = None
    fraction_of_training = self.fraction_of_training \
        if isinstance(self.fraction_of_training, Iterable) else [self.fraction_of_training]
    for ranker in self.rankers:
        values = list()
        for seed, fraction in enumerate(fraction_of_training):
            training, validation = split(backend_personalization, fraction, seed=seed)
            measure = self.measure(validation, training)
            values.append(measure.best_direction() * measure.evaluate(ranker.rank(training, *args, **kwargs)))
        value = np.min(values)
        if value > best_value:
            best_value = value
            best_ranker = ranker
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    kwargs["graph_dropout"] = prev_dropout
    return best_ranker, personalization if self.combined_prediction else training
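# Hedged usage sketch for the selection logic above: each candidate ranker is scored on
# several train/validation splits and the minimum (worst-case) score is kept, so the
# selected algorithm is robust across splits. The class and constructor arguments below
# (candidate list, `measure`, `fraction_of_training`) only mirror the attributes read by
# _tune; the exact public signature is an assumption.
#
#     candidates = [PageRank(alpha) for alpha in (0.5, 0.85, 0.99)]
#     tuner = AlgorithmSelection(candidates, measure=AUC, fraction_of_training=[0.3, 0.5])
#     scores = tuner.rank(graph, personalization)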
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    training, validation = split(backend_personalization, self.fraction_of_training)
    measure = self.measure(validation, training)
    best_params = optimize(
        lambda params: -measure.best_direction() * measure.evaluate(self._run(training, params, *args, **kwargs)),
        **self.optimize_args)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
    return self.ranker_generator(best_params), personalization if self.combined_prediction else training
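# Hedged usage sketch for the optimizer-based tuner above: `optimize` searches the
# parameter space against the validation measure and the best parameters are handed to
# `ranker_generator`. The constructor call below (generator lambda, `max_vals`,
# `min_vals`, `measure`) is an assumption about how such a tuner is typically built.
#
#     tuner = ParameterTuner(lambda params: PageRank(alpha=params[0]),
#                            max_vals=[0.99], min_vals=[0.5], measure=AUC)
#     scores = tuner.rank(graph, personalization)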
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    training, validation = split(backend_personalization, self.fraction_of_training)
    measure = self.measure(validation, training)
    best_value = -float('inf')
    best_ranker = None
    for ranker in self.rankers:
        value = measure.best_direction() * measure.evaluate(ranker.rank(training, *args, **kwargs))
        if value > best_value:
            best_value = value
            best_ranker = ranker
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False
    return best_ranker, personalization if self.combined_prediction else training
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    #graph_dropout = kwargs.get("graph_dropout", 0)
    #kwargs["graph_dropout"] = 0
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    graph = personalization.graph
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(personalization, backend.to_array(personalization.np))
    #training, validation = split(backend_personalization, 0.8)
    #training2, validation2 = split(backend_personalization, 0.6)
    #measure_weights = [1, 1, 1, 1, 1]
    #propagated = [training.np, validation.np, backend_personalization.np, training2.np, validation2.np]
    measure_values = [None] * (self.num_parameters + self.autoregression)
    M = self.ranker_generator(measure_values).preprocessor(graph)
    #for _ in range(10):
    #    backend_personalization.np = backend.conv(backend_personalization.np, M)
    training, validation = split(backend_personalization, 0.8)
    training1, training2 = split(training, 0.5)
    propagated = [training1.np, training2.np]
    measures = [self.measure(backend_personalization, training1),
                self.measure(backend_personalization, training2)]
    #measures = [self.measure(validation, training), self.measure(training, validation)]
    if self.basis == "krylov":
        for i in range(len(measure_values)):
            measure_values[i] = [measure(p) for p, measure in zip(propagated, measures)]
            propagated = [backend.conv(p, M) for p in propagated]
    else:
        basis = [arnoldi_iteration(M, p, len(measure_values))[0] for p in propagated]
        for i in range(len(measure_values)):
            measure_values[i] = [float(measure(base[:, i])) for base, measure in zip(basis, measures)]
    measure_values = backend.to_primitive(measure_values)
    mean_value = backend.mean(measure_values, axis=0)
    measure_values = measure_values - mean_value
    best_parameters = measure_values
    measure_weights = [1] * measure_values.shape[1]
    if self.autoregression != 0:
        #vals2 = -measure_values-mean_value
        #measure_values = np.concatenate([measure_values, vals2-np.mean(vals2, axis=0)], axis=1)
        window = backend.repeat(1. / self.autoregression, self.autoregression)
        beta1 = 0.9
        beta2 = 0.999
        beta1t = 1
        beta2t = 1
        rms = window * 0
        momentum = window * 0
        error = float('inf')
        while True:
            beta1t *= beta1
            beta2t *= beta2
            prev_error = error
            parameters = backend.copy(measure_values)
            for i in range(len(measure_values) - len(window) - 2, -1, -1):
                parameters[i, :] = backend.dot(window, measure_values[(i + 1):(i + len(window) + 1), :])
            errors = (parameters - measure_values) * measure_weights / backend.sum(measure_weights)
            for j in range(len(window)):
                gradient = 0
                for i in range(len(measure_values) - len(window) - 1):
                    gradient += backend.dot(measure_values[i + j + 1, :], errors[i, :])
                momentum[j] = beta1 * momentum[j] + (1 - beta1) * gradient  #*np.sign(window[j])
                rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                window[j] -= 0.01 * momentum[j] / (1 - beta1t) / ((rms[j] / (1 - beta2t)) ** 0.5 + 1.E-8)
                #window[j] -= 0.01*gradient*np.sign(window[j])
            error = backend.mean(backend.abs(errors))
            if error == 0 or abs(error - prev_error) / error < 1.E-6:
                best_parameters = parameters
                break
    best_parameters = backend.mean(best_parameters[:self.num_parameters, :] * backend.to_primitive(measure_weights),
                                   axis=1) + backend.mean(mean_value)
    if self.tunable_offset is not None:
        div = backend.max(best_parameters)
        if div != 0:
            best_parameters /= div
        measure = self.tunable_offset(validation, training)
        base = basis[0] if self.basis != "krylov" else None
        best_offset = optimize(
            lambda params: -measure.best_direction() * measure(
                self._run(training,
                          [(best_parameters[i] + params[2]) * params[0] ** i + params[1]
                           for i in range(len(best_parameters))],
                          base, *args, **kwargs)),
            #lambda params: -measure.evaluate(self._run(training, best_parameters + params[0], *args, **kwargs)),
            max_vals=[1, 0, 0],
            min_vals=[0, 0, 0],
            deviation_tol=0.005,
            parameter_tol=1,
            partitions=5,
            divide_range=2)
        #best_parameters += best_offset[0]
        best_parameters = [(best_parameters[i] + best_offset[2]) * best_offset[0] ** i + best_offset[1]
                           for i in range(len(best_parameters))]
    best_parameters = backend.to_primitive(best_parameters)
    if backend.sum(backend.abs(best_parameters)) != 0:
        best_parameters /= backend.mean(backend.abs(best_parameters))
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        best_parameters = [float(param) for param in best_parameters]  # convert parameters to backend-independent list
        backend.load_backend(previous_backend)
    #kwargs["graph_dropout"] = graph_dropout
    if self.basis != "krylov":
        return Tautology(), self._run(personalization, best_parameters, *args, **kwargs)  # TODO: make this unnecessary
    return self.ranker_generator(best_parameters), personalization
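# A minimal numpy-style sketch of the Adam-style update used in the autoregression loop
# above (bias-corrected first and second moment estimates). Variable names, the learning
# rate 0.01, and the epsilon 1.E-8 are copied from that loop; `gradient` is assumed to be
# already computed, and beta1t/beta2t hold the running products beta1**t and beta2**t.
#
#     momentum = beta1 * momentum + (1 - beta1) * gradient
#     rms = beta2 * rms + (1 - beta2) * gradient * gradient
#     window -= 0.01 * (momentum / (1 - beta1t)) / ((rms / (1 - beta2t)) ** 0.5 + 1.E-8)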
def to_sparse_matrix(G,
                     normalization="auto",
                     weight="weight",
                     renormalize=False,
                     reduction=backend.degrees,
                     transform_adjacency=lambda x: x,
                     cors=False):
    """
    Used to normalize a graph and produce a sparse matrix representation.

    Args:
        G: A networkx or fastgraph graph. If an object with a "shape" attribute is provided
            (i.e. a backend matrix), it is returned as-is.
        normalization: Optional. The type of normalization can be "none", "col", "symmetric",
            "laplacian", "both", or "auto" (default). The last one selects between "col" and
            "symmetric" depending on whether the graph is directed or not respectively.
            Alternatively, this can be a callable that transforms a scipy sparse adjacency
            matrix into a normalized copy.
        weight: Optional. The weight attribute (default is "weight") of *networkx* graph edges.
            This is ignored when *fastgraph* graphs are parsed, as these are unweighted.
        renormalize: Optional. If True, the renormalization trick (self-loops) of graph neural
            networks is applied to ensure iteration stability by shrinking the graph's spectrum.
            Default is False. Anything that can be cast to a float can be provided to regularize
            the renormalization.
        reduction: Optional. A callable that controls how degrees are computed
            (e.g. `pygrank.eigdegree` for entropy-preserving transition matrices [li2011link]).
            Default is `pygrank.degrees`.
        transform_adjacency: Optional. A callable applied to the normalized adjacency matrix
            before it is converted to the active backend. Default is the identity.
        cors: Optional. Cross-origin resource (shared between backends). Default is False.
            If True, backend primitives holding the outcome of graph preprocessing are enriched
            with additional private metadata that enables their usage as base graphs when passed
            through other postprocessors in other backends. This is not required when constructing
            GraphSignal instances with the pattern `pygrank.to_signal(M, personalization_data)`
            where `M = pygrank.preprocessor(cors=True)(graph)`, but is mandatory when the two
            commands are called in different backends. Note that *cors* objects are not normalized
            again with other strategies by other preprocessors and compliance is not currently
            enforced. There is a **significant speedup** in using *cors* when frequently switching
            between backends for the same graphs. Furthermore, after defining such instances, they
            can be used in place of base graphs. If False (default), a lot of memory is saved by
            not keeping pointers to all versions of adjacency matrices among the backends that use
            them. Enabling *cors* and then visiting up to two backends, out of which one is
            "numpy", does not affect the maximum memory consumption of code processing one graph.
    """
    if hasattr(G, "__pygrank_preprocessed"):
        if backend.backend_name() in G.__pygrank_preprocessed:
            # this is basically caching, but it's pretty safe for just passing adjacency matrices around
            return G.__pygrank_preprocessed[backend.backend_name()]
        ret = backend.scipy_sparse_to_backend(G.__pygrank_preprocessed["numpy"])
        if cors:
            ret.__pygrank_preprocessed = G.__pygrank_preprocessed
            ret.__pygrank_preprocessed[backend.backend_name()] = ret
        else:
            ret.__pygrank_preprocessed = {backend.backend_name(): ret}
        ret._pygrank_node2id = G._pygrank_node2id
        return ret
    with backend.Backend("numpy"):
        normalization = normalization.lower() if isinstance(normalization, str) else normalization
        if normalization == "auto":
            normalization = "col" if G.is_directed() else "symmetric"
        M = G.to_scipy_sparse_array() if isinstance(G, fastgraph.Graph) \
            else nx.to_scipy_sparse_matrix(G, weight=weight, dtype=float)
        renormalize = float(renormalize)
        left_reduction = reduction  #(lambda x: backend.degrees(x)) if reduction == "sum" else reduction
        right_reduction = lambda x: left_reduction(x.T)
        if renormalize != 0:
            M = M + scipy.sparse.eye(M.shape[0]) * renormalize
        if normalization == "col":
            S = np.array(left_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Q * M
        elif normalization == "laplacian":
            S = np.array(np.sqrt(left_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(np.sqrt(right_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
            M = -M + scipy.sparse.eye(M.shape[0])
        elif normalization == "both":
            S = np.array(left_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(right_reduction(M)).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
        elif normalization == "symmetric":
            S = np.array(np.sqrt(left_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qleft = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            S = np.array(np.sqrt(right_reduction(M))).flatten()
            S[S != 0] = 1.0 / S[S != 0]
            Qright = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
            M = Qleft * M * Qright
        elif callable(normalization):
            M = normalization(M)
        elif normalization != "none":
            raise Exception("Supported normalizations: none, col, symmetric, both, laplacian, auto")
        M = transform_adjacency(M)
    ret = M if backend.backend_name() == "numpy" else backend.scipy_sparse_to_backend(M)
    ret._pygrank_node2id = {v: i for i, v in enumerate(G)}
    if cors:
        ret.__pygrank_preprocessed = {backend.backend_name(): ret, "numpy": M}
        M.__pygrank_preprocessed = ret.__pygrank_preprocessed
    else:
        ret.__pygrank_preprocessed = {backend.backend_name(): ret}
    return ret
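# Hedged usage sketch for the preprocessing above: with normalization="symmetric" and
# renormalize=True the result corresponds to the GCN-style matrix D^{-1/2} (A + I) D^{-1/2}.
# The tiny networkx graph below is illustrative only.
#
#     import networkx as nx
#     G = nx.Graph([(1, 2), (2, 3)])
#     M = to_sparse_matrix(G, normalization="symmetric", renormalize=True)
#     # cors=True would additionally let M be reused as a base graph across backends,
#     # as described in the docstring.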
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(graph, backend.to_array(personalization.np))
    total_params = list()
    for seed0 in range(self.cross_validate):
        fraction_of_training = self.fraction_of_training \
            if isinstance(self.fraction_of_training, Iterable) else [self.fraction_of_training]
        #fraction_of_training = [random.choice(fraction_of_training)]
        internal_training_list = list()
        validation_list = list()
        for seed, fraction in enumerate(fraction_of_training):
            training, validation = split(backend_personalization, fraction, seed0 + seed)
            internal_training = training
            if self.pre_diffuse is not None:
                internal_training = self.pre_diffuse(internal_training)
                validation = self.pre_diffuse(validation)
            internal_training_list.append(internal_training)
            validation_list.append(validation)

        def eval(params):
            val = 0
            for internal_training, validation in zip(internal_training_list, validation_list):
                """import pygrank as pg
                scores = self._run(backend_personalization, params, *args, **kwargs)
                internal_training = pg.Undersample(int(backend.sum(internal_training)))(scores*backend_personalization)
                validation = backend_personalization - internal_training"""
                measure = self.measure(validation, internal_training if internal_training != validation else None)
                val = val - measure.best_direction() * measure.evaluate(self._run(internal_training, params, *args, **kwargs))
            return val / len(internal_training_list)

        best_params = self.optimizer(eval, **self.optimize_args)
        """import cma
        es = cma.CMAEvolutionStrategy([0.5 for _ in range(len(self.optimize_args["max_vals"]))], 1./12**0.5)
        es.optimize(eval, verb_disp=False)
        best_params = es.result.xbest"""
        total_params.append(best_params)
    best_params = [0 for _ in best_params]
    best_squares = [0 for _ in best_params]
    best_means = [0 for _ in best_params]
    for params in total_params:
        for i in range(len(best_params)):
            best_params[i] = max(best_params[i], params[i])
            best_means[i] += params[i] / self.cross_validate
            best_squares[i] += params[i] ** 2 / self.cross_validate
    best_params = best_means
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(previous_backend)
    # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
    self.last_params = best_params
    return self.ranker_generator(best_params), personalization if self.combined_prediction else internal_training
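# Hedged usage sketch for the cross-validated tuner above: parameters are optimized on
# several re-seeded splits and their per-parameter means are used to build the final
# ranker. The constructor keywords below (`cross_validate`, `fraction_of_training`,
# `max_vals`, `min_vals`) only mirror attributes read by _tune; the exact public
# signature and the GenericGraphFilter generator are assumptions.
#
#     tuner = ParameterTuner(lambda params: GenericGraphFilter(params),
#                            max_vals=[1] * 10, min_vals=[0] * 10,
#                            cross_validate=5, fraction_of_training=[0.5])
#     scores = tuner.rank(graph, personalization)
#     print(tuner.last_params)   # averaged parameters from the most recent tuning run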