def rank(self, graph=None, personalization=None, *args, **kwargs):
    """Run the wrapped ranker and rescale its output so that the L1 norm of
    the result matches the L1 norm of the personalization signal.

    Args:
        graph: Optional graph (forwarded to `to_signal` and the ranker).
        personalization: Graph signal data converted via `to_signal`.
        *args, **kwargs: Forwarded verbatim to the wrapped ranker.

    Returns:
        The ranker's GraphSignal, rescaled in place when the personalization
        carries non-zero mass.
    """
    personalization = to_signal(graph, personalization)
    norm = backend.sum(backend.abs(personalization.np))
    ranks = self.ranker(graph, personalization, *args, **kwargs)
    if norm != 0:
        # safe_div guards against an all-zero ranker output, which would
        # otherwise divide by zero (NaN or error depending on backend).
        ranks.np = backend.safe_div(ranks.np * norm, backend.sum(backend.abs(ranks.np)))
    return ranks
def _transform(self, ranks: GraphSignal, **kwargs):
    """Split `ranks` by the separator signal and rescale each part so that
    it sums to 1 (when its mass is non-zero), then recombine the parts."""
    mask = to_signal(ranks, self.separator)
    inside = ranks * mask
    outside = ranks * (1 - mask)
    # safe_div yields 0 for an empty partition instead of dividing by zero
    inside_scale = backend.safe_div(1., backend.sum(inside))
    outside_scale = backend.safe_div(1., backend.sum(outside))
    return inside * inside_scale + outside * outside_scale
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Max-normalize both the known scores and the evaluated scores, then
    return the ratio of their overlap mass to the evaluated scores' mass."""
    target, predicted = self.to_numpy(scores)
    # divide each signal by its maximum; safe_div handles all-zero signals
    target = backend.safe_div(target, backend.max(target))
    predicted = backend.safe_div(predicted, backend.max(predicted))
    overlap = backend.sum(target * predicted)
    return backend.safe_div(overlap, backend.sum(predicted))
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Return the ratio of score-weighted internal edge mass to the expected
    edge mass of the scores (self-loops excluded from the expectation)."""
    if len(self.get_graph(scores)) == 0:
        return 0  # empty graph: nothing to measure
    adjacency, values = self.to_numpy(scores)
    propagated = backend.conv(values, adjacency)
    internal_edges = backend.dot(propagated, values)
    # sum(s)^2 - sum(s^2) removes the diagonal (self-loop) contribution
    expected_edges = backend.sum(values) ** 2 - backend.sum(values ** 2)
    return backend.safe_div(internal_edges, expected_edges)
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Shift both signals to be strictly positive, normalize each into a
    distribution, and return the (negated, length-averaged) divergence of
    the evaluated distribution from the known one."""
    reference, candidate = self.to_numpy(scores, normalization=True)
    eps = backend.epsilon()
    # make strictly positive before normalizing, so log() is defined
    reference = reference - backend.min(reference) + eps
    reference = reference / backend.sum(reference)
    candidate = candidate - backend.min(candidate) + eps
    candidate = candidate / backend.sum(candidate)
    divergence = -backend.sum(candidate * backend.log(candidate / reference))
    return divergence / backend.length(candidate)
def rank(self, graph: GraphSignalGraph = None, personalization: GraphSignalData = None,
         warm_start: GraphSignalData = None, graph_dropout: float = 0,
         *args, **kwargs) -> GraphSignal:
    """Run the iterative ranking scheme until convergence.

    The personalization is prepared, transformed, and normalized to unit L1
    mass; ranks are iterated via the `_start`/`_step`/`_end` hooks until
    `self.convergence` reports convergence, then rescaled back to the
    original personalization mass.

    Args:
        graph: Optional graph used to build the personalization signal.
        personalization: Graph signal data for the personalization.
        warm_start: Optional initial ranks; defaults to a copy of the
            normalized personalization.
        graph_dropout: Dropout rate applied to the preprocessed adjacency
            on every hook invocation.

    Returns:
        The converged GraphSignal of ranks.
    """
    personalization = to_signal(graph, personalization)
    self._prepare(personalization)
    personalization = self.personalization_transform(personalization)
    # total L1 mass; an all-zero personalization is returned unchanged,
    # since iteration could not produce anything else
    personalization_norm = backend.sum(backend.abs(personalization.np))
    if personalization_norm == 0:
        return personalization
    # iterate on a unit-mass copy; the mass is restored after convergence
    personalization = to_signal(personalization, personalization.np / personalization_norm)
    ranks = to_signal(
        personalization,
        backend.copy(personalization.np) if warm_start is None else warm_start)
    M = self.preprocessor(personalization.graph)
    self.convergence.start()
    self._start(backend.graph_dropout(M, graph_dropout), personalization, ranks, *args, **kwargs)
    while not self.convergence.has_converged(ranks.np):
        # NOTE(review): graph_dropout re-samples on every call, so each
        # iteration (and _start/_end) sees a fresh dropout mask — presumably
        # intended as stochastic regularization; confirm.
        self._step(backend.graph_dropout(M, graph_dropout), personalization, ranks, *args, **kwargs)
    self._end(backend.graph_dropout(M, graph_dropout), personalization, ranks, *args, **kwargs)
    # undo the unit-mass normalization applied above
    ranks.np = ranks.np * personalization_norm
    return ranks
def krylov_base(M, personalization, krylov_space_degree): warnings.warn( "Krylov approximation is not stable yet (results may differ in future versions)" ) # TODO: throw exception for non-symmetric matrix personalization = backend.to_primitive(personalization) base = [ personalization / backend.dot(personalization, personalization)**0.5 ] base_norms = [] alphas = [] for j in range(0, krylov_space_degree): v = base[j] w = backend.conv(v, M) a = backend.dot(v, w) alphas.append(a) next_w = w - a * v if j > 0: next_w -= base[j - 1] * base_norms[j - 1] next_w_norm = (backend.sum(next_w**2))**0.5 base_norms.append(next_w_norm) if j != krylov_space_degree - 1: base.append(next_w / next_w_norm) H = diags([alphas, base_norms[1:], base_norms[1:]], [0, -1, 1]) V = backend.combine_cols(base) #V = np.column_stack(base) return V, H
def _transform(self, ranks: GraphSignal, **kwargs):
    """Normalize ranks according to `self.method`.

    Supported methods: "range" (min-max), "max", "sum", and "L2".
    Returns the original signal untouched when the divisor span is zero,
    otherwise the normalized raw values.
    """
    ensure_used_args(kwargs)
    values = ranks.np
    lower = 0
    if self.method == "range":
        upper = float(backend.max(values))
        lower = float(backend.min(values))
    elif self.method == "max":
        upper = float(backend.max(values))
    elif self.method == "sum":
        upper = float(backend.sum(values))
    elif self.method == "L2":
        upper = float(backend.sum(values**2))**0.5
    else:
        raise Exception("Can only normalize towards max, sum, range, or L2")
    if lower == upper:
        # degenerate span: avoid dividing by zero
        return ranks
    return (values - lower) / (upper - lower)
def _step(self, M, personalization, ranks, *args, **kwargs):
    # One iteration: the subclass-provided formula computes the next ranks.
    ranks.np = self._formula(M, personalization, ranks, *args, **kwargs)
    # some formulas return a GraphSignal; unwrap to the raw primitive
    if isinstance(ranks.np, GraphSignal):
        ranks.np = ranks.np.np
    if isinstance(self.use_quotient, Postprocessor):
        # a Postprocessor instance replaces the default sum-normalization
        ranks.np = self.use_quotient(ranks)
    elif self.use_quotient:
        # NOTE(review): this passes the GraphSignal itself (not ranks.np) to
        # backend.safe_div/backend.sum, while a sibling _step elsewhere in
        # this file operates on ranks.np directly — presumably the backend
        # accepts signals here; confirm the two are intentionally different.
        ranks.np = backend.safe_div(ranks, backend.sum(ranks))
    if self.converge_to_eigenvectors:
        # feed ranks back as the personalization so iteration converges
        # to an eigenvector rather than a personalized solution
        personalization.np = ranks.np
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Return the ratio of average score mass between the sensitive group
    and its complement, oriented so the result lies in [0, 1]."""
    sensitive, values = self.to_numpy(scores)
    protected_mass = backend.dot(values, sensitive)
    rest_mass = backend.dot(values, 1 - sensitive)
    if protected_mass == 0 or rest_mass == 0:
        return 0  # one group received no mass at all
    group_size = backend.sum(sensitive)
    protected_mass = backend.safe_div(protected_mass, group_size)
    rest_mass = backend.safe_div(rest_mass, backend.length(sensitive) - group_size)
    # branch (instead of min/max) keeps this expression derivable
    if protected_mass <= rest_mass:
        return protected_mass / rest_mass
    return rest_mass / protected_mass
def _start(self, M, personalization, ranks, sensitive, *args, **kwargs):
    """Prepare fairness-aware redistribution vectors and a reweighted
    personalization before handing off to the parent iteration start."""
    sensitive = to_signal(ranks, sensitive)
    outR = self.outR  # backend.conv(sensitive.np, M)
    outB = self.outB  # backend.conv(1.-sensitive.np, M)
    # target fraction of mass for the sensitive group, scaled by the prule goal
    phi = backend.sum(sensitive.np) / backend.length(
        sensitive.np) * self.target_prule
    dR = backend.repeat(0., len(sensitive.graph))
    dB = backend.repeat(0., len(sensitive.graph))
    # three mutually exclusive per-node cases, encoded as 0/1 masks:
    # case1: sensitive out-mass below the target share
    # case2: not case1 and node has sensitive out-links
    # case3: neither (no sensitive out-links and not below target)
    case1 = outR < phi * (outR + outB)
    case2 = (1 - case1) * (outR != 0)
    case3 = (1 - case1) * (1 - case2)
    dR[case1] = phi - (1 - phi) / outB[case1] * outR[case1]
    dR[case3] = phi
    dB[case2] = (1 - phi) - phi / outR[case2] * outB[case2]
    dB[case3] = 1 - phi
    # reweight personalization so each group contributes its target share,
    # then renormalize to a unit-mass signal
    personalization.np = backend.safe_div(sensitive.np * personalization.np, backend.sum(sensitive.np)) * self.target_prule \
        + backend.safe_div(personalization.np * (1 - sensitive.np), backend.sum(1 - sensitive.np))
    personalization.np = backend.safe_div(personalization.np, backend.sum(personalization.np))
    L = sensitive.np
    # choose how redistributed mass is spread across nodes
    if self.redistributor is None or self.redistributor == "uniform":
        original_ranks = 1  # scalar: uniform spread
    elif self.redistributor == "original":
        # spread proportionally to plain PageRank scores
        original_ranks = PageRank(
            alpha=self.alpha,
            preprocessor=default_preprocessor(assume_immutability=False,
                                              normalization="col"),
            convergence=self.convergence)(personalization).np
    else:
        # custom redistributor: any ranker-like callable
        original_ranks = self.redistributor(personalization).np
    self.dR = dR
    self.dB = dB
    # per-group redistribution distributions (each sums to 1 when non-empty)
    self.xR = backend.safe_div(original_ranks * L,
                               backend.sum(original_ranks * L))
    self.xB = backend.safe_div(original_ranks * (1 - L),
                               backend.sum(original_ranks * (1 - L)))
    super()._start(M, personalization, ranks, *args, **kwargs)
def _step(self, M, personalization, ranks, *args, **kwargs):
    """Advance ranks by one iteration of the subclass formula, optionally
    sum-normalizing the result and feeding it back as personalization."""
    ranks.np = self._formula(M, personalization.np, ranks.np, *args, **kwargs)
    quotient = self.use_quotient
    if isinstance(quotient, Postprocessor):
        # custom normalization supplied as a postprocessor
        ranks.np = quotient.transform(ranks).np
    elif quotient:
        # default: divide by total mass, unless that mass is zero
        total = backend.sum(ranks.np)
        if total != 0:
            ranks.np = ranks.np / total
    if self.converge_to_eigenvectors:
        personalization.np = ranks.np
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Return the squared difference between the average score mass of the
    sensitive group and its complement, scaled by the number of nodes."""
    sensitive, values = self.to_numpy(scores)
    group_size = backend.sum(sensitive)
    total = backend.length(sensitive)
    protected_avg = backend.safe_div(backend.dot(values, sensitive), group_size)
    rest_avg = backend.safe_div(backend.dot(values, 1 - sensitive), total - group_size)
    return (protected_avg - rest_avg)**2 * total
def _run(self, personalization: GraphSignal, params: object, base=None, *args, **kwargs):
    """Produce a signal from tuned parameters: either rank directly with a
    parameterized ranker (krylov basis) or linearly combine Arnoldi base
    columns with the (L1-normalized) parameters."""
    params = backend.to_primitive(params)
    mass = backend.sum(backend.abs(params))
    if mass != 0:
        params = params / mass
    if self.basis == "krylov":
        # delegate entirely to the generated ranker
        return self.ranker_generator(params).rank(personalization, *args, **kwargs)
    if base is None:
        # no precomputed base: build one from the preprocessed adjacency
        M = self.ranker_generator(params).preprocessor(personalization.graph)
        base = arnoldi_iteration(M, personalization.np, len(params))[0]
    # weighted combination of base columns
    combination = 0
    for idx in range(backend.length(params)):
        combination = combination + params[idx] * base[:, idx]
    return to_signal(personalization, combination)
def _formula(self, M, personalization, ranks, sensitive, *args, **kwargs):
    """PageRank-style update with fairness redistribution: propagated ranks
    plus per-group redistributed mass, damped by alpha toward the
    personalization."""
    mass_R = backend.sum(ranks * self.dR)
    mass_B = backend.sum(ranks * self.dB)
    propagated = backend.conv(ranks, M) + mass_R * self.xR + mass_B * self.xB
    return propagated * self.alpha + personalization * (1 - self.alpha)
def _prepare_graph(self, graph, sensitive, *args, **kwargs):
    """Store the sensitive-group signal and its target mass fraction
    (group fraction scaled by the prule target), then pass the graph on."""
    signal = to_signal(graph, sensitive)
    self.sensitive = signal
    group_fraction = backend.sum(signal.np) / backend.length(signal.np)
    self.phi = group_fraction * self.target_prule
    return graph
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Return 1 minus the mean absolute error between known and evaluated
    scores (higher is better)."""
    target, predicted = self.to_numpy(scores)
    total_abs_error = backend.sum(backend.abs(target - predicted))
    return 1 - total_abs_error / backend.length(predicted)
def _tune(self, graph=None, personalization=None, *args, **kwargs):
    """Tune filter parameters by measuring split-training signals against
    propagated/Arnoldi bases, optionally smoothing with an autoregressive
    window fitted by an Adam-style loop, and optionally line-searching a
    scale/offset correction. Returns a (ranker, personalization) pair.

    NOTE(review): reconstructed formatting of a collapsed source line;
    statement grouping follows the only syntactically consistent reading.
    """
    #graph_dropout = kwargs.get("graph_dropout", 0)
    #kwargs["graph_dropout"] = 0
    previous_backend = backend.backend_name()
    personalization = to_signal(graph, personalization)
    graph = personalization.graph
    # optionally switch to a (typically faster) backend just for tuning
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        backend.load_backend(self.tuning_backend)
    backend_personalization = to_signal(
        personalization, backend.to_array(personalization.np))
    #training, validation = split(backend_personalization, 0.8)
    #training2, validation2 = split(backend_personalization, 0.6)
    #measure_weights = [1, 1, 1, 1, 1]
    #propagated = [training.np, validation.np, backend_personalization.np, training2.np, validation2.np]
    measure_values = [None] * (self.num_parameters + self.autoregression)
    M = self.ranker_generator(measure_values).preprocessor(graph)
    #for _ in range(10):
    #    backend_personalization.np = backend.conv(backend_personalization.np, M)
    # two half-splits of the training data serve as probe signals
    training, validation = split(backend_personalization, 0.8)
    training1, training2 = split(training, 0.5)
    propagated = [training1.np, training2.np]
    measures = [
        self.measure(backend_personalization, training1),
        self.measure(backend_personalization, training2)
    ]
    #measures = [self.measure(validation, training), self.measure(training, validation)]
    if self.basis == "krylov":
        # measure each hop of repeated propagation
        for i in range(len(measure_values)):
            measure_values[i] = [
                measure(p) for p, measure in zip(propagated, measures)
            ]
            propagated = [backend.conv(p, M) for p in propagated]
    else:
        # measure each column of an Arnoldi base instead
        basis = [
            arnoldi_iteration(M, p, len(measure_values))[0]
            for p in propagated
        ]
        for i in range(len(measure_values)):
            measure_values[i] = [
                float(measure(base[:, i]))
                for base, measure in zip(basis, measures)
            ]
    measure_values = backend.to_primitive(measure_values)
    mean_value = backend.mean(measure_values, axis=0)
    measure_values = measure_values - mean_value  # center per probe
    best_parameters = measure_values
    measure_weights = [1] * measure_values.shape[1]
    if self.autoregression != 0:
        #vals2 = -measure_values-mean_value
        #measure_values = np.concatenate([measure_values, vals2-np.mean(vals2, axis=0)], axis=1)
        # fit an autoregressive smoothing window with an Adam-style optimizer
        window = backend.repeat(1. / self.autoregression, self.autoregression)
        beta1 = 0.9    # Adam first-moment decay
        beta2 = 0.999  # Adam second-moment decay
        beta1t = 1
        beta2t = 1
        rms = window * 0
        momentum = window * 0
        error = float('inf')
        while True:
            beta1t *= beta1
            beta2t *= beta2
            prev_error = error
            parameters = backend.copy(measure_values)
            # predict each row from the window applied to the following rows
            for i in range(len(measure_values) - len(window) - 2, -1, -1):
                parameters[i, :] = backend.dot(
                    (window), measure_values[(i + 1):(i + len(window) + 1), :])
            errors = (parameters - measure_values
                      ) * measure_weights / backend.sum(measure_weights)
            for j in range(len(window)):
                gradient = 0
                for i in range(len(measure_values) - len(window) - 1):
                    gradient += backend.dot(measure_values[i + j + 1, :],
                                            errors[i, :])
                momentum[j] = beta1 * momentum[j] + (
                    1 - beta1) * gradient  #*np.sign(window[j])
                rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                # bias-corrected Adam update of the window weight
                window[j] -= 0.01 * momentum[j] / (1 - beta1t) / (
                    (rms[j] / (1 - beta2t))**0.5 + 1.E-8)
                #window[j] -= 0.01*gradient*np.sign(window[j])
            error = backend.mean(backend.abs(errors))
            # stop on exact fit or relative improvement below tolerance
            if error == 0 or abs(error - prev_error) / error < 1.E-6:
                best_parameters = parameters
                break
    # collapse probe dimension: weighted mean plus the removed mean offset
    best_parameters = backend.mean(best_parameters[:self.num_parameters, :]
                                   * backend.to_primitive(measure_weights),
                                   axis=1) + backend.mean(mean_value)
    if self.tunable_offset is not None:
        div = backend.max(best_parameters)
        if div != 0:
            best_parameters /= div
        measure = self.tunable_offset(validation, training)
        base = basis[0] if self.basis != "krylov" else None
        # line-search a (scale, offset, shift) correction of the parameters
        best_offset = optimize(
            lambda params: -measure.best_direction() * measure(
                self._run(training, [(best_parameters[i] + params[
                    2]) * params[0]**i + params[1] for i in range(
                        len(best_parameters))], base, *args, **kwargs)),
            #lambda params: - measure.evaluate(self._run(training, best_parameters + params[0], *args, **kwargs)),
            max_vals=[1, 0, 0],
            min_vals=[0, 0, 0],
            deviation_tol=0.005,
            parameter_tol=1,
            partitions=5,
            divide_range=2)
        #best_parameters += best_offset[0]
        best_parameters = [
            (best_parameters[i] + best_offset[2]) * best_offset[0]**i +
            best_offset[1] for i in range(len(best_parameters))
        ]
    best_parameters = backend.to_primitive(best_parameters)
    if backend.sum(backend.abs(best_parameters)) != 0:
        best_parameters /= backend.mean(backend.abs(best_parameters))
    if self.tuning_backend is not None and self.tuning_backend != previous_backend:
        best_parameters = [
            float(param) for param in best_parameters
        ]  # convert parameters to backend-independent list
        backend.load_backend(previous_backend)
    #kwargs["graph_dropout"] = graph_dropout
    if self.basis != "krylov":
        return Tautology(), self._run(
            personalization, best_parameters, *args,
            **kwargs)  # TODO: make this unecessary
    return self.ranker_generator(best_parameters), personalization
def evaluate(self, scores: GraphSignalData) -> BackendPrimitive:
    """Return the L2 norm of the difference between known and evaluated
    scores (no averaging over length)."""
    target, predicted = self.to_numpy(scores)
    residual = target - predicted
    return backend.sum(residual * residual)**0.5