Example No. 1
 def __init__(self, ranker=None, method="O"):
     if ranker is not None and not callable(getattr(ranker, "rank", None)):
         ranker, method = method, ranker
         if not callable(getattr(ranker, "rank", None)):
             ranker = None
     self.ranker = Tautology() if ranker is None else ranker
     self.method = method
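
The swap at the top of this initializer lets the adjustment method be passed as the first positional argument when no base ranker is needed. A minimal sketch of that behavior, assuming the initializer belongs to a class such as the FairPostprocessor of Example No. 5:

fair_a = FairPostprocessor("B")                      # method given first, ranker omitted
fair_b = FairPostprocessor(ranker=None, method="B")  # equivalent explicit form
assert fair_a.method == fair_b.method == "B"
assert type(fair_a.ranker) is type(fair_b.ranker)    # both fall back to Tautology()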
Example No. 2
    def __init__(self, ranker: NodeRanking = None):
        """Initializes the postprocessor with a base ranker.

        Args:
            ranker: Optional. The base ranker instance. A Tautology() ranker is created if None (default) is specified.

        Example:
            >>> import pygrank as pg
            >>> graph, personalization, algorithm, test = ...
            >>> algorithm = algorithm >> pg.Top(10) >> pg.Threshold() >> pg.Subgraph() >> pg.PageRank() >> pg.Supergraph()
            >>> top_10_reranked = algorithm(graph, personalization)  # top 10 non-zeroes ranked in their induced subgraph
            >>> print(pg.AUC(pg.to_signal(graph, test))(top_10_reranked))  # Supergraph() has restored the original graph
        """
        super().__init__(Tautology() if ranker is None else ranker)
Example No. 3
    def __init__(self, ranker: NodeRanking = None):
        """Initializes the postprocessor with a base ranker.

        Args:
            ranker: Optional. The base ranker instance. A Tautology() ranker is created if None (default) is specified.

        Example:
            >>> import pygrank as pg
            >>> graph, personalization, algorithm = ...
            >>> algorithm = pg.Subgraph(pg.Top(algorithm, 10))
            >>> top_10_subgraph = algorithm(graph, personalization).graph

        Example (same result):
            >>> import pygrank as pg
            >>> graph, personalization, algorithm = ...
            >>> algorithm = algorithm >> pg.Top(10) >> pg.Subgraph()
            >>> top_10_subgraph = algorithm(graph, personalization).graph
        """
        super().__init__(Tautology() if ranker is None else ranker)
Example No. 4
    def __init__(self,
                 ranker: Optional[Union[NodeRanking, str]] = None,
                 method: Optional[Union[NodeRanking, str]] = "O",
                 eps: float = 1.E-12):
        """
        Initializes the fairness-aware postprocessor.

        Args:
            ranker: The base ranking algorithm.
            method: The method with which to adjust weights. If "O" (default), an optimal gradual adjustment is performed
                [tsioutsiouliklis2020fairness].
                If "B", node scores are weighted according to whether the nodes are sensitive, so that
                the sum of sensitive node scores becomes equal to the sum of non-sensitive node scores
                [tsioutsiouliklis2020fairness].
            eps: A small value below which rank redistribution is considered to have converged. Default is 1.E-12.
        """
        if ranker is not None and not callable(getattr(ranker, "rank", None)):
            ranker, method = method, ranker
            if not callable(getattr(ranker, "rank", None)):
                ranker = None
        super().__init__(Tautology() if ranker is None else ranker)
        self.method = method
        self.eps = eps  # TODO: investigate whether using backend.epsilon() is a preferable alternative
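
As a worked illustration of the "B" adjustment described in the docstring, the toy numbers below (hypothetical, not taken from pygrank) show each group ending up with its target share of the score mass:

ranks = {"a": 0.4, "b": 0.3, "c": 0.2, "d": 0.1}   # toy scores summing to 1
sensitive = {"a": 1, "b": 1, "c": 0, "d": 0}       # sensitive-group indicator
phi = sum(sensitive.values()) / len(ranks)                  # 0.5
sumR = sum(ranks[v] * sensitive[v] for v in ranks)          # 0.7
sumB = sum(ranks[v] * (1 - sensitive[v]) for v in ranks)    # 0.3
adjusted = {v: ranks[v] * (phi * sensitive[v] / sumR
                           + (1 - phi) * (1 - sensitive[v]) / sumB)
            for v in ranks}
# After adjustment the sensitive group holds phi of the mass, the rest 1 - phi.
assert abs(sum(adjusted[v] for v in ranks if sensitive[v]) - phi) < 1e-9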
Example No. 5
class FairPostprocessor:
    def __init__(self, ranker=None, method="O"):
        if ranker is not None and not callable(getattr(ranker, "rank", None)):
            ranker, method = method, ranker
            if not callable(getattr(ranker, "rank", None)):
                ranker = None
        self.ranker = Tautology() if ranker is None else ranker
        self.method = method

    def __distribute(self, DR, ranks, sensitive):
        # Evenly remove a total mass of DR from the scores of the nodes indicated
        # by `sensitive`, never letting any individual score drop below zero.
        #ranks = {v: ranks[v] * sensitive.get(v, 0) for v in ranks if ranks[v] * sensitive.get(v, 0) > 1.E-6}
        while True:
            ranks = {v: ranks[v] * sensitive.get(v, 0) for v in ranks if ranks[v] * sensitive.get(v, 0) != 0}
            d = DR / len(ranks)
            min_rank = min(val for val in ranks.values())
            if min_rank > d:
                ranks = {v: val - d for v, val in ranks.items()}
                break
            ranks = {v: val - min_rank for v, val in ranks.items()}
            DR -= len(ranks) * min_rank
        return ranks

    def __reweight(self, G, sensitive):
        # Cache and return a copy of G whose edge weights are inversely proportional
        # to the prevalence of the source node's group (1/phi or 1/(1-phi)).
        if not getattr(self, "reweights", None):
            self.reweights = dict()
        if G not in self.reweights:
            phi = sum(sensitive.values())/len(G)
            Gnew = G.copy()
            for u, v, d in Gnew.edges(data=True):
                d["weight"] = 1./(sensitive[u]*phi+(1-sensitive[u])*(1-phi))
            self.reweights[G] = Gnew
        return self.reweights[G]

    def _transform(self, ranks, sensitive):
        # Adjust node scores so that the sensitive group ends up holding a fraction
        # phi of the total score mass, using the configured adjustment method.
        phi = sum(sensitive.values())/len(ranks)
        if self.method == "O":
            ranks = Normalize(method="sum").transform(ranks)
            sumR = sum(ranks[v] * sensitive.get(v, 0) for v in ranks)
            sumB = sum(ranks[v] * (1 - sensitive.get(v, 0)) for v in ranks)
            numR = sum(sensitive.values())
            numB = len(ranks) - numR
            if sumR < phi:
                red = self.__distribute(phi - sumR, ranks, {v: 1 - sensitive.get(v, 0) for v in ranks})
                ranks = {v: red.get(v, ranks[v] + (phi - sumR) / numR) for v in ranks}
            elif sumB < 1-phi:
                red = self.__distribute(1-phi - sumB, ranks, {v: sensitive.get(v, 0) for v in ranks})
                ranks = {v: red.get(v, ranks[v] + (1-phi - sumB) / numB) for v in ranks}
        elif self.method == "B":
            sumR = sum(ranks[v]*sensitive.get(v, 0) for v in ranks)
            sumB = sum(ranks[v]*(1-sensitive.get(v, 0)) for v in ranks)
            sum_total = sumR + sumB
            sumR /= sum_total
            sumB /= sum_total
            ranks = {v: ranks[v]*(phi*sensitive.get(v, 0)/sumR+(1-phi)*(1-sensitive.get(v, 0))/sumB) for v in ranks}
        else:
            raise Exception("Invalid fairness postprocessing method", self.method)
        return ranks

    def transform(self, ranks, sensitive, *args, **kwargs):
        if self.method == "reweight":
            raise Exception("Reweighting can only occur by preprocessing the graph")
        return self._transform(self.ranker.transform(ranks, *args, **kwargs), sensitive)

    def rank(self, G, personalization, sensitive, *args, **kwargs):
        if self.method == "reweight":
            return self.ranker.rank(self.__reweight(G, sensitive), personalization, *args, **kwargs)
        return self._transform(self.ranker.rank(G, personalization, *args, **kwargs), sensitive)
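
A minimal usage sketch for the class above on hypothetical data; a trivial stand-in ranker is defined so the sketch does not depend on pygrank's Tautology, and the dict-based Normalize used inside _transform is assumed to behave as in the snippet:

class EchoRanker:
    # Trivial stand-in ranker: echoes the given scores back unchanged.
    def rank(self, G, personalization, *args, **kwargs):
        return dict(personalization)

    def transform(self, ranks, *args, **kwargs):
        return dict(ranks)

ranks = {"a": 0.4, "b": 0.3, "c": 0.2, "d": 0.1}   # toy scores summing to 1
sensitive = {"a": 1, "b": 1}                       # "a" and "b" form the protected group
fair = FairPostprocessor(EchoRanker(), method="O")
reranked = fair.transform(ranks, sensitive)
print(sum(reranked[v] for v in sensitive))                        # ~0.5
print(sum(reranked[v] for v in reranked if v not in sensitive))   # ~0.5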
Example No. 6
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        #graph_dropout = kwargs.get("graph_dropout", 0)
        #kwargs["graph_dropout"] = 0
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        graph = personalization.graph
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            personalization, backend.to_array(personalization.np))
        #training, validation = split(backend_personalization, 0.8)
        #training2, validation2 = split(backend_personalization, 0.6)
        #measure_weights = [1, 1, 1, 1, 1]
        #propagated = [training.np, validation.np, backend_personalization.np, training2.np, validation2.np]

        measure_values = [None] * (self.num_parameters + self.autoregression)
        M = self.ranker_generator(measure_values).preprocessor(graph)

        #for _ in range(10):
        #    backend_personalization.np = backend.conv(backend_personalization.np, M)
        training, validation = split(backend_personalization, 0.8)
        training1, training2 = split(training, 0.5)

        propagated = [training1.np, training2.np]
        measures = [
            self.measure(backend_personalization, training1),
            self.measure(backend_personalization, training2)
        ]
        #measures = [self.measure(validation, training), self.measure(training, validation)]

        if self.basis == "krylov":
            for i in range(len(measure_values)):
                measure_values[i] = [
                    measure(p) for p, measure in zip(propagated, measures)
                ]
                propagated = [backend.conv(p, M) for p in propagated]
        else:
            # build an orthonormal Arnoldi basis of the Krylov subspace and
            # evaluate each measure on its columns instead
            basis = [
                arnoldi_iteration(M, p, len(measure_values))[0]
                for p in propagated
            ]
            for i in range(len(measure_values)):
                measure_values[i] = [
                    float(measure(base[:, i]))
                    for base, measure in zip(basis, measures)
                ]
        measure_values = backend.to_primitive(measure_values)
        mean_value = backend.mean(measure_values, axis=0)
        measure_values = measure_values - mean_value
        best_parameters = measure_values
        measure_weights = [1] * measure_values.shape[1]
        if self.autoregression != 0:
            #vals2 = -measure_values-mean_value
            #measure_values = np.concatenate([measure_values, vals2-np.mean(vals2, axis=0)], axis=1)
            # fit the autoregression window with an Adam-style gradient update
            window = backend.repeat(1. / self.autoregression,
                                    self.autoregression)
            beta1 = 0.9
            beta2 = 0.999
            beta1t = 1
            beta2t = 1
            rms = window * 0
            momentum = window * 0
            error = float('inf')
            while True:
                beta1t *= beta1
                beta2t *= beta2
                prev_error = error
                parameters = backend.copy(measure_values)
                for i in range(len(measure_values) - len(window) - 2, -1, -1):
                    parameters[i, :] = backend.dot(
                        (window),
                        measure_values[(i + 1):(i + len(window) + 1), :])
                errors = (parameters - measure_values
                          ) * measure_weights / backend.sum(measure_weights)
                for j in range(len(window)):
                    gradient = 0
                    for i in range(len(measure_values) - len(window) - 1):
                        gradient += backend.dot(measure_values[i + j + 1, :],
                                                errors[i, :])
                    momentum[j] = beta1 * momentum[j] + (
                        1 - beta1) * gradient  #*np.sign(window[j])
                    rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                    window[j] -= 0.01 * momentum[j] / (1 - beta1t) / (
                        (rms[j] / (1 - beta2t))**0.5 + 1.E-8)
                    #window[j] -= 0.01*gradient*np.sign(window[j])
                error = backend.mean(backend.abs(errors))
                if error == 0 or abs(error - prev_error) / error < 1.E-6:
                    best_parameters = parameters
                    break
        best_parameters = backend.mean(best_parameters[:self.num_parameters, :]
                                       * backend.to_primitive(measure_weights),
                                       axis=1) + backend.mean(mean_value)

        if self.tunable_offset is not None:
            div = backend.max(best_parameters)
            if div != 0:
                best_parameters /= div
            measure = self.tunable_offset(validation, training)
            base = basis[0] if self.basis != "krylov" else None
            best_offset = optimize(
                lambda params: -measure.best_direction() * measure(
                    self._run(training, [(best_parameters[i] + params[
                        2]) * params[0]**i + params[1] for i in range(
                            len(best_parameters))], base, *args, **kwargs)),
                #lambda params: - measure.evaluate(self._run(training, best_parameters + params[0], *args, **kwargs)),
                max_vals=[1, 0, 0],
                min_vals=[0, 0, 0],
                deviation_tol=0.005,
                parameter_tol=1,
                partitions=5,
                divide_range=2)
            #best_parameters += best_offset[0]
            best_parameters = [
                (best_parameters[i] + best_offset[2]) * best_offset[0]**i +
                best_offset[1] for i in range(len(best_parameters))
            ]

        best_parameters = backend.to_primitive(best_parameters)
        if backend.sum(backend.abs(best_parameters)) != 0:
            best_parameters /= backend.mean(backend.abs(best_parameters))
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            best_parameters = [
                float(param) for param in best_parameters
            ]  # convert parameters to backend-independent list
            backend.load_backend(previous_backend)
        #kwargs["graph_dropout"] = graph_dropout
        if self.basis != "krylov":
            return Tautology(), self._run(
                personalization, best_parameters, *args,
                **kwargs)  # TODO: make this unnecessary
        return self.ranker_generator(best_parameters), personalization
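
The inner loop above is essentially an Adam-style update on the autoregression window; a standalone sketch of that update rule on a toy quadratic objective (the names, numbers, and objective below are illustrative, not part of pygrank):

import numpy as np

def adam_step(w, grad, momentum, rms, beta1t, beta2t,
              beta1=0.9, beta2=0.999, lr=0.01, eps=1e-8):
    # One Adam update: bias-corrected momentum scaled by a bias-corrected RMS.
    momentum = beta1 * momentum + (1 - beta1) * grad
    rms = beta2 * rms + (1 - beta2) * grad * grad
    w = w - lr * (momentum / (1 - beta1t)) / ((rms / (1 - beta2t)) ** 0.5 + eps)
    return w, momentum, rms

w, target = np.zeros(3), np.array([0.5, -0.2, 0.1])
momentum, rms, beta1t, beta2t = np.zeros(3), np.zeros(3), 1.0, 1.0
for _ in range(2000):
    beta1t *= 0.9
    beta2t *= 0.999
    grad = 2 * (w - target)            # gradient of the toy objective ||w - target||^2
    w, momentum, rms = adam_step(w, grad, momentum, rms, beta1t, beta2t)
print(w)                               # close to the target vector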