Example #1
    def __init__(self,
                 graph: GraphSignalGraph,
                 obj: GraphSignalData,
                 node2id: Optional[Mapping[object, int]] = None):
        """Should **ALWAYS** instantiate graph signals with the method to_signal,
        which handles non-instantiation semantics."""

        self.graph = graph
        self.node2id = {v: i
                        for i, v in enumerate(graph)
                        } if node2id is None else node2id
        if backend.is_array(obj):
            if backend.length(graph) != backend.length(obj):
                raise Exception("Graph signal array dimensions " +
                                str(backend.length(obj)) +
                                " should be equal to graph nodes " +
                                str(backend.length(graph)))
            self.np = backend.to_array(obj)
        elif obj is None:
            self.np = backend.repeat(1.0, len(graph))
        else:
            import numpy as np  # numpy needed here; imported locally as in Example #4's variant
            self.np = np.repeat(
                0.0, len(graph)
            )  # tensorflow does not initialize editing of eager tensors
            for key, value in obj.items():
                self[key] = value
            self.np = backend.to_array(
                self.np
            )  # make all operations with numpy and then potentially switch to tensorflow
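The docstring above insists on construction through to_signal rather than calling the constructor directly. A minimal usage sketch, assuming pygrank is imported as pg and networkx supplies the graph (both assumptions; only the constructor itself appears in the excerpt):

import networkx as nx
import pygrank as pg

graph = nx.Graph()
graph.add_edges_from([("A", "B"), ("B", "C")])

# to_signal wraps the dict into a GraphSignal; unspecified nodes default to 0
signal = pg.to_signal(graph, {"A": 1.0})
print(signal["A"], signal["C"])  # 1.0 0.0
print(signal.np)  # backend array aligned with the node2id enumeration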
Example #2
 def to_numpy(
     self,
     scores: GraphSignalData,
     normalization: bool = False
 ) -> Union[Tuple[GraphSignal, GraphSignal], Tuple[BackendPrimitive,
                                                   BackendPrimitive]]:
     if isinstance(scores, numbers.Number) and isinstance(
             self.known_scores, numbers.Number):
         return backend.to_array([self.known_scores
                                  ]), backend.to_array([scores])
     elif isinstance(scores, GraphSignal):
         return to_signal(scores, self.known_scores).filter(
             exclude=self.exclude), scores.normalized(normalization).filter(
                 exclude=self.exclude)
     elif isinstance(self.known_scores, GraphSignal):
         return self.known_scores.filter(exclude=self.exclude), to_signal(
             self.known_scores,
             scores).normalized(normalization).filter(exclude=self.exclude)
     else:
         if self.exclude is not None:
             raise Exception(
                 "Needs to parse graph signal scores or known_scores to be able to exclude specific nodes"
             )
         scores = backend.self_normalize(
             backend.to_array(scores, copy_array=True)
         ) if normalization else backend.to_array(scores)
         return backend.to_array(self.known_scores), scores
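to_numpy pairs known_scores with the scores being evaluated, honoring the exclude filter. A sketch of the supervised-measure workflow that ends up calling it, assuming pg.AUC and pg.PageRank behave as in pygrank's public API (hypothetical data; only to_numpy itself appears above):

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()
known = pg.to_signal(graph, {v: 1.0 for v in list(graph)[:10]})
scores = pg.PageRank(alpha=0.9)(graph, known)

# AUC is a supervised measure; evaluating it routes through to_numpy
auc = pg.AUC(known)
print(auc(scores))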
Example #3
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     prev_dropout = kwargs.get("graph_dropout")
     kwargs["graph_dropout"] = 0
     best_value = -float('inf')
     best_ranker = None
     fraction_of_training = self.fraction_of_training if isinstance(
         self.fraction_of_training,
         Iterable) else [self.fraction_of_training]
     for ranker in self.rankers:
         values = list()
         for seed, fraction in enumerate(fraction_of_training):
             training, validation = split(backend_personalization,
                                          fraction,
                                          seed=seed)
             measure = self.measure(validation, training)
             values.append(
                 measure.best_direction() *
                 measure.evaluate(ranker.rank(training, *args, **kwargs)))
          value = np.min(values)  # keep each ranker's worst split value (numpy assumed imported as np)
         if value > best_value:
             best_value = value
             best_ranker = ranker
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False
     kwargs["graph_dropout"] = prev_dropout
     return best_ranker, personalization if self.combined_prediction else training
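This _tune variant scores each candidate ranker by its worst case (np.min) over several train/validation splits. A usage sketch, assuming the surrounding class is exposed as pg.AlgorithmSelection with these keyword names (an assumption; only _tune appears above):

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()
seeds = {v: 1.0 for v in list(graph)[:5]}

candidates = [pg.PageRank(alpha=0.85), pg.PageRank(alpha=0.99), pg.HeatKernel(t=3)]
tuner = pg.AlgorithmSelection(candidates, measure=pg.AUC,
                              fraction_of_training=[0.3, 0.5, 0.7])
scores = tuner(graph, seeds)  # selects the best ranker, then ranks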
Example #4
    def __init__(self,
                 graph: GraphSignalGraph,
                 obj: GraphSignalData,
                 node2id: Optional[Mapping[object, int]] = None):
        """Should **ALWAYS** instantiate graph signals with the method to_signal,
        which handles non-instantiation semantics."""

        if node2id is not None:
            self.node2id = node2id
        elif hasattr(graph, "_pygrank_node2id"):  # obtained from preprocessing
            self.node2id = graph._pygrank_node2id
        elif hasattr(graph, "shape"):  # externally defined type
            self.node2id = {i: i for i in range(graph.shape[0])}
        else:  # this is the case where it is an actual graph
            self.node2id = {v: i for i, v in enumerate(graph)}
        self.graph = graph
        #self.node2id = ({i: i for i in range(graph.shape[0])} if hasattr(graph, "shape")
        #                else {v: i for i, v in enumerate(graph)}) if node2id is None else node2id
        graph_len = graph.shape[0] if hasattr(graph, "shape") else len(graph)
        if backend.is_array(obj):
            if graph_len != backend.length(obj):
                raise Exception("Graph signal array dimensions " +
                                str(backend.length(obj)) +
                                " should be equal to graph nodes " +
                                str(graph_len))
            self._np = backend.to_array(obj)
        elif obj is None:
            self._np = backend.repeat(1.0, graph_len)
        else:
            import numpy as np
            self._np = np.repeat(
                0.0, graph_len
            )  # tensorflow does not initialize editing of eager tensors
            for key, value in obj.items():
                self[key] = value
            self._np = backend.to_array(
                self._np
            )  # make all operations with numpy and then potentially switch to tensorflow
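The branch order above (explicit mapping, cached preprocessing attribute, matrix-like input, plain graph) reads more easily as a standalone helper. resolve_node2id is a hypothetical name used for illustration only, not part of the library:

def resolve_node2id(graph, node2id=None):
    if node2id is not None:
        return node2id  # a caller-supplied mapping always wins
    if hasattr(graph, "_pygrank_node2id"):
        return graph._pygrank_node2id  # cached by preprocessing
    if hasattr(graph, "shape"):
        return {i: i for i in range(graph.shape[0])}  # matrix-like input
    return {v: i for i, v in enumerate(graph)}  # an actual graph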
Example #5
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     training, validation = split(backend_personalization,
                                  self.fraction_of_training)
     measure = self.measure(validation, training)
     best_params = optimize(
         lambda params: -measure.best_direction() * measure.evaluate(
             self._run(training, params, *args, **kwargs)),
         **self.optimize_args)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
     return self.ranker_generator(
         best_params
     ), personalization if self.combined_prediction else training
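Here _tune delegates the parameter search to optimize over a single split. A usage sketch, assuming the class is exposed as pg.ParameterTuner and that max_vals/min_vals are forwarded through optimize_args (assumptions; only _tune appears above):

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()
seeds = {v: 1.0 for v in list(graph)[:5]}

tuner = pg.ParameterTuner(lambda params: pg.PageRank(alpha=params[0]),
                          max_vals=[0.99], min_vals=[0.5],
                          measure=pg.AUC, fraction_of_training=0.8)
scores = tuner(graph, seeds)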
Example #6
 def _tune(self, graph=None, personalization=None, *args, **kwargs):
     previous_backend = backend.backend_name()
     personalization = to_signal(graph, personalization)
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(self.tuning_backend)
     backend_personalization = to_signal(
         graph, backend.to_array(personalization.np))
     training, validation = split(backend_personalization,
                                  self.fraction_of_training)
     measure = self.measure(validation, training)
     best_value = -float('inf')
     best_ranker = None
     for ranker in self.rankers:
         value = measure.best_direction() * measure.evaluate(
             ranker.rank(training, *args, **kwargs))
         if value > best_value:
             best_value = value
             best_ranker = ranker
     if self.tuning_backend is not None and self.tuning_backend != previous_backend:
         backend.load_backend(previous_backend)
         # TODO: make training back-propagate through tensorflow for combined_prediction==False
     return best_ranker, personalization if self.combined_prediction else training
Example #7
 def np(self, value):
     # np property setter: coerce the assigned value into a backend array
     self._np = backend.to_array(self.__compliant_value(value))
Example #8
 def np(self):
     # np property getter: expose the stored values as a backend array
     return backend.to_array(self._np)
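Examples #7 and #8 are the setter and getter of the np property, which together let callers treat a signal's values as a backend array. A short sketch, assuming the numpy backend and the GraphSignal construction from Example #1:

import networkx as nx
import pygrank as pg

graph = nx.path_graph(3)
signal = pg.to_signal(graph, {0: 1.0})
signal.np = signal.np * 0.5  # setter re-coerces through backend.to_array
print(signal.np)  # getter returns the stored backend array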
Example #9
def krylov2original(V, filterH, krylov_space_degree: int):
    if isinstance(V, (int, float)):
        V = backend.ones((krylov_space_degree, krylov_space_degree)) * V
    ret = V @ filterH
    return backend.to_array(ret[:, 0])
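krylov2original maps a filter applied in an m-dimensional Krylov subspace back to the original n-dimensional node space and keeps the first column. A numpy-only illustration of the same contraction (the shapes are assumptions consistent with the snippet):

import numpy as np

n, m = 6, 3
rng = np.random.default_rng(0)
V, _ = np.linalg.qr(rng.standard_normal((n, m)))  # orthonormal n x m basis
filterH = rng.standard_normal((m, m))  # filter in Krylov-space coordinates

original = (V @ filterH)[:, 0]  # same contraction as krylov2original, minus the backend
print(original.shape)  # (6,)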
Example #10
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        #graph_dropout = kwargs.get("graph_dropout", 0)
        #kwargs["graph_dropout"] = 0
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        graph = personalization.graph
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            personalization, backend.to_array(personalization.np))
        #training, validation = split(backend_personalization, 0.8)
        #training2, validation2 = split(backend_personalization, 0.6)
        #measure_weights = [1, 1, 1, 1, 1]
        #propagated = [training.np, validation.np, backend_personalization.np, training2.np, validation2.np]

        measure_values = [None] * (self.num_parameters + self.autoregression)
        M = self.ranker_generator(measure_values).preprocessor(graph)

        #for _ in range(10):
        #    backend_personalization.np = backend.conv(backend_personalization.np, M)
        training, validation = split(backend_personalization, 0.8)
        training1, training2 = split(training, 0.5)

        propagated = [training1.np, training2.np]
        measures = [
            self.measure(backend_personalization, training1),
            self.measure(backend_personalization, training2)
        ]
        #measures = [self.measure(validation, training), self.measure(training, validation)]

        if self.basis == "krylov":
            for i in range(len(measure_values)):
                measure_values[i] = [
                    measure(p) for p, measure in zip(propagated, measures)
                ]
                propagated = [backend.conv(p, M) for p in propagated]
        else:
            basis = [
                arnoldi_iteration(M, p, len(measure_values))[0]
                for p in propagated
            ]
            for i in range(len(measure_values)):
                measure_values[i] = [
                    float(measure(base[:, i]))
                    for base, measure in zip(basis, measures)
                ]
        measure_values = backend.to_primitive(measure_values)
        mean_value = backend.mean(measure_values, axis=0)
        measure_values = measure_values - mean_value
        best_parameters = measure_values
        measure_weights = [1] * measure_values.shape[1]
        if self.autoregression != 0:
            #vals2 = -measure_values-mean_value
            #measure_values = np.concatenate([measure_values, vals2-np.mean(vals2, axis=0)], axis=1)
            window = backend.repeat(1. / self.autoregression,
                                    self.autoregression)
            beta1 = 0.9
            beta2 = 0.999
            beta1t = 1
            beta2t = 1
            rms = window * 0
            momentum = window * 0
            error = float('inf')
            while True:
                beta1t *= beta1
                beta2t *= beta2
                prev_error = error
                parameters = backend.copy(measure_values)
                for i in range(len(measure_values) - len(window) - 2, -1, -1):
                    parameters[i, :] = backend.dot(
                        (window),
                        measure_values[(i + 1):(i + len(window) + 1), :])
                errors = (parameters - measure_values
                          ) * measure_weights / backend.sum(measure_weights)
                for j in range(len(window)):
                    gradient = 0
                    for i in range(len(measure_values) - len(window) - 1):
                        gradient += backend.dot(measure_values[i + j + 1, :],
                                                errors[i, :])
                    momentum[j] = beta1 * momentum[j] + (
                        1 - beta1) * gradient  #*np.sign(window[j])
                    rms[j] = beta2 * rms[j] + (1 - beta2) * gradient * gradient
                    window[j] -= 0.01 * momentum[j] / (1 - beta1t) / (
                        (rms[j] / (1 - beta2t))**0.5 + 1.E-8)
                    #window[j] -= 0.01*gradient*np.sign(window[j])
                error = backend.mean(backend.abs(errors))
                if error == 0 or abs(error - prev_error) / error < 1.E-6:
                    best_parameters = parameters
                    break
        best_parameters = backend.mean(best_parameters[:self.num_parameters, :]
                                       * backend.to_primitive(measure_weights),
                                       axis=1) + backend.mean(mean_value)

        if self.tunable_offset is not None:
            div = backend.max(best_parameters)
            if div != 0:
                best_parameters /= div
            measure = self.tunable_offset(validation, training)
            base = basis[0] if self.basis != "krylov" else None
            best_offset = optimize(
                lambda params: -measure.best_direction() * measure(
                    self._run(training, [(best_parameters[i] + params[
                        2]) * params[0]**i + params[1] for i in range(
                            len(best_parameters))], base, *args, **kwargs)),
                #lambda params: - measure.evaluate(self._run(training, best_parameters + params[0], *args, **kwargs)),
                max_vals=[1, 0, 0],
                min_vals=[0, 0, 0],
                deviation_tol=0.005,
                parameter_tol=1,
                partitions=5,
                divide_range=2)
            #best_parameters += best_offset[0]
            best_parameters = [
                (best_parameters[i] + best_offset[2]) * best_offset[0]**i +
                best_offset[1] for i in range(len(best_parameters))
            ]

        best_parameters = backend.to_primitive(best_parameters)
        if backend.sum(backend.abs(best_parameters)) != 0:
            best_parameters /= backend.mean(backend.abs(best_parameters))
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            best_parameters = [
                float(param) for param in best_parameters
            ]  # convert parameters to backend-independent list
            backend.load_backend(previous_backend)
        #kwargs["graph_dropout"] = graph_dropout
        if self.basis != "krylov":
            return Tautology(), self._run(
                personalization, best_parameters, *args,
                **kwargs)  # TODO: make this unnecessary
        return self.ranker_generator(best_parameters), personalization
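This tuner fits per-hop filter parameters from measure trajectories over a Krylov or Arnoldi basis, optionally smoothing them with the Adam-style autoregression loop. A usage sketch, assuming the class is exposed as pg.HopTuner and that basis and measure are constructor keywords (assumptions; only _tune appears above):

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()
seeds = {v: 1.0 for v in list(graph)[:5]}

tuner = pg.HopTuner(basis="krylov", measure=pg.AUC)
scores = tuner(graph, seeds)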
Example #11
    def _tune(self, graph=None, personalization=None, *args, **kwargs):
        previous_backend = backend.backend_name()
        personalization = to_signal(graph, personalization)
        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(self.tuning_backend)
        backend_personalization = to_signal(
            graph, backend.to_array(personalization.np))
        total_params = list()
        for seed0 in range(self.cross_validate):
            fraction_of_training = self.fraction_of_training if isinstance(
                self.fraction_of_training,
                Iterable) else [self.fraction_of_training]
            #fraction_of_training = [random.choice(fraction_of_training)]
            internal_training_list = list()
            validation_list = list()
            for seed, fraction in enumerate(fraction_of_training):
                training, validation = split(backend_personalization, fraction,
                                             seed0 + seed)
                internal_training = training
                if self.pre_diffuse is not None:
                    internal_training = self.pre_diffuse(internal_training)
                    validation = self.pre_diffuse(validation)
                internal_training_list.append(internal_training)
                validation_list.append(validation)

            def eval(params):
                val = 0
                for internal_training, validation in zip(
                        internal_training_list, validation_list):
                    """import pygrank as pg

                    scores = self._run(backend_personalization, params, *args, **kwargs)
                    internal_training = pg.Undersample(int(backend.sum(internal_training)))(scores*backend_personalization)
                    validation = backend_personalization - internal_training"""
                    measure = self.measure(
                        validation, internal_training
                        if internal_training != validation else None)
                    val = val - measure.best_direction() * measure.evaluate(
                        self._run(internal_training, params, *args, **kwargs))
                return val / len(internal_training_list)

            best_params = self.optimizer(eval, **self.optimize_args)
            """import cma
            es = cma.CMAEvolutionStrategy([0.5 for _ in range(len(self.optimize_args["max_vals"]))], 1./12**0.5)
            es.optimize(eval, verb_disp=False)
            best_params = es.result.xbest"""
            total_params.append(best_params)
        best_params = [0 for _ in best_params]
        best_squares = [0 for _ in best_params]
        best_means = [0 for _ in best_params]
        for params in total_params:
            for i in range(len(best_params)):
                best_params[i] = max(best_params[i], params[i])
                best_means[i] += params[i] / self.cross_validate
                best_squares[i] += params[i]**2 / self.cross_validate
        best_params = best_means

        if self.tuning_backend is not None and self.tuning_backend != previous_backend:
            backend.load_backend(previous_backend)
            # TODO: make training back-propagate through tensorflow for combined_prediction==False (do this with a gather in the split method)
        self.last_params = best_params
        return self.ranker_generator(
            best_params
        ), personalization if self.combined_prediction else internal_training
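This is the same ParameterTuner pattern as Example #5, extended with repeated splits whose per-repetition parameters are averaged (best_means). A sketch assuming cross_validate is exposed as a constructor keyword (an assumption; only _tune appears above):

import networkx as nx
import pygrank as pg

graph = nx.les_miserables_graph()
seeds = {v: 1.0 for v in list(graph)[:5]}

tuner = pg.ParameterTuner(lambda params: pg.PageRank(alpha=params[0]),
                          max_vals=[0.99], min_vals=[0.5],
                          measure=pg.AUC, cross_validate=5)
scores = tuner(graph, seeds)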