示例#1
0
    def merge_weights(self, px_model):
        """Pad a local model's weights to the size implied by the global states.

        For every edge ``(i, j)`` in ``self.edgelist`` a block size is
        computed as the product of the two endpoint state counts, once from
        the global counts (``self.state_space + 1``) and once from
        ``px_model.states``. If the local weight vector is shorter than the
        sum of the global block sizes, every block that is too small is
        padded *at its end* with copies of the smallest weight currently in
        that block.

        The resulting vector is also stored in
        ``self.best_weights[self.epoch][self.curr_model]``.

        Parameters
        ----------
        px_model :
            Model providing ``weights``, ``states`` and ``graph``.

        Returns
        -------
        px.Model
            Model built from the (possibly padded) weights cast to float64,
            ``px_model.graph`` and the global state counts.
        """
        global_states = np.ascontiguousarray(self.state_space,
                                             dtype=np.uint64) + 1
        # Block sizes are kept as plain Python ints: subtracting unsigned
        # NumPy integers can wrap around, and mixing uint64 with signed
        # integers promotes to float64, which np.zeros() rejects as a shape.
        global_cliques = [
            int(global_states[i]) * int(global_states[j])
            for i, j in self.edgelist
        ]
        model_size = sum(global_cliques)

        weights = px_model.weights
        if weights.shape[0] < model_size:
            weights = np.ascontiguousarray(np.copy(px_model.weights))
            local_states = px_model.states
            local_cliques = [
                int(local_states[i]) * int(local_states[j])
                for i, j in self.edgelist
            ]

            # `offset` walks the growing vector in units of *global* block
            # sizes, because blocks before it have already been padded.
            offset = 0
            for global_size, local_size in zip(global_cliques,
                                               local_cliques):
                missing = global_size - local_size
                if missing > 0:
                    block_end = offset + local_size
                    inserts = np.zeros(missing) + np.min(
                        weights[offset:block_end])
                    weights = np.insert(weights, block_end, inserts)
                offset += global_size
        self.best_weights[self.epoch][self.curr_model] = weights
        return px.Model(weights=weights.astype(np.float64),
                        graph=px_model.graph,
                        states=global_states)
示例#2
0
    def _aggregate(self, opt, **kwargs):
        naivekl = np.zeros(self.model[0].weights.shape[0])
        K = self.K
        X = self.X
        if opt:
            logger.debug("===KL CREATE DATA===")
            data = np.ascontiguousarray(X, dtype=np.uint16)
            data = np.ascontiguousarray(
                np.vstack((data, self.states - 1)).astype(np.uint16))
            logger.debug("===KL CREATE DUMMY MODEL===")
            model = px.train(data=data, graph=self.graph, iters=0)
            s = np.ctypeslib.as_array(model.empirical_stats,
                                      shape=(model.dimension, ))
            s -= model.phi((self.states - 1).ravel())
            model.num_instances -= 1
            logger.debug("===KL TRAIN BOOTSTRAP===")
            res = px.train(in_model=model,
                           opt_regularization_hook=CONFIG.REGULARIZATION)
            logger.debug("===KL MERGE WEIGHTS===")
            weights = np.ascontiguousarray(np.copy(res.weights))
            states = np.ascontiguousarray(self.states)
            logger.debug("===KL CREATE RESULT MODEL===")
            kl_model = px.Model(weights=weights.astype(np.float64),
                                graph=self.graph,
                                states=states)
        """
        else:
            average_statistics = []
            for i, samples in enumerate(X):
                avg = np.mean([self.phi[i](x) for x in samples], axis=0)
                average_statistics.append(avg)
            self.average_suff_stats = average_statistics
            x0 = np.zeros(self.model[0].weights.shape[0])
            obj = partial(self.naive_kl, average_statistics=average_statistics,
                          graph=self.model[0].graph,
                          states=np.copy(self.model[0].states))
            res = minimize(obj, x0, callback=self.callback, tol=self.eps, options={"maxiter": 50, "gtol": 1e-3})
            kl_model = px.Model(weights=res.x, graph=self.model[0].graph, states=self.model[0].states)
        """
        naivekl += np.copy(kl_model.weights)
        # self.test(kl_model)
        """
        try:
            fisher_matrix = []
            inverse_fisher = []
            for i in range(K):
                fisher_matrix.append(self.fisher_information(i, kl_m[:kl_model.weights.shape[0]], kl_model.weights))
                inverse_fisher.append(np.linalg.inv(fisher_matrix[i]))
        except np.linalg.LinAlgError as e:
            pass
        """

        return kl_model.weights
示例#3
0
 def callback(self, theta):
     """Optimizer callback that signals when the objective has stalled.

     Builds a model from ``theta`` with ``self.graph`` and ``self.states``,
     takes the second value returned by ``infer()`` as ``A`` and evaluates
     ``obj = -(<theta, mean(self.average_suff_stats)> - A)``. The best
     objective seen so far is tracked in ``self.obj``.

     Parameters
     ----------
     theta :
         Current weight vector.

     Returns
     -------
     bool
         ``True`` when ``|self.obj - obj| < self.eps`` (a warning is issued
         in that case), ``False`` otherwise.
     """
     model = px.Model(weights=theta, graph=self.graph, states=self.states)
     _, A = model.infer()
     obj = -(np.inner(theta, np.mean(self.average_suff_stats, axis=0)) - A)
     # Compare against the previous best *before* updating it.
     converged = bool(np.abs(self.obj - obj) < self.eps)
     self.obj = np.nanmin([obj, self.obj])
     if converged:
         # The stop criterion is the objective change, not a time limit.
         warnings.warn(
             "Terminating optimization: objective change below eps")
     return converged
示例#4
0
    def __init__(self,
                 model,
                 samples,
                 label,
                 graph=None,
                 states=None,
                 edgelist=None):
        """Set up models, graph, states and label-masked local data.

        Parameters
        ----------
        model :
            Forwarded to the parent constructor. Afterwards ``self.model`` is
            either a NumPy array whose columns are weight vectors, or a
            sequence of px models.
        samples :
            Iterable of 2-D arrays, one per local model.
        label :
            Column indexer of the label inside every sample.
        graph :
            Required when models are given as a weight array: either an edge
            list array (converted with ``px.create_graph``) or a graph object
            that is used as is.
        states :
            Required when models are given as a weight array.
        edgelist :
            Stored as ``self.edgelist``. When ``None`` the result of
            ``self._full_graph(len(self.model))`` is used instead.

        Raises
        ------
        ValueError
            If a weight array is given without ``graph`` or ``states``.

        Notes
        -----
        For every sample the label column is saved in ``self.y_true`` and a
        uint16 copy with the label column set to the largest uint16 value
        (the value ``-1`` wraps to in uint16) is appended to
        ``self.local_data``. The caller's arrays are not modified.
        """
        logger.debug("===VAR INIT===")
        super(Variance, self).__init__(model)

        self.edgelist = []
        self.local_data = []
        self.y_true = []
        self.states = states
        if isinstance(self.model, np.ndarray):
            logger.debug("===VAR INIT MODEL IS NUMPY===")
            if graph is None or states is None:
                raise ValueError(
                    "Models were provided as Collection of weight vectors. "
                    "Graph or States were None, but need to be specified.")
            if isinstance(graph, np.ndarray):
                self.graph = px.create_graph(graph)
            else:
                # A ready-made graph object was passed; use it directly.
                self.graph = graph
            self.weights = self.model
            self.model = [
                px.Model(weights=weights, graph=self.graph, states=self.states)
                for weights in self.model.T
            ]
        else:
            logger.debug("===VAR INIT MODEL IS PX===")
            self.px_edgelist = np.ascontiguousarray(
                np.copy(self.model[0].graph.edgelist))
            self.graph = px.create_graph(self.px_edgelist)
            self.states = np.ascontiguousarray(np.copy(self.model[0].states))
            self.weights = np.array(
                [np.copy(mod.weights) for mod in self.model])

        if edgelist is None:
            logger.debug("===VAR INIT CREATE EDGELIST===")
            self.edgelist = self._full_graph(len(self.model))
        else:
            self.edgelist = edgelist

        # -1 stored in uint16 is the largest representable value; writing it
        # explicitly avoids out-of-range integer errors on recent NumPy.
        masked_value = np.iinfo(np.uint16).max
        for sample in samples:
            self.y_true.append(np.copy(sample[:, label]))
            # Work on a private C-contiguous copy so the caller's data keeps
            # its labels.
            local = np.array(sample, dtype=np.uint16, order="C", copy=True)
            local[:, label] = masked_value
            self.local_data.append(local)
示例#5
0
    def parallel_train(self, split=None):
        """Train one model per data split in separate processes.

        One ``px.Model`` is created per entry of ``split.split_idx`` from
        ``self.init_weights()``, ``self.graph`` and a copy of
        ``self.state_space``. Each model is paired with the training rows
        selected by the corresponding index array from ``split.split()`` and
        handed to ``self._parallel_train`` in its own process. Processes are
        started and joined in batches of ``cpu_count() - 2`` (at least one).
        The list of models is stored in ``self.px_model``.

        Parameters
        ----------
        split :
            Object providing ``split_idx`` and ``split()``. Required despite
            the ``None`` default.

        Raises
        ------
        ValueError
            If ``split`` is ``None``.

        Notes
        -----
        NOTE(review): whether the training done in the child processes is
        visible in the parent's model objects depends on how the models and
        ``_parallel_train`` share memory - confirm.
        """
        if split is None:
            raise ValueError("parallel_train requires a split object")

        # This is slow and bad, maybe distribute processes among devices.
        train = np.ascontiguousarray(self.data_set.train.to_numpy().astype(
            np.uint16))
        states = np.ascontiguousarray(np.array(self.state_space, copy=True))
        weights = np.ascontiguousarray(self.init_weights())
        models = [
            px.Model(weights, self.graph, states=states)
            for _ in range(len(split.split_idx))
        ]

        processes = [
            Process(target=self._parallel_train,
                    args=(np.ascontiguousarray(train[idx.flatten()]), model))
            for model, idx in zip(models, split.split())
        ]

        # At least one worker, otherwise the batching below cannot advance.
        batch_size = max(1, cpu_count() - 2)
        total = len(processes)
        for start in range(0, total, batch_size):
            batch = processes[start:start + batch_size]
            for proc in batch:
                proc.start()
            for finished, proc in enumerate(batch, start=start + 1):
                proc.join()
                logger.info(
                    "Training Models: " +
                    "{:.2%}".format(float(finished) / float(total)))

        self.px_model = models
示例#6
0
    def predict(self, weights=None, n_test=None):
        """Predict the masked label column of the test set.

        A temporary model is created (zero training iterations) from the test
        data plus one extra row filled with ``self.state_space``; after
        ``self.scale_phi_emp`` its ``statistics`` are copied. For each weight
        vector of interest the temporary model's weights are overwritten, the
        second value returned by ``infer()`` is taken, and the reported score
        is that value minus the inner product of the weights with the copied
        statistics.

        The label column of the test data is then overwritten with the
        largest uint16 value (what ``-1`` wraps to in uint16) and the first
        ``n_test`` rows are predicted.

        Parameters
        ----------
        weights :
            Optional weight vector. When given, a single model is built from
            it, ``self.edgelist`` and ``self.state_space + 1``. When ``None``
            all local models are used: ``self.px_model_scaled`` if
            ``CONFIG.MODELTYPE`` is ``px.ModelType.integer``, otherwise
            ``self.px_model``.
        n_test :
            Maximum number of test rows to predict. It is capped at
            ``number of rows - 1``, which is also the default.
            NOTE(review): this always leaves out the last row - confirm that
            this is intended.

        Returns
        -------
        tuple or None
            ``(predictions, test_ll)``. With ``weights`` given,
            ``predictions`` is a single prediction result and ``test_ll`` a
            one-element list; otherwise both are lists with one entry per
            local model. ``None`` is returned when ``weights`` is ``None``
            and ``self.trained`` is falsy.
        """
        logger.debug("===PREDICT PREPARE DATA===")
        test = np.ascontiguousarray(self.data_set.test.to_numpy().astype(
            np.uint16))

        tmp = np.ascontiguousarray(
            np.full(shape=(1, self.state_space.shape[0]),
                    fill_value=self.state_space,
                    dtype=np.uint16))
        tmp_test = np.vstack((test, tmp))

        test_model = px.train(data=tmp_test,
                              graph=self._px_create_graph(),
                              mode=px.ModelType.mrf,
                              opt_regularization_hook=CONFIG.REGULARIZATION,
                              iters=0,
                              k=4)

        test_model = self.scale_phi_emp(test_model)
        statistics = np.copy(test_model.statistics)

        local_models = None
        partitions = []
        test_ll = None
        try:
            if weights is not None:
                np.copyto(test_model.weights, weights)
                _, a = test_model.infer()
                test_ll = [a - np.inner(weights, statistics)]
            else:
                if CONFIG.MODELTYPE == px.ModelType.integer:
                    local_models = self.px_model_scaled
                else:
                    local_models = self.px_model
                for mod in local_models:
                    if test_model.weights.shape[0] != mod.weights.shape[0]:
                        logger.error(
                            "weight size mismatch: test model has %s, "
                            "local model has %s", test_model.weights.shape[0],
                            mod.weights.shape[0])
                    np.copyto(test_model.weights, mod.weights)
                    _, a = test_model.infer()
                    partitions.append(a)
        finally:
            # Release the temporary model exactly once, on every path.
            test_model.delete()

        # Writing the wrapped value directly avoids the out-of-range integer
        # error recent NumPy raises for `uint16_array[...] = -1`.
        masked_value = np.iinfo(np.uint16).max
        if isinstance(self.data_set.label_column, str):
            label_column_idx = self.data_set.test.columns.get_loc(
                self.data_set.label_column)
        else:
            label_column_idx = self.data_set.label_column
        test[:, label_column_idx] = masked_value

        if n_test is None:
            n_test = test.shape[0] - 1
        else:
            n_test = min(n_test, test.shape[0] - 1)
        test = np.ascontiguousarray(test[:n_test])

        logger.debug("===PREDICT START PREDICTIONS===")
        if weights is not None:
            logger.debug("===PREDICT INPUT MODEL===")
            px_model = px.Model(weights=weights,
                                graph=px.create_graph(self.edgelist),
                                states=self.state_space + 1)
            return px_model.predict(test[:n_test]), test_ll

        logger.debug("===PREDICT ALL LOCAL MODELS===")
        if not self.trained:
            return None

        predictions = [
            px_model.predict(np.ascontiguousarray(np.copy(test[:n_test])))
            for px_model in local_models
        ]
        test_ll = [
            partitions[i] - np.inner(px_model.weights, statistics)
            for i, px_model in enumerate(local_models)
        ]
        return predictions, test_ll
示例#7
0
 def _px_create_model(self):
     """Create a px model from this object's weights, graph and state space.

     ``self.state_space`` is handed over reshaped to one column with one row
     per entry, and the statistics type is set to
     ``px.StatisticsType.overcomplete``.
     """
     n_entries = self.state_space.shape[0]
     column_states = self.state_space.reshape(n_entries, 1)
     model_kwargs = {
         "weights": self.weights,
         "graph": self.graph,
         "states": column_states,
         "stats": px.StatisticsType.overcomplete,
     }
     return px.Model(**model_kwargs)
示例#8
0
 def naive_kl(self, theta, average_statistics, graph, states):
     """Evaluate the regularized objective for the weight vector ``theta``.

     A model is built from ``theta``, ``graph`` and ``states``; the second
     value returned by its ``infer()`` is used as ``A``. The result is
     ``-(<theta, mean(average_statistics, axis=0)> - A)`` plus
     ``self.l1_regularization(theta)``.

     Parameters
     ----------
     theta :
         Weight vector to evaluate.
     average_statistics :
         Collection of statistic vectors that is averaged along axis 0.
     graph, states :
         Passed on to ``px.Model``.

     Returns
     -------
     The objective value.
     """
     model = px.Model(weights=theta, graph=graph, states=states)
     avg_stats = np.mean(average_statistics, axis=0)
     _, A = model.infer()
     return -(np.inner(theta, avg_stats) - A) + self.l1_regularization(theta)
示例#9
0
    def __init__(self,
                 models,
                 n=100,
                 samples=None,
                 graph=None,
                 states=None,
                 eps=1e-2):
        """Set up models, graph, states and the data used for aggregation.

        Parameters
        ----------
        models :
            Either a sequence of ``px.Model`` objects or a NumPy array whose
            columns are weight vectors. Forwarded to the parent constructor.
        n :
            Number of rows drawn from ``samples`` when it is an array (capped
            at the number of available rows), or number of samples drawn from
            every model when ``samples`` is ``None``.
        samples :
            Optional data. An array must have as many columns as there are
            entries in ``self.states``; any other iterable is treated as a
            collection of arrays that are copied and concatenated. When
            ``None`` the data is sampled from the models.
        graph :
            Required for weight arrays: an edge list array with exactly two
            columns (converted with ``px.create_graph``) or a graph object.
        states :
            Required for weight arrays.
        eps :
            Stored as ``self.eps``.

        Raises
        ------
        TypeError
            If ``models`` is neither a weight array nor made of px models.
        ValueError
            If ``graph``/``states`` are missing for a weight array, the edge
            list does not have exactly two columns, or ``samples`` has the
            wrong number of columns.
        """
        logger.debug("===KL INIT===")
        super(KL, self).__init__(models)

        if not (isinstance(models, np.ndarray)
                or all(isinstance(x, px.Model) for x in models)):
            raise TypeError(
                "Models have to be either a list of pxpy models or a numpy ndarray containing weights"
            )

        if isinstance(self.model, np.ndarray):
            logger.debug("===KL MODEL IS NDARRAY===")
            if graph is None or states is None:
                raise ValueError("Graph and States must be supplied.")
            if isinstance(graph, np.ndarray):
                if graph.ndim != 2 or graph.shape[1] != 2:
                    raise ValueError(
                        "Provided Edgelist has to have exactly 2 Columns")
                self.graph = px.create_graph(graph)
            else:
                self.graph = graph
            self.states = np.ascontiguousarray(np.copy(states))
            self.weights = self.model
            # Build the models from the converted graph and the private copy
            # of the states rather than from the raw arguments.
            self.model = [
                px.Model(weights=weights,
                         graph=self.graph,
                         states=self.states) for weights in self.model.T
            ]
        else:
            logger.debug("===KL MODEL IS PX MODEL===")
            self.states = np.ascontiguousarray(np.copy(self.model[0].states))
            self.edgelist = np.ascontiguousarray(
                np.copy(self.model[0].graph.edgelist))
            self.graph = px.create_graph(self.edgelist)

        if samples is not None:
            self.hint = "_BootsMap "
            if isinstance(samples, np.ndarray):
                cols = samples.shape[1]
                if cols != self.states.shape[0]:
                    # Previously this case silently left self.X unset.
                    raise ValueError(
                        "samples has {} columns but {} states were given".
                        format(cols, self.states.shape[0]))
                self.X = samples[np.random.choice(
                    np.arange(samples.shape[0]),
                    np.min([n, samples.shape[0]]),
                    replace=False)]
            else:
                self.X = np.concatenate(
                    [np.copy(sample) for sample in samples])
        else:
            self.hint = "_Bootstrap "
            self.X = np.concatenate([
                model.sample(num_samples=n,
                             sampler=CONFIG.SAMPLER,
                             iterations=20) for model in self.model
            ])
        self.K = len(self.model)
        # np.infty was removed in NumPy 2.0; np.inf is the same value.
        self.obj = np.inf
        self.eps = eps