Exemplo n.º 1
0
    def fit(self, X, y, sample_weight=None):
        """Prepare different things for fast computation of metrics.

        Builds a boolean mask of the events whose label equals
        ``self.uniform_label`` and stores on the instance, for those events
        only: their weights, the result of ``ut.compute_bin_indices`` over
        the ``self.uniform_features`` columns, and the result of
        ``ut.compute_bin_weights``.

        :param X: data from which ``self.uniform_features`` are taken
        :param y: labels, compared element-wise with ``self.uniform_label``
        :param sample_weight: optional weights, passed through ``check_xyw``
        :return: self
        :raises AssertionError: if no event has label ``self.uniform_label``
        """
        X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
        self._mask = numpy.array(y == self.uniform_label)
        # ndarray.any() is evaluated by numpy, whereas the builtin sum()
        # would iterate over the mask element by element in Python.
        assert self._mask.any(), 'No event of class, along which uniformity is desired'
        # presumably check_xyw always returns an array of weights (it is
        # indexed here even when sample_weight=None was passed) - TODO confirm
        self._masked_weight = sample_weight[self._mask]

        # Only the selected events, restricted to the uniformity features.
        X_part = numpy.array(take_features(X, self.uniform_features))[self._mask, :]
        self._bin_indices = ut.compute_bin_indices(X_part=X_part, n_bins=self.n_bins)
        self._bin_weights = ut.compute_bin_weights(bin_indices=self._bin_indices,
                                                   sample_weight=self._masked_weight)
        return self
Exemplo n.º 2
0
    def fit(self, X, y, sample_weight=None):
        """Prepare different things for fast computation of metrics.

        Builds a boolean mask of the events whose label equals
        ``self.uniform_label`` and stores on the instance, for those events
        only: their weights, the neighbour indices returned by a
        ``NearestNeighbors`` model fitted and queried on the
        ``self.uniform_features`` columns, and the result of
        ``ut.compute_group_weights``.

        :param X: data from which ``self.uniform_features`` are taken
        :param y: labels, compared element-wise with ``self.uniform_label``
        :param sample_weight: optional weights, passed through ``check_xyw``
        :return: self
        :raises AssertionError: if no event has label ``self.uniform_label``
        """
        X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
        self._mask = numpy.array(y == self.uniform_label)
        # ndarray.any() is evaluated by numpy, whereas the builtin sum()
        # would iterate over the mask element by element in Python.
        assert self._mask.any(), 'No events of uniform class!'
        # presumably check_xyw always returns an array of weights (it is
        # indexed here even when sample_weight=None was passed) - TODO confirm
        self._masked_weight = sample_weight[self._mask]

        # Only the selected events, restricted to the uniformity features.
        X_part = numpy.array(take_features(X, self.uniform_features))[self._mask, :]
        # computing knn indices: the model is queried with the very same
        # points it was fitted on; distances are discarded
        neighbours = NearestNeighbors(n_neighbors=self.n_neighbours, algorithm='kd_tree').fit(X_part)
        _, self._groups_indices = neighbours.kneighbors(X_part)
        self._group_weights = ut.compute_group_weights(self._groups_indices, sample_weight=self._masked_weight)
        return self
Exemplo n.º 3
0
    def fit(self, X, y, sample_weight=None):
        """Prepare different things for fast computation of metrics.

        Selects the events labelled ``self.uniform_label`` and caches on the
        instance the mask itself, the weights of the selected events, the
        neighbour indices obtained from ``NearestNeighbors.kneighbors`` on
        their ``self.uniform_features`` columns, and the output of
        ``ut.compute_group_weights`` for those neighbour groups.

        :param X: data from which ``self.uniform_features`` are taken
        :param y: labels, compared element-wise with ``self.uniform_label``
        :param sample_weight: optional weights, passed through ``check_xyw``
        :return: self
        :raises AssertionError: if no event has label ``self.uniform_label``
        """
        X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
        self._mask = numpy.array(y == self.uniform_label)
        # Let numpy test the mask instead of summing it item by item with
        # the Python builtin.
        assert self._mask.any(), 'No events of uniform class!'
        # NOTE(review): relies on check_xyw returning indexable weights even
        # for sample_weight=None - confirm against its implementation.
        self._masked_weight = sample_weight[self._mask]

        X_part = numpy.array(take_features(
            X, self.uniform_features))[self._mask, :]
        # computing knn indices (fit and query use the same points;
        # the returned distances are not needed)
        neighbours = NearestNeighbors(n_neighbors=self.n_neighbours,
                                      algorithm='kd_tree').fit(X_part)
        _, self._groups_indices = neighbours.kneighbors(X_part)
        self._group_weights = ut.compute_group_weights(
            self._groups_indices, sample_weight=self._masked_weight)
        return self
Exemplo n.º 4
0
    def fit(self, X, y, sample_weight=None):
        """Prepare different things for fast computation of metrics.

        Selects the events labelled ``self.uniform_label`` and caches on the
        instance the mask itself, the weights of the selected events, the
        output of ``ut.compute_bin_indices`` for their
        ``self.uniform_features`` columns (using ``self.n_bins``), and the
        output of ``ut.compute_bin_weights`` for those indices.

        :param X: data from which ``self.uniform_features`` are taken
        :param y: labels, compared element-wise with ``self.uniform_label``
        :param sample_weight: optional weights, passed through ``check_xyw``
        :return: self
        :raises AssertionError: if no event has label ``self.uniform_label``
        """
        X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
        self._mask = numpy.array(y == self.uniform_label)
        # Let numpy test the mask instead of summing it item by item with
        # the Python builtin.
        assert self._mask.any(
        ), 'No event of class, along which uniformity is desired'
        # NOTE(review): relies on check_xyw returning indexable weights even
        # for sample_weight=None - confirm against its implementation.
        self._masked_weight = sample_weight[self._mask]

        X_part = numpy.array(take_features(
            X, self.uniform_features))[self._mask, :]
        self._bin_indices = ut.compute_bin_indices(X_part=X_part,
                                                   n_bins=self.n_bins)
        self._bin_weights = ut.compute_bin_weights(
            bin_indices=self._bin_indices, sample_weight=self._masked_weight)
        return self
Exemplo n.º 5
0
    def fit(self, X, y, sample_weight=None):
        """Train the ensemble stage by stage on (X, y).

        Steps, in order:

        1. For classifiers, labels are re-encoded with ``numpy.unique`` and
           exactly two classes are required.
        2. Inputs go through ``check_xyw``; if ``self.loss`` is None a default
           loss is created (``LogLoss`` for classifiers, ``MSELoss``
           otherwise) and stored in ``self.loss``; the loss is then fitted.
        3. ``X`` is replaced by ``self._transform(X)``; the result must hold
           bin indices below 128.
        4. ``self.n_estimators`` stages are built. Each stage draws bootstrap
           weights and a random subset of columns, calls ``build_decision``
           and appends ``[feature, cut, leaf_values]`` to ``self.estimators``.
           Leaves and predictions are refreshed every ``self.update_step``
           stages, counted back from the last stage so that the final stage
           always triggers a refresh.

        :param X: training data, passed to ``check_xyw`` and ``self._transform``
        :param y: targets (class labels when ``self._is_classifier`` is true)
        :param sample_weight: optional weights, passed through ``check_xyw``
        :return: self
        :raises AssertionError: if a classifier sees other than two classes,
            if ``max_features`` is neither an integer nor a float or selects
            no/too many features, or if a bin index is 128 or larger
        """
        if self._is_classifier:
            self.classes_, y = numpy.unique(y, return_inverse=True)
            assert len(
                self.classes_) == 2, 'only binary classification supported'

        X, y, sample_weight = check_xyw(X,
                                        y,
                                        sample_weight=sample_weight,
                                        classification=self._is_classifier)

        if self.loss is None:
            if self._is_classifier:
                self.loss = losses.LogLoss(n_threads=self.n_threads)
            else:
                self.loss = losses.MSELoss()

        self.loss.fit(X, y, sample_weight=sample_weight)

        X = self._transform(X)
        n_samples, self.n_features_ = X.shape

        # Integer max_features is an absolute count (numpy integer scalars
        # are accepted as well); float max_features is a fraction of the
        # available features, rounded up.
        if isinstance(self.max_features, (int, numpy.integer)):
            used_features = int(self.max_features)
        else:
            assert isinstance(self.max_features, float)
            used_features = int(
                numpy.ceil(self.max_features * self.n_features_))
        assert 0 < used_features <= self.n_features_, 'wrong max_features: {}'.format(
            self.max_features)

        # Scan the transformed matrix once; the maximum is needed both for
        # the range check and for the number of thresholds.
        max_bin_index = numpy.max(X)
        assert max_bin_index < 128, 'bin indices should be smaller than 128'
        n_thresholds = int(max_bin_index) + 1

        self.estimators = []
        current_indices = numpy.zeros(n_samples, dtype=self._indices_type)
        pred = numpy.zeros(n_samples, dtype='float32')
        # Start from the optimal constant prediction.
        self.initial_bias_ = self.compute_optimal_step(pred)
        pred += self.initial_bias_

        bootstrapper = _Bootstrapper(self.random_state,
                                     bootstrap=self.bootstrap,
                                     n_samples=n_samples)

        targets, weights = self.loss.prepare_tree_params(pred)
        for stage in range(self.n_estimators):
            # Bootstrap weights are generated before the column subset is
            # drawn; keep this order so the sequence of random draws is stable.
            bootstrap_weights = bootstrapper.generate_weights()

            columns_to_test = numpy.sort(
                self.random_state.choice(self.n_features_,
                                         size=used_features,
                                         replace=False))

            # Only the chosen feature and cut are stored; the remaining two
            # returned values are not used here.
            feature, cut, _best_improvements, _best_cuts = build_decision(
                X,
                targets=targets,
                weights=weights,
                bootstrap_weights=bootstrap_weights,
                current_indices=current_indices,
                columns_to_test=columns_to_test,
                depth=self.depth,
                n_thresh=n_thresholds,
                reg=self._l2_regularization,
                use_friedman_mse=self.use_friedman_mse,
                n_threads=self.n_threads)

            # Leaf values are filled in later by
            # _update_leaves_and_predictions; until then they stay zero.
            leaf_values_placeholder = numpy.zeros(2**self.depth,
                                                  dtype='float32')
            self.estimators.append([feature, cut, leaf_values_placeholder])

            # True on the last stage and on every update_step-th stage
            # counted backwards from it.
            if (self.n_estimators - 1 - stage) % self.update_step == 0:
                self._update_leaves_and_predictions(current_indices,
                                                    pred,
                                                    target=targets,
                                                    hessians=weights,
                                                    stage=stage,
                                                    n_stages=min(
                                                        self.update_step,
                                                        len(self.estimators)))
                # computing new tree parameters
                targets, weights = self.loss.prepare_tree_params(pred)

        return self