Exemplo n.º 1
0
def test_subspace_alignment():
    """Check that subspace alignment returns finite, correctly sized outputs.

    Two random 100 x 10 datasets are aligned with three components; the
    second dataset has its columns scaled by 1..10. The first returned matrix
    must contain no NaNs and the other two must each have three columns.
    """
    n_samples, n_features, n_components = 100, 10, 3

    # Draw the first dataset before the second so the random stream is
    # consumed in a fixed order.
    first = rnd.randn(n_samples, n_features)
    column_scales = np.diag(np.arange(1, n_features + 1))
    second = rnd.randn(n_samples, n_features).dot(column_scales)

    aligner = SubspaceAlignedClassifier()
    outputs = aligner.subspace_alignment(first,
                                         second,
                                         num_components=n_components)
    transform, first_components, second_components = outputs

    assert not np.isnan(transform).any()
    for components in (first_components, second_components):
        assert components.shape[1] == n_components
Exemplo n.º 2
0
    def prepare(self,
                initial_target_labels=False,
                X_t_init=None,
                y_t_init=None):
        """Fit the model and pseudo-label the confident target samples.

        Steps, in order:

        1. Optionally (``self.domain_adaptation``) compute a subspace
           alignment between the source data and the target data, then
           project source and target data with the returned matrices.
        2. Optionally (``self.use_expert``) fit a random forest on the
           initial labelled target data.
        3. Fit ``self.model`` on the source data and the (possibly empty)
           initial labelled target data.
        4. Predict class probabilities for the target data and keep the
           samples whose highest probability exceeds
           ``self.min_confidence`` as the transduction set.

        Parameters
        ----------
        initial_target_labels : bool, default False
            Whether labelled target samples are supplied through
            ``X_t_init`` / ``y_t_init``.
        X_t_init : array-like, optional
            Initial labelled target samples. Ignored unless
            ``initial_target_labels`` is true, in which case it must contain
            at least one row.
        y_t_init : array-like, optional
            Labels for ``X_t_init``. Ignored unless
            ``initial_target_labels`` is true.

        Raises
        ------
        AssertionError
            If ``self.use_expert`` is set without ``initial_target_labels``,
            or if ``initial_target_labels`` is true and ``X_t_init`` has no
            rows.

        Notes
        -----
        Sets ``initial_target_labels``, ``X_t_init``, ``y_t_init``, ``X_s``,
        ``X_t``, ``p``, ``initial_selected_num``, ``X_trans``, ``y_trans``
        and ``trained`` on ``self``; additionally ``CZ`` when domain
        adaptation is enabled and ``expert`` when the expert is enabled.
        Seeds NumPy's global random generator with ``self.seed``.
        """
        # Validate up front, before any state is modified or anything
        # expensive runs. The errors are raised explicitly rather than via
        # `assert` so the checks survive `python -O`; AssertionError is kept
        # so callers observe the same exception type as before.
        if self.use_expert and not initial_target_labels:
            raise AssertionError(
                "to use expert classifier, initial target labels must be true")

        if initial_target_labels:
            # Accept plain sequences as well as arrays (a list has no
            # `.shape`), and treat an omitted argument as empty input.
            X_t_init = np.asarray([] if X_t_init is None else X_t_init)
            y_t_init = np.asarray([] if y_t_init is None else y_t_init)
            if X_t_init.shape[0] == 0:
                raise AssertionError('Initial target data must not be empty')

        np.random.seed(self.seed)
        self.initial_target_labels = initial_target_labels

        if initial_target_labels:
            self.X_t_init = X_t_init
            self.y_t_init = y_t_init
        else:
            # No labelled target data: use empty placeholders with the same
            # number of columns as the target data.
            self.X_t_init = np.empty([0, self.X_t_ori.shape[1]])
            self.y_t_init = np.array([])

        if self.domain_adaptation:
            # Align against all available target samples: the initial
            # labelled ones (if any) stacked on top of the unlabelled ones.
            if initial_target_labels:
                target_for_alignment = np.vstack(
                    [self.X_t_init, self.X_t_ori])
            else:
                target_for_alignment = self.X_t_ori

            V, CX, self.CZ = SubspaceAlignedClassifier().subspace_alignment(
                self.X_s_ori,
                target_for_alignment,
                num_components=self.num_components)

            # Source: map to principal components, then align to the
            # target subspace. Target data is only mapped with CZ.
            self.X_s = self.X_s_ori @ CX @ V
            self.X_t = self.X_t_ori @ self.CZ
            if initial_target_labels:
                self.X_t_init = self.X_t_init @ self.CZ
        else:
            self.X_s = self.X_s_ori
            self.X_t = self.X_t_ori

        if self.use_expert:
            self.expert = RandomForestClassifier(n_estimators=64).fit(
                self.X_t_init, self.y_t_init)

        self.model.fit(self.X_s,
                       self.X_t_init,
                       self.y_s,
                       self.y_t_init,
                       warm_start=False,
                       max_iter=self.max_iter,
                       use_dropout=True,
                       desc='Preparing',
                       regularize=True)

        # Transduction through the source-specific net (output 1 of the
        # model). Transposed so that axis 1 is the one reduced per sample
        # below -- presumably (n_samples, n_classes); confirm against the
        # model's predict_proba.
        pred_proba_f = self.model.predict_proba(self.X_t, 1).T
        pred_proba = pred_proba_f

        if initial_target_labels:
            # Blend with output 2 of the model, weighted by alpha, and
            # optionally with the expert's probabilities, weighted by beta.
            pred_proba_g = self.model.predict_proba(self.X_t, 2).T
            pred_proba = (self.alpha * pred_proba_f +
                          (1 - self.alpha) * pred_proba_g)
            if self.use_expert:
                pred_proba_expert = self.expert.predict_proba(self.X_t)
                pred_proba = ((1 - self.beta) * pred_proba +
                              self.beta * pred_proba_expert)

        self.p = pred_proba

        # Highest class probability per sample; keep samples above the
        # confidence threshold.
        proba_max = pred_proba.max(axis=1)
        idx_gt_threshold = np.where(proba_max > self.min_confidence)
        self.initial_selected_num = len(proba_max[idx_gt_threshold])

        # NOTE(review): labels are taken from output 1 alone (pred_proba_f),
        # while the confidence filter above uses the blended probabilities.
        # Kept as in the original; confirm this asymmetry is intended.
        pred = pred_proba_f.argmax(axis=1)

        # The first-phase transduction metrics are only ever printed, so
        # they are computed only when they will be shown.
        if self.verbosity > 1:
            acc = accuracy_score(self.y_t, pred)
            acc_sel = accuracy_score(self.y_t[idx_gt_threshold],
                                     pred[idx_gt_threshold])
            print('selected      : ', self.initial_selected_num)
            print('trans acc     :', acc)
            print('trans sel acc :', acc_sel)

        # Transduction set: confidently pseudo-labelled target samples,
        # preceded by the initial labelled target samples when available.
        if initial_target_labels:
            self.X_trans = np.vstack(
                [self.X_t_init, self.X_t[idx_gt_threshold]])
            self.y_trans = np.concatenate(
                [self.y_t_init, pred[idx_gt_threshold]])
        else:
            self.X_trans = self.X_t[idx_gt_threshold]
            self.y_trans = pred[idx_gt_threshold]

        self.trained = True