示例#1
0
    def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
        """Split the unlabeled pool into confidently predicted and remaining samples.

        An inductive conformal classifier (decision tree + margin error
        function) is fitted on the initial labeled data plus the data labeled
        so far, calibrated on ``self.calibration_data_x/y``, and used to
        predict every row of ``unlabeled_data_x``.  Rows whose confidence AND
        credibility are both strictly above ``config.confidence`` /
        ``config.credibility`` are selected; every other row stays unlabeled.

        Args:
            labeled_data_x: Features labeled in previous rounds (may be empty).
            labeled_data_y: Labels matching ``labeled_data_x``.
            unlabeled_data_x: Features of the samples still without a label.

        Returns:
            A tuple ``(selected_x, selected_y, remaining_x)``: the selected
            rows of ``unlabeled_data_x``, the labels predicted for them, and
            the rows that were not selected.
        """
        # Training set = initial labeled data, extended with anything labeled
        # in earlier rounds.  np.concatenate is skipped for an empty extension.
        if len(labeled_data_x) > 0:
            labeled_x = np.concatenate((self.init_labeled_data_x, labeled_data_x))
            labeled_y = np.concatenate((self.init_labeled_data_y, labeled_data_y))
        else:
            labeled_x = self.init_labeled_data_x
            labeled_y = self.init_labeled_data_y

        # Build the conformal predictor that reports confidence and credibility.
        model = ClassifierAdapter(
            DecisionTreeClassifier(random_state=config.random_state,
                                   min_samples_leaf=config.min_samples_leaf))
        nc = ClassifierNc(model, MarginErrFunc())
        model_icp = IcpClassifier(nc, smoothing=True)
        model_icp.fit(labeled_x, labeled_y)
        model_icp.calibrate(self.calibration_data_x, self.calibration_data_y)
        s = model_icp.predict_conf(unlabeled_data_x)
        print(s)

        # Each row of `s` is read as (label, confidence, credibility).
        s = np.asarray(s)
        is_selected = (s[:, 1] > config.confidence) & (s[:, 2] > config.credibility)

        # The remainder is the exact complement of the selection.  Using a
        # second strict "<" test instead would silently drop every sample that
        # sits exactly on a threshold (it would be neither selected nor kept).
        labeled_ind = np.flatnonzero(is_selected)
        unlabeled_ind = np.flatnonzero(~is_selected)

        labeled_unlabeled_x = np.take(unlabeled_data_x, labeled_ind, axis=0)
        labeled_unlabeled_y = s[labeled_ind, 0]
        unlabeled_data_x = np.take(unlabeled_data_x, unlabeled_ind, axis=0)

        return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
示例#2
0
    def test_confidence_credibility(self):
        """Run an inductive conformal classifier on iris and print its output.

        The data set is shuffled and cut into three folds (train, calibration,
        test).  The test-fold predictions are printed as a table with the
        columns Label, Confidence and Credibility; nothing is asserted.
        """
        iris = load_iris()
        x, y = iris.data, iris.target

        # Re-encode each distinct class value as its position among the
        # sorted unique values (in place).
        for code, class_value in enumerate(np.unique(y)):
            y[y == class_value] = code

        n_instances = y.size
        shuffled = np.random.permutation(n_instances)

        # Train and calibration folds are one third each; the test fold gets
        # whatever is left.
        fold = int(n_instances / 3)
        train_idx = shuffled[:fold]
        cal_idx = shuffled[fold:2 * fold]
        test_idx = shuffled[2 * fold:]

        forest = RandomForestClassifier()
        icp = IcpClassifier(ClassifierNc(ClassifierAdapter(forest)))

        icp.fit(x[train_idx, :], y[train_idx])
        icp.calibrate(x[cal_idx, :], y[cal_idx])

        predictions = icp.predict_conf(x[test_idx, :])
        table = pd.DataFrame(predictions,
                             columns=["Label", "Confidence", "Credibility"])
        print(table)
示例#3
0
# Example script: inductive conformal classification on the iris data set,
# printing label, confidence and credibility for a held-out test fold.
#
# numpy and pandas are imported here because the script uses `np` and `pd`;
# without these imports it fails with NameError.
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Re-encode each distinct class value as its position among the sorted
# unique values (in place).
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

# Shuffle, then cut into three folds: train and calibration get one third
# each, the test fold takes the remainder.
train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

# Fit the underlying model on the training fold, then calibrate the
# conformal predictor on the separate calibration fold.
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=["Label", "Confidence", "Credibility"]))
# Example script: inductive conformal classification on the iris data set,
# printing label, confidence and credibility for a held-out test fold.
#
# numpy is imported here because the script uses `np`; without it the
# script fails with NameError.
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Re-encode each distinct class value as its position among the sorted
# unique values (in place).
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

# Shuffle, then cut into three folds: train and calibration get one third
# each, the test fold takes the remainder.
train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

# Fit the underlying model on the training fold, then calibrate the
# conformal predictor on the separate calibration fold.
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])


print(pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                   columns=['Label', 'Confidence', 'Credibility']))