Пример #1
0
    def AC_(self, X, y):

        # X, y = shuffle(X, Y)
        # y = y.astype('int')
        alibox = ToolBox(X=X, y=y, query_type='AllLabels')

        alibox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)

        model = alibox.get_default_model()

        # stopping_criterion = alibox.get_stopping_criterion('num_of_queries',50)

        model.fit(X, y)
        pred = model.predict(X)

        # 整理矩阵系数为信任度,返回start
        w = model.class_weight
        dim = w.shape[0]
        trustValue = []

        for i in range(0, dim):
            value = math.exp(w[i])  # exp() 方法返回x的指数,ex。
            trustValue.append(value)
        return trustValue
import copy
from sklearn.datasets import load_iris
from alipy import ToolBox

X, y = load_iris(return_X_y=True)
alibox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')

# Split data
alibox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)

# Use the default Logistic Regression classifier
model = alibox.get_default_model()

# The cost budget is 50 times querying
stopping_criterion = alibox.get_stopping_criterion('num_of_queries', 50)

# Use pre-defined strategy
QBCStrategy = alibox.get_query_strategy(strategy_name='QueryInstanceQBC')
QBC_result = []

for round in range(10):
    # Get the data split of one fold experiment
    train_idx, test_idx, label_ind, unlab_ind = alibox.get_split(round)
    # Get intermediate results saver for one fold experiment
    saver = alibox.get_stateio(round)

    while not stopping_criterion.is_stop():
        # Select a subset of Uind according to the query strategy
        # Passing model=None to use the default model for evaluating the committees' disagreement
        select_ind = QBCStrategy.select(label_ind,
                                        unlab_ind,
Пример #3
0
def create_and_implement_strategy(strategy_name, data, labels, queries):

    # Keep only the values of data and labels dataframe (Later, we use the global split based on idxs)
    X = data.values
    y = np.asarray(labels)
    toolbox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')

    # Create Logistic Regression model ( Default Setting with liblinear solver)
    model = toolbox.get_default_model()

    # Implement query strategy
    uncertainty_strategy = toolbox.get_query_strategy(strategy_name=strategy_name)

    # Create array to save the results
    examples = []

    # Set stopping criterion, we will stop in 1000 labeled examples
    stopping_criterion = toolbox.get_stopping_criterion('num_of_queries', queries)

    # Get the indexes of the global split
    with open("dataset_al", "rb") as f:
        train_idx, test_idx, labeled_idx, unlabeled_idx = pickle.load(f)

    # Create saver to save the results
    saver = StateIO(round=0, train_idx=train_idx,
                    test_idx=test_idx, init_L=labeled_idx,
                    init_U=unlabeled_idx, saving_path='.')

    # print(train_idx.shape, test_idx.shape)

    # Starting with some labeled examples
    model.fit(X=X[labeled_idx.index, :], y=y[labeled_idx.index])
    y_pred = model.predict(X[test_idx, :])

    # Calculate the accuracy of the prediction
    accuracy = toolbox.calc_performance_metric(y_true=y[test_idx], y_pred=y_pred, performance_metric='accuracy_score')

    # Save accuracy of the prediction
    saver.set_initial_point(accuracy)

    while not stopping_criterion.is_stop():
        # Select example of the unlabeled dataset
        example = uncertainty_strategy.select(labeled_idx, unlabeled_idx, model=model, batch_size=1)
        # Update the label idxs
        labeled_idx.update(example)
        unlabeled_idx.difference_update(example)
        # Train model for the added example
        model.fit(X=X[labeled_idx.index, :], y=y[labeled_idx.index])
        y_pred = model.predict(X[test_idx, :])
        # Calculate accuracy
        accuracy = toolbox.calc_performance_metric(y_true=y[test_idx], y_pred=y_pred,
                                                   performance_metric='accuracy_score')
        # f1 = alibox.calc_performance_metric(y_true=y[test_idx], y_pred=y_pred, performance_metric='f1_score')

        # Save update results
        state = toolbox.State(select_index=example, performance=accuracy)
        saver.add_state(state)
        saver.save()

        # Update progress for stopping criterion
        stopping_criterion.update_information(saver)

    stopping_criterion.reset()
    examples.append(copy.deepcopy(saver))

    # Uncomment and return in order to save the new active learning dataset
    # Save selected x_train examples
    X_train = X[labeled_idx, :]
    # Save labels for the examples
    y_train = y[labeled_idx, :]
    # Reshape target
    y_train = np.array(y_train).reshape(-1)

    # Save to pickle
    # with open('qbc_dataset','wb') as f:
    #     pickle.dump((X_train, y_train), f)

    return examples