def main():
    """Compare random sampling with entropy-based active sampling on MNIST.

    Two committees of ten multinomial logistic-regression models start from
    the same labeled subset (1% of the training split). For 30 rounds each
    committee receives 32 additional labeled rows -- drawn uniformly at
    random for one committee, ranked by ``ActiveLearner(strategy='entropy')``
    for the other -- is refit, and is scored on the held-out test split by
    majority vote.

    The per-round accuracies are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]

    all_results_record = {'entropy': []}
    passive_results = []

    # NOTE(review): fetch_mldata was removed from scikit-learn in 0.22;
    # migrate to fetch_openml when the dependency is upgraded.
    mnist = fetch_mldata('MNIST original')
    x_train, x_test, y_train, y_test = train_test_split(
        mnist.data, mnist.target)

    # The names bound here must match the ones the query loop reads; the
    # previous spelling (X_unlabeled / oracle_label_results) left
    # unlabeled_data and oracle_label undefined.
    labeled_data, unlabeled_data, available_label, oracle_label = train_test_split(
        x_train, y_train, test_size=0.99)

    def committee_accuracy(committee, features, labels):
        """Refit every member and return the majority-vote test accuracy."""
        votes = []
        for member in committee:
            member.fit(features, labels)
            votes.append(member.predict(x_test))
        # Column-wise majority vote over the stacked member predictions.
        majority = np.apply_along_axis(
            lambda column: Counter(column).most_common(1)[0][0],
            0, np.stack(votes))
        return np.sum(majority == y_test) / np.shape(x_test)[0]

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    labeled_data_rand = deepcopy(labeled_data)
    available_label_rand = deepcopy(available_label)
    labeled_data_active = deepcopy(labeled_data)
    available_label_active = deepcopy(available_label)

    batch_size = 32
    examples_list = [batch_size] * 30

    # NOTE(review): queried rows are never removed from unlabeled_data, so a
    # row can be selected again in a later round.
    for num_queries in examples_list:
        # num_samples is not defined in this function; presumably a
        # module-level list -- verify against the rest of the file.
        num_samples.append(num_queries)

        # Passive baseline: label a uniformly random batch.
        random_queries = np.random.choice(unlabeled_data.shape[0],
                                          num_queries,
                                          replace=False)
        labeled_data_rand = np.concatenate(
            (labeled_data_rand, unlabeled_data[random_queries, :]))
        available_label_rand = np.concatenate(
            (available_label_rand, oracle_label[random_queries]))
        passive_results.append(
            committee_accuracy(classifier_random, labeled_data_rand,
                               available_label_rand))

        # Active learning: label the batch ranked by the current committee.
        al_obj = ActiveLearner(strategy='entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, unlabeled_data, num_queries)

        labeled_data_active = np.concatenate(
            (labeled_data_active, unlabeled_data[indexes, :]))
        available_label_active = np.concatenate(
            (available_label_active, oracle_label[indexes]))
        all_results_record['entropy'].append(
            committee_accuracy(classifier_active, labeled_data_active,
                               available_label_active))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    np.savetxt('./misc/active_model_accuracy.txt',
               all_results_record['entropy'])
# Example #2
0
        # Split the training data: 20% stays as the initial labeled set, the
        # remaining 80% forms the query pool whose labels are held in y_oracle.
        X_labeled, X_unlabeled, y_labeled, y_oracle = train_test_split(
            X_train, y_train, test_size=0.8)

        # Every query budget is evaluated independently, always starting from
        # the same initial labeled set (X_labeled / y_labeled are not mutated).
        for num_queries in (0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500):
            # NOTE(review): num_samples is defined outside this fragment; if
            # this code runs once per repetition the list grows on every pass
            # -- confirm that is intended before plotting against it.
            num_samples.append(num_queries)

            # Baseline: add num_queries pool rows chosen uniformly at random
            # (without replacement), refit, and record the test accuracy.
            random_queries = np.random.choice(X_unlabeled.shape[0], num_queries, replace=False)
            X_augmented = vstack((X_labeled, X_unlabeled[random_queries, :]))
            y_augmented = np.concatenate((y_labeled, y_oracle[random_queries]))
            clf.fit(X_augmented, y_augmented)
            random_sampling_results\
                .append(np.sum(clf.predict(X_test) == y_test) / np.shape(X_test)[0])

            # Active strategies: uncertainty_sampling_results is indexed by
            # strategy name below (presumably a dict of lists -- verify).
            for strategy in uncertainty_sampling_results:
                # Refit on the initial labeled set only, so the ranking does
                # not see the rows added for the random baseline above.
                clf.fit(X_labeled, y_labeled)
                idx = ActiveLearner(strategy=strategy).rank(clf, X_unlabeled, num_queries)
                X_augmented = vstack((X_labeled, X_unlabeled[idx, :]))
                y_augmented = np.concatenate((y_labeled, y_oracle[idx]))
                clf.fit(X_augmented, y_augmented)
                uncertainty_sampling_results[strategy]\
                    .append(np.sum(clf.predict(X_test) == y_test) / np.shape(X_test)[0])

        # Store this run's accuracy curves next to those of the other runs.
        all_random_sampling_results.append(random_sampling_results)
        for strategy in uncertainty_sampling_results:
            all_uncertainty_sampling_results[strategy]\
                .append(uncertainty_sampling_results[strategy])


    sns.set_style("darkgrid")
    plt.plot(num_samples, np.mean(all_random_sampling_results, axis=0), 'red', 
             num_samples, np.mean(all_uncertainty_sampling_results['least_confident'], axis=0), 'blue',
# Example #3
0
def main():
    """Compare random sampling with vote-entropy active sampling.

    Uses the Fashion-MNIST training images (flattened to one row per image).
    Two committees of ten multinomial logistic-regression models start from
    the same labeled subset (1% of the training split). For 10 rounds each
    committee receives 32 additional labeled rows -- drawn uniformly at
    random for one committee, ranked by
    ``ActiveLearner(strategy='vote_entropy')`` for the other -- is refit, and
    is scored on the held-out split by majority vote.

    The per-round accuracies are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]

    decision = {'vote_entropy': []}
    passive_results = []

    # Only the training portion is used; it is re-split into train/test below.
    (X_train_set, y_train_set), (X_test_set,
                                 y_test_set) = fashion_mnist.load_data()
    x_train, x_test, y_train, y_test = train_test_split(
        X_train_set, y_train_set)
    # Flatten each image into a single feature row.
    x_train = x_train.reshape(x_train.shape[0],
                              x_train.shape[1] * x_train.shape[2])
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])

    labeled_data, unlabeled_data, available_label, oracle_label = train_test_split(
        x_train, y_train, test_size=0.99)

    def committee_accuracy(committee, features, labels):
        """Refit every member and return the majority-vote test accuracy."""
        votes = []
        for member in committee:
            member.fit(features, labels)
            # Score against the local x_test; the previous X_test spelling
            # was never bound in this function.
            votes.append(member.predict(x_test))
        # Column-wise majority vote over the stacked member predictions.
        majority = np.apply_along_axis(
            lambda column: Counter(column).most_common(1)[0][0],
            0, np.stack(votes))
        return np.sum(majority == y_test) / np.shape(x_test)[0]

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    labeled_data_rand = deepcopy(labeled_data)
    available_label_rand = deepcopy(available_label)
    labeled_data_active = deepcopy(labeled_data)
    available_label_active = deepcopy(available_label)

    batch_size = 32
    new_samples_listing = [batch_size] * 10

    # NOTE(review): queried rows are never removed from unlabeled_data, so a
    # row can be selected again in a later round.
    for new_samples in new_samples_listing:
        # Passive baseline: label a uniformly random batch.
        random_queries = np.random.choice(unlabeled_data.shape[0],
                                          new_samples,
                                          replace=False)
        labeled_data_rand = np.concatenate(
            (labeled_data_rand, unlabeled_data[random_queries, :]))
        available_label_rand = np.concatenate(
            (available_label_rand, oracle_label[random_queries]))
        passive_results.append(
            committee_accuracy(classifier_random, labeled_data_rand,
                               available_label_rand))

        # Active learning: label the batch ranked by the current committee.
        al_obj = ActiveLearner(strategy='vote_entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, unlabeled_data, new_samples)

        labeled_data_active = np.concatenate(
            (labeled_data_active, unlabeled_data[indexes, :]))
        available_label_active = np.concatenate(
            (available_label_active, oracle_label[indexes]))
        decision['vote_entropy'].append(
            committee_accuracy(classifier_active, labeled_data_active,
                               available_label_active))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    # savetxt needs a 1-D/2-D array-like: pass the accuracy list, not the
    # dict that wraps it.
    np.savetxt('./misc/active_model_accuracy.txt', decision['vote_entropy'])
# Example #4
0
def main():
    """Compare random sampling with entropy-based active sampling.

    Uses the Fashion-MNIST training images (flattened to one row per image).
    Two committees of ten multinomial logistic-regression models start from
    the same labeled subset (1% of the training split). For 30 rounds each
    committee receives 32 additional labeled rows -- drawn uniformly at
    random for one committee, ranked by ``ActiveLearner(strategy='entropy')``
    for the other -- is refit, and is scored on the held-out split by
    majority vote.

    The per-round accuracies are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000) for _ in range(committee_size)
    ]

    results_record = {'entropy': []}
    passive_results = []

    # Only the training portion is used; it is re-split into train/test below.
    (X_train_set, y_train_set), (X_test_set,
                                 y_test_set) = fashion_mnist.load_data()
    x_train, x_test, y_train, y_test = train_test_split(
        X_train_set, y_train_set)
    # Flatten each image into a single feature row.
    x_train = x_train.reshape(x_train.shape[0],
                              x_train.shape[1] * x_train.shape[2])
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])

    X_labeled, X_unlabeled, y_labeled, y_oracle = train_test_split(
        x_train, y_train, test_size=0.99)

    def committee_accuracy(committee, features, labels):
        """Refit every member and return the majority-vote test accuracy."""
        votes = []
        for member in committee:
            member.fit(features, labels)
            # Score against the local x_test; the previous X_test spelling
            # was never bound in this function.
            votes.append(member.predict(x_test))
        # Column-wise majority vote over the stacked member predictions.
        majority = np.apply_along_axis(
            lambda column: Counter(column).most_common(1)[0][0],
            0, np.stack(votes))
        return np.sum(majority == y_test) / np.shape(x_test)[0]

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(X_labeled, y_labeled)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(X_labeled, y_labeled)

    X_labeled_rand = deepcopy(X_labeled)
    y_labeled_rand = deepcopy(y_labeled)
    X_labeled_active = deepcopy(X_labeled)
    y_labeled_active = deepcopy(y_labeled)

    batch_size = 32
    examples_list = [batch_size] * 30

    # NOTE(review): queried rows are never removed from X_unlabeled, so a
    # row can be selected again in a later round.
    for new_examples_count in examples_list:
        # num_samples is not defined in this function; presumably a
        # module-level list -- verify against the rest of the file.
        num_samples.append(new_examples_count)

        # Passive baseline: label a uniformly random batch.
        random_datapoint = np.random.choice(X_unlabeled.shape[0],
                                            new_examples_count,
                                            replace=False)
        X_labeled_rand = np.concatenate(
            (X_labeled_rand, X_unlabeled[random_datapoint, :]))
        y_labeled_rand = np.concatenate(
            (y_labeled_rand, y_oracle[random_datapoint]))
        passive_results.append(
            committee_accuracy(classifier_random, X_labeled_rand,
                               y_labeled_rand))

        # Active learning: label the batch ranked by the current committee.
        al_obj = ActiveLearner(strategy='entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, X_unlabeled,
                              new_examples_count)

        X_labeled_active = np.concatenate(
            (X_labeled_active, X_unlabeled[indexes, :]))
        y_labeled_active = np.concatenate(
            (y_labeled_active, y_oracle[indexes]))
        results_record['entropy'].append(
            committee_accuracy(classifier_active, X_labeled_active,
                               y_labeled_active))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    np.savetxt('./misc/active_model_accuracy.txt', results_record['entropy'])
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from active_learning.active_learning import ActiveLearner
from collections import Counter

# Query-by-committee demo on the iris data: two logistic-regression models
# repeatedly ask for the label of the sample ranked first by vote entropy.
X, y = load_iris(return_X_y=True)
n_queries = 100

# Seed the labeled set with rows 0, 50 and 100 (presumably one sample per
# iris class -- verify against the dataset ordering).
X_labeled, y_labeled = X[[0, 50, 100]], y[[0, 50, 100]]
estimators = [
    LogisticRegression(solver='lbfgs', multi_class='auto'),
    LogisticRegression(solver='lbfgs', multi_class='auto'),
]

# The committee has to be fitted before the learner can rank samples.
for estimator in estimators:
    estimator.fit(X_labeled, y_labeled)

learner = ActiveLearner(strategy='vote_entropy')

preds = []
results = []
correct = 0

for _ in range(n_queries):
    query_idx = learner.rank(estimators, X, num_queries=1)
    X_labeled = np.concatenate((X_labeled, X[query_idx]), axis=0)
    y_labeled = np.concatenate((y_labeled, y[query_idx]), axis=0)
    # Start each round with an empty vote list: otherwise predictions from
    # earlier rounds would keep voting alongside the freshly refitted
    # estimators and skew the majority.
    preds = []
    for estimator in estimators:
        estimator.fit(X_labeled, y_labeled)
        preds.append(estimator.predict(X))
    # Column-wise majority vote over the current committee's predictions.
    majority_votes = np.apply_along_axis(
        lambda column: Counter(column).most_common(1)[0][0], 0, np.stack(preds))
    accuracy = np.sum(majority_votes == y) / np.shape(X)[0]
    correct += np.sum(majority_votes == y)