def run_test(sampler_type, X_train, y_train, X_test, y_test):
    """Run one sampling experiment and publish its test-error curve.

    A sampler is built from the given labelled data plus the module-level
    ``X_unlabeled`` / ``y_unlabeled`` pool. Samples are then drawn in batches
    of ``batch_size``; after each batch a fresh logistic-regression model is
    fitted on everything collected so far and scored on the test set.

    Besides its arguments, this function reads the module-level names
    ``X_unlabeled``, ``y_unlabeled``, ``max_unlabeled_size``, ``batch_size``,
    ``training_size`` and ``output``.

    Args:
        sampler_type: ``'rs'`` (RandomSampler), ``'ms'`` (MarginSampler) or
            ``'hs'`` (HierarchicalSampler).
        X_train: Initial labelled features, handed to the sampler.
        y_train: Initial labels, handed to the sampler.
        X_test: Features used to score each fitted model.
        y_test: Labels used to score each fitted model.

    Returns:
        None. The result is delivered as ``output.put((sampler_type, errors))``
        where ``errors`` holds one ``1 - score`` value per batch.

    Raises:
        ValueError: If ``sampler_type`` is not one of the supported keys.
    """
    # Validate first so a bad key fails with a message that names it.
    if sampler_type not in ('rs', 'ms', 'hs'):
        raise ValueError("Unknown sampler_type " + repr(sampler_type) +
                         "; expected 'rs', 'ms' or 'hs'")

    # Samplers
    if sampler_type == 'rs':
        sampler = RandomSampler(X_train, y_train, X_unlabeled, y_unlabeled)
    elif sampler_type == 'ms':
        sampler = MarginSampler(X_train, y_train, X_unlabeled, y_unlabeled)
    else:
        sampler = HierarchicalSampler(X_train, y_train, X_unlabeled,
                                      y_unlabeled)

    print("Finish constructing sampler class " + sampler_type)

    errors = []
    # Continue from the sampler's own copy of the training data rather than
    # from the arguments that were passed in.
    X_train, y_train = sampler.X_train, sampler.y_train

    # range() with a step replaces the manual counter; it also fails fast
    # on batch_size == 0 instead of looping forever.
    for i in range(0, max_unlabeled_size, batch_size):
        x_samples = np.empty((batch_size, X_train.shape[1]), float)
        y_samples = np.empty((batch_size, ), int)
        for b in range(batch_size):
            x_sample, y_sample = sampler.sample()
            # HACK: every sampler except 'hs' is converted with .toarray()
            # before being stored in the dense batch buffer.
            if sampler_type != 'hs':
                x_sample = x_sample.toarray()
            x_samples[b] = x_sample
            y_samples[b] = y_sample

        X_train = vstack([X_train, x_samples])
        y_train = np.append(y_train, y_samples)

        # A new model is fitted from scratch on the enlarged training set.
        model = LogisticRegression(multi_class="multinomial",
                                   solver="lbfgs",
                                   max_iter=200)
        model.fit(X_train, y_train)
        error = 1 - model.score(X_test, y_test)

        # NOTE(review): ``i`` is the number of samples drawn *before* this
        # batch, so the printed count appears to lag the fitted data by
        # ``batch_size`` -- confirm whether that is intended.
        print(sampler_type + ' number of labels: ' + str(training_size + i) +
              ' error=' + str(error))
        errors.append(error)

    output.put((sampler_type, errors))
Exemplo n.º 2
0
    X_test = test_dataset.data
    X_test = vstack([X_test, X_train_base[2000:, :]]).toarray()
    y_test = test_dataset.target
    y_test = np.append(y_test, y_train_base[2000:])

    X_train_base = X_train_base[:2000, :]
    y_train_base = y_train_base[:2000]

    X_train, y_train = X_train_base[:
                                    training_size], y_train_base[:
                                                                 training_size]
    X_unlabeled, y_unlabeled = X_train_base[training_size:], y_train_base[
        training_size:]

    rs = RandomSampler(X_train, y_train, X_unlabeled, y_unlabeled)
    ms = MarginSampler(X_train, y_train, X_unlabeled, y_unlabeled)
    hs = HierarchicalSampler(X_train, y_train, X_unlabeled, y_unlabeled)

    x_train_random = X_train
    y_train_random = y_train
    x_train_margin = X_train
    y_train_margin = y_train
    x_train_Hierarchical = X_train
    y_train_Hierarchical = y_train

    print(
        'Successfully loaded the Newsgroups dataset into train and test set.')

    for num_samples in range(max_unlabeled_size):
        #Add data, random
        x_sample, y_sample = rs.sample()