Примеры использования BaggingClassifierPU на Python

Язык программирования: Python

Пространство имен/Пакет: baggingPU

Класс/Тип: BaggingClassifierPU

Примеров на hotexamples.com: 6

Найдено 6 примеров использования BaggingClassifierPU на Python. Это лучшие примеры кода на Python для baggingPU.BaggingClassifierPU, взятые из проектов с открытым исходным кодом. Вы можете оценивать примеры, чтобы помочь нам улучшить их качество.

Основные методы

Показать Скрыть

BaggingClassifierPU(6)

fit(3)

predict(1)

predict_proba(1)

Пример #1

Показать файл

def _plot_unlabeled_scores(score_column, colorbar_label, title):
    """Scatter the rows with ``y == 0``, coloured by ``results[score_column]``."""
    unlabeled = y == 0
    plt.scatter(
        X[unlabeled].feature1,
        X[unlabeled].feature2,
        c=results[unlabeled][score_column],
        cmap='jet_r',
        alpha=0.5,
        s=50,
        linewidth=0,
    )
    plt.colorbar(label=colorbar_label)
    plt.title(title)
    plt.show()


# 2.2 Conclusion - visualise the PU-learning bagging data for `label=0`,
# i.e. the `unlabeled` rows, showing the effect of the bagging method on them.
_plot_unlabeled_scores('output_bag', 'Unlabeled样本的预测分值', 'PU Bagging')

# 3.1 Using `BaggingClassifierPU`
bc = BaggingClassifierPU(
    DecisionTreeClassifier(),
    n_estimators=1000,
    max_samples=sum(y),
    n_jobs=-1,
)
bc.fit(X, y)
# Keep column 1 of the out-of-bag decision function as this approach's score.
results['output_skb'] = bc.oob_decision_function_[:, 1]

# Visualize the approach's result
_plot_unlabeled_scores(
    'output_skb',
    'Scores given to unlabeled points',
    r'Using ${\tt BaggingClassifierPU}$',
)

Пример #2

Показать файл

Файл: puc.py Проект: prescriptive-possibilities-april-15-19/mocking

)



tfidf = seq_vectorizer(ngram_max=4, downsample=40000)

print(f"FOR BaggingClassifierPU MODELS:\t Sequences: {sequences.shape};\t Binding: {binding.shape};\t Number of ligand id values: {len(lig_id_vals)}. ")

# Fit one PU bagging model per ligand id and collect the fitted models in
# `models`, keyed by ligand id. A ligand whose data preparation or fit fails
# is skipped (best effort), but the failure is reported instead of being
# silently swallowed.
spinner = Spinner()
models = {}
for lig_id in lig_id_vals:
    try:
        X, y = fitter_df_maker(lig_id)
        sys.stdout.write(f"Models dict now populated to length {len(models)}; ")
        bc = BaggingClassifierPU(DecisionTreeClassifier(),
                                 n_estimators=estimators,
                                 #n_jobs=-1,
                                 max_samples=int(sum(y.values)))

        sys.stdout.write(f"next, fitting on ligand #{lig_id}")
        spinner.start()
        try:
            bc.fit(X, y)
        finally:
            # Always stop the spinner, even when fit() raises or the user
            # interrupts, so it is never left running.
            spinner.stop()
        models[lig_id] = bc
        #sys.stdout.flush()
        sys.stdout.write('\r') # yes finally https://stackoverflow.com/questions/23138413/clearing-old-data-from-sys-stdout-in-python
    except Exception as exc:
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate; the ligand is still skipped as before.
        sys.stderr.write(f"\nSkipping ligand #{lig_id}: {exc!r}\n")

#with open('models.pickle', 'wb') as mp:
#    pickle.dump(models, mp)

Пример #3

Показать файл

        # Report how the current `model` performs on X against the reference
        # labels `y_orig`: confusion matrix first, then the scalar metrics.
        # NOTE(review): the outer print() emits whatever print_cm returns;
        # if print_cm prints the table itself this adds an extra line - confirm.
        print(
            print_cm(sklearn.metrics.confusion_matrix(y_orig,
                                                      model.predict(X)),
                     labels=['negative', 'positive']))
        print('')
        # NOTE(review): model.predict(X) is re-evaluated for every metric
        # below; the predictions could be computed once and reused.
        print('Precision: ', precision_score(y_orig, model.predict(X)))
        print('Recall: ', recall_score(y_orig, model.predict(X)))
        print('Accuracy: ', accuracy_score(y_orig, model.predict(X)))
        print('f1_score: ', f1_score(y_orig, model.predict(X)))

        # Keep this model's F1 score in the running list `f1_orig`.
        f1_orig.append(f1_score(y_orig, model.predict(X)))

        # Replace `model` with a PU bagging ensemble of XGBoost classifiers
        # trained on the labels `y1`, and time the fit.
        print('Training bagging classifier...')
        pu_start = time.perf_counter()
        model = BaggingClassifierPU(xgb.XGBClassifier(),
                                    n_estimators=50,
                                    n_jobs=-1,
                                    max_samples=sum(y1))
        model.fit(X, y1)
        pu_end = time.perf_counter()
        print('Done!')
        print('Time:', pu_end - pu_start)

        # train data
        # Same report as above, now for the PU bagging model; it is still
        # scored against `y_orig`, not against the training labels `y1`.
        print('---- {} ----'.format('PU Bagging'))
        print(
            print_cm(sklearn.metrics.confusion_matrix(y_orig,
                                                      model.predict(X)),
                     labels=['negative', 'positive']))
        print('')
        print('Precision: ', precision_score(y_orig, model.predict(X)))
        print('Recall: ', recall_score(y_orig, model.predict(X)))

Пример #4

Показать файл

Файл: Bagging with DecisionTreeClassifier.py Проект: bjutliulei/Positive-and-Unlabeled-Learning

# Relabel `hidden_size` randomly chosen positives (drawn without replacement)
# as 0, so they are presented to the models as unlabeled.
positive_index = y[y == 1].index
hidden_index = np.random.choice(positive_index, size=hidden_size, replace=False)
y.loc[hidden_index] = 0

# Report how many positives are left after hiding.
print('%d positive out of %d total' % (sum(y), len(y)))

# Draw both groups exactly as the classifiers will see them.
for group_value, group_label, marker_style in (
    (0, 'Unlabeled', dict(c='k', marker='.', linewidth=1, s=10)),
    (1, 'Positive', dict(c='b', marker='o', linewidth=0, s=50)),
):
    group_points = X[y == group_value]
    plt.scatter(
        group_points.feature1,
        group_points.feature2,
        alpha=0.5,
        label=group_label,
        **marker_style
    )
plt.legend()
plt.title('Data set (as seen by the classifiers)')
plt.show()


# PU bagging over decision trees: 1000 trees, `sum(y)` samples per bag,
# all cores.
bc = BaggingClassifierPU(DecisionTreeClassifier(),
                         n_estimators=1000,
                         max_samples=sum(y),
                         n_jobs=-1)
bc.fit(X, y)

# Collect the true labels, the labels shown to the models, and the score
# assigned by this approach (column 1 of the out-of-bag decision function).
results = pd.DataFrame(
    {'truth': y_orig, 'label': y},
    columns=['truth', 'label'],
)
results['output_bag_tree'] = bc.oob_decision_function_[:, 1]
# Visualize this approach's results
plt.scatter(
    X[y==0].feature1, X[y==0].feature2,
    c = results[y==0].output_bag_tree, linewidth = 0, s = 50, alpha = 0.5,

Пример #5

Показать файл

Файл: SupervisedMethods.py Проект: zhouzl7/SoftwareDefectPrediction

def run(data_train, data_test, clf_name):
    """Fit the classifier registered as *clf_name* and score it on the test data.

    Args:
        data_train: raw training data, passed to ``train_data_process``.
        data_test: raw test data, passed to ``test_data_process``.
        clf_name: key of the classifier to use (see the registry below).

    Returns:
        A dict with the keys ``'train samples'``, ``'defective train
        samples'``, ``'precision'``, ``'recall'``, ``'pf'``, ``'F-measure'``,
        ``'accuracy'`` and ``'AUC'``. ``'pf'`` is FP / (FP + TN), or an
        explanatory string when the test set has no negative samples;
        ``'AUC'`` is the error message when ``roc_auc_score`` raises
        ``ValueError``. If fitting, predicting or scoring raises
        ``ValueError``, that error's message is returned as a plain string.

    Raises:
        KeyError: if *clf_name* is not a registered classifier name.
    """
    X_train, y_train = train_data_process(data_train)
    X_test, y_true = test_data_process(data_test)

    # Factories instead of instances: only the requested estimator is built,
    # rather than constructing all eleven on every call.
    factories = {
        "XGBOD": lambda: XGBOD(random_state=0),
        "KNeighborsClassifier": lambda: KNeighborsClassifier(3),
        "SVC": lambda: SVC(random_state=0),
        "GaussianProcessClassifier":
            lambda: GaussianProcessClassifier(1.0 * RBF(1.0)),
        "DecisionTreeClassifier":
            lambda: DecisionTreeClassifier(random_state=0),
        "RandomForestClassifier":
            lambda: RandomForestClassifier(random_state=0),
        "MLPClassifier": lambda: MLPClassifier(random_state=0),
        "AdaBoostClassifier": lambda: AdaBoostClassifier(),
        "GaussianNB": lambda: GaussianNB(),
        "QuadraticDiscriminantAnalysis":
            lambda: QuadraticDiscriminantAnalysis(),
        "BaggingClassifierPU": lambda: BaggingClassifierPU(
            DecisionTreeClassifier(),
            n_estimators=1000,  # 1000 trees as usual
            # Balance the positives and unlabeled in each bag. Cast to a
            # plain int so a float-typed label sum is never interpreted as a
            # fraction of the data instead of a sample count.
            max_samples=int(sum(y_train)),
            n_jobs=-1,  # Use all cores
        ),
    }

    clf = factories[clf_name]()
    try:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Only the negatives matter for the false-positive rate, so count
        # false positives and true negatives and ignore positive samples.
        false_pos = 0
        true_neg = 0
        for label, predicted in zip(y_true, y_pred):
            if label:
                continue
            if predicted:
                false_pos += 1
            else:
                true_neg += 1

        negatives = false_pos + true_neg
        if negatives == 0:
            pf = "no negative samples."
        else:
            pf = false_pos / negatives

        try:
            auc = roc_auc_score(y_true, y_pred)
        except ValueError as e:
            # Report the reason in place of the score.
            auc = str(e)

        return {
            'train samples': str(X_train.shape[0]),
            'defective train samples': str(np.sum(y_train)),
            'precision': precision_score(y_true, y_pred),
            'recall': recall_score(y_true, y_pred),
            'pf': pf,
            'F-measure': f1_score(y_true, y_pred),
            'accuracy': accuracy_score(y_true, y_pred),
            'AUC': auc
        }
    except ValueError as e:
        return str(e)

Пример #6

Показать файл

Файл: train_PU.py Проект: summerhuang2014/An-information-theoretic-framework-for-learning-models-of-instance-independent-label-noise

def train_val_PU(max_a, val, tr1, tr2, loc='./log', start_ep=0, end_ep=45):
    '''
    this function trains a triple (a combination of 3 random seeds).
    max_a: the maximum alpha used to train the PU model.
    val: the random seed number that produces the LID sequence for test
    tr1: the 1st random seed number that produces the LID sequence for training
    tr2: the 2nd random seed number that produces the LID sequence for training
    loc: the LID sequences' location
    start_ep: the starting epoch of the LID sequence
    end_ep: the ending epoch of the LID sequence

    Returns a tuple (records, header):
      header  - ['train 1, 2 val', 'recall', bl_label, <other labels, sorted>],
                where bl_label is the validation label greater than 100.
      records - [[tr1, tr2, val], recall, count for bl_label,
                 counts for the other labels in header order], where a
                 "count" is the number of validation rows of that label whose
                 predicted probability in column 1 is at least 0.5, and
                 recall is the bl_label count divided by the sum of the
                 validation rows' last column.

    Raises ValueError if a predicted probability is nan, or if no validation
    label is greater than 100.
    '''
    # Local import keeps this function self-contained.
    # NOTE(review): assumes LID_assmb returns pandas DataFrames - confirm.
    import pandas as pd

    records = [
        [tr1, tr2, val],
    ]
    train1 = LID_assmb(tr1, max_a, start_ep, end_ep, loc)
    train2 = LID_assmb(tr2, max_a, start_ep, end_ep, loc)
    # DataFrame.append was removed in pandas 2.0; concat keeps the same row
    # order (train2 first, then train1).
    total = pd.concat([train2, train1])
    total = total.sample(frac=1).reset_index(drop=True)  #shuffle everything
    # The last column is the target the PU model is fitted on.
    loss_labels = (total.iloc[:, -1]).to_numpy()
    total = total.iloc[:, start_ep:end_ep]
    bc = BaggingClassifierPU(DecisionTreeClassifier(),
                             n_estimators=1000,
                             max_samples=int(sum(loss_labels)),
                             n_jobs=-1)
    bc.fit(total, loss_labels)

    v_total = LID_assmb(val, max_a, start_ep, end_ep, loc)
    v_total = v_total.sample(frac=1).reset_index(drop=True)
    v_labels = (v_total.iloc[:, -2]).to_numpy()
    v_loss_labels = (v_total.iloc[:, -1]).to_numpy()
    v_total = v_total.iloc[:, start_ep:end_ep]
    pred = bc.predict_proba(v_total)

    # One counter per distinct validation label; remember the label > 100.
    v_summary = {}
    bl_label = None
    for label in set(v_labels):
        v_summary[label] = 0.0
        if float(label) > 100:
            bl_label = label
    if bl_label is None:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(
            'no validation label greater than 100 found; '
            'cannot determine bl_label')

    # Count, per label, the rows predicted positive (column 1 >= 0.5).
    for proba, label in zip(pred, v_labels):
        positive_score = proba[1]
        if np.isnan(positive_score):
            raise ValueError(
                'prediction has illegal value nan, please check the model and data!'
            )
        if positive_score >= 0.5:
            v_summary[label] += 1

    records.extend(
        [v_summary[bl_label] / float(sum(v_loss_labels)), v_summary[bl_label]])
    other_labels = set(v_labels)
    other_labels.remove(bl_label)
    header = ['train 1, 2 val', 'recall', bl_label] + sorted(other_labels)
    # header[3:] are the remaining labels; append their counts in that order.
    records.extend(v_summary[label] for label in header[3:])
    return (records, header)
    '''