Example #1
def getModel(self, _params):
    return AdaBoost(n_estimators=int(_params['n_estimators']),
                    learning_rate=_params['learning_rate'],
                    loss=_params['loss'])
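The loss keyword exists only on scikit-learn's AdaBoostRegressor (valid values are 'linear', 'square', 'exponential'), so AdaBoost in this example is presumably an alias for that class. A minimal sketch of calling such a factory with a hypothetical parameter dict:

from sklearn.ensemble import AdaBoostRegressor as AdaBoost

# Hypothetical params dict mirroring the keys the method reads
params = {'n_estimators': 100, 'learning_rate': 0.5, 'loss': 'linear'}
model = AdaBoost(n_estimators=int(params['n_estimators']),
                 learning_rate=params['learning_rate'],
                 loss=params['loss'])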
Example #2
# Imports assumed by this snippet (AdaBoost presumably aliases
# scikit-learn's AdaBoostClassifier):
import numpy as np
from pandas import DataFrame
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.feature_selection import RFE

# Get sample names
samplenames = np.array(dataframe.columns)
# Get probe (feature) names
featurenames = np.array(dataframe.index)
# Get the matrix of expression values
esetarray = np.array(dataframe)
# Transpose the expression matrix so rows are samples
esetarray = esetarray.transpose()

# Label the samples,
# either into three classes: PB BM LN
# sampletype = [1]*26+[2]*19+[3]*17
# or into two classes: PB vs. BMorLN
sampletype = [0]*10+[1]*10


# AdaBoost method

# =============================================================================
clf = AdaBoost()  # AdaBoost(n_estimators=100)
rfe = RFE(clf, n_features_to_select=1)
# =============================================================================


rfe.fit(esetarray, sampletype)

print("Features sorted by their rank:")
print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), featurenames)))
result = sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), featurenames))
resultframe = DataFrame(result)
resultframe.to_csv("ranklist_ABrfe.txt", sep="\t")
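RFE ranks features by repeatedly fitting the estimator and pruning the weakest feature, so the estimator must expose feature_importances_, which AdaBoostClassifier does. A minimal, self-contained run of the same idea on synthetic data, assuming the scikit-learn classes:

import numpy as np
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.feature_selection import RFE

rng = np.random.default_rng(0)
esetarray = rng.normal(size=(20, 5))  # 20 samples x 5 features
sampletype = [0] * 10 + [1] * 10      # two classes, as above

rfe = RFE(AdaBoost(), n_features_to_select=1)
rfe.fit(esetarray, sampletype)
print(rfe.ranking_)  # 1 marks the last surviving (top-ranked) feature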
Example #3
        if i == 35:  # column 35 holds the class label; keep it as a string
            tmp.append(word)
        else:
            tmp.append(float(word))
    Y.append(tmp.pop())  # the label is the last element of the row
    X.append(tmp[:])     # the remaining values are the features
#print("X: ")
#print(X)
#print("Y: ")
#print(Y)
f.close()
Z = []
odp = []
for k in range(0, 9):
    i = randint(0, len(X) - 1)  # bound by the shrinking list so pop(i) stays in range
    Z.append(X.pop(i))
    odp.append(Y.pop(i))
#X = [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9], [7, 9, 11, 13, 15], [6, 8, 10, 12, 14]]
#Y = [1, 0, 0, 1]
#Z = [[8, 9, 10, 14, 15], [4, 5, 7, 11, 13], [4, 6, 8, 10, 12],  [3, 5, 7, 9, 11]]

ada = AdaBoost(DecTree(max_depth=1))
ada.fit(X, Y)

print("Predict: ")
print(ada.predict(Z))
print("Probabil: ")
print(ada.predict_proba(Z))
print("Score: ")
print(ada.score(Z, odp))
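A depth-1 tree is a decision stump, so this is the classic boosted-stumps configuration. A minimal sketch of the same hold-out check on toy data, assuming AdaBoost and DecTree alias scikit-learn's AdaBoostClassifier and DecisionTreeClassifier:

from random import randint
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.tree import DecisionTreeClassifier as DecTree

X = [[float(i + j) for j in range(5)] for i in range(301)]  # toy features
Y = [i % 2 for i in range(301)]                             # toy labels

Z, odp = [], []
for _ in range(9):  # hold out 9 random samples, as above
    i = randint(0, len(X) - 1)
    Z.append(X.pop(i))
    odp.append(Y.pop(i))

ada = AdaBoost(DecTree(max_depth=1))  # boosted decision stumps
ada.fit(X, Y)
print(ada.score(Z, odp))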
Example #4
def getModel(self, _params):
    return AdaBoost(n_estimators=int(_params['n_estimators']),
                    learning_rate=_params['learning_rate'],
                    algorithm=_params['algorithm'])
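Unlike Example #1, this variant passes algorithm, which belongs to AdaBoostClassifier ('SAMME' or 'SAMME.R'), so here AdaBoost presumably aliases the classifier. A minimal sketch with a hypothetical parameter dict:

from sklearn.ensemble import AdaBoostClassifier as AdaBoost

# Hypothetical params dict mirroring the keys the method reads
params = {'n_estimators': 200, 'learning_rate': 0.1, 'algorithm': 'SAMME'}
model = AdaBoost(n_estimators=int(params['n_estimators']),
                 learning_rate=params['learning_rate'],
                 algorithm=params['algorithm'])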
Example #5
# Imports assumed by this snippet; clean_df is the project's own helper
# (its definition is not shown here).
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

if __name__ == '__main__':
    # Get raw data
    df = pd.read_excel('data/Loan Book Nov-16.xlsx')

    X, y, weights = clean_df(df)

    # Split data into training Set and Testing Set
    indices = np.arange(X.shape[0])
    X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(
        X, y, indices, test_size=0.3, random_state=0, stratify=y)

    pipeline = Pipeline([
        ('rescale', MinMaxScaler()),
        # ('model', SGD(loss = 'modified_huber',class_weight = 'balanced', penalty = None))  # classifier
        # ('model', RF(class_weight = 'balanced_subsample'))  # classifier
        ('model', AdaBoost(n_estimators=50))  # classifier
    ])

    parameters = {'model__n_estimators': [50, 100, 150]}

    best_model = GridSearchCV(
        pipeline,
        parameters,
        cv=5,
        verbose=1,
        scoring='f1')
    # Since scikit-learn 0.21, fit parameters are passed to fit() itself
    # rather than through GridSearchCV's removed fit_params argument.
    best_model.fit(X_train, y_train,
                   model__sample_weight=np.array(weights[train_indices]))

    y_pred_probs = best_model.predict_proba(X_test)
    # .ix is gone from pandas; .iloc selects the positive-class column
    y_pred_probs = pd.DataFrame(y_pred_probs).iloc[:, 1]
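The model__ prefix is scikit-learn's parameter-routing convention: Pipeline.fit strips the step name and forwards sample_weight to the fit method of the step called 'model'. A minimal sketch of just that routing on synthetic data:

import numpy as np
from sklearn.ensemble import AdaBoostClassifier as AdaBoost
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(40, 3))
y_demo = rng.integers(0, 2, size=40)
w_demo = rng.uniform(0.5, 2.0, size=40)

pipe = Pipeline([('rescale', MinMaxScaler()),
                 ('model', AdaBoost(n_estimators=50))])
pipe.fit(X_demo, y_demo, model__sample_weight=w_demo)  # routed to AdaBoost.fit
print(pipe.predict_proba(X_demo[:3])[:, 1])  # positive-class probabilities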
Example #6
def __init__(self, C=1, penalty='l2'):
    self.C = C
    self.penalty = penalty
    self._model = AdaBoost(n_estimators=50)
    # AdaBoostClassifier accepts neither C nor penalty (those are
    # linear-model parameters), so calling
    # self._model.set_params(C=C, penalty=penalty) would raise a ValueError.
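set_params accepts only keywords listed by get_params(), so a working version would tune parameters AdaBoost actually has. A minimal sketch, assuming scikit-learn's AdaBoostClassifier:

from sklearn.ensemble import AdaBoostClassifier as AdaBoost

model = AdaBoost(n_estimators=50)
model.set_params(n_estimators=100, learning_rate=0.5)  # valid AdaBoost params
print(sorted(model.get_params()))  # every keyword set_params will accept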