def getModel(self, _params):
    """Construct an AdaBoost regression model from a hyper-parameter dict.

    *_params* must provide 'n_estimators' (coerced to int),
    'learning_rate' and 'loss'.
    """
    estimator_count = int(_params['n_estimators'])
    return AdaBoost(
        n_estimators=estimator_count,
        learning_rate=_params['learning_rate'],
        loss=_params['loss'],
    )
# --- Recursive feature elimination (RFE) ranking with AdaBoost ---
samplenames = np.array(dataframe.columns)    # sample names (columns of the expression table)
featurenames = np.array(dataframe.index)     # probe/feature names (rows of the expression table)
esetarray = np.array(dataframe)              # expression-value matrix
esetarray = esetarray.transpose()            # transpose so rows are samples, columns are features

# Sample class labels.
# Three-class split (PB / BM / LN):
# sampletype = [1]*26+[2]*19+[3]*17
# Two-class split (PB vs BM-or-LN):
sampletype = [0] * 10 + [1] * 10

# AdaBoost-based RFE: rank every feature (n_features_to_select=1 forces a full ranking).
# =============================================================================
clf = AdaBoost()  # AdaBoost(n_estimators=100)
rfe = RFE(clf, n_features_to_select=1)
# =============================================================================
rfe.fit(esetarray, sampletype)

# Compute the (rank, feature-name) list once and reuse it for both the
# console report and the exported file (it was previously computed twice).
result = sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), featurenames))
print("Features sorted by their rank:")
print(result)
resultframe = DataFrame(result)
resultframe.to_csv("ranklist_ABrfe.txt", sep="\t")
# NOTE(review): this chunk begins mid-loop — the enclosing file-reading loop
# that binds `i`, `word`, `tmp`, `X`, `Y` and `f` starts before this view, so
# the code below is left byte-identical rather than restructured.
# Column index 35 apparently holds the class label (kept as a string); every
# other column is parsed as a float. The label is popped off `tmp` into Y and
# the remaining features are copied into X.
# BUG (flagged, not fixed here): `f.close` lacks parentheses, so the file is
# never actually closed — should be `f.close()` or a `with` block.
# NOTE(review): the hold-out draw pops 9 random rows (range(0, 9)) out of X/Y
# into Z/odp; randint(0, 300) assumes at least ~301 remaining rows — TODO confirm.
# The commented-out X/Y/Z literals look like a small smoke-test fixture.
# Finally an AdaBoost ensemble over depth-1 decision stumps is trained on the
# remaining data and evaluated on the 9 held-out samples.
if (i == 35): tmp.append(word) else: tmp.append(float(word)) Y.append(tmp.pop()) X.append(tmp[:]) #print("X: ") #print(X) #print("Y: ") #print(Y) f.close Z = [] odp = [] for k in range(0, 9): i = randint(0, 300) Z.append(X.pop(i)) odp.append(Y.pop(i)) #X = [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9], [7, 9, 11, 13, 15], [6, 8, 10, 12, 14]] #Y = [1, 0, 0, 1] #Z = [[8, 9, 10, 14, 15], [4, 5, 7, 11, 13], [4, 6, 8, 10, 12], [3, 5, 7, 9, 11]] ada = AdaBoost(DecTree(max_depth=1)) ada.fit(X, Y) print("Predict: ") print(ada.predict(Z)) print("Probabil: ") print(ada.predict_proba(Z)) print("Score: ") print(ada.score(Z, odp))
def getModel(self, _params):
    """Construct an AdaBoost classification model from a hyper-parameter dict.

    *_params* must provide 'n_estimators' (coerced to int),
    'learning_rate' and 'algorithm'.
    """
    estimator_count = int(_params['n_estimators'])
    return AdaBoost(
        n_estimators=estimator_count,
        learning_rate=_params['learning_rate'],
        algorithm=_params['algorithm'],
    )
if __name__ == '__main__':
    # Load and clean the raw loan-book data.
    df = pd.read_excel('data/Loan Book Nov-16.xlsx')
    X, y, weights = clean_df(df)

    # Stratified train/test split; keep the row indices so the per-sample
    # weights can be aligned with the training subset afterwards.
    indices = np.arange(X.shape[0])
    X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(
        X, y, indices, test_size=0.3, random_state=0, stratify=y)

    pipeline = Pipeline([
        ('rescale', MinMaxScaler()),
        # ('model', SGD(loss = 'modified_huber',class_weight = 'balanced', penalty = None)) # classifier
        # ('model', RF(class_weight = 'balanced_subsample')) # classifier
        ('model', AdaBoost(n_estimators=50))  # classifier
    ])
    parameters = {'model__n_estimators': [50, 100, 150]}

    # Grid-search over n_estimators, scored by F1 with 5-fold CV.
    # NOTE: fit params must be passed to .fit() — the constructor's
    # `fit_params=` argument was deprecated and then removed in scikit-learn.
    best_model = GridSearchCV(pipeline, parameters, cv=5, verbose=1, scoring='f1')
    best_model.fit(X_train, y_train,
                   model__sample_weight=np.array(weights[train_indices]))

    # Probability of the positive class for the held-out set.
    # `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent.
    y_pred_probs = best_model.predict_proba(X_test)
    y_pred_probs = pd.DataFrame(y_pred_probs).iloc[:, 1]
def __init__(self, C = 1, penalty = 'l2'):
    """Initialize the wrapper with a 50-estimator AdaBoost model.

    C, penalty: accepted (and stored) for interface compatibility with the
    sibling linear-model wrappers, but NOT forwarded to AdaBoost — AdaBoost
    has no `C` or `penalty` hyper-parameters, so the original
    `set_params(C=C, penalty=penalty)` call raised ValueError at runtime.
    """
    self.C = C
    self.penalty = penalty
    self._model = AdaBoost(n_estimators = 50)