def train_xgboost(train_X, train_y, valid_X, valid_y, test_X, test_y, learning_rate=None, mask=None):
    """Fit an XGBoost binary classifier and report metrics on every split.

    Args:
        train_X, train_y: Training features and labels. ``train_y`` must
            expose ``.values``; it is flattened with ``.values.ravel()``
            before fitting.
        valid_X, valid_y: Validation features and labels.
        test_X, test_y: Test features and labels.
        learning_rate: Boosting learning rate. ``None`` (the default)
            selects 0.01.
        mask: Optional mask handed to ``mask_columns`` for all three feature
            sets. A falsy mask (``None`` or empty) skips masking.

    Returns:
        A 3-tuple of ``get_metrics`` results in (train, valid, test) order.
    """
    # The original code overwrote the argument unconditionally, so a
    # caller-supplied learning rate was silently ignored.
    if learning_rate is None:
        learning_rate = 0.01

    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)

    # 'gpu_hist' / 'gpu_predictor' request the GPU code paths of XGBoost.
    xgb_model = xgb.XGBClassifier(
        objective="binary:logistic",
        tree_method='gpu_hist',
        predictor='gpu_predictor',
        learning_rate=learning_rate,
        n_estimators=350,
        subsample=0.3,
    )
    xgb_model.fit(train_X, train_y.values.ravel())

    train_pred = xgb_model.predict(train_X)
    valid_pred = xgb_model.predict(valid_X)
    test_pred = xgb_model.predict(test_X)

    return (
        get_metrics(train_y, train_pred, verbose=True),
        get_metrics(valid_y, valid_pred, verbose=True),
        get_metrics(test_y, test_pred, verbose=True),
    )
def __call__(self, chrom):
    """Evaluate a chromosome with an ``svm.SVC`` classifier.

    ``chrom.genes`` is passed to ``mask_columns`` for both the training and
    the test features. The classifier is fitted on the masked training data
    and its accuracy on the masked test data is written to ``chrom.fitness``.

    Returns:
        The value stored in ``chrom.fitness``.
    """
    gene_mask = chrom.genes
    masked_train = mask_columns(self.train_X, gene_mask)
    masked_test = mask_columns(self.test_X, gene_mask)

    model = svm.SVC(kernel=self.kernel, class_weight=None)
    model.fit(masked_train, self.train_y.values.ravel())

    predictions = model.predict(masked_test)
    chrom.fitness = metrics.accuracy_score(self.test_y, predictions)
    return chrom.fitness
def __call__(self, chrom):
    """Evaluate a chromosome with an XGBoost binary classifier.

    ``chrom.genes` is passed to ``mask_columns`` for both the training and
    the test features. The classifier (learning rate 0.01, 350 estimators,
    subsample 0.3, GPU tree method and predictor) is fitted on the masked
    training data and its accuracy on the masked test data is written to
    ``chrom.fitness``.

    Returns:
        The value stored in ``chrom.fitness``.
    """
    gene_mask = chrom.genes
    masked_train = mask_columns(self.train_X, gene_mask)
    masked_test = mask_columns(self.test_X, gene_mask)

    model = xgboost.XGBClassifier(
        objective="binary:logistic",
        tree_method='gpu_hist',
        predictor='gpu_predictor',
        learning_rate=0.01,
        n_estimators=350,
        subsample=0.3,
    )
    model.fit(masked_train, self.train_y.values.ravel())

    predictions = model.predict(masked_test)
    chrom.fitness = metrics.accuracy_score(self.test_y, predictions)
    return chrom.fitness
def __call__(self, master_chrom: Chromosome, train_chrom: Chromosome, valid_chrom: Chromosome):
    """Compute validation accuracy of an ``svm.SVC`` for a chromosome triple.

    Args:
        master_chrom: Its ``genes`` are passed to ``mask_columns`` for the
            training and validation features.
        train_chrom: Its ``genes`` are passed to ``mask_rows`` for the
            training features and labels.
        valid_chrom: Its ``genes`` are passed to ``mask_rows`` for the
            validation features and labels.

    Returns:
        The accuracy score of the fitted classifier on the masked validation
        data. No chromosome attribute is modified here.
    """
    column_mask = master_chrom.genes
    train_features = mask_columns(self.train_X, column_mask)
    valid_features = mask_columns(self.valid_X, column_mask)

    train_row_mask = train_chrom.genes
    train_features = mask_rows(train_features, train_row_mask)
    train_labels = mask_rows(self.train_y, train_row_mask)

    valid_row_mask = valid_chrom.genes
    valid_features = mask_rows(valid_features, valid_row_mask)
    valid_labels = mask_rows(self.valid_y, valid_row_mask)

    model = svm.SVC(kernel=self.kernel)
    model.fit(train_features, train_labels.values.ravel())

    return metrics.accuracy_score(valid_labels, model.predict(valid_features))
def train_cumulative_voting(train_X, train_y, valid_X, valid_y, test_X, test_y, threshold=0.5, mask=None):
    """Run a ``CumulativeVoting`` model over all splits and collect metrics.

    No fitting step is performed; the model is only constructed with
    ``threshold`` and asked to predict.

    Args:
        train_X, train_y: Training features and labels.
        valid_X, valid_y: Validation features and labels.
        test_X, test_y: Test features and labels.
        threshold: Value forwarded to the ``CumulativeVoting`` constructor.
        mask: Optional mask handed to ``mask_columns`` for all three feature
            sets. A falsy mask skips masking.

    Returns:
        A 3-tuple of ``get_metrics`` results in (train, valid, test) order.
    """
    if mask:
        train_X, valid_X, test_X = (
            mask_columns(features, mask) for features in (train_X, valid_X, test_X)
        )

    voter = CumulativeVoting(threshold)

    # Predict on every split first, then score, so metric output stays grouped.
    predictions = [voter.predict(features) for features in (train_X, valid_X, test_X)]
    return tuple(
        get_metrics(labels, predicted, verbose=True)
        for labels, predicted in zip((train_y, valid_y, test_y), predictions)
    )
def train_svm(kernel, train_X, train_y, valid_X, valid_y, test_X, test_y, mask=None):
    """Fit an ``svm.SVC`` on the training split and collect metrics per split.

    Args:
        kernel: Kernel forwarded to ``svm.SVC``.
        train_X, train_y: Training features and labels. ``train_y`` must
            expose ``.values``; it is flattened with ``.values.ravel()``
            before fitting.
        valid_X, valid_y: Validation features and labels.
        test_X, test_y: Test features and labels.
        mask: Optional mask handed to ``mask_columns`` for all three feature
            sets. A falsy mask skips masking.

    Returns:
        A 3-tuple of ``get_metrics`` results in (train, valid, test) order.
    """
    if mask:
        train_X, valid_X, test_X = (
            mask_columns(features, mask) for features in (train_X, valid_X, test_X)
        )

    model = svm.SVC(kernel=kernel)
    model.fit(train_X, train_y.values.ravel())

    # Predict on every split first, then score, so metric output stays grouped.
    predictions = [model.predict(features) for features in (train_X, valid_X, test_X)]
    return tuple(
        get_metrics(labels, predicted, verbose=True)
        for labels, predicted in zip((train_y, valid_y, test_y), predictions)
    )
def train_majority_voting(train_X, train_y, valid_X, valid_y, test_X, test_y, vote_threshold=0.5, count_threshold=0.5, mask=None):
    """Run a ``MajorityVoting`` model over all splits and collect metrics.

    No fitting step is performed; the model is only constructed with the two
    thresholds and asked to predict.

    Args:
        train_X, train_y: Training features and labels.
        valid_X, valid_y: Validation features and labels.
        test_X, test_y: Test features and labels.
        vote_threshold: First positional argument of ``MajorityVoting``.
        count_threshold: Second positional argument of ``MajorityVoting``.
        mask: Optional mask handed to ``mask_columns`` for all three feature
            sets. A falsy mask skips masking.

    Returns:
        A 3-tuple of ``get_metrics`` results in (train, valid, test) order.
    """
    if mask:
        train_X, valid_X, test_X = (
            mask_columns(features, mask) for features in (train_X, valid_X, test_X)
        )

    voter = MajorityVoting(vote_threshold, count_threshold)

    # Predict on every split first, then score, so metric output stays grouped.
    predictions = [voter.predict(features) for features in (train_X, valid_X, test_X)]
    return tuple(
        get_metrics(labels, predicted, verbose=True)
        for labels, predicted in zip((train_y, valid_y, test_y), predictions)
    )
def train_xgboost_gridsearch(train_X, train_y, valid_X, valid_y, test_X, test_y, learning_rate=None, mask=None):
    """Grid-search an XGBoost classifier and report metrics on every split.

    The grid currently holds a single point (350 estimators, subsample 0.3
    and the chosen learning rate), evaluated with 4-fold cross-validation.

    Args:
        train_X, train_y: Training features and labels. ``train_y`` must
            expose ``.values``; it is flattened with ``.values.ravel()``
            before fitting.
        valid_X, valid_y: Validation features and labels.
        test_X, test_y: Test features and labels.
        learning_rate: Learning rate placed in the search grid. ``None``
            (the default) selects 0.01.
        mask: Optional mask handed to ``mask_columns`` for all three feature
            sets. A falsy mask (``None`` or empty) skips masking.

    Returns:
        A 3-tuple of ``get_metrics`` results in (train, valid, test) order.
    """
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)

    # The original accepted ``learning_rate`` but never used it; honour it
    # here while keeping 0.01 as the default grid value.
    param_grid = {
        'learning_rate': [0.01 if learning_rate is None else learning_rate],
        'n_estimators': [350],
        'subsample': [0.3],
    }

    # NOTE(review): the scoring metric is a regression metric applied to a
    # classifier; it is kept unchanged here -- confirm whether 'accuracy'
    # was intended.
    grid_search = GridSearchCV(
        estimator=xgb.XGBClassifier(),
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=4,
        verbose=2,
    )
    grid_search.fit(train_X, train_y.values.ravel())

    train_pred = grid_search.predict(train_X)
    valid_pred = grid_search.predict(valid_X)
    test_pred = grid_search.predict(test_X)

    print("Best parameters:", grid_search.best_params_)

    return (
        get_metrics(train_y, train_pred, verbose=True),
        get_metrics(valid_y, valid_pred, verbose=True),
        get_metrics(test_y, test_pred, verbose=True),
    )
def __call__(self, chrom):
    """Evaluate a chromosome with a default-constructed ``CumulativeVoting``.

    ``chrom.genes`` is passed to ``mask_columns`` for the test features. The
    model's predictions on them are scored against ``self.test_y`` and the
    accuracy is written to ``chrom.fitness``. No training data is involved.

    Returns:
        The value stored in ``chrom.fitness``.
    """
    masked_test = mask_columns(self.test_X, chrom.genes)
    predictions = CumulativeVoting().predict(masked_test)
    chrom.fitness = metrics.accuracy_score(self.test_y, predictions)
    return chrom.fitness