def __init__(self, K=10, alpha=0.75, base_classifier=None,
             n_classifiers=100, combination_rule='majority_vote',
             diversity_metric='e', positive_label=1):
    """Store the configuration and build the combiner and diversity helpers.

    Every argument is kept on the instance under its own name.
    ``combination_rule`` is additionally passed to ``Combiner`` and
    ``diversity_metric`` to ``Diversity``.
    """
    # Plain configuration, stored exactly as given.
    self.K, self.alpha = K, alpha
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers
    self.positive_label = positive_label
    self.combination_rule = combination_rule
    self.diversity_metric = diversity_metric

    # Helper objects derived from the configuration.
    self.combiner = Combiner(rule=combination_rule)
    self.diversity = Diversity(metric=diversity_metric)

    # Nothing has been generated or supplied yet.
    self.classifiers = None
    self.ensemble = None
    self.validation_X = None
    self.validation_y = None
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    """Remember the base classifier and pool size, and build the combiner.

    ``combination_rule`` is passed to ``Combiner``. The ensemble starts
    out as ``None``.
    """
    self.ensemble = None
    self.n_classifiers = n_classifiers
    self.base_classifier = base_classifier
    self.combiner = Combiner(rule=combination_rule)
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote', max_features=0.5):
    """Remember the configuration and build the combiner.

    ``base_classifier``, ``n_classifiers`` and ``max_features`` are stored
    as given; ``combination_rule`` is passed to ``Combiner``. The
    ``classifiers`` and ``ensemble`` attributes start out as ``None``.
    """
    # User-supplied settings.
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers
    self.max_features = max_features

    self.combiner = Combiner(rule=combination_rule)

    # Nothing has been generated yet.
    self.classifiers = None
    self.ensemble = None
def __init__(self, ensemble=None, selector=None, combiner=None):
    """Store the ensemble and selector and normalise ``combiner``.

    ``combiner`` may be ``None`` (a 'majority_vote' ``Combiner`` is built),
    a string (used as the ``Combiner`` rule) or a ``Combiner`` instance
    (stored as is).

    Raises:
        ValueError: if ``combiner`` is none of the above.
    """
    self.ensemble = ensemble
    self.selector = selector

    # Turn the accepted shorthand forms into a Combiner instance.
    if combiner is None:
        combiner = Combiner(rule='majority_vote')
    elif isinstance(combiner, str):
        combiner = Combiner(rule=combiner)
    elif not isinstance(combiner, Combiner):
        raise ValueError('Invalid parameter combiner')

    self.combiner = combiner
def __init__(self, K=10, alpha=0.75, base_classifier=None,
             n_classifiers=100, combination_rule='majority_vote',
             diversity_metric='e', positive_label=1):
    """Store the configuration and build the combiner and diversity helpers.

    Every argument is kept on the instance under its own name.
    ``combination_rule`` is additionally passed to ``Combiner`` and
    ``diversity_metric`` to ``Diversity``.
    """
    # Plain configuration, stored exactly as given.
    self.K, self.alpha = K, alpha
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers
    self.positive_label = positive_label
    self.combination_rule = combination_rule
    self.diversity_metric = diversity_metric

    # Helper objects derived from the configuration.
    self.combiner = Combiner(rule=combination_rule)
    self.diversity = Diversity(metric=diversity_metric)

    # Nothing has been generated or supplied yet.
    self.classifiers = None
    self.ensemble = None
    self.validation_X = None
    self.validation_y = None
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote'):
        """Store the settings and build the wrapped sklearn bagging model.

        ``base_classifier`` and ``n_classifiers`` are forwarded to
        ``BaggingClassifier``; ``combination_rule`` is passed to
        ``Combiner``.
        """
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        # NOTE(review): newer scikit-learn releases renamed `base_estimator`
        # to `estimator` -- confirm which versions must be supported.
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        """Fit the sklearn bagging model and copy its estimators.

        Returns:
            self, so calls can be chained.
        """
        self.sk_bagging.fit(X, y)
        # Start from an empty pool: otherwise every refit would append a
        # further batch of estimators to the ones already collected.
        self.ensemble = Ensemble()
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class RandomSubspace(PoolGenerator):
    """Pool generator that fits each member on a random subset of features."""

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote', max_features=0.5):
        """Store the settings and build the combiner.

        ``max_features`` is the fraction of the columns of ``X`` each member
        is given (rounded up in ``fit``).
        """
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features

    def fit(self, X, y):
        """Fit ``n_classifiers`` clones, each on its own feature subset.

        Returns:
            self, so calls can be chained.
        """
        self.ensemble = Ensemble()

        # The subset size depends only on X and max_features, so compute it
        # once instead of on every iteration.
        n_features = X.shape[1]
        n_chosen = int(np.ceil(n_features * self.max_features))

        for _ in range(self.n_classifiers):
            # Draw distinct feature indices for this member.
            chosen_features = np.random.choice(n_features, n_chosen,
                                               replace=False)
            transformer = FeatureSubsamplingTransformer(
                features=chosen_features)

            classifier = BrewClassifier(
                classifier=sklearn.base.clone(self.base_classifier),
                transformer=transformer)
            classifier.fit(X, y)

            self.ensemble.add(classifier)

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote', max_features=0.5):
    """Remember the configuration and build the combiner.

    ``base_classifier``, ``n_classifiers`` and ``max_features`` are stored
    as given; ``combination_rule`` is passed to ``Combiner``. The
    ``classifiers`` and ``ensemble`` attributes start out as ``None``.
    """
    # User-supplied settings.
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers
    self.max_features = max_features

    self.combiner = Combiner(rule=combination_rule)

    # Nothing has been generated yet.
    self.classifiers = None
    self.ensemble = None
class RandomSubspace(PoolGenerator):
    """Pool generator that fits each member on a random subset of features."""

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote', max_features=0.5):
        """Store the settings and build the combiner.

        ``max_features`` is the fraction of the columns of ``X`` each member
        is given (rounded up in ``fit``).
        """
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.classifiers = None
        self.ensemble = None
        self.max_features = max_features

    def fit(self, X, y):
        """Fit ``n_classifiers`` clones, each on its own feature subset.

        Returns:
            self, so calls can be chained.
        """
        self.ensemble = Ensemble()

        # The subset size depends only on X and max_features, so compute it
        # once instead of on every iteration.
        n_features = X.shape[1]
        n_chosen = int(np.ceil(n_features * self.max_features))

        for _ in range(self.n_classifiers):
            # Draw distinct feature indices for this member.
            chosen_features = np.random.choice(n_features, n_chosen,
                                               replace=False)
            transformer = FeatureSubsamplingTransformer(
                features=chosen_features)

            classifier = BrewClassifier(
                classifier=sklearn.base.clone(self.base_classifier),
                transformer=transformer)
            classifier.fit(X, y)

            self.ensemble.add(classifier)

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote'):
        """Store the settings and build the wrapped sklearn bagging model.

        ``base_classifier`` and ``n_classifiers`` are forwarded to
        ``BaggingClassifier``; ``combination_rule`` is passed to
        ``Combiner``.
        """
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers

        # using the sklearn implementation of bagging for now
        # NOTE(review): newer scikit-learn releases renamed `base_estimator`
        # to `estimator` -- confirm which versions must be supported.
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        """Fit the sklearn bagging model and copy its estimators.

        Returns:
            self, so calls can be chained.
        """
        self.sk_bagging.fit(X, y)
        # Start from an empty pool: otherwise every refit would append a
        # further batch of estimators to the ones already collected.
        self.ensemble = Ensemble()
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class Bagging(PoolGenerator):
    """Pool generator that fits each member on a bootstrap resample."""

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote'):
        """Store the base classifier and pool size, and build the combiner."""
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        """Fit ``n_classifiers`` clones of the base classifier.

        Each clone is trained on ``X.shape[0]`` rows drawn from ``X`` with
        replacement.

        Returns:
            self, so calls can be chained.
        """
        self.ensemble = Ensemble()
        n_samples = X.shape[0]

        for _ in range(self.n_classifiers):
            # bootstrap
            idx = np.random.choice(n_samples, n_samples, replace=True)
            data, target = X[idx, :], y[idx]

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)

            self.ensemble.add(classifier)

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class Bagging(PoolGenerator):
    """Pool generator that fits each member on a bootstrap resample."""

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote'):
        """Store the base classifier and pool size, and build the combiner."""
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        """Fit ``n_classifiers`` clones of the base classifier.

        Each clone is trained on ``X.shape[0]`` rows drawn from ``X`` with
        replacement.

        Returns:
            self, so calls can be chained.
        """
        self.ensemble = Ensemble()
        n_samples = X.shape[0]

        for _ in range(self.n_classifiers):
            # bootstrap
            idx = np.random.choice(n_samples, n_samples, replace=True)
            data, target = X[idx, :], y[idx]

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)

            self.ensemble.add(classifier)

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    """Store the settings and build the sklearn bagging model and helpers.

    ``base_classifier`` and ``n_classifiers`` are forwarded to
    ``BaggingClassifier``; ``combination_rule`` is passed to ``Combiner``.
    An empty ``Ensemble`` is created up front.
    """
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    bagging_options = dict(
        base_estimator=base_classifier,
        n_estimators=n_classifiers,
        max_samples=1.0,
        max_features=1.0,
    )
    self.sk_bagging = BaggingClassifier(**bagging_options)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
def test__arguments(self):
    """EnsembleClassifier keeps the ensemble and combiner it is given."""
    c = MockClassifier()
    pool = Ensemble(classifiers=[c])
    combiner = Combiner(rule='majority_vote')

    model = EnsembleClassifier(ensemble=pool, combiner=combiner)

    # Previously the test only checked that construction did not raise;
    # pin that the arguments actually end up on the instance.
    assert model.ensemble is pool
    assert model.combiner is combiner
def __init__(self, ensemble=None, selector=None, combiner=None):
    """Store the ensemble, selector and combiner.

    Args:
        ensemble: stored as ``self.ensemble``.
        selector: stored as ``self.selector``.
        combiner: ``None`` builds a 'majority_vote' ``Combiner``; a string
            is used as the ``Combiner`` rule; any other object is stored
            unchanged.
    """
    self.ensemble = ensemble
    self.selector = selector

    # Identity test: `== None` would go through the argument's __eq__,
    # which an arbitrary combiner object may overload.
    if combiner is None:
        combiner = Combiner(rule='majority_vote')
    elif isinstance(combiner, str):
        # Accept a rule name as shorthand for a Combiner with that rule.
        combiner = Combiner(rule=combiner)

    self.combiner = combiner
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    """Remember the base classifier and pool size, and build the combiner.

    ``combination_rule`` is passed to ``Combiner``. The ensemble starts
    out as ``None``.
    """
    self.ensemble = None
    self.n_classifiers = n_classifiers
    self.base_classifier = base_classifier
    self.combiner = Combiner(rule=combination_rule)
def __init__(self, classifierList, combiningMethod):
    """Wrap the given classifiers in an ``EnsembleClassifier``.

    Args:
        classifierList: sequence of entries whose element at index 1 is
            the classifier to include (element 0 is not used here).
        combiningMethod: passed as the first positional argument to
            ``Combiner``.
    """
    # Take the classifier out of every entry. The loop variable is no
    # longer called `tuple`, which shadowed the builtin.
    classifiers = [entry[1] for entry in classifierList]

    hybridEnsemble = Ensemble(classifiers=classifiers)
    hybridEnsembleClassifier = EnsembleClassifier(
        ensemble=hybridEnsemble, combiner=Combiner(combiningMethod))

    super().__init__(hybridEnsembleClassifier)
    self.name = "ensemble"
def __init__(self, base_classifier=None, n_classifiers=100,
             combination_rule='majority_vote'):
    """Store the settings and build the sklearn bagging model and helpers.

    ``base_classifier`` and ``n_classifiers`` are forwarded to
    ``BaggingClassifier``; ``combination_rule`` is passed to ``Combiner``.
    An empty ``Ensemble`` is created up front.
    """
    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    bagging_options = dict(
        base_estimator=base_classifier,
        n_estimators=n_classifiers,
        max_samples=1.0,
        max_features=1.0,
    )
    self.sk_bagging = BaggingClassifier(**bagging_options)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
class EnsembleStackClassifier(object):
    """Classifier facade over a stack object and a combiner.

    The stack must provide ``fit(X, y)`` and ``output(X)``; the combiner
    must provide ``combine(out)``.
    """

    def __init__(self, stack, combiner=None):
        """Keep ``stack``; default to a 'majority_vote' ``Combiner``."""
        self.stack = stack
        self.combiner = (Combiner(rule='majority_vote')
                         if combiner is None else combiner)

    def fit(self, X, y):
        """Delegate training to the stack."""
        self.stack.fit(X, y)

    def predict(self, X):
        """Return the combiner's result for the stack output on ``X``."""
        return self.combiner.combine(self.stack.output(X))

    def predict_proba(self, X):
        """Return the stack output on ``X`` averaged over axis 2."""
        return np.mean(self.stack.output(X), axis=2)
def test_majority_vote(self):
    """The 'majority_vote' rule name resolves to majority_vote_rule."""
    assert Combiner(rule='majority_vote').rule == majority_vote_rule
class ICSBagging(PoolGenerator):
    """Pool generator that grows its ensemble one selected member at a time.

    Each round trains ``K`` candidates on resampled data and keeps the one
    with the highest ``fitness``.
    """

    def __init__(self, K=10, alpha=0.75, base_classifier=None,
                 n_classifiers=100, combination_rule='majority_vote',
                 diversity_metric='e', max_samples=1.0, positive_label=1):
        """Store the configuration and build the helper objects.

        Args:
            K: number of candidate classifiers trained per round.
            alpha: weight of the AUC term in ``fitness``; the diversity
                term is weighted ``1 - alpha``.
            base_classifier: estimator cloned for every candidate.
            n_classifiers: number of members the final ensemble holds.
            combination_rule: rule passed to ``Combiner``.
            diversity_metric: metric passed to ``Diversity``.
            max_samples: accepted but not used by this class.
            positive_label: label treated as the positive class.
        """
        self.K = K
        self.alpha = alpha
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        """Store validation data.

        NOTE(review): ``fit`` overwrites these attributes with its own
        arguments, so data set here is not used by ``fit``.
        """
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        Score ``classifier`` as a candidate member of the current ensemble.

        The candidate is added temporarily and the result is
        ``alpha * auc + (1 - alpha) * diversity`` on the validation data.

        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        try:
            out = self.ensemble.output(self.validation_X)
            y_pred = self.combiner.combine(out)
            y_true = self.validation_y

            auc = evaluation.auc_score(y_true, y_pred)
            div = self.diversity.calculate(self.ensemble,
                                           self.validation_X,
                                           self.validation_y)
        finally:
            # Remove the candidate even when scoring fails, so that a
            # failure cannot leave it in the ensemble.
            self.ensemble.classifiers.pop()

        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        """Return the sampling probability for the positive class.

        The value is ``1 - pos_acc / (pos_acc + neg_acc)``, where the
        accuracies are measured per class on the validation data. When both
        accuracies are zero the ratio is undefined and 0.5 is returned.
        """
        y_pred = self.combiner.combine(self.ensemble.output(self.validation_X))
        y_true = self.validation_y
        mask = self.positive_label == y_true

        pos_acc = float(np.count_nonzero(y_pred[mask] == y_true[mask])) / len(y_true[mask])
        neg_acc = float(np.count_nonzero(y_pred[~mask] == y_true[~mask])) / len(y_true[~mask])

        total = pos_acc + neg_acc
        if total == 0:
            # Previously a ZeroDivisionError; fall back to balanced sampling.
            return 0.5
        return 1.0 - (pos_acc / total)

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        """Train ``K`` clones of the base classifier on resampled data.

        Every resample has ``X.shape[0]`` rows. Each row is drawn from the
        positive class with probability ``pos_prob`` and from the remaining
        rows otherwise. A resample that ends up with a single class gets
        one row replaced by a row of the missing class.

        Returns:
            list of the fitted classifiers.
        """
        mask = self.positive_label == y
        # Slice each class once instead of once per drawn sample.
        X_pos, X_neg = X[mask], X[~mask]
        negative_label = y[~mask][0]
        n_samples = X.shape[0]

        clfs = []
        for _ in range(K):
            # True where the row is to be drawn from the positive class.
            take_pos = np.random.random(n_samples) < pos_prob

            if not take_pos.any():
                take_pos[np.random.randint(n_samples)] = True
            elif take_pos.all():
                take_pos[np.random.randint(n_samples)] = False

            n_pos = int(take_pos.sum())
            n_neg = n_samples - n_pos

            # randint's upper bound is exclusive, matching the deprecated
            # random_integers(0, n - 1) this replaces.
            cX = np.empty((n_samples,) + X.shape[1:], dtype=X.dtype)
            cX[take_pos] = X_pos[np.random.randint(len(X_pos), size=n_pos)]
            cX[~take_pos] = X_neg[np.random.randint(len(X_neg), size=n_neg)]
            cy = np.where(take_pos, self.positive_label, negative_label)

            clf = sklearn.base.clone(self.base_classifier)
            clfs.append(clf.fit(cX, cy))

        return clfs

    def fit(self, X, y):
        """Build the ensemble, using ``X`` and ``y`` for training and scoring.

        The first member is picked at random from ``K`` candidates trained
        with ``pos_prob=0.5``; each later member is the candidate with the
        highest ``fitness``.

        Returns:
            self.
        """
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        # Pick by index: np.random.choice first converts the list to an
        # array, which misbehaves for estimators that define __len__.
        self.ensemble.add(clfs[np.random.randint(len(clfs))])

        for _ in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=self.fitness))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class ICSBaggingNew(PoolGenerator):
    """Pool generator that grows its ensemble one selected member at a time.

    Each round trains ``K`` candidates on resampled data and keeps the one
    with the highest ``fitness``. The negative label is always
    ``int(not positive_label)``, so labels are assumed to be 0 and 1.
    """

    def __init__(self, K=10, alpha=0.75, base_classifier=None,
                 n_classifiers=100, combination_rule='majority_vote',
                 diversity_metric='e', max_samples=1.0, positive_label=1):
        """Store the configuration and build the helper objects.

        Args:
            K: number of candidate classifiers trained per round.
            alpha: weight of the AUC term in ``fitness``; the diversity
                term is weighted ``1 - alpha``.
            base_classifier: estimator cloned for every candidate.
            n_classifiers: number of members the final ensemble holds.
            combination_rule: rule passed to ``Combiner``.
            diversity_metric: metric passed to ``Diversity``.
            max_samples: accepted but not used by this class.
            positive_label: label treated as the positive class.
        """
        self.K = K
        self.alpha = alpha
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.positive_label = positive_label

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        """Store validation data.

        NOTE(review): ``fit`` overwrites these attributes with its own
        arguments, so data set here is not used by ``fit``.
        """
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        Score ``classifier`` as a candidate member of the current ensemble.

        The candidate is added temporarily and the result is
        ``alpha * auc + (1 - alpha) * diversity`` on the validation data.

        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        try:
            y_pred = self.predict(self.validation_X)
            y_true = self.validation_y

            auc = evaluation.auc_score(y_true, y_pred)
            div = self.diversity.calculate(self.ensemble,
                                           self.validation_X, y_true)
        finally:
            # Remove the candidate even when scoring fails.
            self.ensemble.classifiers.pop()  # create interface for this later

        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        """Return the sampling probability for the positive class.

        The value is ``neg_recall / (pos_recall + neg_recall)`` on the
        validation data. When both recalls are zero the ratio is undefined
        and 0.5 is returned.
        """
        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y
        negative_label = int(not self.positive_label)

        # obtaining recall scores for each label (assuming the labels are binary)
        pos_acc = recall_score(y_true, y_pred, average='binary',
                               pos_label=self.positive_label)
        neg_acc = recall_score(y_true, y_pred, average='binary',
                               pos_label=negative_label)

        total = pos_acc + neg_acc
        if total == 0:
            # Avoid handing NaN to the sampler; fall back to balanced sampling.
            return 0.5
        return neg_acc / total

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        """Train ``K`` clones of the base classifier on resampled data.

        Every resample has the shape of ``X``. Each row is drawn from the
        positive examples with probability ``pos_prob`` and from the
        negative examples otherwise. A resample that ends up with a single
        class gets one row replaced by a row of the missing class.

        Returns:
            list of the fitted classifiers.
        """
        negative_label = int(not self.positive_label)
        X_pos = X[y == self.positive_label, :]  # positive examples
        X_neg = X[y == negative_label, :]  # negative examples
        n_samples = X.shape[0]

        classifiers = []
        for _ in range(K):
            # True where the row is to be drawn from the positive examples.
            take_pos = pos_prob > np.random.random(n_samples)

            # if no positive example is present, make sure you insert at least one
            if not take_pos.any():
                take_pos[np.random.randint(n_samples)] = True
            # if no negative example is present, make sure you insert at least one
            elif take_pos.all():
                take_pos[np.random.randint(n_samples)] = False

            n_pos = int(take_pos.sum())
            n_neg = n_samples - n_pos

            X_new = np.zeros(X.shape)
            X_new[take_pos, :] = X_pos[
                np.random.randint(X_pos.shape[0], size=n_pos), :]
            X_new[~take_pos, :] = X_neg[
                np.random.randint(X_neg.shape[0], size=n_neg), :]
            # Labels stay floating point, as with the zero-initialised
            # array used before.
            y_new = np.where(take_pos, float(self.positive_label),
                             float(negative_label))

            # train classifier with the bootstrapped data
            clf = sklearn.base.clone(self.base_classifier)
            clf.fit(X_new, y_new)

            classifiers.append(clf)

        return classifiers

    def fit(self, X, y):
        """Build the ensemble, using ``X`` and ``y`` for training and scoring.

        The first member is picked at random from ``K`` candidates trained
        with ``pos_prob=0.5``; each later member is the candidate with the
        highest ``fitness``.

        Returns:
            self.
        """
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        # Pick by index: np.random.choice first converts the list to an
        # array, which misbehaves for estimators that define __len__.
        self.ensemble.add(clfs[np.random.randint(len(clfs))])

        for _ in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=self.fitness))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
import fris_stolp_test

# NOTE(review): this binds the SklearnHelper attribute itself without
# calling it -- confirm whether an instance was intended.
clf4 = fris_stolp_test.SklearnHelper

# Creating Ensemble
# NOTE(review): `eclf` is not used again in this excerpt.
ensemble = Ensemble([clf1, clf2, clf3, clf4])
eclf = EnsembleClassifier(ensemble=ensemble, combiner='mean')

# Creating Stacking
# Two layers: the three classifiers first, then a fresh clone of clf1.
layer_1 = Ensemble([clf1, clf2, clf3])
layer_2 = Ensemble([sklearn.clone(clf1)])
stack = EnsembleStack(cv=3)
stack.add_layer(layer_1)
stack.add_layer(layer_2)
sclf = EnsembleStackClassifier(stack, combiner=Combiner('mean'))

# Train the stacked classifier and predict on the test split.
sclf.fit(X_train.values, y_train.values)
y_pre = sclf.predict(X_test.values)

# Score the predictions against the test labels.
precision = precision_score(y_test, y_pre)
recall = recall_score(y_test, y_pre)
accuracy = accuracy_score(y_test, y_pre)
fmera = f1_score(y_test, y_pre)

# Only the report is guarded; the training above also runs on import.
if __name__ == '__main__':
    print("presicion ", precision, " recall ", recall, " fmera ", fmera, " accuracy ", accuracy)
def test_max(self):
    """The 'max' rule name resolves to max_rule."""
    assert Combiner(rule='max').rule == max_rule
def __init__(self, stack, combiner=None):
    """Keep ``stack``; default to a 'majority_vote' ``Combiner``."""
    self.stack = stack
    self.combiner = (Combiner(rule='majority_vote')
                     if combiner is None else combiner)
def test_median(self):
    """The 'median' rule name resolves to median_rule."""
    assert Combiner(rule='median').rule == median_rule
class ICSBaggingNew(PoolGenerator):
    """Pool generator that grows its ensemble one selected member at a time.

    Each round trains ``K`` candidates on resampled data and keeps the one
    with the highest ``fitness``. The negative label is always
    ``int(not positive_label)``, so labels are assumed to be 0 and 1.
    """

    def __init__(self, K=10, alpha=0.75, base_classifier=None,
                 n_classifiers=100, combination_rule='majority_vote',
                 diversity_metric='e', positive_label=1):
        """Store the configuration and build the helper objects.

        Args:
            K: number of candidate classifiers trained per round.
            alpha: weight of the AUC term in ``fitness``; the diversity
                term is weighted ``1 - alpha``.
            base_classifier: estimator cloned for every candidate.
            n_classifiers: number of members the final ensemble holds.
            combination_rule: rule passed to ``Combiner``.
            diversity_metric: metric passed to ``Diversity``.
            positive_label: label treated as the positive class.
        """
        self.K = K
        self.alpha = alpha
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.positive_label = positive_label

        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        """Store validation data.

        NOTE(review): ``fit`` overwrites these attributes with its own
        arguments, so data set here is not used by ``fit``.
        """
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        Score ``classifier`` as a candidate member of the current ensemble.

        The candidate is added temporarily and the result is
        ``alpha * auc + (1 - alpha) * diversity`` on the validation data.

        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        try:
            y_pred = self.predict(self.validation_X)
            y_true = self.validation_y

            auc = evaluation.auc_score(y_true, y_pred)
            div = self.diversity.calculate(self.ensemble,
                                           self.validation_X, y_true)
        finally:
            # Remove the candidate even when scoring fails.
            self.ensemble.classifiers.pop()  # create interface for this later

        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        """Return the sampling probability for the positive class.

        The value is ``neg_recall / (pos_recall + neg_recall)`` on the
        validation data. When both recalls are zero the ratio is undefined
        and 0.5 is returned.
        """
        y_pred = self.predict(self.validation_X)
        y_true = self.validation_y
        negative_label = int(not self.positive_label)

        # obtaining recall scores for each label (assuming the labels are binary)
        pos_acc = recall_score(y_true, y_pred, average='binary',
                               pos_label=self.positive_label)
        neg_acc = recall_score(y_true, y_pred, average='binary',
                               pos_label=negative_label)

        total = pos_acc + neg_acc
        if total == 0:
            # Avoid handing NaN to the sampler; fall back to balanced sampling.
            return 0.5
        return neg_acc / total

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        """Train ``K`` clones of the base classifier on resampled data.

        Every resample has the shape of ``X``. Each row is drawn from the
        positive examples with probability ``pos_prob`` and from the
        negative examples otherwise. A resample that ends up with a single
        class gets one row replaced by a row of the missing class.

        Returns:
            list of the fitted classifiers.
        """
        negative_label = int(not self.positive_label)
        X_pos = X[y == self.positive_label, :]  # positive examples
        X_neg = X[y == negative_label, :]  # negative examples
        n_samples = X.shape[0]

        classifiers = []
        for _ in range(K):
            # True where the row is to be drawn from the positive examples.
            take_pos = pos_prob > np.random.random(n_samples)

            # if no positive example is present, make sure you insert at least one
            if not take_pos.any():
                take_pos[np.random.randint(n_samples)] = True
            # if no negative example is present, make sure you insert at least one
            elif take_pos.all():
                take_pos[np.random.randint(n_samples)] = False

            n_pos = int(take_pos.sum())
            n_neg = n_samples - n_pos

            X_new = np.zeros(X.shape)
            X_new[take_pos, :] = X_pos[
                np.random.randint(X_pos.shape[0], size=n_pos), :]
            X_new[~take_pos, :] = X_neg[
                np.random.randint(X_neg.shape[0], size=n_neg), :]
            # Labels stay floating point, as with the zero-initialised
            # array used before.
            y_new = np.where(take_pos, float(self.positive_label),
                             float(negative_label))

            # train classifier with the bootstrapped data
            clf = sklearn.base.clone(self.base_classifier)
            clf.fit(X_new, y_new)

            classifiers.append(clf)

        return classifiers

    def fit(self, X, y):
        """Build the ensemble, using ``X`` and ``y`` for training and scoring.

        The first member is picked at random from ``K`` candidates trained
        with ``pos_prob=0.5``; each later member is the candidate with the
        highest ``fitness``.

        Returns:
            self.
        """
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        # Pick by index: np.random.choice first converts the list to an
        # array, which misbehaves for estimators that define __len__.
        self.ensemble.add(clfs[np.random.randint(len(clfs))])

        for _ in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=self.fitness))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
class ICSBagging(PoolGenerator):
    """Pool generator that grows its ensemble one selected member at a time.

    Each round trains ``K`` candidates on resampled data and keeps the one
    with the highest ``fitness``.
    """

    def __init__(self, K=10, alpha=0.75, base_classifier=None,
                 n_classifiers=100, combination_rule='majority_vote',
                 diversity_metric='e', positive_label=1):
        """Store the configuration and build the helper objects.

        Args:
            K: number of candidate classifiers trained per round.
            alpha: weight of the AUC term in ``fitness``; the diversity
                term is weighted ``1 - alpha``.
            base_classifier: estimator cloned for every candidate.
            n_classifiers: number of members the final ensemble holds.
            combination_rule: rule passed to ``Combiner``.
            diversity_metric: metric passed to ``Diversity``.
            positive_label: label treated as the positive class.
        """
        self.K = K
        self.alpha = alpha
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combination_rule = combination_rule
        self.positive_label = positive_label

        self.classifiers = None
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

        self.diversity_metric = diversity_metric
        self.diversity = Diversity(metric=diversity_metric)

        self.validation_X = None
        self.validation_y = None

    def set_validation(self, X, y):
        """Store validation data.

        NOTE(review): ``fit`` overwrites these attributes with its own
        arguments, so data set here is not used by ``fit``.
        """
        self.validation_X = X
        self.validation_y = y

    def fitness(self, classifier):
        '''
        Score ``classifier`` as a candidate member of the current ensemble.

        The candidate is added temporarily and the result is
        ``alpha * auc + (1 - alpha) * diversity`` on the validation data.

        #TODO normalize diversity metric.
        '''
        self.ensemble.add(classifier)
        try:
            out = self.ensemble.output(self.validation_X)
            y_pred = self.combiner.combine(out)
            y_true = self.validation_y

            auc = evaluation.auc_score(y_true, y_pred)
            div = self.diversity.calculate(self.ensemble,
                                           self.validation_X,
                                           self.validation_y)
        finally:
            # Remove the candidate even when scoring fails, so that a
            # failure cannot leave it in the ensemble.
            self.ensemble.classifiers.pop()

        return self.alpha * auc + (1.0 - self.alpha) * div

    def _calc_pos_prob(self):
        """Return the sampling probability for the positive class.

        The value is ``1 - pos_acc / (pos_acc + neg_acc)``, where the
        accuracies are measured per class on the validation data. When both
        accuracies are zero the ratio is undefined and 0.5 is returned.
        """
        y_pred = self.combiner.combine(self.ensemble.output(self.validation_X))
        y_true = self.validation_y
        mask = self.positive_label == y_true

        pos_acc = float(np.count_nonzero(y_pred[mask] == y_true[mask])) / len(y_true[mask])
        neg_acc = float(np.count_nonzero(y_pred[~mask] == y_true[~mask])) / len(y_true[~mask])

        total = pos_acc + neg_acc
        if total == 0:
            # Previously a ZeroDivisionError; fall back to balanced sampling.
            return 0.5
        return 1.0 - (pos_acc / total)

    def bootstrap_classifiers(self, X, y, K, pos_prob):
        """Train ``K`` clones of the base classifier on resampled data.

        Every resample has ``X.shape[0]`` rows. Each row is drawn from the
        positive class with probability ``pos_prob`` and from the remaining
        rows otherwise. A resample that ends up with a single class gets
        one row replaced by a row of the missing class.

        Returns:
            list of the fitted classifiers.
        """
        mask = self.positive_label == y
        # Slice each class once instead of once per drawn sample.
        X_pos, X_neg = X[mask], X[~mask]
        negative_label = y[~mask][0]
        n_samples = X.shape[0]

        clfs = []
        for _ in range(K):
            # True where the row is to be drawn from the positive class.
            take_pos = np.random.random(n_samples) < pos_prob

            if not take_pos.any():
                take_pos[np.random.randint(n_samples)] = True
            elif take_pos.all():
                take_pos[np.random.randint(n_samples)] = False

            n_pos = int(take_pos.sum())
            n_neg = n_samples - n_pos

            # randint's upper bound is exclusive, matching the deprecated
            # random_integers(0, n - 1) this replaces.
            cX = np.empty((n_samples,) + X.shape[1:], dtype=X.dtype)
            cX[take_pos] = X_pos[np.random.randint(len(X_pos), size=n_pos)]
            cX[~take_pos] = X_neg[np.random.randint(len(X_neg), size=n_neg)]
            cy = np.where(take_pos, self.positive_label, negative_label)

            clf = sklearn.base.clone(self.base_classifier)
            clfs.append(clf.fit(cX, cy))

        return clfs

    def fit(self, X, y):
        """Build the ensemble, using ``X`` and ``y`` for training and scoring.

        The first member is picked at random from ``K`` candidates trained
        with ``pos_prob=0.5``; each later member is the candidate with the
        highest ``fitness``.

        Returns:
            self.
        """
        self.validation_X = X
        self.validation_y = y

        self.classes_ = set(y)
        self.ensemble = Ensemble()

        clfs = self.bootstrap_classifiers(X, y, self.K, 0.5)
        # Pick by index: np.random.choice first converts the list to an
        # array, which misbehaves for estimators that define __len__.
        self.ensemble.add(clfs[np.random.randint(len(clfs))])

        for _ in range(1, self.n_classifiers):
            clfs = self.bootstrap_classifiers(X, y, self.K,
                                              self._calc_pos_prob())
            self.ensemble.add(max(clfs, key=self.fitness))

        self.validation_X = None
        self.validation_y = None

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
max_features=1.0), AdaBoostClassifier(DecisionTreeClassifier(max_depth=2), n_estimators=600, learning_rate=1), BaggingClassifier(ExtraTreesClassifier(criterion='entropy', max_depth=100, n_estimators=100), max_samples=1.0, max_features=1.0) ] clfs = classifiers # [clf1, clf2] ens = Ensemble(classifiers=clfs) # create your Combiner # the rules can be 'majority_vote', 'max', 'min', 'mean' or 'median' comb = Combiner(rule='max') # now create your ensemble classifier ensemble_clf = EnsembleClassifier(ensemble=ens, combiner=comb) ensemble_clf = ensemble_clf.fit(X_train, Y_train) y_tested = ensemble_clf.predict(X_test) # for i in xrange(1,10): # clf = BaggingClassifier(DecisionTreeClassifier(criterion = 'entropy', max_depth = i + 100),max_samples=1.0, max_features=1.0) # clf = clf.fit(X_train, Y_train) # y_tested1 = clf.predict(X_test) # for a in range(len(y_tested)): # y_tested[a] = (y_tested[a] & y_tested1[a]) # clf = BaggingClassifier(ExtraTreesClassifier(criterion = 'entropy', max_depth = i + 100,n_estimators=100+i),max_samples=1.0, max_features=1.0) # clf = clf.fit(X_train, Y_train) # y_tested2 = clf.predict(X_test)
def test_default_rule(self):
    """A Combiner built without arguments uses majority_vote_rule."""
    assert Combiner().rule == majority_vote_rule
class SmoteBagging(PoolGenerator):
    """Pool generator whose members are fitted on class-balanced resamples.

    The majority class is bootstrapped; every other class is brought up to
    the majority count by combining a bootstrap with ``smote`` samples.
    """

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote', k=5):
        """Store the configuration and build the combiner.

        Args:
            base_classifier: estimator cloned for every member.
            n_classifiers: number of members to fit.
            combination_rule: rule passed to ``Combiner``.
            k: value ``fit`` passes on as the ``k`` argument of ``smote``.
        """
        self.k = k
        self.n_classifiers = n_classifiers
        self.base_classifier = base_classifier
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def smote_bootstrap_sample(self, X, y, b, k):
        """Return one balanced resample ``(data, target)`` of ``X``, ``y``.

        Args:
            X: 2-D array of samples.
            y: labels; must be valid input for ``np.bincount``
                (non-negative integers).
            b: bootstrap percentage for the non-majority classes; the rest
                of each class is filled with synthetic samples.
            k: passed as ``k`` to ``smote``.

        Returns:
            tuple ``(data, target)`` in which every class has as many rows
            as the majority class.
        """
        classes = np.unique(y)
        count = np.bincount(y)  # number of instances of each class

        majority_class = count.argmax()  # label of the majority class
        majority_count = count.max()  # size of the majority class

        # Collect the per-class pieces and join them once at the end. The
        # empty seeds keep the result well-formed when there are no classes.
        data_parts = [np.empty((0, X.shape[1]))]
        target_parts = [np.empty((0, ))]

        for i in classes:
            class_data = X[(y == i), :]

            if i == majority_class:  # majority class
                # regular bootstrap (i.e. 100% sampling rate)
                idx = np.random.choice(majority_count, (majority_count, ))
                new_class_data = class_data[idx, :]
            else:  # minority classes
                # bootstrap the class data with defined sampling rate
                sample_rate = (majority_count / class_data.shape[0]) * (b / 100)
                idx = np.random.choice(
                    class_data.shape[0],
                    (int(sample_rate * class_data.shape[0]), ))
                sampled_class_data = class_data[idx, :]

                # run smote on bootstrapped data to obtain synthetic samples
                # ceil to make sure N_smote is a multiple of 100, and the
                # small value to avoid a zero
                N_smote = int(
                    np.ceil((majority_count / sampled_class_data.shape[0]) *
                            (1 - b / 100 + 10e-8)) * 100)

                # Honour the `k` argument; it used to be ignored in favour
                # of self.k.
                synthetic = smote(sampled_class_data, N=N_smote, k=k)

                # add synthetic samples to sampled class data
                n_missing = majority_count - sampled_class_data.shape[0]
                idx = np.random.choice(synthetic.shape[0], (n_missing, ))
                new_class_data = np.concatenate(
                    (sampled_class_data, synthetic[idx, :]))

            data_parts.append(new_class_data)
            target_parts.append(i * np.ones((new_class_data.shape[0], )))

        return np.concatenate(data_parts), np.concatenate(target_parts)

    def fit(self, X, y):
        """Fit ``n_classifiers`` clones, each on its own balanced resample.

        Returns:
            self, so calls can be chained.
        """
        self.ensemble = Ensemble()

        # this parameter should change between [10, 100] with
        # increments of 10, for every classifier in the ensemble
        b = 10

        for _ in range(self.n_classifiers):
            data, target = self.smote_bootstrap_sample(
                X, y, b=float(b), k=self.k)

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)

            self.ensemble.add(classifier)

            # Wrap around once the upper bound has been used.
            b = 10 if b >= 100 else b + 10

        return self

    def predict(self, X):
        """Combine the ensemble output for ``X`` with the configured rule."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
# Create plot plt.title("Learning Curve") plt.xlabel("Training Set Size"), plt.ylabel("Accuracy Score"), plt.legend(loc="best") plt.tight_layout() plt.show() # Merge two classifier Randomforest and KNN from brew.base import Ensemble from brew.base import EnsembleClassifier from brew.combination.combiner import Combiner # Random Sampling X_resampled, y_resampled = RandomUnderSampler(random_state=0).fit_sample(X_train, y_train) clfs = [classifier_rf, classifier_knn] ens = Ensemble(classifiers = clfs) comb = Combiner(rule='max') eclf = EnsembleClassifier(ensemble=ens, combiner=Combiner('mean')) eclf.fit(X_resampled, y_resampled) y_pred = eclf.predict(X_test) cm = confusion_matrix(y_test, y_pred) print(classification_report(y_test, y_pred)) # PCA Using feature reduction technique # Check how many components needed in a way which will express maximum variance from sklearn.decomposition import PCA pca = PCA(n_components = None) X_train_pca = pca.fit(X_train) X_test_pca = pca.fit(X_test) explained_variance = pca.explained_variance_ratio_
def test_min(self):
    """The 'min' rule name resolves to min_rule."""
    assert Combiner(rule='min').rule == min_rule
class SmoteBagging(PoolGenerator):
    """Pool generator that trains each member on a class-balanced bootstrap.

    Every classifier is fitted on a sample in which the majority class is
    bootstrapped and each other class is bootstrapped at a reduced rate and
    then topped up with synthetic rows produced by ``smote`` until it has as
    many rows as the majority class.

    Parameters
    ----------
    base_classifier : estimator
        Prototype that is cloned (``sklearn.base.clone``) for every member.
    n_classifiers : int
        Number of classifiers added to the ensemble by ``fit``.
    combination_rule : str
        Rule name handed to ``Combiner``.
    k : int
        Value forwarded to ``smote`` as its ``k`` argument.
    """

    def __init__(self, base_classifier=None, n_classifiers=100,
                 combination_rule='majority_vote', k=2):
        self.k = k
        self.n_classifiers = n_classifiers
        self.base_classifier = base_classifier
        self.ensemble = None
        self.combiner = Combiner(rule=combination_rule)

    def smote_bootstrap_sample(self, X, y, b, k):
        """Draw one balanced bootstrap sample of ``(X, y)``.

        Parameters
        ----------
        X : ndarray, 2-D
            Feature rows.
        y : ndarray of non-negative ints
            Class labels; ``np.bincount`` is applied to them, so the label
            value doubles as the index into the per-class counts.
        b : number
            Percentage (``fit`` uses 10..100) of the majority-class size that
            is bootstrapped from each non-majority class; the remainder is
            filled with synthetic rows.
        k : int
            Forwarded to ``smote``.

        Returns
        -------
        data : ndarray
            Sampled rows, ``X.shape[1]`` columns.
        target : ndarray of float
            Label of each row in ``data``.
        """
        classes = np.unique(y)
        count = np.bincount(y)            # number of instances of each class
        majority_class = count.argmax()   # label with the most instances
        majority_count = count.max()      # size of that class
        # Float-explicit so the rate is not truncated by integer division.
        rate = float(b) / 100

        data = np.empty((0, X.shape[1]))
        target = np.empty((0,))

        for i in classes:
            class_data = X[(y == i), :]

            if i == majority_class:
                # Majority class: regular bootstrap (100% sampling rate).
                idx = np.random.choice(majority_count, (majority_count,))
                data = np.concatenate((data, class_data[idx, :]))
                target = np.concatenate(
                    (target, i * np.ones((majority_count,))))
                continue

            # Other classes: bootstrap roughly ``rate * majority_count`` rows.
            n_class = class_data.shape[0]
            sample_rate = (majority_count / float(n_class)) * rate
            idx = np.random.choice(n_class, (int(sample_rate * n_class),))
            sampled_class_data = class_data[idx, :]
            n_sampled = sampled_class_data.shape[0]
            # NOTE: when ``rate * majority_count`` is below 1 no rows are
            # drawn and the ratio below is a division by zero.

            # Run smote on the bootstrapped rows to obtain synthetic samples.
            # ceil(...) * 100 makes N_smote a multiple of 100; the small
            # constant keeps it from being zero when b == 100.
            N_smote = int(
                np.ceil((majority_count / float(n_sampled))
                        * (1 - rate + 10e-8)) * 100)
            # Use the ``k`` argument (the original ignored it for ``self.k``).
            synthetic = smote(sampled_class_data, N=N_smote, k=k)

            # Top the class up to the majority size with synthetic rows.
            n_missing = majority_count - n_sampled
            idx = np.random.choice(synthetic.shape[0], (n_missing,))
            new_class_data = np.concatenate(
                (sampled_class_data, synthetic[idx, :]))

            data = np.concatenate((data, new_class_data))
            target = np.concatenate(
                (target, i * np.ones((new_class_data.shape[0],))))

        return data, target

    def fit(self, X, y):
        """Build ``self.ensemble`` from ``n_classifiers`` clones.

        Each clone of ``base_classifier`` is fitted on its own sample from
        ``smote_bootstrap_sample``. The sampling percentage ``b`` starts at
        10 and advances by 10 per classifier, wrapping back to 10 after 100.
        """
        self.ensemble = Ensemble()

        b = 10
        for _ in range(self.n_classifiers):
            data, target = self.smote_bootstrap_sample(
                X, y, b=float(b), k=self.k)

            classifier = sklearn.base.clone(self.base_classifier)
            classifier.fit(data, target)
            self.ensemble.add(classifier)

            b = 10 if b >= 100 else b + 10

    def predict(self, X):
        """Combine the ensemble's output for ``X`` with ``self.combiner``."""
        out = self.ensemble.output(X)
        return self.combiner.combine(out)
# creating a new ensemble of ensembles ens = Ensemble(classifiers=[clf1,ensemble_clf]) ensemble_ens = EnsembleClassifier(ensemble=ens, combiner=cmb) # and you can use it in the same way as a regular ensemble ensemble_ens.fit(X, y) ensemble_ens.predict(X) ensemble_ens.predict_proba(X) ''' # l'altra libreria # create your Ensemble clf1 can be an EnsembleClassifier object too ens = Ensemble(classifiers=[mode_9, mode_9]) # create your Combiner (combination rule) # it can be 'min', 'max', 'majority_vote' ... cmb = Combiner(rule='mean') # and now, create your Ensemble Classifier ensemble_clf = EnsembleClassifier(ensemble=ens, combiner=cmb) # assuming you have a X, y data you can use ensemble_clf.fit(val_path, val_path) print("-----------d-----------") ensemble_clf.predict(val_path)
my_data = genfromtxt('/Users/samarth/Desktop/data.csv', delimiter=',') for item in range(0, my_data.shape[0]): var = my_data[item][4] my_data[item][4] = int(range_scaler(5538, 600000, 100, 1000, var)) ''' if my_data[item][6] < 100 or my_data[item][6] > 1000 or (my_data[item][6]>my_data[item][4]): my_data = np.delete(my_data, (item), axis = 0) ''' my_data = my_data[np.logical_not( np.logical_and(my_data[:, 4] < 100, my_data[:, 4] > 1000))] my_data = my_data[np.logical_not(my_data[:, 4] > my_data[:, 6])] ensemble = Ensemble([clf1, clf2, clf3]) eclf = EnsembleClassifier(ensemble=ensemble, combiner=Combiner('mean')) layer_1 = Ensemble([clf1, clf2, clf3]) layer_2 = Ensemble([sklearn.clone(clf1)]) stack = EnsembleStack(cv=3) stack.add_layer(layer_1) stack.add_layer(layer_2) sclf = EnsembleStackClassifier(stack) clf_list = [clf1, clf2, clf3, eclf, sclf] lbl_list = [ 'Logistic Regression', 'Random Forest', 'RBF kernel SVM', 'Ensemble', 'Stacking'
class EnsembleClassifier(object):
    """Classifier facade over an ensemble, an optional selector and a combiner.

    Parameters
    ----------
    ensemble : object
        Provides ``fit(X, y)`` and ``output(X, mode=...)``.
    selector : object or None
        When given, ``select(ensemble, x)`` is called once per sample and
        must return ``(ensemble, weights)``; ``weights`` may be None.
    combiner : None, str or Combiner
        None selects the 'majority_vote' rule, a string is used as the rule
        name, and a ``Combiner`` instance is used as is.

    Raises
    ------
    ValueError
        If ``combiner`` is none of the accepted kinds.
    """

    def __init__(self, ensemble=None, selector=None, combiner=None):
        self.ensemble = ensemble
        self.selector = selector

        if combiner is None:
            self.combiner = Combiner(rule='majority_vote')
        elif isinstance(combiner, str):
            self.combiner = Combiner(rule=combiner)
        elif isinstance(combiner, Combiner):
            self.combiner = combiner
        else:
            raise ValueError('Invalid parameter combiner')

    def fit(self, X, y):
        """Fit the wrapped ensemble on ``(X, y)``."""
        self.ensemble.fit(X, y)

    def predict(self, X):
        """Return the combined prediction for every row of ``X`` as an array."""
        # TODO: warn the user if mode of ensemble
        # output excludes the chosen combiner?
        if self.selector is None:
            # No selector: one pass over the whole input.
            combined = self.combiner.combine(self.ensemble.output(X))
            return np.asarray(combined)

        predictions = []
        for row in range(X.shape[0]):
            sample = X[row, :][np.newaxis, :]   # keep the sample 2-D
            chosen, weights = self.selector.select(self.ensemble, sample)

            # Any rule other than majority vote asks the ensemble for 'probs'.
            if self.combiner.combination_rule == 'majority_vote':
                out = chosen.output(sample)
            else:
                out = chosen.output(sample, mode='probs')

            if weights is not None:
                # Scale each member's slice (last axis) by its weight.
                for member in range(out.shape[2]):
                    out[:, :, member] = out[:, :, member] * weights[member]

            [label] = self.combiner.combine(out)
            predictions.append(label)

        return np.asarray(predictions)

    def predict_proba(self, X):
        """Return the ensemble output averaged over its last axis, per row."""
        # TODO: warn the user if mode of ensemble
        # output excludes the chosen combiner?
        if self.selector is None:
            return np.mean(self.ensemble.output(X, mode='probs'), axis=2)

        averaged = []
        for row in range(X.shape[0]):
            sample = X[row, :][np.newaxis, :]
            chosen, weights = self.selector.select(self.ensemble, sample)

            # NOTE(review): unlike the selector-free path above, this call
            # does not pass mode='probs' -- confirm that is intended.
            out = chosen.output(sample)

            if weights is not None:
                # Scale each member's slice (last axis) by its weight.
                for member in range(out.shape[2]):
                    out[:, :, member] = out[:, :, member] * weights[member]

            averaged.extend(list(np.mean(out, axis=2)))

        return np.array(averaged)

    def score(self, X, y, sample_weight=None):
        """Return ``accuracy_score`` of ``predict(X)`` against ``y``."""
        predicted = self.predict(X)
        return accuracy_score(y, predicted, sample_weight=sample_weight)