def test_CNN(x, y): print('Condensed Nearest Neighbour') CNN = CondensedNearestNeighbour(verbose=verbose) cnnx, cnny = CNN.fit_transform(x, y) print('One-Sided Selection') OSS = OneSidedSelection(verbose=verbose) ossx, ossy = OSS.fit_transform(x, y) print('BalanceCascade') BS = BalanceCascade(verbose=verbose) bsx, bsy = BS.fit_transform(x, y)
def sampling(): verbose = False y = np.bincount(target_train1) print y ratio = float(y[2]) / float(y[1]) # 'Random over-sampling' OS = OverSampler(ratio=ratio, verbose=verbose) osx, osy = OS.fit_transform(data_train1, target_train1) random_methods(osx,osy) # 'SMOTE' smote = SMOTE(ratio=ratio, verbose=verbose, kind='regular') smox, smoy = smote.fit_transform(data_train1, target_train1) random_methods(smox,smoy) # 'SMOTE bordeline 1' bsmote1 = SMOTE(ratio=ratio, verbose=verbose, kind='borderline1') bs1x, bs1y = bsmote1.fit_transform(data_train, target_train) random_methods(bs1x,bs1y) # 'SMOTE bordeline 2' bsmote2 = SMOTE(ratio=ratio, verbose=verbose, kind='borderline2') bs2x, bs2y = bsmote2.fit_transform(data_train1, target_train1) random_methods(bs2x,bs2y) # 'SMOTE SVM' svm_args={'class_weight' : 'auto'} svmsmote = SMOTE(ratio=ratio, verbose=verbose, kind='svm', **svm_args) svsx, svsy = svmsmote.fit_transform(data_train1, target_train1) random_methods(svsx,svsy) # 'SMOTE Tomek links' STK = SMOTETomek(ratio=ratio, verbose=verbose) stkx, stky = STK.fit_transform(data_train1, target_train1) random_methods(stkx,stky) # 'SMOTE ENN' SENN = SMOTEENN(ratio=ratio, verbose=verbose) ennx, enny = SENN.fit_transform(data_train1, target_train1) random_methods(ennx,enny) # 'EasyEnsemble' EE = EasyEnsemble(verbose=verbose) eex, eey = EE.fit_transform(data_train1, target_train1) random_methods(eex,eey) # 'BalanceCascade' BS = BalanceCascade(verbose=verbose) bsx, bsy = BS.fit_transform(data_train1, target_train1) random_methods(bsx,bsy)
def balance_cascade(sel): BS = BalanceCascade(verbose=self.verbose) bsx, bsy = BS.fit_transform(self.x, self.y) return bsx, bsy
bs2x, bs2y = bsmote2.fit_transform(x, y) # 'SMOTE SVM' svm_args={'class_weight' : 'auto'} svmsmote = SMOTE(ratio=ratio, verbose=verbose, kind='svm', **svm_args) svsx, svsy = svmsmote.fit_transform(x, y) # 'SMOTE Tomek links' STK = SMOTETomek(ratio=ratio, verbose=verbose) stkx, stky = STK.fit_transform(x, y) # 'SMOTE ENN' SENN = SMOTEENN(ratio=ratio, verbose=verbose) ennx, enny = SENN.fit_transform(x, y) # 'EasyEnsemble' EE = EasyEnsemble(verbose=verbose) eex, eey = EE.fit_transform(x, y) # 'BalanceCascade' BS = BalanceCascade(verbose=verbose) bsx, bsy = BS.fit_transform(x, y) # Apply PCA to be able to visualise the results osx_vis = pca.transform(osx) smox_vis = pca.transform(smox) bs1x_vis = pca.transform(bs1x) bs2x_vis = pca.transform(bs2x) svsx_vis = pca.transform(svsx) stkx_vis = pca.transform(stkx) ennx_vis = pca.transform(ennx) # Project each subset of the ensemble eex_vis = [] for e in eex: eex_vis.append(pca.transform(e))