def test_rest(x, y):
    """Run a fixed series of resamplers once each on ``(x, y)``.

    For every entry the label is printed, the sampler is constructed, and
    ``fit_transform(x, y)`` is called.  The resampled outputs are discarded
    and nothing is returned.

    ``verbose`` is not a parameter or local of this function; it is looked
    up in the enclosing (module) scope each time a sampler is built.

    Parameters
    ----------
    x, y
        Passed unchanged to every sampler's ``fit_transform``.
    """
    # Each factory is a lambda so that the sampler is only constructed
    # after its label has been printed, one at a time and in this order.
    steps = (
        ('Random under-sampling', lambda: UnderSampler(verbose=verbose)),
        ('Tomek links', lambda: TomekLinks(verbose=verbose)),
        ('Clustering centroids', lambda: ClusterCentroids(verbose=verbose)),
        ('NearMiss-1', lambda: NearMiss(version=1, verbose=verbose)),
        ('NearMiss-2', lambda: NearMiss(version=2, verbose=verbose)),
        ('NearMiss-3', lambda: NearMiss(version=3, verbose=verbose)),
        ('Neighboorhood Cleaning Rule',
         lambda: NeighbourhoodCleaningRule(verbose=verbose)),
        ('Random over-sampling', lambda: OverSampler(verbose=verbose)),
        ('SMOTE Tomek links', lambda: SMOTETomek(verbose=verbose)),
        ('SMOTE ENN', lambda: SMOTEENN(verbose=verbose)),
        ('EasyEnsemble', lambda: EasyEnsemble(verbose=verbose)),
    )

    for label, build_sampler in steps:
        print(label)
        sampler = build_sampler()
        # Unpack into exactly two values so a sampler returning anything
        # else still fails here; the values themselves are not used.
        _resampled_x, _resampled_y = sampler.fit_transform(x, y)
def sampling():
    """Resample the training data with several methods and evaluate each.

    Reads the module-level ``data_train1`` / ``target_train1`` arrays,
    prints the per-value counts of ``target_train1`` (``np.bincount``), and
    derives ``ratio`` as the count of label ``2`` divided by the count of
    label ``1``.  Each sampler is then fitted on the same training arrays
    and its resampled output is handed to ``random_methods``.

    ``EasyEnsemble`` and ``BalanceCascade`` are constructed without
    ``ratio``; all other samplers receive it.

    Takes no arguments and returns nothing.

    Raises
    ------
    IndexError
        If ``target_train1`` contains no value >= 2, since the bincount
        result then has no index ``2``.
    """
    verbose = False

    class_counts = np.bincount(target_train1)
    # Parenthesised form prints identically under Python 2 and is also
    # valid Python 3.
    print(class_counts)
    ratio = float(class_counts[2]) / float(class_counts[1])

    def _resample_and_evaluate(sampler):
        # Every sampler is fitted on the same arrays the ratio was derived
        # from, so the requested ratio matches the data it is applied to.
        resampled_x, resampled_y = sampler.fit_transform(
            data_train1, target_train1)
        random_methods(resampled_x, resampled_y)

    # 'Random over-sampling'
    _resample_and_evaluate(OverSampler(ratio=ratio, verbose=verbose))

    # 'SMOTE'
    _resample_and_evaluate(
        SMOTE(ratio=ratio, verbose=verbose, kind='regular'))

    # 'SMOTE borderline 1'
    # NOTE(review): this step previously used data_train/target_train,
    # unlike every other step; confirm no separate dataset was intended.
    _resample_and_evaluate(
        SMOTE(ratio=ratio, verbose=verbose, kind='borderline1'))

    # 'SMOTE borderline 2'
    _resample_and_evaluate(
        SMOTE(ratio=ratio, verbose=verbose, kind='borderline2'))

    # 'SMOTE SVM'
    svm_args = {'class_weight': 'auto'}
    _resample_and_evaluate(
        SMOTE(ratio=ratio, verbose=verbose, kind='svm', **svm_args))

    # 'SMOTE Tomek links'
    _resample_and_evaluate(SMOTETomek(ratio=ratio, verbose=verbose))

    # 'SMOTE ENN'
    _resample_and_evaluate(SMOTEENN(ratio=ratio, verbose=verbose))

    # 'EasyEnsemble'
    _resample_and_evaluate(EasyEnsemble(verbose=verbose))

    # 'BalanceCascade'
    _resample_and_evaluate(BalanceCascade(verbose=verbose))
def easy_ensemble(self):
    """Apply ``EasyEnsemble`` to this instance's data.

    Builds an ``EasyEnsemble`` with ``self.verbose`` and calls
    ``fit_transform(self.x, self.y)``.

    Returns
    -------
    tuple
        The two values produced by ``fit_transform``, in the same order.
    """
    resampled_x, resampled_y = EasyEnsemble(
        verbose=self.verbose).fit_transform(self.x, self.y)
    return resampled_x, resampled_y
bs1x, bs1y = bsmote1.fit_transform(x, y) # 'SMOTE bordeline 2' bsmote2 = SMOTE(ratio=ratio, verbose=verbose, kind='borderline2') bs2x, bs2y = bsmote2.fit_transform(x, y) # 'SMOTE SVM' svm_args={'class_weight' : 'auto'} svmsmote = SMOTE(ratio=ratio, verbose=verbose, kind='svm', **svm_args) svsx, svsy = svmsmote.fit_transform(x, y) # 'SMOTE Tomek links' STK = SMOTETomek(ratio=ratio, verbose=verbose) stkx, stky = STK.fit_transform(x, y) # 'SMOTE ENN' SENN = SMOTEENN(ratio=ratio, verbose=verbose) ennx, enny = SENN.fit_transform(x, y) # 'EasyEnsemble' EE = EasyEnsemble(verbose=verbose) eex, eey = EE.fit_transform(x, y) # 'BalanceCascade' BS = BalanceCascade(verbose=verbose) bsx, bsy = BS.fit_transform(x, y) # Apply PCA to be able to visualise the results osx_vis = pca.transform(osx) smox_vis = pca.transform(smox) bs1x_vis = pca.transform(bs1x) bs2x_vis = pca.transform(bs2x) svsx_vis = pca.transform(svsx) stkx_vis = pca.transform(stkx) ennx_vis = pca.transform(ennx) # Project each subset of the ensemble