def imblearn_fit_var2d(self, X, X_3d, y, variables, variables_3d, random_state=42):
    """Oversample ``X`` / ``X_3d`` with the project's ADASYN variant.

    The target ``y`` is converted to integer class labels with
    ``np.digitize`` so that it can drive a class-based oversampler. The bin
    width is ``max(Std * np.std(y), 0.2)``; ``Std`` starts at 0.01 and is
    multiplied by 10 after every failed attempt, so at most three widths are
    tried before giving up.

    Parameters
    ----------
    X, X_3d, y :
        Inputs that are indexed along their first axis with integer index
        arrays; ``y`` must also support ``np.min``/``np.max``/``np.std`` and
        ``.ravel()``.
    variables, variables_3d :
        Forwarded unchanged to ``ADASYN``.
    random_state : int, default 42
        Forwarded unchanged to ``ADASYN``.

    Returns
    -------
    tuple
        * The three values returned by ``ADASYN.fit_resample`` when an
          attempt succeeds.
        * ``(X, X_3d, y)`` as soon as the binning yields a single class
          (nothing to balance).
        * ``(X, X_3d, y)`` when every attempt failed. Rows belonging to
          classes with fewer than two samples may already have been removed
          by a failed attempt, so this is not necessarily the caller's
          original data.

    Notes
    -----
    Reads ``self.n_jobs``. Any ``Exception`` raised inside an attempt is
    treated as "bin width too small" and triggers a retry with a coarser
    width; ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    from Fuzzy_clustering.ver_tf2.Adasyn_var2d import ADASYN

    # A per-class dict strategy was experimented with in the past; the
    # oversampler is always driven with "auto".
    strategy = "auto"

    flag = False
    Std = 0.01
    while not flag and Std <= 1:
        try:
            std = np.maximum(Std * np.std(y), 0.2)

            # First pass: label every sample with a fixed-width bin index.
            bins = np.arange(np.min(y), np.max(y), std)
            yy = np.digitize(y, bins, right=True)

            # Second pass: keep only the edges whose bin received at least
            # two samples, then relabel against the reduced edge set.
            bins = bins[(np.bincount(yy.ravel()) >= 2)[:-1]]
            yy = np.digitize(y, bins, right=True)

            if np.unique(yy).shape[0] == 1:
                # Only one class left: there is nothing to balance.
                return X, X_3d, y

            counts = np.bincount(yy.ravel())
            if np.any(counts < 2):
                # Drop the samples of every class that still has fewer than
                # two members. The class list is fixed before the loop, as in
                # the original implementation.
                for cl in np.where(counts < 2)[0]:
                    keep = np.where(yy != cl)[0]
                    X = X[keep]
                    X_3d = X_3d[keep]
                    y = y[keep]
                    yy = yy[keep]

            # Neighbourhood size is bounded by the smallest class. A
            # non-positive value makes the oversampler fail, which is handled
            # below by retrying with a coarser bin width.
            sm = ADASYN(sampling_strategy=strategy, random_state=random_state,
                        variables=variables, variables_3d=variables_3d,
                        n_neighbors=np.min(np.bincount(yy.ravel()) - 1),
                        n_jobs=self.n_jobs)
            X2, X_3d2, yy2 = sm.fit_resample(X, X_3d, yy.ravel(), y.ravel())
            flag = True
        except Exception:
            # Was a bare ``except:``; narrowed so that Ctrl-C / interpreter
            # shutdown are no longer swallowed by the retry loop.
            Std *= 10

    if flag:
        return X2, X_3d2, yy2
    return X, X_3d, y
def imblearn_fit_obsolete(self, X, y, random_state=42):
    """Oversample ``X`` after binning the target ``y`` into classes.

    The target ``y`` is converted to integer class labels with
    ``np.digitize`` so that a class-based oversampler can be applied. The bin
    width is ``max(Std * np.std(y), 0.2)``; ``Std`` starts at 0.01 and is
    multiplied by 10 after every failed attempt, so at most three widths are
    tried.

    The oversampler is chosen from ``self.method``: ``'ADASYN'``,
    ``'SVMSMOTE'``, otherwise ``BorderlineSMOTE``. If the chosen sampler
    fails in ``fit_resample``, plain ``SMOTE`` is tried once with the same
    settings.

    Parameters
    ----------
    X, y :
        Inputs that are indexed along their first axis with integer index
        arrays; ``y`` must also support ``np.min``/``np.max``/``np.std``.
    random_state : int, default 42
        Forwarded unchanged to the oversampler.

    Returns
    -------
    tuple
        ``(X, y)`` as soon as the binning yields a single class; otherwise
        ``(X2, yy2)`` where ``X2`` is the tail of the resampled features with
        negative entries set to 0 and ``yy2`` holds the bin-edge value looked
        up for each resampled label.

    Raises
    ------
    RuntimeError
        If no bin width produced a successful resampling.

    Notes
    -----
    Reads ``self.model_type``, ``self.method`` and ``self.n_jobs``. Any
    ``Exception`` raised inside an attempt triggers a retry with a coarser
    bin width; ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    # Membership test. The original compared the value to the set itself
    # with ``==``, which cannot be true for a string ``model_type``, so the
    # project fork was never selected. A tuple is used so that the check
    # does not require ``model_type`` to be hashable.
    if self.model_type in ('pv', 'wind'):
        from Fuzzy_clustering.ver_tf2.imblearn.over_sampling import BorderlineSMOTE, SVMSMOTE, SMOTE, ADASYN
    else:
        from imblearn.over_sampling import BorderlineSMOTE, SVMSMOTE, SMOTE, ADASYN

    # A per-class dict strategy was experimented with in the past; the
    # oversamplers are always driven with "auto".
    strategy = "auto"

    flag = False
    Std = 0.01
    while not flag and Std <= 1:
        try:
            std = np.maximum(Std * np.std(y), 0.2)

            # First pass: label every sample with a fixed-width bin index.
            bins = np.arange(np.min(y), np.max(y), std)
            yy = np.digitize(y, bins, right=True)

            # Second pass: keep only the edges whose bin received at least
            # two samples, then relabel against the reduced edge set.
            bins = bins[(np.bincount(yy.ravel()) >= 2)[:-1]]
            yy = np.digitize(y, bins, right=True)

            if np.unique(yy).shape[0] == 1:
                # Only one class left: there is nothing to balance.
                return X, y

            counts = np.bincount(yy.ravel())
            if np.any(counts < 2):
                # Drop the samples of every class that still has fewer than
                # two members. The class list is fixed before the loop, as in
                # the original implementation.
                for cl in np.where(counts < 2)[0]:
                    keep = np.where(yy != cl)[0]
                    X = X[keep]
                    y = y[keep]
                    yy = yy[keep]

            # Neighbourhood size is bounded by the smallest class. A
            # non-positive value makes the sampler fail, which is handled by
            # the retry logic below.
            k = np.min(np.bincount(yy.ravel()) - 1)

            if self.method == 'ADASYN':
                sm = ADASYN(sampling_strategy=strategy,
                            random_state=random_state,
                            n_neighbors=k,
                            n_jobs=self.n_jobs)
            elif self.method == 'SVMSMOTE':
                sm = SVMSMOTE(sampling_strategy=strategy,
                              random_state=random_state,
                              k_neighbors=k,
                              m_neighbors=2 * k,
                              n_jobs=self.n_jobs)
            else:
                sm = BorderlineSMOTE(sampling_strategy=strategy,
                                     random_state=random_state,
                                     k_neighbors=k,
                                     m_neighbors=2 * k,
                                     n_jobs=self.n_jobs)

            try:
                X2, yy2 = sm.fit_resample(X, yy.ravel())
            except Exception:
                # Fall back to plain SMOTE when the preferred sampler cannot
                # handle this class layout.
                sm = SMOTE(sampling_strategy=strategy,
                           random_state=random_state,
                           k_neighbors=k,
                           n_jobs=self.n_jobs)
                X2, yy2 = sm.fit_resample(X, yy.ravel())

            # Keep only the tail of the resampled output.
            # NOTE(review): presumably the sampler returns the original rows
            # first; if so, the ``+ 1`` also discards the first generated
            # row. Left unchanged, confirm whether that is intended.
            X2 = X2[X.shape[0] + 1:]
            X2[np.where(X2 < 0)] = 0
            yy2 = yy2[X.shape[0] + 1:]

            # Turn class labels back into target values via the bin edges.
            # NOTE(review): label 0 indexes ``bins[-1]`` (wraps to the last
            # edge). Left unchanged, confirm against the caller.
            yy2 = bins[yy2 - 1]
            flag = True
        except Exception:
            # Was a bare ``except:``; narrowed so that Ctrl-C / interpreter
            # shutdown are no longer swallowed by the retry loop.
            Std *= 10

    if flag:
        return X2, yy2
    raise RuntimeError('Cannot make resampling ')