示例#1
0
    def imblearn_fit_var2d(self,
                           X,
                           X_3d,
                           y,
                           variables,
                           variables_3d,
                           random_state=42):
        """Oversample ``X`` / ``X_3d`` with the project-local ADASYN variant.

        The continuous target ``y`` is discretised into classes with
        ``np.digitize`` so that a class-based oversampler can be applied.
        The bin width is ``max(Std * std(y), 0.2)``; ``Std`` starts at 0.01
        and is multiplied by 10 after every failed attempt, up to ``Std <= 1``.

        Args:
            X: Samples, indexed along the first axis.
            X_3d: Companion samples, row-aligned with ``X``.
            y: Continuous targets, row-aligned with ``X``.
            variables: Forwarded unchanged to ``ADASYN``.
            variables_3d: Forwarded unchanged to ``ADASYN``.
            random_state: Seed forwarded to ``ADASYN``.

        Returns:
            A 3-tuple. On success, the three values returned by
            ``ADASYN.fit_resample``. If the discretisation yields a single
            class, or every attempt fails, ``(X, X_3d, y)`` is returned
            instead. In the all-attempts-failed case the inputs may already
            have had the rows of under-populated classes removed.
        """
        from Fuzzy_clustering.ver_tf2.Adasyn_var2d import ADASYN

        Std = 0.01
        while Std <= 1:
            try:
                std = np.maximum(Std * np.std(y), 0.2)
                edges = np.arange(np.min(y), np.max(y), std)
                yy = np.digitize(y, edges, right=True)
                # Keep only the edges whose bin holds at least two samples,
                # then re-label the targets against the reduced edge set.
                bins = edges[(np.bincount(yy.ravel()) >= 2)[:-1]]
                yy = np.digitize(y, bins, right=True)

                if np.unique(yy).shape[0] == 1:
                    # A single class leaves nothing to balance.
                    return X, X_3d, y

                # Drop the rows of every class that still has fewer than two
                # members. The class list is computed once, before filtering.
                for cl in np.where(np.bincount(yy.ravel()) < 2)[0]:
                    keep = np.where(yy != cl)[0]
                    X = X[keep]
                    X_3d = X_3d[keep]
                    y = y[keep]
                    yy = yy[keep]

                # NOTE(review): bincount keeps zero-count slots for labels
                # that are absent, so this can evaluate to -1; the sampler is
                # then expected to fail and trigger a retry - confirm intent.
                sm = ADASYN(sampling_strategy="auto",
                            random_state=random_state,
                            variables=variables,
                            variables_3d=variables_3d,
                            n_neighbors=np.min(np.bincount(yy.ravel()) - 1),
                            n_jobs=self.n_jobs)

                X2, X_3d2, yy2 = sm.fit_resample(X, X_3d, yy.ravel(),
                                                 y.ravel())
                return X2, X_3d2, yy2
            except Exception:
                # Any failure means "retry with a coarser binning".
                # KeyboardInterrupt / SystemExit are deliberately not caught.
                Std *= 10

        # Every bin width failed: hand back the (possibly filtered) inputs.
        return X, X_3d, y
    def imblearn_fit_obsolete(self, X, y, random_state=42):
        """Generate synthetic samples for ``X`` with an imblearn oversampler.

        The continuous target ``y`` is discretised into classes with
        ``np.digitize`` so that a class-based oversampler can be applied.
        The bin width is ``max(Std * std(y), 0.2)``; ``Std`` starts at 0.01
        and is multiplied by 10 after every failed attempt, up to ``Std <= 1``.

        The sampler is chosen from ``self.method``: ``'ADASYN'``,
        ``'SVMSMOTE'``, or ``BorderlineSMOTE`` for any other value. If its
        ``fit_resample`` raises, plain ``SMOTE`` is tried once with the same
        settings. The sampler classes come from the project-local
        ``imblearn`` copy when ``self.model_type`` is ``'pv'`` or ``'wind'``,
        otherwise from the installed ``imblearn`` package.

        Args:
            X: Samples, indexed along the first axis.
            y: Continuous targets, row-aligned with ``X``.
            random_state: Seed forwarded to the sampler.

        Returns:
            ``(X, y)`` unchanged when the discretisation yields a single
            class. Otherwise ``(X2, yy2)``: the rows of the resampled output
            from index ``X.shape[0] + 1`` onwards, with negative feature
            values set to 0 and class labels mapped back through ``bins``.

        Raises:
            RuntimeError: If resampling fails for every bin width. The last
                underlying exception is attached as ``__cause__``.
        """
        # Membership test: comparing the model type to a set with ``==``
        # could never be true for a string, so the project-local branch was
        # unreachable.
        if self.model_type in ('pv', 'wind'):
            from Fuzzy_clustering.ver_tf2.imblearn.over_sampling import BorderlineSMOTE, SVMSMOTE, SMOTE, ADASYN
        else:
            from imblearn.over_sampling import BorderlineSMOTE, SVMSMOTE, SMOTE, ADASYN

        last_error = None
        Std = 0.01
        while Std <= 1:
            try:
                std = np.maximum(Std * np.std(y), 0.2)
                edges = np.arange(np.min(y), np.max(y), std)
                yy = np.digitize(y, edges, right=True)
                # Keep only the edges whose bin holds at least two samples,
                # then re-label the targets against the reduced edge set.
                bins = edges[(np.bincount(yy.ravel()) >= 2)[:-1]]
                yy = np.digitize(y, bins, right=True)

                if np.unique(yy).shape[0] == 1:
                    # A single class leaves nothing to balance.
                    return X, y

                # Drop the rows of every class that still has fewer than two
                # members. The class list is computed once, before filtering.
                for cl in np.where(np.bincount(yy.ravel()) < 2)[0]:
                    keep = np.where(yy != cl)[0]
                    X = X[keep]
                    y = y[keep]
                    yy = yy[keep]

                # NOTE(review): bincount keeps zero-count slots for labels
                # that are absent, so this can evaluate to -1; the sampler is
                # then expected to fail and trigger a retry - confirm intent.
                neighbors = np.min(np.bincount(yy.ravel()) - 1)
                common = dict(sampling_strategy="auto",
                              random_state=random_state,
                              n_jobs=self.n_jobs)

                if self.method == 'ADASYN':
                    sm = ADASYN(n_neighbors=neighbors, **common)
                elif self.method == 'SVMSMOTE':
                    sm = SVMSMOTE(k_neighbors=neighbors,
                                  m_neighbors=2 * neighbors,
                                  **common)
                else:
                    sm = BorderlineSMOTE(k_neighbors=neighbors,
                                         m_neighbors=2 * neighbors,
                                         **common)

                try:
                    X2, yy2 = sm.fit_resample(X, yy.ravel())
                except Exception:
                    # The preferred sampler failed: fall back to plain SMOTE.
                    sm = SMOTE(k_neighbors=neighbors, **common)
                    X2, yy2 = sm.fit_resample(X, yy.ravel())

                # NOTE(review): if the sampler appends synthetic rows after
                # the X.shape[0] originals, the "+ 1" also discards the first
                # synthetic row - confirm whether that is intended.
                start = X.shape[0] + 1
                X2 = X2[start:]
                X2[X2 < 0] = 0
                yy2 = yy2[start:]
                # Map class labels back to bin-edge values. Label 0 indexes
                # bins[-1] here. NOTE(review): confirm that is intended.
                yy2 = bins[yy2 - 1]
                return X2, yy2
            except Exception as err:
                # Any failure means "retry with a coarser binning".
                # KeyboardInterrupt / SystemExit are deliberately not caught.
                last_error = err
                Std *= 10

        raise RuntimeError('Cannot make resampling ') from last_error