def CrossValDE(modelName, X, y, metric, procsessor=None, cv=3, times=1, random_state=0):
    """Cross-validation score used as the objective while optimising with DE.

    Runs ``times`` rounds of shuffled, stratified ``cv``-fold cross-validation
    and averages every per-fold score. Stratification is done on a binarised
    copy of ``y`` (0 stays 0, any other value becomes 1); the models are still
    trained and evaluated on the original ``y`` values.

    Parameters
    ----------
    modelName : forwarded to ``buildModel``.
    X, y : features and targets; both are indexed with the index arrays
        produced by the splitter.
    metric : forwarded to ``E.Eva``.
    procsessor : optional object whose ``transform(x, y)`` returns the new
        ``(x, y)`` pair; it is applied to the training part of each fold only.
    cv : number of folds per round.
    times : number of rounds; round ``t`` is seeded with ``random_state + t``.
    random_state : base seed for the splitter.

    Returns
    -------
    The mean of all collected fold scores.
    """
    # Stratify on "zero vs. non-zero" rather than on the raw target values.
    strat_labels = [0 if value == 0 else 1 for value in y]

    scores = []
    for round_no in range(times):
        splitter = StratifiedKFold(
            n_splits=cv, shuffle=True, random_state=random_state + round_no)
        folds = list(splitter.split(X=X, y=strat_labels))
        for train_idx, test_idx in folds:
            x_train, y_train = X[train_idx], y[train_idx]
            x_test, y_test = X[test_idx], y[test_idx]
            if procsessor is not None:
                # Resample the training fold only; the test fold stays untouched.
                x_train, y_train = procsessor.transform(x_train, y_train)
            estimator = buildModel(x_train, y_train, modelName)
            scores.append(E.Eva(estimator, x_test, y_test, metric))

    return np.array(scores).mean()
def fitness(self, targetPars, otherPars):
    """Return the mean 3-fold score of one SMOTER parameter candidate.

    For SMOTUNED, ``otherPars`` holds a single value: the data set.

    The data set is deep-copied, split by ``self.stratify`` into three folds
    (fixed seed 1), and for every fold the training part is resampled with
    ``SMOTER`` using ``targetPars[0, 0]``, ``targetPars[0, 1]`` and
    ``targetPars[0, 2]``. A model of type ``self.modelName`` is built on the
    resampled data and scored on the fold's test part with ``self.metric``.

    Returns
    -------
    The arithmetic mean of the three fold scores.
    """
    n_folds = 3
    dataset = deepcopy(otherPars[0])
    folds = self.stratify(K=n_folds, dataSet=dataset, randomSeed=1)

    scores = []
    for fold_no in range(n_folds):
        fold = folds[fold_no]
        # fold[0] selects the training rows, fold[1] the test rows.
        train_part = deepcopy(dataset.iloc[fold[0], :])
        test_part = deepcopy(dataset.iloc[fold[1], :])

        sampler = SMOTER(targetPars[0, 0], targetPars[0, 1], targetPars[0, 2],
                         train_part)
        resampled = sampler.smoteR()

        fitted = BM.buildModel(trainingSet=resampled, modelType=self.modelName)
        scores.append(
            E.Eva(fitted, testSet=test_part, metric=self.metric, predValue="bug"))

    return sum(scores) / len(scores)
# Example test-set names, kept for reference only:
# testSets=["ant-1.5","camel-1.2","ivy-2.0"]
trainingProjs = [
    "ant",
    "camel",
    "jedit",
    "synapse",
    "ivy",
    "xalan",
    "xerces",
]

# Names of the unsupervised models: one "_A" and one "_D" variant per measure.
unsupervisedModels = [
    "U_LocRfc_A", "U_LocRfc_D",
    "U_amc_A", "U_amc_D",
    "U_avg_cc_A", "U_avg_cc_D",
    "U_ca_A", "U_ca_D",
    "U_cam_A", "U_cam_D",
    "U_cbm_A", "U_cbm_D",
    "U_cbo_A", "U_cbo_D",
    "U_ce_A", "U_ce_D",
    "U_dam_A", "U_dam_D",
    "U_dit_A", "U_dit_D",
    "U_ic_A", "U_ic_D",
    "U_lcom3_A", "U_lcom3_D",
    "U_lcom_A", "U_lcom_D",
    "U_loc_A", "U_loc_D",
    "U_max_cc_A", "U_max_cc_D",
    "U_mfa_A", "U_mfa_D",
    "U_moa_A", "U_moa_D",
    "U_noc_A", "U_noc_D",
    "U_npm_A", "U_npm_D",
    "U_rfc_A", "U_rfc_D",
    "U_wmc_A", "U_wmc_D",
]

# A simple end-to-end example of how to use this code.

# In[] Step 1: get the training set and the test set
x_train, y_train, x_test, y_test = Data(
    paths=datasetsPaths,
    scenario="Cross-Version",
    testSetName="ant-1.7",
    trainingProjName="",
).getTrainingAndTestSet()

# In[] Step 2: build and train a model
# Either SMOTEND or SMOTEND+DE can be used here.
# NOTE: SMOTEND+DE returns the last generation.
smo = SMOTEND(k=10, m=3, r=2)
x_train, y_train = smo.transform(x_train, y_train)
m = BM.buildModel(x_train, y_train, modelName="RF")

# In[] Step 3: evaluate the trained model on the test set with the FPA metric
FPAResult = E.Eva(m, x_test, y_test, metric="FPA")
print(FPAResult)
def f2(trainingSet, testSet, modelNames, metrics, burak, repeats=4):
    """Evaluate every metric/model pair using SMOTEND+DE preprocessing.

    Only suitable for SMOTEND+DE (the preprocessing is always created with
    ``smoterde=True``, ``smoter=False`` and ``rus=False``).

    For each (metric, model) pair the preprocessing is run ``repeats`` times.
    After each run a model is built on the preprocessed training set and
    evaluated on ``testSet``; the best score and the parameters of the run
    that produced it are kept.

    Parameters
    ----------
    trainingSet : data used to train the models.
    testSet : data the models are evaluated on; it is also handed to the
        preprocessing as ``targetData``.
    modelNames : list-like of model names.
    metrics : list-like of measure names.
    burak : whether to use burak; forwarded to ``DataPreProcessing``.
    repeats : int, default 4
        Number of preprocessing/training runs per (metric, model) pair.

    Returns
    -------
    (values, paras) : tuple of two numpy arrays, both laid out as
        ``[metric][model]``.
        ``values`` holds the best score of each pair. It stays at the
        sentinel ``-10.0`` if no run scored above it.
        ``paras`` holds the best run's first three parameters joined as
        ``"p0,p1,p2"``, or ``None`` if no run scored above the sentinel.

    Raises
    ------
    ValueError
        If ``repeats`` is smaller than 1.
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    metricValues, metricParas = [], []
    for metric in metrics:
        # Per-model best scores and per-model best parameters for this metric.
        modelValues, modelParas = [], []
        for model in modelNames:
            maxValue, bestParas = -10.0, None
            for _ in range(repeats):
                dpp = DataPreProcessing(
                    data=trainingSet,
                    targetData=testSet,
                    burakKNN=10,
                    burak=burak,
                    smoter=False,
                    rusRatio=75,
                    rus=False,
                    Drange=[
                        list(range(1, 21)),
                        [0, 1, 2, 3, 4, 5, 6],
                        (0.1, 5),
                    ],
                    metric=metric,
                    modelName=model,
                    F=0.7,
                    CR=0.3,
                    PopulationSize=10,
                    Lives=8,
                    smoterde=True)
                newTrainingSet = dpp.preProcess()
                mo = BM.buildModel(trainingSet=newTrainingSet, modelType=model)
                score = E.Eva(model=mo, testSet=testSet, metric=metric,
                              predValue="bug")
                if maxValue < score:
                    maxValue = score
                    # Format right away, while this run's parameters are at
                    # hand; bestParas therefore stays None (and is never
                    # indexed) when no run improves on the sentinel.
                    best = dpp.bestParas
                    bestParas = ",".join(str(best[0][i]) for i in range(3))
            # maxValue is the single value for this particular combination of
            # training set, test set, metric and model.
            modelValues.append(maxValue)
            modelParas.append(bestParas)
        metricValues.append(modelValues)
        metricParas.append(modelParas)
    return (np.array(metricValues), np.array(metricParas))
def f1(trainingSet, testSet, modelNames, metrics, burak, smoter, rus):
    """Evaluate every model on every metric after one preprocessing pass.

    Not suitable for SMOTEND+DE (the preprocessing is always created with
    ``smoterde=False``).

    The training set is preprocessed once, one model per entry of
    ``modelNames`` is built on the result, and each model is then evaluated
    on ``testSet`` for every metric.

    Parameters
    ----------
    trainingSet : data used to train the models.
    testSet : data the models are evaluated on; it is also handed to the
        preprocessing as ``targetData``.
    modelNames : list-like of model names.
    metrics : list-like of measure names.
    burak : whether to use burak; forwarded to ``DataPreProcessing``.
    smoter : whether to use smoter; forwarded to ``DataPreProcessing``.
    rus : whether to use rus; forwarded to ``DataPreProcessing``.

    Returns
    -------
    numpy array laid out as ``[metric][model]`` holding the metric values.
    """
    preprocessor = DataPreProcessing(
        data=trainingSet,
        targetData=testSet,
        burakKNN=10,
        burak=burak,
        smoter=smoter,
        rusRatio=None,
        rus=rus,
        Drange=[list(range(5, 21)), [0, 1, 2, 3, 4, 5, 6], (0.1, 5)],
        metric=None,
        modelName=None,
        F=0.7,
        CR=0.3,
        PopulationSize=10,
        Lives=8,
        smoterde=False)
    prepared = preprocessor.preProcess()

    # Build all models up front so each one is trained only once.
    fitted = [
        BM.buildModel(trainingSet=prepared, modelType=name)
        for name in modelNames
    ]

    scores = [
        [
            E.Eva(model=candidate, testSet=testSet, metric=metric,
                  predValue="bug")
            for candidate in fitted
        ]
        for metric in metrics
    ]
    return np.array(scores)