示例#1
0
def CrossValDE(modelName,
               X,
               y,
               metric,
               procsessor=None,
               cv=3,
               times=1,
               random_state=0):
    """
    Cross-validation score used as the objective while tuning with DE.

    Runs ``times`` rounds of shuffled, stratified ``cv``-fold splitting;
    round ``t`` is seeded with ``random_state + t``. The folds are stratified
    on a binarised copy of ``y`` (0 stays 0, every other value becomes 1),
    while models are trained and evaluated on the original ``y`` values.

    Parameters
    ----------
    modelName :
        Passed through to ``buildModel``.
    X, y :
        Features and targets. Both are indexed with the index arrays yielded
        by the splitter (presumably NumPy arrays -- confirm against callers).
    metric :
        Passed through to ``E.Eva``.
    procsessor :
        Optional object. When given, ``procsessor.transform(x_train, y_train)``
        replaces the training part of every fold; the test part is untouched.
    cv :
        Number of folds per round.
    times :
        Number of cross-validation rounds.
    random_state :
        Base seed for the splitter.

    Returns
    -------
    The mean of the ``E.Eva`` results over every fold of every round.
    """
    # Stratification only needs to know "zero" versus "non-zero" targets.
    stratify_labels = [0 if label == 0 else 1 for label in y]

    fold_scores = []
    for round_no in range(times):
        splitter = StratifiedKFold(n_splits=cv,
                                   shuffle=True,
                                   random_state=random_state + round_no)
        folds = list(splitter.split(X=X, y=stratify_labels))
        for train_idx, test_idx in folds:
            x_train, y_train = X[train_idx], y[train_idx]
            x_test, y_test = X[test_idx], y[test_idx]

            # Optional preprocessing is applied to the training part only.
            if procsessor is not None:
                x_train, y_train = procsessor.transform(x_train, y_train)

            estimator = buildModel(x_train, y_train, modelName)
            fold_scores.append(E.Eva(estimator, x_test, y_test, metric))

    return np.array(fold_scores).mean()
示例#2
0
 def fitness(self, targetPars, otherPars):
     """
     Score one candidate parameter vector by 3-fold evaluation.

     For SMOTUNED, ``otherPars`` holds a single value: the data set.

     The data set is copied and split with ``self.stratify`` (K=3, seed 1).
     For each of the three folds the training rows are resampled by
     ``SMOTER`` using ``targetPars[0, 0]``, ``targetPars[0, 1]`` and
     ``targetPars[0, 2]``, a model of type ``self.modelName`` is built on the
     resampled rows, and it is evaluated on the untouched test rows with
     ``self.metric``.

     Returns the arithmetic mean of the three fold scores.
     """
     dataset = deepcopy(otherPars[0])
     folds = self.stratify(K=3, dataSet=dataset, randomSeed=1)

     scores = []
     for fold_no in range(3):
         # Each fold entry is indexed as [0] -> training rows, [1] -> test rows.
         train_part = deepcopy(dataset.iloc[folds[fold_no][0], :])
         held_out = deepcopy(dataset.iloc[folds[fold_no][1], :])

         sampler = SMOTER(targetPars[0, 0], targetPars[0, 1],
                          targetPars[0, 2], train_part)
         resampled = sampler.smoteR()

         fitted = BM.buildModel(trainingSet=resampled,
                                modelType=self.modelName)
         score = E.Eva(fitted,
                       testSet=held_out,
                       metric=self.metric,
                       predValue="bug")
         scores.append(score)

     return sum(scores) / len(scores)
示例#3
0
#    testSets=["ant-1.5","camel-1.2","ivy-2.0"]
    # Project names and unsupervised model names; neither list is used by the
    # example steps visible below.
    trainingProjs=["ant","camel","jedit","synapse","ivy","xalan","xerces"]
    unsupervisedModels=['U_LocRfc_A','U_LocRfc_D','U_amc_A','U_amc_D','U_avg_cc_A','U_avg_cc_D','U_ca_A','U_ca_D','U_cam_A','U_cam_D','U_cbm_A',
                        'U_cbm_D','U_cbo_A','U_cbo_D','U_ce_A','U_ce_D','U_dam_A','U_dam_D','U_dit_A','U_dit_D','U_ic_A','U_ic_D','U_lcom3_A',
                        'U_lcom3_D','U_lcom_A','U_lcom_D','U_loc_A','U_loc_D','U_max_cc_A','U_max_cc_D','U_mfa_A','U_mfa_D','U_moa_A','U_moa_D',
                        'U_noc_A','U_noc_D','U_npm_A','U_npm_D','U_rfc_A','U_rfc_D','U_wmc_A','U_wmc_D']
    # A simple end-to-end example of how to use this code.
    # In[] Step 1: get the training set and the test set
    # (Cross-Version scenario with "ant-1.7" as the test set).
    x_train,y_train,x_test,y_test=Data(paths=datasetsPaths, scenario="Cross-Version", testSetName="ant-1.7", trainingProjName="").getTrainingAndTestSet()

    # In[] Step 2: resample the training data, then build and train a model
    smo=SMOTEND(k=10, m=3, r=2)#either SMOTEND or SMOTEND+DE can be used here. NOTE: SMOTEND+DE returns the last generation
    x_train,y_train=smo.transform(x_train,y_train)
    m=BM.buildModel(x_train,y_train,modelName="RF")
    # In[] Step 3: evaluate the trained model on the test set with the FPA metric
    FPAResult=E.Eva(m,x_test,y_test,metric="FPA")
    print(FPAResult)
    
    











示例#4
0
def f2(trainingSet, testSet, modelNames, metrics, burak):
    """
    Score every model on every metric using the SMOTEND+DE pipeline.

    Only suitable for SMOTEND+DE: the preprocessing is always created with
    ``smoterde=True`` while ``smoter`` and ``rus`` are switched off.

    For each (metric, model) pair the full preprocess -> train -> evaluate
    cycle is repeated 4 times and the highest score is kept, together with
    the parameters (``bestParas``) of the preprocessing run that produced it.

    Parameters
    ----------
    trainingSet :
        Data used to train the models.
    testSet :
        Data used to evaluate the models; also passed to the preprocessing
        as ``targetData``.
    modelNames :
        List-like of model names.
    metrics :
        List-like of metric names.
    burak :
        Whether to use the Burak filter (forwarded to ``DataPreProcessing``).

    Returns
    -------
    (values, paras) :
        Two NumPy arrays with one row per metric and one column per model.
        ``values`` holds the best score of each pair. ``paras`` holds the
        matching parameters as an ``"a,b,c"`` string, or ``None`` when no run
        scored above the initial -10.0.
    """
    all_scores, all_params = [], []
    for metric in metrics:
        # Per-model best score and best parameters for the current metric.
        scores_for_metric, params_for_metric = [], []
        for model_name in modelNames:
            best_score, best_params = -10.0, None
            for _ in range(4):
                preprocessor = DataPreProcessing(
                    data=trainingSet,
                    targetData=testSet,
                    burakKNN=10,
                    burak=burak,
                    smoter=False,
                    rusRatio=75,
                    rus=False,
                    Drange=[
                        list(range(1, 21)),
                        [0, 1, 2, 3, 4, 5, 6],
                        (0.1, 5),
                    ],
                    metric=metric,
                    modelName=model_name,
                    F=0.7,
                    CR=0.3,
                    PopulationSize=10,
                    Lives=8,
                    smoterde=True)
                tuned_training_set = preprocessor.preProcess()
                fitted = BM.buildModel(trainingSet=tuned_training_set,
                                       modelType=model_name)
                score = E.Eva(model=fitted,
                              testSet=testSet,
                              metric=metric,
                              predValue="bug")
                if best_score < score:
                    best_score = score
                    found = preprocessor.bestParas
                    best_params = ",".join(
                        [str(found[0][0]),
                         str(found[0][1]),
                         str(found[0][2])])
            # best_score is the single value for this particular combination
            # of training set, test set, metric and model.
            scores_for_metric.append(best_score)
            params_for_metric.append(best_params)
        all_scores.append(scores_for_metric)
        all_params.append(params_for_metric)
    return (np.array(all_scores), np.array(all_params))
示例#5
0
def f1(trainingSet, testSet, modelNames, metrics, burak, smoter, rus):
    """
    Score every model on every metric after one shared preprocessing pass.

    Not suitable for SMOTEND+DE: the preprocessing is always created with
    ``smoterde=False``.

    The training set is preprocessed once, one model per entry of
    ``modelNames`` is built on the preprocessed data, and every model is then
    evaluated on ``testSet`` for every metric.

    Parameters
    ----------
    trainingSet :
        Data used to train the models.
    testSet :
        Data used to evaluate the models; also passed to the preprocessing
        as ``targetData``.
    modelNames :
        List-like of model names.
    metrics :
        List-like of metric names.
    burak :
        Whether to use the Burak filter.
    smoter :
        Whether to use SMOTER.
    rus :
        Whether to use RUS.

    Returns
    -------
    NumPy array with one row per metric and one column per model.
    """
    preprocessor = DataPreProcessing(
        data=trainingSet,
        targetData=testSet,
        burakKNN=10,
        burak=burak,
        smoter=smoter,
        rusRatio=None,
        rus=rus,
        Drange=[list(range(5, 21)), [0, 1, 2, 3, 4, 5, 6], (0.1, 5)],
        metric=None,
        modelName=None,
        F=0.7,
        CR=0.3,
        PopulationSize=10,
        Lives=8,
        smoterde=False)
    prepared_training_set = preprocessor.preProcess()

    # Build every model up front so each one is trained exactly once.
    fitted_models = [
        BM.buildModel(trainingSet=prepared_training_set, modelType=name)
        for name in modelNames
    ]

    # Metric-major layout: one row per metric, one column per model.
    score_rows = [
        [
            E.Eva(model=fitted,
                  testSet=testSet,
                  metric=metric,
                  predValue="bug")
            for fitted in fitted_models
        ]
        for metric in metrics
    ]
    return np.array(score_rows)