Пример #1
0
    def WMulti(files,
               file,
               testSet,
               fout,
               features,
               name,
               clfName,
               dp,
               convertToBinary=True):

        train = []
        for file2 in files:
            if (file2[0:3] == file[0:3] and file2 < file < 0):
                train.append(file2)

        if (len(train)):
            trainSet = DPLIB.LoadCSV(train, dp, features, convertToBinary)

            if (name.lower().find("infogain") >= 0):
                #int indi[] = DPLIB.fSelectInfoGain(trainSet);
                #if (DPLIB.useIterativeInfoGainSubsetting)
                #{
                #    indi = DPLIB.iterativeInfoGainSubsetting(trainSet, indi,clfName);
                #}
                #else
                #    indi = DPLIB.getTopX(indi);
                #trainSet = DPLIB.fSelectSet(trainSet, indi);
                #testSet = DPLIB.fSelectSet(testSet, indi);
                pass

            l = GLOB(clfName).getClassifier()
            l.buildClassifier(trainSet)
            vec = l.evaluateModel(testSet)

            tvals = DPLIB.getConfMatrix(testSet[:, -1], vec)
            auc = DPLIB.getAUC(testSet[:, -1], vec)
            vals = DPLIB.getMeasures(tvals)
            print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

            fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) +
                       ";" + "Vals=" + str(vals))

        else:

            print(name + ":" + file + ": " + "!!!" + " AUC = !!!")
            fout.write("\n" + name + ":" + file + ": !!!")
Пример #2
0
    def run(self):
        lrnrnames = self.lrnrs
        try:
            rnd = random.Random(Common.getCurrentTimeMil())            
            if self.expType == 'GIS':
                if self.isKS:
                    self.gis = GISKS2(self.pars,self.file)
                else:
                    self.gis = GIS(self.pars)
            elif self.expType == 'LSH':
                lsh = CPDP_LSH_Binary(self.pars, self.file)

            trainSetAll = DPLIB.LoadCSV(self.train, self.dp, self.features, convertToBinary = not self.isCount);
            testSetAll = DPLIB.LoadCSV(self.test, self.dp, self.features, convertToBinary = not self.isCount);  
            

            ft = 'A'
            indi = None

            if not self.isCount:

                if self.pars['features'] == 'Iterative InfoGain Subsetting':
                    ft = 'IG'
                    indi = DPLIB.fSelectInfoGain(trainSetAll);

            
            if self.pars['features'] == 'All':
                print ('All')

            if self.pars['features'] == 'PCA':
                ft = 'PCA'
                print ('PCA')
                trainSetAll, testSetAll = DPLIB.applyPCA(trainSetAll, testSetAll, 0.95)
                        
            for lk in range(len(lrnrnames)):                

                lrnr = "-" + lrnrnames[lk];

                clfName = lrnrnames[lk];

                vSets = None                                
                
                if not self.isCount:
                    if self.pars['features'] == 'Iterative InfoGain Subsetting':

                        indis2 = DPLIB.iterativeInfoGainSubsetting(trainSetAll, indi, clfName);
                        trainSetAll = DPLIB.fSelectSet(trainSetAll, indis2);
                        testSetAll = DPLIB.fSelectSet(testSetAll, indis2);
                
                if self.pars['vSetType'] in ['Single Random','Multiple Random']:
                    vSets = DatasetUtils.getRandomDatasets()
                c = 0
                while (c < self.iters):

                    print("Start:" + self.file + ": " + str(c));
                    print("====================================================");
                    #fout.write("#ITERINFO:For File=" +self.file + "-Iter:" + str(c) + "\n");

                    stages = None
                    buckets = None
                    sbtx = ""
                                                                    
                    if self.expType == 'GIS':
                        self.doGIS(trainSetAll, testSetAll, "FIXED-VMUL-GEN-"+ft, lrnr, fout, vSets, False, clfName, gis=gis);
                        gis.prnt('---------------------------------------\n')
                    
                        
                    elif self.expType == 'LSH':
                        lsh.CreateBucketsTune(trainSetAll, testSetAll, vSets, name= "LSHTune-ALL-TOP-SUPER" + sbtx + lrnr, testCut= self.pars['tunecut'], iternum=c, save=False, superbit=self.pars['lshType'] =='SuperBit', clfName=clfName,tunelrn = self.pars['tunelrnr']);
                        lsh.prnt('---------------------------------------\n')

                    c+=1
                    
            #fout.write("===================================================================\n");
            #fout.close();

            print("File Processing Ended:" +self.file);
        except Exception as e:

            try:
                print (str(e))
                print(traceback.format_exc())                
            except Exception as ex2:
                print("X2", str(ex2));
                print(traceback.format_exc())
                
        if self.expType == 'GIS':
            return gis
        elif self.expType == 'LSH':
            return lsh