Exemplo n.º 1
0
    def process(self):
        '''
        Assemble the training data from the GUI state and hand it to the SVM.

        Steps performed:
          1. Read the scoring, tuned-parameter, cross-validation and data
             settings from ``self.svmFrame`` and the row mask from
             ``self.getMaskRow()``.
          2. Build ``SIV`` (one column per variable typed 'Scalar IV') and
             ``CIV`` (one column per variable typed 'Categorical IV', encoded
             through that variable's ``makeCatDict()`` mapping). Both have
             ``len(self.csv) - 1`` rows (the CSV header row is excluded).
          3. Encode the dependent variable ``self.variables[self.indexDV]``
             through ``self.dvFrame.makeCatDict()`` into ``y``.
          4. If ``data_parameters['cleanup'] == 'delete'`` drop the masked
             rows; otherwise impute missing values (``data_parameters['impute']``
             strategy for scalar columns, 'most_frequent' for categorical
             columns and for ``y``).
          5. Optionally standardise ``SIV`` (``data_parameters['scale']``) and
             one-hot encode ``CIV`` (``data_parameters['oneHot']``).
          6. Concatenate ``SIV`` and ``CIV`` column-wise and call
             ``SVM.skSVM``.

        Side effects: sets ``catDict`` on every categorical variable and on the
        dependent variable; sets ``self.stdScaler`` / ``self.encScaler`` when
        the corresponding step actually runs.

        Returns None; the result of ``SVM.skSVM`` is not used here.
        '''
        scoring = self.svmFrame.getScores()
        tuned_parameters = self.svmFrame.getTunedParameters()
        cv_parameters = self.svmFrame.getCVParameters()
        data_parameters = self.svmFrame.getDataParameters()

        maskRow = self.getMaskRow()

        numRow = len(self.csv) - 1  # -1 because of the header row in the csv

        # Partition the variables once instead of re-querying the selected
        # type of every variable in three separate loops.
        scalarVars = []
        categoricalVars = []
        for variable in self.variables:
            selected = variable.selectedType.get()
            if selected == 'Scalar IV':
                scalarVars.append(variable)
            elif selected == 'Categorical IV':
                categoricalVars.append(variable)

        # Column assignment into a pre-sized float matrix enforces that every
        # variable supplies exactly numRow values.
        SIV = np.empty(shape=(numRow, len(scalarVars)))
        for col, variable in enumerate(scalarVars):
            SIV[:, col] = np.asarray(variable.values)

        CIV = np.empty(shape=(numRow, len(categoricalVars)))
        for col, variable in enumerate(categoricalVars):
            variable.catDict = variable.makeCatDict()
            # Values without a category code become NaN (what the original
            # None default turned into once stored in a float matrix), which is
            # the missing-value marker the imputer below looks for.
            CIV[:, col] = np.asarray(
                [variable.catDict.get(v, np.nan) for v in variable.values])

        dependent = self.variables[self.indexDV]
        dependent.catDict = self.dvFrame.makeCatDict()
        # Direct indexing on purpose: an unmapped DV value raises KeyError.
        y = np.asarray([dependent.catDict[v] for v in dependent.values])

        # sklearn estimators reject matrices with zero feature columns, so the
        # sklearn steps below are skipped for a matrix that has no columns.
        hasSIV = SIV.shape[1] > 0
        hasCIV = CIV.shape[1] > 0

        if data_parameters['cleanup'] == 'delete':
            SIV = np.delete(SIV, maskRow, axis=0)
            CIV = np.delete(CIV, maskRow, axis=0)
            y = np.delete(y, maskRow, axis=0)
        else:
            # Imputers are only built when imputation is requested, so the
            # 'delete' path does not depend on them.
            # NOTE(review): preprocessing.Imputer is deprecated in newer
            # scikit-learn releases (SimpleImputer has no axis=1 mode) -
            # confirm the pinned sklearn version before upgrading.
            if hasSIV:
                imputerSIV = preprocessing.Imputer(
                    missing_values='NaN', strategy=data_parameters['impute'],
                    axis=0, copy=True)
                SIV = imputerSIV.fit(SIV).transform(SIV)
            if hasCIV:
                imputerCIV = preprocessing.Imputer(
                    missing_values='NaN', strategy='most_frequent',
                    axis=0, copy=True)
                CIV = imputerCIV.fit(CIV).transform(CIV)
            # axis=1 imputes along rows, so y is presented explicitly as a
            # single row instead of relying on sklearn's deprecated implicit
            # 1-D -> (1, n) conversion; [0] turns the result back into 1-D.
            imputerBDV = preprocessing.Imputer(
                missing_values='NaN', strategy='most_frequent',
                axis=1, copy=True)
            yRow = y.reshape(1, -1)
            imputerBDV.fit(yRow)
            y = imputerBDV.transform(yRow)[0]

        if data_parameters['scale'] and hasSIV:
            # The fitted scaler is kept on the instance.
            self.stdScaler = preprocessing.StandardScaler().fit(SIV)
            SIV = self.stdScaler.transform(SIV)

        if data_parameters['oneHot'] and hasCIV:
            # The fitted encoder is kept on the instance; toarray() densifies
            # the encoder output so it can be concatenated with SIV.
            self.encScaler = preprocessing.OneHotEncoder().fit(CIV)
            CIV = self.encScaler.transform(CIV).toarray()

        X = np.concatenate((SIV, CIV), axis=1)

        SVM.skSVM(X, y, scoring, tuned_parameters, data_parameters, cv_parameters)