示例#1
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        The stored settings are restored, the training data is pushed back
        into R, and the model is refit by calling the R function 'censlars'.
        The stored thresholds are applied last.
        '''
        #Restore the stored settings (same key order as they are read back)
        for key in ('data_dictionary', 'target', 'specificity', 'left',
                    'right', 'adapt', 'overshrink'):
            setattr(self, key, model_struct[key])

        #Push the data into R, then keep an independent deep copy in Python
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.deepcopy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'left': self.left,
            'right': self.right,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
        }
        self.model = r.Call(function='censlars', **self.pls_params).AsList()

        #Run the GetActual / GetFitted hooks on the refit model
        self.GetActual()
        self.GetFitted()

        #Apply the stored decision thresholds (specificity is re-applied here)
        for key in ('specificity', 'threshold', 'regulatory_threshold'):
            setattr(self, key, model_struct[key])
示例#2
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        Restores the stored settings and formula, pushes the training data
        into R, refits with the R function 'glm' (family 'binomial'), and
        re-runs SelectModel with the stored step direction.
        '''
        #Restore the stored settings (same key order as they are read back)
        for key in ('data_dictionary', 'target', 'specificity', 'weights',
                    'stepdirection', 'formula'):
            setattr(self, key, model_struct[key])

        #Push the data into R, then keep an independent deep copy in Python
        self.nobs = len(self.data_dictionary[self.target])
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.deepcopy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit the logistic regression in R
        self.logistic_params = {
            'formula': self.formula,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'x': True,
        }
        self.model = r.Call(function='glm', **self.logistic_params).AsList()

        #Re-run model selection, then the GetActual / GetFitted hooks
        self.SelectModel(direction=self.stepdirection)
        self.GetActual()
        self.GetFitted()

        #Apply the stored decision thresholds (specificity is re-applied here)
        for key in ('specificity', 'threshold', 'regulatory_threshold'):
            setattr(self, key, model_struct[key])
示例#3
0
    def Predict(self, data_dictionary):
        '''Return predictions for data_dictionary as a list of floats.

        The data is converted with utils.DictionaryToR and handed, together
        with the fitted model, to the R function 'predict.galogistic'.
        '''
        new_frame = utils.DictionaryToR(data_dictionary)
        r_result = r.Call(function="predict.galogistic",
                          obj=self.model,
                          newx=new_frame)

        #Convert the R vector into plain Python floats
        return list(map(float, r_result.AsVector()))
示例#4
0
    def Predict(self, data_dictionary):
        '''Return the model's predictions for data_dictionary as a list.

        The data is converted with utils.DictionaryToR and handed, together
        with the fitted model, to the R function 'predict'.

        Returns:
            A list with the elements of the squeezed numpy array of
            predictions. A single prediction is returned as a one-item list.
        '''
        data_frame = utils.DictionaryToR(data_dictionary)
        prediction_params = {'object': self.model, 'newdata': data_frame}
        prediction = r.Call(function='predict', **prediction_params).AsVector()

        #Translate the R output to a type that can be navigated in Python.
        #squeeze() collapses a one-element result to a 0-d array, which
        #list() cannot iterate; atleast_1d keeps that case as a 1-d array
        #and leaves every other shape untouched.
        prediction = np.atleast_1d(np.array(prediction).squeeze())

        return list(prediction)
示例#5
0
    def PredictValues(self, data_dictionary, **args):
        '''Return predicted values for data_dictionary as a list of floats.

        Works on a shallow copy of data_dictionary, so the caller's dictionary
        keeps its target entry. The remaining data is converted with
        utils.DictionaryToR and passed to the R function 'predict'.

        Args:
            data_dictionary: data to predict on; the self.target entry is
                dropped when present.
            **args: accepted but not used here.
        '''
        data = copy.copy(data_dictionary)
        #New data may not carry the target column; tolerate its absence
        #instead of raising KeyError.
        data.pop(self.target, None)
        data_frame = utils.DictionaryToR(data)
        prediction_params = {'obj': self.model, 'newx': data_frame}

        prediction = r.Call(function='predict', **prediction_params).AsVector()

        #Convert the R vector into plain Python floats
        return [float(p) for p in prediction]
示例#6
0
    def Create(self, **args):
        '''Fit a new model by calling the R function 'adalars'.

        Required keys in args: 'target' and 'data'.
        Optional keys, with defaults: 'regulatory_threshold' (2.3711),
        'adapt', 'overshrink', 'precondition', 'selectvars' (all False)
        and 'specificity' (0.9).
        '''
        #The regulatory threshold doubles as the initial decision threshold
        self.threshold = args.get('regulatory_threshold', 2.3711)
        self.regulatory_threshold = self.threshold

        self.target = args['target']

        #Optional fitting switches
        self.adapt = args.get('adapt', False)
        self.overshrink = args.get('overshrink', False)
        self.precondition = args.get('precondition', False)
        self.selectvars = args.get('selectvars', False)
        specificity = args.get('specificity', 0.9)

        #Push the data into R and keep a shallow copy in Python
        training_data = args['data']
        self.data_frame = utils.DictionaryToR(training_data)
        self.data_dictionary = copy.copy(training_data)
        self.predictors = len(self.data_dictionary) - 1

        #Fit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
            'precondition': self.precondition,
            'selectvars': self.selectvars,
        }
        self.model = r.Call(function='adalars', **self.pls_params).AsList()

        #Run the GetActual / GetFitted hooks, then read the variable names
        #and coefficients stored under the model's 'lars' entry
        self.GetActual()
        self.GetFitted()
        self.vars = list(
            map(str, self.model['lars'].AsList()['vars'].AsVector()))
        self.coefs = list(
            map(float, self.model['lars'].AsList()['coefs'].AsVector()))

        #Establish a decision threshold
        self.Threshold(specificity)
示例#7
0
    def Create(self, **args):
        '''Fit a new model by calling the R function 'galm'.

        Required keys in args: 'target' and 'data'.
        Optional keys, with defaults: 'regulatory_threshold' (2.3711),
        'population' (200), 'generations' (100), 'mutate' (0.02),
        'ZOR' (10), 'verbose' (False) and 'specificity' (0.90).
        '''
        #The regulatory threshold doubles as the initial decision threshold
        self.threshold = args.get('regulatory_threshold', 2.3711)
        self.regulatory_threshold = self.threshold

        self.target = args['target']

        #Optional settings forwarded to galm
        self.population = args.get('population', 200)
        self.generations = args.get('generations', 100)
        self.mutate = args.get('mutate', 0.02)
        self.ZOR = args.get('ZOR', 10)
        self.verbose = args.get('verbose', False)
        specificity = args.get('specificity', 0.90)

        #Push the data into R and keep a shallow copy in Python
        training_data = args['data']
        self.data_frame = utils.DictionaryToR(training_data)
        self.data_dictionary = copy.copy(training_data)
        self.predictors = len(self.data_dictionary) - 1

        #Fit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'population': self.population,
            'generations': self.generations,
            'mutateRate': self.mutate,
            'zeroOneRatio': self.ZOR,
            'verbose': self.verbose,
        }
        self.model = r.Call(function='galm', **self.pls_params).AsList()

        #Run the GetActual / GetFitted hooks, then read the model's 'vars'
        self.GetActual()
        self.GetFitted()
        self.vars = list(map(str, self.model['vars'].AsVector()))

        #Establish a decision threshold
        self.Threshold(specificity)
示例#8
0
    def Predict(self, data_dictionary):
        '''Return predictions for data_dictionary as a list of floats.

        The data is converted with utils.DictionaryToR and handed, together
        with the fitted model, to the R function 'predict.adalasso'.
        '''
        new_frame = utils.DictionaryToR(data_dictionary)
        r_vector = r.Call(function="predict.adalasso",
                          obj=self.model,
                          newx=new_frame).AsVector()

        #Pack the R output into an array of doubles, then hand back floats
        packed = array.array('d', r_vector)
        return [float(value) for value in packed]
示例#9
0
    def PredictValues(self, data_dictionary, **args):
        '''Return predicted values for data_dictionary as a list of floats.

        Works on a shallow copy of data_dictionary, so the caller's dictionary
        keeps its target entry. The remaining data is converted with
        utils.DictionaryToR and passed to the R function 'predict.adalars'.

        Args:
            data_dictionary: data to predict on; the self.target entry is
                dropped when present.
            **args: accepted but not used here.
        '''
        data = copy.copy(data_dictionary)
        #New data may not carry the target column; tolerate its absence
        #instead of raising KeyError.
        data.pop(self.target, None)
        data_frame = utils.DictionaryToR(data)
        prediction_params = {'obj': self.model, 'newx': data_frame}

        prediction = r.Call(function='predict.adalars',
                            **prediction_params).AsVector()

        #Convert the R vector into plain Python floats
        return [float(p) for p in prediction]
示例#10
0
    def Create(self, **args):
        '''Fit a new model by calling the R function 'plsr'.

        Required keys in args: 'target' and 'data'.
        Optional keys, with defaults: 'threshold' (2.3711) and
        'specificity' (0.9). All of args is forwarded to CrossValidation
        when the data holds more than two columns.
        '''
        #The supplied threshold is also kept as the regulatory threshold
        self.threshold = args.get('threshold', 2.3711)
        self.regulatory_threshold = self.threshold

        specificity = args.get('specificity', 0.9)

        #Push a shallow copy of the data into R
        self.target = args['target']
        data = copy.copy(args['data'])
        self.data_dictionary = data
        self.data_frame = utils.DictionaryToR(data)
        self.num_predictors = len(self.data_dictionary) - 1

        #With only one predictor (two columns) validation is switched off
        use_validation = len(data) > 2

        #Fit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'validation': 'LOO' if use_validation else 'none',
            'x': True,
        }
        self.model = r.Call(function='plsr', **self.pls_params).AsList()

        #Get the number of columns from the validation step
        #(Might be fewer than the number of predictor variables if n<p)
        if use_validation:
            validation = self.model['validation'].AsList()
            dims = r.Call(function="dim", x=validation['pred']).AsNumeric()
            self.ncomp_max = int(list(dims)[2])
        else:
            self.ncomp_max = 1

        #Choose the number of components, then refresh the fitted values
        self.GetActual()
        if use_validation:
            self.CrossValidation(**args)
        else:
            self.ncomp = 1
        self.GetFitted()

        #Establish a decision threshold and list the predictor names
        self.Threshold(specificity)
        self.vars = [str(name) for name in data.keys()]
        self.vars.remove(self.target)
示例#11
0
    def Create(self, **args):
        '''Fit a new model by calling the R function 'censlars'.

        Required keys in args: 'target' and 'data'.
        Optional keys, with defaults: 'regulatory_threshold' (2.3711),
        'left' (-inf), 'right' (+inf), 'adapt' (False),
        'overshrink' (False) and 'specificity' (0.9).
        '''
        #The regulatory threshold doubles as the initial decision threshold
        self.threshold = args.get('regulatory_threshold', 2.3711)
        self.regulatory_threshold = self.threshold

        self.target = args['target']

        #Optional settings forwarded to censlars
        self.left = args.get('left', -np.inf)
        self.right = args.get('right', np.inf)
        self.adapt = args.get('adapt', False)
        self.overshrink = args.get('overshrink', False)
        specificity = args.get('specificity', 0.9)

        #Push the data into R and keep an independent deep copy in Python
        training_data = args['data']
        self.data_frame = utils.DictionaryToR(training_data)
        self.data_dictionary = copy.deepcopy(training_data)
        self.predictors = len(self.data_dictionary) - 1

        #Fit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'left': self.left,
            'right': self.right,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
        }
        self.model = r.Call(function='censlars', **self.pls_params).AsList()

        #Run the GetActual / GetFitted hooks on the fitted model
        self.GetActual()
        self.GetFitted()

        #Establish a decision threshold
        self.Threshold(specificity)
示例#12
0
    def PredictValues(self, data_dictionary, **args):
        '''Return the model's predictions split into equal-length chunks.

        The data is converted with utils.DictionaryToR and passed to the R
        function 'predict'. The flat result is then cut into
        min(self.num_predictors, self.ncomp_max) consecutive pieces
        (presumably one per model component -- confirm against callers).

        Args:
            data_dictionary: data to predict on.
            **args: accepted but not used here.

        Returns:
            A list of array.array('d') objects, one per chunk.
        '''
        data_frame = utils.DictionaryToR(data_dictionary)
        prediction_params = {'object': self.model, 'newdata': data_frame}

        prediction = r.Call(function='predict', **prediction_params).AsVector()
        prediction = array.array('d', prediction)

        #Reshape the vector of predictions.
        #Floor division keeps the slice bounds integral; with true division
        #'rows' is a float and slicing the array raises TypeError.
        columns = int(min(self.num_predictors, self.ncomp_max))
        rows = len(prediction) // columns

        return [
            array.array('d', prediction[k * rows:(k + 1) * rows])
            for k in range(columns)
        ]
示例#13
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        Restores the stored settings and formula, pushes the training data
        into R, and refits with the R function 'galogistic'
        (family 'binomial'). The stored thresholds are applied last.
        '''
        #Restore the stored settings (same key order as they are read back)
        for key in ('data_dictionary', 'target', 'specificity', 'weights',
                    'population', 'generations', 'mutate', 'ZOR', 'formula'):
            setattr(self, key, model_struct[key])

        #Push the data into R and keep a shallow copy in Python
        self.nobs = len(self.data_dictionary[self.target])
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.copy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit the logistic regression in R ('family' is listed once; the
        #repeated key in a dict literal added nothing)
        self.logistic_params = {
            'formula': self.formula,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'population': self.population,
            'generations': self.generations,
            'mutateRate': self.mutate,
            'zeroOneRatio': self.ZOR,
            'verbose': True,
        }
        self.model = r.Call(function='galogistic',
                            **self.logistic_params).AsList()

        #Run the GetActual / GetFitted hooks, then read the model's 'vars'
        self.GetActual()
        self.GetFitted()
        self.vars = list(map(str, self.model['vars'].AsVector()))

        #Apply the stored decision thresholds (specificity is re-applied here)
        for key in ('specificity', 'threshold', 'regulatory_threshold'):
            setattr(self, key, model_struct[key])
示例#14
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        Restores the stored settings, pushes the training data into R, and
        refits with the R function 'adalars'. The stored thresholds are
        applied last.
        '''
        #Restore the stored settings (same key order as they are read back)
        for key in ('data_dictionary', 'target', 'specificity', 'adapt',
                    'overshrink', 'precondition', 'selectvars'):
            setattr(self, key, model_struct[key])

        #Push the data into R and keep a shallow copy in Python
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.copy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit in R using "<target> ~ ." as the formula
        response = utils.SanitizeVariableName(self.target)
        self.formula = r.Call('as.formula', obj=response + '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
            'precondition': self.precondition,
            'selectvars': self.selectvars,
        }
        self.model = r.Call(function='adalars', **self.pls_params).AsList()

        #Run the GetActual / GetFitted hooks, then read the variable names
        #and coefficients stored under the model's 'lars' entry
        self.GetActual()
        self.GetFitted()
        self.vars = list(
            map(str, self.model['lars'].AsList()['vars'].AsVector()))
        self.coefs = list(
            map(float, self.model['lars'].AsList()['coefs'].AsVector()))

        #Apply the stored decision thresholds (specificity is re-applied here)
        for key in ('specificity', 'threshold', 'regulatory_threshold'):
            setattr(self, key, model_struct[key])
示例#15
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        Restores the stored settings and formula, pushes the training data
        into R, and refits with the R function 'adalasso'
        (family 'binomial'). The stored thresholds are applied last.
        '''
        #Restore the stored settings (same key order as they are read back)
        for key in ('data_dictionary', 'target', 'specificity', 'weights',
                    'formula', 'adapt', 'overshrink', 'selectvars'):
            setattr(self, key, model_struct[key])

        #Push the data into R and keep a shallow copy in Python
        self.nobs = len(self.data_dictionary[self.target])
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.copy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit the logistic regression in R
        self.logistic_params = {
            'formula': self.formula,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'verbose': True,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
            'selectvars': self.selectvars,
        }
        self.model = r.Call(function='adalasso',
                            **self.logistic_params).AsList()

        #Run the GetActual / GetFitted hooks, then read the variable names
        #stored under the model's 'lasso' entry
        self.GetActual()
        self.GetFitted()
        self.vars = list(
            map(str, self.model['lasso'].AsList()['vars'].AsVector()))

        #Apply the stored decision thresholds (specificity is re-applied here)
        for key in ('specificity', 'threshold', 'regulatory_threshold'):
            setattr(self, key, model_struct[key])
示例#16
0
    def Deserialize(self, model_struct):
        '''Rebuild this model object from a serialized model_struct dictionary.

        Restores the stored settings, pushes the training data into R, and
        refits with the R function 'galm'. The stored thresholds are applied
        last.

        Args:
            model_struct: dictionary holding 'data_dictionary', 'target',
                'specificity', 'population', 'generations', 'mutate', 'ZOR',
                'verbose', 'threshold' and 'regulatory_threshold'.

        Raises:
            KeyError: if any of those keys is missing.
        '''
        #Unpack the model_struct dictionary
        self.data_dictionary = model_struct['data_dictionary']
        self.target = model_struct['target']
        self.specificity = model_struct['specificity']
        self.population = model_struct['population']
        self.generations = model_struct['generations']
        self.mutate = model_struct['mutate']
        self.ZOR = model_struct['ZOR']
        self.verbose = model_struct['verbose']

        #Get the data into R and keep a shallow copy in Python
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.copy(self.data_dictionary)
        self.predictors = len(self.data_dictionary) - 1

        #Refit in R using "<target> ~ ." as the formula
        self.formula = r.Call('as.formula',
                              obj=utils.SanitizeVariableName(self.target) +
                              '~.')
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'population': self.population,
            'generations': self.generations,
            'mutateRate': self.mutate,
            'zeroOneRatio': self.ZOR,
            'verbose': self.verbose,
        }
        self.model = r.Call(function='galm', **self.pls_params).AsList()

        #Get some information out of the model.
        self.GetActual()
        self.GetFitted()
        #The galm-based Create reads the model's 'vars' entry into self.vars;
        #do the same here so a deserialized model exposes that attribute too.
        self.vars = [str(v) for v in self.model['vars'].AsVector()]

        #Apply the stored decision thresholds (specificity is re-applied here)
        self.specificity = model_struct['specificity']
        self.threshold = model_struct['threshold']
        self.regulatory_threshold = model_struct['regulatory_threshold']
示例#17
0
    def Deserialize(self, model_struct):
        '''Use the model_struct dictionary to recreate a model object.

        Restores the stored settings, pushes the training data into R,
        rebuilds the smooth-term formula and refits with the R function
        'gam' (family 'gaussian'). The stored thresholds are applied last.

        Args:
            model_struct: dictionary holding 'data_dictionary', 'target',
                'specificity', 'julian', 'k', 'penalty', 'threshold' and
                'regulatory_threshold'.

        Raises:
            KeyError: if any of those keys is missing.
            ValueError: if self.julian is set but is not a data column.
        '''
        #Unpack the model_struct dictionary
        self.data_dictionary = model_struct['data_dictionary']
        self.target = model_struct['target']
        self.specificity = model_struct['specificity']
        self.julian = model_struct['julian']
        self.k = model_struct['k']
        self.penalty = model_struct['penalty']

        #Get the data into R and keep an independent deep copy in Python
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.deepcopy(self.data_dictionary)

        #Materialize the columns and names as lists so they can be indexed
        #and edited (dict views and map objects do not allow that).
        columns = list(self.data_dictionary.values())
        rows = len(columns[0])
        unique_values = [
            np.unique(column).shape[0] - 1 for column in np.array(columns)
        ]
        self.predictors = predictors = list(self.data_dictionary.keys())

        #Drop the target, when present, from the predictor list
        if self.target in predictors:
            indx = predictors.index(self.target)
            del unique_values[indx]
            del predictors[indx]

        #Drop the julian column too; it is used only in the "by=" clause
        if self.julian:
            indx = predictors.index(self.julian)
            del unique_values[indx]
            del predictors[indx]

        #Cap k at the number of rows per predictor
        self.k = np.min([self.k, np.floor(rows / len(predictors))])

        #One s(...) term per predictor, with k also capped by that
        #predictor's (number of unique values - 1)
        by_clause = ""
        if self.julian:
            by_clause = ", by=" + utils.SanitizeVariableName(self.julian)
        terms = [
            "s(" + utils.SanitizeVariableName(name) + ", k=" +
            str(np.min([self.k, n_unique])) + by_clause + ")"
            for name, n_unique in zip(predictors, unique_values)
        ]
        formula = utils.SanitizeVariableName(self.target) + "~" + "+".join(
            terms)

        self.formula = r.Call('as.formula', obj=formula)
        self.gbm_params = {
            'formula': self.formula,
            'family': 'gaussian',
            'data': self.data_frame,
            'lambda': self.penalty,
        }
        self.model = r.Call(function='gam', **self.gbm_params).AsList()

        #Run the GetActual / GetFitted hooks on the refit model
        self.GetActual()
        self.GetFitted()

        #Apply the stored decision thresholds
        self.threshold = model_struct['threshold']
        self.regulatory_threshold = model_struct['regulatory_threshold']
示例#18
0
    def Create(self, **args):
        '''Create a new gam model object.

        Builds a formula with one smooth term per predictor and fits it with
        the R function 'gam' (family 'gaussian').

        Required keys in args: 'data' and 'target'.
        Optional keys, with defaults: 'threshold' (regulatory threshold,
        2.3711), 'julian' (""), 'k' (maximum number of basis functions,
        100), 'lambda' (penalty parameter, 1.4) and 'specificity' (0.9).

        Raises:
            KeyError: if 'data' or 'target' is missing.
            ValueError: if 'julian' is given but is not a data column.
        '''
        #Optional settings and their defaults
        self.regulatory_threshold = args.get('threshold', 2.3711)
        self.threshold = 0  #decision threshold
        self.julian = args.get('julian', "")
        self.k = args.get('k', 100)
        self.penalty = args.get('lambda', 1.4)
        specificity = args.get('specificity', 0.9)

        #Store some object data
        self.data_dictionary = copy.deepcopy(args['data'])
        self.target = args['target']

        #Get the data into R
        self.data_frame = utils.DictionaryToR(self.data_dictionary)

        #Materialize the columns and names as lists so they can be indexed
        #and edited (dict views and map objects do not allow that).
        columns = list(self.data_dictionary.values())
        rows = len(columns[0])
        unique_values = [
            np.unique(column).shape[0] - 1 for column in np.array(columns)
        ]
        self.predictors = predictors = list(self.data_dictionary.keys())

        #Drop the target, when present, from the predictor list
        if self.target in predictors:
            indx = predictors.index(self.target)
            del unique_values[indx]
            del predictors[indx]

        #Drop the julian column too; it is used only in the "by=" clause
        if self.julian:
            indx = predictors.index(self.julian)
            del unique_values[indx]
            del predictors[indx]

        #Cap k at the number of rows per predictor
        self.k = np.min([self.k, np.floor(rows / len(predictors))])

        #One s(...) term per predictor, with k also capped by that
        #predictor's (number of unique values - 1)
        by_clause = ""
        if self.julian:
            by_clause = ", by=" + utils.SanitizeVariableName(self.julian)
        terms = [
            "s(" + utils.SanitizeVariableName(name) + ", k=" +
            str(np.min([self.k, n_unique])) + by_clause + ")"
            for name, n_unique in zip(predictors, unique_values)
        ]
        formula = utils.SanitizeVariableName(self.target) + "~" + "+".join(
            terms)

        self.formula = r.Call('as.formula', obj=formula)
        self.gbm_params = {
            'formula': self.formula,
            'family': 'gaussian',
            'data': self.data_frame,
            'lambda': self.penalty,
        }
        self.model = r.Call(function='gam', **self.gbm_params).AsList()

        #Run the GetActual / GetFitted hooks on the fitted model
        self.GetActual()
        self.GetFitted()

        #Establish a decision threshold
        self.Threshold(specificity)
示例#19
0
    def Create(self, **args):
        '''Create a logistic model object fitted by the R function 'adalasso'.

        Required keys in args: 'data' and 'target'.
        Optional keys, with defaults: 'regulatory_threshold' (2.3711),
        'specificity' (0.9), 'adapt', 'selectvars', 'overshrink' (all
        False) and 'weights'. Only the first letter of 'weights' matters:
        'd' or 'i' selects integer (discrete) weighting, 'c' or 'f' selects
        float (continuous) weighting, and anything else (or no value) sets
        all weights to one.

        Raises:
            KeyError: if 'data' or 'target' is missing.
        '''
        #Optional settings and their defaults
        self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
        self.specificity = args.get('specificity', 0.9)
        self.adapt = args.get('adapt', False)
        self.selectvars = args.get('selectvars', False)
        self.overshrink = args.get('overshrink', False)

        #Get the data into R
        data = args['data']
        self.target = target = args['target']
        self.nobs = len(data[self.target])
        self.data_frame = utils.DictionaryToR(data)
        self.data_dictionary = copy.copy(data)
        self.predictors = len(self.data_dictionary) - 1

        #Choose the weighting method before calling AssignWeights, so a
        #KeyError raised inside AssignWeights is no longer mistaken for a
        #missing 'weights' argument. Slicing with [:1] also lets an empty
        #string fall through to the default instead of raising IndexError.
        weight_code = str(args.get('weights', '')).lower()[:1]
        if weight_code in ('d', 'i'):
            weight_method = 1  #integer (discrete) weighting
        elif weight_code in ('c', 'f'):
            weight_method = 2  #float (continuous) weighting
        else:
            weight_method = 0  #all weights set to one
        self.weights = self.AssignWeights(method=weight_method)

        #Label the exceedances in the training set.
        self.data_dictionary[target] = self.AssignLabels(
            self.data_dictionary[target])

        #Push the relabelled data into R
        self.data_frame = utils.DictionaryToR(self.data_dictionary)

        #Generate a logistic regression model in R.
        self.formula = formula = r.Call(
            'as.formula', obj=utils.SanitizeVariableName(self.target) + '~ .')
        self.logistic_params = {
            'formula': formula,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'verbose': True,
            'adapt': self.adapt,
            'overshrink': self.overshrink,
            'selectvars': self.selectvars,
        }
        self.model = r.Call(function='adalasso',
                            **self.logistic_params).AsList()

        #Select model components and a decision threshold
        self.GetActual()
        self.GetFitted()
        self.Threshold(self.specificity)
        self.vars = [
            str(v) for v in self.model['lasso'].AsList()['vars'].AsVector()
        ]
示例#20
0
    def Create(self, **args):
        '''Create a logistic model object fitted by the R function 'galogistic'.

        Required keys in args: 'data' and 'target'.
        Optional keys, with defaults: 'regulatory_threshold' (2.3711),
        'specificity' (0.9), 'population' (200), 'generations' (100),
        'mutate' (0.02), 'ZOR' (10) and 'weights'. Only the first letter of
        'weights' matters: 'd' or 'i' selects integer (discrete) weighting,
        'c' or 'f' selects float (continuous) weighting, and anything else
        (or no value) sets all weights to one.

        Raises:
            KeyError: if 'data' or 'target' is missing.
        '''
        #Optional thresholds and their defaults
        self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
        self.specificity = args.get('specificity', 0.9)

        #Get the data into R
        data = args['data']
        self.target = target = args['target']
        self.nobs = len(data[self.target])
        self.data_frame = utils.DictionaryToR(data)
        self.data_dictionary = copy.copy(data)
        self.predictors = len(self.data_dictionary) - 1

        #Optional settings forwarded to galogistic
        self.population = args.get('population', 200)
        self.generations = args.get('generations', 100)
        self.mutate = args.get('mutate', 0.02)
        self.ZOR = args.get('ZOR', 10)

        #Choose the weighting method before calling AssignWeights, so a
        #KeyError raised inside AssignWeights is no longer mistaken for a
        #missing 'weights' argument. Slicing with [:1] also lets an empty
        #string fall through to the default instead of raising IndexError.
        weight_code = str(args.get('weights', '')).lower()[:1]
        if weight_code in ('d', 'i'):
            weight_method = 1  #integer (discrete) weighting
        elif weight_code in ('c', 'f'):
            weight_method = 2  #float (continuous) weighting
        else:
            weight_method = 0  #all weights set to one
        self.weights = self.AssignWeights(method=weight_method)

        #Label the exceedances in the training set.
        self.data_dictionary[target] = self.AssignLabels(
            self.data_dictionary[target])

        #Push the relabelled data into R
        self.data_frame = utils.DictionaryToR(self.data_dictionary)

        #Generate a logistic regression model in R ('family' is listed once;
        #the repeated key in the dict literal added nothing).
        self.formula = formula = r.Call(
            'as.formula', obj=utils.SanitizeVariableName(self.target) + '~ .')
        self.logistic_params = {
            'formula': formula,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'population': self.population,
            'generations': self.generations,
            'mutateRate': self.mutate,
            'zeroOneRatio': self.ZOR,
            'verbose': True,
        }
        self.model = r.Call(function='galogistic',
                            **self.logistic_params).AsList()

        #Select model components and a decision threshold
        self.GetActual()
        self.GetFitted()
        self.Threshold(self.specificity)
        self.vars = [str(v) for v in self.model['vars'].AsVector()]
示例#21
0
    def Create(self, **args):
        """Fit a logistic regression model in R using stepwise selection.

        An intercept-only ``glm`` is fitted first and then handed to
        ``self.SelectModel`` together with the requested step direction.

        Keyword arguments:
            data: mapping of column name to values (required). It is indexed
                by ``target`` and converted with ``utils.DictionaryToR``.
            target: name of the response column in ``data`` (required).
            regulatory_threshold: defaults to 2.3711 when omitted.
            specificity: defaults to 0.9; passed to ``self.Threshold``.
            stepdirection: passed to ``self.SelectModel`` as ``direction``;
                defaults to the empty string.
            weights: weighting scheme picked by the first letter of its
                string form: 'd' or 'i' selects method 1 (integer/discrete),
                'c' or 'f' selects method 2 (float/continuous), and anything
                else (including a missing or empty value) selects method 0.

        Raises:
            KeyError: if ``data`` or ``target`` is not supplied.
        """
        #Optional settings fall back to their defaults when not supplied.
        self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
        self.specificity = args.get('specificity', 0.9)

        #Set the direction for stepwise variable selection
        self.stepdirection = args.get('stepdirection', '')

        #Get the data into R
        data = args['data']
        self.target = target = args['target']
        self.nobs = len(data[target])
        self.data_frame = utils.DictionaryToR(data)
        self.data_dictionary = copy.deepcopy(data)
        self.predictors = len(self.data_dictionary) - 1

        #Pick the weighting method from the first letter of the 'weights'
        #argument. Slicing (rather than indexing) means an empty string falls
        #through to method 0 instead of raising IndexError, and keeping
        #AssignWeights outside any try block means an error raised inside it
        #is reported rather than silently replaced by method 0.
        initial = str(args.get('weights', '')).lower()[:1]
        if initial in ('d', 'i'):
            #integer (discrete) weighting
            method = 1
        elif initial in ('c', 'f'):
            #float (continuous) weighting
            method = 2
        else:
            #No recognised 'weights' value: set all weights to one.
            method = 0
        self.weights = self.AssignWeights(method=method)

        #Label the exceedances in the training set.
        self.data_dictionary[target] = self.AssignLabels(
            self.data_dictionary[target])

        #Get the (now labelled) data into R
        self.data_frame = utils.DictionaryToR(self.data_dictionary)

        #Generate a logistic regression model in R, starting from the
        #intercept-only formula.
        interceptonly = r.Call(
            'as.formula', obj=utils.SanitizeVariableName(self.target) + '~ 1')
        self.logistic_params = {
            'formula': interceptonly,
            'family': 'binomial',
            'data': self.data_frame,
            'weights': self.weights,
            'x': True,
        }
        self.model = r.Call(function='glm', **self.logistic_params).AsList()

        #Select model components and a decision threshold
        self.SelectModel(direction=self.stepdirection)
        self.GetActual()
        self.GetFitted()
        self.Threshold(self.specificity)
示例#22
0
    def Deserialize(self, model_struct, scratchdir=""):
        """Restore this model from a previously serialized structure.

        The fitted R object is not refitted: the bytes stored under
        'modelstring' are written to a temporary file, loaded into R with
        ``load``/``get``, and the temporary file is removed again.

        Args:
            model_struct: mapping that must provide the keys
                'data_dictionary', 'target', 'specificity', 'modelstring',
                'ncomp', 'threshold' and 'regulatory_threshold'.
            scratchdir: optional directory in which the temporary file is
                created; when empty, the bare file name is used.

        Raises:
            KeyError: if ``model_struct`` lacks one of the required keys.
        """
        #Unpack the model_struct dictionary
        self.data_dictionary = model_struct['data_dictionary']
        self.target = model_struct['target']
        self.specificity = model_struct['specificity']

        #Get the data into R
        self.data_frame = utils.DictionaryToR(self.data_dictionary)
        self.data_dictionary = copy.copy(self.data_dictionary)
        self.num_predictors = len(self.data_dictionary) - 1

        #First, save the serialized R object to disk (so it can be read from within R)
        suffix = "".join(random.choice(string.letters) for i in xrange(10))
        robject_file = "pls" + suffix + ".robj"
        if scratchdir:
            robject_file = os.sep.join([scratchdir, robject_file])
        #NOTE(review): backslashes are doubled presumably so that Windows
        #paths survive string escaping on the R side -- confirm.
        robject_file = robject_file.replace("\\", "\\\\")

        modelstring = model_struct["modelstring"]
        try:
            #The context manager closes the handle even if the write fails.
            with open(robject_file, "wb") as f:
                f.write(modelstring)

            #Read the serialized model object into R:
            objects = r.Call(function='load', file=robject_file).AsVector()
            self.model = r.Call(function="get", x=str(objects[0])).AsList()
        finally:
            #Never leave the scratch file behind, even when R fails to load it.
            if os.path.exists(robject_file):
                os.remove(robject_file)

        #Record the parameters of the PLS model. The model itself comes from
        #the serialized object loaded above, so plsr is not called here.
        self.formula = r.Call('as.formula',
                              obj=utils.SanitizeVariableName(self.target) +
                              '~.')
        #More than two columns means more than one non-target column.
        cross_validated = len(self.data_dictionary) > 2
        self.pls_params = {
            'formula': self.formula,
            'data': self.data_frame,
            'validation': 'LOO' if cross_validated else 'none',
            'x': True,
        }

        #Get the number of columns from the validation step
        #(Might be fewer than the number of predictor variables if n<p)
        if cross_validated:
            validation = self.model['validation'].AsList()
            dims = r.Call(function="dim", x=validation['pred']).AsNumeric()
            self.ncomp_max = int(list(dims)[2])
        else:
            self.ncomp_max = 1

        #The number of components is taken from the serialized structure.
        self.GetActual()
        self.ncomp = model_struct['ncomp']
        self.GetFitted()

        #Establish a decision threshold
        self.threshold = model_struct['threshold']
        self.regulatory_threshold = model_struct['regulatory_threshold']
        self.vars = [str(v) for v in self.data_dictionary]
        self.vars.remove(self.target)