def Deserialize(self, model_struct):
    '''Rebuild this model from a serialized model_struct dictionary.

    The stored settings are copied onto the object, the training data is
    sent to R, and the R function 'censlars' is called again to refit the
    model. The decision threshold is restored from model_struct instead of
    being recomputed.
    '''
    #Copy the stored settings onto the object
    self.data_dictionary = model_struct['data_dictionary']
    for field in ('target', 'specificity', 'left', 'right', 'adapt',
                  'overshrink'):
        setattr(self, field, model_struct[field])

    #Build the R data frame, then keep a private deep copy of the data
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.deepcopy(self.data_dictionary)
    #Every key except one counts as a predictor (presumably all but the target)
    self.predictors = len(self.data_dictionary) - 1

    #Refit in R: the sanitized target against all remaining columns
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           left=self.left,
                           right=self.right,
                           adapt=self.adapt,
                           overshrink=self.overshrink)
    self.model = r.Call(function='censlars', **self.pls_params).AsList()

    #Refresh the cached actual and fitted values
    self.GetActual()
    self.GetFitted()

    #Restore the stored decision threshold settings
    for field in ('specificity', 'threshold', 'regulatory_threshold'):
        setattr(self, field, model_struct[field])
def Deserialize(self, model_struct):
    '''Rebuild this logistic model from a serialized model_struct dictionary.

    The stored settings (including the stored formula and weights) are
    copied onto the object, R's 'glm' is called with family 'binomial', and
    SelectModel is run with the stored step direction. The decision
    threshold is restored from model_struct instead of being recomputed.
    '''
    #Copy the stored settings onto the object
    self.data_dictionary = model_struct['data_dictionary']
    for field in ('target', 'specificity', 'weights', 'stepdirection',
                  'formula'):
        setattr(self, field, model_struct[field])

    #Build the R data frame, then keep a private deep copy of the data
    self.nobs = len(self.data_dictionary[self.target])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.deepcopy(self.data_dictionary)
    self.predictors = len(self.data_dictionary) - 1

    #Fit the logistic regression in R
    self.logistic_params = dict(formula=self.formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                x=True)
    self.model = r.Call(function='glm', **self.logistic_params).AsList()

    #Re-run model selection, then refresh the actual and fitted values
    self.SelectModel(direction=self.stepdirection)
    self.GetActual()
    self.GetFitted()

    #Restore the stored decision threshold settings
    for field in ('specificity', 'threshold', 'regulatory_threshold'):
        setattr(self, field, model_struct[field])
def Predict(self, data_dictionary):
    '''Return the model's predictions for data_dictionary as a list of floats.

    The data is converted to an R object and handed to the R function
    'predict.galogistic' together with the fitted model.
    '''
    r_frame = utils.DictionaryToR(data_dictionary)
    raw = r.Call(function="predict.galogistic",
                 obj=self.model,
                 newx=r_frame).AsVector()

    #Convert the R vector into plain Python floats
    return list(map(float, raw))
def Predict(self, data_dictionary):
    '''Return the model's predictions for data_dictionary as a list.

    The data is converted to an R object and handed to R's generic
    'predict' together with the fitted model.
    '''
    data_frame = utils.DictionaryToR(data_dictionary)
    prediction_params = {'object': self.model, 'newdata': data_frame}
    prediction = r.Call(function='predict', **prediction_params).AsVector()

    #Translate the R output to a type that can be navigated in Python.
    #squeeze() collapses a single prediction to a 0-d array, which list()
    #cannot iterate, so at least one dimension is restored afterwards.
    prediction = np.atleast_1d(np.array(prediction).squeeze())
    return list(prediction)
def PredictValues(self, data_dictionary, **args):
    '''Return predicted values for data_dictionary as a list of floats.

    The target column, when present, is dropped from a shallow copy of the
    data (the caller's dictionary is left untouched) before the rest is
    sent to R's 'predict'. Extra keyword arguments are accepted but unused.
    '''
    data = copy.copy(data_dictionary)
    #New data may not carry the target column, so its absence is not an error
    data.pop(self.target, None)

    data_frame = utils.DictionaryToR(data)
    prediction_params = {'obj': self.model, 'newx': data_frame}
    prediction = r.Call(function='predict', **prediction_params).AsVector()
    return [float(p) for p in prediction]
def Create(self, **args):
    '''Fit a new model with the R function 'adalars'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys: 'regulatory_threshold'
    (default 2.3711), 'adapt', 'overshrink', 'precondition', 'selectvars'
    (each default False) and 'specificity' (default 0.9).
    '''
    #Fall back to 2.3711 when no 'regulatory_threshold' key is supplied
    self.threshold = args.get('regulatory_threshold', 2.3711)
    self.regulatory_threshold = self.threshold
    self.target = args['target']

    #Boolean fitting options all default to False
    for option in ('adapt', 'overshrink', 'precondition', 'selectvars'):
        setattr(self, option, args.get(option, False))
    specificity = args.get('specificity', 0.9)

    #Send the data to R and keep a shallow copy on the object
    training = args['data']
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.copy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Fit in R: the sanitized target against all remaining columns
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           adapt=self.adapt,
                           overshrink=self.overshrink,
                           precondition=self.precondition,
                           selectvars=self.selectvars)
    self.model = r.Call(function='adalars', **self.pls_params).AsList()

    #Pull the actual/fitted values, variable names and coefficients
    self.GetActual()
    self.GetFitted()
    self.vars = list(map(str, self.model['lars'].AsList()['vars'].AsVector()))
    self.coefs = list(
        map(float, self.model['lars'].AsList()['coefs'].AsVector()))

    #Establish a decision threshold
    self.Threshold(specificity)
def Create(self, **args):
    '''Fit a new model with the R function 'galm'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'regulatory_threshold' (2.3711), 'population' (200), 'generations'
    (100), 'mutate' (0.02), 'ZOR' (10), 'verbose' (False) and
    'specificity' (0.90).
    '''
    #Fall back to 2.3711 when no 'regulatory_threshold' key is supplied
    self.threshold = args.get('regulatory_threshold', 2.3711)
    self.regulatory_threshold = self.threshold
    self.target = args['target']

    #Settings that are later passed to 'galm', with their defaults
    defaults = (('population', 200),
                ('generations', 100),
                ('mutate', 0.02),
                ('ZOR', 10),
                ('verbose', False))
    for option, fallback in defaults:
        setattr(self, option, args.get(option, fallback))
    specificity = args.get('specificity', 0.90)

    #Send the data to R and keep a shallow copy on the object
    training = args['data']
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.copy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Fit in R: the sanitized target against all remaining columns
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           population=self.population,
                           generations=self.generations,
                           mutateRate=self.mutate,
                           zeroOneRatio=self.ZOR,
                           verbose=self.verbose)
    self.model = r.Call(function='galm', **self.pls_params).AsList()

    #Pull the actual/fitted values and the model's variable names
    self.GetActual()
    self.GetFitted()
    self.vars = list(map(str, self.model['vars'].AsVector()))

    #Establish a decision threshold
    self.Threshold(specificity)
def Predict(self, data_dictionary):
    '''Return the model's predictions for data_dictionary as a list of floats.

    The data is converted to an R object and handed to the R function
    'predict.adalasso' together with the fitted model.
    '''
    r_frame = utils.DictionaryToR(data_dictionary)
    raw = r.Call(function="predict.adalasso",
                 obj=self.model,
                 newx=r_frame).AsVector()

    #Go through an array of doubles, then hand back plain Python floats
    doubles = array.array('d', raw)
    return [float(value) for value in doubles]
def PredictValues(self, data_dictionary, **args):
    '''Return predicted values for data_dictionary as a list of floats.

    The target column, when present, is dropped from a shallow copy of the
    data (the caller's dictionary is left untouched) before the rest is
    sent to the R function 'predict.adalars'. Extra keyword arguments are
    accepted but unused.
    '''
    data = copy.copy(data_dictionary)
    #New data may not carry the target column, so its absence is not an error
    data.pop(self.target, None)

    data_frame = utils.DictionaryToR(data)
    prediction_params = {'obj': self.model, 'newx': data_frame}
    prediction = r.Call(function='predict.adalars',
                        **prediction_params).AsVector()
    return [float(p) for p in prediction]
def Create(self, **args):
    '''Fit a new model with the R function 'plsr'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys: 'threshold' (default
    2.3711) and 'specificity' (default 0.9). The full args dictionary is
    also forwarded to CrossValidation when there is more than one predictor.
    '''
    #Fall back to 2.3711 when no 'threshold' key is supplied
    self.threshold = args.get('threshold', 2.3711)
    self.regulatory_threshold = self.threshold
    specificity = args.get('specificity', 0.9)

    #Send a shallow copy of the data to R
    self.target = args['target']
    data = self.data_dictionary = copy.copy(args['data'])
    self.data_frame = utils.DictionaryToR(data)
    self.num_predictors = len(self.data_dictionary) - 1

    #More than two columns means more than one predictor next to the target
    several_predictors = len(data) > 2

    #Fit in R; leave-one-out validation is skipped for a single predictor
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           validation='LOO' if several_predictors else 'none',
                           x=True)
    self.model = r.Call(function='plsr', **self.pls_params).AsList()

    #Get the number of columns from the validation step
    #(Might be fewer than the number of predictor variables if n<p)
    if several_predictors:
        validation_pred = self.model['validation'].AsList()['pred']
        dimensions = r.Call(function="dim", x=validation_pred).AsNumeric()
        self.ncomp_max = int(list(dimensions)[2])
    else:
        self.ncomp_max = 1

    #Choose the number of components, by cross-validation when possible
    self.GetActual()
    if several_predictors:
        self.CrossValidation(**args)
    else:
        self.ncomp = 1
    self.GetFitted()

    #Establish a decision threshold
    self.Threshold(specificity)

    #Every column name except the target
    self.vars = list(map(str, data))
    self.vars.remove(self.target)
def Create(self, **args):
    '''Fit a new model with the R function 'censlars'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'regulatory_threshold' (2.3711), 'left' (-inf), 'right' (+inf),
    'adapt' (False), 'overshrink' (False) and 'specificity' (0.9).
    '''
    #Fall back to 2.3711 when no 'regulatory_threshold' key is supplied
    self.threshold = args.get('regulatory_threshold', 2.3711)
    self.regulatory_threshold = self.threshold
    self.target = args['target']

    #Settings that are later passed to 'censlars', with their defaults
    self.left = args.get('left', -np.inf)
    self.right = args.get('right', np.inf)
    self.adapt = args.get('adapt', False)
    self.overshrink = args.get('overshrink', False)
    specificity = args.get('specificity', 0.9)

    #Send the data to R and keep a deep copy on the object
    training = args['data']
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.deepcopy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Fit in R: the sanitized target against all remaining columns
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           left=self.left,
                           right=self.right,
                           adapt=self.adapt,
                           overshrink=self.overshrink)
    self.model = r.Call(function='censlars', **self.pls_params).AsList()

    #Refresh the cached actual and fitted values
    self.GetActual()
    self.GetFitted()

    #Establish a decision threshold
    self.Threshold(specificity)
def PredictValues(self, data_dictionary, **args):
    '''Return predictions for data_dictionary as a list of arrays of doubles.

    R's 'predict' returns one flat vector. It is cut into
    min(self.num_predictors, self.ncomp_max) consecutive chunks of equal
    length and each chunk is returned as its own array.array('d').
    Presumably each chunk holds the predictions for one component count;
    confirm against the R model. Extra keyword arguments are accepted but
    unused.
    '''
    data_frame = utils.DictionaryToR(data_dictionary)
    prediction_params = {'object': self.model, 'newdata': data_frame}
    prediction = r.Call(function='predict', **prediction_params).AsVector()
    prediction = array.array('d', prediction)

    #Reshape the vector of predictions
    columns = min(self.num_predictors, self.ncomp_max)
    #Floor division keeps the chunk length an integer, which slicing needs
    rows = len(prediction) // columns
    return [array.array('d', prediction[k * rows:(k + 1) * rows])
            for k in range(int(columns))]
def Deserialize(self, model_struct):
    '''Rebuild this logistic model from a serialized model_struct dictionary.

    The stored settings (including the stored formula and weights) are
    copied onto the object and the R function 'galogistic' is called again
    to refit. The decision threshold is restored from model_struct instead
    of being recomputed.
    '''
    #Copy the stored settings onto the object
    self.data_dictionary = model_struct['data_dictionary']
    for field in ('target', 'specificity', 'weights', 'population',
                  'generations', 'mutate', 'ZOR', 'formula'):
        setattr(self, field, model_struct[field])

    #Build the R data frame, then keep a shallow copy of the data
    self.nobs = len(self.data_dictionary[self.target])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.copy(self.data_dictionary)
    self.predictors = len(self.data_dictionary) - 1

    #Refit the logistic regression in R
    self.logistic_params = dict(formula=self.formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                population=self.population,
                                generations=self.generations,
                                mutateRate=self.mutate,
                                zeroOneRatio=self.ZOR,
                                verbose=True)
    self.model = r.Call(function='galogistic',
                        **self.logistic_params).AsList()

    #Pull the actual/fitted values and the model's variable names
    self.GetActual()
    self.GetFitted()
    self.vars = list(map(str, self.model['vars'].AsVector()))

    #Restore the stored decision threshold settings
    for field in ('specificity', 'threshold', 'regulatory_threshold'):
        setattr(self, field, model_struct[field])
def Deserialize(self, model_struct):
    '''Rebuild this model from a serialized model_struct dictionary.

    The stored settings are copied onto the object, the training data is
    sent to R, and the R function 'adalars' is called again to refit. The
    decision threshold is restored from model_struct instead of being
    recomputed.
    '''
    #Copy the stored settings onto the object
    self.data_dictionary = model_struct['data_dictionary']
    for field in ('target', 'specificity', 'adapt', 'overshrink',
                  'precondition', 'selectvars'):
        setattr(self, field, model_struct[field])

    #Build the R data frame, then keep a shallow copy of the data
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.copy(self.data_dictionary)
    self.predictors = len(self.data_dictionary) - 1

    #Refit in R: the sanitized target against all remaining columns
    response = utils.SanitizeVariableName(self.target)
    self.formula = r.Call('as.formula', obj=response + '~.')
    self.pls_params = dict(formula=self.formula,
                           data=self.data_frame,
                           adapt=self.adapt,
                           overshrink=self.overshrink,
                           precondition=self.precondition,
                           selectvars=self.selectvars)
    self.model = r.Call(function='adalars', **self.pls_params).AsList()

    #Pull the actual/fitted values, variable names and coefficients
    self.GetActual()
    self.GetFitted()
    self.vars = list(map(str, self.model['lars'].AsList()['vars'].AsVector()))
    self.coefs = list(
        map(float, self.model['lars'].AsList()['coefs'].AsVector()))

    #Restore the stored decision threshold settings
    for field in ('specificity', 'threshold', 'regulatory_threshold'):
        setattr(self, field, model_struct[field])
def Deserialize(self, model_struct):
    '''Rebuild this logistic model from a serialized model_struct dictionary.

    The stored settings (including the stored formula and weights) are
    copied onto the object and the R function 'adalasso' is called again to
    refit. The decision threshold is restored from model_struct instead of
    being recomputed.
    '''
    #Copy the stored settings onto the object
    self.data_dictionary = model_struct['data_dictionary']
    for field in ('target', 'specificity', 'weights', 'formula', 'adapt',
                  'overshrink', 'selectvars'):
        setattr(self, field, model_struct[field])

    #Build the R data frame, then keep a shallow copy of the data
    self.nobs = len(self.data_dictionary[self.target])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.copy(self.data_dictionary)
    self.predictors = len(self.data_dictionary) - 1

    #Refit the logistic regression in R
    self.logistic_params = dict(formula=self.formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                verbose=True,
                                adapt=self.adapt,
                                overshrink=self.overshrink,
                                selectvars=self.selectvars)
    self.model = r.Call(function='adalasso', **self.logistic_params).AsList()

    #Pull the actual/fitted values and the model's variable names
    self.GetActual()
    self.GetFitted()
    self.vars = list(map(str, self.model['lasso'].AsList()['vars'].AsVector()))

    #Restore the stored decision threshold settings
    for field in ('specificity', 'threshold', 'regulatory_threshold'):
        setattr(self, field, model_struct[field])
def Deserialize(self, model_struct):
    '''Rebuild this model from a serialized model_struct dictionary.

    The stored settings are copied onto the object, the training data is
    sent to R, and the R function 'galm' is called again to refit. The
    decision threshold is restored from model_struct instead of being
    recomputed.
    '''
    #Unpack the model_struct dictionary
    self.data_dictionary = model_struct['data_dictionary']
    self.target = model_struct['target']
    self.specificity = model_struct['specificity']
    self.population = model_struct['population']
    self.generations = model_struct['generations']
    self.mutate = model_struct['mutate']
    self.ZOR = model_struct['ZOR']
    self.verbose = model_struct['verbose']

    #Get the data into R
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.copy(self.data_dictionary)
    self.predictors = len(self.data_dictionary.keys()) - 1

    #Refit in R: the sanitized target against all remaining columns
    self.formula = r.Call('as.formula',
                          obj=utils.SanitizeVariableName(self.target) + '~.')
    self.pls_params = {'formula': self.formula,
                       'data': self.data_frame,
                       'population': self.population,
                       'generations': self.generations,
                       'mutateRate': self.mutate,
                       'zeroOneRatio': self.ZOR,
                       'verbose': self.verbose}
    self.model = r.Call(function='galm', **self.pls_params).AsList()

    #Get some information out of the model.
    self.GetActual()
    self.GetFitted()
    #Create() records the model's variable names; do the same here so a
    #deserialized model exposes the same attributes as a fresh one
    self.vars = [str(v) for v in self.model['vars'].AsVector()]

    #Restore the stored decision threshold settings
    self.specificity = model_struct['specificity']
    self.threshold = model_struct['threshold']
    self.regulatory_threshold = model_struct['regulatory_threshold']
def Deserialize(self, model_struct):
    '''Use the model_struct dictionary to recreate a model object.

    The stored settings are copied onto the object, a formula with one
    smooth term s(...) per predictor is rebuilt, and the R function 'gam'
    is called again to refit. The thresholds are restored from model_struct
    instead of being recomputed.
    '''
    #Unpack the model_struct dictionary
    self.data_dictionary = model_struct['data_dictionary']
    self.target = model_struct['target']
    self.specificity = model_struct['specificity']
    self.julian = model_struct['julian']
    self.k = model_struct['k']
    self.penalty = model_struct['penalty']

    #Get the data into R
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.deepcopy(self.data_dictionary)

    #Materialize keys and values as lists so they can be indexed and edited
    #whether the dictionary methods return lists or views
    columns = list(self.data_dictionary.values())
    rows = len(columns[0])
    #Number of distinct values per column, minus one; used to cap k per term
    unique_values = [np.unique(column).shape[0] - 1
                     for column in np.array(columns)]
    self.predictors = predictors = list(self.data_dictionary.keys())

    #The target is the response, not a predictor
    if self.target in predictors:
        indx = predictors.index(self.target)
        del unique_values[indx]
        predictors.remove(self.target)

    #The julian column, when given, is only used as the 'by' variable
    if self.julian:
        indx = predictors.index(self.julian)
        del unique_values[indx]
        predictors.remove(self.julian)

    #Cap k by the number of rows available per predictor
    self.k = np.min([self.k, np.floor(rows / len(predictors))])

    #Build one smooth term per predictor and join them with '+'
    terms = []
    for name, distinct in zip(predictors, unique_values):
        term = "s(" + utils.SanitizeVariableName(name) + ", k=" + str(
            np.min([self.k, distinct]))
        if self.julian:
            term += ", by=" + utils.SanitizeVariableName(self.julian)
        terms.append(term + ")")
    formula = utils.SanitizeVariableName(self.target) + "~" + "+".join(terms)

    #Fit the gam in R
    self.formula = r.Call('as.formula', obj=formula)
    self.gbm_params = {'formula': self.formula,
                       'family': 'gaussian',
                       'data': self.data_frame,
                       'lambda': self.penalty}
    self.model = r.Call(function='gam', **self.gbm_params).AsList()

    #Refresh the cached actual and fitted values
    self.GetActual()
    self.GetFitted()

    #Restore the stored thresholds
    self.threshold = model_struct['threshold']
    self.regulatory_threshold = model_struct['regulatory_threshold']
def Create(self, **args):
    '''Create a new gam model object.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'threshold' (regulatory threshold, 2.3711), 'julian' (name of the
    column used as the 'by' variable of every smooth term, ""), 'k'
    (upper bound on k for each smooth term, 100), 'lambda' (passed to R as
    'lambda', 1.4) and 'specificity' (0.9).
    '''
    #Fall back to 2.3711 when no 'threshold' key is supplied
    self.regulatory_threshold = args.get('threshold', 2.3711)
    self.threshold = 0  #decision threshold

    self.julian = args.get('julian', "")
    self.k = args.get('k', 100)
    self.penalty = args.get('lambda', 1.4)
    specificity = args.get('specificity', 0.9)

    #Store some object data
    self.data_dictionary = copy.deepcopy(args['data'])
    self.target = args['target']

    #Get the data into R
    self.data_frame = utils.DictionaryToR(self.data_dictionary)

    #Materialize keys and values as lists so they can be indexed and edited
    #whether the dictionary methods return lists or views
    columns = list(self.data_dictionary.values())
    rows = len(columns[0])
    #Number of distinct values per column, minus one; used to cap k per term
    unique_values = [np.unique(column).shape[0] - 1
                     for column in np.array(columns)]
    self.predictors = predictors = list(self.data_dictionary.keys())

    #The target is the response, not a predictor
    if self.target in predictors:
        indx = predictors.index(self.target)
        del unique_values[indx]
        predictors.remove(self.target)

    #The julian column, when given, is only used as the 'by' variable
    if self.julian:
        indx = predictors.index(self.julian)
        del unique_values[indx]
        predictors.remove(self.julian)

    #Cap k by the number of rows available per predictor
    self.k = np.min([self.k, np.floor(rows / len(predictors))])

    #Build one smooth term per predictor and join them with '+'
    terms = []
    for name, distinct in zip(predictors, unique_values):
        term = "s(" + utils.SanitizeVariableName(name) + ", k=" + str(
            np.min([self.k, distinct]))
        if self.julian:
            term += ", by=" + utils.SanitizeVariableName(self.julian)
        terms.append(term + ")")
    formula = utils.SanitizeVariableName(self.target) + "~" + "+".join(terms)

    #Fit the gam in R
    self.formula = r.Call('as.formula', obj=formula)
    self.gbm_params = {'formula': self.formula,
                       'family': 'gaussian',
                       'data': self.data_frame,
                       'lambda': self.penalty}
    self.model = r.Call(function='gam', **self.gbm_params).AsList()

    #Refresh the cached actual and fitted values
    self.GetActual()
    self.GetFitted()

    #Establish a decision threshold
    self.Threshold(specificity)
def Create(self, **args):
    '''Create a logistic model object fitted with the R function 'adalasso'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'regulatory_threshold' (2.3711), 'specificity' (0.9), 'adapt',
    'selectvars', 'overshrink' (each False) and 'weights'. The first
    letter of 'weights' picks the AssignWeights method: 'd'/'i' gives 1,
    'c'/'f' gives 2, anything else (or no key) gives 0.
    '''
    #Optional settings and their defaults
    self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
    self.specificity = args.get('specificity', 0.9)
    self.adapt = args.get('adapt', False)
    self.selectvars = args.get('selectvars', False)
    self.overshrink = args.get('overshrink', False)

    #Send the raw data to R and keep a shallow copy on the object
    training = args['data']
    self.target = response = args['target']
    self.nobs = len(training[self.target])
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.copy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Pick the weighting method from the first letter of 'weights'.
    #A KeyError anywhere in this step falls back to method 0.
    try:
        scheme = str(args['weights']).lower()[0]
        if scheme in ['d', 'i']:
            #integer (discrete) weighting
            method = 1
        elif scheme in ['c', 'f']:
            #float (continuous) weighting
            method = 2
        else:
            method = 0
        self.weights = self.AssignWeights(method=method)
    except KeyError:
        self.weights = self.AssignWeights(method=0)

    #Label the exceedances in the training set, then resend the data to R
    self.data_dictionary[response] = self.AssignLabels(
        self.data_dictionary[response])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)

    #Fit in R: the sanitized target against all remaining columns
    self.formula = formula = r.Call(
        'as.formula', obj=utils.SanitizeVariableName(self.target) + '~ .')
    self.logistic_params = dict(formula=formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                verbose=True,
                                adapt=self.adapt,
                                overshrink=self.overshrink,
                                selectvars=self.selectvars)
    self.model = r.Call(function='adalasso', **self.logistic_params).AsList()

    #Select model components and a decision threshold
    self.GetActual()
    self.GetFitted()
    self.Threshold(self.specificity)
    self.vars = list(map(str, self.model['lasso'].AsList()['vars'].AsVector()))
def Create(self, **args):
    '''Create a logistic model object fitted with the R function 'galogistic'.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'regulatory_threshold' (2.3711), 'specificity' (0.9), 'population'
    (200), 'generations' (100), 'mutate' (0.02), 'ZOR' (10) and 'weights'.
    The first letter of 'weights' picks the AssignWeights method: 'd'/'i'
    gives 1, 'c'/'f' gives 2, anything else (or no key) gives 0.
    '''
    #Optional settings and their defaults
    self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
    self.specificity = args.get('specificity', 0.9)

    #Send the raw data to R and keep a shallow copy on the object
    training = args['data']
    self.target = response = args['target']
    self.nobs = len(training[self.target])
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.copy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Settings that are later passed to 'galogistic', with their defaults
    defaults = (('population', 200),
                ('generations', 100),
                ('mutate', 0.02),
                ('ZOR', 10))
    for option, fallback in defaults:
        setattr(self, option, args.get(option, fallback))

    #Pick the weighting method from the first letter of 'weights'.
    #A KeyError anywhere in this step falls back to method 0.
    try:
        scheme = str(args['weights']).lower()[0]
        if scheme in ['d', 'i']:
            #integer (discrete) weighting
            method = 1
        elif scheme in ['c', 'f']:
            #float (continuous) weighting
            method = 2
        else:
            method = 0
        self.weights = self.AssignWeights(method=method)
    except KeyError:
        self.weights = self.AssignWeights(method=0)

    #Label the exceedances in the training set, then resend the data to R
    self.data_dictionary[response] = self.AssignLabels(
        self.data_dictionary[response])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)

    #Fit in R: the sanitized target against all remaining columns
    self.formula = formula = r.Call(
        'as.formula', obj=utils.SanitizeVariableName(self.target) + '~ .')
    self.logistic_params = dict(formula=formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                population=self.population,
                                generations=self.generations,
                                mutateRate=self.mutate,
                                zeroOneRatio=self.ZOR,
                                verbose=True)
    self.model = r.Call(function='galogistic',
                        **self.logistic_params).AsList()

    #Select model components and a decision threshold
    self.GetActual()
    self.GetFitted()
    self.Threshold(self.specificity)
    self.vars = list(map(str, self.model['vars'].AsVector()))
def Create(self, **args):
    '''Create a logistic model object fitted with R's 'glm'.

    An intercept-only binomial model is fitted first and then handed to
    SelectModel with the requested step direction.

    Required keys in args: 'data' (dictionary of columns) and 'target'
    (name of the response column). Optional keys and their defaults:
    'regulatory_threshold' (2.3711), 'specificity' (0.9), 'stepdirection'
    ('') and 'weights'. The first letter of 'weights' picks the
    AssignWeights method: 'd'/'i' gives 1, 'c'/'f' gives 2, anything else
    (or no key) gives 0.
    '''
    #Optional settings and their defaults
    self.regulatory_threshold = args.get('regulatory_threshold', 2.3711)
    self.specificity = args.get('specificity', 0.9)
    self.stepdirection = args.get('stepdirection', '')

    #Send the raw data to R and keep a deep copy on the object
    training = args['data']
    self.target = response = args['target']
    self.nobs = len(training[self.target])
    self.data_frame = utils.DictionaryToR(training)
    self.data_dictionary = copy.deepcopy(training)
    self.predictors = len(self.data_dictionary) - 1

    #Pick the weighting method from the first letter of 'weights'.
    #A KeyError anywhere in this step falls back to method 0.
    try:
        scheme = str(args['weights']).lower()[0]
        if scheme in ['d', 'i']:
            #integer (discrete) weighting
            method = 1
        elif scheme in ['c', 'f']:
            #float (continuous) weighting
            method = 2
        else:
            method = 0
        self.weights = self.AssignWeights(method=method)
    except KeyError:
        self.weights = self.AssignWeights(method=0)

    #Label the exceedances in the training set, then resend the data to R
    self.data_dictionary[response] = self.AssignLabels(
        self.data_dictionary[response])
    self.data_frame = utils.DictionaryToR(self.data_dictionary)

    #Start from an intercept-only logistic regression in R
    null_formula = r.Call('as.formula',
                          obj=utils.SanitizeVariableName(self.target) + '~ 1')
    self.logistic_params = dict(formula=null_formula,
                                family='binomial',
                                data=self.data_frame,
                                weights=self.weights,
                                x=True)
    self.model = r.Call(function='glm', **self.logistic_params).AsList()

    #Select model components and a decision threshold
    self.SelectModel(direction=self.stepdirection)
    self.GetActual()
    self.GetFitted()
    self.Threshold(self.specificity)
def Deserialize(self, model_struct, scratchdir=""):
    '''Rebuild this model from a serialized model_struct dictionary.

    Unlike a refit, the fitted R object itself is restored:
    model_struct['modelstring'] is written to a temporary .robj file, read
    back with R's 'load'/'get', and the file is removed again. The number
    of components and the thresholds are restored from model_struct.

    scratchdir: optional directory for the temporary file; when empty the
    file is created relative to the current working directory.
    '''
    #Unpack the model_struct dictionary
    self.data_dictionary = model_struct['data_dictionary']
    self.target = model_struct['target']
    self.specificity = model_struct['specificity']

    #Get the data into R
    self.data_frame = utils.DictionaryToR(self.data_dictionary)
    self.data_dictionary = copy.copy(self.data_dictionary)
    self.num_predictors = len(self.data_dictionary.keys()) - 1

    #Pick a random name for the temporary file holding the serialized R object
    suffix = "".join(random.choice(string.ascii_letters) for _ in range(10))
    robject_file = "pls" + suffix + ".robj"
    if scratchdir:
        robject_file = os.sep.join(scratchdir.split(os.sep) + [robject_file])
    #Backslashes are doubled before the path is used
    #NOTE(review): presumably so R reads Windows paths correctly - confirm
    robject_file = robject_file.replace("\\", "\\\\")

    #Save the serialized R object to disk (so it can be read from within R)
    modelstring = model_struct["modelstring"]
    with open(robject_file, "wb") as handle:
        handle.write(modelstring)

    #Read the serialized model object into R; always clean up the temp file
    try:
        objects = r.Call(function='load', file=robject_file).AsVector()
        self.model = r.Call(function="get", x=str(objects[0])).AsList()
    finally:
        os.remove(robject_file)

    #Record the formula and parameters of the PLS model. The model is not
    #refitted here; the loaded R object is used instead.
    self.formula = r.Call('as.formula',
                          obj=utils.SanitizeVariableName(self.target) + '~.')
    several_predictors = len(self.data_dictionary) > 2
    self.pls_params = {'formula': self.formula,
                       'data': self.data_frame,
                       'validation': 'LOO' if several_predictors else 'none',
                       'x': True}

    #Get the number of columns from the validation step
    #(Might be fewer than the number of predictor variables if n<p)
    if several_predictors:
        validation_pred = self.model['validation'].AsList()['pred']
        dimensions = r.Call(function="dim", x=validation_pred).AsNumeric()
        self.ncomp_max = int(list(dimensions)[2])
    else:
        self.ncomp_max = 1

    #Restore the stored number of components instead of cross-validating
    self.GetActual()
    self.ncomp = model_struct['ncomp']
    self.GetFitted()

    #Restore the stored decision threshold settings
    self.specificity = model_struct['specificity']
    self.threshold = model_struct['threshold']
    self.regulatory_threshold = model_struct['regulatory_threshold']

    #Every column name except the target
    self.vars = [str(v) for v in self.data_dictionary.keys()]
    self.vars.remove(self.target)