import pandas as pd
from statsmodels.discrete.discrete_model import Logit


def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True):
    # Work on a copy so assigning new columns does not mutate a view of X.
    df = X[[assignment]].copy()
    regression_confounders = []
    for confounder, var_type in confounder_types.items():
        if var_type == 'o' or var_type == 'u':
            # Discrete/categorical confounder: expand into dummy columns.
            c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder)
            if len(c_dummies.columns) == 1:
                df[c_dummies.columns] = c_dummies[c_dummies.columns]
                regression_confounders.extend(c_dummies.columns)
            else:
                # Drop the first dummy level to avoid perfect collinearity
                # with the intercept.
                df[c_dummies.columns[1:]] = c_dummies[c_dummies.columns[1:]]
                regression_confounders.extend(c_dummies.columns[1:])
        else:
            # Continuous confounder: use the column as-is.
            regression_confounders.append(confounder)
            df.loc[:, confounder] = X[confounder].copy()
    if intercept:
        df.loc[:, 'intercept'] = 1.
        regression_confounders.append('intercept')
    logit = Logit(df[assignment], df[regression_confounders])
    result = logit.fit()
    if store_model_fit:
        self.model_fit = result
    X.loc[:, 'propensity score'] = result.predict(df[regression_confounders])
    return X
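# --- Usage sketch (not from the original source) ---------------------------
# A minimal, hypothetical example of calling score() above. The host class
# PropensityScorer, the column names, and the 'c'/'o' type codes are
# illustrative assumptions; score() is simply attached as a method.
import numpy as np

class PropensityScorer:
    pass

PropensityScorer.score = score  # reuse the function defined above as a method

np.random.seed(0)
n = 500
X_demo = pd.DataFrame({
    'assignment': np.random.binomial(1, 0.5, n),     # binary treatment flag
    'age': np.random.normal(40, 10, n),              # continuous confounder
    'region': np.random.choice(['a', 'b', 'c'], n),  # categorical confounder
})
scored = PropensityScorer().score(X_demo, {'age': 'c', 'region': 'o'},
                                  store_model_fit=True)
print(scored['propensity score'].describe())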
from statsmodels.discrete.discrete_model import Logit


def SM_logit(X, y):
    """Fit a logit model with statsmodels Logit and return the coefficient array."""
    logit = Logit(y, X)
    result = logit.fit()
    coeff = result.params
    return coeff
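# --- Usage sketch (not from the original source) ---------------------------
# A quick, self-contained check of SM_logit on synthetic data; the data and
# coefficients are made up for illustration.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
X_demo = sm.add_constant(rng.normal(size=(200, 2)))
latent = X_demo @ np.array([-0.5, 1.0, 2.0]) + rng.normal(size=200)
y_demo = (latent > 0).astype(float)
print(SM_logit(X_demo, y_demo))  # fitted coefficient array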
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from statsmodels.discrete.discrete_model import Logit


def logit_reg():
    # prep_X_y and df come from the surrounding project (SMOTE-resampled split).
    X_smoted, X_test, y_smoted, y_test = prep_X_y(df, constant=True)
    lm = Logit(y_smoted, X_smoted).fit(method='powell')
    y_pred = lm.predict(X_test).round(0)
    print('Statsmodels Logit Regression--------------------------------')
    print('Confusion Matrix:', confusion_matrix(y_test, y_pred))
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print('Precision:', precision_score(y_test, y_pred))
    print('Recall:', recall_score(y_test, y_pred))
    return lm
def _initialize(cls):
    y, x = cls.y, cls.x
    # Reference: plain, unpenalized Logit fit.
    modp = Logit(y, x)
    cls.res2 = modp.fit(disp=0)

    # With the penalty weight set to zero, the penalized fit should
    # reproduce the reference result.
    mod = LogitPenalized(y, x, penal=cls.penalty)
    mod.pen_weight = 0
    cls.res1 = mod.fit(disp=0)

    cls.atol = 1e-4  # why not closer ?
def _initialize(cls):
    y, x = cls.y, cls.x
    # Reference: unpenalized Logit using only the nonzero exog columns.
    modp = Logit(y, x[:, :cls.k_nonzero])
    cls.res2 = modp.fit(disp=0)

    # Penalized model on the full exog; the penalty should shrink the rest.
    mod = LogitPenalized(y, x, penal=cls.penalty)
    mod.pen_weight *= .5
    mod.penal.tau = 0.05
    cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.atol = 5e-3
import os

import numpy as np
import statsmodels.api as sm
from numpy.testing import assert_raises
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools.sm_exceptions import PerfectSeparationError


def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, 'iris.csv'), delimiter=",",
                         skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    # Keep two classes only so the problem is binary (and perfectly separable).
    X = X[y != 2]
    y = y[y != 2]
    X = sm.add_constant(X, prepend=True)

    mod = Logit(y, X)
    assert_raises(PerfectSeparationError, mod.fit)

    # Turn off raising PerfectSeparationError.
    mod.raise_on_perfect_prediction = False
    mod.fit(disp=False)  # should not raise
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant


class LogReg:
    def __init__(self):
        self.coef_ = None

    def fit(self, X, y):
        # Append the constant as the last column so params[:-1] below drops
        # the intercept rather than a feature coefficient (add_constant
        # prepends by default in current statsmodels).
        X = add_constant(X, prepend=False)
        self.lr = Logit(y, X)
        self.l_fitted = self.lr.fit()
        self.coef_ = self.l_fitted.params[:-1]

    def predict(self, X):
        if self.coef_ is None:
            print('you must first fit the model')
            return
        X = add_constant(X, prepend=False)
        return self.lr.predict(self.l_fitted.params, X)
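# --- Usage sketch (not from the original source) ---------------------------
# A hypothetical smoke test of the LogReg wrapper on synthetic data.
import numpy as np

rng = np.random.default_rng(1)
X_demo = rng.normal(size=(300, 2))
y_demo = (X_demo[:, 0] - X_demo[:, 1] + rng.normal(size=300) > 0).astype(int)

model = LogReg()
model.fit(X_demo, y_demo)
print(model.coef_)                # per-feature coefficients, intercept excluded
print(model.predict(X_demo)[:5])  # predicted probabilities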
class LogisticRegression(object):
    def __init__(self):
        pass

    def fit(self, X, y, **kwargs):
        from statsmodels.discrete.discrete_model import Logit
        self.model = Logit(y, X)
        self.result = self.model.fit()

    def predict_proba(self, X):
        return self.result.predict(X)
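# --- Usage sketch (not from the original source) ---------------------------
# Hypothetical check of the wrapper above. Note the class shadows sklearn's
# LogisticRegression name, and the import deferred into fit() keeps
# statsmodels an optional dependency until training time.
import numpy as np

rng = np.random.default_rng(2)
X_demo = np.column_stack([np.ones(200), rng.normal(size=(200, 2))])
y_demo = (X_demo[:, 1] + rng.normal(size=200) > 0).astype(float)

clf = LogisticRegression()
clf.fit(X_demo, y_demo)
print(clf.predict_proba(X_demo)[:3])  # P(y=1) for the first three rows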
import pandas as pd
from sklearn.model_selection import train_test_split
from statsmodels import tools
from statsmodels.discrete.discrete_model import Logit


def regressOnFeatureSM(self):
    '''
    What: create a regression model with statsmodels and estimate the betas,
          i.e., try to find feature causation.
    In:   trimmed-down data
    Out:  print statements and a fitted logit model
    '''
    df_dummies = pd.get_dummies(self.df_data)
    # Concatenate column-wise; axis=0 would stack rows and misalign with y.
    df_dummies = pd.concat([df_dummies, self.df_data], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        df_dummies, self.df_data[self.predict].values,
        test_size=0.3, random_state=42)
    X_train = tools.add_constant(X_train)
    modelSM = Logit(y_train, X_train).fit()
    print(modelSM.summary())
df.hist()
plt.show()

# Part 2
# 2.1
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

X = df[['gre', 'gpa', 'rank']].values
X_const = add_constant(X, prepend=True)
y = df['admit'].values
logit_model = Logit(y, X_const).fit()

# 2.2
print(logit_model.summary())

# 2.3
import numpy as np
from sklearn.model_selection import KFold  # sklearn.cross_validation was removed in sklearn 0.20
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

kfold = KFold(n_splits=3)  # the old KFold(len(y)) call defaulted to 3 folds
accuracies = []
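# --- Continuation sketch (not from the original source) --------------------
# The snippet stops right after initializing kfold and accuracies; a plausible
# cross-validation loop over the admissions data might look like this. The
# precisions/recalls lists are assumptions mirroring the metric imports above;
# sklearn's LogisticRegression fits its own intercept, so the raw X is used.
precisions, recalls = [], []
for train_idx, test_idx in kfold.split(X):
    model = LogisticRegression()
    model.fit(X[train_idx], y[train_idx])
    y_hat = model.predict(X[test_idx])
    accuracies.append(accuracy_score(y[test_idx], y_hat))
    precisions.append(precision_score(y[test_idx], y_hat))
    recalls.append(recall_score(y[test_idx], y_hat))
print('accuracy:', np.mean(accuracies))
print('precision:', np.mean(precisions))
print('recall:', np.mean(recalls))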