Example #1
def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True):
    # Assumes the source module's imports: pandas as pd and statsmodels' Logit.
    df = X[[assignment]].copy()  # copy so we don't write into a view of X
    regression_confounders = []
    for confounder, var_type in confounder_types.items():
        if var_type == 'o' or var_type == 'u':
            # One-hot encode ordinal/unordered categoricals, dropping the
            # first level to avoid perfect collinearity with the intercept.
            c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder)
            if len(c_dummies.columns) == 1:
                df[c_dummies.columns] = c_dummies[c_dummies.columns]
                regression_confounders.extend(c_dummies.columns)
            else:
                df[c_dummies.columns[1:]] = c_dummies[c_dummies.columns[1:]]
                regression_confounders.extend(c_dummies.columns[1:])
        else:
            regression_confounders.append(confounder)
            df.loc[:, confounder] = X[confounder].copy()
    if intercept:
        df.loc[:, 'intercept'] = 1.
        regression_confounders.append('intercept')
    # Model treatment assignment on the confounders to get propensity scores.
    logit = Logit(df[assignment], df[regression_confounders])
    result = logit.fit()
    if store_model_fit:
        self.model_fit = result
    X.loc[:, 'propensity score'] = result.predict(df[regression_confounders])
    return X
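A minimal usage sketch for the method above (the toy data and the `self=None` call are illustration-only assumptions; in the original, `score` lives on a class):

import numpy as np
import pandas as pd
from statsmodels.discrete.discrete_model import Logit

rng = np.random.default_rng(0)
X = pd.DataFrame({
    'assignment': rng.integers(0, 2, 200),       # binary treatment
    'age': rng.normal(50, 10, 200),              # continuous confounder
    'region': rng.choice(['n', 's', 'e'], 200),  # unordered categorical
})

# store_model_fit defaults to False, so self is never touched and the
# unbound function can be exercised directly with self=None.
scored = score(None, X, confounder_types={'age': 'c', 'region': 'u'})
print(scored['propensity score'].head())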
Example #2
def SM_logit(X, y):
    """Fit a logistic regression using statsmodels' Logit and
    return the array of estimated coefficients."""
    logit = Logit(y, X)
    result = logit.fit()
    coeff = result.params
    return coeff
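A quick usage sketch (the synthetic data is an assumption; Logit needs an explicit constant column if an intercept is wanted):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 2))
y = (X @ np.array([1.5, -1.0]) + rng.normal(size=100) > 0).astype(int)

coeffs = SM_logit(sm.add_constant(X), y)  # prepend the intercept column
print(coeffs)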
Example #3
def logit_reg():
    X_smoted, X_test, y_smoted, y_test = prep_X_y(df, constant=True)
    lm = Logit(y_smoted, X_smoted).fit(method='powell')
    y_pred = lm.predict(X_test).round(0)
    print('Statsmodels Logit Regression--------------------------------')
    print('Confusion Matrix:', confusion_matrix(y_test, y_pred))
    print('Accuracy:', accuracy_score(y_test, y_pred))
    print('Precision:', precision_score(y_test, y_pred))
    print('Recall:', recall_score(y_test, y_pred))
    return lm
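A note on this example's fit call: method='powell' selects a derivative-free optimizer, which can converge on data where the default Newton iterations fail (presumably why it was chosen here). prep_X_y and df are defined elsewhere in the example's source module.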
Example #4
    def _initialize(cls):
        y, x = cls.y, cls.x

        # Reference fit: plain (unpenalized) Logit.
        modp = Logit(y, x)
        cls.res2 = modp.fit(disp=0)

        # A penalized model with the penalty weight zeroed out should
        # reproduce the unpenalized estimates.
        mod = LogitPenalized(y, x, penal=cls.penalty)
        mod.pen_weight = 0
        cls.res1 = mod.fit(disp=0)

        cls.atol = 1e-4  # why not closer ?
Example #5
    def _initialize(cls):
        y, x = cls.y, cls.x
        # Reference fit: plain Logit on the truly nonzero regressors only.
        modp = Logit(y, x[:, :cls.k_nonzero])
        cls.res2 = modp.fit(disp=0)

        # Penalized fit on the full design; the penalty should shrink the
        # spurious extra coefficients toward zero.
        mod = LogitPenalized(y, x, penal=cls.penalty)
        mod.pen_weight *= .5
        mod.penal.tau = 0.05
        cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

        # Compare only the coefficients of the nonzero regressors.
        cls.exog_index = slice(None, cls.k_nonzero, None)

        cls.atol = 5e-3
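Examples #4 and #5 use internal test scaffolding (LogitPenalized). For everyday use, the public route to a penalized logit in statsmodels is fit_regularized; a minimal sketch on synthetic data:

import numpy as np
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Logit

rng = np.random.default_rng(0)
x = sm.add_constant(rng.normal(size=(200, 5)))
# Only the first two slopes are truly nonzero.
beta = np.array([0.5, 1.0, -1.0, 0.0, 0.0, 0.0])
y = (x @ beta + rng.logistic(size=200) > 0).astype(int)

# L1-penalized fit; alpha controls the penalty weight per coefficient.
res = Logit(y, x).fit_regularized(method='l1', alpha=1.0, disp=0)
print(res.params)  # the spurious coefficients are shrunk toward zero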
Example #6
def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, 'iris.csv'), delimiter=",",
                         skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    # Keep only two classes so the outcome is binary.
    X = X[y != 2]
    y = y[y != 2]
    X = sm.add_constant(X, prepend=True)
    mod = Logit(y, X)
    # The two remaining classes are perfectly separable, so fitting raises.
    assert_raises(PerfectSeparationError, mod.fit)
    # Turn off raising PerfectSeparationError.
    mod.raise_on_perfect_prediction = False
    mod.fit(disp=False)  # should not raise
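A self-contained illustration of the same behavior on toy data (the data is an assumption; note that newer statsmodels releases may warn rather than raise):

import numpy as np
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools.sm_exceptions import PerfectSeparationError

# x < 3 always has y == 0 and x >= 3 always has y == 1: perfect separation.
x = np.column_stack([np.ones(6), np.arange(6)])
y = np.array([0, 0, 0, 1, 1, 1])

try:
    Logit(y, x).fit(disp=0)
except PerfectSeparationError as err:
    print('caught:', err)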
Example #7
class LogReg:
    def __init__(self):
        self.coef_ = None

    def fit(self, X, y):
        # Append the constant column so that params[:-1] below really
        # drops the intercept rather than a slope coefficient.
        X = add_constant(X, prepend=False)
        self.lr = Logit(y, X)
        self.l_fitted = self.lr.fit()
        self.coef_ = self.l_fitted.params[:-1]

    def predict(self, X):
        if self.coef_ is None:
            print('you must first fit the model')
            return
        X = add_constant(X, prepend=False)
        return self.lr.predict(self.l_fitted.params, X)
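A usage sketch for the wrapper above (synthetic data is an assumption; Logit and add_constant are imported in the original module):

import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(150, 3))
y = (X[:, 0] - X[:, 1] + rng.logistic(size=150) > 0).astype(int)

model = LogReg()
model.fit(X, y)
print('slopes:', model.coef_)          # intercept excluded
print('probs:', model.predict(X)[:5])  # predicted P(y=1)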
Example #8
class LogisticRegression(object):
    def __init__(self):
        pass

    def fit(self, X, y, **kwargs):
        self.model = Logit(y, X)
        self.result = self.model.fit()
    
    def predict_proba(self, X):
        return self.result.predict(X)
Example #9
class LogisticRegression(object):
    def __init__(self):
        pass

    def fit(self, X, y, **kwargs):
        from statsmodels.discrete.discrete_model import Logit
        self.model = Logit(y, X)
        self.result = self.model.fit()
    
    def predict_proba(self, X):
        return self.result.predict(X)
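A usage sketch for this thin wrapper (the data here is a synthetic assumption):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(2)
X = sm.add_constant(rng.normal(size=(100, 2)))
y = (X[:, 1] + rng.logistic(size=100) > 0).astype(int)

clf = LogisticRegression()
clf.fit(X, y)
print(clf.predict_proba(X)[:5])  # predicted P(y=1) for the first rows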
Example #10
    def regressOnFeatureSM(self):
        '''
        What: create a regression model with statsmodels and try to find
        the betas, i.e., try to find feature causation.

        In: trimmed-down data
        Out: a printed summary of the fitted logit model
        '''
        # get_dummies already returns the whole frame with categoricals
        # one-hot encoded; concatenating the raw frame back on would only
        # duplicate rows and introduce NaNs in the dummy columns.
        df_dummies = pd.get_dummies(self.df_data)

        X_train, X_test, y_train, y_test = train_test_split(df_dummies, self.df_data[self.predict].values,
                                                            test_size=0.3, random_state=42)
        X_train = tools.add_constant(X_train)
        modelSM = Logit(y_train, X_train).fit()

        print(modelSM.summary())
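Note the tools.add_constant call above: unlike scikit-learn's LogisticRegression, statsmodels' Logit does not add an intercept automatically, so a constant column must be appended to the design matrix explicitly.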
Example #11
def fit(self, X, y, **kwargs):
    from statsmodels.discrete.discrete_model import Logit
    self.model = Logit(y, X)
    self.result = self.model.fit()
Example #12
def fit(self, X, y, **kwargs):
    self.model = Logit(y, X)
    self.result = self.model.fit()
Example #13
def fit(self, X, y):
    # Append the constant so params[:-1] drops the intercept (see Example #7).
    X = add_constant(X, prepend=False)
    self.lr = Logit(y, X)
    self.l_fitted = self.lr.fit()
    self.coef_ = self.l_fitted.params[:-1]
Example #14
df.hist()
plt.show()


# Part 2
# 2.1

from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

X = df[['gre', 'gpa', 'rank']].values
X_const = add_constant(X, prepend=True)
y = df['admit'].values

logit_model = Logit(y, X_const).fit()

# 2.2

print(logit_model.summary())
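# (Illustrative addition, not in the original script:) the fitted
# coefficients are log-odds, so exponentiating them gives odds ratios
# per unit increase in each predictor.
import numpy as np
print(np.exp(logit_model.params))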

# 2.3

import numpy as np
from sklearn.model_selection import KFold  # sklearn.cross_validation was removed
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

kfold = KFold(n_splits=3)  # modern API; the removed KFold(n) form defaulted to 3 folds

accuracies = []
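# Sketch of how such a loop typically continues (an assumption; the
# original snippet is truncated at `accuracies = []`):
for train_idx, test_idx in kfold.split(X):
    clf = LogisticRegression()
    clf.fit(X[train_idx], y[train_idx])
    accuracies.append(accuracy_score(y[test_idx], clf.predict(X[test_idx])))
print('mean CV accuracy:', np.mean(accuracies))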