# Fit an OLS model of DV on the article's IVs using the rows selected for a
# GSS year. NOTE: the trailing `break` means only the first year in
# article.GSSYearsUsed is actually processed.
for year in article.GSSYearsUsed:
    # Take an explicit copy so the imputation below never writes back into
    # dataCont.df (and does not trigger SettingWithCopyWarning).
    design = dataCont.df.loc[year, [DV] + article.IVs].copy()
    formula = GU.createFormula(dataCont, design)

    # Impute only when listwise deletion would leave no more complete rows
    # than there are columns in the design.
    if len(design.dropna()) <= design.shape[1]:
        # presumably a list of nominal (categorical) column names -- confirm
        # against GU.createFormula.
        nominals = GU.createFormula(dataCont, design, return_nominals=True)
        nominal_set = set(nominals)
        # A plain list (not a set) is required to index DataFrame columns.
        non_nominals = [col for col in design.columns if col not in nominal_set]
        if non_nominals:
            # The naive way: replace missing values with the column mean.
            design[non_nominals] = design[non_nominals].fillna(design[non_nominals].mean())
        if len(nominals) > 0:
            # DataFrame.mode() returns a *DataFrame* indexed 0..k-1; passing
            # it straight to fillna aligns on row labels and fills nothing
            # unless the design happens to be indexed 0, 1, ... Use the first
            # mode row (a Series keyed by column) so every row gets filled.
            modes = design[nominals].mode()
            if not modes.empty:  # empty when every nominal column is all-NaN
                design[nominals] = design[nominals].fillna(modes.iloc[0])

    design = GU.removeConstantColumns(design.dropna())
    print(smf.ols(formula, data=design.dropna()).fit().summary())
    break


# In[49]:

# Show the model formula built in the previous cell, then preview the first
# rows of the design matrix (rendered as the notebook cell's output).
print(formula)
design.head(n=5)


# In[68]:

DV = 'GRASS'
for year in article.GSSYearsUsed:
    design = dataCont.df.loc[year, [DV] + article.IVs]