outputs:

@author: Misha

"""

import GSSUtility as GU  # this also imports a whole bunch of other modules

if __name__ == "__main__":

    pathToData = "../../Data/"
    dataCont = GU.dataContainer(pathToData)

    articlesToUse = GU.filterArticles(
        dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=False, centralIVs=True
    )
    print "len of articleClasses:", len(articlesToUse)
    raw_input("...")

    # define the storage containers for outputs
    group1 = "onDataUsed"
    group2 = "onFutureYear"
    output = defaultdict(dict)
    groups = [group1, group2]
    outcomes = [
        "propSig",
        "paramSizesNormed",
        "Rs",
        "adjRs",
        "pvalues",
Пример #2
0

# In[148]:

# Keep only the rows that have a publication year and bucket them by that year.
grouped = df.loc[df['year_published'].notnull()].groupby('year_published')
# Look at the first rows of the 2004 bucket (the result is not assigned).
grouped.get_group(2004).head()


# # Number of variables over time

# In[52]:

# Build a per-article table holding the number of DVs, IVs and controls
# (plus their sum), indexed by article ID.
pathToData = '../../Data/'
dataCont = GU.dataContainer(pathToData)

articlesClasses = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True)

df = pd.DataFrame(columns=['aid', 'yearpublished', 'dvs', 'ivs', 'controls', 'total'])
# Iterate over the filtered list assigned just above. The bare name
# `articleClasses` is never assigned in this block, so looping over it either
# raises NameError or silently picks up an unrelated, unfiltered variable.
for a in articlesClasses:
    # One row per article; dvs/ivs/controls temporarily hold the raw
    # collections and are replaced by their lengths further down.
    df.loc[a.articleID, :] = np.array([a.articleID, a.yearPublished, a.DVs, a.IVs, a.controls, 0], dtype=object)

# Drop articles without a publication year, then normalise the key columns.
df = df[df.yearpublished.notnull()]
df.yearpublished = df.yearpublished.astype(int)
df.aid = df.aid.astype(int)
df.index = df.aid

# Replace each collection by its size. Iterating the column directly yields
# its values, which avoids Series.iteritems() (removed in pandas 2.0).
for col in ('dvs', 'ivs', 'controls'):
    df[col] = [len(v) for v in df[col]]
df.total = df.dvs + df.ivs + df.controls
Пример #3
0
 
############################################################
if __name__ == "__main__":    

    try:
        get_ipython().magic(u'rm ../GSSUtility.pyc # remove this file because otherwise it will be used instead of the updated .py file')
        reload(GU)
    except:
        pass


    pathToData = '../../Data/'
    dataCont = GU.dataContainer(pathToData)
    
    articlesToUse = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=False,                                         centralIVs=False, nextYearBound=0, linearModels=False)            
    print 'len of articleClasses:', len(articlesToUse)
#     raw_input('...')
    
    
    # define the storage containers for outputs
    group1 = 'on last GSS year'
    group2 = 'on first "future" GSS year'   
    groups = [group1, group2]
    outcomes = ['propSig', 'paramSizesNormed', 'Rs', 'adjRs', 'pvalues',  'numTotal',                 'propSig_CentralVars', 'paramSizesNormed_CentralVars', 'pvalues_CentralVars']

    output = defaultdict(dict)
    output['metadata'] = {'article_id':[]}
    for group in groups:
        for outcome in outcomes:
            output[group][outcome] = []
#*********************************************************
# Module-level result lists, one "years used" / "years possible" pair per
# measure. Each name is bound to its own, initially empty, list.
allPropsForYearsUsed, allPropsForYearsPossible = [], []
allParamSizesForYearsUsed, allParamSizesForYearsPossible = [], []
allRsForYearsUsed = []
allRsForYearsPossible = []

 
############################################################
if __name__ == "__main__":    
    
    pathToData = '../../Data/'
    dataCont = GU.dataContainer(pathToData)
    
    articlesToUse = GU.filterArticles(dataCont.articleClasses, GSSYearsUsed=True, GSSYearsPossible=True, centralIVs=False, nextYearBound=3, yearPublished=True)            
    print 'len of articleClasses:', len(articlesToUse)
    raw_input('...')

    YEARS = range(1972, 2013)
    outcomes = ['propSig', 'paramSizesNormed', 'Rs', 'adjRs', 'pvalues', \
                'propSig_CentralVars', 'paramSizesNormed_CentralVars', 'pvalues_CentralVars']
    output = pd.DataFrame(np.empty((len(YEARS), len(outcomes))), columns=outcomes, index=YEARS)    
    output = output.astype(object)
    for row in output.iterrows():
        for col in range(len(row[1])):
            row[1][col] = []
    
    #for article in random.sample(articlesToUse, 150):
    for article in articlesToUse:
    #for article in [a for a in articlesToUse if a.articleID == 6755]: