示例#1
0
# Print, for each column reported by df.describe(), its dtype and a compact
# dict holding a positional subset of that column's summary statistics.
print('df.describe() stats:')
short_desc = df.describe()
# Row positions kept from each describe() row.
# NOTE(review): assumes the default 8-row numeric describe() layout, where
# positions 4-6 are the quartiles -- confirm for non-numeric frames.
picked = [0, 1, 2, 3, 7]
for col, stats in short_desc.T.iterrows():
    col_data = df[col]
    # A duplicated column label makes df[col] a DataFrame, which has no
    # single dtype, so fall back to the Python type in that case.
    if isinstance(col_data, pd.Series):
        col_type = col_data.dtype
    else:
        col_type = type(col_data)
    print('{} ({})'.format(col, col_type))
    stat_names = list(stats.index.values[picked])
    stat_values = list(stats.values[picked])
    print(dict(zip(stat_names, stat_values)))
    


# In[ ]:

# Profile the whole DataFrame with pandas_profiling; this takes a few minutes.
print('Using pandas_profiling to generate more detailed stats, including correlation between columns, skew etc')
# pandas_profiling.ProfileReport raises Tkinter exceptions before it can produce any output,
# so describe() is called directly; it at least produces a dataframe of stats.
desc = pandas_profiling.describe(df)
# Bare expression: as the last statement of a notebook cell it displays the 'table' entry;
# when run as a plain script it has no visible effect.
desc['table']
# for col, stats in desc['variables'].iterrows():
#     print('')
#     print(col)
#     print('{} ({})'.format(col, df[col].dtype if isinstance(df[col], pd.Series) else type(df[col])))
#     print(stats)

# and if you thought that was tough to read, try printing out all the desc['freq'] dicts of histograms


# In[43]:


# Inject CSS so the notebook's .container element uses the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Raise the pandas 'display.max_rows' option to 500.
pd.set_option('display.max_rows', 500)
示例#2
0
文件: explore.py 项目: Redwa/twip
    print('{} ({})'.format(col, df[col].dtype if isinstance(df[col], pd.Series) else type(df[col])))
    print(stats)

# Render the profiling stats held in `desc` to HTML, passing the first 3 rows
# of df as the sample, then save the result to report.html.
html = pandas_profiling.to_html(df.head(3), desc)
# Use a context manager so the file is flushed and closed deterministically,
# even if write() raises; the bare open(...).write(...) leaked the handle.
with open('report.html', 'w') as report_file:
    report_file.write(html)

# this is redundant with stats above and takes way longer than it should (30 minutes?)
# print('Column, Count, Min, Mean, Max:')
# for k, c, colmin, colmean, colmax in izip(df.columns, df.count().T, df.min().T, df.mean().T, df.max().T):
#     print('{:40s}\t{}\t{}\t{}\t{}'.format(k, c, colmin, colmean, colmax))

# Compute the profiling stats for the whole DataFrame and dump them to stdout.
# This takes a few minutes.
print('Trying to compute a ProfileReport, including correlation between columns, skew etc')
# pandas_profiling.ProfileReport raises Tkinter exceptions before it can produce any output,
# so describe() is called directly; it at least produces a dataframe of stats.
report = dict2obj(pandas_profiling.describe(df))
print(report['table'])

# One blank-line-separated section per row of the 'variables' stats.
print('')
for col, stats in report['variables'].iterrows():
    print('')
    print(col)
    print(stats)

# One blank-line-separated section per entry of 'freq'.
# .items() replaces .iteritems(): the latter exists only on Python 2 dicts
# and was removed from pandas in 2.0, while .items() works on dicts under
# both Python 2 and 3 and on pandas objects.
print('')
for col, stats in report['freq'].items():
    print('')
    print(stats)

示例#3
0

# In[9]:

# Print the full describe() summary of every column that df.describe()
# reports, each preceded by a blank line and a "name (dtype)" heading.
print('df.describe() stats:')
desc = df.describe()
for col, stats in desc.T.iterrows():
    print('')
    col_data = df[col]
    # A duplicated column label makes df[col] a DataFrame, which has no
    # single dtype, so fall back to the Python type in that case.
    if isinstance(col_data, pd.Series):
        col_type = col_data.dtype
    else:
        col_type = type(col_data)
    print('{} ({})'.format(col, col_type))
    print(stats)


# In[10]:

# Profile the whole DataFrame with pandas_profiling and print the results.
# This takes a few minutes.
print('Using pandas_profiling to generate more detailed stats, including correlation between columns, skew etc')
# pandas_profiling.ProfileReport raises Tkinter exceptions before it can produce any output,
# so describe() is called directly; it at least produces a dataframe of stats.
report = pandas_profiling.describe(df)
print(report['table'])

# One blank-line-separated section per row of the 'variables' stats:
# the row label followed by that row's values.
print('')
for col, stats in report['variables'].iterrows():
    print('')
    print(col)
    # Disabled alternative heading that also shows the column dtype:
    # print('{} ({})'.format(col, df[col].dtype if isinstance(df[col], pd.Series) else type(df[col])))
    print(stats)

# and if you thought that was bad, try printing out all the report['freq'] dict of histograms

示例#4
0
    print(stats)

# Render the profiling stats held in `desc` to HTML, passing the first 3 rows
# of df as the sample, then save the result to report.html.
html = pandas_profiling.to_html(df.head(3), desc)
# Use a context manager so the file is flushed and closed deterministically,
# even if write() raises; the bare open(...).write(...) leaked the handle.
with open('report.html', 'w') as report_file:
    report_file.write(html)

# this is redundant with stats above and takes way longer than it should (30 minutes?)
# print('Column, Count, Min, Mean, Max:')
# for k, c, colmin, colmean, colmax in izip(df.columns, df.count().T, df.min().T, df.mean().T, df.max().T):
#     print('{:40s}\t{}\t{}\t{}\t{}'.format(k, c, colmin, colmean, colmax))

# Compute the profiling stats for the whole DataFrame and dump them to stdout.
# This takes a few minutes.
print(
    'Trying to compute a ProfileReport, including correlation between columns, skew etc'
)
# pandas_profiling.ProfileReport raises Tkinter exceptions before it can produce any output,
# so describe() is called directly; it at least produces a dataframe of stats.
report = dict2obj(pandas_profiling.describe(df))
print(report['table'])

# One blank-line-separated section per row of the 'variables' stats.
print('')
for col, stats in report['variables'].iterrows():
    print('')
    print(col)
    print(stats)

# One blank-line-separated section per entry of 'freq'.
# .items() replaces .iteritems(): the latter exists only on Python 2 dicts
# and was removed from pandas in 2.0, while .items() works on dicts under
# both Python 2 and 3 and on pandas objects.
print('')
for col, stats in report['freq'].items():
    print('')
    print(stats)
示例#5
0
        col,
        df[col].dtype if isinstance(df[col], pd.Series) else type(df[col])))
    print(
        dict(
            zip(list(stats.index.values[[0, 1, 2, 3, 7]].T),
                list(stats.values[[0, 1, 2, 3, 7]].T))))

# In[ ]:

# Profile the whole DataFrame with pandas_profiling; this takes a few minutes.
print(
    'Using pandas_profiling to generate more detailed stats, including correlation between columns, skew etc'
)
# pandas_profiling.ProfileReport raises Tkinter exceptions before it can produce any output,
# so describe() is called directly; it at least produces a dataframe of stats.
desc = pandas_profiling.describe(df)
# Bare expression: as the last statement of a notebook cell it displays the 'table' entry;
# when run as a plain script it has no visible effect.
desc['table']
# for col, stats in desc['variables'].iterrows():
#     print('')
#     print(col)
#     print('{} ({})'.format(col, df[col].dtype if isinstance(df[col], pd.Series) else type(df[col])))
#     print(stats)

# and if you thought that was tough to read, try printing out all the desc['freq'] dicts of histograms

# In[43]:

# Bare expression: in a notebook cell this displays the 'variables' entry of the
# profiling result; when run as a plain script it has no visible effect.
desc['variables']

# In[38]: