Exemplo n.º 1
0
 def pandas_profiling_analyze(
     self,
     title: str = "Movie dataset",
     sort: Union[str, None] = None,
     explorative: bool = True,
     **kwargs: Any,
 ):
     """
     Build a pandas-profiling report over ``self.data`` and show it as widgets.

     https://pandas-profiling.github.io/

     :param title: Title given to the report.
     :param sort: Ordering of the variables: asc(ending), desc(ending) or
         None (keeps the original order).
     :param explorative: Forwarded to ``ProfileReport`` as ``explorative``
         (whether to use the explorative config).
     :param kwargs: Any further keyword arguments, forwarded unchanged to
         ``ProfileReport``.
     :return: The result of ``ProfileReport.to_widgets()`` (documented by the
         original author as None).
     """
     # The report is always requested with the full-width HTML style.
     html_options = {"style": {"full_width": True}}

     report = ProfileReport(
         self.data,
         title=title,
         html=html_options,
         sort=sort,
         explorative=explorative,
         **kwargs,
     )

     # Render through the notebook widgets interface and hand back its result.
     widgets = report.to_widgets()
     return widgets
# Store `log_pass` (defined earlier, not shown here) on `air` under the
# 'log_pass' key.
air['log_pass'] = log_pass

# In[7]:

# Bare expression: as the last statement of a notebook cell it displays `air`.
air

# In[8]:

# EDA: build a profiling report over `air` with the explorative option enabled.
report = ProfileReport(air,
                       title="Profile Report of the Airlines data",
                       explorative=True)

# In[29]:

# Render the report with the notebook widgets interface.
report.to_widgets()

# In[ ]:

# Findings the author recorded from the report:
# 1) There are no missing values.
# 2) There are no duplicate rows.
# 3) There is no multicollinearity.

# In[9]:

# Plot the `Passengers` variable over the whole time period available in the data.
# NOTE(review): this cell reads `df`, while the cells above work on `air` --
# confirm `df` is defined earlier in the notebook and is the intended frame.
df.Passengers.plot()

# In[ ]:
# Call describe() on `data` (presumably a DataFrame) for a statistical summary.
data.describe()

# In[83]:

# NOTE(review): the pandas-profiling package has since been renamed to
# ydata-profiling -- confirm which one the target environment provides.
from pandas_profiling import ProfileReport

# In[84]:

# Build a profiling report over `data` with the explorative option enabled.
profile = ProfileReport(data,
                        title='Pandas Profiling Report',
                        explorative=True)

# In[85]:

# Render the report with the notebook widgets interface.
profile.to_widgets()

# # Data Manipulation

# Checking the correlation of the data

# In[4]:

# Pairwise correlation between the columns of `data`.
# NOTE(review): on pandas >= 2.0 `DataFrame.corr()` no longer drops non-numeric
# columns by default -- confirm `data` is all-numeric or pass `numeric_only`.
corrmat = data.corr()

# Draw the correlation matrix as a heatmap on a 15x8 figure.
f, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(corrmat, ax=ax, cmap="YlGnBu", linewidths=0.9)

# In[6]:

#Correlation with output variable
Exemplo n.º 4
0
# # EDA

# In[253]:


# The data frame at a glance: `info()` prints a summary of `app_train`.
app_train.info()


# In[254]:


# Profile report of the training data, built with `minimal=True`
# (presumably to keep the report cheap to compute -- see the library docs).
trainprofile = ProfileReport(app_train, minimal=True)
# Render the report with the notebook widgets interface.
trainprofile.to_widgets()


# In[255]:


# How many unique classes does each categorical feature have?
# Only columns selected by dtype 'object' are considered; `nunique` is applied
# per column (axis = 0).
app_train.select_dtypes('object').apply(pd.Series.nunique, axis = 0)


# In[256]:


#list columns with most missing values
def missing_values_table(df):
        # Total missing values
Exemplo n.º 5
0
# In[ ]:

# Call skew() on `Df`; as a bare expression the result is shown as the cell output.
Df.skew()

# In[ ]:

# Fact table quick summary using pandas-profiling; the HTML style is set to
# full width.
profile = ProfileReport(Df,
                        title='Pandas Profiling Report',
                        html={'style': {
                            'full_width': True
                        }})

# In[ ]:

# Render the fact-table report with the notebook widgets interface.
profile.to_widgets()

# In[ ]:

# Also hand the report object to `display` (not defined in this snippet;
# presumably IPython's display -- confirm).
display(profile)

# In[ ]:

# Dimension table quick summary using pandas-profiling, with the same
# full-width HTML style. The report is only constructed in this cell.
DimProfile = ProfileReport(DimDf,
                           title='Pandas Dimension table Profile Report',
                           html={'style': {
                               'full_width': True
                           }})

# In[ ]:
Exemplo n.º 6
0
def do_a_pandas_profile(df, name):
    """Profile ``df`` and render the report as widgets, then as an iframe.

    :param df: Data handed to ``ProfileReport`` as its first argument.
    :param name: Used as the report title.
    :return: None (nothing is returned explicitly).
    """
    # The import is local to this function, as in the original.
    from pandas_profiling import ProfileReport

    # The report is requested with the full-width HTML style.
    full_width_style = {'style': {'full_width': True}}
    report = ProfileReport(df, title=name, html=full_width_style)

    # Render with both notebook front-ends, widgets first.
    report.to_widgets()
    report.to_notebook_iframe()