示例#1
0
def autoviz(data):
    '''
    Chart a data set automatically by handing it to AutoViz.

    AutoViz is treated as an optional dependency: it is looked up first, and
    when it cannot be found an error explaining how to install it is logged
    and nothing is plotted.

    Parameter:
    --------------------
        data: Dataframe
            The data to plot. It is passed to AutoViz as ``dfte`` with an
            empty ``filename`` and ``max_cols_analyzed=50``.

    Return:
        None. The value returned by AutoViz is not propagated; the charts
        are produced by AutoViz itself as a side effect.
    '''
    import importlib.util
    import logging

    logging.basicConfig()

    package_name = 'autoviz'

    # Probe for the package without importing it, so a missing install ends
    # in a readable log line instead of an ImportError.
    if importlib.util.find_spec(package_name) is None:
        install_hint = (
            "is not installed, to use this function, you must install "
            + package_name
            + ". \n To install, use 'pip install autoviz'"
        )
        logging.error(package_name + " " + install_hint)
        return

    # Deferred import: only reached once the package is known to exist.
    from autoviz.AutoViz_Class import AutoViz_Class

    visualizer = AutoViz_Class()
    visualizer.AutoViz(filename='', dfte=data, max_cols_analyzed=50)
示例#2
0
def run_eda(df, dep_var="", chosen_val="Pandas Profiling"):
    """Render an exploratory-data-analysis view of ``df`` with the chosen tool.

    Parameters
    ----------
    df : DataFrame
        The data to analyse.
    dep_var : str, optional
        Name of the dependent (target) column. Forwarded to Sweetviz as
        ``target_feat`` and to AutoViz as ``depVar``; it also selects the
        AutoViz plot sub-folder that is read back.
    chosen_val : str, optional
        Which tool to run: "Pandas Profiling", "Sweetviz", "Autoviz",
        "DataPrep" or "Summary Table". Any other value does nothing.

    Returns
    -------
    None
        All output goes to the Streamlit page or to a browser tab.
    """
    if chosen_val == "Pandas Profiling":
        pr = ProfileReport(df, explorative=True)
        st_profile_report(pr)
    elif chosen_val == "Sweetviz":
        st.write("opening new tab")
        # Columns of dtype datetime64[ns] are excluded before analysis.
        rep = sv.analyze(
            df.select_dtypes(exclude="datetime64[ns]"), target_feat=dep_var
        )
        rep.show_html()
    elif chosen_val == "Autoviz":
        AV = AutoViz_Class()
        chart_format = "jpg"

        # Analyse every row and every column of the frame
        # (the limits were previously fixed at 150000 rows / 30 columns).
        dft = AV.AutoViz(
            filename="",
            sep=",",
            depVar=dep_var,
            dfte=df,
            header=0,
            verbose=2,
            lowess=False,
            chart_format=chart_format,
            max_rows_analyzed=len(df),
            max_cols_analyzed=df.shape[1],
        )
        st.write(dft.head())
        st.write("Autoviz")

        # The saved charts are read back from AutoViz_Plots/<folder>/, where
        # the folder is the dependent variable name, or "empty_string" when
        # no dependent variable was given.
        stored_folder = dep_var if dep_var != "" else "empty_string"
        pattern = cwd + f"/AutoViz_Plots/{stored_folder}/*.{chart_format}"
        # glob() already returns a list; no extra copy is needed to iterate.
        for image_path in glob(pattern):
            st.image(Image.open(image_path))
    elif chosen_val == "DataPrep":
        try:
            dpplot(df, *xy).show_browser()
        except Exception:
            # Best-effort fallback: embed the whole-frame report in the page.
            # Only ordinary errors are caught, so Ctrl-C / SystemExit still
            # propagate instead of being swallowed.
            stc.html(display_html(dpplot(df).report))
    elif chosen_val == "Summary Table":
        get_df(df)
    names=[
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'
    ])

# Preview the iris data loaded above.
print(irisData.head())

# Single definition of the wine-reviews CSV location; it is read twice below
# (once by pandas, once by AutoViz), so the path is kept in one place.
WINE_REVIEWS_CSV = 'D:/Personal/Github/UpdatedMain/FreeCodeCamp-DS/With Python/Visualization/Dataset/winemag-data-130k-v2.csv'

wineReviews = pd.read_csv(WINE_REVIEWS_CSV)

print(wineReviews.head())

# Let AutoViz chart the same CSV straight from disk.
AV = AutoViz_Class()

df = AV.AutoViz(WINE_REVIEWS_CSV)

# Matplotlib is the most popular python plotting library.
# Matplotlib is specifically good for creating basic graphs like line charts,
# bar charts, histograms and many more.

# Sample plot with 4 numbers
plt.plot([1, 3, 2, 4])
plt.title('Sample with 4 numbers')
plt.xlabel('X-Axis')
plt.ylabel('Y-Axis')
plt.show()

# Sample plot with X and Y values
plt.plot([1, 2, 3, 4], [1, 4, 9, 16])
# To add the points in some shape
示例#4
0
"""
AutoViz example, see https://github.com/AutoViML
"""
from autoviz.AutoViz_Class import AutoViz_Class

from weo import get

# Create the visualiser, then fetch the data set it will chart.
AV = AutoViz_Class()

# NOTE(review): presumably the October 2019 release of the `weo` data set;
# confirm against the `weo` package documentation.
w = get(2019, "Oct")

# Every AutoViz option is collected in one mapping and unpacked into the
# call, so the settings can be read (and tweaked) in a single place.
autoviz_options = dict(
    depVar="NGDP_RPCH",
    # The 2024 slice is transposed before it is handed to AutoViz.
    dfte=w.fix_year(2024).T,
    header=0,
    verbose=2,
    lowess=False,
    chart_format="svg",
    max_rows_analyzed=1000,
    max_cols_analyzed=30,
)

# The empty first argument is the filename: the data comes in through dfte.
dft = AV.AutoViz("", **autoviz_options)
示例#5
0
# Slice the COVID frame into several views, save a combined CSV and hand that
# CSV to AutoViz.
# NOTE(review): `df`, `new_df`, `mydata` and `pd` are not defined in the
# visible part of this script -- presumably set up earlier; confirm.
#print(mydata)
# Columns at positions 1 through 4 of df.
death=df.iloc[:,1:5]
#print(death)
newdata=df.loc[:,['date','total_cases','total_deaths']]
#print(newdata)
# Place new_df and df side by side (column-wise concatenation).
alldata=pd.concat([new_df, df], axis=1)



#df['month'] = pd.DatetimeIndex(df['date']).month
#print(df['month'])
# Add a "YYYY-MM" string column derived from the 'date' column.
df['yyyy-mm'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m')
#print(df['yyyy-mm'])
# Drop the first two characters of each "YYYY-MM" string.
df_date=df['yyyy-mm'].str[2:]
#print(df_date)
new_cases=df.loc[:, ['new_cases']]
#print(new_cases)
# NOTE(review): the next three selections read from `mydata`, not `df`.
total_cases=mydata.loc[:,['total_cases']]
#print(total_cases)
total_deaths =mydata.loc[:,['total_deaths']]
new_deaths =mydata.loc[:,['new_deaths']]

# `alldata` was built above, before the 'yyyy-mm' column was added to df.
alldata.to_csv('covid_data.csv', index=False)
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
# Rebinds `df` to whatever AutoViz returns for the CSV that was just written.
df = AV.AutoViz("covid_data.csv")




# NOTE(review): `fig` and `departures` are not defined in the visible part of
# this script -- presumably created earlier; confirm.
fig.show()

# D-Tale EDA
# pip install dtale
import dtale
d = dtale.show(departures)
d.open_browser()

# pandas-profiling
# pip install pandas-profiling
import pandas_profiling
# The report object is built but not assigned or saved here.
pandas_profiling.ProfileReport(departures)

# AutoViz
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()

# The frame is passed through `dfte`, so `filename` is left empty;
# 'fyear' is given as the dependent variable.
sep = ','
dft = AV.AutoViz(filename="",
                 sep=sep,
                 depVar='fyear',
                 dfte=departures,
                 header=0,
                 verbose=2,
                 lowess=False,
                 chart_format='svg',
                 max_rows_analyzed=150000,
                 max_cols_analyzed=30)

# DataPrep report, opened in a browser.
# NOTE(review): this uses `df` while every call above uses `departures` --
# confirm which frame is intended.
from dataprep.eda import create_report
create_report(df).show_browser()
示例#7
0
import pandas as pd

# Load the Titanic data from the working directory.
titanic = pd.read_csv('titanic.csv')
# head has to be *called*: printing the bare attribute shows the bound
# method's repr rather than the first rows.
print(titanic.head())

# Data EDA in two lines of code using pandas_profiling
import pandas_profiling as pp

profile = pp.ProfileReport(titanic, explorative=True)
profile.to_file('output.html')

# EDA using Sweetviz
import sweetviz as sv

sweet_report = sv.analyze(titanic)
sweet_report.show_html('sweet_report.html')

# EDA using Autoviz
from autoviz.AutoViz_Class import AutoViz_Class

# The class is constructed without arguments; the frame is handed to the
# AutoViz() method through `dfte`, with an empty filename.
aviz = AutoViz_Class()
aviz.AutoViz(filename='', dfte=titanic)

# EDA using dtale
import dtale

dtale.show(titanic, ignore_duplicate=True)
示例#8
0
# NOTE(review): `subm`, `train`, `test`, `target`, `new_col` and the first use
# of `sel_feats` are not defined in the visible part of this script --
# presumably left over from earlier notebook cells; confirm.
print(subm.shape)
subm.head()

import pandas as pd
from autoviml.Auto_ViML import Auto_ViML

sample_submission = subm
scoring_parameter = 'rmse'
model, feats, trainm, testm = Auto_ViML(train[sel_feats+[new_col,target]], target, test[sel_feats+[new_col]], sample_submission=subm,
          hyper_param='GS', feature_reduction=True,
          scoring_parameter=scoring_parameter, Boosting_Flag=False, KMeans_Featurizer=True,
          Add_Poly=0, Stacking_Flag=True, Binning_Flag=True, Imbalanced_Flag=False, verbose=0)

preds = [x for x in list(train) if x not in [target]]
# Give the test rows a placeholder target so both frames share the same columns.
test[target] = -1.0
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way (original indexes kept).
df = pd.concat([train, test])
print(df.shape)
df.head(2)

from autoviz.AutoViz_Class  import AutoViz_Class
AV = AutoViz_Class()
# NOTE(review): depVar is the literal string 'target', while the code above
# uses the variable `target` -- confirm they name the same column.
dft = AV.AutoViz(filename="", sep=',', depVar='target', dfte=df)

### AutoViz has selected top 30 features => let's use them to build a model.
sel_feats = dft.columns[:30].tolist()
sel_feats



#model.predict(testm[feats])
testm.head()

示例#9
0
# -*- coding: utf-8 -*-
"""EDA.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1UgWBehsqz95ylz2pHkS_fKdhpRIRTGkY
"""

# Prerequisites. In the original notebook these were shell commands
# ("!pip install ..."), which are not valid in a plain Python file; run them
# in a terminal first:
#   pip install autoviz
#   pip install pandas_visual_analysis
#   pip install sweetviz

from autoviz.AutoViz_Class import AutoViz_Class

# AutoViz reads the retail CSV directly from disk.
EDA_Retail_Store=AutoViz_Class()

EDA_Retail_Store.AutoViz('Task-3-EDA_Retail.csv')

import pandas as pd

df=pd.read_csv('matches.csv')

# Visual analysis of the matches data.
from pandas_visual_analysis import VisualAnalysis
VisualAnalysis(df)

import sweetviz as sv
my_report = sv.analyze(df)
my_report.show_html() #generate html file,Report will be generated in html file which will not be part of pynb file