def autoviz(data):
    """Automatically visualize a data set with the optional ``autoviz`` package.

    If the data set is large, AutoViz uses a statistically valid sample for
    plotting.

    Parameters
    ----------
    data : DataFrame
        The data to plot.

    Returns
    -------
    None
        Nothing is returned; the plots are produced by ``AutoViz`` itself.
        When ``autoviz`` is not installed, an error explaining how to install
        it is logged and no plotting is attempted.
    """
    # autoviz is an optional dependency, so probe for it before importing:
    # a missing install then yields an actionable log message rather than an
    # ImportError.
    import importlib.util
    import logging

    logging.basicConfig()
    package_name = 'autoviz'

    if importlib.util.find_spec(package_name) is None:
        err_msg = (
            "is not installed, to use this function, you must install "
            + package_name
            + ". \n To install, use 'pip install autoviz'"
        )
        # Lazy %-style arguments; the emitted text is "<package> <err_msg>".
        logging.error("%s %s", package_name, err_msg)
        return None

    from autoviz.AutoViz_Class import AutoViz_Class

    av = AutoViz_Class()
    # An empty filename tells AutoViz to read from the in-memory frame
    # passed through ``dfte``.
    av.AutoViz(filename='', dfte=data, max_cols_analyzed=50)
    return None
def run_eda(df, dep_var="", chosen_val="Pandas Profiling"):
    """Run the exploratory-data-analysis tool selected by ``chosen_val``.

    Parameters
    ----------
    df : DataFrame
        The data to analyse.
    dep_var : str, optional
        Name of the dependent (target) column. It is forwarded to Sweetviz
        as ``target_feat`` and to AutoViz as ``depVar``. Defaults to ``""``.
    chosen_val : str, optional
        Which tool to run. Recognised values are ``"Pandas Profiling"``,
        ``"Sweetviz"``, ``"Autoviz"``, ``"DataPrep"`` and
        ``"Summary Table"``. Any other value does nothing.

    Returns
    -------
    None
        All output is rendered through the Streamlit helpers or the
        individual tools.
    """
    if chosen_val == "Pandas Profiling":
        pr = ProfileReport(df, explorative=True)
        st_profile_report(pr)
    elif chosen_val == "Sweetviz":
        st.write("opening new tab")
        # Columns of dtype datetime64[ns] are excluded before analysis.
        rep = sv.analyze(
            df.select_dtypes(exclude="datetime64[ns]"), target_feat=dep_var
        )
        rep.show_html()
    elif chosen_val == "Autoviz":
        AV = AutoViz_Class()
        chart_format = "jpg"
        # Analyse the full frame: the row and column limits are set to the
        # frame's own dimensions.
        dft = AV.AutoViz(
            filename="",
            sep=",",
            depVar=dep_var,
            dfte=df,
            header=0,
            verbose=2,
            lowess=False,
            chart_format=chart_format,
            max_rows_analyzed=len(df),
            max_cols_analyzed=df.shape[1],
        )
        st.write(dft.head())
        st.write("Autoviz")
        # The saved charts are looked up in a folder named after the
        # dependent variable, or "empty_string" when none was given.
        stored_folder = dep_var if dep_var != "" else "empty_string"
        # ``cwd`` is a module-level name defined outside this function.
        for i in glob(cwd + f"/AutoViz_Plots/{stored_folder}/*.{chart_format}"):
            st.image(Image.open(i))
    elif chosen_val == "DataPrep":
        # NOTE(review): ``xy`` is not defined in this function; presumably a
        # module-level sequence of column names -- verify against the caller.
        try:
            dpplot(df, *xy).show_browser()
        except Exception:
            # Fall back to embedding the report as HTML. ``Exception`` rather
            # than a bare ``except`` so KeyboardInterrupt and SystemExit are
            # not swallowed.
            stc.html(display_html(dpplot(df).report))
    elif chosen_val == "Summary Table":
        get_df(df)
names=[ 'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class' ]) print(irisData.head()) wineReviews = pd.read_csv( 'D:/Personal/Github/UpdatedMain/FreeCodeCamp-DS/With Python/Visualization/Dataset/winemag-data-130k-v2.csv' ) print(wineReviews.head()) '''Matplotlib is the most popular python plotting library Matplotlib is specifically good for creating basic graphs like line charts, bar charts, histograms and many more. ''' AV = AutoViz_Class() df = AV.AutoViz( 'D:/Personal/Github/UpdatedMain/FreeCodeCamp-DS/With Python/Visualization/Dataset/winemag-data-130k-v2.csv' ) # Sample plot with 4 numbers plt.plot([1, 3, 2, 4]) plt.title('Sample with 4 numvers') plt.xlabel('X-Axis') plt.ylabel('Y-Axis') plt.show() # Sample plot with X and Y values plt.plot([1, 2, 3, 4], [1, 4, 9, 16]) # To add the points in some shape
"""Example script: chart a ``weo`` data set with AutoViz.

See https://github.com/AutoViML
"""
from autoviz.AutoViz_Class import AutoViz_Class
from weo import get

AV = AutoViz_Class()

# Data object returned by ``weo.get`` for the arguments (2019, "Oct").
w = get(2019, "Oct")

# Keyword options handed to AutoViz. The frame being charted is the
# transpose of ``w.fix_year(2024)``.
autoviz_options = {
    "depVar": "NGDP_RPCH",
    "dfte": w.fix_year(2024).T,
    "header": 0,
    "verbose": 2,
    "lowess": False,
    "chart_format": "svg",
    "max_rows_analyzed": 1000,
    "max_cols_analyzed": 30,
}

# The empty filename goes first positionally; the data comes from ``dfte``.
dft = AV.AutoViz("", **autoviz_options)
# Positional columns 1 through 4 of the source frame.
death = df.iloc[:, 1:5]

# Date together with the total case and total death columns.
summary_columns = ['date', 'total_cases', 'total_deaths']
newdata = df.loc[:, summary_columns]

# Column-wise (axis=1) combination of ``new_df`` and ``df``.
alldata = pd.concat([new_df, df], axis=1)

# 'YYYY-MM' label derived from the date column.
# NOTE(review): this column is added after ``alldata`` was built -- confirm
# whether it was meant to be part of the exported CSV.
df['yyyy-mm'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m')

# Same label with its first two characters dropped.
df_date = df['yyyy-mm'].str.slice(2)

# Single-column frames, kept as DataFrames rather than Series.
new_cases = df.loc[:, ['new_cases']]
total_cases = mydata.loc[:, ['total_cases']]
total_deaths = mydata.loc[:, ['total_deaths']]
new_deaths = mydata.loc[:, ['new_deaths']]

# Write the combined frame to disk, then let AutoViz read that file.
alldata.to_csv('covid_data.csv', index=False)

from autoviz.AutoViz_Class import AutoViz_Class

AV = AutoViz_Class()
# Rebinds ``df`` to whatever AutoViz returns.
df = AV.AutoViz("covid_data.csv")
fig.show()

# --- D-Tale ---------------------------------------------------------------
# Install with: pip install dtale
import dtale

d = dtale.show(departures)
d.open_browser()

# --- pandas-profiling -----------------------------------------------------
# Install with: pip install pandas-profiling
import pandas_profiling

# The report object is built but not bound to a name.
pandas_profiling.ProfileReport(departures)

# --- AutoViz --------------------------------------------------------------
from autoviz.AutoViz_Class import AutoViz_Class

AV = AutoViz_Class()
sep = ','
# An empty filename is passed; the data comes from ``dfte``.
dft = AV.AutoViz(
    filename="",
    sep=sep,
    depVar='fyear',
    dfte=departures,
    header=0,
    verbose=2,
    lowess=False,
    chart_format='svg',
    max_rows_analyzed=150000,
    max_cols_analyzed=30,
)

# --- DataPrep -------------------------------------------------------------
from dataprep.eda import create_report

# NOTE(review): this uses ``df`` while every tool above uses ``departures``
# -- confirm which frame is intended.
create_report(df).show_browser()
import pandas as pd

titanic = pd.read_csv('titanic.csv')
# ``head`` must be called; printing the attribute only shows the bound
# method's repr instead of the first rows.
print(titanic.head())

# Data EDA in two lines of code using pandas_profiling
import pandas_profiling as pp

profile = pp.ProfileReport(titanic, explorative=True)
profile.to_file('output.html')

# EDA using Sweetviz
import sweetviz as sv

sweet_report = sv.analyze(titanic)
sweet_report.show_html('sweet_report.html')

# EDA using Autoviz
from autoviz.AutoViz_Class import AutoViz_Class

# AutoViz_Class takes no constructor arguments; the frame is handed to the
# AutoViz() method through ``dfte`` with an empty filename.
aviz = AutoViz_Class()
aviz.AutoViz(filename='', dfte=titanic)

# EDA using dtale
import dtale

dtale.show(titanic, ignore_duplicate=True)
print(subm.shape)
subm.head()

from autoviml.Auto_ViML import Auto_ViML

sample_submission = subm
scoring_parameter = 'rmse'

# Fit on the selected features plus the new column and the target.
model, feats, trainm, testm = Auto_ViML(
    train[sel_feats + [new_col, target]],
    target,
    test[sel_feats + [new_col]],
    sample_submission=subm,
    hyper_param='GS',
    feature_reduction=True,
    scoring_parameter=scoring_parameter,
    Boosting_Flag=False,
    KMeans_Featurizer=True,
    Add_Poly=0,
    Stacking_Flag=True,
    Binning_Flag=True,
    Imbalanced_Flag=False,
    verbose=0,
)

# Every training column except the target.
preds = [x for x in train if x != target]

# Give the test rows a placeholder target so train and test can be stacked.
test[target] = -1.0

import pandas as pd

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
# row-wise stacking and keeps the original index labels.
df = pd.concat([train, test])
print(df.shape)
df.head(2)

from autoviz.AutoViz_Class import AutoViz_Class

AV = AutoViz_Class()
# Pass the ``target`` variable rather than the literal 'target', so the
# dependent variable always matches the column filled in above.
dft = AV.AutoViz(filename="", sep=',', depVar=target, dfte=df)

### AutoViz has selected top 30 features => let's use them to build a model.
sel_feats = dft.columns[:30].tolist()
sel_feats

#model.predict(testm[feats])
testm.head()
# -*- coding: utf-8 -*-
"""EDA.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1UgWBehsqz95ylz2pHkS_fKdhpRIRTGkY
"""

# The notebook's shell commands are kept as comments because ``!pip`` and a
# bare ``pip install`` are not valid Python in a .py module. Run them in a
# terminal (or a notebook cell) before executing this script.

# !pip install autoviz
from autoviz.AutoViz_Class import AutoViz_Class

EDA_Retail_Store = AutoViz_Class()
EDA_Retail_Store.AutoViz('Task-3-EDA_Retail.csv')

# !pip install pandas_visual_analysis
import pandas as pd

df = pd.read_csv('matches.csv')

from pandas_visual_analysis import VisualAnalysis

VisualAnalysis(df)

# pip install sweetviz
import sweetviz as sv

my_report = sv.analyze(df)
# Generates an HTML file; the report is written to that file and is not
# part of the notebook itself.
my_report.show_html()