def generatereport(self):
    """Profile ``self.dataset`` and save the result as ``AnalysisReport.html``.

    Prints ``yes`` once the file has been written. Any ``Exception`` raised
    while building or saving the report is caught and its ``repr`` is printed
    instead of being propagated.
    """
    try:
        report = ProfileReport(self.dataset)
        report.to_file(output_file="AnalysisReport.html")
        print("yes")
    except Exception as exc:
        print(repr(exc))
def test_multiple_times(test_output_dir):
    """Write the same non-inline report twice and compare image counts.

    A 10x10 frame of random integers is profiled in minimal mode with
    ``inline`` and ``use_local_assets`` both disabled. The report is written
    to ``cdn_multi.html`` twice; after each write the file must exist, and the
    number of entries found in the images folder must be the same both times.
    """
    n_rows = n_columns = 10
    values = np.random.randint(0, 1000, size=(n_rows, n_columns))
    df = pd.DataFrame(values, columns=[f"column_{c}" for c in range(n_columns)])

    def count_images():
        # NOTE(review): this path is resolved against the current working
        # directory, not test_output_dir -- confirm that this is the directory
        # the assertion is meant to inspect.
        images_dir = Path("./cdn_multi_assets/").joinpath("images")
        return len(list(images_dir.glob("*")))

    html_options = {"inline": False, "use_local_assets": False}
    profile = ProfileReport(df, minimal=True, html=html_options)
    report = test_output_dir / "cdn_multi.html"

    # First export.
    profile.to_file(report)
    n_images = count_images()
    assert report.exists()
    check_assets(test_output_dir / "cdn_multi_assets", "svg", n_css=1, n_js=1)

    # Second export to the same location.
    profile.to_file(report)
    assert report.exists()
    assert n_images == count_images()
def EDA(source_df, reply_df):
    """Run exploratory data analysis on the source and reply frames.

    For each frame a full-width profile report is built, shown through
    ``to_notebook_iframe`` and written to an HTML file. Correlation heatmaps
    are then produced for both frames via ``correlation_heatmap``, and finally
    each HTML report is converted to PDF with ``pdfkit``.
    """
    targets = (
        (source_df, "EDA_source_report.html"),
        (reply_df, "EDA_reply_report.html"),
    )
    for frame, html_name in targets:
        report = ProfileReport(
            frame,
            title='Profile Report',
            html={'style': {'full_width': True}},
        )
        report.to_notebook_iframe()
        report.to_file(output_file=html_name)

    for frame in (source_df, reply_df):
        correlation_heatmap(frame)

    # Deferred import: pdfkit is only required for the PDF conversion step.
    import pdfkit
    pdfkit.from_file('EDA_source_report.html', 'EDA_source_report.pdf')
    pdfkit.from_file('EDA_reply_report.html', 'EDA_reply_report.pdf')
def upload_csv():
    """Accept a CSV upload, build EDA reports for it and open them.

    On ``POST`` the uploaded file is stored in the ``uploads`` folder next to
    this module under a sanitised name, a pandas-profiling HTML report is
    written beside it (``<saved file>.html``), a Sweetviz analysis is run and
    both reports are opened with ``webbrowser`` / ``show_html``. Any other
    request method renders the upload form template.

    Returns:
        A JSON response with status 200 after a successful upload, a JSON
        response with status 400 when the upload has no usable file name, or
        the rendered form template for non-POST requests.
    """
    if request.method != "POST":
        # NOTE(review): template name kept verbatim; confirm "inex.html" is
        # the intended file and not a misspelling.
        return render_template("inex.html")

    file = request.files["file"]
    safe_name = secure_filename(file.filename or "")
    if not safe_name:
        # secure_filename() strips every unsafe character; an empty result
        # would make the save path point at the uploads directory itself.
        return make_response(jsonify({"message": "Invalid file name."}), 400)

    # Save the file to ./uploads (relative to this module, not the CWD).
    basepath = os.path.dirname(__file__)
    file_path = os.path.join(basepath, 'uploads', safe_name)
    file.save(file_path)

    # The report lives next to the saved upload. One path is used both for
    # writing and for opening, so the two can never disagree, and the raw
    # client-supplied name is never used to build a filesystem path.
    report_path = file_path + ".html"
    df = pd.read_csv(file_path)
    profile = ProfileReport(df, title=file.filename, explorative=True)
    profile.to_file(report_path)
    sweetviz_eda = sv.analyze(df)

    res = make_response(
        jsonify({
            "message":
            "File " + file.filename +
            " uploaded successfully and report is Ready. "
        }), 200)

    # Show both reports in new tabs.
    webbrowser.open_new_tab(report_path)
    sweetviz_eda.show_html()
    return res
def generate_pandas_prof_report(df,
                                title,
                                explorative=True,
                                dataset=None,
                                minimal=True):
    """Build a pandas-profiling HTML report and record its status.

    Args:
        df: Data to profile.
        title: Report title; also used as the output file name
            (``<save path><title>.html``).
        explorative: Forwarded to ``ProfileReport``.
        dataset: Optional object exposing ``id``. When given, the matching
            ``ModulePandasProfiling`` record is set to ``loading`` before the
            run and to ``loaded`` (with the output path) or ``failed`` after.
        minimal: Forwarded to ``ProfileReport``.

    Errors raised while profiling or saving are logged and, when ``dataset``
    is given, recorded as ``failed``; they are not re-raised.
    """
    import logging  # local import keeps this block self-contained

    module = None
    if dataset is not None:
        module = select_from_db(ModulePandasProfiling, 'dataset_id',
                                dataset.id)
        update_in_db(module, {'status': 'loading'})

    try:
        # Honour the caller's ``explorative`` flag instead of forcing True.
        profile = ProfileReport(
            df, title=title, minimal=minimal, explorative=explorative)
        output_path = get_save_path() + title + '.html'
        profile.to_file(output_path)
        if dataset is not None:
            res = update_in_db(module, {
                'status': 'loaded',
                'path': output_path
            })
            if res != 'updated':
                update_in_db(module, {'status': 'failed'})
    except Exception:
        # Best-effort by design, but leave a trace; KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        logging.getLogger(__name__).exception(
            "Profiling report generation failed for %r", title)
        if dataset is not None:
            update_in_db(module, {'status': 'failed'})
def create_report(df, filename=None):
    """Profile ``df`` and either save the report or display it.

    When ``filename`` is truthy the report is written to that file and nothing
    is returned; otherwise the result of ``to_notebook_iframe()`` is returned.
    """
    report = ProfileReport(df, title='Pandas Profiling Report')
    if not filename:
        return report.to_notebook_iframe()
    report.to_file(output_file=filename)
def result(request):
    """Train a logistic-regression model and classify the submitted values.

    The CSV is loaded from a fixed local path, profiled into ``report.html``,
    and split 80/20 to fit a ``LogisticRegression`` on every call. The query
    parameters ``n1`` .. ``n8`` are read as floats and form one sample; the
    prediction is rendered in ``predict.html`` as ``Positive`` when it equals
    ``[1]`` and ``Negative`` otherwise.
    """
    df = pd.read_csv(r'C:\Users\hp\Downloads\diabetes.csv')
    ProfileReport(df).to_file(output_file='report.html')

    features = df.drop('Outcome', axis=1)
    target = df['Outcome']
    # Only the training halves of the split are used below.
    X_train, _, Y_train, _ = train_test_split(features, target, test_size=0.2)
    classifier = LogisticRegression()
    classifier.fit(X_train, Y_train)

    # Query parameters n1..n8, read in order.
    sample = [float(request.GET[f'n{i}']) for i in range(1, 9)]
    pred = classifier.predict([sample])

    label = 'Positive' if pred == [1] else 'Negative'
    return render(request, 'predict.html', {'result2': label})
def explore_df(df):
    """Profile ``df``, save the report as ``report.html`` and return its text.

    The file is written to the current working directory and then read back
    as UTF-8.
    """
    report_file = "report.html"
    ProfileReport(df, title='DataFrame exploration').to_file(report_file)
    with open(report_file, "r", encoding='utf-8') as handle:
        return handle.read()
async def testing(request: web.Request):
    """Profile an uploaded CSV and return the report as an HTML page.

    The form field ``file`` is written into the ``testing`` project directory
    under ``settings.PROJECT_DIR``, loaded with pandas and profiled. The
    report is saved as ``your_report.html`` in the current working directory,
    read back, printed and returned as the response body. Uploads whose file
    name does not contain ``.csv`` are answered with ``web.HTTPOk()``.
    """
    form = await request.post()
    data = form['file']  # the uploaded file field
    content_length = int(request.headers['Content-length'])

    project_dir = os.path.join(request.app['settings'].PROJECT_DIR, "testing")
    os.makedirs(project_dir, exist_ok=True)

    # Keep only the last path component of the client-supplied name so an
    # upload such as "../../x.csv" cannot be written outside project_dir.
    filename = os.path.basename(data.filename)
    if ".csv" not in filename:
        return web.HTTPOk()

    # Write the uploaded CSV to disk; Content-Length is used as the maximum
    # number of bytes to read from the upload.
    csv_path = os.path.join(project_dir, filename)
    with open(csv_path, 'wb') as file:
        file.write(data.file.read(content_length))

    df = pd.read_csv(csv_path)
    profile = ProfileReport(df,
                            title='Pandas Profiling Report',
                            html={'style': {
                                'full_width': True
                            }})
    profile.to_file(output_file="your_report.html")
    with open("your_report.html", "r", encoding='utf-8') as f:
        text = f.read()
    print(text)
    return web.Response(text=text, content_type='text/html')
def profile_file(file_path, file_name, extension, output_path='.', sep=None):
    """Load a file with pandas and write a pandas-profiling HTML report.

    Args:
        file_path: Path of the file to load.
        file_name: For plain-text formats, the file name whose part before the
            first ``.`` names the report. For ``xlsx``, the sheet to load; the
            report is then named ``<workbook basename>_<sheet>.html``.
        extension: File type; anything other than a member of
            ``PLAIN_FORMATS`` or ``'xlsx'`` is ignored.
        output_path: Directory the report is written to (default: current
            directory).
        sep: Separator hint passed to ``get_separator_char``.

    Any error while loading, profiling or saving is reported with a printed
    message and otherwise ignored; the function always returns ``None``.
    """
    import os  # local import: only needed to build the output location

    try:
        if extension in PLAIN_FORMATS:
            separator = get_separator_char(sep)
            df = pd.read_csv(file_path, sep=separator)
            report_stem = file_name.split('.')[0]
        elif extension == 'xlsx':
            excel_name = get_file_basename(file_path)
            report_stem = excel_name + '_' + file_name
            df = pd.read_excel(file_path, sheet_name=file_name)
        else:
            return

        report_name = '{}.html'.format(report_stem)
        # Honour output_path; with the default '.' the report still lands in
        # the current working directory.
        ProfileReport(df).to_file(os.path.join(output_path, report_name))
    except Exception:
        print("Can't open {}".format(file_path))
        return
def test_html_export_theme(test_output_dir):
    """Export a non-inline report with the ``united`` theme and check assets.

    A 10x10 frame of random integers is profiled in minimal mode, written to
    ``united.html`` and the ``united_assets`` folder is handed to
    ``check_assets`` with ``n_css=2`` and ``n_js=3``.
    """
    n_rows = n_columns = 10
    values = np.random.randint(0, 1000, size=(n_rows, n_columns))
    df = pd.DataFrame(values, columns=[f"column_{c}" for c in range(n_columns)])

    html_options = {"inline": False, "style": {"theme": "united"}}
    profile = ProfileReport(df, minimal=True, html=html_options)

    report = test_output_dir / "united.html"
    profile.to_file(report)
    assert report.exists()

    check_assets(test_output_dir / "united_assets", "svg", n_css=2, n_js=3)
def analyze_profile(self):
    """Write an HTML profiling report of ``self.df`` to ``self.path_to_export``.

    The report is titled with ``self.title``.
    """
    from pandas_profiling import ProfileReport

    ProfileReport(self.df, title=self.title).to_file(self.path_to_export)
def feature_report(df):
    """Write a profiling report of every feature and open it in a browser.

    The report is built with ``bins=30``, saved as ``output.html`` in the
    current working directory and opened through ``webbrowser``.
    """
    from pathlib import Path  # local import: used only to build the file URI

    output_name = "output.html"
    profile = ProfileReport(df, bins=30)
    # Passed positionally: pandas-profiling releases have spelled this
    # argument both ``outputfile`` and ``output_file``, so a keyword would
    # tie the call to one of them.
    profile.to_file(output_name)
    # as_uri() yields a well-formed file URI (percent-encoded, and with the
    # right number of slashes on every platform).
    webbrowser.open(Path(output_name).resolve().as_uri())
def test_example(get_data_file, test_output_dir):
    """End-to-end example on the NASA meteorites dataset.

    The CSV is fetched through ``get_data_file``, extended with several
    synthetic columns and ten duplicated rows, profiled with ascending
    variable sort and written to ``profile.html``. The test then checks that
    the file exists, that the description is a dict with 7 entries and that
    the variables appear in the expected order.
    """
    file_name = get_data_file(
        "meteorites.csv",
        "https://data.nasa.gov/api/views/gh4g-9sfh/rows.csv?accessType=DOWNLOAD",
    )
    df = pd.read_csv(file_name)

    # Years that cannot be parsed as timestamps are coerced to NaT and so
    # drop out of this analysis.
    df["year"] = pd.to_datetime(df["year"], errors="coerce")

    # Example: Constant variable
    df["source"] = "NASA"

    # Example: Boolean variable
    df["boolean"] = np.random.choice([True, False], df.shape[0])

    # Example: Mixed with base types
    df["mixed"] = np.random.choice([1, "A"], df.shape[0])

    # Example: Highly correlated variables
    df["reclat_city"] = df["reclat"] + np.random.normal(scale=5, size=(len(df)))

    # Example: Duplicate observations
    # Work on an explicit copy so the rename below never touches ``df``.
    duplicates_to_add = df.iloc[0:10].copy()
    duplicates_to_add["name"] += " copy"
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    df = pd.concat([df, duplicates_to_add], ignore_index=True)

    output_file = test_output_dir / "profile.html"
    profile = ProfileReport(
        df,
        title="NASA Meteorites",
        samples={"head": 5, "tail": 5},
        sort="ascending",
    )
    profile.to_file(output_file=output_file)
    assert output_file.exists(), "Output file does not exist"

    description = profile.get_description()
    assert (isinstance(description, dict)
            and len(description.items()) == 7), "Unexpected result"

    # Compare the full version tuple rather than only the minor component.
    if sys.version_info >= (3, 6):
        assert list(description["variables"].keys()) == [
            "boolean",
            "fall",
            "GeoLocation",
            "id",
            "mass (g)",
            "mixed",
            "name",
            "nametype",
            "recclass",
            "reclat",
            "reclat_city",
            "reclong",
            "source",
            "year",
        ], "Ascending sort did not work"
def save_report(df, name_report):
    """Write a profiling report to ``Helper.RAPPORT_DIR`` unless it exists.

    The target is ``<RAPPORT_DIR>/<name_report>.html``. If that file is
    already present, ``File exists !`` is printed and nothing is written.
    """
    target = os.path.join(Helper.RAPPORT_DIR, f'{name_report}.html')
    if path.exists(target):
        print('File exists !')
        return
    report = ProfileReport(df)
    report.to_file(output_file=target)
def profile_report(dfToReport):
    """Index the frame by ``date_time`` and write ``output.html``.

    Note that the index is set in place, so the caller's DataFrame is modified
    by this call.
    """
    dfToReport.set_index('date_time', inplace=True)
    html_options = {'style': {'full_width': True}}
    report = ProfileReport(
        dfToReport,
        title='Pandas Profiling Report',
        html=html_options,
    )
    report.to_file(output_file='output.html')
def main() -> None:
    """Command-line entry point: ``analyze.py title input.csv output.html``.

    With exactly three arguments the CSV is loaded, profiled under the given
    title and written to the output path. With any other argument count a
    usage line is printed and the function returns without doing anything.
    """
    if len(sys.argv) != 4:
        print("Usage: python3 analyze.py title input.csv output.html")
        return

    _, title, data_source, output_file = sys.argv
    frame = pd.read_csv(data_source)
    ProfileReport(frame, title=title).to_file(output_file)
def make_report(params: Params):
    """Build an EDA report for the training data.

    Reads the CSV at ``params.train_data_path``, profiles it and writes the
    report to ``params.report_path``, logging the start and the end of the
    run.
    """
    logger.info("EDA report preparation started")

    train_df = pd.read_csv(params.train_data_path)
    report = ProfileReport(train_df)
    report.to_file(output_file=params.report_path)

    logger.info("EDA report preparation completed")
def profiling(df: pd.DataFrame, sheet_name: str) -> None:
    """Create an HTML profile of ``df`` under ``notebooks/``.

    The output file is ``notebooks/full_profiling_<sheet_name>.html``.
    """
    from pandas_profiling import ProfileReport

    target = 'notebooks/full_profiling_{}.html'.format(sheet_name)
    report = ProfileReport(df)
    report.to_file(output_file=target)
def data_scan(df, to_html=False):
    """Build a full-width profiling report for ``df``.

    doc : https://github.com/pandas-profiling/pandas-profiling

    When ``to_html`` is falsy the report object is returned. Otherwise the
    report is written to ``data_scan.html`` and nothing is returned.
    """
    html_options = {'style': {'full_width': True}}
    report = ProfileReport(df,
                           title='Pandas Profiling Report',
                           html=html_options)
    if not to_html:
        return report
    report.to_file(output_file="data_scan.html")
def generate_pandas_prof_report(df, title):
    """Write a non-explorative profiling report named after ``title``.

    The output file is the string returned by ``get_save_path()`` followed by
    ``<title>.html``.
    """
    report = ProfileReport(df, title=title, explorative=False)
    destination = get_save_path() + title + '.html'
    report.to_file(destination)
def generate_html_profiling(filename, report_name):
    """Profile the data in ``filename`` and save it as ``report_name``.

    The data is loaded through ``open_data`` and profiled in explorative mode;
    the report title includes ``filename``.
    """
    frame = open_data(filename)
    report_title = 'Profiling Report of {}'.format(filename)
    report = ProfileReport(frame, title=report_title, explorative=True)
    report.to_file(report_name)
def show_graph():
    """Profile the bulk graph CSV and render the resulting report.

    On ``POST`` the file ``graph_input_files/graph_data.csv`` is loaded and
    profiled, the report is written into the ``templates`` folder as
    ``bulk_graph_output.html`` and that template is rendered. Exceptions
    propagate to the caller unchanged.
    """
    import os  # local import: used to build platform-independent paths

    if request.method != 'POST':
        # NOTE(review): non-POST requests get no response object (None);
        # confirm whether a page or an error should be returned here.
        return None

    # os.path.join keeps these paths valid on every platform; a literal
    # backslash is only a separator on Windows.
    graph_data = pd.read_csv(os.path.join('graph_input_files', 'graph_data.csv'))
    prof = ProfileReport(graph_data)
    prof.to_file(output_file=os.path.join('templates', 'bulk_graph_output.html'))
    return render_template('bulk_graph_output.html')
def generate_report(config: ReportConfig):
    """Profile the configured input CSV and save the report.

    ``config.input_path`` and ``config.output_path`` are resolved with
    ``to_absolute_path``. The output's parent directory is passed to
    ``check_dir`` before the explorative report is written.
    """
    out_path = to_absolute_path(config.output_path)
    in_path = to_absolute_path(config.input_path)

    frame = pd.read_csv(in_path)
    report = ProfileReport(frame, title="Profiling Report", explorative=True)

    logger.info("Save report to %s", out_path)
    parent_dir = os.path.dirname(out_path)
    check_dir(parent_dir)
    report.to_file(str(out_path))
def generate_profile(self):
    """Produce the profile of ``self._filename`` and save it as HTML.

    The report is written to ``PROFILING_ARTIFACT`` inside
    ``self._dst_folder``.
    """
    frame = pd.read_csv(self._filename)
    report = ProfileReport(
        frame, title=f'{self._filename} profile', explorative=True)
    destination = os.path.join(self._dst_folder, self.PROFILING_ARTIFACT)
    report.to_file(destination)
def generate_incorrect_report(incorrect_list, columns):
    """Generate a profile report from a list of incorrect predictions.

    ``incorrect_list`` is iterated as a sequence of sequences of tuples. For
    each tuple, elements ``[2]``, ``[0]`` and ``[-1]`` are stacked
    horizontally into one row. The rows form a DataFrame whose columns are
    ``columns`` plus a final ``"p"`` column, which is profiled in explorative
    mode and saved to ``assets/wip/report_incorrect.html``.
    """
    # Flatten the nested lists into one row per incorrect prediction.
    rows = []
    for group in incorrect_list:
        for tup in group:
            rows.append(np.hstack([tup[2], tup[0], tup[-1]]))
    data = np.vstack(rows)

    header = columns.tolist() + ["p"]
    incorrect_df = pd.DataFrame(data, columns=header)
    report = ProfileReport(incorrect_df, explorative=True)
    report.to_file("assets/wip/report_incorrect.html")
def make_profile(self, db_info):
    """Profile one SQLite table and write the report to disk.

    Args:
        db_info: Mapping with ``db_path`` (database file), ``table_name``
            (table to load in full) and ``profile_path`` (report output).

    Returns:
        ``Path`` of the written report.

    When ``app.config['PANDAS_PROFILING_CONFIG_MIN']`` is truthy the report is
    built with the ``profiling-minimal.yml`` config file.
    """
    dsn = 'file:{}?immutable=1'.format(db_info['db_path'])
    # uri=True makes sqlite3 treat the string as a URI, so ``immutable=1`` is
    # applied; without it the whole string may be taken as a plain file name.
    conn = sqlite3.connect(dsn, uri=True)
    try:
        # NOTE(review): the table name is interpolated into the SQL text;
        # presumably it comes from trusted metadata -- confirm with callers.
        sql = 'SELECT * FROM [{}]'.format(db_info['table_name'])
        df = pd.read_sql(sql, con=conn)
    finally:
        # Release the database handle even if the query fails.
        conn.close()

    if app.config['PANDAS_PROFILING_CONFIG_MIN']:
        profile = ProfileReport(df, config_file="profiling-minimal.yml")
    else:
        profile = ProfileReport(df)
    profile.to_file(db_info['profile_path'])
    return Path(db_info['profile_path'])
def profiling(csv_source: str, report_title: str, profile_out: str):
    """Profile a CSV file and store the report.

    Args:
        csv_source (str): Path of the CSV file to read.
        report_title (str): Title given to the report.
        profile_out (str): Path of the file the report is written to.
    """
    frame = pd.read_csv(csv_source)
    ProfileReport(frame, title=report_title).to_file(profile_out)
def create_pandas_profiling_report(df, df_name):
    """Create a minimal pandas-profiling report and save it as HTML.

    The file is written to ``<cwd>/../data/4_data_analysis/<df_name>_report.html``
    and a confirmation line is printed afterwards.

    Args:
        df ([Pandas Dataframe]): Dataframe to analyze.
        df_name ([str]): Name of the dataframe, used in the report title and
            in the stored file name.
    """
    report_title = df_name + ' Report'
    destination = (os.getcwd() + '/../data/4_data_analysis/' + df_name +
                   '_report.html')
    report = ProfileReport(df, title=report_title, minimal=True)
    report.to_file(destination)
    print(f'\nPandas profiling report of file {df_name} created\n\n')
def fit(df_path, report_path):
    """Generate a profiling report from a CSV file.

    :param df_path: path of the CSV file to load
    :param report_path: output path without extension; ``.html`` is appended
    :returns: None
    """
    df = pd.read_csv(df_path)
    profile = ProfileReport(
        df, title="Pandas Profiling Report Before prepocessing")
    profile.to_file(report_path + ".html")
    # Announce the report only once it has actually been written.
    print("report generated on" + report_path + ".html")