def test_export_to_file(self):
    # Build a report for the fixture DataFrame, write it to an HTML file
    # inside the test directory, and check that the file is larger than
    # 200 bytes.
    report = spark_df_profiling_optimus.ProfileReport(self.df)

    # The file name is derived from hash(self).
    html_name = "profile_%s.html" % hash(self)
    target_path = os.path.join(self.test_dir, html_name)

    report.to_file(outputfile=target_path)

    written_bytes = os.path.getsize(target_path)
    self.assertLess(200, written_bytes)
def profiler(self):
    """
    Build a profile report for the DataFrame currently held by the analyzer.

    The DataFrame stored in ``self._df`` is handed directly to
    ``ProfileReport`` from spark-df-profiling-optimus.

    :return: the ``ProfileReport`` object created for the DataFrame
        (presumably rendered as an HTML profile when displayed in a
        notebook -- confirm against the library)
    """
    return spark_df_profiling_optimus.ProfileReport(self._df)
import pandas as pd
import spark_df_profiling_optimus


def main(argv=None):
    """Profile a CSV file from the command line and write an HTML report.

    :param argv: list of command-line arguments to parse; ``None`` (the
        default) makes argparse read ``sys.argv[1:]``.
    """
    # Imported here so they are only loaded when the CLI is actually run.
    import argparse
    import webbrowser

    parser = argparse.ArgumentParser(
        description='Profile the variables in a CSV file and generate a HTML report.')
    parser.add_argument("inputfile", help="CSV file to profile")
    parser.add_argument("-o", "--output", help="Output report file",
                        default=spark_df_profiling_optimus.DEFAULT_OUTPUTFILE)
    parser.add_argument("-s", "--silent",
                        help="Only generate but do not open report",
                        action="store_true")
    args = parser.parse_args(argv)

    # sep=None asks pandas to sniff the delimiter, which only the Python
    # parsing engine supports. Naming the engine explicitly avoids the
    # ParserWarning pandas emits when it has to fall back from the C engine.
    df = pd.read_csv(args.inputfile, sep=None, engine="python",
                     parse_dates=True)

    p = spark_df_profiling_optimus.ProfileReport(df)
    p.to_file(outputfile=args.output)

    if not args.silent:
        # Open the path recorded on the report object rather than
        # args.output, exactly as the original script did.
        webbrowser.open_new_tab(p.file.name)


if __name__ == "__main__":
    main()