def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
    """Profile ``df`` and build the report structure.

    Args:
        df: the data to profile; it is used as a pandas DataFrame
            (``df.index``, ``df.columns`` and ``df.reset_index()``).
        minimal: when true, load the configuration file returned by
            ``get_config_minimal()``.
        config_file: optional path of a configuration file to load.
        **kwargs: passed on to ``config.set_kwargs``.

    Raises:
        ValueError: if both ``config_file`` and ``minimal`` are given.
    """
    # ``sys.version_info`` is a five-field tuple, so (3, 5, 2, 'final', 0)
    # compares *greater* than (3, 5).  Comparing against (3, 6) is what
    # actually matches every 3.5.x interpreter (and anything older).
    if sys.version_info < (3, 6):
        warnings.warn(
            "This is the last release to support Python 3.5, please upgrade.",
            category=DeprecationWarning,
        )

    if config_file is not None and minimal:
        raise ValueError(
            "Arguments `config_file` and `minimal` are mutually exclusive."
        )

    if minimal:
        config_file = get_config_minimal()

    if config_file:
        config.set_file(str(config_file))

    # Keyword arguments are applied after the file has been loaded.
    config.set_kwargs(kwargs)

    self.date_start = datetime.utcnow()

    # Treat index as any other column: anything other than a plain
    # 0..len(df)-1 int64 index is moved into the columns.
    if (
        not pd.Index(np.arange(0, len(df))).equals(df.index)
        or df.index.dtype != np.int64
    ):
        df = df.reset_index()

    # Rename reserved column names
    df = rename_index(df)

    # Ensure that columns are strings
    df.columns = df.columns.astype("str")

    # Get dataset statistics
    description_set = describe_df(df)

    # Build report structure
    self.sample = self.get_sample(df)
    self.title = config["title"].get(str)
    self.description_set = description_set
    self.date_end = datetime.utcnow()

    disable_progress_bar = not config["progress_bar"].get(bool)

    # Single-step progress bar wrapped around the structure build.
    with tqdm(
        total=1, desc="build report structure", disable=disable_progress_bar
    ) as pbar:
        self.report = get_report_structure(
            self.date_start, self.date_end, self.sample, description_set
        )
        pbar.update()
def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
    """Load the configuration, describe ``df`` and build the report.

    Args:
        df: the data to profile; it is used as a pandas DataFrame.
        minimal: when true, the file returned by ``get_config_minimal()``
            is loaded as configuration.
        config_file: optional path of a configuration file to load.
        **kwargs: passed on to ``config.set_kwargs``.

    Raises:
        ValueError: if ``config_file`` and ``minimal`` are both supplied.
    """
    if minimal and config_file is not None:
        raise ValueError(
            "Arguments `config_file` and `minimal` are mutually exclusive."
        )

    # --- configuration -------------------------------------------------
    settings_path = get_config_minimal() if minimal else config_file
    if settings_path:
        config.config.set_file(str(settings_path))
    config.set_kwargs(kwargs)

    self.date_start = datetime.utcnow()

    # --- normalise the frame -------------------------------------------
    # The index is treated like any other column unless it is already a
    # plain 0..len(df)-1 int64 index.
    plain_index = pd.Index(np.arange(0, len(df)))
    needs_reset = (
        not plain_index.equals(df.index) or df.index.dtype != np.int64
    )
    frame = df.reset_index() if needs_reset else df

    # Reserved column names are renamed, then every label becomes a string.
    frame = rename_index(frame)
    frame.columns = frame.columns.astype("str")

    # Column order follows the configuration (asc, desc, no sort) and is
    # recorded back into the configuration.
    frame = self.sort_column_names(frame)
    config["column_order"] = frame.columns.tolist()

    # --- statistics and report -----------------------------------------
    stats = describe_df(frame)

    self.sample = self.get_sample(frame)
    self.title = config["title"].get(str)
    self.description_set = stats
    self.date_end = datetime.utcnow()

    self.report = get_report_structure(
        self.date_start, self.date_end, self.sample, stats
    )
def report(self):
    """Return the report structure, building it on first access.

    The result of ``get_report_structure(self.description_set)`` is stored
    in ``self._report`` and reused for as long as that attribute is not
    ``None``.
    """
    # Fast path: a structure has already been built and cached.
    if self._report is not None:
        return self._report

    self._report = get_report_structure(self.description_set)
    return self._report
def report(self) -> Root:
    """Return the report structure, building it on first access.

    The result of ``get_report_structure(self.config, self.description_set)``
    is stored in ``self._report`` and reused for as long as that attribute
    is not ``None``.
    """
    # Fast path: a structure has already been built and cached.
    if self._report is not None:
        return self._report

    self._report = get_report_structure(self.config, self.description_set)
    return self._report