def set_variables(self, **vars):
    """Update configuration values and drop the cached artefacts they affect.

    Args:
        **vars: configuration parameters to change, given as ``name=value``
            pairs.

    Examples:
        >>> ProfileReport(df).set_variables(title="NewTitle", html={"minify_html": False})
    """
    # Keys that do not trigger the full cache reset further down.
    partial_reset_keys = {"progress_bar", "pool_size", "notebook", "html", "title"}
    touched = set(vars)

    # "progress_bar" and "pool_size" on their own leave every cache in place.

    if "notebook" in touched:
        self._widgets = None

    if not touched.isdisjoint({"html", "title"}):
        # NOTE(review): a `title` change only drops the HTML cache here;
        # `_title` is left untouched - confirm that is intended.
        self._html = None

    if not touched.issubset(partial_reset_keys):
        # Any other key: forget everything that was computed so far.
        for cache_attr in (
            "_description_set",
            "_title",
            "_report",
            "_html",
            "_widgets",
            "_json",
        ):
            setattr(self, cache_attr, None)

    if len(vars) == 1:
        # A single entry is written through item assignment on `config`.
        ((name, value),) = vars.items()
        config[name] = value
    else:
        config.set_kwargs(vars)
def __init__(
    self,
    df=None,
    minimal=False,
    explorative=False,
    config_file: Union[Path, str] = None,
    lazy: bool = True,
    **kwargs,
):
    """Generate a ProfileReport based on a pandas DataFrame

    Args:
        df: the pandas DataFrame
        minimal: minimal mode is a default configuration with minimal computation
        explorative: load the bundled explorative configuration; only used
            when no `config_file` is given and `minimal` is false
        config_file: a config file (.yml), mutually exclusive with `minimal`
        lazy: compute when needed
        **kwargs: other arguments, for valid arguments, check the default
            configuration file.

    Raises:
        ValueError: if both `config_file` and `minimal` are given, or if
            `lazy` is false while no DataFrame is supplied.
    """
    if config_file is not None and minimal:
        raise ValueError(
            "Arguments `config_file` and `minimal` are mutually exclusive."
        )

    if df is None and not lazy:
        # A non-lazy report is built immediately, which needs data.
        raise ValueError("Cannot init a not-lazy ProfileReport with no DataFrame")

    # Exactly one configuration source is loaded; the first match wins.
    if config_file:
        config.set_file(config_file)
    elif minimal:
        config.set_file(get_resource("configs/config_minimal.yaml"))
    elif explorative:
        config.set_file(get_resource("configs/config_explorative.yaml"))
    elif not config.is_default:
        pass
        # TODO: logging instead of warning
        # warnings.warn(
        #     "Currently configuration is not the default, if you want to restore "
        #     "default configuration, please run 'pandas_profiling.clear_config()'"
        # )

    # Explicit keyword arguments are applied after any configuration file.
    config.set_kwargs(kwargs)

    # All computed artefacts start out empty.
    self.df = None
    self._df_hash = -1
    self._description_set = None
    self._title = None
    self._report = None
    self._html = None
    self._widgets = None
    self._json = None

    if df is not None:
        # preprocess df
        self.df = self.preprocess(df)

    if not lazy:
        # Trigger building the report structure
        _ = self.report
def __init__(self, df, config_file: Path = None, **kwargs):
    """Profile `df` and render the HTML report right away.

    Args:
        df: the pandas DataFrame to profile
        config_file: optional configuration file, loaded before `kwargs`
            are applied
        **kwargs: configuration overrides forwarded to `config.set_kwargs`

    Raises:
        ValueError: if the configured "sort" value is not one of "asc",
            "ascending", "desc", "descending" or "None".
    """
    if config_file:
        config.config.set_file(str(config_file))
    config.set_kwargs(kwargs)

    # Treat index as any other column
    if (
        not pd.Index(np.arange(0, len(df))).equals(df.index)
        or df.index.dtype != np.int64
    ):
        df = df.reset_index()

    # Rename reserved column names
    df = rename_index(df)

    # Remove spaces and colons from column names
    df = clean_column_names(df)

    # Sort column names
    sort = config["sort"].get(str)
    # Compare the whole version tuple rather than only its minor component.
    if sys.version_info < (3, 6) and sort != "None":
        warnings.warn("Sorting is supported from Python 3.6+")

    if sort in ["asc", "ascending"]:
        df = df.reindex(sorted(df.columns, key=lambda s: s.casefold()), axis=1)
    elif sort in ["desc", "descending"]:
        df = df.reindex(
            reversed(sorted(df.columns, key=lambda s: s.casefold())), axis=1
        )
    elif sort != "None":
        raise ValueError('"sort" should be "ascending", "descending" or None.')

    # Store column order
    config["column_order"] = df.columns.tolist()

    # Get dataset statistics
    description_set = describe_df(df)

    # Get sample; a section is only included when its configured size is positive
    sample = {}
    n_head = config["samples"]["head"].get(int)
    if n_head > 0:
        sample["head"] = df.head(n=n_head)

    n_tail = config["samples"]["tail"].get(int)
    if n_tail > 0:
        sample["tail"] = df.tail(n=n_tail)

    # Render HTML
    self.html = to_html(sample, description_set)
    self.minify_html = config["minify_html"].get(bool)
    self.use_local_assets = config["use_local_assets"].get(bool)
    self.title = config["title"].get(str)
    self.description_set = description_set
    self.sample = sample
def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
    """Profile `df` and build the report structure.

    Args:
        df: the pandas DataFrame to profile
        minimal: use the configuration returned by `get_config_minimal()`;
            mutually exclusive with `config_file`
        config_file: optional configuration file, loaded before `kwargs`
            are applied
        **kwargs: configuration overrides forwarded to `config.set_kwargs`

    Raises:
        ValueError: if both `config_file` and `minimal` are given.
    """
    # `sys.version_info` has five fields, so `<= (3, 5)` is false on every
    # 3.5.x interpreter; comparing against (3, 6) makes the warning fire there.
    if sys.version_info < (3, 6):
        warnings.warn(
            "This is the last release to support Python 3.5, please upgrade.",
            category=DeprecationWarning,
        )

    if config_file is not None and minimal:
        raise ValueError(
            "Arguments `config_file` and `minimal` are mutually exclusive."
        )

    if minimal:
        config_file = get_config_minimal()

    if config_file:
        config.set_file(str(config_file))
    config.set_kwargs(kwargs)

    self.date_start = datetime.utcnow()

    # Treat index as any other column
    if (
        not pd.Index(np.arange(0, len(df))).equals(df.index)
        or df.index.dtype != np.int64
    ):
        df = df.reset_index()

    # Rename reserved column names
    df = rename_index(df)

    # Ensure that columns are strings
    # NOTE(review): this assigns on whatever object `rename_index` returned;
    # if that is the caller's frame, its columns change too - confirm.
    df.columns = df.columns.astype("str")

    # Get dataset statistics
    description_set = describe_df(df)

    # Build report structure
    self.sample = self.get_sample(df)
    self.title = config["title"].get(str)
    self.description_set = description_set
    self.date_end = datetime.utcnow()

    disable_progress_bar = not config["progress_bar"].get(bool)

    # Single-step progress bar around the report assembly.
    with tqdm(
        total=1, desc="build report structure", disable=disable_progress_bar
    ) as pbar:
        self.report = get_report_structure(
            self.date_start, self.date_end, self.sample, description_set
        )
        pbar.update()
def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
    """Profile `df`, order its columns per the configuration and build the report.

    Args:
        df: the pandas DataFrame to profile
        minimal: use the configuration returned by `get_config_minimal()`;
            cannot be combined with `config_file`
        config_file: optional configuration file, loaded before `kwargs`
            are applied
        **kwargs: configuration overrides forwarded to `config.set_kwargs`

    Raises:
        ValueError: if both `config_file` and `minimal` are given.
    """
    if minimal:
        if config_file is not None:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )
        config_file = get_config_minimal()

    if config_file:
        config.config.set_file(str(config_file))
    config.set_kwargs(kwargs)

    self.date_start = datetime.utcnow()

    # An index that is not a plain 0..n-1 int64 range becomes a regular column.
    positional_index = pd.Index(np.arange(0, len(df)))
    index_is_positional = positional_index.equals(df.index)
    if not index_is_positional or df.index.dtype != np.int64:
        df = df.reset_index()

    # Reserved column names are renamed first, then all labels are cast to str.
    df = rename_index(df)
    df.columns = df.columns.astype("str")

    # Apply the configured ordering (asc, desc, no sort) and record it.
    df = self.sort_column_names(df)
    config["column_order"] = df.columns.tolist()

    # Dataset statistics
    stats = describe_df(df)

    # Everything the report structure is assembled from
    self.sample = self.get_sample(df)
    self.title = config["title"].get(str)
    self.description_set = stats

    self.date_end = datetime.utcnow()
    self.report = get_report_structure(
        self.date_start,
        self.date_end,
        self.sample,
        stats,
    )
def __init__(
    self,
    df: Optional[pd.DataFrame] = None,
    minimal: bool = False,
    explorative: bool = False,
    sensitive: bool = False,
    dark_mode: bool = False,
    orange_mode: bool = False,
    sample: Optional[dict] = None,
    config_file: Optional[Union[Path, str]] = None,
    lazy: bool = True,
    **kwargs,
):
    """Generate a ProfileReport based on a pandas DataFrame

    Args:
        df: the pandas DataFrame
        minimal: minimal mode is a default configuration with minimal computation
        explorative: apply the "explorative" configuration argument group
        sensitive: apply the "sensitive" configuration argument group
        dark_mode: apply the "dark_mode" configuration argument group
        orange_mode: apply the "orange_mode" configuration argument group
        sample: optional dict(name="Sample title", caption="Caption", data=pd.DataFrame())
        config_file: a config file (.yml), mutually exclusive with `minimal`
        lazy: compute when needed
        **kwargs: other arguments, for valid arguments, check the default
            configuration file.

    Raises:
        ValueError: if both `config_file` and `minimal` are given, or if
            `lazy` is false while no DataFrame is supplied.
    """
    if config_file is not None and minimal:
        raise ValueError(
            "Arguments `config_file` and `minimal` are mutually exclusive."
        )

    if df is None and not lazy:
        # A non-lazy report is built immediately, which needs data.
        raise ValueError("Cannot init a not-lazy ProfileReport with no DataFrame")

    if config_file:
        config.set_file(config_file)
    elif minimal:
        config.set_file(get_config("config_minimal.yaml"))
    elif not config.is_default:
        pass
        # warnings.warn(
        #     "Currently configuration is not the default, if you want to restore "
        #     "default configuration, please run 'pandas_profiling.clear_config()'"
        # )

    # Argument groups are applied after the base configuration and before
    # the explicit keyword overrides.
    if explorative:
        config.set_arg_group("explorative")
    if sensitive:
        config.set_arg_group("sensitive")
    if dark_mode:
        config.set_arg_group("dark_mode")
    if orange_mode:
        config.set_arg_group("orange_mode")

    config.set_kwargs(kwargs)

    # All computed artefacts start out empty; a user-supplied sample is kept as is.
    self.df = None
    self._df_hash = -1
    self._description_set = None
    self._sample = sample
    self._title = None
    self._report = None
    self._html = None
    self._widgets = None
    self._json = None
    self._typeset = None
    self._summarizer = None

    if df is not None:
        # preprocess df
        self.df = self.preprocess(df)

    if not lazy:
        # Trigger building the report structure
        _ = self.report