def process_inspection_results(
    inspection_results: List[dict],
    exclude: Union[list, set] = None,
    apply: List[Tuple] = None,
    drop: bool = True,
    verbose: bool = False,
) -> pd.DataFrame:
    """Process inspection results into a flat pd.DataFrame.

    The nested result documents are flattened with ``json_normalize`` (nested
    keys are joined by ``__``), matching columns are converted element-wise,
    and columns that the profiler reports as constant are optionally dropped.
    Finally the job and build durations are derived from their timestamps.

    :param inspection_results: inspection result documents, one per row
    :param exclude: top-level keys of a result that are skipped when profiling
    :param apply: ``(regex, func)`` pairs; ``func`` is applied element-wise to
        every column whose name matches ``regex``. The datetime conversion of
        the ``created|started_at|finished_at`` columns is always appended.
    :param drop: whether to drop the rejected (constant) columns
    :param verbose: whether to print the rejected columns found for each key
    :returns: the processed frame. A frame with at most one row is returned
        right after flattening, i.e. without conversions, profiling or the
        derived duration columns.
    :raises ValueError: if ``inspection_results`` is empty
    """
    if not inspection_results:
        # Raise instead of returning the exception object: the declared return
        # type is a DataFrame, and a returned error is easily overlooked.
        raise ValueError("Empty iterable provided.")

    datetime_spec = ("created|started_at|finished_at", pd.to_datetime)
    # Build a new list so the caller's `apply` argument is never mutated.
    conversions = [datetime_spec] if apply is None else [*apply, datetime_spec]
    excluded = exclude or []

    df = json_normalize(inspection_results, sep="__")  # each row resembles InspectionResult

    if len(df) <= 1:
        # With a single row every column has at most one distinct value, so
        # the constant-column rejection below would not be meaningful.
        return df

    for regex, func in conversions:
        for col in df.filter(regex=regex).columns:
            df[col] = df[col].apply(func)

    # Profile the columns of each top-level key separately. Note that the key
    # is used as a regular expression against the flattened column names.
    for key in inspection_results[0]:
        if key in excluded:
            continue

        report = profile(df.filter(regex=key))
        rejected = (
            report.description_set["variables"]
            .query("distinct_count <= 1 & type != 'UNSUPPORTED'")
            # Never reject a column whose name contains "version".
            .filter(regex="^((?!version).)*$", axis=0)
        )

        if verbose:
            print("Rejected columns: ", rejected.index)

        if drop:
            df = df.drop(columns=rejected.index)

    # NOTE(review): this requires the four status timestamp columns to still
    # be present at this point; confirm they can never be rejected as constant.
    df = df.eval(
        "status__job__duration = status__job__finished_at - status__job__started_at",
        engine="python",
    ).eval(
        "status__build__duration = status__build__finished_at - status__build__started_at",
        engine="python",
    )

    return df
# %% {"init_cell": true, "hidden": true}
# Show the top-level keys of the first inspection result.
inspection_results[0].keys()

# %% [markdown] {"hidden": true}
# #### Status

# %% {"require": ["base/js/events", "datatables.net", "d3", "jupyter-datatables"], "hidden": true}
# Work on an explicit copy: assigning columns into a frame selected from `df`
# would otherwise trigger pandas' SettingWithCopyWarning.
df_status = df.filter(regex="status").copy()

# Parse the start/finish timestamp columns into datetimes.
date_columns = df_status.filter(regex="started_at|finished_at").columns
for col in date_columns:
    df_status[col] = df[col].apply(pd.to_datetime)

# %% {"hidden": true}
p = profile(df_status)
p

# %% [markdown] {"hidden": true}
# According to the profiling, we can drop the columns that hold a single constant value:

# %% {"hidden": true}
# Variables with at most one distinct value, ignoring those typed as UNSUPPORTED.
rejected = p.description_set["variables"].query(
    "distinct_count <= 1 & type != 'UNSUPPORTED'"
)
rejected

# %% {"hidden": true}
# Remove the rejected status columns from the full frame (mutates `df` in place).
df.drop(rejected.index, axis=1, inplace=True)

# %% [markdown] {"hidden": true}