def sanitize_dataframe(df: Table):
    """Sanitize a table to prepare it for JSON serialization.

    Adapted from the ipyvega project. All work is done on the object
    returned by ``df.copy()``. Each column listed in ``df.labels`` is
    converted according to its dtype:

    * categoricals are converted to strings
    * ``np.bool_`` columns are converted to Python ``bool`` objects
    * integer columns are converted to Python ``int`` objects
    * float columns are converted to objects, with NaN/inf replaced by None
    * datetime columns are converted to strings ('NaT' becomes '')
    * object columns have embedded numpy arrays converted to lists and
      null entries replaced by None

    Args:
        df: The table to sanitize, or None. It must provide ``copy()``,
            ``labels``, ``column(name)`` and item get/set by column name.

    Returns:
        The sanitized copy, or None when ``df`` is None.
    """
    import numpy as np

    if df is None:
        # None is passed through unchanged rather than treated as an error.
        return None

    df = df.copy()

    def to_list_if_array(val):
        if isinstance(val, np.ndarray):
            return val.tolist()
        return val

    # otypes must be given explicitly: without it np.vectorize raises on
    # empty input and infers the output dtype from the first element only,
    # which fails when that element becomes a list and turns None into the
    # string 'None' when that element is a str.
    listify_arrays = np.vectorize(to_list_if_array, otypes=[object])

    for col_name in df.labels:
        dtype = df.column(col_name).dtype
        if str(dtype) == 'category':
            # XXXX: work around bug in to_json for categorical types
            # https://github.com/pydata/pandas/issues/10778
            df[col_name] = df[col_name].astype(str)
        elif str(dtype) == 'bool':
            # convert numpy bools to objects; np.bool is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.floating):
            # For floats, convert to Python float: np.float is not JSON
            # serializable. NaN/inf are not JSON serializable either, so
            # they are replaced with None.
            col = df[col_name]
            bad_values = np.isnan(col) | np.isinf(col)
            df[col_name] = np.where(bad_values, None, col).astype(object)
        elif str(dtype).startswith('datetime'):
            # Convert datetimes to strings;
            # astype(str) will choose the appropriate resolution.
            new_column = df[col_name].astype(str)
            new_column[new_column == 'NaT'] = ''
            df[col_name] = new_column
        elif dtype == object:
            # Convert numpy arrays saved as objects to lists;
            # arrays are not JSON serializable.
            col = listify_arrays(df[col_name])
            df[col_name] = np.where(notnull(col), col, None).astype(object)
    return df
def copy(self, *args, **kwargs):
    """Copy via ``Table.copy`` and pass the result through ``self._fix_``.

    All positional and keyword arguments are forwarded unchanged to
    ``Table.copy``.
    """
    duplicate = Table.copy(self, *args, **kwargs)
    return self._fix_(duplicate)
def _fix_(self, t): """Retain TimeTable attributes across functional Table methods.""" if self.time_column in t.labels : return self.from_table(t, self.time_column, self.time_less) else : return Table.copy(t)