def compress_df(df: XDataFrame, verbose: bool = False) -> XDataFrame:
    """Reduce memory usage by narrowing numeric column dtypes.

    Each ``int16``/``int32``/``int64`` column is cast to the smallest signed
    integer type (``int8`` .. ``int64``) whose range contains the column's
    minimum and maximum. Each ``float64`` column is cast to ``float32`` when
    its minimum and maximum fit the finite ``float32`` range; values may lose
    precision in that cast. For compatibility with feather, float16 is not
    used. Columns of any other dtype are left untouched.

    A column is only ever narrowed, never widened: a ``float32`` column whose
    min/max fall outside the finite ``float32`` range (for example because it
    holds ``inf`` or only ``NaN``) keeps its dtype.

    The frame is modified in place and the same object is returned.

    Args:
        df: Data frame whose columns should be narrowed.
        verbose: If true, log the memory usage before and after.

    Returns:
        The reduced data frame (the same object as ``df``).
    """
    # Membership below relies on ``dtype == "name"`` comparison, so this is
    # kept as a sequence rather than a set.
    _num_dtypes = [
        "int16",
        "int32",
        "int64",
        "float32",
        "float64",
    ]
    # Ordered narrowest first so the first fitting candidate is the smallest.
    _int_candidates = (np.int8, np.int16, np.int32, np.int64)

    if verbose:
        start_mem_usage = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtype
        if col_type not in _num_dtypes:
            continue

        min_val, max_val = df[col].min(), df[col].max()
        target = None
        if str(col_type).startswith("int"):
            for candidate in _int_candidates:
                bounds = np.iinfo(candidate)
                if min_val >= bounds.min and max_val <= bounds.max:
                    target = candidate
                    break
        else:
            # NOTE: half float is not supported in feather.
            bounds = np.finfo(np.float32)
            if min_val >= bounds.min and max_val <= bounds.max:
                target = np.float32

        # Cast only when it actually shrinks the column. This avoids
        # widening float32 columns whose range check fails (inf / all-NaN)
        # and skips pointless same-dtype copies.
        if (
            target is not None
            and np.dtype(target).itemsize < np.dtype(col_type).itemsize
        ):
            df[col] = df[col].astype(target)

    if verbose:
        end_mem_usage = df.memory_usage().sum() / 1024**2
        logger.warning("Memory reduced from {:.2f} MB to {:.2f} MB".format(
            start_mem_usage,
            end_mem_usage,
        ))

    return df
def reduce_mem_usage(
    df: XDataFrame, verbose: bool = True, debug: bool = True
) -> XDataFrame:
    """Compress ``df`` with ``compress_df`` and report the memory saved.

    Memory usage is measured with ``df.memory_usage().sum()`` before and
    after compression and reported in MB together with the relative
    reduction.

    Args:
        df: Data frame to compress; it is passed to ``compress_df``.
        verbose: If true, print the summary message.
        debug: If true, emit the summary message via ``logging.debug``.

    Returns:
        The data frame returned by ``compress_df``.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    df = compress_df(df)
    end_mem = df.memory_usage().sum() / 1024**2

    # A frame that reports zero bytes (e.g. no columns and an empty index)
    # would otherwise divide by zero; report it as "no reduction".
    if start_mem > 0:
        reduction = (start_mem - end_mem) / start_mem
    else:
        reduction = 0.0

    msg = (f"Mem. usage decreased to {end_mem:5.2f} MB" +
           f" ({reduction * 100:.1f} % reduction)")
    if verbose:
        print(msg)
    if debug:
        logging.debug(msg)

    return df