def _dataframe_from_json(
    path_or_str, schema: Schema = None, pandas_orient: str = "split"
) -> pd.DataFrame:
    """
    Parse json into pandas.DataFrame.

    User can pass schema to ensure correct type parsing and to make any necessary conversions
    (e.g. string -> binary for binary columns).

    :param path_or_str: Path to a json file or a json string.
    :param schema: Mlflow schema used when parsing the data. When given, the parsed frame is
                   indexed with ``schema.column_names()``, so only those columns are returned,
                   in that order, and every column whose schema type is ``DataType.binary`` is
                   passed through ``_base64decode``.
    :param pandas_orient: pandas data frame convention used to store the data.
    :return: pandas.DataFrame.
    """
    # NOTE(review): if another function with this same name is defined further down the module,
    # that later definition is the one callers get - confirm which revision is meant to be live.
    if schema is None:
        # No schema: hand the values back as parsed, without pandas dtype inference.
        return pd.read_json(path_or_str, orient=pandas_orient, dtype=False)

    dtypes = dict(zip(schema.column_names(), schema.column_types()))
    df = pd.read_json(path_or_str, orient=pandas_orient, dtype=dtypes)[schema.column_names()]
    for position, column_type in enumerate(schema.column_types()):
        if column_type != DataType.binary:
            continue
        # Positions line up with the schema because the frame was just re-selected by
        # schema.column_names().
        col = df.columns[position]
        # Keep the decoded values as plain ``bytes`` objects. Packing them into a fixed-width
        # ``np.bytes_`` array (as was done previously) treats trailing NUL bytes as padding and
        # silently drops them, corrupting binary payloads that end in b"\x00".
        df[col] = df[col].map(_base64decode)
    return df
def _dataframe_from_json(
    path_or_str, schema: Schema = None, pandas_orient: str = "split", precise_float=False
) -> pd.DataFrame:
    """
    Load json content into a pandas.DataFrame.

    Supplying a schema lets the parser use the schema's pandas types for the listed columns and
    triggers base64 decoding (string -> bytes) of the columns the schema marks as binary.

    :param path_or_str: Path to a json file or a json string.
    :param schema: Mlflow schema used when parsing the data. Binary columns named in the schema
                   but absent from the parsed data are skipped.
    :param pandas_orient: pandas data frame convention used to store the data.
    :param precise_float: Forwarded unchanged to ``pandas.read_json`` as ``precise_float``.
    :return: pandas.DataFrame.
    """
    if schema is None:
        # Without a schema the values are returned as parsed, with dtype inference turned off.
        return pd.read_json(
            path_or_str, orient=pandas_orient, dtype=False, precise_float=precise_float
        )

    def _decode_base64(text):
        # The json payload carries binary cells as base64 text.
        return base64.decodebytes(bytes(text, "utf8"))

    column_dtypes = dict(zip(schema.column_names(), schema.pandas_types()))
    frame = pd.read_json(
        path_or_str, orient=pandas_orient, dtype=column_dtypes, precise_float=precise_float
    )

    # Only decode binary columns that actually made it into the parsed frame.
    present = set(frame.columns)
    binary_names = [
        column_name
        for column_type, column_name in zip(schema.column_types(), schema.column_names())
        if column_type == DataType.binary and column_name in present
    ]
    for column_name in binary_names:
        frame[column_name] = frame[column_name].map(_decode_base64)
    return frame