def generate_input(self):
    """Generate a random DataFrame and return it serialized as Parquet bytes.

    When ``self._regression`` is truthy, the generation parameters are taken
    from ``self.get_next_regression_params()``. Otherwise they are produced
    by ``_generate_rand_meta`` and recorded in ``self._current_params``
    together with a freshly drawn seed.

    Returns:
        The full contents of the in-memory Parquet buffer written by
        ``df.to_parquet``.
    """
    if not self._regression:
        excluded = {"category", "datetime64[ns]"}
        candidates = (
            cudf.utils.dtypes.ALL_TYPES
            - excluded
            - cudf.utils.dtypes.TIMEDELTA_TYPES
        )
        dtypes_meta, num_rows, num_cols = _generate_rand_meta(
            self, list(candidates)
        )
        # Draw the seed only after the metadata; _generate_rand_meta may
        # also consume global `random` state, so the order is kept
        # -- TODO confirm.
        seed = random.randint(0, 2 ** 32 - 1)
        # NOTE(review): the other generate_input variants visible in this
        # file store the column count under "num_columns"; confirm which
        # key the regression loader expects before renaming "num_cols".
        recorded = (
            ("dtypes_meta", dtypes_meta),
            ("seed", seed),
            ("num_rows", num_rows),
            ("num_cols", num_cols),
        )
        for key, value in recorded:
            self._current_params[key] = value
    else:
        replayed = self.get_next_regression_params()
        dtypes_meta, num_rows, num_cols, seed = replayed

    message = (
        f"Generating DataFrame with rows: {num_rows} "
        f"and columns: {num_cols}"
    )
    logging.info(message)

    table = dg.rand_dataframe(dtypes_meta, num_rows, seed)
    df = pyarrow_to_pandas(table)
    # The shape that is logged is the generated table's, not the
    # converted frame's.
    logging.info(f"Shape of DataFrame generated: {table.shape}")

    buffer = io.BytesIO()
    df.to_parquet(buffer)
    # getvalue() yields the whole buffer regardless of the stream position.
    return buffer.getvalue()
def generate_input(self):
    """Generate a random ``cudf.DataFrame`` to use as test input.

    When ``self._regression`` is truthy, the generation parameters are taken
    from ``self.get_next_regression_params()``. Otherwise a new seed is
    drawn, the global ``random`` module is re-seeded with it, and the
    parameters produced by ``_generate_rand_meta`` are recorded in
    ``self._current_params``.

    Returns:
        The DataFrame built with ``cudf.DataFrame.from_arrow`` from the
        table returned by ``dg.rand_dataframe``.
    """
    if not self._regression:
        seed = random.randint(0, 2 ** 32 - 1)
        # Re-seed before generating metadata so later draws follow the
        # recorded seed.
        random.seed(seed)

        excluded = {"category", "timedelta64[ns]", "datetime64[ns]"}
        candidates = cudf.utils.dtypes.ALL_TYPES - excluded
        dtypes_meta, num_rows, num_cols = _generate_rand_meta(
            self, list(candidates)
        )

        recorded = (
            ("dtypes_meta", dtypes_meta),
            ("seed", seed),
            ("num_rows", num_rows),
            ("num_columns", num_cols),
        )
        for key, value in recorded:
            self._current_params[key] = value
    else:
        replayed = self.get_next_regression_params()
        dtypes_meta, num_rows, num_cols, seed = replayed

    message = (
        f"Generating DataFrame with rows: {num_rows} "
        f"and columns: {num_cols}"
    )
    logging.info(message)

    arrow_table = dg.rand_dataframe(dtypes_meta, num_rows, seed)
    df = cudf.DataFrame.from_arrow(arrow_table)
    logging.info(f"Shape of DataFrame generated: {df.shape}")
    return df
def generate_input(self):
    """Generate a random DataFrame and return it serialized via ``to_json``.

    When ``self._regression`` is truthy, the generation parameters are taken
    from ``self.get_next_regression_params()``. Otherwise a new seed is
    drawn, the global ``random`` module is re-seeded with it, and the
    parameters produced by ``_generate_rand_meta`` are recorded in
    ``self._current_params`` along with ``self._file_name``.

    Returns:
        The result of ``df.to_json()`` on the frame converted from the
        generated table.
    """
    if not self._regression:
        seed = random.randint(0, 2**32 - 1)
        # Re-seed before generating metadata so later draws follow the
        # recorded seed.
        random.seed(seed)

        candidates = list(cudf.utils.dtypes.ALL_TYPES)
        dtypes_meta, num_rows, num_cols = _generate_rand_meta(
            self, candidates
        )

        recorded = (
            ("dtypes_meta", dtypes_meta),
            ("file_name", self._file_name),
            ("seed", seed),
            ("num_rows", num_rows),
            ("num_columns", num_cols),
        )
        for key, value in recorded:
            self._current_params[key] = value
    else:
        replayed = self.get_next_regression_params()
        dtypes_meta, num_rows, num_cols, seed = replayed

    message = (
        f"Generating DataFrame with rows: {num_rows} "
        f"and columns: {num_cols}"
    )
    logging.info(message)

    arrow_table = dg.rand_dataframe(dtypes_meta, num_rows, seed)
    df = pyarrow_to_pandas(arrow_table)
    logging.info(f"Shape of DataFrame generated: {df.shape}")
    return df.to_json()