def convert_dataframe(df, to='pandas', return_library=True, dtypes=None):
    '''Converts a dataframe to the desired dataframe library format.

    The values and the column labels of `df` are copied into a new
    dataframe built with the target library. The row index of `df` is not
    passed on to the new dataframe. As a side effect, the target library
    name is also handed to `du.set_pandas_library`.

    Parameters
    ----------
    df : pandas.DataFrame or dask.DataFrame or modin.pandas.DataFrame
        Original dataframe which will be converted. It must provide
        `to_numpy()` and `columns`.
    to : string, default 'pandas'
        The data library to which format the dataframe will be converted
        to. The name is matched case-insensitively; the supported options
        are 'pandas' and 'modin'.
    return_library : bool, default True
        If set to True, the new dataframe library is also returned as an
        output. Only the literal `True` enables this.
    dtypes : dict, default None
        Dictionary that indicates the desired dtype for each column.
        e.g. {'Var1': 'float64', 'Var2': 'UInt8', 'Var3': str}
        If None, the dtypes are inferred with `infer_objects()`.

    Returns
    -------
    df : pandas.DataFrame or modin.pandas.DataFrame
        Converted dataframe, in the desired type.

    If return_library == True:

    new_pd : pandas or modin.pandas
        The dataframe library to which the input dataframe is converted to.

    Raises
    ------
    ValueError
        If `to` is neither 'pandas' nor 'modin'.
    '''
    lib = str(to).lower()
    # Import lazily so that only the requested library needs to be installed
    if lib == 'pandas':
        import pandas as new_pd
    elif lib == 'modin':
        import modin.pandas as new_pd
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working
        raise ValueError(
            f'ERROR: Currently, conversion to a dataframe of type {to} is not supported. Available options are "pandas" and "modin".'
        )
    # Rebuild the dataframe from its raw values; only the column labels are
    # carried over, so the result gets a fresh default row index
    converted_df = new_pd.DataFrame(data=df.to_numpy(), columns=df.columns)
    # Presumably switches the library used by the rest of data_utils
    du.set_pandas_library(lib)
    if dtypes is None:
        # Infer adequate dtypes for the dataframe's columns
        converted_df = converted_df.infer_objects()
    else:
        # Set the desired dtypes
        # NOTE(review): relies on convert_dtypes returning the dataframe
        # even when called with inplace=True - confirm
        converted_df = convert_dtypes(converted_df, dtypes=dtypes, inplace=True)
    if return_library is True:
        return converted_df, new_pd
    return converted_df
import os                                  # os handles directory/workspace changes (used by the os.chdir calls below)
import numpy as np                         # Mathematical operations package, allowing also for missing values representation
import torch                               # PyTorch for tensor and deep learning operations
import plotly.graph_objs as go             # Plotly for interactive and pretty plots
import data_utils as du                    # Data science and machine learning relevant methods
from model_interpreter.model_interpreter import ModelInterpreter  # Model interpretability class
import shap                                # Model-agnostic interpretability package inspired by Shapley values
from datetime import datetime              # datetime to use proper date and time formats
import pickle                              # Save and load Python objects

# The bare `du.random_seed` expressions have no effect in a script;
# presumably they are notebook leftovers that display the seed before and
# after it is set
du.random_seed
du.set_random_seed(42)
du.random_seed

# Select pandas as the dataframe library used by data_utils
du.set_pandas_library(lib='pandas')

import pixiedust                           # Debugging in Jupyter Notebook cells

# Change to scripts directory so that the local module below can be imported
# (assumes the current working directory is on the import path)
os.chdir('../../scripts')

import Models                              # Script with all the machine learning model classes

# Change to parent directory (presumably "eICU-mortality-prediction")
os.chdir('..')

# ## Initializing variables

# Comet ML settings:
# Standard library
import os  # directory/workspace changes

# Third-party and project packages
import pandas as pd  # loads the data initially
import numpy as np  # mathematical operations, also represents missing values
import torch  # tensor and deep learning operations
import data_utils as du  # data science and machine learning helpers

# Fix the random seed. The bare `du.random_seed` expressions do nothing in a
# script; presumably they display the seed before and after the change when
# run as notebook cells.
du.random_seed
du.set_random_seed(42)
du.random_seed

# Select pandas as the dataframe library; `du.use_modin` is likewise only
# evaluated, presumably to display the flag before and after the switch.
du.use_modin
du.set_pandas_library('pandas')
du.use_modin

# Debugging in Jupyter Notebook cells
import pixiedust

# The local modules are imported from the scripts directory, so move there
# first (assumes the current working directory is on the import path)
os.chdir('../../scripts')
# Dataset class that helps fetching batches of data
from Tabular_Dataset import Tabular_Dataset
# Script with all the machine learning model classes
import Models

# Back to the parent directory (presumably "eICU-mortality-prediction")
os.chdir('..')

# ## Initializing variables