def balance_yellowbrick(
    X,
    y,
    features,
):
    """Draw a yellowbrick ``ClassBalance`` chart for a stratified 1% sample of ``y``.

    The pyplot backend is switched to ``agg`` and the current figure is
    cleared before anything is drawn.

    Args:
        X: Feature table; it is indexed as ``X[features]`` for the split.
        y: Target labels; also used as the stratification key.
        features: Column labels selected from ``X``.

    Returns:
        The ``plt`` module itself, after the visualizer has been fitted and
        finalized.
    """
    plt.switch_backend('agg')
    plt.clf()

    # Only the held-out targets are plotted. X[features] is still passed to
    # the split so that an unknown column label or an X/y length mismatch
    # keeps failing here, exactly as before.
    _, _, _, y_test = train_test_split(
        X[features], y, stratify=y, test_size=0.01
    )

    visualizer = ClassBalance()
    visualizer.fit(pd.Series(y_test))
    visualizer.finalize()
    return plt
# Show the dimensions of the feature table and of the target vector.
display(X.shape)
display(y.shape)

#%%
import matplotlib.pyplot as plt
from yellowbrick.target import ClassBalance

# Per-class sample counts; np.unique returns them in sorted label order.
# NOTE(review): class_labels is assumed to follow that same order and to
# cover exactly two classes (explode has two entries) — confirm against y.
_, y_counts = np.unique(y, return_counts=True)
class_labels = ["survived", "deceased"]

# Two panels side by side: a pie chart on the left and the yellowbrick
# class-balance chart on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4.5))
ax1.pie(y_counts, labels=class_labels, explode=(0, 0.05))

visualizer = ClassBalance(ax=ax2, labels=class_labels)
visualizer.fit(y)
visualizer.finalize()
plt.show()

#%%
# Total count of NaN cells across every column of X.
total_missing = X.isna().sum().sum()
print("Number of missing values:", total_missing)

#%%
X["timerecurrence"].describe()

#%%
# Disabled exploration: scatter each of columns 2..15 against the target.
# for column in X.columns[2:16]:
#     plt.scatter(X[column], y)
#     plt.xlabel(column)
#     plt.show()