# Fit an isolation forest on the single column under inspection and keep
# only the rows whose prediction equals 1 (scikit-learn's inlier label).
# NOTE(review): `X`, `y` and `column` are defined outside this snippet.
outlier_detector = IsolationForest(contamination=0.15)
mask = outlier_detector.fit_predict(X[column].to_frame(), y) == 1

# Apply the same row mask to features and target so they stay aligned.
new_X, new_y = X[mask], y[mask]
X_scaled = StandardScaler().fit_transform(new_X)

# Parallel-coordinates plot of the scaled, filtered data.
target_names = ['f', 's']
visualizer = ParallelCoordinates(
    classes=target_names,
    features=list(X.columns),
    sample=0.5,
    shuffle=True,
)
visualizer.fit_transform(X_scaled, new_y)
visualizer.show()

# %%
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Rebuild the feature matrix and target from scratch for this cell.
# NOTE(review): `tf` is defined outside this snippet — confirm what
# `cats_to_one_hot(columns=[])` does with an empty column list.
X, y = tf.cats_to_one_hot(columns=[]).create_X_y()
X_scaled = StandardScaler().fit_transform(X)
X = X.values

# Two-component projections: PCA is fitted here on the scaled data,
# while the t-SNE estimator is only constructed.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
tsne = TSNE(n_components=2)
df[feature].max(skipna=True) - df[feature].min(skipna=True)) # convert values to numpy arrays X = data_norm[num_features].to_numpy() y = df.Survived.to_numpy() # set up visualizer from yellowbrick.features import ParallelCoordinates visualizer = ParallelCoordinates(classes=classes, features=num_features) # fit visualizer visualizer.fit(X, y) visualizer.transform(X) # create PNG file and also display in shell visualizer.show(outpath="titanic_fig4.png") visualizer.show() # set figure size, make subplots plt.rcParams['figure.figsize'] = (20, 10) fig, axes = plt.subplots(nrows=2, ncols=2) # convert binary to survived/not survived, group by sex Sex_survived = df.replace({'Survived': { 1: 'Survived', 0: 'Not-survived' }})[df['Survived'] == 1]['Sex'].value_counts() Sex_not_survived = df.replace({'Survived': { 1: 'Survived', 0: 'Not-survived' }})[df['Survived'] == 0]['Sex'].value_counts()