def test_parallel_coords(pandas=False, outpath=None):
    """
    Runs the parallel coordinates visualizer on the occupancy dataset.

    Parameters
    ----------
    pandas : bool
        Run the pandas version of the function
    outpath : path or None
        Save the figure to disk rather than show (if None)
    """
    data = load_data('occupancy')  # Load the data
    features = ['temp', 'humid', 'light', 'co2', 'hratio']
    classes = ['unoccupied', 'occupied']

    # FIX: DataFrame.as_matrix() was deprecated in pandas 0.23 and removed
    # in pandas 1.0; to_numpy() is the supported replacement.
    X = data[features].to_numpy()
    y = data.occupied.to_numpy()

    if pandas:
        # Pandas' built-in parallel_coordinates wants the target column
        # inside the frame it plots.
        parallel_coordinates(data[features + ['occupied']], 'occupied')
        if outpath:
            plt.savefig(outpath)
        else:
            plt.show()
    else:
        visualizer = ParallelCoordinates(   # Instantiate the visualizer
            classes=classes, features=features
        )
        visualizer.fit(X, y)                # Fit the data to the visualizer
        visualizer.transform(X)             # Transform the data
        visualizer.poof(outpath=outpath)    # Draw/show/poof the data
def pcoords(X, y, outpath, **kwargs):
    """
    Render a parallel coordinates plot onto a fresh axes and save it.

    Parameters
    ----------
    X : array-like
        Feature matrix to visualize.
    y : array-like
        Target labels used to color the coordinate lines.
    outpath : path
        Location on disk where the rendered figure is written.
    kwargs : dict
        Extra keyword arguments forwarded to ParallelCoordinates.
    """
    # A fresh figure/axes pair per call keeps repeated invocations from
    # drawing over one another.
    _, axes = plt.subplots()

    # Build the visualizer on the new axes, then fit and draw.
    viz = ParallelCoordinates(ax=axes, **kwargs)
    viz.fit(X, y)
    viz.transform(X)

    # Persist the rendered figure to disk.
    viz.poof(outpath=outpath)
def pcoords(X, y, outpath, **kwargs):
    """
    Render a parallel coordinates plot on a dedicated axes and save it.

    Parameters
    ----------
    X : array-like
        Feature matrix to visualize.
    y : array-like
        Target labels used to color the coordinate lines.
    outpath : path
        Location on disk where the rendered figure is written.
    kwargs : dict
        Extra keyword arguments forwarded to ParallelCoordinates.
    """
    # Create a new figure and axes
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # FIX: the axes created above was never handed to the visualizer, so
    # ParallelCoordinates silently drew on a different (implicit) axes and
    # `ax` was dead code. Pass it explicitly, matching the sibling helper.
    visualizer = ParallelCoordinates(ax=ax, **kwargs)
    visualizer.fit(X, y)
    visualizer.transform(X)

    # Save to disk
    visualizer.poof(outpath=outpath)
def pcoords(ax):
    """
    Build and fit a ParallelCoordinates visualizer for the room-occupancy
    dataset on the supplied axes, returning it without rendering.

    Parameters
    ----------
    ax : matplotlib Axes
        The axes the visualizer draws on.

    Returns
    -------
    ParallelCoordinates
        The fitted (and transformed) visualizer, ready to be poofed.
    """
    from yellowbrick.features import ParallelCoordinates

    # Specify the features of interest and the classes of the target.
    # NOTE(review): "C02" (zero) and the humidity near-duplicate look odd,
    # but these are column selectors for load_data — leave them as-is.
    features = ["temperature", "relative humidity", "light", "C02", "humidity"]
    target = "occupancy"
    classes = ['unoccupied', 'occupied']

    # Load the data
    X, y = load_data('occupancy', cols=features, target=target)

    # Instantiate the visualizer on the caller's axes, set a custom title,
    # then fit and transform the data.
    viz = ParallelCoordinates(ax=ax, classes=classes, features=features)
    viz.title = "Parallel Coordinates of Features to Predict Room Occupancy"
    viz.fit(X, y)
    viz.transform(X)
    return viz
# copy data to a new dataframe data_norm = data.copy() # normalize data to 0-1 range for feature in num_features: data_norm[feature] = (data[feature] - data[feature].mean(skipna=True)) / ( data[feature].max(skipna=True) - data[feature].min(skipna=True)) # Extract the numpy arrays from the data frame X = data_norm[num_features].to_numpy() y = data.Survived.to_numpy() # Instantiate the visualizer # Instantiate the visualizer visualizer = ParallelCoordinates(classes=classes, features=num_features) visualizer.fit(X, y) # Fit the data to the visualizer visualizer.transform(X) # Transform the data visualizer.poof(outpath="d://pcoords2.png") # Draw/show/poof the data plt.show() # Step 10 - stacked bar charts to compare survived/not survived # set up the figure size # %matplotlib inline plt.rcParams['figure.figsize'] = (20, 10) # make subplots fig, axes = plt.subplots(nrows=2, ncols=2) # make the data read to feed into the visulizer Sex_survived = data.replace({'Survived': { 1: 'Survived',
data_norm = data.copy() # normalize data to 0-1 range for feature in num_features: data_norm[feature] = (data[feature] - data[feature].mean(skipna=True)) / ( data[feature].max(skipna=True) - data[feature].min(skipna=True)) # Extract the numpy arrays from the data frame # X = data_norm[num_features].as_matrix() # y = data.Survived.as_matrix() X = data_norm[num_features].to_numpy() y = data.BendCurve.to_numpy() # Instantiate the visualizer visualizer = ParallelCoordinates(classes=classes, features=num_features) visualizer.fit(X, y) # Fit the data to the visualizer visualizer.transform(X) # Transform the data visualizer.poof(outpath="pcoords2.png") # Draw/show/poof the data plt.show() print("Starting Step 10 Here ") # Step 10 - stacked bar charts to compare survived/not survived #set up the figure size #%matplotlib inline plt.rcParams['figure.figsize'] = (20, 10) # make subplots fig, axes = plt.subplots(nrows=2, ncols=2) # make the data read to feed into the visulizer Sex_survived = data.replace(
# Notebook-style walkthrough: visualize the iris features with parallel
# coordinates, then compare a manual scale-then-fit workflow against an
# equivalent sklearn Pipeline.

# Load the iris data as a sklearn Bunch.
from sklearn.datasets import load_iris
data = load_iris()
data  # bare expression: notebook display of the Bunch

import pandas as pd
pd.read_csv(data.filename)  # notebook display of the raw CSV

# Draw per-class feature distributions with standard-normalized axes.
from yellowbrick.features import ParallelCoordinates
viz = ParallelCoordinates(
    features=data.feature_names,
    classes=data.target_names,
    normalize='standard',
)
viz.fit(data.data, data.target)
viz.transform(data.data)
viz.poof()

# Manual workflow: split, scale on the training fold only, then fit a tree.
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
scaler.fit(X_train)
X1 = scaler.transform(X_train)

clf = DecisionTreeClassifier()
clf.fit(X1, y_train)
X2 = scaler.transform(X_test)
clf.predict(X2)  # notebook display of test predictions

# Same workflow expressed as a single Pipeline.
from sklearn.pipeline import Pipeline
decision_pipeline = Pipeline([
    ('normalize', StandardScaler()),
    ('decision', DecisionTreeClassifier()),
])
decision_pipeline.fit(X_train, y_train)