# Reduce the dataset to two principal components. The fitted estimator is
# kept in 'pca' and the projected samples in 'T'.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
T = pca.fit(df).transform(df)

# The projection comes back as a NumPy array without column names, so it is
# wrapped in a DataFrame and labelled by principal-component axis. The axes
# returned by helper.drawVectors are reused so the scatter plot shares the
# same figure.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, scaleFeatures)
T = pd.DataFrame(T, columns=['component1', 'component2'])
T.plot.scatter(
    x='component1',
    y='component2',
    marker='o',
    c=labels,
    alpha=0.75,
    ax=ax,
)
plt.show()
# Coerce the 'rc' column to numeric; values that cannot be parsed become NaN.
# Bracket indexing is used instead of attribute assignment so the column is
# always the target of the write.
# NOTE(review): confirm NaNs produced by errors='coerce' are handled before
# the data reaches Isomap.fit.
df['rc'] = pd.to_numeric(df['rc'], errors='coerce')

# One-hot encode the nominal columns.
df = pd.get_dummies(
    df,
    columns=['rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane'],
)

df = helper.scaleFeatures(df)

# Embed the data in two dimensions with Isomap (16 neighbours per sample).
# A stray, unused manifold.Isomap(...) expression that followed the fit in the
# original (a pasted estimator repr) has been dropped; it had no effect.
iso = manifold.Isomap(n_neighbors=16, n_components=2)
iso.fit(df)
T = iso.transform(df)

# Plot the transformed data as a scatter plot.
# NOTE(review): 'pca' is not fitted in this snippet, so this call relies on a
# PCA instance created elsewhere; the call is kept only to obtain the axes
# object that the scatter plot draws on. Confirm this is intended.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, False)
T = pd.DataFrame(T)
T.columns = ['component1', 'component2']
T.plot.scatter(x='component1', y='component2', marker='o', c=labels, alpha=0.75, ax=ax)
plt.show()
# Scale the features first when the 'scaleFeatures' switch is on.
if scaleFeatures:
    df = helper.scaleFeatures(df)

# Reduce the dataset to two principal components. The fitted estimator is
# kept in 'pca' and the projected samples in 'T'.
pca = PCA(n_components=2)
T = pca.fit(df).transform(df)

# The projection is a NumPy array with no column names. helper.drawVectors is
# called on the raw array first, and its returned axes are reused for the
# scatter plot; the array is then wrapped in a DataFrame whose columns are
# named after the principal-component axes.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, scaleFeatures)
T = pd.DataFrame(T, columns=['component1', 'component2'])
T.plot.scatter(
    x='component1',
    y='component2',
    marker='o',
    c=labels,
    alpha=0.75,
    ax=ax,
)
plt.show()
# Project df_iep onto its first two principal components. n_components=2 is
# set explicitly because the projection is labelled with exactly two column
# names below; without it PCA keeps every component and the column assignment
# fails with a length mismatch whenever there are more than two.
pca = PCA(n_components=2, svd_solver='full')
pca.fit(df_iep)
T = pca.transform(df_iep)


# In[64]:

import assignment2_helper as helper

scaleFeatures = True


# In[65]:

# helper.drawVectors receives the raw projection and returns the axes that the
# scatter plot is drawn on.
ax = helper.drawVectors(T, pca.components_, df_iep.columns.values, plt, scaleFeatures)
T = pd.DataFrame(T)
T.columns = ['component1', 'component2']
T.plot.scatter(x='component1', y='component2', marker='o', c=labels, alpha=0.75, ax=ax)
plt.show()


# In[69]:

# Drop the 'PATH' and 'ALLGRADEX' columns and cast the remaining ones to float.
df_spanish_iep = df_spanish_iep.drop(['PATH', 'ALLGRADEX'], axis=1)
df_spanish_iep = df_spanish_iep.astype(float)


# In[70]:

# NOTE(review): no n_components is given here; if this projection is later
# labelled with two column names like the one above, pass n_components=2.
# The code that uses this estimator is outside the visible chunk.
pca = PCA(svd_solver='full')