Пример #1
0


# Project the dataset onto its first two principal components. The fitted
# estimator is kept in 'pca' and the projected samples in 'T'.
from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(df)
T = pca.transform(df)

# The transform result is a NumPy array without column names, so the two
# axes are labelled by hand when it is wrapped in a DataFrame for plotting.
# helper.drawVectors (defined outside this snippet) receives the raw array
# first; whatever it returns is reused below as the target axes.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, scaleFeatures)
T = pd.DataFrame(T, columns=['component1', 'component2'])
T.plot.scatter(
    x='component1',
    y='component2',
    marker='o',
    c=labels,
    alpha=0.75,
    ax=ax,
)
plt.show()


Пример #2
0
# Convert the 'rc' column to numbers; errors='coerce' turns entries that
# cannot be parsed into NaN instead of raising. Bracket indexing is used
# because attribute-style assignment (df.rc = ...) is discouraged by pandas.
df['rc'] = pd.to_numeric(df['rc'], errors='coerce')

# One-hot encode the listed categorical columns.
df = pd.get_dummies(df,
                    columns=[
                        'rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet',
                        'pe', 'ane'
                    ])

df = helper.scaleFeatures(df)

# Embed the data in two dimensions with Isomap. (A stray second
# manifold.Isomap(...) expression -- pasted estimator repr whose result was
# discarded -- used to follow the fit call and has been removed.)
iso = manifold.Isomap(n_neighbors=16, n_components=2)
iso.fit(df)
T = iso.transform(df)

# Plot the transformed data as a scatter plot.
# NOTE(review): 'pca' is not defined anywhere in this snippet, and the
# vectors drawn here come from a PCA fit while T is an Isomap embedding --
# confirm that overlaying them is intended and that 'pca' was fitted on a
# frame with the same columns as 'df'.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, False)
T = pd.DataFrame(T)
T.columns = ['component1', 'component2']
T.plot.scatter(x='component1',
               y='component2',
               marker='o',
               c=labels,
               alpha=0.75,
               ax=ax)
plt.show()
Пример #3
0
# Rescale the features before PCA only when the 'scaleFeatures' flag
# (set outside this snippet) is truthy.
if scaleFeatures:
    df = helper.scaleFeatures(df)

# Reduce the dataset to two principal components. The fitted estimator is
# kept in 'pca' and the projected samples in 'T'.
pca = PCA(n_components=2).fit(df)
T = pca.transform(df)

# The projection comes back as a NumPy array with no column names, so the
# principal-component axes are named explicitly when building the DataFrame
# that pandas plots. helper.drawVectors is called on the raw array first and
# its return value is passed on as the axes to draw the scatter into.
ax = helper.drawVectors(T, pca.components_, df.columns.values, plt, scaleFeatures)
T = pd.DataFrame(T, columns=['component1', 'component2'])
T.plot.scatter(
    x='component1',
    y='component2',
    marker='o',
    c=labels,
    alpha=0.75,
    ax=ax,
)
plt.show()


Пример #4
0

# Keep exactly two components. With n_components left unset PCA retains
# every component, and the two-name column assignment further down then
# fails with a length mismatch whenever df_iep has more than two features.
pca = PCA(n_components=2, svd_solver='full')
pca.fit(df_iep)
T = pca.transform(df_iep)


# In[64]:

import assignment2_helper as helper
scaleFeatures = True


# In[65]:

# helper.drawVectors gets the raw projection plus the component loadings and
# original column names; its return value is reused as the plotting axes.
ax = helper.drawVectors(T, pca.components_, df_iep.columns.values, plt, scaleFeatures)
# Wrap the projection in a DataFrame and name its two axes for pandas.
T = pd.DataFrame(T)
T.columns = ['component1', 'component2']
T.plot.scatter(x='component1', y='component2', marker='o', c=labels, alpha=0.75, ax=ax)
plt.show()


# In[69]:

# Remove the 'PATH' and 'ALLGRADEX' columns, then cast every remaining
# column to float.
df_spanish_iep = df_spanish_iep.drop(columns=['PATH', 'ALLGRADEX'])
df_spanish_iep = df_spanish_iep.astype(float)


# In[70]:

pca = PCA(svd_solver='full')