# In[9]:

# Preview the first rows of `_scaled`, labelled with df's column names.
pd.DataFrame(_scaled, columns=df.columns).head()


# ### Biplot
#
# A scatterplot projected onto the first two principal components.

# In[10]:

# Start a fresh figure, then draw the triplot coloured by the PartyID column.
# `data_scaled` is reused by the biplot cell below.
plt.figure()
data_scaled = pd.DataFrame(_scaled, columns=df.columns)
triplot(pca, data_scaled, title='ANES 2012 Biplot', color=data_scaled.PartyID)


# In[11]:

biplot(pca, data_scaled, title='ANES 2012 Biplot', color=data_scaled.PartyID)


# Sure, all of the original axes are negative in the first component. That's okay! To quote Dr. Eric Larson:
# > Because all the data is somewhat correlated, giving a mostly unidimensional representation. Positive/negative isn't so important because eigenvectors could theoretically start anywhere--but traditionally we use the origin.
#
# **Update:** The demographic factor of education level has a different sign from the others.

# In[12]:

# NOTE(review): only the header of fpc_ordered is visible in this excerpt; it is
# left commented out so the file parses until the body is restored.
# def fpc_ordered(corr):
#
# How much of the variance in the data is explained by each successive component?

# In[11]:

plot_explained_variance(pca)


# ### Biplot
#
# A scatterplot projected onto the first two principal components.

# In[12]:

# Label `_scaled` with df's column names; `data_scaled` is reused by the
# biplot cell below. Both plots are coloured by the PartyID column and
# titled with the module-level YEAR.
data_scaled = pd.DataFrame(_scaled, columns=df.columns)
triplot(pca, data_scaled, title='ANES {} Biplot'.format(YEAR), color=data_scaled.PartyID)


# In[13]:

biplot(pca, data_scaled, title='ANES {} Biplot'.format(YEAR), color=data_scaled.PartyID)


# In[14]:

# Bare expression: shown as cell output in a notebook, a no-op in a script.
pca.explained_variance_


# ## Dropping na
# Finishes labelling a figure that was started before this excerpt.
plt.ylabel("2nd component")

# Separate figure for the X_poly projection, with the `reds` / `blues`
# selections drawn as red and blue points.
plt.figure()
# plt.subplot(4, 1, 3, aspect='equal')
plt.plot(X_poly[reds, 0], X_poly[reds, 1], "r.")
plt.plot(X_poly[blues, 0], X_poly[blues, 1], "b.")
plt.title("Projection by KPCA")
# Raw string: "\p" is not a valid escape sequence in a normal string literal
# and triggers a warning on modern Python; the resulting text is unchanged.
plt.xlabel(r"1st principal component in space induced by $\phi$")
plt.ylabel("2nd component")

# Separate figure for X_pback, using the same red/blue selections.
plt.figure()
# plt.subplot(4, 1, 4, aspect='equal')
plt.plot(X_pback[reds, 0], X_pback[reds, 1], "r.")
plt.plot(X_pback[blues, 0], X_pback[blues, 1], "b.")
plt.title("Original space after inverse transform")
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")


# In[14]:

df.pid_self.value_counts()


# In[15]:

df.postvote_presvtwho.value_counts()


# In[19]:

# Label X with df's column names before passing it to triplot.
data = pd.DataFrame(X, columns=df.columns)
triplot(pca, data)


# In[ ]: