# In[9]:

# Wrap the `_scaled` values in a frame labelled with df's column names and
# show only the first five rows as this cell's output.
pd.DataFrame(data=_scaled, columns=df.columns).head(n=5)


# ### Biplot
# 
# A scatterplot projected onto the first two principal components.

# In[10]:

# Label the `_scaled` values with df's column names so the plot helper can
# refer to variables (and the PartyID colouring column) by name.
data_scaled = pd.DataFrame(data=_scaled, columns=df.columns)

plt.figure()
triplot(
    pca,
    data_scaled,
    color=data_scaled['PartyID'],
    title='ANES 2012 Biplot',
)


# In[11]:

# Same inputs as the triplot cell, passed to the `biplot` helper; points are
# coloured by the PartyID column of the scaled frame.
biplot(
    pca,
    data_scaled,
    color=data_scaled['PartyID'],
    title='ANES 2012 Biplot',
)


# Sure, all of the original axes are negative in the first component. That's okay! To quote Dr. Eric Larson: 
# > Because all the data is somewhat correlated, giving a mostly unidimensional representation. Positive/negative isn't so important because eigenvectors could theoretically start anywhere--but traditionally we use the origin.
# 
# **Update:** The demographic factor of education level has a different sign from the others.

# In[12]:

def fpc_ordered(corr):
Пример #2
0
#
# How much of the variance in the data is explained by each successive component?

# In[11]:

# Pass `pca` to the plot_explained_variance helper (defined outside this
# chunk) to chart the per-component variance asked about in the text above.
plot_explained_variance(pca)

# ### Biplot
#
# A scatterplot projected onto the first two principal components.

# In[12]:

# Label the `_scaled` values with df's column names so the plot helper can
# refer to variables (and the PartyID colouring column) by name.
data_scaled = pd.DataFrame(data=_scaled, columns=df.columns)

# The title is built from the YEAR constant, so it tracks the survey year in use.
triplot(
    pca, data_scaled,
    color=data_scaled['PartyID'],
    title='ANES {} Biplot'.format(YEAR),
)

# In[13]:

# Biplot of the scaled frame, coloured by its PartyID column; the title is
# built from the YEAR constant.
biplot(
    pca, data_scaled,
    color=data_scaled['PartyID'],
    title='ANES {} Biplot'.format(YEAR),
)

# In[14]:

# Bare expression: the notebook shows `pca.explained_variance_` as this
# cell's output.
# NOTE(review): presumably scikit-learn's per-component variance array —
# confirm against the cell that constructs `pca`.
pca.explained_variance_

# ## Dropping na
# Sets the y-axis label on whatever axes are current at this point.
# NOTE(review): the plotting calls this label belongs to are not visible in
# this chunk — confirm it still follows its figure and is not a stray line.
plt.ylabel("2nd component")

# Start a separate figure for this view instead of a panel in a shared
# subplot grid.
plt.figure()
# Columns 0 and 1 of X_poly are the two plotted axes; rows selected by `reds`
# are drawn as red dots and rows selected by `blues` as blue dots.
plt.plot(X_poly[reds, 0], X_poly[reds, 1], "r.")
plt.plot(X_poly[blues, 0], X_poly[blues, 1], "b.")
plt.title("Projection by KPCA")
# Raw string: "\p" is not a valid Python escape sequence, so the non-raw form
# triggers an invalid-escape warning on modern interpreters. The text handed
# to matplotlib's mathtext (`$\phi$`) is unchanged.
plt.xlabel(r"1st principal component in space induced by $\phi$")
plt.ylabel("2nd component")

# Separate figure for the X_pback points (per the title, the data mapped back
# to the original space), rather than a panel in a shared subplot grid.
plt.figure()
# One plot call with two (x, y, fmt) groups: rows selected by `reds` as red
# dots, rows selected by `blues` as blue dots; columns 0 and 1 are the axes.
plt.plot(
    X_pback[reds, 0], X_pback[reds, 1], "r.",
    X_pback[blues, 0], X_pback[blues, 1], "b.",
)
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.title("Original space after inverse transform")

# In[14]:

# Frequency of each distinct value in the pid_self column, shown as the
# cell's output.
df['pid_self'].value_counts()

# In[15]:

# Frequency of each distinct value in the postvote_presvtwho column, shown as
# the cell's output.
df['postvote_presvtwho'].value_counts()

# In[19]:

# Label the X values with df's column names, then hand the frame to the
# triplot helper using its default title and colouring.
data = pd.DataFrame(data=X, columns=df.columns)
triplot(pca, data)

# In[ ]: