示例#1
0
# Keep only the df_v2 columns whose labels are produced by iterating
# tipos3t.head(0).
# NOTE(review): presumably these are the string-typed variables, going by the
# name df_str -- confirm against where tipos3t is built.
df_str = df_v2[list(tipos3t.head(0))]

# In[]

# Data normalization for PCA: scale df_num with StandardScaler and wrap the
# result in a DataFrame labelled with the names obtained from tipos2t.
df_num_norm = pd.DataFrame(
    StandardScaler().fit_transform(df_num),
    columns=list(tipos2t.head()),
)

# In[]

# Covariance matrix, correlations, linear-dependence plot and condition number
# of the standardized data.
cov_df = df_num_norm.cov()
corr_df = df_num_norm.corr()

# Heatmap of the correlations, colour scale centred on zero.
sns.heatmap(corr_df, center=0, cmap='Blues_r')

# Scalar summaries of the covariance matrix: the sum of its diagonal, its
# determinant and its condition number.
var_global = sum(np.diag(cov_df))
det = np.linalg.det(cov_df)
cond_cov = np.linalg.cond(cov_df)

# In[]

# Outlier identification and removal of 10%.
# Only the distance computation is visible in this excerpt; the remainder of
# the loop body lies beyond it.
#a=[]
a_rob = []
# Column-wise mean and median of the standardized data, as plain arrays.
# NOTE(review): in the visible code media_num_norm is referenced only by the
# commented-out line inside the loop.
media_num_norm = np.array(df_num_norm.mean())
mediana_num_norm = np.array(df_num_norm.median())
# Inverse of the covariance matrix, computed once before the loop.
inv_cov = np.linalg.inv(np.array(cov_df))
for i in range(len(df_num_norm.index)):
    #b = distance.mahalanobis(np.array(df_num_norm.iloc[i,:]),media_num_norm,inv_cov)
    # Distance of row i from the median vector (not the mean), using inv_cov.
    # NOTE(review): `distance` is presumably scipy.spatial.distance -- confirm
    # against the file's imports.
    b_rob = distance.mahalanobis(np.array(df_num_norm.iloc[i, :]),
                                 mediana_num_norm, inv_cov)
示例#2
0
# Assemble a single frame: y_var_df joined with X_vars_df, then
# target_names_df attached by matching the values of df.target against
# target_names_df's index.
# NOTE(review): left_on receives the Series df.target rather than the column
# label; pandas may add a `key_0` column to the result in that case, which
# left_on='target' would avoid -- confirm before changing.
df = y_var_df.join(X_vars_df)
df = pd.merge(df, target_names_df, left_on=df.target, right_index=True)

# %%
"""
Standardize X Data
"""
# Scale the predictors with StandardScaler and rebuild a DataFrame that keeps
# the original column labels of X_vars_df.
X_normalized = pd.DataFrame(
    StandardScaler().fit_transform(X_vars_df),
    columns=X_vars_df.columns,
)

# %%
"""
Exploratory analysis
"""
# Pairwise correlations between the standardized variables
correlation_matrix = X_normalized.corr()

# Fit a PCA that keeps every possible dimension (one component per original
# variable) and project the standardized data onto those components.
# The PCA is fitted on X_normalized itself; correlation_matrix is not passed
# to it.
pca = PCA(n_components=X_vars_df.shape[1])
pc_matrix = pca.fit_transform(X_normalized)

# Extract eigenvalues and eigenvectors from the fitted PCA object's attributes
# and store them in DataFrames.
# -- Labels for the eigenvalues: 'PC_1', 'PC_2', ... one per column of
#    X_normalized
eigenvalues_index = list(
    map('PC_{}'.format, range(1, len(X_normalized.columns) + 1))
)

# --Get actual eigenvalues df
eigenvalues_df = \
    pd.DataFrame(data=pca.explained_variance_, columns=['Eigenvalues']