tipos3 = tipos3.to_frame() tipos3 = tipos3.drop(tipos3[tipos3.iloc[:, 0] != 'category'].index) tipos3t = np.transpose(tipos3) df_str = df_v2[list(tipos3t.head(0))] # In[] #Normalización de datos para PCA df_num_norm = StandardScaler().fit_transform(df_num) df_num_norm = pd.DataFrame(df_num_norm, columns=list(tipos2t.head())) # In[] #Matriz de covarianza, correlaciones, gráfica de dependencia líneal y número de condición cov_df = df_num_norm.cov() var_global = sum(np.diag(cov_df)) det = np.linalg.det(cov_df) corr_df = df_num_norm.corr() sns.heatmap(corr_df, center=0, cmap='Blues_r') cond_cov = np.linalg.cond(cov_df) # In[] #Identificación de outliers y Eliminación del 10% #a=[] a_rob = [] media_num_norm = np.array(df_num_norm.mean()) mediana_num_norm = np.array(df_num_norm.median()) inv_cov = np.linalg.inv(np.array(cov_df)) for i in range(len(df_num_norm.index)):
# Standardize the selected columns and inspect their covariance structure.
# (Removed a large block of dead commented-out experiment code that obscured
# the four live statements below.)
# NOTE(review): `df` and `x_y` (list of column names, which includes 'price')
# are defined earlier in the file.
cov_value = df.loc[:, x_y].replace(np.nan, 0).values   # NaN -> 0 before scaling
cov_value = StandardScaler().fit_transform(cov_value)
cov_value = pd.DataFrame(cov_value, columns=x_y)
print(cov_value)

co_variance_matrix = cov_value.cov()

# In[110]:
print(co_variance_matrix)
# Covariances against 'price', strongest first.
print(co_variance_matrix.loc[:, ['price']].sort_values('price', ascending=False))