def de_mean_matrix(A):
    """Return a matrix shaped like A with each column's mean subtracted.

    The per-column means are the first value returned by ``scale(A)``.
    Every entry ``A[i][j]`` is shifted by the mean of column ``j``, so
    each column of the result is centered on 0.
    """
    num_rows, num_cols = algebra.shape(A)
    column_means, _ = scale(A)

    def centered(i, j):
        # Shift the entry by the mean of the column it lives in.
        return A[i][j] - column_means[j]

    return algebra.mk_matrix(num_rows, num_cols, centered)
def correlation_matrix(data):
    """Build the square matrix of pairwise column correlations of data.

    The result is num_columns x num_columns; its (i, j)th entry is
    ``stats.correlation`` applied to column i and column j of ``data``.
    """
    _num_rows, num_columns = algebra.shape(data)

    def pairwise_correlation(i, j):
        column_i = algebra.get_column(data, i)
        column_j = algebra.get_column(data, j)
        return stats.correlation(column_i, column_j)

    return algebra.mk_matrix(num_columns, num_columns, pairwise_correlation)
def rescale(data_matrix):
    """Standardize each column of data_matrix to mean 0 and StdDev 1.

    Means and standard deviations come from ``scale(data_matrix)``.
    Columns whose standard deviation is not positive are returned
    unchanged, since they cannot be divided by their deviation.
    """
    means, stddevs = scale(data_matrix)

    def standardized(i, j):
        spread = stddevs[j]
        # Guard: a column with no deviation is passed through untouched.
        if not spread > 0:
            return data_matrix[i][j]
        return (data_matrix[i][j] - means[j]) / spread

    row_count, col_count = algebra.shape(data_matrix)
    return algebra.mk_matrix(row_count, col_count, standardized)