def save_corr_heatmap(corr: DenseMatrix, columns: List[str], path: str, title: str = "Correlation Matrix") -> None:
    """Draw a correlation matrix as an annotated heatmap and save it to ``path``.

    Args:
        corr: Matrix whose ``toArray()`` result supplies the heatmap cells.
        columns: Labels used for both the x-axis and y-axis ticks.
        path: Destination handed to ``savefig``.
        title: Title drawn on the heatmap axes.
    """
    frame = pd.DataFrame(corr.toArray().tolist())
    fig = plt.figure(figsize=(13, 8))
    try:
        # Draw on this figure's own axes instead of relying on pyplot's
        # implicit "current figure/axes" state.
        ax = fig.add_subplot(111)
        sns.heatmap(frame, xticklabels=columns, yticklabels=columns, annot=True, ax=ax)
        ax.set_title(title)
        fig.savefig(path)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise repeated calls accumulate open figures.
        plt.close(fig)
def test_dense_matrix_is_transposed(self):
    # The same 3x2 matrix built twice: once from values listed row by row
    # (isTransposed=True) and once from values listed column by column.
    row_major = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
    col_major = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
    self.assertEqual(row_major, col_major)

    # Element access and dense export must both follow the logical layout.
    expected = [[0, 4], [1, 6], [3, 9]]
    for i, expected_row in enumerate(expected):
        for j, expected_value in enumerate(expected_row):
            self.assertEqual(row_major[i, j], expected_value)
    self.assertTrue(array_equal(row_major.toArray(), expected))

    # Sparse conversion: the single zero entry is not stored, leaving five
    # values described by row indices and column pointers.
    sparse = row_major.toSparse()
    self.assertTrue(array_equal(sparse.rowIndices, [1, 2, 0, 1, 2]))
    self.assertTrue(array_equal(sparse.colPtrs, [0, 2, 5]))
    self.assertTrue(array_equal(sparse.values, [1, 3, 4, 6, 9]))
# Toy inputs: four samples, one genotype vector, two covariate columns.
g = np.array([0., 1., 2., 0.])
x = np.array([[1, -1], [2, -2], [3, -3], [4, -4.]])
b = np.array([0., 1.])
# Phenotype = genotype + covariate effect + a little Gaussian noise.
y = g + np.dot(x, b) + np.random.normal(scale=.01, size=g.size)
HR = '-' * 50


def _show_gwas(covariates):
    # Run the regression on a single-row DataFrame and print the expanded result.
    frame = spark.createDataFrame(
        [Row(genotypes=g.tolist(), phenotypes=y.tolist(), covariates=covariates)]
    )
    result = expand_struct(linear_regression_gwas('genotypes', 'phenotypes', 'covariates'))
    frame.select(result).show()


print(HR)
print('Version 1')
# Correct version: values are flattened column by column (order='F'), and the
# assertion confirms the DenseMatrix round-trips back to x.
dm = DenseMatrix(
    numRows=x.shape[0],
    numCols=x.shape[1],
    values=x.ravel(order='F').tolist(),
)
np.testing.assert_equal(x, dm.toArray())
print(dm.toArray())
_show_gwas(dm)

print(HR)
print('Version 2')
# Like the demo notebook with an explicit matrix field, but the values are
# flattened row by row (order='C'). Also wrong: there is deliberately no
# round-trip assertion here; the printed matrix shows the resulting layout.
dm = DenseMatrix(
    numRows=x.shape[0],
    numCols=x.shape[1],
    values=x.ravel(order='C').tolist(),
)
print(dm.toArray())
_show_gwas(dm)
print(HR)