Exemplo n.º 1
0
def save_corr_heatmap(corr: DenseMatrix,
                      columns: List[str],
                      path: str,
                      title: str = "Correlation Matrix") -> None:
    """Render a correlation matrix as an annotated heatmap and save it.

    Args:
        corr: Matrix whose ``toArray()`` result supplies the heatmap cells.
        columns: Labels used for both the x-axis and the y-axis ticks.
        path: Destination handed to ``Figure.savefig``.
        title: Title drawn above the heatmap.
    """
    # Build the frame first so a bad matrix fails before any figure exists.
    df = pd.DataFrame(corr.toArray().tolist())
    fig, ax = plt.subplots(figsize=(13, 8))
    try:
        sns.heatmap(df, xticklabels=columns, yticklabels=columns,
                    annot=True, ax=ax)
        ax.set_title(title)
        fig.savefig(path)
    finally:
        # Close this specific figure even when drawing or saving fails, so
        # repeated calls do not accumulate open pyplot figures.
        plt.close(fig)
Exemplo n.º 2
0
    def test_dense_matrix_is_transposed(self):
        """A transposed DenseMatrix must match its column-ordered equivalent.

        Builds the same 3x2 matrix twice, once from row-ordered values with
        ``isTransposed=True`` and once from column-ordered values, then checks
        equality, element access, ``toArray()`` and the ``toSparse()`` fields.
        """
        transposed = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
        plain = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
        self.assertEqual(transposed, plain)

        expected = [[0, 4], [1, 6], [3, 9]]
        for row_idx, expected_row in enumerate(expected):
            for col_idx, expected_value in enumerate(expected_row):
                self.assertEqual(transposed[row_idx, col_idx], expected_value)
        self.assertTrue(array_equal(transposed.toArray(), expected))

        # The expected sparse form holds only the five non-zero entries.
        sparse = transposed.toSparse()
        sparse_checks = (
            (sparse.rowIndices, [1, 2, 0, 1, 2]),
            (sparse.colPtrs, [0, 2, 5]),
            (sparse.values, [1, 3, 4, 6, 9]),
        )
        for actual, wanted in sparse_checks:
            self.assertTrue(array_equal(actual, wanted))
Exemplo n.º 3
0
    def test_dense_matrix_is_transposed(self):
        """A transposed DenseMatrix must match its column-ordered equivalent.

        Builds the same 3x2 matrix twice, once from row-ordered values with
        ``isTransposed=True`` and once from column-ordered values, then checks
        equality, element access, ``toArray()`` and the ``toSparse()`` fields.
        """
        transposed = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
        plain = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
        self.assertEqual(transposed, plain)

        expected = [[0, 4], [1, 6], [3, 9]]
        for row_idx, expected_row in enumerate(expected):
            for col_idx, expected_value in enumerate(expected_row):
                self.assertEqual(transposed[row_idx, col_idx], expected_value)
        self.assertTrue(array_equal(transposed.toArray(), expected))

        # The expected sparse form holds only the five non-zero entries.
        sparse = transposed.toSparse()
        sparse_checks = (
            (sparse.rowIndices, [1, 2, 0, 1, 2]),
            (sparse.colPtrs, [0, 2, 5]),
            (sparse.values, [1, 3, 4, 6, 9]),
        )
        for actual, wanted in sparse_checks:
            self.assertTrue(array_equal(actual, wanted))
Exemplo n.º 4
0
# Toy inputs for the regression calls below: `g` is passed as 'genotypes',
# `x` (4 rows, 2 columns) is the covariate matrix, and `y` is passed as
# 'phenotypes'.
g = np.array([0., 1., 2., 0.])
x = np.array([
    [1, -1],
    [2, -2],
    [3, -3],
    [4, -4.],
])
# With b = [0, 1], np.dot(x, b) selects the second column of x.
b = np.array([0., 1.])
# y = g + x.b + Gaussian noise (scale 0.01). No seed is set in the visible
# code, so y differs from run to run.
y = g + np.dot(x, b) + np.random.normal(scale=.01, size=g.size)

# Horizontal rule printed between the variants.
HR = '-' * 50
print(HR)
print('Version 1')
# Correct version: the values are flattened column by column (order='F').
# The assertion below confirms that dm.toArray() reproduces x exactly.
dm = DenseMatrix(numRows=x.shape[0], numCols=x.shape[1], values=x.ravel(order='F').tolist())
np.testing.assert_equal(x, dm.toArray())
print(dm.toArray())
# Single-row DataFrame holding the genotype list, phenotype list and the
# covariate matrix; the regression result struct is expanded and shown.
spark.createDataFrame([Row(genotypes=g.tolist(), phenotypes=y.tolist(), covariates=dm)])\
    .select(expand_struct(linear_regression_gwas('genotypes', 'phenotypes', 'covariates')))\
    .show()

print(HR)
print('Version 2')
# Same pipeline, but the values are flattened row by row (order='C').
# The original author notes that this mirrors a demo notebook with an explicit
# matrix field and is also wrong. No equality assertion is made here; the
# matrix is only printed so it can be compared with x by eye.
dm = DenseMatrix(numRows=x.shape[0], numCols=x.shape[1], values=x.ravel(order='C').tolist())
print(dm.toArray())
spark.createDataFrame([Row(genotypes=g.tolist(), phenotypes=y.tolist(), covariates=dm)])\
    .select(expand_struct(linear_regression_gwas('genotypes', 'phenotypes', 'covariates')))\
    .show()

print(HR)