Example #1
    def test_linear_mixed_model_function(self):
        n, f, m = 4, 2, 3
        y = np.array([0.0, 1.0, 8.0, 9.0])
        x = np.array([[1.0, 0.0],
                      [1.0, 2.0],
                      [1.0, 1.0],
                      [1.0, 4.0]])
        z = np.array([[0.0, 0.0, 1.0],
                      [0.0, 1.0, 2.0],
                      [1.0, 2.0, 0.0],
                      [2.0, 0.0, 1.0]])

        p_path = utils.new_temp_file()

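        # map a hard-call dosage (0.0 / 1.0 / 2.0) to the corresponding diploid genotype call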
        def make_call(gt):
            if gt == 0.0:
                return hl.Call([0, 0])
            if gt == 1.0:
                return hl.Call([0, 1])
            if gt == 2.0:
                return hl.Call([1, 1])

        data = [{'v': j, 's': i, 'y': y[i], 'x1': x[i, 1], 'zt': make_call(z[i, j])}
                for i in range(n) for j in range(m)]
        ht = hl.Table.parallelize(data, hl.dtype('struct{v: int32, s: int32, y: float64, x1: float64, zt: tcall}'))
        mt = ht.to_matrix_table(row_key=['v'], col_key=['s'], col_fields=['x1', 'y'])
        colsort = np.argsort(mt.key_cols_by().s.collect()).tolist()
        mt = mt.choose_cols(colsort)

        rrm = hl.realized_relationship_matrix(mt.zt).to_numpy()

        # kinship path agrees with from_kinship
        model, p = hl.linear_mixed_model(mt.y, [1, mt.x1], k=rrm, p_path=p_path, overwrite=True)
        model0, p0 = LinearMixedModel.from_kinship(y, x, rrm, p_path, overwrite=True)
        assert model0._same(model)
        assert np.allclose(p0, p)

        # random effects path with standardize=True agrees with low-rank rrm
        s0, u0 = np.linalg.eigh(rrm)
        s0 = np.flip(s0, axis=0)[:m]
        p0 = np.fliplr(u0).T[:m, :]
        model, p = hl.linear_mixed_model(mt.y, [1, mt.x1], z_t=mt.zt.n_alt_alleles(), p_path=p_path, overwrite=True)
        model0 = LinearMixedModel(p0 @ y, p0 @ x, s0, y, x, p_path=p_path)
        assert model0._same(model)

        # random effects path with standardize=False agrees with from_random_effects
        model0, p0 = LinearMixedModel.from_random_effects(y, x, z, p_path, overwrite=True)
        model, p = hl.linear_mixed_model(mt.y, [1, mt.x1], z_t=mt.zt.n_alt_alleles(), p_path=p_path, overwrite=True, standardize=False)
        assert model0._same(model)
        assert np.allclose(p0, p.to_numpy())
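Note: once hl.linear_mixed_model has written the projection to p_path and returned (model, p), the model can be fit exactly as in the math tests below. A minimal sketch, reusing model from the code above and borrowing the fixed gamma = 2.0 from test_linear_mixed_model_math; only calls that appear in those tests are used:

    import numpy as np

    gamma = 2.0                                        # fixed variance ratio, as in the math tests
    model.fit(np.log(gamma))                           # constrained fit at log(gamma)
    print(model.beta, model.sigma_sq)                  # fixed-effect estimates and residual variance
    print(model.compute_neg_log_reml(np.log(gamma)))   # negative log REML, up to a constant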
Example #2
    def test_linear_mixed_model_math(self):
        gamma = 2.0  # testing at fixed value of gamma
        n, f, m = 4, 2, 3
        y = np.array([0.0, 1.0, 8.0, 9.0])
        x = np.array([[1.0, 0.0],
                      [1.0, 2.0],
                      [1.0, 1.0],
                      [1.0, 4.0]])
        z = np.array([[0.0, 0.0, 1.0],
                      [0.0, 1.0, 2.0],
                      [1.0, 2.0, 4.0],
                      [2.0, 4.0, 8.0]])
        k = z @ z.T
        v = k + np.eye(4) / gamma
        v_inv = np.linalg.inv(v)

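        # closed-form REML quantities at the fixed gamma: the GLS estimate of beta, the
        # variance estimate sigma_sq, and the negative log REML likelihood up to a constant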
        beta = np.linalg.solve(x.T @ v_inv @ x, x.T @ v_inv @ y)
        residual = y - x @ beta
        sigma_sq = 1 / (n - f) * (residual @ v_inv @ residual)
        sv = sigma_sq * v
        neg_log_lkhd = 0.5 * (np.linalg.slogdet(sv)[1] + np.linalg.slogdet(x.T @ np.linalg.inv(sv) @ x)[1])  # plus C

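        # alternative model: append the covariate x_star and form the likelihood-ratio statistic chi_sq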
        x_star = np.array([1.0, 0.0, 1.0, 0.0])
        a = x_star.reshape(n, 1)
        x1 = np.hstack([a, x])
        beta1 = np.linalg.solve(x1.T @ v_inv @ x1, x1.T @ v_inv @ y)
        residual1 = y - x1 @ beta1
        chi_sq = n * np.log((residual @ v_inv @ residual) / (residual1 @ v_inv @ residual1))

        # test full-rank fit
        model, p = LinearMixedModel.from_kinship(y, x, k)
        model.fit(np.log(gamma))
        self.assertTrue(np.allclose(model.beta, beta))
        self.assertAlmostEqual(model.sigma_sq, sigma_sq)
        self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

        # test full-rank alternative
        pa = p @ a
        stats = model.fit_alternatives_numpy(pa).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        pa_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)
        stats = model.fit_alternatives(pa_t_path).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        # test low-rank fit
        model, p = LinearMixedModel.from_mixed_effects(y, x, z)
        model.fit(np.log(gamma))
        self.assertTrue(np.allclose(model.beta, beta))
        self.assertAlmostEqual(model.sigma_sq, sigma_sq)
        self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

        # test low_rank alternative
        pa = p @ a
        stats = model.fit_alternatives_numpy(pa, a).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        a_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(a.T).write(a_t_path, force_row_major=True)
        pa_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)
        stats = model.fit_alternatives(pa_t_path, a_t_path).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)
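The full-rank model works in a rotated basis P y, P x in which the covariance is diagonal. A minimal NumPy sketch of the property this relies on, reusing z and k from the test above and the same descending eigendecomposition that the next example constructs explicitly:

    import numpy as np

    z = np.array([[0.0, 0.0, 1.0],
                  [0.0, 1.0, 2.0],
                  [1.0, 2.0, 4.0],
                  [2.0, 4.0, 8.0]])
    k = z @ z.T

    # eigendecomposition of k with eigenvalues in descending order
    s0, u0 = np.linalg.eigh(k)
    s0 = np.flip(s0, axis=0)
    p0 = np.fliplr(u0).T

    # the projection diagonalizes the kinship matrix, so the rotated model
    # (p0 @ y, p0 @ x, s0) has uncorrelated errors
    assert np.allclose(p0 @ k @ p0.T, np.diag(s0))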
Example #3
    def test_linear_mixed_model_math(self):
        gamma = 2.0  # testing at fixed value of gamma
        n, f, m = 4, 2, 3
        y = np.array([0.0, 1.0, 8.0, 9.0])
        x = np.array([[1.0, 0.0],
                      [1.0, 2.0],
                      [1.0, 1.0],
                      [1.0, 4.0]])
        z = np.array([[0.0, 0.0, 1.0],
                      [0.0, 1.0, 2.0],
                      [1.0, 2.0, 4.0],
                      [2.0, 4.0, 8.0]])
        k = z @ z.T
        v = k + np.eye(4) / gamma
        v_inv = np.linalg.inv(v)

        beta = np.linalg.solve(x.T @ v_inv @ x, x.T @ v_inv @ y)
        residual = y - x @ beta
        sigma_sq = 1 / (n - f) * (residual @ v_inv @ residual)
        sv = sigma_sq * v
        neg_log_lkhd = 0.5 * (np.linalg.slogdet(sv)[1] + np.linalg.slogdet(x.T @ np.linalg.inv(sv) @ x)[1])  # plus C

        x_star = np.array([1.0, 0.0, 1.0, 0.0])
        a = x_star.reshape(n, 1)
        x1 = np.hstack([a, x])
        beta1 = np.linalg.solve(x1.T @ v_inv @ x1, x1.T @ v_inv @ y)
        residual1 = y - x1 @ beta1
        chi_sq = n * np.log((residual @ v_inv @ residual) / (residual1 @ v_inv @ residual1))

        # test from_kinship, full-rank fit
        model, p = LinearMixedModel.from_kinship(y, x, k)
        s0, u0 = np.linalg.eigh(k)
        s0 = np.flip(s0, axis=0)
        p0 = np.fliplr(u0).T
        self.assertTrue(model._same(LinearMixedModel(p0 @ y, p0 @ x, s0)))

        model.fit(np.log(gamma))
        self.assertTrue(np.allclose(model.beta, beta))
        self.assertAlmostEqual(model.sigma_sq, sigma_sq)
        self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

        # test full-rank alternative
        pa = p @ a
        stats = model.fit_alternatives_numpy(pa).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        pa_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)
        stats = model.fit_alternatives(pa_t_path).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        # test from_random_effects, low-rank fit
        s0, p0 = s0[:m], p0[:m, :]
        # test BlockMatrix path
        temp_path = utils.new_temp_file()
        model, _ = LinearMixedModel.from_random_effects(y, x, 
                                                        BlockMatrix.from_numpy(z),
                                                        p_path=temp_path,
                                                        complexity_bound=0)
        lmm = LinearMixedModel(p0 @ y, p0 @ x, s0, y, x, p_path=temp_path)
        self.assertTrue(model._same(lmm))
        # test ndarray path
        model, p = LinearMixedModel.from_random_effects(y, x, z)
        lmm = LinearMixedModel(p0 @ y, p0 @ x, s0, y, x)
        self.assertTrue(model._same(lmm))

        model.fit(np.log(gamma))
        self.assertTrue(np.allclose(model.beta, beta))
        self.assertAlmostEqual(model.sigma_sq, sigma_sq)
        self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

        # test low_rank alternative
        pa = p @ a
        stats = model.fit_alternatives_numpy(pa, a).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)

        a_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(a.T).write(a_t_path, force_row_major=True)
        pa_t_path = utils.new_temp_file()
        BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)
        stats = model.fit_alternatives(pa_t_path, a_t_path).collect()[0]
        self.assertAlmostEqual(stats.beta, beta1[0])
        self.assertAlmostEqual(stats.chi_sq, chi_sq)
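Each alternative adds a single covariate x_star, so chi_sq is a likelihood-ratio statistic with one degree of freedom. A small illustration of how it converts to a p-value, reusing chi_sq computed in the test above; scipy is not used by these tests and is only assumed here for the sketch:

    from scipy.stats import chi2

    # one added covariate -> 1 degree of freedom for the likelihood-ratio test
    p_value = chi2.sf(chi_sq, df=1)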