def _nearpsd(A): r""" Obtain the "closest" positive semidefinite matrix to A.""" n = A.shape[0] eigval, eigvec = np.linalg.eig(A) val = np.matrix(np.maximum(eigval, 0)) vec = np.matrix(eigvec) T = 1 / (np.multiply(vec, vec) * val.T) T = np.matrix(np.sqrt(np.diag(np.array(T).reshape((n))))) B = T * vec * np.diag(np.array(np.sqrt(val)).reshape((n))) out = np.real(B * B.T) return out @typecheck(mt=MatrixTable, genotype=oneof(expr_int32, expr_float64, expr_call), beta=oneof(expr_float64, expr_array(expr_float64)), h2=oneof(float, int, list, np.ndarray), popstrat=nullable(oneof(expr_int32, expr_float64)), popstrat_var=nullable(oneof(float, int)), exact_h2=bool) def calculate_phenotypes(mt, genotype, beta, h2, popstrat=None, popstrat_var=None, exact_h2=False): r"""Calculates phenotypes by multiplying genotypes and betas. Parameters ----------
import hail as hl
from hail.expr.expressions import expr_array, expr_call, expr_int32
from hail.typecheck import typecheck


@typecheck(lgt=expr_call, la=expr_array(expr_int32))
def lgt_to_gt(lgt, la):
    """Translate a local genotype call (LGT) into a GT call.

    Each of the two allele indices held in `lgt` is used to look up an entry
    of the local alleles array `la`; the two looked-up values form the
    returned call.

    Parameters
    ----------
    lgt : :class:`.CallExpression`
        LGT value.
    la : :class:`.ArrayExpression`
        Local alleles array.

    Returns
    -------
    :class:`.CallExpression`

    Notes
    -----
    This function assumes diploid genotypes: only ``lgt[0]`` and ``lgt[1]``
    are read.

    NOTE(review): no ``phased`` argument is passed to ``hl.call``, so the
    result uses that function's default -- confirm this is intended when
    `lgt` is phased.
    """
    first_allele = la[lgt[0]]
    second_allele = la[lgt[1]]
    return hl.call(first_allele, second_allele)
import hail as hl from hail.typecheck import typecheck from hail.expr.expressions import expr_call, expr_numeric, expr_array, \ check_entry_indexed, check_row_indexed @typecheck(call_expr=expr_call, loadings_expr=expr_array(expr_numeric), af_expr=expr_numeric) def pc_project(call_expr, loadings_expr, af_expr): """Projects genotypes onto pre-computed PCs. Requires loadings and allele-frequency from a reference dataset (see example). Note that `loadings_expr` must have no missing data and reflect the rows from the original PCA run for this method to be accurate. Example ------- >>> # Compute loadings and allele frequency for reference dataset >>> _, _, loadings_ht = hl.hwe_normalized_pca(mt.GT, k=10, compute_loadings=True) # doctest: +SKIP >>> mt = mt.annotate_rows(af=hl.agg.mean(mt.GT.n_alt_alleles()) / 2) # doctest: +SKIP >>> loadings_ht = loadings_ht.annotate(af=mt.rows()[loadings_ht.key].af) # doctest: +SKIP >>> # Project new genotypes onto loadings >>> ht = pc_project(mt_to_project.GT, loadings_ht.loadings, loadings_ht.af) # doctest: +SKIP Parameters ---------- call_expr : :class:`.CallExpression` Entry-indexed call expression for genotypes to project onto loadings. loadings_expr : :class:`.ArrayNumericExpression` Location of expression for loadings
import hail as hl from hail.typecheck import typecheck, sequenceof from hail.expr.expressions import expr_str, expr_call, expr_locus, expr_array from hail.matrixtable import MatrixTable from typing import List @typecheck(locus=expr_locus(), alleles=expr_array(expr_str), proband_call=expr_call, father_call=expr_call, mother_call=expr_call) def phase_by_transmission( locus: hl.expr.LocusExpression, alleles: hl.expr.ArrayExpression, proband_call: hl.expr.CallExpression, father_call: hl.expr.CallExpression, mother_call: hl.expr.CallExpression ) -> hl.expr.ArrayExpression: """Phases genotype calls in a trio based allele transmission. Notes ----- In the phased calls returned, the order is as follows: - Proband: father_allele | mother_allele - Parents: transmitted_allele | untransmitted_allele Phasing of sex chromosomes: - Sex chromosomes of male individuals should be haploid to be phased correctly. - If `proband_call` is diploid on non-par regions of the sex chromosomes, it is assumed to be female.
lambda j: hl.tuple([alleles.globl[j], j])))))), ts.row.dtype, ts.globals.dtype) _merge_function_map[(ts.row.dtype, ts.globals.dtype)] = f merge_function = _merge_function_map[(ts.row.dtype, ts.globals.dtype)] ts = Table(TableMapRows(ts._tir, Apply(merge_function._name, TopLevelReference('row'), TopLevelReference('global')))) return ts.transmute_globals(__cols=hl.flatten(ts.g.map(lambda g: g.__cols))) def combine_gvcfs(mts): """merges vcfs using multi way join""" ts = hl.Table._multi_way_zip_join([localize(mt) for mt in mts], 'data', 'g') combined = combine(ts) return unlocalize(combined) @typecheck(lgt=expr_call, la=expr_array(expr_int32)) def lgt_to_gt(lgt, la): """A method for transforming Local GT and Local Alleles into the true GT""" return hl.call(la[lgt[0]], la[lgt[1]]) def quick_summary(mt): """compute aggregate INFO fields that do not require densify""" return mt.annotate_rows( info=hl.struct( MQ_DP=hl.agg.sum(mt.entry.gvcf_info.MQ_DP), QUALapprox=hl.agg.sum(mt.entry.gvcf_info.QUALapprox), RAW_MQ=hl.agg.sum(mt.entry.gvcf_info.RAW_MQ), VarDP=hl.agg.sum(mt.entry.gvcf_info.VarDP), SB_TABLE=hl.array([ hl.agg.sum(mt.entry.SB[0]), hl.agg.sum(mt.entry.SB[1]),
from functools import reduce import hail as hl from hail.expr.functions import _ndarray from hail.expr.functions import array as aarray from hail.expr.types import HailType, tfloat64, ttuple, tndarray from hail.typecheck import typecheck, nullable, oneof, tupleof, sequenceof from hail.expr.expressions import (expr_int32, expr_int64, expr_tuple, expr_any, expr_array, expr_ndarray, expr_numeric, Int64Expression, cast_expr, construct_expr) from hail.expr.expressions.typed_expressions import NDArrayNumericExpression from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat, NDArraySVD, Apply tsequenceof_nd = oneof(sequenceof(expr_ndarray()), expr_array(expr_ndarray())) shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple()) def array(input_array, dtype=None): """Construct an :class:`.NDArrayExpression` Examples -------- >>> hl.eval(hl.nd.array([1, 2, 3, 4])) array([1, 2, 3, 4], dtype=int32) >>> hl.eval(hl.nd.array([[1, 2, 3], [4, 5, 6]])) array([[1, 2, 3], [4, 5, 6]], dtype=int32)
from hail.expr.expressions import expr_int32, expr_array, ArrayExpression
from hail.expr.types import tfloat64, tarray
from hail.typecheck import typecheck
from hail.expr.functions import _func


@typecheck(gt_counts=expr_array(expr_int32))
def haplotype_freq_em(gt_counts) -> ArrayExpression:
    """Estimate haplotype counts for a pair of bi-allelic variants.

    Implements the Excoffier & Slatkin EM algorithm
    (Excoffier & Slatkin, Mol. Biol. Evol. 1995).

    The unphased genotype counts for the variant pair must be supplied in
    this order:

        [AABB, AABb, AAbb, AaBB, AaBb, Aabb, aaBB, aaBb, aabb]

    The estimated haplotype counts come back in this order:

        [AB, Ab, aB, ab]

    Here *A* and *a* are the reference and non-reference alleles of the
    first variant, and *B* and *b* are the reference and non-reference
    alleles of the second variant.

    Parameters
    ----------
    gt_counts : :class:`.ArrayExpression`
        Genotype counts, ordered as described above.

    Returns
    -------
    :class:`.ArrayExpression`
        Array of ``tfloat64`` haplotype count estimates.
    """
    result_type = tarray(tfloat64)
    return _func("haplotype_freq_em", result_type, gt_counts)
Parameters ---------- input_array : :class:`.ArrayExpression` or numpy ndarray or nested python lists Returns ------- :class:`.NDArrayExpression` An ndarray based on the input array. """ return _ndarray(input_array) shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple()) @typecheck(a=expr_array(), shape=shape_type) def from_column_major(a, shape): assert len(shape) == 2 return array(a).reshape(tuple(reversed(shape))).T @typecheck(start=expr_int32, stop=nullable(expr_int32), step=expr_int32) def arange(start, stop=None, step=1) -> NDArrayNumericExpression: """Returns a 1-dimensions ndarray of integers from `start` to `stop` by `step`. Examples -------- >>> hl.eval(hl.nd.arange(10)) array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)
from functools import reduce import hail as hl from hail.expr.functions import _ndarray from hail.expr.functions import array as aarray from hail.expr.types import HailType, tfloat64, ttuple, tndarray from hail.typecheck import typecheck, nullable, oneof, tupleof, sequenceof from hail.expr.expressions import (expr_int32, expr_int64, expr_tuple, expr_any, expr_array, expr_ndarray, expr_numeric, Int64Expression, cast_expr, construct_expr) from hail.expr.expressions.typed_expressions import NDArrayNumericExpression from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat tsequenceof_nd = oneof(sequenceof(expr_ndarray()), tupleof(expr_ndarray()), expr_array(expr_ndarray())) shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple()) def array(input_array, dtype=None): """Construct an :class:`.NDArrayExpression` Examples -------- >>> hl.eval(hl.nd.array([1, 2, 3, 4])) array([1, 2, 3, 4], dtype=int32) >>> hl.eval(hl.nd.array([[1, 2, 3], [4, 5, 6]])) array([[1, 2, 3], [4, 5, 6]], dtype=int32)