Пример #1
0
def annotate_snp_mismatch(
        t: Union[hl.MatrixTable, hl.Table],
        rg: hl.genetics.ReferenceGenome) -> Union[hl.MatrixTable, hl.Table]:
    """
    Annotate mismatches between the reference allele and the reference fasta.

    The input is first filtered to SNPs (based on ``t.alleles[0]`` and
    ``t.alleles[1]``). Each remaining row then gets a boolean
    ``reference_mismatch`` field that is True when the reference allele
    differs from the sequence returned by ``hl.get_sequence`` at ``t.locus``
    in ``rg``. When ``t.new_locus.is_negative_strand`` is True, the reverse
    complement of the reference allele is compared instead.

    Assumes the input Table/MatrixTable has ``locus``, ``alleles`` and a
    ``new_locus`` annotation exposing ``is_negative_strand``.
    NOTE(review): ``new_locus`` is presumably the struct produced by
    ``hl.liftover(..., include_strand=True)`` -- confirm against the caller.

    :param t: Table/MatrixTable of SNPs to be annotated
    :param rg: Reference genome with fasta sequence loaded
    :return: Input filtered to SNPs and annotated with ``reference_mismatch``
    """
    # Filtering does not change the container type, so decide once.
    is_table = isinstance(t, hl.Table)

    logger.info("Filtering to SNPs")
    snp_expr = hl.is_snp(t.alleles[0], t.alleles[1])
    t = t.filter(snp_expr) if is_table else t.filter_rows(snp_expr)

    logger.info(
        "Checking if reference allele matches what is in reference fasta")
    logger.info(
        "For SNPs on the negative strand, make sure the reverse complement of the ref alleles matches what is in the ref fasta"
    )

    # Both branches compare against the same fasta lookup, so build it once.
    ref_allele = t.alleles[0]
    fasta_sequence = hl.get_sequence(
        t.locus.contig, t.locus.position, reference_genome=rg)

    # hl.if_else replaces the deprecated hl.cond; arguments are identical.
    mismatch_expr = {
        "reference_mismatch":
        hl.if_else(
            t.new_locus.is_negative_strand,
            hl.reverse_complement(ref_allele) != fasta_sequence,
            ref_allele != fasta_sequence,
        )
    }

    if is_table:
        return t.annotate(**mismatch_expr)
    return t.annotate_rows(**mismatch_expr)
Пример #2
0
def liftover_expr(
        locus: hl.expr.LocusExpression, alleles: hl.expr.ArrayExpression,
        destination_ref: hl.ReferenceGenome) -> hl.expr.StructExpression:
    """
    Build a struct expression with the lifted-over locus and alleles.

    The locus is lifted over with strand information. Every allele is
    replaced by its reverse complement when the liftover reports the
    negative strand, and is kept as-is otherwise.

    :param locus: Input locus.
    :param alleles: Input alleles.
    :param destination_ref: Reference genome to lift over to.
    :return: Struct with fields ``locus`` (lifted-over locus) and ``alleles``
        (strand-adjusted alleles).
    """
    liftover_result = hl.liftover(locus, destination_ref, include_strand=True)
    on_negative_strand = liftover_result.is_negative_strand

    def _orient(allele):
        # Flip the allele only when the liftover landed on the negative strand.
        return hl.if_else(
            on_negative_strand, hl.reverse_complement(allele), allele)

    return hl.struct(
        locus=liftover_result.result,
        alleles=alleles.map(_orient),
    )
Пример #3
0
def liftover_expr(
    locus: hl.expr.LocusExpression,
    alleles: hl.expr.ArrayExpression,
    destination_reference: hl.ReferenceGenome,
) -> hl.expr.StructExpression:
    """
    Generate a struct expression describing a liftover.

    Struct contains:
        - new_locus: Liftover coordinates
        - new_alleles: Liftover alleles (reverse complemented when the
            liftover reports the negative strand)
        - original_locus: Locus prior to liftover
        - original_alleles: Alleles prior to liftover
        - locus_fail_liftover: Whether the liftover result is missing
        - ref_allele_mismatch: Whether the allele at index 0 of new_alleles
            (lifted over reference allele) differs from
            ``new_locus.sequence_context()`` in the destination reference

    :param locus: Input locus.
    :param alleles: Input alleles.
    :param destination_reference: Desired reference genome build for liftover.
    :return: Struct containing expressions for lifted over locus/alleles as well as original locus/alleles.
    """
    liftover_result = hl.liftover(
        locus, destination_reference, include_strand=True
    )
    new_locus = liftover_result.result
    on_negative_strand = liftover_result.is_negative_strand

    def _orient(allele):
        # Flip the allele only when the liftover landed on the negative strand.
        return hl.if_else(
            on_negative_strand, hl.reverse_complement(allele), allele
        )

    new_alleles = alleles.map(_orient)
    destination_sequence = new_locus.sequence_context()

    return hl.struct(
        new_locus=new_locus,
        new_alleles=new_alleles,
        original_locus=locus,
        original_alleles=alleles,
        locus_fail_liftover=hl.is_missing(liftover_result),
        ref_allele_mismatch=destination_sequence != new_alleles[0],
    )