def get_reference_genome(
    locus: Union[hl.expr.LocusExpression, hl.expr.IntervalExpression],
    add_sequence: bool = False,
) -> hl.ReferenceGenome:
    """
    Return the reference genome associated with a locus or interval expression.

    For a locus expression the genome is read from the expression's dtype; for an
    interval expression it is read from the dtype's point type.

    :param locus: Input locus expression or interval expression.
    :param add_sequence: If set, the fasta sequence is added to the reference genome
        (via `add_reference_sequence`) before it is returned.
    :return: Reference genome.
    :raises TypeError: If `locus` is neither a LocusExpression nor an IntervalExpression.
    """
    if isinstance(locus, hl.expr.LocusExpression):
        ref = locus.dtype.reference_genome
    elif isinstance(locus, hl.expr.IntervalExpression):
        # Intervals carry the genome on their point type rather than on the dtype itself.
        ref = locus.dtype.point_type.reference_genome
    else:
        # Explicit raise rather than `assert`: assertions are stripped under `python -O`,
        # which would let an unsupported expression fail later with an opaque AttributeError.
        raise TypeError(
            "Expected a LocusExpression or IntervalExpression, "
            f"got {type(locus).__name__}"
        )
    if add_sequence:
        ref = add_reference_sequence(ref)
    return ref
Пример #2
0
def get_liftover_genome(
    t: Union[hl.MatrixTable, hl.Table]
) -> Tuple[hl.genetics.ReferenceGenome, hl.genetics.ReferenceGenome]:
    """
    Prepare the source and destination reference genomes for a liftover.

    The build of `t.locus` is taken as the source. A source named "GRCh38" is paired with
    destination "GRCh37"; any other source name is paired with destination "GRCh38".
    A liftover chain is added to the source unless it already has one for the destination,
    and the destination is passed through `add_reference_sequence` before being returned.

    :param t: Input Table or MatrixTable; its `locus` field is used to infer the build.
    :return: Tuple of source reference genome (with a liftover chain to the destination)
        and destination reference genome (as returned by `add_reference_sequence`).
    """
    logger.info("Inferring reference genome of input...")
    source_name = get_reference_genome(t.locus).name
    source = hl.get_reference(source_name)

    logger.info("Loading fasta sequence for destination build...")
    # Only the name "GRCh38" is special-cased; every other build is lifted to GRCh38.
    from_grch38 = source_name == "GRCh38"
    target = hl.get_reference("GRCh37" if from_grch38 else "GRCh38")
    chain = GRCH38_TO_GRCH37_CHAIN if from_grch38 else GRCH37_to_GRCH38_CHAIN

    logger.info("Adding liftover chain to input build...")
    if not source.has_liftover(target):
        source.add_liftover(chain, target)
    else:
        # An existing chain is left in place; it is not replaced with `chain`.
        logger.warning(
            "Source reference build %s already has a chain file: %s! Using whichever chain has already been added.",
            source.name,
            source._liftovers,
        )

    destination = add_reference_sequence(target)
    return source, destination