Exemplo n.º 1
0
def public_release(data_type: str) -> VersionedTableResource:
    """
    Build the versioned resource for the publicly released Table of a data type.

    :param data_type: One of "exomes" or "genomes" (must be a member of `DATA_TYPES`)
    :return: VersionedTableResource holding one TableResource per release of the
        requested data type, with that data type's current release as default
    :raises DataException: If `data_type` is not in `DATA_TYPES`
    """
    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    # Any validated value other than "exomes" falls through to the genome constants.
    is_exomes = data_type == "exomes"
    current_release = CURRENT_EXOME_RELEASE if is_exomes else CURRENT_GENOME_RELEASE
    releases = EXOME_RELEASES if is_exomes else GENOME_RELEASES

    versions = {}
    for release in releases:
        versions[release] = TableResource(
            path=_public_release_ht_path(data_type, release)
        )

    return VersionedTableResource(current_release, versions)
Exemplo n.º 2
0
def ancestry_pca_eigenvalues(
    include_unreleasable_samples: bool = False,
) -> VersionedTableResource:
    """
    Build the versioned resource for the ancestry PCA eigenvalues Table.

    :param include_unreleasable_samples: Whether to get the PCA that included
        unreleasable samples in training (forwarded to `_get_ancestry_pca_ht_path`)
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        ht_path = _get_ancestry_pca_ht_path(
            "eigenvalues", release, include_unreleasable_samples
        )
        versions[release] = TableResource(ht_path)
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 3
0
def get_sample_qc(strat: str = "all") -> VersionedTableResource:
    """
    Build the versioned resource for the sample QC Table of one stratification.

    Documented stratifications:
        - bi_allelic
        - multi_allelic
        - all

    `strat` is not validated here; it is interpolated directly into the file name.

    :param strat: Which stratification to return
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_sample_qc_root(release)}/sample_qc_{strat}.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 4
0
def get_rf_result(model_id: Optional[str] = None) -> VersionedTableResource:
    """
    Build the versioned resource for the RF result Table of a given run.

    NOTE(review): when `model_id` is left as None, the text "None" is interpolated
    into the path; confirm that callers always pass a model ID.

    :param model_id: RF run to load
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_variant_qc_root(release)}/rf/models/{model_id}/rf_result.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 5
0
def release_sites(public: bool = False) -> VersionedTableResource:
    """
    Build the versioned resource for the sites-only release Table.

    :param public: Forwarded to `release_ht_path`; determines whether the release
        sites Table is read from the public or the private bucket. Defaults to private
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        ht_path = release_ht_path(release_version=release, public=public)
        versions[release] = TableResource(path=ht_path)
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 6
0
def get_rf_training(model_id: str) -> VersionedTableResource:
    """
    Build the versioned resource for the RF training Table of a given run.

    :param model_id: RF run to load
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_variant_qc_root(release)}/rf/models/{model_id}/training.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 7
0
def hgdp_1kg_subset_annotations(sample: bool = True) -> VersionedTableResource:
    """
    Build the versioned resource for the HGDP + 1KG subset sample or variant annotations.

    Release "3" is skipped, so the returned resource has no entry for it.

    :param sample: If true, will return the sample annotations, otherwise will return the variant annotations
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`
        other than "3", using `CURRENT_RELEASE` as the default version
    """
    # The file-name suffix depends only on `sample`, so work it out once.
    suffix = "_sample_meta" if sample else "_variant_annotations"

    versions = {}
    for release in RELEASES:
        if release != "3":
            versions[release] = TableResource(
                f"gs://gnomad/release/{release}/ht/gnomad.genomes.v{release}.hgdp_1kg_subset{suffix}.ht"
            )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 8
0
def get_info(split: bool = True) -> VersionedTableResource:
    """
    Build the versioned resource for the gnomAD v3 info Table.

    :param split: Whether to return the split or multi-allelic version of the resource;
        when true, ".split" is inserted before the ".ht" extension
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    # The suffix is the same for every release, so compute it once.
    suffix = ".split" if split else ""

    versions = {}
    for release in RELEASES:
        ht_path = "{}/gnomad_genomes_v{}_info{}.ht".format(
            _annotations_root(release), release, suffix
        )
        versions[release] = TableResource(path=ht_path)
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 9
0
def get_score_bins(model_id: str, aggregated: bool) -> VersionedTableResource:
    """
    Build the versioned resource for a Table of RF or VQSR scores annotated with a bin based on rank of the metric scores.

    :param model_id: RF or VQSR model ID for which to return score data.
    :param aggregated: Whether to get the aggregated data.
         If True, the resource points to the Table grouped by bin that contains aggregated variant counts per bin
         (file name ends in ".aggregated.ht"); otherwise the file name ends in ".bins.ht".
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    # Same file-name component for every release.
    kind = "aggregated" if aggregated else "bins"

    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_variant_qc_root(release)}/score_bins/{model_id}.{kind}.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 10
0
def get_binned_concordance(
    model_id: str, truth_sample: str
) -> VersionedTableResource:
    """
    Build the versioned resource for a truth sample concordance Table (containing TP, FP, FN) between a truth
    sample within the callset and the sample's truth data, grouped by bins of a metric (RF or VQSR scores).

    :param model_id: RF or VQSR model ID for which to return score data.
    :param truth_sample: Which truth sample concordance to analyze (e.g., "NA12878" or "syndip")
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_variant_qc_root(release)}/binned_concordance/{truth_sample}_{model_id}_binned_concordance.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)
Exemplo n.º 11
0
        "path":
        "gs://gnomad-public-requester-pays/resources/grch38/na12878/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed",
        "reference_genome": "GRCh38",
        "skip_invalid_intervals": True,
    },
)

# Versioned resources: versions should be listed from most recent to oldest
# NOTE(review): "95" is listed before "101" below and is also the default version;
# confirm this ordering matches the convention stated above.
vep_context = VersionedTableResource(
    default_version="95",
    versions={
        "95": GnomadPublicTableResource(
            path="gs://gnomad-public-requester-pays/resources/context/grch38_context_vep_annotated.ht",
        ),
        "101": GnomadPublicTableResource(
            path="gs://gnomad-public-requester-pays/resources/context/grch38_context_vep_annotated.v101.ht",
        ),
    },
)

syndip = VersionedMatrixTableResource(
    default_version="20180222",
    versions={
        "20180222":
        GnomadPublicMatrixTableResource(
            path=
            "gs://gnomad-public-requester-pays/resources/grch38/syndip/syndip.b38_20180222.mt",
Exemplo n.º 12
0
    "gs://gnomad-public/resources/grch38/gnomad_v2_qc_sites_b38.ht")

# Dense MT of samples at QC sites: one MatrixTableResource per entry of RELEASES,
# with CURRENT_RELEASE as the default version.
qc = VersionedMatrixTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: MatrixTableResource(
            f"gs://gnomad/sample_qc/mt/genomes_v{release}/gnomad_v{release}_qc_mt_v2_sites_dense.mt"
        )
        for release in RELEASES
    },
)

# PC relate PCA scores: one TableResource per entry of RELEASES,
# with CURRENT_RELEASE as the default version.
pc_relate_pca_scores = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{get_sample_qc_root(release)}/gnomad_v{release}_qc_mt_v2_sites_pc_scores.ht"
        )
        for release in RELEASES
    },
)

# PC relate results: one TableResource per entry of RELEASES,
# with CURRENT_RELEASE as the default version.
relatedness = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{get_sample_qc_root(release)}/gnomad_v{release}_qc_mt_v2_sites_relatedness.ht"
        )
        for release in RELEASES
    },
)

# Sex imputation results
sex = VersionedTableResource(
Exemplo n.º 13
0
                                       version: str = CURRENT_RELEASE) -> str:
    """
    Provides the path to the transmitted singleton VCF used as input to VQSR

    :param bool adj: Whether to use adj genotypes
    :param version: Version of transmitted singleton VCF path to return
    :return:
    """
    return f'{_annotations_root(version)}/transmitted_singletons_{"adj" if adj else "raw"}.vcf.bgz'


# Versioned Table of last END positions, stored under each release's annotations root.
last_END_position = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{_annotations_root(release)}/gnomad_genomes_v{release}_last_END_positions.ht"
        )
        for release in RELEASES
    },
)

# Versioned frequencies Table, stored under each release's annotations root.
freq = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{_annotations_root(release)}/gnomad_genomes_v{release}.frequencies.ht"
        )
        for release in RELEASES
    },
)
Exemplo n.º 14
0
        },
    )


def get_rf_result(model_id: Optional[str] = None) -> VersionedTableResource:
    """
    Build the versioned resource for the RF result Table of a given run.

    NOTE(review): when `model_id` is left as None, the text "None" is interpolated
    into the path; confirm that callers always pass a model ID.

    :param model_id: RF run to load
    :return: VersionedTableResource with one TableResource per entry of `RELEASES`,
        using `CURRENT_RELEASE` as the default version
    """
    versions = {}
    for release in RELEASES:
        versions[release] = TableResource(
            f"{get_variant_qc_root(release)}/rf/models/{model_id}/rf_result.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, versions)


# Versioned final filter Table, stored under each release's variant QC root.
final_filter = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{get_variant_qc_root(release)}/final_filter.ht"
        )
        for release in RELEASES
    },
)
Exemplo n.º 15
0
    ),
    "3.1_raw":
    PedigreeResource(
        "gs://gnomad/metadata/genomes_v3.1/gnomad_v3.1_trios_raw.fam",
        delimiter="\t"),
    "3":
    PedigreeResource(
        "gs://gnomad/metadata/genomes_v3/gnomad_v3_trios.fam",
        delimiter="\t",
    ),
    "3_raw":
    PedigreeResource("gs://gnomad/metadata/genomes_v3/gnomad_v3_trios_raw.fam",
                     delimiter="\t"),
}

# Metadata Tables: each uses its own CURRENT_* constant as the default version.
meta = VersionedTableResource(
    default_version=CURRENT_META_VERSION,
    versions=_meta_versions,
)
project_meta = VersionedTableResource(
    default_version=CURRENT_PROJECT_META_VERSION,
    versions=_project_meta_versions,
)

# Pedigree resources: the default version is pinned to "3.1" rather than a CURRENT_* constant.
pedigree = VersionedPedigreeResource("3.1", _pedigree_versions)
trios = VersionedPedigreeResource("3.1", _trios_versions)
# Versioned chr20 Mendel errors Table, stored under each release's metadata root.
ped_mendel_errors = VersionedTableResource(
    default_version=CURRENT_RELEASE,
    versions={
        release: TableResource(
            path=f"{_meta_root_path(release)}/gnomad_v{release}_ped_chr20_mendel_errors.ht"
        )
        for release in RELEASES
    },
)
Exemplo n.º 16
0
# GRCh37 syndip resource: the MatrixTable path plus the settings for importing the
# bgzipped VCF with hl.import_vcf.
# NOTE(review): presumably import_func/import_args are used to (re)create the MT from
# the VCF — confirm against the resource class.
syndip = GnomadPublicMatrixTableResource(
    path="gs://gnomad-public-requester-pays/resources/grch37/syndip/hybrid.m37m.mt",
    import_func=hl.import_vcf,
    import_args={
        "path": "gs://gnomad-public-requester-pays/resources/grch37/syndip/hybrid.m37m.vcf.bgz",
        "min_partitions": 100,
        "reference_genome": "GRCh37",
    },
)

# Versioned resources: versions should be listed from most recent to oldest
# GRCh37 VEP-annotated context Table; "85" is the only version defined here and is
# also the default.
vep_context = VersionedTableResource(
    default_version="85",
    versions={
        "85": GnomadPublicTableResource(
            path="gs://gnomad-public-requester-pays/resources/context/grch37_context_vep_annotated.ht",
        )
    },
)

dbsnp = VersionedTableResource(
    default_version="20180423",
    versions={
        "20180423": GnomadPublicTableResource(
            path="gs://gnomad-public-requester-pays/resources/grch37/dbsnp/All_20180423.ht",
            import_func=import_sites_vcf,
            import_args={
                "path": "gs://gnomad-public-requester-pays/resources/grch37/dbsnp/All_20180423.vcf.bgz",
                "force_bgz": True,
                "skip_invalid_loci": True,
                "min_partitions": 100,
Exemplo n.º 17
0
                "gs://gnomad-public/resources/grch38/syndip/full.38.20180222.vcf.gz",
                "force_bgz": True,
                "min_partitions": 100,
                "reference_genome": "GRCh38"
            })
    },
)

# syndip intervals Table for GRCh38 (path ends in "hc_regions.ht"), with the settings
# for importing the source BED file via hl.import_bed. Only version "20180222" is defined.
syndip_hc_intervals = VersionedTableResource(
    default_version="20180222",
    versions={
        "20180222": TableResource(
            path="gs://gnomad-public/resources/grch38/syndip/syndip_b38_20180222_hc_regions.ht",
            import_func=hl.import_bed,
            import_args={
                "path": "gs://gnomad-public/resources/grch38/syndip/syndip.b38_20180222.bed",
                "reference_genome": "GRCh38",
                "skip_invalid_intervals": True,
                "min_partitions": 10,
            },
        ),
    },
)

clinvar = VersionedTableResource(
    default_version="20190923",
    versions={
        "20190923":
        TableResource(
            path=
            "gs://gnomad-public/resources/grch38/clinvar/clinvar_20190923.ht",