def public_release(data_type: str) -> VersionedTableResource:
    """
    Build the versioned resource for the publicly released Table of a data type.

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    :raises DataException: If `data_type` is not one of `DATA_TYPES`
    """
    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    # Any accepted data type other than "exomes" falls through to the genome releases.
    is_exomes = data_type == "exomes"
    default_version = CURRENT_EXOME_RELEASE if is_exomes else CURRENT_GENOME_RELEASE
    available_versions = EXOME_RELEASES if is_exomes else GENOME_RELEASES

    tables = {}
    for version in available_versions:
        tables[version] = TableResource(
            path=_public_release_ht_path(data_type, version)
        )
    return VersionedTableResource(default_version, tables)
def ancestry_pca_eigenvalues(
    include_unreleasable_samples: bool = False,
) -> VersionedTableResource:
    """
    Return the versioned resource holding the ancestry PCA eigenvalues.

    One TableResource is registered per entry of `RELEASES`, with `CURRENT_RELEASE`
    as the default version.

    :param include_unreleasable_samples: Whether to get the PCA that included unreleasable in training
    :return: Ancestry PCA eigenvalues
    """
    eigenvalue_tables = {}
    for version in RELEASES:
        ht_path = _get_ancestry_pca_ht_path(
            "eigenvalues", version, include_unreleasable_samples
        )
        eigenvalue_tables[version] = TableResource(ht_path)
    return VersionedTableResource(CURRENT_RELEASE, eigenvalue_tables)
def get_sample_qc(strat: str = "all") -> VersionedTableResource:
    """
    Return the versioned sample QC Table resource for one stratification.

    The sample QC annotations are generated by Hail. Stratifications:

        - bi_allelic
        - multi_allelic
        - all

    :param strat: Which stratification to return
    :return: Sample QC table
    """
    sample_qc_tables = {}
    for release in RELEASES:
        # The stratification name is embedded in the Table file name.
        sample_qc_tables[release] = TableResource(
            f"{get_sample_qc_root(release)}/sample_qc_{strat}.ht"
        )
    return VersionedTableResource(CURRENT_RELEASE, sample_qc_tables)
def get_rf_result(model_id: Optional[str] = None) -> VersionedTableResource:
    """
    Return the versioned resource for the RF filtering results of a given run.

    Note: `model_id` is interpolated into the path as-is, so leaving it as None
    produces a path containing the literal text "None".

    :param model_id: RF run to load
    :return: VersionedTableResource for RF filtered data
    """

    def _rf_result_table(release):
        # One result Table per release, stored under that run's model directory.
        return TableResource(
            f"{get_variant_qc_root(release)}/rf/models/{model_id}/rf_result.ht"
        )

    return VersionedTableResource(
        CURRENT_RELEASE,
        {release: _rf_result_table(release) for release in RELEASES},
    )
def release_sites(public: bool = False) -> VersionedTableResource:
    """
    Return the versioned resource for the sites-only release Table.

    :param public: Determines whether release sites Table is read from public or private bucket. Defaults to private
    :return: Sites-only release Table
    """
    sites_tables = {}
    for version in RELEASES:
        sites_path = release_ht_path(release_version=version, public=public)
        sites_tables[version] = TableResource(path=sites_path)
    return VersionedTableResource(CURRENT_RELEASE, sites_tables)
def get_rf_training(model_id: str) -> VersionedTableResource:
    """
    Return the versioned resource for the training data of a given RF run.

    :param model_id: RF run to load
    :return: VersionedTableResource for RF training data
    """
    training_tables = dict(
        (
            release,
            TableResource(
                f"{get_variant_qc_root(release)}/rf/models/{model_id}/training.ht"
            ),
        )
        for release in RELEASES
    )
    return VersionedTableResource(CURRENT_RELEASE, training_tables)
def hgdp_1kg_subset_annotations(sample: bool = True) -> VersionedTableResource:
    """
    Get the HGDP + 1KG subset release sample or variant TableResource.

    Release "3" is skipped when the versions are built, so no resource is
    registered for it.

    :param sample: If true, will return the sample annotations, otherwise will return the variant annotations
    :return: Versioned Table resource with sample/variant annotations for the subset
    """
    # The suffix depends only on `sample`, so resolve it once instead of per release.
    suffix = "_sample_meta" if sample else "_variant_annotations"
    return VersionedTableResource(
        CURRENT_RELEASE,
        {
            release: TableResource(
                f"gs://gnomad/release/{release}/ht/gnomad.genomes.v{release}.hgdp_1kg_subset{suffix}.ht"
            )
            for release in RELEASES
            if release != "3"
        },
    )
def get_info(split: bool = True) -> VersionedTableResource:
    """
    Return the versioned gnomAD v3 info Table resource.

    :param split: Whether to return the split or multi-allelic version of the resource
    :return: gnomAD v3 info VersionedTableResource
    """
    path_template = "{}/gnomad_genomes_v{}_info{}.ht"
    # The split Table carries an extra ".split" marker before the extension.
    split_marker = ".split" if split else ""

    info_tables = {}
    for release in RELEASES:
        info_path = path_template.format(
            _annotations_root(release), release, split_marker
        )
        info_tables[release] = TableResource(path=info_path)
    return VersionedTableResource(CURRENT_RELEASE, info_tables)
def get_score_bins(model_id: str, aggregated: bool) -> VersionedTableResource:
    """
    Return the versioned resource for a Table of binned RF or VQSR scores.

    The Table contains RF or VQSR scores annotated with a bin based on the rank
    of the metric scores.

    :param model_id: RF or VQSR model ID for which to return score data.
    :param aggregated: Whether to get the aggregated data. If True, the resource
        points to the Table grouped by bin that contains aggregated variant
        counts per bin.
    :return: Versioned resource for the desired Hail Table
    """

    def _score_bin_table(release):
        return TableResource(
            f"{get_variant_qc_root(release)}/score_bins/{model_id}.{'aggregated' if aggregated else 'bins'}.ht"
        )

    score_bin_tables = {}
    for release in RELEASES:
        score_bin_tables[release] = _score_bin_table(release)
    return VersionedTableResource(CURRENT_RELEASE, score_bin_tables)
def get_binned_concordance(model_id: str, truth_sample: str) -> VersionedTableResource:
    """
    Return the versioned resource for a binned truth sample concordance Table.

    The Table holds the concordance (TP, FP, FN) between a truth sample within
    the callset and the sample's truth data, grouped by bins of a metric
    (RF or VQSR scores).

    :param model_id: RF or VQSR model ID for which to return score data.
    :param truth_sample: Which truth sample concordance to analyze (e.g., "NA12878" or "syndip")
    :return: Versioned resource for the binned truth data concordance Hail Table
    """
    concordance_tables = dict(
        (
            release,
            TableResource(
                f"{get_variant_qc_root(release)}/binned_concordance/{truth_sample}_{model_id}_binned_concordance.ht"
            ),
        )
        for release in RELEASES
    )
    return VersionedTableResource(CURRENT_RELEASE, concordance_tables)
"path": "gs://gnomad-public-requester-pays/resources/grch38/na12878/HG001_GRCh38_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel_noCENorHET7.bed", "reference_genome": "GRCh38", "skip_invalid_intervals": True, }, ) # Versioned resources: versions should be listed from most recent to oldest vep_context = VersionedTableResource( default_version="95", versions={ "95": GnomadPublicTableResource( path= "gs://gnomad-public-requester-pays/resources/context/grch38_context_vep_annotated.ht", ), "101": GnomadPublicTableResource( path= "gs://gnomad-public-requester-pays/resources/context/grch38_context_vep_annotated.v101.ht", ), }, ) syndip = VersionedMatrixTableResource( default_version="20180222", versions={ "20180222": GnomadPublicMatrixTableResource( path= "gs://gnomad-public-requester-pays/resources/grch38/syndip/syndip.b38_20180222.mt",
"gs://gnomad-public/resources/grch38/gnomad_v2_qc_sites_b38.ht") # Dense MT of samples at QC sites qc = VersionedMatrixTableResource( CURRENT_RELEASE, { release: MatrixTableResource( f"gs://gnomad/sample_qc/mt/genomes_v{release}/gnomad_v{release}_qc_mt_v2_sites_dense.mt" ) for release in RELEASES }) # PC relate PCA scores pc_relate_pca_scores = VersionedTableResource( CURRENT_RELEASE, { release: TableResource( f"{get_sample_qc_root(release)}/gnomad_v{release}_qc_mt_v2_sites_pc_scores.ht" ) for release in RELEASES }) # PC relate results relatedness = VersionedTableResource( CURRENT_RELEASE, { release: TableResource( f"{get_sample_qc_root(release)}/gnomad_v{release}_qc_mt_v2_sites_relatedness.ht" ) for release in RELEASES }) # Sex imputation results sex = VersionedTableResource(
version: str = CURRENT_RELEASE) -> str: """ Provides the path to the transmitted singleton VCF used as input to VQSR :param bool adj: Whether to use adj genotypes :param version: Version of transmitted singleton VCF path to return :return: """ return f'{_annotations_root(version)}/transmitted_singletons_{"adj" if adj else "raw"}.vcf.bgz' last_END_position = VersionedTableResource( CURRENT_RELEASE, { release: TableResource( f"{_annotations_root(release)}/gnomad_genomes_v{release}_last_END_positions.ht" ) for release in RELEASES }, ) freq = VersionedTableResource( CURRENT_RELEASE, { release: TableResource( f"{_annotations_root(release)}/gnomad_genomes_v{release}.frequencies.ht" ) for release in RELEASES }, )
}, ) def get_rf_result(model_id: Optional[str] = None) -> VersionedTableResource: """ Get the results of RF filtering for a given run :param model_id: RF run to load :return: VersionedTableResource for RF filtered data """ return VersionedTableResource( CURRENT_RELEASE, { release: TableResource( f"{get_variant_qc_root(release)}/rf/models/{model_id}/rf_result.ht" ) for release in RELEASES }, ) final_filter = VersionedTableResource( CURRENT_RELEASE, { release: TableResource(f"{get_variant_qc_root(release)}/final_filter.ht") for release in RELEASES }, )
), "3.1_raw": PedigreeResource( "gs://gnomad/metadata/genomes_v3.1/gnomad_v3.1_trios_raw.fam", delimiter="\t"), "3": PedigreeResource( "gs://gnomad/metadata/genomes_v3/gnomad_v3_trios.fam", delimiter="\t", ), "3_raw": PedigreeResource("gs://gnomad/metadata/genomes_v3/gnomad_v3_trios_raw.fam", delimiter="\t"), } meta = VersionedTableResource(CURRENT_META_VERSION, _meta_versions) project_meta = VersionedTableResource(CURRENT_PROJECT_META_VERSION, _project_meta_versions) pedigree = VersionedPedigreeResource("3.1", _pedigree_versions) trios = VersionedPedigreeResource("3.1", _trios_versions) ped_mendel_errors = VersionedTableResource( CURRENT_RELEASE, { release: TableResource( path= f"{_meta_root_path(release)}/gnomad_v{release}_ped_chr20_mendel_errors.ht" ) for release in RELEASES }, )
syndip = GnomadPublicMatrixTableResource( path="gs://gnomad-public-requester-pays/resources/grch37/syndip/hybrid.m37m.mt", import_func=hl.import_vcf, import_args={ "path": "gs://gnomad-public-requester-pays/resources/grch37/syndip/hybrid.m37m.vcf.bgz", "min_partitions": 100, "reference_genome": "GRCh37", }, ) # Versioned resources: versions should be listed from most recent to oldest vep_context = VersionedTableResource( default_version="85", versions={ "85": GnomadPublicTableResource( path="gs://gnomad-public-requester-pays/resources/context/grch37_context_vep_annotated.ht", ) }, ) dbsnp = VersionedTableResource( default_version="20180423", versions={ "20180423": GnomadPublicTableResource( path="gs://gnomad-public-requester-pays/resources/grch37/dbsnp/All_20180423.ht", import_func=import_sites_vcf, import_args={ "path": "gs://gnomad-public-requester-pays/resources/grch37/dbsnp/All_20180423.vcf.bgz", "force_bgz": True, "skip_invalid_loci": True, "min_partitions": 100,
"gs://gnomad-public/resources/grch38/syndip/full.38.20180222.vcf.gz", "force_bgz": True, "min_partitions": 100, "reference_genome": "GRCh38" }) }, ) syndip_hc_intervals = VersionedTableResource( default_version="20180222", versions={ "20180222": TableResource( path= 'gs://gnomad-public/resources/grch38/syndip/syndip_b38_20180222_hc_regions.ht', import_func=hl.import_bed, import_args={ "path": 'gs://gnomad-public/resources/grch38/syndip/syndip.b38_20180222.bed', "reference_genome": 'GRCh38', "skip_invalid_intervals": True, "min_partitions": 10 }) }) clinvar = VersionedTableResource( default_version="20190923", versions={ "20190923": TableResource( path= "gs://gnomad-public/resources/grch38/clinvar/clinvar_20190923.ht",