def komp_export(request):
    """Render the KOMP export page, or stream the export when "download" is requested.

    When the request's GET parameters contain "download", every variant tag of
    type ``VTT_share_with_komp`` created after the ``start_date`` GET parameter
    is turned into one row and returned through ``export_table`` as an xls
    file. Otherwise the plain export page is rendered.

    Args:
        request: the incoming HTTP request; only ``request.GET`` is read.

    Returns:
        Whatever ``export_table`` or ``render`` returns for this request.
    """
    if "download" not in request.GET:
        return render(request, "staff/komp_export.html")

    logger.info("exporting komp tags")

    # NOTE(review): a missing start_date reaches parse() as None - confirm the
    # page always submits it together with "download".
    start_date = parse(request.GET.get('start_date')).strftime('%Y-%m-%d')
    komp_tag_type = VariantTagType.objects.get(guid='VTT_share_with_komp')
    tags = VariantTag.objects.filter(variant_tag_type=komp_tag_type, created_date__gt=start_date)

    rows = []
    for tag in tags:
        saved_variant = tag.saved_variant
        # Decode xpos once per row instead of once per output column.
        chrom, pos = get_chrom_pos(saved_variant.xpos)
        gene_names = json.loads(saved_variant.saved_variant_json)['extras']['gene_names']
        rows.append({
            'project': saved_variant.project.name,
            'family_id': saved_variant.family.family_id,
            'timestamp': tag.created_date.strftime('%Y-%m-%d %H:%M:%S'),
            'genes': ', '.join(gene_names.values()),
            'chrom': chrom,
            'pos': pos,
            'ref': saved_variant.ref,
            'alt': saved_variant.alt,
        })

    return export_table('komp_tags', HEADERS, rows, 'xls')
def update_saved_variants(apps, schema_editor):
    """Data migration step: populate ``variant_id`` on every SavedVariant.

    Variants whose ``ref`` is the placeholder 'X' are treated as manually
    created: their ref/alt are cleared, ``pos_end`` is renamed to ``end`` and
    each genotype's ``numAlt`` is replaced by a ``cn`` value looked up in
    ``DUP_CN_MAP`` (svType 'DUP') or ``CN_MAP``. The id is then taken from the
    stored JSON when present, otherwise built as chrom-pos-ref-alt.

    Args:
        apps: migration app registry used to resolve the historical model.
        schema_editor: supplies the database alias to run against.

    Raises:
        Exception: if a variant has neither a stored variantId nor a ref.
    """
    saved_variant_model = apps.get_model("seqr", "SavedVariant")
    variants = saved_variant_model.objects.using(schema_editor.connection.alias).all()
    if not variants:
        return

    print('Updating {} variants'.format(len(variants)))
    for variant in tqdm(variants, unit=' variants'):
        variant_json = variant.saved_variant_json

        if variant.ref == 'X':
            # Migrate manually created variants
            variant.ref = None
            variant.alt = None

            pos_end = variant_json.pop('pos_end', None)
            if pos_end:
                variant_json['end'] = pos_end

            for genotype in variant_json['genotypes'].values():
                if variant_json['svType'] == 'DUP':
                    cn_map = DUP_CN_MAP
                else:
                    cn_map = CN_MAP
                genotype['cn'] = cn_map[genotype.pop('numAlt')]

        variant_id = variant_json.get('variantId')
        if not variant_id:
            if not variant.ref:
                raise Exception('Invalid variant {}'.format(variant.guid))
            chrom, pos = get_chrom_pos(variant.xpos_start)
            variant_id = '{}-{}-{}-{}'.format(chrom, pos, variant.ref, variant.alt)

        variant.variant_id = variant_id
        variant.save()
def _variant_id_filter(xpos_ref_alt_tuples):
    """Build a ``terms`` query on ``variantId`` for the given variants.

    Args:
        xpos_ref_alt_tuples: iterable of (xpos, ref, alt) triples.

    Returns:
        A ``Q('terms', ...)`` object listing one chrom-pos-ref-alt id per triple.
    """
    variant_ids = []
    for xpos, ref, alt in xpos_ref_alt_tuples:
        chrom, pos = get_chrom_pos(xpos)
        # The ids use 'MT' where get_chrom_pos reports 'M'.
        id_chrom = 'MT' if chrom == 'M' else chrom
        variant_ids.append('{}-{}-{}-{}'.format(id_chrom, pos, ref, alt))

    return Q('terms', variantId=variant_ids)
def get_es_variants_for_variant_tuples(families, xpos_ref_alt_tuples):
    """Look up variants given as (xpos, ref, alt) triples.

    Each triple is converted to a chrom-pos-ref-alt id and the lookup is
    delegated to ``get_es_variants_for_variant_ids`` with the variant-calls
    dataset type.

    Args:
        families: families to search in (passed through unchanged).
        xpos_ref_alt_tuples: iterable of (xpos, ref, alt) triples.

    Returns:
        The result of ``get_es_variants_for_variant_ids``.
    """
    variant_ids = []
    for xpos, ref, alt in xpos_ref_alt_tuples:
        chrom, pos = get_chrom_pos(xpos)
        # The ids use 'MT' where get_chrom_pos reports 'M'.
        id_chrom = 'MT' if chrom == 'M' else chrom
        variant_ids.append('{}-{}-{}-{}'.format(id_chrom, pos, ref, alt))

    return get_es_variants_for_variant_ids(
        families, variant_ids, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS)
def _process_result(variant_json, saved_variant):
    """Finalize the JSON for one saved variant (mutates and returns ``variant_json``).

    Reads ``add_details`` from the enclosing scope: when truthy, the stored
    ``saved_variant_json`` is merged in. A chrom-pos-ref-alt ``variantId`` is
    added if none is present, and ``familyGuids`` is always set to the
    variant's single family guid.
    """
    if add_details:
        variant_json.update(saved_variant.saved_variant_json)

    if 'variantId' not in variant_json:
        chrom, pos = get_chrom_pos(saved_variant.xpos)
        fallback_id = '{}-{}-{}-{}'.format(chrom, pos, saved_variant.ref, saved_variant.alt)
        variant_json['variantId'] = fallback_id

    family_guid = saved_variant.family.guid
    variant_json['familyGuids'] = [family_guid]
    return variant_json
def get_es_variants_for_variant_tuples(families, xpos_ref_alt_tuples):
    """Search for the variants given as (xpos, ref, alt) triples.

    Each triple is converted to a chrom-pos-ref-alt id; the ids are used as a
    location filter on an ``EsSearch`` over ``families``, requesting as many
    results as there are input triples.

    Args:
        families: families to search in.
        xpos_ref_alt_tuples: sized collection of (xpos, ref, alt) triples.

    Returns:
        The result of ``EsSearch.search``.
    """
    variant_ids = []
    for xpos, ref, alt in xpos_ref_alt_tuples:
        chrom, pos = get_chrom_pos(xpos)
        # The ids use 'MT' where get_chrom_pos reports 'M'.
        id_chrom = 'MT' if chrom == 'M' else chrom
        variant_ids.append('{}-{}-{}-{}'.format(id_chrom, pos, ref, alt))

    es_search = EsSearch(families).filter_by_location(variant_ids=variant_ids)
    return es_search.search(num_results=len(xpos_ref_alt_tuples))
def test_chrom_pos(self):
    """get_chrom_pos rejects out-of-range xpos values and decodes valid ones."""
    # Values whose billions digit is 0 or 30 are expected to be rejected.
    with self.assertRaises(ValueError):
        get_chrom_pos(0)
    with self.assertRaises(ValueError):
        get_chrom_pos(30*1e9)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(get_chrom_pos(1e9 + 12345), ('1', 12345))
    self.assertEqual(get_chrom_pos(22*1e9 + 12345), ('22', 12345))
    # 23, 24 and 25 in the billions place decode to X, Y and M.
    self.assertEqual(get_chrom_pos(23*1e9 + 12345), ('X', 12345))
    self.assertEqual(get_chrom_pos(24*1e9 + 12345), ('Y', 12345))
    self.assertEqual(get_chrom_pos(25*1e9 + 12345), ('M', 12345))
def test_chrom_pos(self):
    """get_chrom_pos rejects out-of-range xpos values and decodes valid ones."""
    # Values whose billions digit is 0 or 30 are expected to be rejected.
    with self.assertRaises(ValueError):
        get_chrom_pos(0)
    with self.assertRaises(ValueError):
        get_chrom_pos(30 * 1e9)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(get_chrom_pos(1e9 + 12345), ('1', 12345))
    self.assertEqual(get_chrom_pos(22 * 1e9 + 12345), ('22', 12345))
    # 23, 24 and 25 in the billions place decode to X, Y and M.
    self.assertEqual(get_chrom_pos(23 * 1e9 + 12345), ('X', 12345))
    self.assertEqual(get_chrom_pos(24 * 1e9 + 12345), ('Y', 12345))
    self.assertEqual(get_chrom_pos(25 * 1e9 + 12345), ('M', 12345))
def _get_gene_row(row, gene_id, inheritances, variant_tag_names, variants): row["actual_inheritance_model"] = ", ".join(inheritances) row["gene_id"] = gene_id row["row_id"] += gene_id has_tier1 = any(name.startswith("Tier 1") for name in variant_tag_names) has_tier2 = any(name.startswith("Tier 2") for name in variant_tag_names) has_known_gene_for_phenotype = 'Known gene for phenotype' in variant_tag_names row.update({ "solved": ("TIER 1 GENE" if (has_tier1 or has_known_gene_for_phenotype) else ("TIER 2 GENE" if has_tier2 else "N")), "komp_early_release": "Y" if 'Share with KOMP' in variant_tag_names else "N", }) if has_tier1 or has_tier2 or has_known_gene_for_phenotype: row.update({ "posted_publicly": "", "analysis_complete_status": "complete", "novel_mendelian_gene": "Y" if any("Novel gene" in name for name in variant_tag_names) else "N", }) if has_tier1 or has_tier2: _set_discovery_details(row, variant_tag_names, variants) elif has_known_gene_for_phenotype: row["phenotype_class"] = "KNOWN" for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values(): row[functional_field] = "KPG" if not row["submitted_to_mme"] == 'Y': if has_tier1 or has_tier2: row["submitted_to_mme"] = "N" if row[ 'months_since_t0'] > 7 else "TBD" elif has_known_gene_for_phenotype: row["submitted_to_mme"] = "KPG" row["extras_variant_tag_list"] = [] for variant in variants: variant_id = "-".join( map( str, list(get_chrom_pos(variant.xpos_start)) + [variant.ref, variant.alt])) row["extras_variant_tag_list"] += [(variant_id, gene_id, vt.variant_tag_type.name.lower()) for vt in variant.discovery_tags] return row
def get_json_for_saved_variant(saved_variant, add_tags=False):
    """Serialize a saved variant to a JSON-compatible dict.

    Args:
        saved_variant (object): dictionary or django model for the SavedVariant.
        add_tags (bool): when true, also include the variant's tags,
            functional data and notes (read from the model's related sets).

    Returns:
        dict: json object; the record's guid is exposed as 'variantId' and
            'chrom'/'pos' are derived from 'xpos'.
    """
    fields = _get_record_fields(SavedVariant, 'variant')
    saved_variant_dict = _record_to_dict(saved_variant, fields, nested_fields=[('family', 'guid')])
    result = _get_json_for_record(saved_variant_dict, fields)

    chrom, pos = get_chrom_pos(result['xpos'])
    # The record guid is moved (not copied) under the 'variantId' key.
    variant_guid = result.pop('guid')
    family_guid = saved_variant_dict['family_guid']
    result.update({
        'variantId': variant_guid,
        'familyGuid': family_guid,
        'chrom': chrom,
        'pos': pos,
    })

    if not add_tags:
        return result

    tags = [get_json_for_variant_tag(tag) for tag in saved_variant.varianttag_set.all()]
    functional_data = [
        get_json_for_variant_functional_data(tag) for tag in saved_variant.variantfunctionaldata_set.all()
    ]
    notes = [get_json_for_variant_note(tag) for tag in saved_variant.variantnote_set.all()]
    result.update({
        'tags': tags,
        'functionalData': functional_data,
        'notes': notes,
    })
    return result
def get_json_for_saved_variant(saved_variant, add_tags=False):
    """Serialize a SavedVariant model to a JSON-compatible dict.

    Args:
        saved_variant (object): Django model for the SavedVariant.
        add_tags (bool): when true, also include the variant's tags,
            functional data and notes.

    Returns:
        dict: json object from ``_get_json_for_model`` (guid keyed as
            'variantId'), extended with 'chrom' and 'pos' derived from 'xpos'.
    """
    nested_fields = [{'fields': ('family', 'guid')}]
    result = _get_json_for_model(saved_variant, nested_fields=nested_fields, guid_key='variantId')

    chrom, pos = get_chrom_pos(result['xpos'])
    result.update({
        'chrom': chrom,
        'pos': pos,
    })

    if not add_tags:
        return result

    tags = [get_json_for_variant_tag(tag) for tag in saved_variant.varianttag_set.all()]
    functional_data = [
        get_json_for_variant_functional_data(tag) for tag in saved_variant.variantfunctionaldata_set.all()
    ]
    notes = [get_json_for_variant_note(tag) for tag in saved_variant.variantnote_set.all()]
    result.update({
        'tags': tags,
        'functionalData': functional_data,
        'notes': notes,
    })
    return result
def _process_result(variant_json, saved_variant):
    """Finalize the JSON for one saved variant (mutates and returns ``variant_json``).

    Reads ``add_tags`` and ``add_details`` from the enclosing scope. With
    ``add_tags`` the variant's tags, functional data and notes are attached;
    with ``add_details`` the stored ``saved_variant_json`` is merged in. A
    chrom-pos-ref-alt ``variantId`` is added if none is present, and
    ``familyGuids`` is always set to the variant's single family guid.
    """
    if add_tags:
        tags = [get_json_for_variant_tag(tag) for tag in saved_variant.varianttag_set.all()]
        functional_data = [
            get_json_for_variant_functional_data(tag)
            for tag in saved_variant.variantfunctionaldata_set.all()
        ]
        notes = [get_json_for_variant_note(tag) for tag in saved_variant.variantnote_set.all()]
        variant_json.update({
            'tags': tags,
            'functionalData': functional_data,
            'notes': notes,
        })

    if add_details:
        variant_json.update(saved_variant.saved_variant_json)

    if 'variantId' not in variant_json:
        chrom, pos = get_chrom_pos(saved_variant.xpos)
        fallback_id = '{}-{}-{}-{}'.format(chrom, pos, saved_variant.ref, saved_variant.alt)
        variant_json['variantId'] = fallback_id

    family_guid = saved_variant.family.guid
    variant_json['familyGuids'] = [family_guid]
    return variant_json
def __unicode__(self):
    """Return "<chrom>:<pos>: <tag type name>" for this object."""
    chrom, pos = get_chrom_pos(self.xpos_start)
    tag_name = self.variant_tag_type.name
    # %-formatting is kept deliberately; it is part of the displayed text contract.
    return "%s:%s: %s" % (chrom, pos, tag_name)
def variant_details(variant_json, project, user, individual_guids_by_id=None):
    """Convert a stored variant JSON blob into the variant detail structure.

    Input that already contains a 'populations' key is returned untouched.

    Args:
        variant_json (dict): stored variant JSON; must contain 'xpos'.
        project: project used to look up individuals when no id map is given.
        user: requesting user; HGMD class is only exposed to staff users.
        individual_guids_by_id (dict): optional individual id -> guid map;
            built from the project's individuals when empty or omitted.

    Returns:
        dict: the detail structure (or ``variant_json`` itself, see above).
    """
    if 'populations' in variant_json:
        return variant_json

    annotation = variant_json.get('annotation') or {}
    is_es_variant = annotation.get('db') == 'elasticsearch'

    chrom, pos = get_chrom_pos(variant_json['xpos'])

    extras = variant_json.get('extras') or {}
    genome_version = extras.get('genome_version') or '37'
    lifted_over_genome_version = '37' if genome_version == '38' else '38'

    # Lifted-over coordinates are stored as a '-'-separated string.
    coords_field = 'grch%s_coords' % lifted_over_genome_version
    raw_coords = extras.get(coords_field)
    coords = raw_coords.split('-') if raw_coords else []
    # NOTE(review): lstrip('chr') strips any leading 'c'/'h'/'r' characters,
    # not the literal prefix - kept as in the original.
    lifted_over_chrom = coords[0].lstrip('chr') if coords else ''
    lifted_over_pos = coords[1] if len(coords) > 1 else ''

    cnv_keys = ('array', 'caller', 'cn', 'freq', 'LRR_median', 'LRR_sd', 'size', 'snps', 'type')
    genotypes_by_individual_id = {}
    for sample_id, raw_genotype in variant_json.get('genotypes', {}).items():
        genotype_extras = raw_genotype.get('extras', {})
        cnvs = genotype_extras.get('cnvs', {})
        genotypes_by_individual_id[sample_id] = {
            'ab': raw_genotype.get('ab'),
            'ad': genotype_extras.get('ad'),
            'cnvs': {cnv_key: cnvs.get(cnv_key) for cnv_key in cnv_keys},
            'dp': genotype_extras.get('dp'),
            'gq': raw_genotype.get('gq'),
            'numAlt': raw_genotype.get('num_alt'),
            'pl': genotype_extras.get('pl'),
            'sampleId': sample_id,
        }

    if not individual_guids_by_id:
        individual_guids_by_id = {
            i.individual_id: i.guid for i in Individual.objects.filter(family__project=project)
        }

    # Re-key by individual guid; genotypes without a known individual are dropped.
    genotypes = {}
    for sample_id, converted_genotype in genotypes_by_individual_id.items():
        individual_guid = individual_guids_by_id.get(sample_id)
        if individual_guid:
            genotypes[individual_guid] = converted_genotype

    transcripts = defaultdict(list)
    worst_index = annotation.get('worst_vep_annotation_index')
    for index, vep_annotation in enumerate(annotation['vep_annotation'] or []):
        gene_key = vep_annotation.get('gene', vep_annotation.get('gene_id'))
        transcripts[gene_key].append(_transcript_detail(vep_annotation, index == worst_index))

    freqs = annotation.get('freqs', {})
    pop_counts = annotation.get('pop_counts', {})

    return {
        'chrom': chrom,
        'pos': pos,
        'predictions': {
            'cadd': annotation.get('cadd_phred'),
            'dann': annotation.get('dann_score'),
            'eigen': annotation.get('eigen_phred'),
            'fathmm': annotation.get('fathmm'),
            'gerp_rs': annotation.get('GERP_RS'),
            'phastcons_100_vert': annotation.get('phastCons100way_vertebrate'),
            'mpc': annotation.get('mpc_score'),
            'metasvm': annotation.get('metasvm'),
            'mut_taster': annotation.get('muttaster'),
            'polyphen': annotation.get('polyphen'),
            'primate_ai': annotation.get('primate_ai_score'),
            'revel': annotation.get('revel_score'),
            'sift': annotation.get('sift'),
            'splice_ai': annotation.get('splice_ai_delta_score'),
        },
        'mainTranscript': _variant_main_transcript(variant_json),
        'clinvar': {
            'clinsig': extras.get('clinvar_clinsig'),
            'variantId': extras.get('clinvar_variant_id'),
            'alleleId': extras.get('clinvar_allele_id'),
            'goldStars': extras.get('clinvar_gold_stars'),
        },
        'hgmd': {
            'accession': extras.get('hgmd_accession'),
            'class': extras.get('hgmd_class') if (user and user.is_staff) else None,
        },
        'genotypes': genotypes,
        # Filter value of the first genotype (if any) in the stored JSON.
        'genotypeFilters': next(
            (stored.get('filter') for stored in variant_json.get('genotypes', {}).values()), None),
        'genomeVersion': genome_version,
        'liftedOverGenomeVersion': lifted_over_genome_version,
        'liftedOverChrom': lifted_over_chrom,
        'liftedOverPos': lifted_over_pos,
        'originalAltAlleles': extras.get('orig_alt_alleles') or [],
        # Each 'af' prefers the popmax key and falls back to the overall key;
        # the key names depend on whether the variant came from elasticsearch.
        'populations': {
            'callset': {
                'af': freqs.get('AF'),
                'ac': pop_counts.get('AC'),
                'an': pop_counts.get('AN'),
            },
            'topmed': {
                'af': freqs.get('topmed_AF'),
                'ac': pop_counts.get('topmed_AC'),
                'an': pop_counts.get('topmed_AN'),
            },
            'g1k': {
                'af': (
                    freqs.get('1kg_wgs_popmax_AF', freqs.get('1kg_wgs_AF', 0))
                    if is_es_variant else
                    freqs.get('1kg_wgs_phase3_popmax', freqs.get('1kg_wgs_phase3', 0))
                ),
                'ac': pop_counts.get('g1kAC'),
                'an': pop_counts.get('g1kAN'),
            },
            'exac': {
                'af': (
                    freqs.get('exac_v3_popmax_AF', freqs.get('exac_v3_AF', 0))
                    if is_es_variant else
                    freqs.get('exac_v3_popmax', freqs.get('exac_v3', 0))
                ),
                'ac': pop_counts.get('exac_v3_AC'),
                'an': pop_counts.get('exac_v3_AN'),
                'hom': pop_counts.get('exac_v3_Hom'),
                'hemi': pop_counts.get('exac_v3_Hemi'),
            },
            'gnomad_exomes': {
                'af': (
                    freqs.get('gnomad_exomes_popmax_AF', freqs.get('gnomad_exomes_AF', 0))
                    if is_es_variant else
                    freqs.get('gnomad-exomes2_popmax', freqs.get('gnomad-exomes2', None))
                ),
                'ac': pop_counts.get('gnomad_exomes_AC'),
                'an': pop_counts.get('gnomad_exomes_AN'),
                'hom': pop_counts.get('gnomad_exomes_Hom'),
                'hemi': pop_counts.get('gnomad_exomes_Hemi'),
            },
            'gnomad_genomes': {
                # NOTE(review): the 'gnomad-gnomad-genomes2_popmax' key looks
                # like it may have a doubled prefix - confirm against stored data.
                'af': (
                    freqs.get('gnomad_genomes_popmax_AF', freqs.get('gnomad_genomes_AF', 0))
                    if is_es_variant else
                    freqs.get('gnomad-gnomad-genomes2_popmax', freqs.get('gnomad-genomes2', None))
                ),
                'ac': pop_counts.get('gnomad_genomes_AC'),
                'an': pop_counts.get('gnomad_genomes_AN'),
                'hom': pop_counts.get('gnomad_genomes_Hom'),
                'hemi': pop_counts.get('gnomad_genomes_Hemi'),
            },
        },
        'rsid': annotation.get('rsid'),
        'transcripts': transcripts,
    }
def __unicode__(self):
    """Return "<chrom>:<pos>-<project guid>:<family guid>" (family part empty if unset)."""
    chrom, pos = get_chrom_pos(self.xpos_start)
    project_guid = self.project.guid
    family_guid = self.family.guid if self.family else ''
    return "%s:%s-%s:%s" % (chrom, pos, project_guid, family_guid)
def _generate_rows(project, loaded_samples_by_project_family, saved_variants_by_project_family, errors):
    """Build the report rows for every family in a project.

    A family with loaded samples yields its base row when it has no saved
    variants, or one row per gene that its saved variants are grouped under.

    Args:
        project: project whose ``families`` are iterated; its ``guid`` keys
            both lookups below.
        loaded_samples_by_project_family (dict): project guid ->
            {family guid -> sequence of samples}.
        saved_variants_by_project_family (dict): project guid ->
            {family guid -> saved variants}.
        errors (list): mutated in place; human-readable problems are appended.

    Returns:
        list: row dicts, after ``_update_gene_symbols`` and
            ``_update_initial_omim_numbers`` have been applied. Empty when the
            project has no loaded samples.

    Raises:
        ValueError: if a phenotips feature that has a category carries an
            ``observed`` value other than yes/no.
    """
    rows = []

    loaded_samples_by_family = loaded_samples_by_project_family[project.guid]
    saved_variants_by_family = saved_variants_by_project_family[project.guid]

    if not loaded_samples_by_family:
        errors.append("No data loaded for project: %s" % project)
        logger.info("No data loaded for project: %s" % project)
        return []

    if "external" in project.name or "reprocessed" in project.name:
        sequencing_approach = "REAN"
    else:
        # NOTE(review): indexing .values() requires it to return a list (Python 2).
        sequencing_approach = loaded_samples_by_family.values()[0][-1].sample_type

    now = timezone.now()
    for family in project.families:
        samples = loaded_samples_by_family.get(family.guid)
        if not samples:
            errors.append("No data loaded for family: %s. Skipping..." % family)
            continue

        # Family-level columns shared by every row emitted for this family.
        row = {
            "project_guid": project.guid,
            "family_guid": family.guid,
            "family_id": family.family_id,
            "collaborator": project.name,
            "sequencing_approach": sequencing_approach,
            "extras_pedigree_url": family.pedigree_image.url if family.pedigree_image else "",
            "coded_phenotype": family.coded_phenotype or "",
            "pubmed_ids": '; '.join(family.pubmed_ids),
            "analysis_summary": (family.analysis_summary or '').strip('" \n'),
            "row_id": family.guid,
            "num_individuals_sequenced": len({sample.individual for sample in samples})
        }
        # DEFAULT_ROW is applied after the literal above, so it overrides any shared keys.
        row.update(DEFAULT_ROW)

        # t0 is the loaded date of the first sample in the family's list.
        t0 = samples[0].loaded_date
        t0_diff = rdelta.relativedelta(now, t0)
        t0_months_since_t0 = t0_diff.years * 12 + t0_diff.months
        row.update({
            "t0": t0,
            "t0_copy": t0,
            "months_since_t0": t0_months_since_t0,
        })
        if t0_months_since_t0 < 12:
            row['analysis_complete_status'] = "first_pass_in_progress"

        submitted_to_mme = any(i.mme_submitted_date for i in family.individual_set.all())
        if submitted_to_mme:
            row["submitted_to_mme"] = "Y"

        phenotips_individual_data_records = [json.loads(i.phenotips_data) for i in family.individual_set.all() if i.phenotips_data]

        phenotips_individual_expected_inheritance_model = [
            inheritance_mode["label"] for phenotips_data in phenotips_individual_data_records for inheritance_mode in phenotips_data.get("global_mode_of_inheritance", [])
        ]
        # Only an unambiguous (single) expected inheritance label is reported.
        if len(phenotips_individual_expected_inheritance_model) == 1:
            row["expected_inheritance_model"] = phenotips_individual_expected_inheritance_model.pop()

        phenotips_individual_mim_disorders = [phenotips_data.get("disorders", []) for phenotips_data in phenotips_individual_data_records]
        # First disorder id found across the family's individuals, without its "MIM:" prefix.
        omim_number_initial = next((disorder["id"] for disorders in phenotips_individual_mim_disorders for disorder in disorders if "id" in disorder), '').replace("MIM:", "")
        if omim_number_initial:
            row.update({
                "omim_number_initial": omim_number_initial,
                "phenotype_class": "KNOWN",
            })

        if family.post_discovery_omim_number:
            row["omim_number_post_discovery"] = family.post_discovery_omim_number

        phenotips_individual_features = [phenotips_data.get("features", []) for phenotips_data in phenotips_individual_data_records]
        category_not_set_on_some_features = False
        for features_list in phenotips_individual_features:
            for feature in features_list:
                if "category" not in feature:
                    category_not_set_on_some_features = True
                    continue

                if feature["observed"].lower() == "yes":
                    # Observed features set a "Y" in the column named after their HPO category.
                    hpo_category_id = feature["category"]
                    hpo_category_name = HPO_CATEGORY_NAMES[hpo_category_id]
                    key = hpo_category_name.lower().replace(" ", "_").replace("/", "_")
                    row[key] = "Y"
                elif feature["observed"].lower() == "no":
                    continue
                else:
                    raise ValueError("Unexpected value for 'observed' in %s" % (feature,))

        if category_not_set_on_some_features:
            errors.append("HPO category field not set for some HPO terms in %s" % family)

        saved_variants = saved_variants_by_family.get(family.guid)
        if not saved_variants:
            rows.append(row)
            continue

        saved_variants_to_json = {}
        for variant in saved_variants:
            # NOTE(review): the base row is appended once per rejected variant, and
            # gene rows may still be added for the family below - confirm the
            # resulting duplicate family rows are intended.
            if not variant.saved_variant_json:
                errors.append("%s - variant annotation not found" % variant)
                rows.append(row)
                continue

            saved_variant_json = variant_details(json.loads(variant.saved_variant_json), project, user=None)

            if not saved_variant_json['transcripts']:
                errors.append("%s - no gene ids" % variant)
                rows.append(row)
                continue

            saved_variants_to_json[variant] = saved_variant_json

        # Partition the family's sequenced individuals by affected status ("A" / "N").
        affected_individual_guids = set()
        unaffected_individual_guids = set()
        for sample in samples:
            if sample.individual.affected == "A":
                affected_individual_guids.add(sample.individual.guid)
            elif sample.individual.affected == "N":
                unaffected_individual_guids.add(sample.individual.guid)

        potential_compound_het_genes = defaultdict(set)
        for variant, saved_variant_json in saved_variants_to_json.items():
            inheritance_models = set()

            affected_indivs_with_hom_alt_variants = set()
            affected_indivs_with_het_variants = set()
            unaffected_indivs_with_hom_alt_variants = set()
            unaffected_indivs_with_het_variants = set()
            is_x_linked = False

            genotypes = saved_variant_json.get('genotypes')
            if genotypes:
                chrom = saved_variant_json['chrom']
                is_x_linked = "X" in chrom
                # numAlt 2 is counted as hom-alt and numAlt 1 as het.
                for sample_guid, genotype in genotypes.items():
                    if genotype["numAlt"] == 2 and sample_guid in affected_individual_guids:
                        affected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in affected_individual_guids:
                        affected_indivs_with_het_variants.add(sample_guid)
                    elif genotype["numAlt"] == 2 and sample_guid in unaffected_individual_guids:
                        unaffected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in unaffected_individual_guids:
                        unaffected_indivs_with_het_variants.add(sample_guid)

            # AR-homozygote, AR-comphet, AR, AD, de novo, X-linked, UPD, other, multiple
            if not unaffected_indivs_with_hom_alt_variants and affected_indivs_with_hom_alt_variants:
                if is_x_linked:
                    inheritance_models.add("X-linked")
                else:
                    inheritance_models.add("AR-homozygote")

            if not unaffected_indivs_with_hom_alt_variants and not unaffected_indivs_with_het_variants and affected_indivs_with_het_variants:
                if unaffected_individual_guids:
                    inheritance_models.add("de novo")
                else:
                    inheritance_models.add("AD")

            # Candidate compound het: het in affected only (never hom-alt), and either
            # fewer than two unaffected individuals or at least one unaffected carrier.
            if not unaffected_indivs_with_hom_alt_variants and (len(unaffected_individual_guids) < 2 or unaffected_indivs_with_het_variants) and affected_indivs_with_het_variants and not affected_indivs_with_hom_alt_variants:
                for gene_id in saved_variant_json['transcripts']:
                    potential_compound_het_genes[gene_id].add(variant)

            saved_variant_json['inheritance'] = inheritance_models

        gene_ids_to_saved_variants = defaultdict(set)
        gene_ids_to_variant_tag_names = defaultdict(set)
        gene_ids_to_inheritance = defaultdict(set)
        # Compound het variants are reported in the gene that they share
        for gene_id, variants in potential_compound_het_genes.items():
            if len(variants) > 1:
                gene_ids_to_inheritance[gene_id].add("AR-comphet")
                # Only include compound hets for one of the genes they are both in
                existing_gene_id = next((
                    existing_gene_id for existing_gene_id, existing_variants in gene_ids_to_saved_variants.items()
                    if existing_variants == variants), None)
                if existing_gene_id:
                    main_gene_ids = {
                        saved_variants_to_json[variant]['mainTranscript']['geneId'] for variant in variants
                    }
                    # Move the already-recorded group to this gene when it is a main-transcript gene.
                    if gene_id in main_gene_ids:
                        gene_ids_to_saved_variants[gene_id] = gene_ids_to_saved_variants[existing_gene_id]
                        del gene_ids_to_saved_variants[existing_gene_id]
                        gene_ids_to_variant_tag_names[gene_id] = gene_ids_to_variant_tag_names[existing_gene_id]
                        del gene_ids_to_variant_tag_names[existing_gene_id]
                else:
                    for variant in variants:
                        saved_variants_to_json[variant]['inheritance'] = {"AR-comphet"}
                        gene_ids_to_variant_tag_names[gene_id].update(
                            {vt.variant_tag_type.name for vt in variant.discovery_tags})
                    gene_ids_to_saved_variants[gene_id].update(variants)

        # Non-compound het variants are reported in the main transcript gene
        for variant, saved_variant_json in saved_variants_to_json.items():
            if "AR-comphet" not in saved_variant_json['inheritance']:
                gene_id = saved_variant_json['mainTranscript']['geneId']
                gene_ids_to_saved_variants[gene_id].add(variant)
                gene_ids_to_variant_tag_names[gene_id].update({vt.variant_tag_type.name for vt in variant.discovery_tags})
                gene_ids_to_inheritance[gene_id].update(saved_variant_json['inheritance'])

        if len(gene_ids_to_saved_variants) > 1:
            row["gene_count"] = len(gene_ids_to_saved_variants)

        for gene_id, variants in gene_ids_to_saved_variants.items():
            # create a copy of the row dict
            # NOTE(review): from the second gene on, this copies the previous gene's
            # row, so "row_id" accumulates gene ids and gene-specific columns carry
            # over unless overwritten - confirm this is intended.
            row = dict(row)

            row["actual_inheritance_model"] = ", ".join(gene_ids_to_inheritance[gene_id])
            row["gene_id"] = gene_id
            row["row_id"] += gene_id

            variant_tag_names = gene_ids_to_variant_tag_names[gene_id]

            has_tier1 = any(name.startswith("Tier 1") for name in variant_tag_names)
            has_tier2 = any(name.startswith("Tier 2") for name in variant_tag_names)
            has_known_gene_for_phenotype = 'Known gene for phenotype' in variant_tag_names

            row.update({
                "solved": ("TIER 1 GENE" if (has_tier1 or has_known_gene_for_phenotype) else (
                    "TIER 2 GENE" if has_tier2 else "N")),
                "komp_early_release": "Y" if 'Share with KOMP' in variant_tag_names else "N",
            })

            if has_tier1 or has_tier2 or has_known_gene_for_phenotype:
                row.update({
                    "posted_publicly": "",
                    "analysis_complete_status": "complete",
                    "novel_mendelian_gene": "Y" if any("Novel gene" in name for name in variant_tag_names) else "N",
                })

                # First matching tag group wins: KNOWN, then NEW, EXPAN, UE.
                if has_known_gene_for_phenotype:
                    row["phenotype_class"] = "KNOWN"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Known gene, new phenotype', 'Tier 2 - Known gene, new phenotype',
                ]):
                    row["phenotype_class"] = "NEW"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype expansion', 'Tier 1 - Novel mode of inheritance', 'Tier 2 - Phenotype expansion',
                ]):
                    row["phenotype_class"] = "EXPAN"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype not delineated', 'Tier 2 - Phenotype not delineated'
                ]):
                    row["phenotype_class"] = "UE"

            if not submitted_to_mme:
                if has_tier1 or has_tier2:
                    row["submitted_to_mme"] = "N" if t0_months_since_t0 > 7 else "TBD"
                elif has_known_gene_for_phenotype:
                    row["submitted_to_mme"] = "KPG"

            if has_tier1 or has_tier2:
                # Set defaults
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    if functional_field == ADDITIONAL_KINDREDS_FIELD:
                        row[functional_field] = "1"
                    elif functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        row[functional_field] = "NA"
                    else:
                        row[functional_field] = "N"
                # Set values
                for variant in variants:
                    for f in variant.variantfunctionaldata_set.all():
                        functional_field = FUNCTIONAL_DATA_FIELD_MAP[f.functional_data_tag]
                        if functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                            value = f.metadata
                            if functional_field == ADDITIONAL_KINDREDS_FIELD:
                                # Stored count plus one.
                                value = str(int(value) + 1)
                            elif functional_field == OVERLAPPING_KINDREDS_FIELD:
                                # Keep the largest count seen across the gene's variants.
                                existing_val = row[functional_field]
                                if existing_val != 'NA':
                                    value = str(max(int(existing_val), int(value)))
                            elif row[functional_field] != 'NS':
                                # Other metadata fields are prefixed with the current cell value.
                                value = '{} {}'.format(row[functional_field], value)
                        else:
                            value = 'Y'

                        row[functional_field] = value
            elif has_known_gene_for_phenotype:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    row[functional_field] = "KPG"

            # One (variant id, gene id, lower-cased tag name) triple per discovery tag.
            row["extras_variant_tag_list"] = []
            for variant in variants:
                variant_id = "-".join(map(str, list(get_chrom_pos(variant.xpos_start)) + [variant.ref, variant.alt]))
                row["extras_variant_tag_list"] += [
                    (variant_id, gene_id, vt.variant_tag_type.name.lower()) for vt in variant.discovery_tags
                ]

            rows.append(row)

    _update_gene_symbols(rows)
    _update_initial_omim_numbers(rows)

    return rows
def _generate_rows(project, loaded_samples_by_project_family, saved_variants_by_project_family, errors):
    """Build the report rows for every family in a project.

    A family with loaded samples yields its base row when it has no saved
    variants, or one row per gene that its saved variants are grouped under.

    Args:
        project: project whose ``families`` are iterated; its ``guid`` keys
            both lookups below.
        loaded_samples_by_project_family (dict): project guid ->
            {family guid -> sequence of samples}.
        saved_variants_by_project_family (dict): project guid ->
            {family guid -> saved variants}.
        errors (list): mutated in place; human-readable problems are appended.

    Returns:
        list: row dicts, after ``_update_gene_symbols`` and
            ``_update_initial_omim_numbers`` have been applied. Empty when the
            project has no loaded samples.

    Raises:
        ValueError: if a phenotips feature that has a category carries an
            ``observed`` value other than yes/no.
    """
    rows = []

    loaded_samples_by_family = loaded_samples_by_project_family[project.guid]
    saved_variants_by_family = saved_variants_by_project_family[project.guid]

    if not loaded_samples_by_family:
        errors.append("No data loaded for project: %s" % project)
        logger.info("No data loaded for project: %s" % project)
        return []

    if "external" in project.name or "reprocessed" in project.name:
        sequencing_approach = "REAN"
    else:
        # NOTE(review): indexing .values() requires it to return a list (Python 2).
        sequencing_approach = loaded_samples_by_family.values()[0][-1].sample_type

    now = timezone.now()
    for family in project.families:
        samples = loaded_samples_by_family.get(family.guid)
        if not samples:
            errors.append("No data loaded for family: %s. Skipping..." % family)
            continue

        # Family-level columns shared by every row emitted for this family.
        row = {
            "project_guid": project.guid,
            "family_guid": family.guid,
            "family_id": family.family_id,
            "collaborator": project.name,
            "sequencing_approach": sequencing_approach,
            "extras_pedigree_url": family.pedigree_image.url if family.pedigree_image else "",
            "coded_phenotype": family.coded_phenotype or "",
            "analysis_summary": (family.analysis_summary or '').strip('" \n'),
        }
        # DEFAULT_ROW is applied after the literal above, so it overrides any shared keys.
        row.update(DEFAULT_ROW)

        # t0 is the loaded date of the first sample in the family's list.
        t0 = samples[0].loaded_date
        t0_diff = rdelta.relativedelta(now, t0)
        t0_months_since_t0 = t0_diff.years * 12 + t0_diff.months
        row.update({
            "t0": t0,
            "t0_copy": t0,
            "months_since_t0": t0_months_since_t0,
        })
        if t0_months_since_t0 < 12:
            row['analysis_complete_status'] = "first_pass_in_progress"

        # A family counts as submitted when a mapping record exists for it.
        submitted_to_mme = SEQR_ID_TO_MME_ID_MAP.find_one({
            'project_id': project.deprecated_project_id,
            'family_id': family.family_id
        })
        if submitted_to_mme:
            row["submitted_to_mme"] = "Y"

        phenotips_individual_data_records = [
            json.loads(i.phenotips_data) for i in family.individual_set.all() if i.phenotips_data
        ]

        phenotips_individual_expected_inheritance_model = [
            inheritance_mode["label"] for phenotips_data in phenotips_individual_data_records
            for inheritance_mode in phenotips_data.get("global_mode_of_inheritance", [])
        ]
        # Only an unambiguous (single) expected inheritance label is reported.
        if len(phenotips_individual_expected_inheritance_model) == 1:
            row["expected_inheritance_model"] = phenotips_individual_expected_inheritance_model.pop()

        phenotips_individual_mim_disorders = [
            phenotips_data.get("disorders", []) for phenotips_data in phenotips_individual_data_records
        ]
        # First disorder id found across the family's individuals, without its "MIM:" prefix.
        omim_number_initial = next(
            (disorder["id"] for disorders in phenotips_individual_mim_disorders
             for disorder in disorders if "id" in disorder), '').replace("MIM:", "")
        if omim_number_initial:
            row.update({
                "omim_number_initial": omim_number_initial,
                "phenotype_class": "Known",
            })

        if family.post_discovery_omim_number:
            row["omim_number_post_discovery"] = family.post_discovery_omim_number

        phenotips_individual_features = [
            phenotips_data.get("features", []) for phenotips_data in phenotips_individual_data_records
        ]
        category_not_set_on_some_features = False
        for features_list in phenotips_individual_features:
            for feature in features_list:
                if "category" not in feature:
                    category_not_set_on_some_features = True
                    continue

                if feature["observed"].lower() == "yes":
                    # Observed features set a "Y" in the column named after their HPO category.
                    hpo_category_id = feature["category"]
                    hpo_category_name = HPO_CATEGORY_NAMES[hpo_category_id]
                    key = hpo_category_name.lower().replace(" ", "_").replace("/", "_")
                    row[key] = "Y"
                elif feature["observed"].lower() == "no":
                    continue
                else:
                    raise ValueError("Unexpected value for 'observed' in %s" % (feature, ))

        if category_not_set_on_some_features:
            errors.append("HPO category field not set for some HPO terms in %s" % family)

        saved_variants = saved_variants_by_family.get(family.guid)
        if not saved_variants:
            rows.append(row)
            continue

        saved_variants_to_json = {}
        for variant in saved_variants:
            # NOTE(review): the base row is appended once per rejected variant, and
            # gene rows may still be added for the family below - confirm the
            # resulting duplicate family rows are intended.
            if not variant.saved_variant_json:
                errors.append("%s - variant annotation not found" % variant)
                rows.append(row)
                continue

            saved_variant_json = variant_details(json.loads(variant.saved_variant_json), project, user=None)

            if not saved_variant_json['transcripts']:
                errors.append("%s - no gene ids" % variant)
                rows.append(row)
                continue

            saved_variants_to_json[variant] = saved_variant_json

        # Partition the family's samples by their individual's affected status ("A" / "N").
        # NOTE(review): these sets hold sample guids but are compared against the
        # genotype keys below - confirm variant_details keys genotypes by sample guid.
        affected_sample_guids = set()
        unaffected_sample_guids = set()
        for sample in samples:
            if sample.individual.affected == "A":
                affected_sample_guids.add(sample.guid)
            elif sample.individual.affected == "N":
                unaffected_sample_guids.add(sample.guid)

        potential_compound_het_genes = defaultdict(set)
        for variant, saved_variant_json in saved_variants_to_json.items():
            inheritance_models = set()

            affected_indivs_with_hom_alt_variants = set()
            affected_indivs_with_het_variants = set()
            unaffected_indivs_with_hom_alt_variants = set()
            unaffected_indivs_with_het_variants = set()
            is_x_linked = False

            genotypes = saved_variant_json.get('genotypes')
            if genotypes:
                chrom = saved_variant_json['chrom']
                is_x_linked = "X" in chrom
                # numAlt 2 is counted as hom-alt and numAlt 1 as het.
                for sample_guid, genotype in genotypes.items():
                    if genotype["numAlt"] == 2 and sample_guid in affected_sample_guids:
                        affected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in affected_sample_guids:
                        affected_indivs_with_het_variants.add(sample_guid)
                    elif genotype["numAlt"] == 2 and sample_guid in unaffected_sample_guids:
                        unaffected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in unaffected_sample_guids:
                        unaffected_indivs_with_het_variants.add(sample_guid)

            # AR-homozygote, AR-comphet, AR, AD, de novo, X-linked, UPD, other, multiple
            if not unaffected_indivs_with_hom_alt_variants and affected_indivs_with_hom_alt_variants:
                if is_x_linked:
                    inheritance_models.add("X-linked")
                else:
                    inheritance_models.add("AR-homozygote")

            if not unaffected_indivs_with_hom_alt_variants and not unaffected_indivs_with_het_variants and affected_indivs_with_het_variants:
                if unaffected_sample_guids:
                    inheritance_models.add("de novo")
                else:
                    inheritance_models.add("AD")

            # Candidate compound het: het in affected only (never hom-alt), and either
            # fewer than two unaffected samples or at least one unaffected carrier.
            if not unaffected_indivs_with_hom_alt_variants and (
                    len(unaffected_sample_guids) < 2 or unaffected_indivs_with_het_variants
            ) and affected_indivs_with_het_variants and not affected_indivs_with_hom_alt_variants:
                for gene_id in saved_variant_json['transcripts']:
                    potential_compound_het_genes[gene_id].add(variant)

            saved_variant_json['inheritance'] = inheritance_models

        gene_ids_to_saved_variants = defaultdict(set)
        gene_ids_to_variant_tag_names = defaultdict(set)
        gene_ids_to_inheritance = defaultdict(set)
        # Compound het variants are reported in the gene that they share
        for gene_id, variants in potential_compound_het_genes.items():
            if len(variants) > 1:
                gene_ids_to_inheritance[gene_id].add("AR-comphet")
                # Only include compound hets for one of the genes they are both in
                existing_gene_id = next(
                    (existing_gene_id for existing_gene_id, existing_variants in gene_ids_to_saved_variants.items()
                     if existing_variants == variants), None)
                if existing_gene_id:
                    main_gene_ids = {
                        saved_variants_to_json[variant]['mainTranscript']['geneId'] for variant in variants
                    }
                    # Move the already-recorded group to this gene when it is a main-transcript gene.
                    if gene_id in main_gene_ids:
                        gene_ids_to_saved_variants[gene_id] = gene_ids_to_saved_variants[existing_gene_id]
                        del gene_ids_to_saved_variants[existing_gene_id]
                        gene_ids_to_variant_tag_names[gene_id] = gene_ids_to_variant_tag_names[existing_gene_id]
                        del gene_ids_to_variant_tag_names[existing_gene_id]
                else:
                    for variant in variants:
                        saved_variants_to_json[variant]['inheritance'] = {
                            "AR-comphet"
                        }
                        gene_ids_to_variant_tag_names[gene_id].update({
                            vt.variant_tag_type.name for vt in variant.discovery_tags
                        })
                    gene_ids_to_saved_variants[gene_id].update(variants)

        # Non-compound het variants are reported in the main transcript gene
        for variant, saved_variant_json in saved_variants_to_json.items():
            if "AR-comphet" not in saved_variant_json['inheritance']:
                gene_id = saved_variant_json['mainTranscript']['geneId']
                gene_ids_to_saved_variants[gene_id].add(variant)
                gene_ids_to_variant_tag_names[gene_id].update({
                    vt.variant_tag_type.name for vt in variant.discovery_tags
                })
                gene_ids_to_inheritance[gene_id].update(saved_variant_json['inheritance'])

        if len(gene_ids_to_saved_variants) > 1:
            row["gene_count"] = len(gene_ids_to_saved_variants)

        for gene_id, variants in gene_ids_to_saved_variants.items():
            # create a copy of the row dict
            # NOTE(review): from the second gene on, this copies the previous gene's
            # row, so gene-specific columns carry over unless overwritten - confirm
            # this is intended.
            row = dict(row)

            row["actual_inheritance_model"] = ", ".join(gene_ids_to_inheritance[gene_id])
            row["gene_id"] = gene_id

            variant_tag_names = gene_ids_to_variant_tag_names[gene_id]

            has_tier1 = any(name.startswith("Tier 1") for name in variant_tag_names)
            has_tier2 = any(name.startswith("Tier 2") for name in variant_tag_names)
            has_known_gene_for_phenotype = 'Known gene for phenotype' in variant_tag_names

            row.update({
                "solved": ("TIER 1 GENE" if (has_tier1 or has_known_gene_for_phenotype) else
                           ("TIER 2 GENE" if has_tier2 else "N")),
                "komp_early_release": "Y" if 'Share with KOMP' in variant_tag_names else "N",
            })

            if has_tier1 or has_tier2 or has_known_gene_for_phenotype:
                row.update({
                    "posted_publicly": "",
                    "analysis_complete_status": "complete",
                    "novel_mendelian_gene": "Y" if any("Novel gene" in name for name in variant_tag_names) else "N",
                })

                # First matching tag group wins: EXPAN, then UE.
                if any(tag in variant_tag_names for tag in [
                        'Tier 1 - Phenotype expansion',
                        'Tier 1 - Novel mode of inheritance',
                        'Tier 2 - Phenotype expansion',
                ]):
                    row["phenotype_class"] = "EXPAN"
                elif any(tag in variant_tag_names for tag in [
                        'Tier 1 - Phenotype not delineated',
                        'Tier 2 - Phenotype not delineated'
                ]):
                    row["phenotype_class"] = "UE"

            if not submitted_to_mme:
                if has_tier1 or has_tier2:
                    row["submitted_to_mme"] = "N" if t0_months_since_t0 > 7 else "TBD"
                elif has_known_gene_for_phenotype:
                    row["submitted_to_mme"] = "KPG"

            # Functional-data columns get defaults for tiered genes, or "KPG" for known genes.
            if has_tier1 or has_tier2:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    if functional_field == ADDITIONAL_KINDREDS_FIELD:
                        row[functional_field] = "1"
                    elif functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        row[functional_field] = "NA"
                    else:
                        row[functional_field] = "N"
            elif has_known_gene_for_phenotype:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    row[functional_field] = "KPG"

            # One (variant id, gene id, lower-cased tag name) triple per discovery tag.
            variant_tag_list = []
            for variant in variants:
                variant_id = "-".join(
                    map(str, list(get_chrom_pos(variant.xpos_start)) + [variant.ref, variant.alt]))
                variant_tag_list += [(variant_id, gene_id, vt.variant_tag_type.name.lower())
                                     for vt in variant.discovery_tags]

                # Functional data recorded on the variant overrides the column values set above.
                for f in variant.variantfunctionaldata_set.all():
                    functional_field = FUNCTIONAL_DATA_FIELD_MAP[f.functional_data_tag]
                    if functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        value = f.metadata
                        if functional_field == ADDITIONAL_KINDREDS_FIELD:
                            # Stored count plus one.
                            value = str(int(value) + 1)
                        elif row[functional_field] != 'NS':
                            # Other metadata fields are prefixed with the current cell value.
                            value = '{} {}'.format(row[functional_field], value)
                    else:
                        value = 'Y'

                    row[functional_field] = value

            row["extras_variant_tag_list"] = variant_tag_list

            rows.append(row)

    _update_gene_symbols(rows)
    _update_initial_omim_numbers(rows)

    return rows
def __unicode__(self):
    """Return a short label of the form ``<chrom>:<pos>-<family guid>``.

    The chromosome and position are obtained by passing ``self.xpos_start``
    to ``get_chrom_pos``; the guid is read from ``self.family``.
    """
    chrom, pos = get_chrom_pos(self.xpos_start)
    family_guid = self.family.guid
    label_parts = (chrom, pos, family_guid)
    return "%s:%s-%s" % label_parts
def __unicode__(self):
    """Return a short label of the form ``<chrom>:<pos>: <note preview>``.

    The chromosome and position are obtained by passing ``self.xpos_start``
    to ``get_chrom_pos``. The preview is at most the first 20 characters of
    ``self.note``; a falsy note (e.g. ``None``) is rendered as an empty
    string.
    """
    chrom, pos = get_chrom_pos(self.xpos_start)
    # Fall back to an empty string so slicing never runs on a falsy note.
    note_text = self.note or ""
    note_preview = note_text[:20]
    return "%s:%s: %s" % (chrom, pos, note_preview)