def get_all_clinical_data_for_family(project_id, family_id, indiv_id):
    """
    Gets phenotype and genotype data for this individual

    Args:
        project_id: id of the project the family belongs to
        family_id: id of family; only variant tags of this family are used
        indiv_id: id of the individual whose genotypes and phenotypes are reported

    Returns:
        A tuple (detailed_id_map, affected_patient). affected_patient is a
        JSON-style dict as per MME spec of a patient; detailed_id_map is a
        timestamped record wrapping it together with the project, family and
        individual ids.

    Raises:
        ValueError: if a tagged variant cannot be found in the datastore anymore.
    """
    project = get_object_or_404(Project, project_id=project_id)

    # species (only human for now) till seqr starts tracking species
    species = "NCBITaxon:9606"

    # contact details come from settings; the data owner's e-mail (when set)
    # is appended to the generic contact href
    data_owner = settings.MME_PATIENT_PRIMARY_DATA_OWNER[project_id]
    href = settings.MME_CONTACT_HREF
    if data_owner["email"] != "":
        href = href + ',' + data_owner["email"]
    contact = {
        "name": settings.MME_CONTACT_NAME + ' (data owner: ' + data_owner["PI"] + ')',
        "institution": settings.MME_CONTACT_INSTITUTION,
        "href": href,
    }

    # collect every tagged variant of this family
    variants = []
    project_tags = ProjectTag.objects.filter(project__project_id=project_id)
    for project_tag in project_tags:
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            # serialise the tag once instead of once per field
            tag_json = variant_tag.toJSON()
            if family_id != tag_json['family']:
                continue
            variant = get_datastore(project.project_id).get_single_variant(
                project.project_id,
                tag_json['family'],
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )
            if variant is None:
                raise ValueError(
                    "Variant no longer called in this family (did the callset version change?)"
                )
            variants.append({
                "variant": variant.toJSON(),
                "tag": project_tag.title,
                "family": variant_tag.family.toJSON(),
                "tag_name": tag_json['tag'],
            })

    # start compiling a matchmaker-esque data structure to send back
    genomic_features = []
    for variant in variants:
        variant_json = variant['variant']
        start = variant_json['pos']
        reference_bases = variant_json['ref']
        alternate_bases = variant_json['alt']
        end = int(variant_json['pos_end'])  # int and long are unified in python
        reference_name = variant_json['chr'].replace('chr', '')

        # more than one gene can be associated with a variant position, so one
        # genomic feature is emitted per gene id
        for gene_id in variant_json['gene_ids']:
            gene_symbol = ""
            if gene_id != "":
                gene = get_reference().get_gene(gene_id)
                # the reference lookup may come back empty; keep the symbol
                # blank rather than failing on the subscript
                if gene:
                    gene_symbol = gene['symbol']
            genomic_features.append({
                'gene': {"id": gene_id},
                'variant': {
                    'assembly': settings.GENOME_ASSEMBLY_NAME,
                    'referenceBases': reference_bases,
                    'alternateBases': alternate_bases,
                    'start': start,
                    'end': end,
                    'referenceName': reference_name,
                },
                'zygosity': variant_json['genotypes'][indiv_id]['num_alt'],
                'auxiliary': {
                    "tag_name": variant['tag_name'],
                    "gene_symbol": gene_symbol,
                },
            })

    # find phenotype information
    indiv = Individual.objects.get(indiv_id=indiv_id, project=project)
    phenotypes_entered = get_phenotypes_entered_for_individual(
        project_id, indiv.phenotips_id)

    # need to eventually support "FEMALE"|"MALE"|"OTHER"|"MIXED_SAMPLE"|"NOT_APPLICABLE",
    # as of now PhenoTips only has M/F
    sex = "NOT_APPLICABLE"
    if "M" == indiv.gender:
        sex = "MALE"
    if "F" == indiv.gender:
        sex = "FEMALE"

    # as of now non-standard features ('nonstandard_features') without HPO
    # terms cannot be sent to MME
    features = []
    if 'features' in phenotypes_entered:
        for f in phenotypes_entered['features']:
            features.append({
                "id": f['id'],
                "observed": f['observed'],
                "label": f['label'],
            })

    # the MME id is the plain individual id: hashing the id was deprecated
    # (per discussion on slack, green light by @dgmacarthur)
    mme_id = indiv.indiv_id
    label = indiv.indiv_id

    affected_patient = {
        "id": mme_id,
        "species": species,
        "label": label,
        "contact": contact,
        "features": features,
        "sex": sex,
        "genomicFeatures": genomic_features,
    }

    # map to put into mongo
    time_stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    detailed_id_map = {
        "generated_on": time_stamp,
        "project_id": project_id,
        "family_id": family_id,
        "individual_id": indiv.indiv_id,
        "mme_id": mme_id,
        "individuals_used_for_phenotypes": affected_patient,
    }
    return detailed_id_map, affected_patient
def get_all_clinical_data_for_family(project_id, family_id, indiv_id):
    """
    Gets phenotype and genotype data for this individual

    Args:
        project_id: id of the project the family belongs to
        family_id: id of family; only variant tags of this family are used
        indiv_id: id of the individual whose genotypes and phenotypes are reported

    Returns:
        A tuple (detailed_id_map, affected_patient). affected_patient is a
        JSON-style dict as per MME spec of a patient; detailed_id_map is a
        timestamped record wrapping it together with the project, family and
        individual ids.
    """
    project = get_object_or_404(Project, project_id=project_id)

    # species (only human for now) till seqr starts tracking species
    species = "NCBITaxon:9606"

    contact = {
        "name": project.mme_primary_data_owner,
        "institution": project.mme_contact_institution,
        "href": project.mme_contact_url,
    }

    # collect every tagged variant of this family
    variants = []
    project_tags = ProjectTag.objects.filter(project__project_id=project_id)
    for project_tag in project_tags:
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            # tags without a family, or of another family, are not relevant
            if variant_tag.family is None or family_id != variant_tag.family.family_id:
                continue
            variant = get_datastore(project).get_single_variant(
                project.project_id,
                variant_tag.family.family_id,
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )
            if variant is None:
                # best effort: a tag whose variant is gone is skipped, not fatal
                logging.info("Variant no longer called in this family (did the callset version change?)")
                continue
            variants.append({
                "variant": variant.toJSON(),
                "tag": project_tag.title,
                "family": variant_tag.family.toJSON(),
                "tag_name": variant_tag.project_tag.tag,
            })

    current_genome_assembly = find_genome_assembly(project)

    # start compiling a matchmaker-esque data structure to send back
    genomic_features = []
    for variant in variants:
        variant_json = variant['variant']
        start = variant_json['pos']
        reference_bases = variant_json['ref']
        alternate_bases = variant_json['alt']
        end = int(variant_json['pos_end'])  # int and long are unified in python
        reference_name = variant_json['chr'].replace('chr', '')

        # more than one gene can be associated with a variant position, so one
        # genomic feature is emitted per gene id
        for gene_id in variant_json['gene_ids']:
            gene_symbol = ""
            if gene_id != "":
                gene = get_reference().get_gene(gene_id)
                if gene:
                    gene_symbol = gene['symbol']
            genomic_features.append({
                'gene': {"id": gene_id},
                'variant': {
                    'assembly': current_genome_assembly,
                    'referenceBases': reference_bases,
                    'alternateBases': alternate_bases,
                    'start': start,
                    'end': end,
                    'referenceName': reference_name,
                },
                'zygosity': variant_json['genotypes'][indiv_id]['num_alt'],
                'auxiliary': {
                    "tag_name": variant['tag_name'],
                    "gene_symbol": gene_symbol,
                },
            })

    # find phenotype information
    indiv = Individual.objects.get(indiv_id=indiv_id, project=project)
    phenotypes_entered = get_phenotypes_entered_for_individual(project_id, indiv.phenotips_id)

    # need to eventually support "FEMALE"|"MALE"|"OTHER"|"MIXED_SAMPLE"|"NOT_APPLICABLE",
    # as of now PhenoTips only has M/F
    sex = "NOT_APPLICABLE"
    if "M" == indiv.gender:
        sex = "MALE"
    if "F" == indiv.gender:
        sex = "FEMALE"

    # as of now non-standard features ('nonstandard_features') without HPO
    # terms cannot be sent to MME
    features = []
    if 'features' in phenotypes_entered:
        for f in phenotypes_entered['features']:
            features.append({
                "id": f['id'],
                "observed": f['observed'],
                "label": f['label'],
            })

    # the individual id doubles as both MME id and label
    mme_id = indiv.indiv_id
    label = indiv.indiv_id

    affected_patient = {
        "id": mme_id,
        "species": species,
        "label": label,
        "contact": contact,
        "features": features,
        "sex": sex,
        "genomicFeatures": genomic_features,
    }

    # map to put into mongo
    time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    detailed_id_map = {
        "generated_on": time_stamp,
        "project_id": project_id,
        "family_id": family_id,
        "individual_id": indiv.indiv_id,
        "mme_id": mme_id,
        "individuals_used_for_phenotypes": affected_patient,
    }
    return detailed_id_map, affected_patient
def get_all_clinical_data_for_family(project_id, family_id, indiv_id):
    """
    Gets phenotype and genotype data for this individual

    Args:
        project_id: id of the project the family belongs to
        family_id: id of family; only variant tags of this family are used
        indiv_id: id of the individual whose genotypes and phenotypes are reported

    Returns:
        A tuple (detailed_id_map, affected_patient). affected_patient is a
        JSON-style dict as per MME spec of a patient; detailed_id_map is a
        timestamped record wrapping it together with the project, family and
        individual ids.
    """
    project = get_object_or_404(Project, project_id=project_id)
    seqr_project = project.seqr_project

    # species (only human for now) till seqr starts tracking species
    species = "NCBITaxon:9606"

    contact = {
        "name": seqr_project.mme_primary_data_owner,
        "institution": seqr_project.mme_contact_institution,
        "href": seqr_project.mme_contact_url,
    }

    # collect every tagged variant of this family
    variants = []
    project_tags = ProjectTag.objects.filter(project__project_id=project_id)
    for project_tag in project_tags:
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            # serialise the tag once instead of once per field
            tag_json = variant_tag.toJSON()
            if family_id != tag_json['family']:
                continue
            variant = get_datastore(project.project_id).get_single_variant(
                project.project_id,
                tag_json['family'],
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )
            if variant is None:
                # best effort: a tag whose variant is gone is skipped, not fatal
                logging.info("Variant no longer called in this family (did the callset version change?)")
                continue
            variants.append({
                "variant": variant.toJSON(),
                "tag": project_tag.title,
                "family": variant_tag.family.toJSON(),
                "tag_name": tag_json['tag'],
            })

    current_genome_assembly = find_genome_assembly(seqr_project)

    # start compiling a matchmaker-esque data structure to send back
    genomic_features = []
    for variant in variants:
        variant_json = variant['variant']
        start = variant_json['pos']
        reference_bases = variant_json['ref']
        alternate_bases = variant_json['alt']
        end = int(variant_json['pos_end'])  # int and long are unified in python
        reference_name = variant_json['chr'].replace('chr', '')

        # more than one gene can be associated with a variant position, so one
        # genomic feature is emitted per gene id
        for gene_id in variant_json['gene_ids']:
            gene_symbol = ""
            if gene_id != "":
                gene = get_reference().get_gene(gene_id)
                # the reference lookup may come back empty; keep the symbol
                # blank rather than failing on the subscript
                if gene:
                    gene_symbol = gene['symbol']
            genomic_features.append({
                'gene': {"id": gene_id},
                'variant': {
                    'assembly': current_genome_assembly,
                    'referenceBases': reference_bases,
                    'alternateBases': alternate_bases,
                    'start': start,
                    'end': end,
                    'referenceName': reference_name,
                },
                'zygosity': variant_json['genotypes'][indiv_id]['num_alt'],
                'auxiliary': {
                    "tag_name": variant['tag_name'],
                    "gene_symbol": gene_symbol,
                },
            })

    # find phenotype information
    indiv = Individual.objects.get(indiv_id=indiv_id, project=project)
    phenotypes_entered = get_phenotypes_entered_for_individual(project_id, indiv.phenotips_id)

    # need to eventually support "FEMALE"|"MALE"|"OTHER"|"MIXED_SAMPLE"|"NOT_APPLICABLE",
    # as of now PhenoTips only has M/F
    sex = "NOT_APPLICABLE"
    if "M" == indiv.gender:
        sex = "MALE"
    if "F" == indiv.gender:
        sex = "FEMALE"

    # as of now non-standard features ('nonstandard_features') without HPO
    # terms cannot be sent to MME
    features = []
    if 'features' in phenotypes_entered:
        for f in phenotypes_entered['features']:
            features.append({
                "id": f['id'],
                "observed": f['observed'],
                "label": f['label'],
            })

    # the individual id doubles as both MME id and label
    mme_id = indiv.indiv_id
    label = indiv.indiv_id

    affected_patient = {
        "id": mme_id,
        "species": species,
        "label": label,
        "contact": contact,
        "features": features,
        "sex": sex,
        "genomicFeatures": genomic_features,
    }

    # map to put into mongo
    time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    detailed_id_map = {
        "generated_on": time_stamp,
        "project_id": project_id,
        "family_id": family_id,
        "individual_id": indiv.indiv_id,
        "mme_id": mme_id,
        "individuals_used_for_phenotypes": affected_patient,
    }
    return detailed_id_map, affected_patient
def get_all_clinical_data_for_family(project_id, family_id):
    """
    Gets phenotype and genotype data for the affected individuals of a family

    Args:
        project_id: id of the project the family belongs to
        family_id: id of family

    Returns:
        A tuple (detailed_id_map, affected_patients, id_map):
          - detailed_id_map: list with one timestamped record per affected
            individual (project, family, individual and MME ids)
          - affected_patients: list of {"patient": {...}} dicts as per MME
            spec of a patient
          - id_map: dict mapping each generated MME id back to the individual id

    Raises:
        ValueError: if a tagged variant cannot be found in the datastore anymore.
    """
    project = get_object_or_404(Project, project_id=project_id)

    # species (only human for now) till seqr starts tracking species
    species = "NCBITaxon:9606"

    # contact (this should be set in settings)
    contact = {
        "name": settings.MME_CONTACT_NAME,
        "institution": settings.MME_CONTACT_INSTITUTION,
        "href": settings.MME_CONTACT_HREF,
    }

    # collect the tagged variants of the requested project and family (the
    # project id used to be hard-coded to '1kg' and tags of every family
    # were picked up)
    variants = []
    project_tags = ProjectTag.objects.filter(project__project_id=project_id)
    for project_tag in project_tags:
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            tag_family_id = variant_tag.toJSON()['family']
            if tag_family_id != family_id:
                continue
            variant = get_datastore(project.project_id).get_single_variant(
                project.project_id,
                tag_family_id,
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )
            if variant is None:
                raise ValueError("Variant no longer called in this family (did the callset version change?)")
            variants.append({
                "variant": variant.toJSON(),
                "tag": project_tag.title,
                "family": variant_tag.family.toJSON(),
            })

    # genomicFeatures section: accumulated across ALL variants (the list must
    # not be reset per variant, or only the last variant would be reported)
    genomic_features = []
    for variant in variants:
        variant_json = variant['variant']
        start = variant_json['pos']
        reference_bases = variant_json['ref']
        alternate_bases = variant_json['alt']
        end = int(variant_json['pos_end'])  # int and long are unified in python
        reference_name = variant_json['chr'].replace('chr', '')

        # more than one gene can be associated with a variant position, so one
        # genomic feature is emitted per gene id
        for gene_id in variant_json['gene_ids']:
            genomic_features.append({
                'gene': {"id": gene_id},
                'variant': {
                    'assembly': settings.GENOME_ASSEMBLY_NAME,
                    'referenceBases': reference_bases,
                    'alternateBases': alternate_bases,
                    'start': start,
                    'end': end,
                    'referenceName': reference_name,
                },
            })

    # all affected patients
    affected_patients = []
    detailed_id_map = []
    id_map = {}

    # find individuals in this family
    family = Family.objects.get(project=project, family_id=family_id)
    for indiv in family.get_individuals():
        if indiv.affected_status_display() != 'Affected':
            continue

        phenotypes_entered = get_phenotypes_entered_for_individual(project_id, indiv.indiv_id)

        # need to eventually support "FEMALE"|"MALE"|"OTHER"|"MIXED_SAMPLE"|"NOT_APPLICABLE",
        # as of now PhenoTips only has M/F; anything else is reported as
        # NOT_APPLICABLE instead of being mislabelled as FEMALE
        sex = "NOT_APPLICABLE"
        if "M" == indiv.gender:
            sex = "MALE"
        if "F" == indiv.gender:
            sex = "FEMALE"

        # as of now non-standard features ('nonstandard_features') without HPO
        # terms cannot be sent to MME
        features = []
        if 'features' in phenotypes_entered:
            for f in phenotypes_entered['features']:
                features.append({
                    "id": f['id'],
                    "observed": f['observed'],
                })

        # make a unique hash to represent individual in MME for MME_ID
        h = hashlib.md5()
        h.update(indiv.indiv_id)
        mme_id = h.hexdigest()
        label = mme_id  # using ID as label
        id_map[mme_id] = indiv.indiv_id

        # add new patient to affected patients
        affected_patients.append({
            "patient": {
                "id": mme_id,
                "species": species,
                "label": label,
                "contact": contact,
                "features": features,
                "sex": sex,
                "genomicFeatures": genomic_features,
            }
        })

        # map to put into mongo
        time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        detailed_id_map.append({
            "generated_on": time_stamp,
            "project_id": project_id,
            "family_id": family_id,
            "individual_id": indiv.indiv_id,
            "mme_id": mme_id,
            "individuals_used_for_phenotypes": affected_patients,
        })

    return detailed_id_map, affected_patients, id_map