def _map_line_to_json(fields):
    """Turn one GRASP row into a document keyed by a myvariant.info ``_id``.

    The rsid stored in ``fields[8]`` is looked up through the myvariant.info
    query API; the ``_id`` of a returned hit becomes the document ``_id`` and
    the remaining columns are copied under the ``"grasp"`` key.

    :param fields: sequence of column values for one row; its length must
        equal the module-level ``VALID_COLUMN_NO``.
    :return: the document after it has been passed through ``value_convert``,
        ``unlist``, ``dict_sweep`` (with ``[""]``) and ``list_split`` (with
        ``","``) -- helpers defined outside this block -- or ``None`` when the
        rsid is ``None`` or the query returns no hits.
    :raises AssertionError: if ``fields`` has an unexpected number of columns.
    """
    assert len(fields) == VALID_COLUMN_NO
    rsid = fields[8]
    if rsid is None:
        return

    # The discovery and replication sample tables share one column layout,
    # so both are built from the same ordered key list.
    ancestry_keys = (
        "total_samples",
        "european",
        "african",
        "east_asian",
        "indian_south_asian",
        "hispanic",
        "native",
        "micronesian",
        "arab_me",
        "mixed",
        "unspecified",
        "filipino",
        "indonesian",
    )

    def sample_counts(first_column):
        # Map the 13 consecutive columns starting at first_column.
        return dict(
            (key, fields[first_column + offset])
            for offset, key in enumerate(ancestry_keys)
        )

    # Let requests assemble the query string so the rsid is URL-escaped, and
    # bound the wait so one stalled connection cannot block the whole load.
    r = requests.get(
        "http://myvariant.info/v1/query",
        params={"q": "dbsnp.rsid:" + rsid, "fields": "_id"},
        timeout=60,
    )
    # NOTE(review): the function returns from inside the loop, so only the
    # first hit yields a document -- confirm that further hits may be ignored.
    for hits in r.json()["hits"]:
        HGVS = hits["_id"]
        one_snp_json = {
            "_id": HGVS,
            "grasp": {
                "hg19": {"chr": fields[5], "pos": fields[6]},
                "hupfield": fields[1],
                "last_curation_date": fields[2],
                "creation_date": fields[3],
                "srsid": fields[4],
                "publication": {
                    "journal": fields[16],
                    "title": fields[17],
                    "pmid": fields[7],
                    "snpid": fields[8],
                    "location_within_paper": fields[9],
                    "p_value": fields[10],
                    "phenotype": fields[11],
                    "paper_phenotype_description": fields[12],
                    "paper_phenotype_categories": fields[13],
                    "date_pub": fields[14],
                },
                "includes_male_female_only_analyses": fields[18],
                "exclusively_male_female": fields[19],
                "initial_sample_description": fields[20],
                "replication_sample_description": fields[21],
                "platform_snps_passing_qc": fields[22],
                "gwas_ancestry_description": fields[23],
                "discovery": sample_counts(25),
                "replication": sample_counts(38),
                "in_gene": fields[51],
                "nearest_gene": fields[52],
                "in_lincrna": fields[53],
                "in_mirna": fields[54],
                "in_mirna_bs": fields[55],
                "oreg_anno": fields[61],
                "conserv_pred_tfbs": fields[62],
                "human_enhancer": fields[63],
                "rna_edit": fields[64],
                "polyphen2": fields[65],
                "sift": fields[66],
                "ls_snp": fields[67],
                "uniprot": fields[68],
                "eqtl_meth_metab_study": fields[69],
            },
        }
        return list_split(
            dict_sweep(unlist(value_convert(one_snp_json)), [""]), ",")
def _map_line_to_json(fields, version):
    """Convert one split dbNSFP row (addressed by column index) to a document.

    :param fields: sequence of column values for one row, read by position.
    :param version: ``'hg19'`` or ``'hg38'``; selects which position is used
        to build the HGVS-style ``_id``.
    :return: ``{"_id": ..., "dbnsfp": {...}}`` after it has been passed
        through ``value_convert``, ``unlist``, ``dict_sweep`` (with
        ``vals=["."]``) and ``list_split`` (with ``";"``) -- helpers defined
        outside this block -- or ``None`` when the row has no hg19 position.
    :raises ValueError: if ``version`` is not ``'hg19'`` or ``'hg38'``, or a
        position column cannot be parsed as an integer.
    """
    def split_scores(raw):
        # Score columns are ';'-separated. In a multi-valued column the '.'
        # placeholders become None; a lone '.' is left as-is so that the
        # final dict_sweep(vals=["."]) call still sees it.
        scores = raw.split(';')
        if len(scores) > 1:
            scores = [None if score == '.' else score for score in scores]
        return scores

    def paired(keys, left, right):
        # Build a real list (not a lazy map object, which is what map()
        # returns on Python 3) of two-key dicts from two parallel sequences.
        return [dict(zip(keys, pair)) for pair in zip(left, right)]

    # specific variable treatment
    chrom = fields[0]
    if chrom == 'M':
        chrom = 'MT'
    hg18_end = "." if fields[10] == "." else int(fields[10])

    # in case of no hg19 position provided, remove the item
    if fields[8] == '.':
        return None
    chromStart = int(fields[8])
    chromEnd = chromStart
    chromStart_38 = int(fields[1])
    ref = fields[2].upper()
    alt = fields[3].upper()

    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local when the document is assembled below.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    if fields[105] == ".":
        siphy = "."
    else:
        freq = fields[105].split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    gtex = paired(
        ('gene', 'tissue'), fields[181].split('|'), fields[182].split('|'))
    uniprot = paired(
        ('acc', 'pos'),
        fields[26].rstrip().rstrip(';').split(";"),
        fields[28].rstrip().rstrip(';').split(";"))

    provean_score = split_scores(fields[52])
    sift_score = split_scores(fields[23])
    hdiv_score = split_scores(fields[29])
    hvar_score = split_scores(fields[32])
    lrt_score = split_scores(fields[35])
    dann_score = split_scores(fields[69])
    mutationtaster_score = split_scores(fields[39])
    mutationassessor_score = split_scores(fields[46])
    vest3_score = split_scores(fields[57])
    metasvm_score = split_scores(fields[59])
    fathmm_score = split_scores(fields[49])
    lr_score = split_scores(fields[62])
    fathmm_coding_score = split_scores(fields[71])
    integrated_fitcons_score = split_scores(fields[82])
    gm12878_fitcons_score = split_scores(fields[85])
    h1_hesc_fitcons_score = split_scores(fields[88])
    huvec_fitcons_score = split_scores(fields[91])

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": fields[6],
            "chrom": chrom,
            "hg19": {"start": chromStart, "end": chromEnd},
            "hg18": {"start": fields[10], "end": hg18_end},
            "hg38": {"start": fields[1], "end": fields[1]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": fields[4],
                "alt": fields[5],
                "pos": fields[22],
                "refcodon": fields[13],
                "codonpos": fields[14],
                "codon_degeneracy": fields[15]
            },
            "genename": fields[11],
            "uniprot": uniprot,
            "interpro_domain": fields[180],
            "cds_strand": fields[12],
            "ancestral_allele": fields[16],
            "ensembl": {
                "geneid": fields[19],
                "transcriptid": fields[20],
                "proteinid": fields[21]
            },
            "sift": {
                "score": sift_score,
                "converted_rankscore": fields[24],
                "pred": fields[25]
            },
            "polyphen2": {
                "hdiv": {
                    "score": hdiv_score,
                    "rankscore": fields[30],
                    "pred": fields[31]
                },
                "hvar": {
                    "score": hvar_score,
                    "rankscore": fields[33],
                    "pred": fields[34]
                }
            },
            "lrt": {
                "score": lrt_score,
                "converted_rankscore": fields[36],
                "pred": fields[37],
                "omega": fields[38]
            },
            "mutationtaster": {
                "score": mutationtaster_score,
                "converted_rankscore": fields[40],
                "pred": fields[41],
                "model": fields[42],
                "AAE": fields[43]
            },
            "mutationassessor": {
                "score": mutationassessor_score,
                "rankscore": fields[47],
                "pred": fields[48]
            },
            "fathmm": {
                "score": fathmm_score,
                "rankscore": fields[50],
                "pred": fields[51]
            },
            "provean": {
                "score": provean_score,
                "rankscore": fields[53],
                "pred": fields[54]
            },
            "vest3": {
                "score": vest3_score,
                # NOTE(review): same column as the score (fields[57]) --
                # confirm the intended rankscore column.
                "rankscore": fields[57],
                "transcriptid": fields[55],
                "transcriptvar": fields[56]
            },
            "fathmm-mkl": {
                "coding_score": fathmm_coding_score,
                "coding_rankscore": fields[72],
                "coding_pred": fields[73],
                "coding_group": fields[74]
            },
            "eigen": {
                "raw": fields[75],
                "phred": fields[76],
                "raw_rankscore": fields[77]
            },
            "eigen-pc": {"raw": fields[78], "raw_rankscore": fields[79]},
            "genocanyon": {"score": fields[80], "rankscore": fields[81]},
            "metasvm": {
                "score": metasvm_score,
                "rankscore": fields[60],
                "pred": fields[61]
            },
            "metalr": {
                "score": lr_score,
                "rankscore": fields[63],
                "pred": fields[64]
            },
            "reliability_index": fields[65],
            "dann": {"score": dann_score, "rankscore": fields[70]},
            "gerp++": {
                "nr": fields[94],
                "rs": fields[95],
                "rs_rankscore": fields[96]
            },
            "integrated": {
                "fitcons_score": integrated_fitcons_score,
                "fitcons_rankscore": fields[83],
                "confidence_value": fields[84]
            },
            "gm12878": {
                "fitcons_score": gm12878_fitcons_score,
                "fitcons_rankscore": fields[86],
                "confidence_value": fields[87]
            },
            "h1-hesc": {
                "fitcons_score": h1_hesc_fitcons_score,
                "fitcons_rankscore": fields[89],
                "confidence_value": fields[90]
            },
            "huvec": {
                "fitcons_score": huvec_fitcons_score,
                "fitcons_rankscore": fields[92],
                "confidence_value": fields[93]
            },
            "phylo": {
                "p100way": {
                    "vertebrate": fields[97],
                    "vertebrate_rankscore": fields[98]
                },
                "p20way": {
                    "mammalian": fields[99],
                    "mammalian_rankscore": fields[100]
                }
            },
            "phastcons": {
                "100way": {
                    "vertebrate": fields[101],
                    "vertebrate_rankscore": fields[102]
                },
                "20way": {
                    "mammalian": fields[103],
                    "mammalian_rankscore": fields[104]
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": fields[106],
                "logodds_rankscore": fields[107]
            },
            "1000gp3": {
                "ac": fields[108],
                "af": fields[109],
                "afr_ac": fields[110],
                "afr_af": fields[111],
                "eur_ac": fields[112],
                "eur_af": fields[113],
                "amr_ac": fields[114],
                "amr_af": fields[115],
                "eas_ac": fields[116],
                "eas_af": fields[117],
                "sas_ac": fields[118],
                "sas_af": fields[119]
            },
            "twinsuk": {"ac": fields[120], "af": fields[121]},
            "alspac": {"ac": fields[122], "af": fields[123]},
            "esp6500": {
                "aa_ac": fields[124],
                "aa_af": fields[125],
                "ea_ac": fields[126],
                "ea_af": fields[127]
            },
            "exac": {
                "ac": fields[128],
                "af": fields[129],
                "adj_ac": fields[130],
                "adj_af": fields[131],
                "afr_ac": fields[132],
                "afr_af": fields[133],
                "amr_ac": fields[134],
                "amr_af": fields[135],
                "eas_ac": fields[136],
                "eas_af": fields[137],
                "fin_ac": fields[138],
                "fin_af": fields[139],
                "nfe_ac": fields[140],
                "nfe_af": fields[141],
                "sas_ac": fields[142],
                "sas_af": fields[143]
            },
            "exac_nontcga": {
                "ac": fields[144],
                "af": fields[145],
                "adj_ac": fields[146],
                "adj_af": fields[147],
                "afr_ac": fields[148],
                "afr_af": fields[149],
                "amr_ac": fields[150],
                "amr_af": fields[151],
                "eas_ac": fields[152],
                "eas_af": fields[153],
                "fin_ac": fields[154],
                "fin_af": fields[155],
                "nfe_ac": fields[156],
                "nfe_af": fields[157],
                "sas_ac": fields[158],
                "sas_af": fields[159]
            },
            "exac_nonpsych": {
                "ac": fields[160],
                "af": fields[161],
                "adj_ac": fields[162],
                "adj_af": fields[163],
                "afr_ac": fields[164],
                "afr_af": fields[165],
                "amr_ac": fields[166],
                "amr_af": fields[167],
                "eas_ac": fields[168],
                "eas_af": fields[169],
                "fin_ac": fields[170],
                "fin_af": fields[171],
                "nfe_ac": fields[172],
                "nfe_af": fields[173]
            },
            "clinvar": {
                "rs": fields[176],
                "clinsig": fields[177],
                "trait": fields[178],
                "golden_stars": fields[179]
            },
            "gtex": gtex
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # Force the chromosome back to a string after the conversion helpers.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(df, version, index):
    """Convert the dbNSFP row at ``index`` of ``df`` into a document.

    :param df: table addressed as ``df.get_value(index, column)``; when the
        object has no ``get_value`` attribute (pandas removed it in 1.0),
        ``df.at[index, column]`` is used instead.
    :param version: ``'hg19'`` or ``'hg38'``; selects which position is used
        to build the HGVS-style ``_id``.
    :param index: row label passed through to the cell accessor.
    :return: ``{"_id": ..., "dbnsfp": {...}}`` after it has been passed
        through ``value_convert``, ``unlist``, ``dict_sweep`` (with
        ``vals=["."]``) and ``list_split`` (with ``";"``) -- helpers defined
        outside this block -- or ``None`` when the row has no hg19 position.
    :raises ValueError: if ``version`` is not ``'hg19'`` or ``'hg38'``, or a
        position column cannot be parsed as an integer.
    """
    if hasattr(df, "get_value"):
        def cell(column):
            return df.get_value(index, column)
    else:
        def cell(column):
            return df.at[index, column]

    def split_scores(column):
        # Score columns are ';'-separated. The masked list must be assigned
        # back for the masking to take effect. Only multi-valued columns are
        # masked: a lone '.' is left for dict_sweep(vals=["."]), which is
        # not asked to sweep None.
        scores = cell(column).split(';')
        if len(scores) > 1:
            scores = [None if score == '.' else score for score in scores]
        return scores

    def paired(keys, left, right):
        # Build a real list (not a lazy map object, which is what map()
        # returns on Python 3) of two-key dicts from two parallel sequences.
        return [dict(zip(keys, pair)) for pair in zip(left, right)]

    # specific variable treatment
    chrom = cell("#chr")
    if chrom == 'M':
        chrom = 'MT'
    hg18_start = cell("hg18_pos(1-based)")
    hg18_end = "." if hg18_start == "." else int(hg18_start)

    # in case of no hg19 position provided, remove the item
    hg19_pos = cell("hg19_pos(1-based)")
    if hg19_pos == '.':
        return None
    chromStart = int(hg19_pos)
    chromEnd = chromStart
    hg38_pos = cell("pos(1-based)")
    chromStart_38 = int(hg38_pos)
    ref = cell("ref").upper()
    alt = cell("alt").upper()

    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local when the document is assembled below.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    siphy_29way_pi = cell("SiPhy_29way_pi")
    if siphy_29way_pi == ".":
        siphy = "."
    else:
        freq = siphy_29way_pi.split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    gtex = paired(
        ('gene', 'tissue'),
        cell("GTEx_V6_gene").split('|'),
        cell("GTEx_V6_tissue").split('|'))
    uniprot = paired(
        ('acc', 'pos'),
        cell("Uniprot_acc_Polyphen2").rstrip().rstrip(';').split(";"),
        cell("Uniprot_aapos_Polyphen2").rstrip().rstrip(';').split(";"))

    provean_score = split_scores("PROVEAN_score")
    sift_score = split_scores("SIFT_score")
    hdiv_score = split_scores("Polyphen2_HDIV_score")
    hvar_score = split_scores("Polyphen2_HVAR_score")
    lrt_score = split_scores("LRT_score")
    m_cap_score = split_scores("M-CAP_score")
    mutationtaster_score = split_scores("MutationTaster_score")
    mutationassessor_score = split_scores("MutationAssessor_score")
    vest3_score = split_scores("VEST3_score")
    metasvm_score = split_scores("MetaSVM_score")
    fathmm_score = split_scores("FATHMM_score")
    metalr_score = split_scores("MetaLR_score")

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": cell("rs_dbSNP147"),
            "chrom": chrom,
            "hg19": {"start": chromStart, "end": chromEnd},
            "hg18": {"start": hg18_start, "end": hg18_end},
            "hg38": {"start": hg38_pos, "end": hg38_pos},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": cell("aaref"),
                "alt": cell("aaalt"),
                "pos": cell("aapos"),
                "refcodon": cell("refcodon"),
                "codonpos": cell("codonpos"),
                "codon_degeneracy": cell("codon_degeneracy"),
            },
            "genename": cell("genename"),
            "uniprot": uniprot,
            "interpro_domain": cell("Interpro_domain"),
            "cds_strand": cell("cds_strand"),
            "ancestral_allele": cell("Ancestral_allele"),
            "ensembl": {
                "geneid": cell("Ensembl_geneid"),
                "transcriptid": cell("Ensembl_transcriptid"),
                "proteinid": cell("Ensembl_proteinid")
            },
            "sift": {
                "score": sift_score,
                "converted_rankscore": cell("SIFT_converted_rankscore"),
                "pred": cell("SIFT_pred")
            },
            "polyphen2": {
                "hdiv": {
                    "score": hdiv_score,
                    "rankscore": cell("Polyphen2_HDIV_rankscore"),
                    "pred": cell("Polyphen2_HDIV_pred")
                },
                "hvar": {
                    "score": hvar_score,
                    "rankscore": cell("Polyphen2_HVAR_rankscore"),
                    "pred": cell("Polyphen2_HVAR_pred")
                }
            },
            "lrt": {
                "score": lrt_score,
                "converted_rankscore": cell("LRT_converted_rankscore"),
                "pred": cell("LRT_pred"),
                "omega": cell("LRT_Omega")
            },
            "mutationtaster": {
                "score": mutationtaster_score,
                "converted_rankscore":
                    cell("MutationTaster_converted_rankscore"),
                "pred": cell("MutationTaster_pred"),
                "model": cell("MutationTaster_model"),
                "AAE": cell("MutationTaster_AAE")
            },
            "mutationassessor": {
                "score": mutationassessor_score,
                "rankscore": cell("MutationAssessor_score_rankscore"),
                "pred": cell("MutationAssessor_pred")
            },
            "fathmm": {
                "score": fathmm_score,
                "rankscore": cell("FATHMM_converted_rankscore"),
                "pred": cell("FATHMM_pred")
            },
            "provean": {
                "score": provean_score,
                "rankscore": cell("PROVEAN_converted_rankscore"),
                "pred": cell("PROVEAN_pred")
            },
            "vest3": {
                "score": vest3_score,
                "rankscore": cell("VEST3_rankscore"),
                "transcriptid": cell("Transcript_id_VEST3"),
                "transcriptvar": cell("Transcript_var_VEST3")
            },
            "fathmm-mkl": {
                "coding_score": cell("fathmm-MKL_coding_score"),
                "coding_rankscore": cell("fathmm-MKL_coding_rankscore"),
                "coding_pred": cell("fathmm-MKL_coding_pred"),
                "coding_group": cell("fathmm-MKL_coding_group")
            },
            "eigen": {
                "coding_or_noncoding": cell("Eigen_coding_or_noncoding"),
                "raw": cell("Eigen-raw"),
                "phred": cell("Eigen-phred")
            },
            "eigen-pc": {
                "raw": cell("Eigen-PC-raw"),
                "phred": cell("Eigen-PC-phred"),
                "raw_rankscore": cell("Eigen-PC-raw_rankscore")
            },
            "genocanyon": {
                "score": cell("GenoCanyon_score"),
                "rankscore": cell("GenoCanyon_score_rankscore")
            },
            "metasvm": {
                "score": metasvm_score,
                "rankscore": cell("MetaSVM_rankscore"),
                "pred": cell("MetaSVM_pred")
            },
            "metalr": {
                "score": metalr_score,
                "rankscore": cell("MetaLR_rankscore"),
                "pred": cell("MetaLR_pred")
            },
            "reliability_index": cell("Reliability_index"),
            "m_cap_score": {
                "score": m_cap_score,
                "rankscore": cell("M-CAP_rankscore"),
                "pred": cell("M-CAP_pred")
            },
            "dann": {
                "score": cell("DANN_score"),
                "rankscore": cell("DANN_rankscore")
            },
            "gerp++": {
                "nr": cell("GERP++_NR"),
                "rs": cell("GERP++_RS"),
                "rs_rankscore": cell("GERP++_RS_rankscore")
            },
            "integrated": {
                "fitcons_score": cell("integrated_fitCons_score"),
                "fitcons_rankscore":
                    cell("integrated_fitCons_score_rankscore"),
                "confidence_value": cell("integrated_confidence_value")
            },
            "gm12878": {
                "fitcons_score": cell("GM12878_fitCons_score"),
                "fitcons_rankscore": cell("GM12878_fitCons_score_rankscore"),
                "confidence_value": cell("GM12878_confidence_value")
            },
            "h1-hesc": {
                "fitcons_score": cell("H1-hESC_fitCons_score"),
                "fitcons_rankscore": cell("H1-hESC_fitCons_score_rankscore"),
                "confidence_value": cell("H1-hESC_confidence_value")
            },
            "huvec": {
                "fitcons_score": cell("HUVEC_fitCons_score"),
                "fitcons_rankscore": cell("HUVEC_fitCons_score_rankscore"),
                "confidence_value": cell("HUVEC_confidence_value")
            },
            "phylo": {
                "p100way": {
                    "vertebrate": cell("phyloP100way_vertebrate"),
                    "vertebrate_rankscore":
                        cell("phyloP100way_vertebrate_rankscore")
                },
                "p20way": {
                    "mammalian": cell("phyloP20way_mammalian"),
                    "mammalian_rankscore":
                        cell("phyloP20way_mammalian_rankscore")
                }
            },
            "phastcons": {
                "100way": {
                    "vertebrate": cell("phastCons100way_vertebrate"),
                    "vertebrate_rankscore":
                        cell("phastCons100way_vertebrate_rankscore")
                },
                "20way": {
                    "mammalian": cell("phastCons20way_mammalian"),
                    "mammalian_rankscore":
                        cell("phastCons20way_mammalian_rankscore")
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": cell("SiPhy_29way_logOdds"),
                "logodds_rankscore": cell("SiPhy_29way_logOdds_rankscore")
            },
            "1000gp3": {
                "ac": cell("1000Gp3_AC"),
                "af": cell("1000Gp3_AF"),
                "afr_ac": cell("1000Gp3_AFR_AC"),
                "afr_af": cell("1000Gp3_AFR_AF"),
                "eur_ac": cell("1000Gp3_EUR_AC"),
                "eur_af": cell("1000Gp3_EUR_AF"),
                "amr_ac": cell("1000Gp3_AMR_AC"),
                "amr_af": cell("1000Gp3_AMR_AF"),
                "eas_ac": cell("1000Gp3_EAS_AC"),
                "eas_af": cell("1000Gp3_EAS_AF"),
                "sas_ac": cell("1000Gp3_SAS_AC"),
                "sas_af": cell("1000Gp3_SAS_AF")
            },
            "twinsuk": {
                "ac": cell("TWINSUK_AC"),
                "af": cell("TWINSUK_AF")
            },
            "alspac": {
                "ac": cell("ALSPAC_AC"),
                "af": cell("ALSPAC_AF")
            },
            "esp6500": {
                "aa_ac": cell("ESP6500_AA_AC"),
                "aa_af": cell("ESP6500_AA_AF"),
                "ea_ac": cell("ESP6500_EA_AC"),
                "ea_af": cell("ESP6500_EA_AF")
            },
            "exac": {
                "ac": cell("ExAC_AC"),
                "af": cell("ExAC_AF"),
                "adj_ac": cell("ExAC_Adj_AC"),
                "adj_af": cell("ExAC_Adj_AF"),
                "afr_ac": cell("ExAC_AFR_AC"),
                "afr_af": cell("ExAC_AFR_AF"),
                "amr_ac": cell("ExAC_AMR_AC"),
                "amr_af": cell("ExAC_AMR_AF"),
                "eas_ac": cell("ExAC_EAS_AC"),
                "eas_af": cell("ExAC_EAS_AF"),
                "fin_ac": cell("ExAC_FIN_AC"),
                "fin_af": cell("ExAC_FIN_AF"),
                "nfe_ac": cell("ExAC_NFE_AC"),
                "nfe_af": cell("ExAC_NFE_AF"),
                "sas_ac": cell("ExAC_SAS_AC"),
                "sas_af": cell("ExAC_SAS_AF")
            },
            "exac_nontcga": {
                "ac": cell("ExAC_nonTCGA_AC"),
                "af": cell("ExAC_nonTCGA_AF"),
                "adj_ac": cell("ExAC_nonTCGA_Adj_AC"),
                "adj_af": cell("ExAC_nonTCGA_Adj_AF"),
                "afr_ac": cell("ExAC_nonTCGA_AFR_AC"),
                "afr_af": cell("ExAC_nonTCGA_AFR_AF"),
                "amr_ac": cell("ExAC_nonTCGA_AMR_AC"),
                "amr_af": cell("ExAC_nonTCGA_AMR_AF"),
                "eas_ac": cell("ExAC_nonTCGA_EAS_AC"),
                "eas_af": cell("ExAC_nonTCGA_EAS_AF"),
                "fin_ac": cell("ExAC_nonTCGA_FIN_AC"),
                "fin_af": cell("ExAC_nonTCGA_FIN_AF"),
                "nfe_ac": cell("ExAC_nonTCGA_NFE_AC"),
                "nfe_af": cell("ExAC_nonTCGA_NFE_AF"),
                "sas_ac": cell("ExAC_nonTCGA_SAS_AC"),
                "sas_af": cell("ExAC_nonTCGA_SAS_AF")
            },
            "exac_nonpsych": {
                "ac": cell("ExAC_nonpsych_AC"),
                "af": cell("ExAC_nonpsych_AF"),
                "adj_ac": cell("ExAC_nonpsych_Adj_AC"),
                "adj_af": cell("ExAC_nonpsych_Adj_AF"),
                "afr_ac": cell("ExAC_nonpsych_AFR_AC"),
                "afr_af": cell("ExAC_nonpsych_AFR_AF"),
                "amr_ac": cell("ExAC_nonpsych_AMR_AC"),
                "amr_af": cell("ExAC_nonpsych_AMR_AF"),
                "eas_ac": cell("ExAC_nonpsych_EAS_AC"),
                "eas_af": cell("ExAC_nonpsych_EAS_AF"),
                "fin_ac": cell("ExAC_nonpsych_FIN_AC"),
                "fin_af": cell("ExAC_nonpsych_FIN_AF"),
                "nfe_ac": cell("ExAC_nonpsych_NFE_AC"),
                "nfe_af": cell("ExAC_nonpsych_NFE_AF"),
                "sas_ac": cell("ExAC_nonpsych_SAS_AC"),
                "sas_af": cell("ExAC_nonpsych_SAS_AF")
            },
            "clinvar": {
                "rs": cell("clinvar_rs"),
                "clinsig": cell("clinvar_clnsig"),
                "trait": cell("clinvar_trait"),
                "golden_stars": cell("clinvar_golden_stars")
            },
            "gtex": gtex
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # Force the chromosome back to a string after the conversion helpers.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(df, version, index=0):
    """Convert one dbNSFP row (addressed by column name) into a document.

    :param df: mapping-like row; every value is read as ``df[column]``.
    :param version: ``'hg19'`` or ``'hg38'``; selects which position is used
        to build the HGVS-style ``_id``.
    :param index: not used by this function; kept so existing callers that
        pass it keep working.
    :return: ``{"_id": ..., "dbnsfp": {...}}`` after it has been passed
        through ``value_convert_to_number``, ``unlist``, ``dict_sweep`` (with
        ``vals=[".", None]``) and ``list_split`` (with ``";"``) -- helpers
        defined outside this block -- or ``None`` when ``"pos(1-coor)"`` is
        ``'.'``.
    :raises ValueError: if ``version`` is not ``'hg19'`` or ``'hg38'``, or a
        numeric column cannot be parsed.
    """
    def scores(column):
        # Score columns are ';'-separated; '.' placeholders become None.
        return [None if item == '.' else item
                for item in df[column].split(';')]

    def modify_pvalue(pvalue):
        # str.strip with a character set: removes any leading/trailing
        # 'P', ' ' and '=' characters before the float conversion.
        return float(pvalue.strip('P = '))

    def flagged_ints(column):
        # '|'-separated integers with the '.' placeholders removed.
        return [int(i) for i in df[column].split("|") if i != "."]

    # specific variable treatment
    chrom = df["#chr"]
    if chrom == 'M':
        chrom = 'MT'
    hg18_start = df["hg18_pos(1-coor)"]
    hg18_end = "." if hg18_start == "." else int(hg18_start)

    # in case of no hg19 position provided, remove the item
    if df["pos(1-coor)"] == '.':
        return None
    chromStart = int(df["pos(1-coor)"])
    chromEnd = chromStart
    chromStart_38 = int(df["hg38_pos"])
    ref = df["ref"].upper()
    alt = df["alt"].upper()

    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local when the document is assembled below.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    siphy_29way_pi = df["SiPhy_29way_pi"]
    if siphy_29way_pi == ".":
        siphy = "."
    else:
        freq = siphy_29way_pi.split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    acc = df["Uniprot_acc"].rstrip().rstrip(';').split(";")
    pos = df["Uniprot_aapos"].rstrip().rstrip(';').split(";")
    uniprot = [dict(zip(('acc', 'pos'), pair)) for pair in zip(acc, pos)]

    provean_score = scores("PROVEAN_score")
    sift_score = scores("SIFT_score")
    hdiv_score = scores("Polyphen2_HDIV_score")
    hvar_score = scores("Polyphen2_HVAR_score")
    lrt_score = scores("LRT_score")
    m_cap_score = scores("M-CAP_score")
    mutationtaster_score = scores("MutationTaster_score")
    mutationassessor_score = scores("MutationAssessor_score")
    vest3_score = scores("VEST3_score")
    metasvm_score = scores("MetaSVM_score")
    fathmm_score = scores("FATHMM_score")
    metalr_score = scores("MetaLR_score")
    revel_score = scores("REVEL_score")

    # parse mutpred top 5 features
    mutpred_mechanisms = df["MutPred_Top5features"]
    if mutpred_mechanisms not in ['.', ',', '-']:
        # Each ';'-separated feature is split at " (" after trailing ')'
        # are removed, and the pieces are flattened into one list that is
        # read as alternating mechanism / p-value entries.
        flat = []
        for feature in mutpred_mechanisms.split(";"):
            flat.extend(feature.rstrip(")").split(" ("))
        # Pair up to five (mechanism, p-value) entries; rows listing fewer
        # than five features yield a shorter list instead of an IndexError.
        mechanisms = [
            {"mechanism": name, "p_val": modify_pvalue(pvalue)}
            for name, pvalue in zip(flat[0:10:2], flat[1:10:2])
        ]
    else:
        mechanisms = '.'

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": df["rs_dbSNP147"],
            "chrom": chrom,
            "hg19": {"start": chromStart, "end": chromEnd},
            "hg18": {"start": hg18_start, "end": hg18_end},
            "hg38": {"start": df["hg38_pos"], "end": df["hg38_pos"]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": df["aaref"],
                "alt": df["aaalt"],
                "pos": df["aapos"],
                "refcodon": df["refcodon"],
                "codonpos": df["codonpos"]
            },
            "genename": df["genename"],
            "uniprot": uniprot,
            "interpro_domain": df["Interpro_domain"],
            "cds_strand": df["cds_strand"],
            "ancestral_allele": df["Ancestral_allele"],
            "ensembl": {
                "geneid": df["Ensembl_geneid"],
                "transcriptid": df["Ensembl_transcriptid"]
            },
            "sift": {
                "score": sift_score,
                "converted_rankscore": df["SIFT_converted_rankscore"],
                "pred": df["SIFT_pred"]
            },
            "polyphen2": {
                "hdiv": {
                    "score": hdiv_score,
                    "rankscore": df["Polyphen2_HDIV_rankscore"],
                    "pred": df["Polyphen2_HDIV_pred"]
                },
                "hvar": {
                    "score": hvar_score,
                    "rankscore": df["Polyphen2_HVAR_rankscore"],
                    "pred": df["Polyphen2_HVAR_pred"]
                }
            },
            "lrt": {
                "score": lrt_score,
                "converted_rankscore": df["LRT_converted_rankscore"],
                "pred": df["LRT_pred"],
                "omega": df["LRT_Omega"]
            },
            "mutationtaster": {
                "score": mutationtaster_score,
                "converted_rankscore":
                    df["MutationTaster_converted_rankscore"],
                "pred": df["MutationTaster_pred"]
            },
            "mutationassessor": {
                "score": mutationassessor_score,
                "rankscore": df["MutationAssessor_rankscore"],
                "pred": df["MutationAssessor_pred"]
            },
            "fathmm": {
                "score": fathmm_score,
                "rankscore": df["FATHMM_rankscore"],
                "pred": df["FATHMM_pred"]
            },
            "provean": {
                "score": provean_score,
                "rankscore": df["PROVEAN_converted_rankscore"],
                "pred": df["PROVEAN_pred"]
            },
            "vest3": {
                "score": vest3_score,
                "rankscore": df["VEST3_rankscore"]
            },
            "eigen": {
                "coding_or_noncoding": df["Eigen_coding_or_noncoding"],
                "raw": df["Eigen-raw"],
                "phred": df["Eigen-phred"]
            },
            "eigen-pc": {
                "raw": df["Eigen-PC-raw"],
                "phred": df["Eigen-PC-phred"],
                "raw_rankscore": df["Eigen-PC-raw_rankscore"]
            },
            "metasvm": {
                "score": metasvm_score,
                "rankscore": df["MetaSVM_rankscore"],
                "pred": df["MetaSVM_pred"]
            },
            "metalr": {
                "score": metalr_score,
                "rankscore": df["MetaLR_rankscore"],
                "pred": df["MetaLR_pred"]
            },
            "reliability_index": df["Reliability_index"],
            "m_cap_score": {
                "score": m_cap_score,
                "rankscore": df["M-CAP_rankscore"],
                "pred": df["M-CAP_pred"]
            },
            "revel": {
                "score": revel_score,
                "rankscore": df["REVEL_rankscore"]
            },
            "mutpred": {
                "score": df["MutPred_score"],
                "rankscore": df["MutPred_rankscore"],
                "accession": df["MutPred_protID"],
                "aa_change": df["MutPred_AAchange"],
                "pred": mechanisms
            },
            "gerp++": {
                "nr": df["GERP++_NR"],
                "rs": df["GERP++_RS"],
                "rs_rankscore": df["GERP++_RS_rankscore"]
            },
            "phylo": {
                "p100way": {
                    "vertebrate": df["phyloP100way_vertebrate"],
                    "vertebrate_rankscore":
                        df["phyloP100way_vertebrate_rankscore"]
                },
                "p46way": {
                    "placental": df["phyloP46way_placental"],
                    "placental_rankscore":
                        df["phyloP46way_placental_rankscore"],
                    "primate": df["phyloP46way_primate"],
                    "primate_rankscore": df["phyloP46way_primate_rankscore"]
                }
            },
            "phastcons": {
                "100way": {
                    "vertebrate": df["phastCons100way_vertebrate"],
                    "vertebrate_rankscore":
                        df["phastCons100way_vertebrate_rankscore"]
                },
                "46way": {
                    "placental": df["phastCons46way_placental"],
                    "placental_rankscore":
                        df["phastCons46way_placental_rankscore"],
                    "primate": df["phastCons46way_primate"],
                    "primate_rankscore":
                        df["phastCons46way_primate_rankscore"]
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": df["SiPhy_29way_logOdds"],
                "logodds_rankscore": df["SiPhy_29way_logOdds_rankscore"]
            },
            "1000gp1": {
                "ac": df["1000Gp1_AC"],
                "af": df["1000Gp1_AF"],
                "afr_ac": df["1000Gp1_AFR_AC"],
                "afr_af": df["1000Gp1_AFR_AF"],
                "eur_ac": df["1000Gp1_EUR_AC"],
                "eur_af": df["1000Gp1_EUR_AF"],
                "amr_ac": df["1000Gp1_AMR_AC"],
                "amr_af": df["1000Gp1_AMR_AF"],
                "asn_ac": df["1000Gp1_ASN_AC"],
                "asn_af": df["1000Gp1_ASN_AF"]
            },
            "esp6500": {
                "aa_af": df["ESP6500_AA_AF"],
                # NOTE(review): this key ends with a space; confirm that it
                # matches the input header and is not a typo.
                "ea_af": df["ESP6500_EA_AF "]
            },
            "exac": {
                "ac": df["ExAC_AC"],
                "af": df["ExAC_AF"],
                "adj_ac": df["ExAC_Adj_AC"],
                "adj_af": df["ExAC_Adj_AF"],
                "afr_ac": df["ExAC_AFR_AC"],
                "afr_af": df["ExAC_AFR_AF"],
                "amr_ac": df["ExAC_AMR_AC"],
                "amr_af": df["ExAC_AMR_AF"],
                "eas_ac": df["ExAC_EAS_AC"],
                "eas_af": df["ExAC_EAS_AF"],
                "fin_ac": df["ExAC_FIN_AC"],
                "fin_af": df["ExAC_FIN_AF"],
                "nfe_ac": df["ExAC_NFE_AC"],
                "nfe_af": df["ExAC_NFE_AF"],
                "sas_ac": df["ExAC_SAS_AC"],
                "sas_af": df["ExAC_SAS_AF"]
            },
            "aric5606": {
                "aa_ac": df["ARIC5606_AA_AC"],
                "aa_af": df["ARIC5606_AA_AF"],
                "ea_ac": df["ARIC5606_EA_AC"],
                "ea_af": df["ARIC5606_EA_AF"]
            },
            "clinvar": {
                "rs": df["clinvar_rs"],
                "clinsig": flagged_ints("clinvar_clnsig"),
                "trait": [
                    i for i in df["clinvar_trait"].split("|") if i != "."
                ],
                "golden_stars": flagged_ints("clinvar_golden_stars")
            }
        }
    }

    one_snp_json = list_split(
        dict_sweep(
            unlist(value_convert_to_number(one_snp_json)),
            vals=[".", None]),
        ";")
    # Force the chromosome back to a string after the conversion helpers.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(df, version, index=0):
    """Convert one dbNSFP record into a document keyed by its HGVS id.

    Args:
        df: Mapping from column name to the raw string value of one row
            (it is indexed as ``df["#chr"]``, ``df["ref"]``, ...).
        version: Assembly whose position is used to build ``_id``;
            either ``'hg19'`` or ``'hg38'``.
        index: Unused; kept so existing callers keep working.

    Returns:
        The document after it has been passed through
        ``value_convert_to_number``, ``unlist``, ``dict_sweep`` (with
        ``vals=[".", None]``) and ``list_split`` (with ``";"``), or ``None``
        when the row has no hg19 position.

    Raises:
        ValueError: If ``version`` is neither ``'hg19'`` nor ``'hg38'``.
    """

    def modify_pvalue(pvalue):
        # Turns e.g. "P = 0.01" into 0.01 by stripping the characters
        # 'P', ' ' and '=' from both ends.
        return float(pvalue.strip('P = '))

    def split_scores(raw):
        # Split a ';'-separated score column; '.' entries become None so the
        # remaining entries keep their positions.
        return [None if item == '.' else item for item in raw.split(';')]

    # specific variable treatment
    chrom = df["#chr"]
    if chrom == 'M':
        chrom = 'MT'

    hg18_pos = df["hg18_pos(1-coor)"]
    hg18_end = hg18_pos if hg18_pos == "." else int(hg18_pos)

    # in case of no hg19 position provided, remove the item
    if df["pos(1-coor)"] == '.':
        return None
    chromStart = int(df["pos(1-coor)"])
    chromEnd = chromStart
    chromStart_38 = int(df["hg38_pos"])

    ref = df["ref"].upper()
    alt = df["alt"].upper()
    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on HGVS.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    siphy_29way_pi = df["SiPhy_29way_pi"]
    if siphy_29way_pi == ".":
        siphy = "."
    else:
        freq = siphy_29way_pi.split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    acc = df["Uniprot_acc"].rstrip().rstrip(';').split(";")
    pos = df["Uniprot_aapos"].rstrip().rstrip(';').split(";")
    uniprot = [{'acc': a, 'pos': p} for a, p in zip(acc, pos)]

    provean_score = split_scores(df["PROVEAN_score"])
    sift_score = split_scores(df["SIFT_score"])
    hdiv_score = split_scores(df["Polyphen2_HDIV_score"])
    hvar_score = split_scores(df["Polyphen2_HVAR_score"])
    lrt_score = split_scores(df["LRT_score"])
    m_cap_score = split_scores(df["M-CAP_score"])
    mutationtaster_score = split_scores(df["MutationTaster_score"])
    mutationassessor_score = split_scores(df["MutationAssessor_score"])
    vest3_score = split_scores(df["VEST3_score"])
    metasvm_score = split_scores(df["MetaSVM_score"])
    fathmm_score = split_scores(df["FATHMM_score"])
    metalr_score = split_scores(df["MetaLR_score"])
    revel_score = split_scores(df["REVEL_score"])

    # parse mutpred top 5 features: each ';'-separated entry is split on
    # " (" into a mechanism name and a p-value token.
    mutpred_raw = df["MutPred_Top5features"]
    if mutpred_raw not in ['.', ',', '-']:
        tokens = []
        for feature in mutpred_raw.split(";"):
            tokens.extend(feature.rstrip(")").split(" ("))
        # Pair tokens up instead of indexing 0..9, so fewer than five
        # features no longer raises IndexError; capped at five as before.
        pairs = list(zip(tokens[0::2], tokens[1::2]))[:5]
        mechanisms = [
            {"mechanism": name, "p_val": modify_pvalue(pval)}
            for name, pval in pairs
        ]
        if not mechanisms:
            mechanisms = '.'
    else:
        mechanisms = '.'

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": df["rs_dbSNP147"],
            "chrom": chrom,
            "hg19": {"start": chromStart, "end": chromEnd},
            "hg18": {"start": df["hg18_pos(1-coor)"], "end": hg18_end},
            "hg38": {"start": df["hg38_pos"], "end": df["hg38_pos"]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": df["aaref"],
                "alt": df["aaalt"],
                "pos": df["aapos"],
                "refcodon": df["refcodon"],
                "codonpos": df["codonpos"]
            },
            "genename": df["genename"],
            "uniprot": uniprot,
            "interpro_domain": df["Interpro_domain"],
            "cds_strand": df["cds_strand"],
            "ancestral_allele": df["Ancestral_allele"],
            "ensembl": {
                "geneid": df["Ensembl_geneid"],
                "transcriptid": df["Ensembl_transcriptid"]
            },
            "sift": {
                "score": sift_score,
                "converted_rankscore": df["SIFT_converted_rankscore"],
                "pred": df["SIFT_pred"]
            },
            "polyphen2": {
                "hdiv": {
                    "score": hdiv_score,
                    "rankscore": df["Polyphen2_HDIV_rankscore"],
                    "pred": df["Polyphen2_HDIV_pred"]
                },
                "hvar": {
                    "score": hvar_score,
                    "rankscore": df["Polyphen2_HVAR_rankscore"],
                    "pred": df["Polyphen2_HVAR_pred"]
                }
            },
            "lrt": {
                "score": lrt_score,
                "converted_rankscore": df["LRT_converted_rankscore"],
                "pred": df["LRT_pred"],
                "omega": df["LRT_Omega"]
            },
            "mutationtaster": {
                "score": mutationtaster_score,
                "converted_rankscore": df["MutationTaster_converted_rankscore"],
                "pred": df["MutationTaster_pred"]
            },
            "mutationassessor": {
                "score": mutationassessor_score,
                "rankscore": df["MutationAssessor_rankscore"],
                "pred": df["MutationAssessor_pred"]
            },
            "fathmm": {
                "score": fathmm_score,
                "rankscore": df["FATHMM_rankscore"],
                "pred": df["FATHMM_pred"]
            },
            "provean": {
                "score": provean_score,
                "rankscore": df["PROVEAN_converted_rankscore"],
                "pred": df["PROVEAN_pred"]
            },
            "vest3": {
                "score": vest3_score,
                "rankscore": df["VEST3_rankscore"]
            },
            "eigen": {
                "coding_or_noncoding": df["Eigen_coding_or_noncoding"],
                "raw": df["Eigen-raw"],
                "phred": df["Eigen-phred"]
            },
            "eigen-pc": {
                "raw": df["Eigen-PC-raw"],
                "phred": df["Eigen-PC-phred"],
                "raw_rankscore": df["Eigen-PC-raw_rankscore"]
            },
            "metasvm": {
                "score": metasvm_score,
                "rankscore": df["MetaSVM_rankscore"],
                "pred": df["MetaSVM_pred"]
            },
            "metalr": {
                "score": metalr_score,
                "rankscore": df["MetaLR_rankscore"],
                "pred": df["MetaLR_pred"]
            },
            "reliability_index": df["Reliability_index"],
            "m_cap_score": {
                "score": m_cap_score,
                "rankscore": df["M-CAP_rankscore"],
                "pred": df["M-CAP_pred"]
            },
            "revel": {
                "score": revel_score,
                "rankscore": df["REVEL_rankscore"]
            },
            "mutpred": {
                "score": df["MutPred_score"],
                "rankscore": df["MutPred_rankscore"],
                "accession": df["MutPred_protID"],
                "aa_change": df["MutPred_AAchange"],
                "pred": mechanisms
            },
            "gerp++": {
                "nr": df["GERP++_NR"],
                "rs": df["GERP++_RS"],
                "rs_rankscore": df["GERP++_RS_rankscore"]
            },
            "phylo": {
                "p100way": {
                    "vertebrate": df["phyloP100way_vertebrate"],
                    "vertebrate_rankscore": df["phyloP100way_vertebrate_rankscore"]
                },
                "p46way": {
                    "placental": df["phyloP46way_placental"],
                    "placental_rankscore": df["phyloP46way_placental_rankscore"],
                    "primate": df["phyloP46way_primate"],
                    "primate_rankscore": df["phyloP46way_primate_rankscore"]
                }
            },
            "phastcons": {
                "100way": {
                    "vertebrate": df["phastCons100way_vertebrate"],
                    "vertebrate_rankscore": df["phastCons100way_vertebrate_rankscore"]
                },
                "46way": {
                    "placental": df["phastCons46way_placental"],
                    "placental_rankscore": df["phastCons46way_placental_rankscore"],
                    "primate": df["phastCons46way_primate"],
                    "primate_rankscore": df["phastCons46way_primate_rankscore"]
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": df["SiPhy_29way_logOdds"],
                "logodds_rankscore": df["SiPhy_29way_logOdds_rankscore"]
            },
            "1000gp1": {
                "ac": df["1000Gp1_AC"],
                "af": df["1000Gp1_AF"],
                "afr_ac": df["1000Gp1_AFR_AC"],
                "afr_af": df["1000Gp1_AFR_AF"],
                "eur_ac": df["1000Gp1_EUR_AC"],
                "eur_af": df["1000Gp1_EUR_AF"],
                "amr_ac": df["1000Gp1_AMR_AC"],
                "amr_af": df["1000Gp1_AMR_AF"],
                "asn_ac": df["1000Gp1_ASN_AC"],
                "asn_af": df["1000Gp1_ASN_AF"]
            },
            "esp6500": {
                "aa_af": df["ESP6500_AA_AF"],
                # NOTE(review): this column name carries a trailing space;
                # kept as-is — confirm it matches the input header.
                "ea_af": df["ESP6500_EA_AF "]
            },
            "exac": {
                "ac": df["ExAC_AC"],
                "af": df["ExAC_AF"],
                "adj_ac": df["ExAC_Adj_AC"],
                "adj_af": df["ExAC_Adj_AF"],
                "afr_ac": df["ExAC_AFR_AC"],
                "afr_af": df["ExAC_AFR_AF"],
                "amr_ac": df["ExAC_AMR_AC"],
                "amr_af": df["ExAC_AMR_AF"],
                "eas_ac": df["ExAC_EAS_AC"],
                "eas_af": df["ExAC_EAS_AF"],
                "fin_ac": df["ExAC_FIN_AC"],
                "fin_af": df["ExAC_FIN_AF"],
                "nfe_ac": df["ExAC_NFE_AC"],
                "nfe_af": df["ExAC_NFE_AF"],
                "sas_ac": df["ExAC_SAS_AC"],
                "sas_af": df["ExAC_SAS_AF"]
            },
            "aric5606": {
                "aa_ac": df["ARIC5606_AA_AC"],
                "aa_af": df["ARIC5606_AA_AF"],
                "ea_ac": df["ARIC5606_EA_AC"],
                "ea_af": df["ARIC5606_EA_AF"]
            },
            "clinvar": {
                "rs": df["clinvar_rs"],
                # '|'-separated columns; '.' placeholders are dropped.
                "clinsig": [
                    int(i) for i in df["clinvar_clnsig"].split("|") if i != "."
                ],
                "trait": [
                    i for i in df["clinvar_trait"].split("|") if i != "."
                ],
                "golden_stars": [
                    int(i)
                    for i in df["clinvar_golden_stars"].split("|") if i != "."
                ]
            }
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert_to_number(one_snp_json)),
                   vals=[".", None]),
        ";")
    # chrom is forced back to a string after the numeric conversion above.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(fields, version='hg19'):
    """Convert one dbNSFP row (a sequence of column strings) to a document.

    Args:
        fields: Column values of one row, addressed by position.
        version: Assembly whose position is used to build ``_id``;
            ``'hg19'`` (uses ``fields[8]``) or ``'hg38'`` (uses
            ``fields[1]``).

    Returns:
        The document after ``value_convert``, ``unlist``, ``dict_sweep``
        (with ``vals=["."]``) and ``list_split`` (with ``";"``).

    Raises:
        ValueError: If ``version`` is neither ``'hg19'`` nor ``'hg38'``, or
            if a position column cannot be parsed by ``int``.
    """
    # specific variable treatment
    chrom = fields[0]
    if chrom == 'M':
        chrom = 'MT'

    hg18_end = "." if fields[10] == "." else int(fields[10])
    chromStart = int(fields[8])
    chromEnd = int(fields[8])
    chromStart_38 = int(fields[1])

    ref = fields[2].upper()
    alt = fields[3].upper()
    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on HGVS.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    if fields[69] == ".":
        siphy = "."
    else:
        freq = fields[69].split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    acc = fields[26].rstrip().rstrip(';').split(";")
    pos = fields[28].rstrip().rstrip(';').split(";")
    # Built as a real list: a bare map() is a one-shot iterator on Python 3
    # and would end up in the document as a map object instead of a list.
    uniprot = [{'acc': a, 'pos': p} for a, p in zip(acc, pos)]

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": fields[6],
            "chrom": chrom,
            "hg19": {"start": fields[8], "end": chromEnd},
            "hg18": {"start": fields[10], "end": hg18_end},
            "hg38": {"start": fields[1], "end": fields[1]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": fields[4],
                "alt": fields[5],
                "pos": fields[22],
                "refcodon": fields[13],
                "codonpos": fields[14],
            },
            "genename": fields[11],
            "uniprot": uniprot,
            "interpro_domain": fields[111],
            "cds_strand": fields[12],
            "ancestral_allele": fields[16],
            "ensembl": {"geneid": fields[19], "transcriptid": fields[20]},
            "sift": {
                "score": fields[23],
                "converted_rankscore": fields[24],
                "pred": fields[25]
            },
            "polyphen2": {
                "hdiv": {
                    "score": fields[29],
                    "rankscore": fields[30],
                    "pred": fields[31]
                },
                "hvar": {
                    "score": fields[32],
                    "rankscore": fields[33],
                    "pred": fields[34]
                }
            },
            "lrt": {
                "score": fields[35],
                "converted_rankscore": fields[36],
                "pred": fields[37],
                "omega": fields[38]
            },
            "mutationtaster": {
                "score": fields[39],
                "converted_rankscore": fields[40],
                "pred": fields[41],
                "model": fields[42],
                "AAE": fields[43]
            },
            "mutationassessor": {
                "score": fields[46],
                "rankscore": fields[47],
                "pred": fields[48]
            },
            "fathmm": {
                "score": fields[49],
                "rankscore": fields[50],
                "pred": fields[51]
            },
            "provean": {
                "score": fields[52],
                "rankscore": fields[53],
                "pred": fields[54]
            },
            "metasvm": {
                "score": fields[55],
                "rankscore": fields[56],
                "pred": fields[57]
            },
            "lr": {
                "score": fields[58],
                "rankscore": fields[59],
                "pred": fields[60]
            },
            "reliability_index": fields[61],
            "gerp++": {
                "nr": fields[62],
                "rs": fields[63],
                "rs_rankscore": fields[64]
            },
            "phylop_7way": {
                "vertebrate": fields[65],
                "vertebrate_rankscore": fields[66]
            },
            "phastcons_7way": {
                "vertebrate": fields[67],
                "vertebrate_rankscore": fields[68]
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": fields[70],
                "logodds_rankscore": fields[71]
            },
            "1000gp1": {
                "ac": fields[72],
                "af": fields[73],
                "afr_ac": fields[74],
                "afr_af": fields[75],
                "eur_ac": fields[76],
                "eur_af": fields[77],
                "amr_ac": fields[78],
                "amr_af": fields[79],
                "eas_ac": fields[80],
                "eas_af": fields[81],
                "sas_ac": fields[82],
                "sas_af": fields[83]
            },
            "twinsuk": {"ac": fields[84], "af": fields[85]},
            "alspac": {"ac": fields[86], "af": fields[87]},
            "esp6500": {
                "aa_ac": fields[88],
                "aa_af": fields[89],
                "ea_ac": fields[90],
                "ea_af": fields[91]
            },
            "exac": {
                "ac": fields[92],
                "af": fields[93],
                "adj_ac": fields[94],
                "adj_af": fields[95],
                "afr_ac": fields[96],
                "afr_af": fields[97],
                "amr_ac": fields[98],
                "amr_af": fields[99],
                "eas_ac": fields[100],
                "eas_af": fields[101],
                "fin_ac": fields[102],
                "fin_af": fields[103],
                "nfe_ac": fields[104],
                "nfe_af": fields[105],
                "sas_ac": fields[106],
                "sas_af": fields[107]
            },
            "clinvar": {
                "rs": fields[108],
                "clinsig": fields[109],
                "trait": fields[110]
            }
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # chrom is forced back to a string after the value conversion above.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(fields):
    """Convert one GRASP row into a document keyed by a myvariant.info id.

    The rsid in ``fields[8]`` is looked up through the myvariant.info query
    endpoint; the ``_id`` of the first hit becomes the document ``_id``.

    Args:
        fields: Column values of one row, addressed by position; must have
            exactly ``VALID_COLUMN_NO`` entries.

    Returns:
        The document after ``value_convert``, ``unlist``, ``dict_sweep``
        (with ``[""]``) and ``list_split`` (with ``","``), or ``None`` when
        the rsid is ``None`` or the query returns no hits.
    """
    assert len(fields) == VALID_COLUMN_NO
    rsid = fields[8]
    if rsid is None:
        return None

    url = ('http://myvariant.info/v1/query?q=dbsnp.rsid:'
           + rsid + '&fields=_id')
    r = requests.get(url)
    # Only the first hit is used; an empty hit list yields None explicitly
    # rather than relying on falling out of a loop.
    first_hit = next(iter(r.json()['hits']), None)
    if first_hit is None:
        return None
    HGVS = first_hit['_id']

    # The discovery and replication sample blocks share the same 13 columns,
    # in this order, at fields[25:38] and fields[38:51] respectively.
    ancestry_keys = (
        'total_samples', 'european', 'african', 'east_asian',
        'indian_south_asian', 'hispanic', 'native', 'micronesian',
        'arab_me', 'mixed', 'unspecified', 'filipino', 'indonesian',
    )

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "grasp": {
            'hg19': {'chr': fields[5], 'pos': fields[6]},
            'hupfield': fields[1],
            'last_curation_date': fields[2],
            'creation_date': fields[3],
            'srsid': fields[4],
            'publication': {
                'journal': fields[16],
                'title': fields[17],
                'pmid': fields[7],
                'snpid': fields[8],
                'location_within_paper': fields[9],
                'p_value': fields[10],
                'phenotype': fields[11],
                'paper_phenotype_description': fields[12],
                'paper_phenotype_categories': fields[13],
                'date_pub': fields[14]
            },
            'includes_male_female_only_analyses': fields[18],
            'exclusively_male_female': fields[19],
            'initial_sample_description': fields[20],
            'replication_sample_description': fields[21],
            'platform_snps_passing_qc': fields[22],
            'gwas_ancestry_description': fields[23],
            'discovery': dict(zip(ancestry_keys, fields[25:38])),
            'replication': dict(zip(ancestry_keys, fields[38:51])),
            'in_gene': fields[51],
            'nearest_gene': fields[52],
            'in_lincrna': fields[53],
            'in_mirna': fields[54],
            'in_mirna_bs': fields[55],
            'oreg_anno': fields[61],
            'conserv_pred_tfbs': fields[62],
            'human_enhancer': fields[63],
            'rna_edit': fields[64],
            'polyphen2': fields[65],
            'sift': fields[66],
            'ls_snp': fields[67],
            'uniprot': fields[68],
            'eqtl_meth_metab_study': fields[69]
        }
    }
    return list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), [""]), ",")
def _map_line_to_json(fields, version='hg19'):
    """Convert one dbNSFP row (a sequence of column strings) to a document.

    Args:
        fields: Column values of one row, addressed by position.
        version: Assembly whose position is used to build ``_id``;
            ``'hg19'`` (uses ``fields[8]``) or ``'hg38'`` (uses
            ``fields[1]``).

    Returns:
        The document after ``value_convert``, ``unlist``, ``dict_sweep``
        (with ``vals=["."]``) and ``list_split`` (with ``";"``).

    Raises:
        ValueError: If ``version`` is neither ``'hg19'`` nor ``'hg38'``, or
            if a position column cannot be parsed by ``int``.
    """
    # specific variable treatment
    chrom = fields[0]
    if chrom == 'M':
        chrom = 'MT'

    hg18_end = "." if fields[10] == "." else int(fields[10])
    chromStart = int(fields[8])
    chromEnd = int(fields[8])
    chromStart_38 = int(fields[1])

    ref = fields[2].upper()
    alt = fields[3].upper()
    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on HGVS.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    if fields[69] == ".":
        siphy = "."
    else:
        freq = fields[69].split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    acc = fields[26].rstrip().rstrip(';').split(";")
    pos = fields[28].rstrip().rstrip(';').split(";")
    # Built as a real list: a bare map() is a one-shot iterator on Python 3
    # and would end up in the document as a map object instead of a list.
    uniprot = [{'acc': a, 'pos': p} for a, p in zip(acc, pos)]

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": fields[6],
            "chrom": chrom,
            "hg19": {"start": fields[8], "end": chromEnd},
            "hg18": {"start": fields[10], "end": hg18_end},
            "hg38": {"start": fields[1], "end": fields[1]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": fields[4],
                "alt": fields[5],
                "pos": fields[22],
                "refcodon": fields[13],
                "codonpos": fields[14],
            },
            "genename": fields[11],
            "uniprot": uniprot,
            "interpro_domain": fields[111],
            "cds_strand": fields[12],
            "ancestral_allele": fields[16],
            "ensembl": {"geneid": fields[19], "transcriptid": fields[20]},
            "sift": {
                "score": fields[23],
                "converted_rankscore": fields[24],
                "pred": fields[25]
            },
            "polyphen2": {
                "hdiv": {
                    "score": fields[29],
                    "rankscore": fields[30],
                    "pred": fields[31]
                },
                "hvar": {
                    "score": fields[32],
                    "rankscore": fields[33],
                    "pred": fields[34]
                }
            },
            "lrt": {
                "score": fields[35],
                "converted_rankscore": fields[36],
                "pred": fields[37],
                "omega": fields[38]
            },
            "mutationtaster": {
                "score": fields[39],
                "converted_rankscore": fields[40],
                "pred": fields[41],
                "model": fields[42],
                "AAE": fields[43]
            },
            "mutationassessor": {
                "score": fields[46],
                "rankscore": fields[47],
                "pred": fields[48]
            },
            "fathmm": {
                "score": fields[49],
                "rankscore": fields[50],
                "pred": fields[51]
            },
            "provean": {
                "score": fields[52],
                "rankscore": fields[53],
                "pred": fields[54]
            },
            "metasvm": {
                "score": fields[55],
                "rankscore": fields[56],
                "pred": fields[57]
            },
            "lr": {
                "score": fields[58],
                "rankscore": fields[59],
                "pred": fields[60]
            },
            "reliability_index": fields[61],
            "gerp++": {
                "nr": fields[62],
                "rs": fields[63],
                "rs_rankscore": fields[64]
            },
            "phylop_7way": {
                "vertebrate": fields[65],
                "vertebrate_rankscore": fields[66]
            },
            "phastcons_7way": {
                "vertebrate": fields[67],
                "vertebrate_rankscore": fields[68]
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": fields[70],
                "logodds_rankscore": fields[71]
            },
            "1000gp1": {
                "ac": fields[72],
                "af": fields[73],
                "afr_ac": fields[74],
                "afr_af": fields[75],
                "eur_ac": fields[76],
                "eur_af": fields[77],
                "amr_ac": fields[78],
                "amr_af": fields[79],
                "eas_ac": fields[80],
                "eas_af": fields[81],
                "sas_ac": fields[82],
                "sas_af": fields[83]
            },
            "twinsuk": {"ac": fields[84], "af": fields[85]},
            "alspac": {"ac": fields[86], "af": fields[87]},
            "esp6500": {
                "aa_ac": fields[88],
                "aa_af": fields[89],
                "ea_ac": fields[90],
                "ea_af": fields[91]
            },
            "exac": {
                "ac": fields[92],
                "af": fields[93],
                "adj_ac": fields[94],
                "adj_af": fields[95],
                "afr_ac": fields[96],
                "afr_af": fields[97],
                "amr_ac": fields[98],
                "amr_af": fields[99],
                "eas_ac": fields[100],
                "eas_af": fields[101],
                "fin_ac": fields[102],
                "fin_af": fields[103],
                "nfe_ac": fields[104],
                "nfe_af": fields[105],
                "sas_ac": fields[106],
                "sas_af": fields[107]
            },
            "clinvar": {
                "rs": fields[108],
                "clinsig": fields[109],
                "trait": fields[110]
            }
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # chrom is forced back to a string after the value conversion above.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(fields, version):
    """Convert one dbNSFP row (a sequence of column strings) to a document.

    Args:
        fields: Column values of one row, addressed by position.
        version: Assembly whose position is used to build ``_id``;
            ``'hg19'`` (uses ``fields[8]``) or ``'hg38'`` (uses
            ``fields[1]``).

    Returns:
        The document after ``value_convert``, ``unlist``, ``dict_sweep``
        (with ``vals=["."]``) and ``list_split`` (with ``";"``), or ``None``
        when the hg19 position column is ``'.'``.

    Raises:
        ValueError: If ``version`` is neither ``'hg19'`` nor ``'hg38'``, or
            if a position column cannot be parsed by ``int``.
    """

    def split_scores(raw):
        # Split a ';'-separated score column. Only multi-valued columns have
        # their '.' entries replaced by None (keeping positions aligned); a
        # lone '.' is left untouched, as in the original per-column loops.
        scores = raw.split(';')
        if len(scores) > 1:
            scores = [None if item == '.' else item for item in scores]
        return scores

    # specific variable treatment
    chrom = fields[0]
    if chrom == 'M':
        chrom = 'MT'

    hg18_end = "." if fields[10] == "." else int(fields[10])

    # in case of no hg19 position provided, remove the item
    if fields[8] == '.':
        return None
    chromStart = int(fields[8])
    chromEnd = int(fields[8])
    chromStart_38 = int(fields[1])

    ref = fields[2].upper()
    alt = fields[3].upper()
    if version == 'hg19':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, ref, alt)
    elif version == 'hg38':
        HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart_38, ref, alt)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on HGVS.
        raise ValueError(
            "unsupported version %r (expected 'hg19' or 'hg38')" % (version,))

    if fields[105] == ".":
        siphy = "."
    else:
        freq = fields[105].split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    # gtex and uniprot are built as real lists: a bare map() is a one-shot
    # iterator on Python 3 and would end up in the document as a map object.
    gtex_gene = fields[181].split('|')
    gtex_tissue = fields[182].split('|')
    gtex = [
        {'gene': gene, 'tissue': tissue}
        for gene, tissue in zip(gtex_gene, gtex_tissue)
    ]
    acc = fields[26].rstrip().rstrip(';').split(";")
    pos = fields[28].rstrip().rstrip(';').split(";")
    uniprot = [{'acc': a, 'pos': p} for a, p in zip(acc, pos)]

    provean_score = split_scores(fields[52])
    sift_score = split_scores(fields[23])
    hdiv_score = split_scores(fields[29])
    hvar_score = split_scores(fields[32])
    lrt_score = split_scores(fields[35])
    dann_score = split_scores(fields[69])
    mutationtaster_score = split_scores(fields[39])
    mutationassessor_score = split_scores(fields[46])
    vest3_score = split_scores(fields[57])
    metasvm_score = split_scores(fields[59])
    fathmm_score = split_scores(fields[49])
    lr_score = split_scores(fields[62])
    fathmm_coding_score = split_scores(fields[71])
    integrated_fitcons_score = split_scores(fields[82])
    gm12878_fitcons_score = split_scores(fields[85])
    h1_hesc_fitcons_score = split_scores(fields[88])
    huvec_fitcons_score = split_scores(fields[91])

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "rsid": fields[6],
            "chrom": chrom,
            "hg19": {"start": chromStart, "end": chromEnd},
            "hg18": {"start": fields[10], "end": hg18_end},
            "hg38": {"start": fields[1], "end": fields[1]},
            "ref": ref,
            "alt": alt,
            "aa": {
                "ref": fields[4],
                "alt": fields[5],
                "pos": fields[22],
                "refcodon": fields[13],
                "codonpos": fields[14],
                "codon_degeneracy": fields[15]
            },
            "genename": fields[11],
            "uniprot": uniprot,
            "interpro_domain": fields[180],
            "cds_strand": fields[12],
            "ancestral_allele": fields[16],
            "ensembl": {
                "geneid": fields[19],
                "transcriptid": fields[20],
                "proteinid": fields[21]
            },
            "sift": {
                "score": sift_score,
                "converted_rankscore": fields[24],
                "pred": fields[25]
            },
            "polyphen2": {
                "hdiv": {
                    "score": hdiv_score,
                    "rankscore": fields[30],
                    "pred": fields[31]
                },
                "hvar": {
                    "score": hvar_score,
                    "rankscore": fields[33],
                    "pred": fields[34]
                }
            },
            "lrt": {
                "score": lrt_score,
                "converted_rankscore": fields[36],
                "pred": fields[37],
                "omega": fields[38]
            },
            "mutationtaster": {
                "score": mutationtaster_score,
                "converted_rankscore": fields[40],
                "pred": fields[41],
                "model": fields[42],
                "AAE": fields[43]
            },
            "mutationassessor": {
                "score": mutationassessor_score,
                "rankscore": fields[47],
                "pred": fields[48]
            },
            "fathmm": {
                "score": fathmm_score,
                "rankscore": fields[50],
                "pred": fields[51]
            },
            "provean": {
                "score": provean_score,
                "rankscore": fields[53],
                "pred": fields[54]
            },
            "vest3": {
                "score": vest3_score,
                # Was fields[57], i.e. the score column itself; the column
                # right after the score is used here, matching every other
                # score/rankscore pair in this mapping.
                "rankscore": fields[58],
                "transcriptid": fields[55],
                "transcriptvar": fields[56]
            },
            "fathmm-mkl": {
                "coding_score": fathmm_coding_score,
                "coding_rankscore": fields[72],
                "coding_pred": fields[73],
                "coding_group": fields[74]
            },
            "eigen": {
                "raw": fields[75],
                "phred": fields[76],
                "raw_rankscore": fields[77]
            },
            "eigen-pc": {"raw": fields[78], "raw_rankscore": fields[79]},
            "genocanyon": {"score": fields[80], "rankscore": fields[81]},
            "metasvm": {
                "score": metasvm_score,
                "rankscore": fields[60],
                "pred": fields[61]
            },
            "metalr": {
                "score": lr_score,
                "rankscore": fields[63],
                "pred": fields[64]
            },
            "reliability_index": fields[65],
            "dann": {"score": dann_score, "rankscore": fields[70]},
            "gerp++": {
                "nr": fields[94],
                "rs": fields[95],
                "rs_rankscore": fields[96]
            },
            "integrated": {
                "fitcons_score": integrated_fitcons_score,
                "fitcons_rankscore": fields[83],
                "confidence_value": fields[84]
            },
            "gm12878": {
                "fitcons_score": gm12878_fitcons_score,
                "fitcons_rankscore": fields[86],
                "confidence_value": fields[87]
            },
            "h1-hesc": {
                "fitcons_score": h1_hesc_fitcons_score,
                "fitcons_rankscore": fields[89],
                "confidence_value": fields[90]
            },
            "huvec": {
                "fitcons_score": huvec_fitcons_score,
                "fitcons_rankscore": fields[92],
                "confidence_value": fields[93]
            },
            "phylo": {
                "p100way": {
                    "vertebrate": fields[97],
                    "vertebrate_rankscore": fields[98]
                },
                "p20way": {
                    "mammalian": fields[99],
                    "mammalian_rankscore": fields[100]
                }
            },
            "phastcons": {
                "100way": {
                    "vertebrate": fields[101],
                    "vertebrate_rankscore": fields[102]
                },
                "20way": {
                    "mammalian": fields[103],
                    "mammalian_rankscore": fields[104]
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": fields[106],
                "logodds_rankscore": fields[107]
            },
            "1000gp3": {
                "ac": fields[108],
                "af": fields[109],
                "afr_ac": fields[110],
                "afr_af": fields[111],
                "eur_ac": fields[112],
                "eur_af": fields[113],
                "amr_ac": fields[114],
                "amr_af": fields[115],
                "eas_ac": fields[116],
                "eas_af": fields[117],
                "sas_ac": fields[118],
                "sas_af": fields[119]
            },
            "twinsuk": {"ac": fields[120], "af": fields[121]},
            "alspac": {"ac": fields[122], "af": fields[123]},
            "esp6500": {
                "aa_ac": fields[124],
                "aa_af": fields[125],
                "ea_ac": fields[126],
                "ea_af": fields[127]
            },
            "exac": {
                "ac": fields[128],
                "af": fields[129],
                "adj_ac": fields[130],
                "adj_af": fields[131],
                "afr_ac": fields[132],
                "afr_af": fields[133],
                "amr_ac": fields[134],
                "amr_af": fields[135],
                "eas_ac": fields[136],
                "eas_af": fields[137],
                "fin_ac": fields[138],
                "fin_af": fields[139],
                "nfe_ac": fields[140],
                "nfe_af": fields[141],
                "sas_ac": fields[142],
                "sas_af": fields[143]
            },
            "exac_nontcga": {
                "ac": fields[144],
                "af": fields[145],
                "adj_ac": fields[146],
                "adj_af": fields[147],
                "afr_ac": fields[148],
                "afr_af": fields[149],
                "amr_ac": fields[150],
                "amr_af": fields[151],
                "eas_ac": fields[152],
                "eas_af": fields[153],
                "fin_ac": fields[154],
                "fin_af": fields[155],
                "nfe_ac": fields[156],
                "nfe_af": fields[157],
                "sas_ac": fields[158],
                "sas_af": fields[159]
            },
            # NOTE(review): unlike the two ExAC groups above, this one has
            # no sas_ac/sas_af, and fields[174]/fields[175] are not read
            # anywhere — confirm whether the omission is intentional.
            "exac_nonpsych": {
                "ac": fields[160],
                "af": fields[161],
                "adj_ac": fields[162],
                "adj_af": fields[163],
                "afr_ac": fields[164],
                "afr_af": fields[165],
                "amr_ac": fields[166],
                "amr_af": fields[167],
                "eas_ac": fields[168],
                "eas_af": fields[169],
                "fin_ac": fields[170],
                "fin_af": fields[171],
                "nfe_ac": fields[172],
                "nfe_af": fields[173]
            },
            "clinvar": {
                "rs": fields[176],
                "clinsig": fields[177],
                "trait": fields[178],
                "golden_stars": fields[179]
            },
            "gtex": gtex
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # chrom is forced back to a string after the value conversion above.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json
def _map_line_to_json(fields):
    """Convert one dbNSFP row (a sequence of column strings) to a document.

    The document ``_id`` is built from ``fields[0]`` (chromosome),
    ``fields[1]`` (position) and the two alleles in ``fields[2]`` and
    ``fields[3]``.

    Args:
        fields: Column values of one row, addressed by position.

    Returns:
        The document after ``value_convert``, ``unlist``, ``dict_sweep``
        (with ``vals=["."]``) and ``list_split`` (with ``";"``).

    Raises:
        ValueError: If a position column cannot be parsed by ``int``.
    """
    # specific variable treatment
    chrom = fields[0]
    # End coordinates are stored as start + 1 in this mapping.
    hg18_end = "." if fields[7] == "." else int(fields[7]) + 1
    chromStart = int(fields[1])
    chromEnd = int(fields[1]) + 1
    allele1 = fields[2]
    allele2 = fields[3]
    HGVS = "chr%s:g.%d%s>%s" % (chrom, chromStart, allele1, allele2)

    if fields[74] == ".":
        siphy = "."
    else:
        freq = fields[74].split(":")
        siphy = {'a': freq[0], 'c': freq[1], 'g': freq[2], 't': freq[3]}

    acc = fields[11].rstrip().rstrip(';').split(";")
    pos = fields[13].rstrip().rstrip(';').split(";")
    # Built as a real list: a bare map() is a one-shot iterator on Python 3
    # and would end up in the document as a map object instead of a list.
    uniprot = [{'acc': a, 'pos': p} for a, p in zip(acc, pos)]

    # load as json data
    one_snp_json = {
        "_id": HGVS,
        "dbnsfp": {
            "chrom": chrom,
            "hg19": {"start": fields[1], "end": chromEnd},
            "hg18": {"start": fields[7], "end": hg18_end},
            "hg38": {"chrom": fields[8], "pos": fields[9]},
            "allele1": allele1,
            "allele2": allele2,
            "aa": {
                "ref": fields[4],
                "alt": fields[5],
                "pos": fields[23],
                "refcodon": fields[16],
                "codonpos": fields[18],
                "aapos_sift": fields[24],
                "aapos_fathmm": fields[25]
            },
            "genename": fields[10],
            "uniprot": uniprot,
            "interpro_domain": fields[14],
            "cds_strand": fields[15],
            "slr_test_statistic": fields[17],
            "fold-degenerate": fields[19],
            "ancestral_allele": fields[20],
            "ensembl": {"geneid": fields[21], "transcriptid": fields[22]},
            "sift": {
                "score": fields[26],
                "converted_rankscore": fields[27],
                "pred": fields[28]
            },
            "polyphen2": {
                "hdiv": {
                    "score": fields[29],
                    "rankscore": fields[30],
                    "pred": fields[31]
                },
                "hvar": {
                    "score": fields[32],
                    "rankscore": fields[33],
                    "pred": fields[34]
                }
            },
            "lrt": {
                "score": fields[35],
                "converted_rankscore": fields[36],
                "pred": fields[37]
            },
            "mutationtaster": {
                "score": fields[38],
                "converted_rankscore": fields[39],
                "pred": fields[40]
            },
            "mutationassessor": {
                "score": fields[41],
                "rankscore": fields[42],
                "pred": fields[43]
            },
            "fathmm": {
                "score": fields[44],
                "rankscore": fields[45],
                "pred": fields[46]
            },
            "radialsvm": {
                "score": fields[47],
                "rankscore": fields[48],
                "pred": fields[49]
            },
            "lr": {
                "score": fields[50],
                "rankscore": fields[51],
                "pred": fields[52]
            },
            "reliability_index": fields[53],
            "vest3": {"score": fields[54], "rankscore": fields[55]},
            "cadd": {
                "raw": fields[56],
                "raw_rankscore": fields[57],
                "phred": fields[58]
            },
            "gerp++": {
                "nr": fields[59],
                "rs": fields[60],
                "rs_rankscore": fields[61]
            },
            "phylop": {
                "46way": {
                    "primate": fields[62],
                    "primate_rankscore": fields[63],
                    "placental": fields[64],
                    "placental_rankscore": fields[65],
                },
                "100way": {
                    "vertebrate": fields[66],
                    "vertebrate_rankscore": fields[67]
                }
            },
            "phastcons": {
                "46way": {
                    "primate": fields[68],
                    "primate_rankscore": fields[69],
                    "placental": fields[70],
                    "placental_rankscore": fields[71],
                },
                "100way": {
                    "vertebrate": fields[72],
                    "vertebrate_rankscore": fields[73]
                }
            },
            "siphy_29way": {
                "pi": siphy,
                "logodds": fields[75],
                "logodds_rankscore": fields[76]
            },
            "lrt_omega": fields[77],
            "unisnp_ids": fields[78],
            "1000gp1": {
                "ac": fields[79],
                "af": fields[80],
                "afr_ac": fields[81],
                "afr_af": fields[82],
                "eur_ac": fields[83],
                "eur_af": fields[84],
                "amr_ac": fields[85],
                "amr_af": fields[86],
                "asn_ac": fields[87],
                "asn_af": fields[88]
            },
            "esp6500": {"aa_af": fields[89], "ea_af": fields[90]},
            "aric5606": {
                "aa_ac": fields[91],
                "aa_af": fields[92],
                "ea_ac": fields[93],
                "ea_af": fields[94]
            },
            "clinvar": {
                "rs": fields[95],
                "clin_sig": fields[96],
                "trait": fields[97]
            }
        }
    }

    one_snp_json = list_split(
        dict_sweep(unlist(value_convert(one_snp_json)), vals=["."]), ";")
    # chrom is forced back to a string after the value conversion above.
    one_snp_json["dbnsfp"]["chrom"] = str(one_snp_json["dbnsfp"]["chrom"])
    return one_snp_json