def HOM_COUNT(self):
    """Return ``HOM_COUNT``, fetching the VCF record on first access.

    When ``'HOM_COUNT'`` is already stored in the instance ``__dict__`` it is
    returned directly.  Otherwise ``vcf.vcf_query`` is called with this
    variant's id, every key of the returned mapping is merged into the
    instance ``__dict__``, ``self.save()`` is called (its return value is
    printed) and the freshly loaded value is returned.

    Raises:
        Exception: ``('ERROR', variant_id)`` when the VCF query returns None.
        KeyError: when the VCF record does not contain ``'HOM_COUNT'``.
    """
    attrs = self.__dict__
    if 'HOM_COUNT' not in attrs:
        record = vcf.vcf_query(variant_str=self.variant_id)
        if record is None:
            raise Exception('ERROR', self.variant_id)
        # Merge the whole VCF record so sibling fields are cached as well.
        self.__dict__.update(record)
        print(self.save())
    return self.__dict__['HOM_COUNT']
def allele_num(self):
    """Return ``allele_num``, fetching the VCF record on first access.

    A value already present in the instance ``__dict__`` is returned as-is.
    Otherwise the variant is looked up with ``vcf.vcf_query``, the returned
    mapping is merged into the instance ``__dict__``, ``self.save()`` is
    called (its return value is printed) and the loaded value is returned.

    Raises:
        Exception: ``('ERROR', variant_id)`` when the VCF query returns None.
        KeyError: when the VCF record does not contain ``'allele_num'``.
    """
    attrs = self.__dict__
    if 'allele_num' not in attrs:
        record = vcf.vcf_query(variant_str=self.variant_id)
        if record is None:
            raise Exception('ERROR', self.variant_id)
        # Merge the whole VCF record so sibling fields are cached as well.
        self.__dict__.update(record)
        print(self.save())
    return self.__dict__['allele_num']
def __init__(self, variant_id=None, db=None, data=None):
    """Initialise a variant from its id, its VCF record and optional extras.

    Attribute sources are merged into the instance ``__dict__`` in this
    order, later sources overriding earlier ones:

    1. ``chrom``/``pos``/``ref``/``alt`` parsed from the variant id,
    2. the mapping returned by ``vcf.vcf_query``,
    3. ``data`` (when truthy),
    4. the stored document from ``db.variants`` (when ``db`` is given and a
       document exists).

    Args:
        variant_id: Identifier made of four fields separated by ``-`` or
            ``_``.  When None it is read from ``data['variant_id']``.
        db: Optional database handle.  When truthy it is stored on
            ``Variant.db`` and used to look up (or, if missing, save) the
            variant.
        data: Optional mapping of attributes to set on the instance.

    Raises:
        TypeError: when both ``variant_id`` and ``data`` are None.
        ValueError: when the id does not split into exactly four fields.
        Exception: ``('NOT IN VCF', variant_id)`` when the VCF query
            returns None.
    """
    if variant_id is None:
        variant_id = data['variant_id']
    # Normalise once: coerce to text, trim whitespace, use '-' as separator.
    self.variant_id = str(variant_id).strip().replace('_', '-')
    # Split the *normalised* id.  Splitting the raw argument failed for
    # underscore-separated or non-string ids and kept stray whitespace.
    self.chrom, self.pos, self.ref, self.alt = self.variant_id.split('-')
    q = vcf.vcf_query(variant_str=self.variant_id)
    if q is None:
        raise Exception('NOT IN VCF', self.variant_id)
    self.__dict__.update(q)
    if data:
        self.__dict__.update(data)
    if db:
        # NOTE(review): the lookup is kept under the ``if db:`` guard; the
        # collapsed original is ambiguous about the nesting -- confirm.
        Variant.db = db
        data = Variant.db.variants.find_one({'variant_id': self.variant_id}, fields={'_id': False})
        if not data:
            print('NOT IN DB', self.variant_id, 'WILL INSERT')
            self.save()
            # self.xpos = get_xpos(self.chrom, self.pos)  (disabled in the original)
        else:
            self.__dict__.update(data)
def get_rare_var_p_hpo(gene_id, db, patient_db):
    """Collect, per patient, the rare variants they carry on one gene.

    Every variant id listed on the gene document is considered.  Variants
    whose ExAC allele frequency exceeds 0.01 are skipped; variants that
    ``vcf_query`` cannot find are written, one id per line, to
    ``views/debug/bad_variants`` for later inspection.

    For the remaining variants ``populate_mode_p`` is called for each
    carrier:

    * homozygous carriers are recorded with weight 2 under both
      ``'hom_comp'`` and ``'het'``;
    * heterozygous carriers are recorded with weight 1 under ``'het'`` and,
      if that patient already has an earlier variant on this gene, under
      ``'hom_comp'`` as well.

    Args:
        gene_id: Value matched against ``gene_id`` in ``db.genes``.
        db: Database handle exposing ``genes`` and ``variants`` collections.
        patient_db: Passed through unchanged to ``populate_mode_p``.

    Returns:
        dict: ``{'hom_comp': {...}, 'het': {...}}`` keyed by patient id.
        Per the original author's note, each patient entry looks like
        ``{hpo: [(HP:1234, name)], exac_af: [0.0021], uclex_af: [0.001]}``;
        the exact content is produced by ``populate_mode_p``.

    Raises:
        TypeError: when no gene document matches ``gene_id``.
    """
    results = {'hom_comp': {}, 'het': {}}
    # Sometimes a variant is not in the VCF; log it for inspection/cleanup.
    # The context manager guarantees the handle is flushed and closed, which
    # the original never did explicitly.
    with open('views/debug/bad_variants', 'w') as bad_var_file:
        # Get all variants on this gene.
        all_vars = db.genes.find_one({'gene_id': gene_id})['variant_ids']
        for v in all_vars:
            var = db.variants.find_one({'variant_id': v})
            exac_af = 0
            if var['in_exac']:
                if 'allele_freq' not in var['EXAC']:
                    # Stored annotation lacks the frequency; re-annotate.
                    VAR = annotation.exac_anno(v)
                    exac_af = VAR['allele_freq']
                else:
                    exac_af = var['EXAC']['allele_freq']
            # Not interested if the allele frequency is > 0.01.
            if float(exac_af) > 0.01:
                continue
            # Get the relevant info from the VCF.
            this = vcf_query(variant_str=v)
            if not this:
                bad_var_file.write(v + '\n')
                continue
            uclex_af = this['allele_freq']
            # Hom patients: also added to 'het'; a hom counts twice.
            # TODO (original note): both_het needs handling, their AFs differ.
            hom_p = this['hom_samples']
            for p in hom_p:
                populate_mode_p(results, 2, ['hom_comp', 'het'], p, exac_af, uclex_af, patient_db)
            # Het patients.  Original note: check length of exac_af, longer
            # than one?  Such patients are also added to 'hom_comp'.
            het = this['het_samples']
            for p in het:
                results['het'][p] = results['het'].get(p, {'exac_af': [], 'uclex_af': []})
                modes = ['het']
                if results['het'][p]['exac_af']:
                    # This patient has more than one variant on this gene:
                    # copy it to hom_comp.
                    modes.append('hom_comp')
                populate_mode_p(results, 1, modes, p, exac_af, uclex_af, patient_db)
    return results
def gene_hpo(gene_id):
    """Return, as JSON, observed features per patient flagged by gene carriage.

    Steps:

    1. Resolve ``gene_id`` (translated through ``lookups.get_gene_by_name``
       when it does not start with ``'ENSG'``) and fetch its ``gene_name``.
    2. Read the tab-separated sample names from the ``headers.txt`` file.
    3. Keep the variants of the gene that pass ``condition`` (see below).
    4. Gather hom/het carriers of those variants from ``vcf_query``.  With
       the request argument ``model == 'recessive'`` a carrier is a sample
       with a hom call or more than one het call; otherwise any hom or het
       call counts.
    5. For every sample that has a patient record with ``features``, emit
       ``{'features': [observed == 'yes' ...], gene_name: <is carrier>}``.

    Note: the ``exac_thresh`` request argument is not consulted; the ExAC
    cut-off is fixed at 0.001.

    Args:
        gene_id: Ensembl gene id (``ENSG...``) or a gene name.

    Returns:
        The result of ``jsonify(result=..., stats=..., variants=...)`` where
        ``stats`` counts carriers vs non-carriers and ``variants`` lists the
        ids of the variants that passed the filter.
    """
    db = get_db()
    db_patients = get_db('patients')
    # If the gene is not an Ensembl id, translate it to one.
    if not gene_id.startswith('ENSG'):
        gene_id = lookups.get_gene_by_name(db, gene_id)['gene_id']
    gene_name = db.genes.find_one({'gene_id': gene_id})['gene_name']
    print(gene_name)
    model = request.args.get('model')
    # ``open`` inside ``with`` replaces the Python-2-only ``file`` builtin and
    # closes the handle deterministically.
    with open('/slms/UGI/vm_exports/vyp/phenotips/uclex_files/current/headers.txt', 'r') as headers:
        everyone = frozenset(headers.read().strip().split('\t'))
    n_everyone = len(everyone)

    def condition(v):
        """Tell whether a variant document is rare enough and well-called."""
        variant_id = v['variant_id']
        try:
            v = Variant(db=db, variant_id=variant_id)
        except Exception:
            # Best effort: a variant that cannot be built is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('NOT IN VCF', variant_id,)
            return False
        if v.in_exac and float(v.EXAC['allele_freq']) > 0.001:
            print('TOO COMMON', v.EXAC)
            return False
        keep = (
            v.filter == 'PASS'
            and v.WT_COUNT > n_everyone / 4.
            and ((v.HOM_COUNT == 1 and v.HET_COUNT == 0) or (v.HOM_COUNT < v.HET_COUNT))
            and v.HET_COUNT < n_everyone / 1000.
        )
        print(v.filter)
        print(v.WT_COUNT > n_everyone / 4.)
        print(v.HOM_COUNT < v.HET_COUNT)
        print(v.HET_COUNT < n_everyone / 1000.)
        print(v.variant_id, keep)
        return keep

    # Variants in the gene that pass the filter.
    variants = [v for v in db.variants.find({'genes': gene_id}, fields={'_id': False}) if condition(v)]
    print(len(variants))
    hom_samples = []
    het_samples = []
    for v in variants:
        q = vcf_query(variant_str=v['variant_id'])
        if not q:
            print(v['variant_id'])
            continue
        hom_samples += q.get('hom_samples', [])
        het_samples += q.get('het_samples', [])
    hom_samples_count = Counter(hom_samples)
    het_samples_count = Counter(het_samples)
    print('HOM:')
    print(hom_samples_count)
    print('HET:')
    print(het_samples_count)
    if model == 'recessive':
        # A hom call, or at least two het calls on the gene.
        samples = frozenset(hom_samples_count) | frozenset(
            s for s, count in het_samples_count.items() if count > 1
        )
    else:
        samples = frozenset(hom_samples + het_samples)
    hpo = []
    for s in everyone:
        p = db_patients.patients.find_one({'external_id': s}, {'features': 1})
        if not p:
            continue
        if 'features' not in p:
            print(s + ' has no features ')
            continue
        p2 = dict()
        p2['features'] = [f for f in p['features'] if f['observed'] == 'yes']
        p2[gene_name] = s in samples
        hpo.append(p2)
    stats = Counter([h[gene_name] for h in hpo])
    variants = [v['variant_id'] for v in variants]
    return jsonify(result=hpo, stats=stats, variants=variants)
def het_samples(self):
    """Return ``het_samples``, fetching the VCF record on first access.

    A value already present in the instance ``__dict__`` is returned as-is.
    Otherwise the variant is looked up with ``vcf.vcf_query``, the returned
    mapping is merged into the instance ``__dict__``, ``self.save()`` is
    called (its return value is printed) and the loaded value is returned.

    Raises:
        Exception: ``('ERROR', variant_id)`` when the VCF query returns None.
        KeyError: when the VCF record does not contain ``'het_samples'``.
    """
    if 'het_samples' in self.__dict__:
        return self.__dict__['het_samples']
    q = vcf.vcf_query(variant_str=self.variant_id)
    # Same guard as the HOM_COUNT / allele_num accessors: without it a
    # missing variant surfaced as an opaque TypeError from dict.update(None).
    if q is None:
        raise Exception('ERROR', self.variant_id)
    self.__dict__.update(q)
    print(self.save())
    return self.__dict__['het_samples']