Exemplo n.º 1
0
 def HOM_COUNT(self):
     """Return this object's ``HOM_COUNT`` entry, loading it on first use.

     When ``HOM_COUNT`` is already stored in the instance ``__dict__`` it is
     returned as-is. Otherwise ``vcf.vcf_query`` is called for
     ``self.variant_id``, every key of the result is merged into the
     instance ``__dict__``, the return value of ``self.save()`` is printed,
     and the freshly stored ``HOM_COUNT`` is returned.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``HOM_COUNT`` key.
     """
     field = 'HOM_COUNT'
     if field not in self.__dict__:
         record = vcf.vcf_query(variant_str=self.variant_id)
         if record is None:
             raise Exception('ERROR', self.variant_id)
         # Merge everything the query returned, not just the requested field.
         self.__dict__.update(record)
         print(self.save())
     return self.__dict__[field]
Exemplo n.º 2
0
 def allele_num(self):
     """Return this object's ``allele_num`` entry, loading it on first use.

     A value already present in the instance ``__dict__`` is returned
     directly. Otherwise ``vcf.vcf_query`` is called for
     ``self.variant_id``, the whole result is merged into the instance
     ``__dict__``, the return value of ``self.save()`` is printed, and the
     stored ``allele_num`` is returned.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``allele_num`` key.
     """
     field = 'allele_num'
     if field not in self.__dict__:
         record = vcf.vcf_query(variant_str=self.variant_id)
         if record is None:
             raise Exception('ERROR', self.variant_id)
         # Merge everything the query returned, not just the requested field.
         self.__dict__.update(record)
         print(self.save())
     return self.__dict__[field]
Exemplo n.º 3
0
 def allele_num(self):
     """Look up ``allele_num`` on this object, querying the VCF if absent.

     The instance ``__dict__`` is consulted first. On a miss,
     ``vcf.vcf_query`` is invoked with ``self.variant_id``; its result is
     merged into the instance ``__dict__``, ``self.save()`` is called and
     its return value printed, then ``allele_num`` is read back.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``allele_num`` key.
     """
     name = 'allele_num'
     already_loaded = name in self.__dict__
     if not already_loaded:
         fetched = vcf.vcf_query(variant_str=self.variant_id)
         if fetched is None:
             raise Exception('ERROR', self.variant_id)
         self.__dict__.update(fetched)
         print(self.save())
     return self.__dict__[name]
Exemplo n.º 4
0
 def HOM_COUNT(self):
     """Look up ``HOM_COUNT`` on this object, querying the VCF if absent.

     The instance ``__dict__`` is consulted first. On a miss,
     ``vcf.vcf_query`` is invoked with ``self.variant_id``; its result is
     merged into the instance ``__dict__``, ``self.save()`` is called and
     its return value printed, then ``HOM_COUNT`` is read back.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``HOM_COUNT`` key.
     """
     name = 'HOM_COUNT'
     already_loaded = name in self.__dict__
     if not already_loaded:
         fetched = vcf.vcf_query(variant_str=self.variant_id)
         if fetched is None:
             raise Exception('ERROR', self.variant_id)
         self.__dict__.update(fetched)
         print(self.save())
     return self.__dict__[name]
Exemplo n.º 5
0
 def __init__(self, variant_id=None, db=None,data=None):
     """Build the object from a variant id, VCF data and optional DB record.

     Attributes are layered into the instance ``__dict__`` in this order,
     later sources overriding earlier ones: the ``vcf.vcf_query`` result,
     then ``data`` (if truthy), then the stored DB document (if ``db`` is
     given and a document exists).

     Args:
         variant_id: Identifier of the form ``chrom-pos-ref-alt``;
             underscores are accepted as separators and converted to
             hyphens. When ``None`` it is read from ``data['variant_id']``.
         db: Optional database handle. When given it is stored on
             ``Variant.db`` and queried for an existing document; if none
             is found ``self.save()`` is called.
         data: Optional mapping of attributes to merge into the instance.

     Raises:
         TypeError: when both ``variant_id`` and ``data`` are ``None``.
         ValueError: when the normalised id does not split into exactly
             four hyphen-separated parts.
         Exception: ``('NOT IN VCF', variant_id)`` when the VCF query
             returns ``None``.
     """
     if variant_id is None:
         if data is None:
             raise TypeError('either variant_id or data must be provided')
         variant_id=data['variant_id']
     self.variant_id=str(variant_id).strip().replace('_','-')
     # Split the *normalised* id: the raw argument may use underscores, carry
     # surrounding whitespace, or not be a string at all.
     self.chrom, self.pos, self.ref, self.alt = self.variant_id.split('-')
     q=vcf.vcf_query(variant_str=self.variant_id,)
     if q is None: raise Exception('NOT IN VCF',self.variant_id)
     self.__dict__.update(q)
     if data: self.__dict__.update(data)
     if db:
         Variant.db=db
         stored=Variant.db.variants.find_one({'variant_id':self.variant_id},fields={'_id':False})
         if not stored:
             print('NOT IN DB', self.variant_id, 'WILL INSERT')
             self.save()
             #self.xpos = get_xpos(self.chrom, self.pos)
         else:
             self.__dict__.update(stored)
Exemplo n.º 6
0
def get_rare_var_p_hpo(gene_id, db, patient_db):
    """Collect per-patient data for the rare variants of one gene.

    Every variant id listed on the gene document is examined. Variants
    whose ExAC allele frequency exceeds 0.01 are skipped. Variants for
    which ``vcf_query`` returns nothing are written, one id per line, to
    ``views/debug/bad_variants`` (the file is truncated on every call) and
    skipped. For the remaining variants ``populate_mode_p`` is called for
    each hom and het sample.

    Intended result shape, taken from the original author's note:
        {'hom_comp': {p_id1: {hpo: [(HP:1234, hell)], exac_af: [0.0021],
                              uclex_af: [0.001]}},
         'het': {p_id2: {hpo: [(HP:2345, yeah)], exac_af: [0.001, 0.002],
                         uclex_af: [0.002, 0.001]}}}

    Args:
        gene_id: Value matched against ``gene_id`` in ``db.genes``.
        db: Database handle exposing ``genes`` and ``variants`` collections.
        patient_db: Passed through unchanged to ``populate_mode_p``.

    Returns:
        dict with the keys ``'hom_comp'`` and ``'het'``.
    """
    results = {'hom_comp': {}, 'het': {}}
    # Sometimes a variant is not in the VCF; record it for later inspection.
    # The context manager guarantees the file is flushed and closed even if
    # a lookup below raises.
    with open('views/debug/bad_variants', 'w') as bad_var_file:
        # get all variants on this gene
        all_vars = db.genes.find_one({'gene_id': gene_id})['variant_ids']
        for v in all_vars:
            var = db.variants.find_one({'variant_id': v})
            exac_af = 0
            if var['in_exac']:
                if 'allele_freq' not in var['EXAC']:
                    # Stored record lacks the frequency; re-annotate.
                    VAR = annotation.exac_anno(v)
                    exac_af = VAR['allele_freq']
                else:
                    exac_af = var['EXAC']['allele_freq']
            # not interested if af is > 0.01
            if float(exac_af) > 0.01:
                continue
            # get relevant info from vcf
            this = vcf_query(variant_str=v)
            if not this:
                bad_var_file.write(v + '\n')
                continue
            uclex_af = this['allele_freq']

            # Hom patients: registered under both modes with a weight of 2.
            # TODO (original author): both_het needs handling, their af
            # values are different.
            for p in this['hom_samples']:
                populate_mode_p(results, 2, ['hom_comp', 'het'], p, exac_af, uclex_af, patient_db)

            # Het patients. Original author's note: check the length of
            # exac_af, it may be longer than one.
            for p in this['het_samples']:
                entry = results['het'].setdefault(p, {'exac_af': [], 'uclex_af': []})
                modes = ['het']
                if entry['exac_af']:
                    # this patient has more than one var on this gene.
                    # copy it to hom_comp
                    modes.append('hom_comp')
                populate_mode_p(results, 1, modes, p, exac_af, uclex_af, patient_db)
    return results
Exemplo n.º 7
0
def gene_hpo(gene_id):
    """Return, as JSON, observed HPO features per patient flagged by gene.

    Presumably a Flask view (it reads ``request.args`` and returns
    ``jsonify(...)``) -- confirm against the route registration.

    Steps:
      1. Resolve ``gene_id`` to an Ensembl id if it does not start with
         ``ENSG``, then look up the gene name.
      2. Keep the gene's variants that pass ``condition`` below.
      3. Gather hom/het samples for those variants via ``vcf_query``.
      4. With ``model=recessive`` a sample is selected when it is hom for
         any kept variant or het for more than one; otherwise any hom or
         het sample is selected.
      5. For every sample in the headers file that has a patient record
         with features, emit its observed features plus a boolean keyed
         by the gene name telling whether the sample was selected.

    Args:
        gene_id: Ensembl gene id (``ENSG...``) or a gene name.

    Returns:
        ``jsonify(result=..., stats=..., variants=...)``.
    """
    # if gene not ensembl id then translate to
    db=get_db()
    db_patients=get_db('patients')
    if not gene_id.startswith('ENSG'): gene_id = lookups.get_gene_by_name(get_db(), gene_id)['gene_id']
    gene_name=db.genes.find_one({'gene_id':gene_id})['gene_name']
    print(gene_name)
    # NOTE(review): the 'exac_thresh' request argument used to be read here
    # but was never applied; the ExAC cut-off below is hard-coded to 0.001.
    model=request.args.get('model')
    # open() instead of the Python-2-only file() builtin, inside a context
    # manager so the handle is closed.
    with open('/slms/UGI/vm_exports/vyp/phenotips/uclex_files/current/headers.txt','r') as headers:
        everyone=frozenset(headers.read().strip().split('\t'))
    def condition(v):
        """Return True when the variant document passes the rarity filters."""
        variant_id=v['variant_id']
        try:
            v=Variant(db=db,variant_id=variant_id)
        except Exception:
            # Any construction failure is treated as "not in VCF", but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('NOT IN VCF',variant_id,)
            return False
        if v.in_exac and float(v.EXAC['allele_freq'])>0.001:
            print('TOO COMMON',v.EXAC)
            return False
        passes=v.filter=='PASS' and v.WT_COUNT> len(everyone)/4. and ((v.HOM_COUNT==1 and v.HET_COUNT==0) or (v.HOM_COUNT<v.HET_COUNT)) and v.HET_COUNT<len(everyone)/1000.
        print(v.filter)
        print(v.WT_COUNT> len(everyone)/4.)
        print(v.HOM_COUNT<v.HET_COUNT)
        print(v.HET_COUNT<len(everyone)/1000.)
        print(v.variant_id,passes)
        return passes
    #variants in gene
    variants=[v for v in db.variants.find({'genes': gene_id}, fields={'_id': False}) if condition(v)]
    print(len(variants))
    hom_samples=[]
    het_samples=[]
    for v in variants:
        q=vcf_query(variant_str=v['variant_id'])
        if not q:
            print(v['variant_id'])
            continue
        hom_samples+=q.get('hom_samples',[])
        het_samples+=q.get('het_samples',[])
    hom_samples_count=Counter(hom_samples)
    het_samples_count=Counter(het_samples)
    print('HOM:')
    print(hom_samples_count)
    print('HET:')
    print(het_samples_count)
    if model=='recessive':
        # hom for any variant, or het for more than one variant
        samples=frozenset([s for s in hom_samples_count]+[s for s in het_samples_count if het_samples_count[s]>1])
    else:
        samples=frozenset(hom_samples+het_samples)
    hpo=[]
    for s in everyone:
        p=db_patients.patients.find_one({'external_id':s},{'features':1})
        if not p: continue
        if 'features' not in p:
            print(s + ' has no features ')
            continue
        p2=dict()
        p2['features']=[f for f in p['features'] if f['observed']=='yes']
        p2[gene_name]=s in samples
        hpo.append(p2)
    stats=Counter([h[gene_name] for h in hpo])
    variants=[v['variant_id'] for v in variants]
    return(jsonify(result=hpo,stats=stats,variants=variants))
Exemplo n.º 8
0
 def het_samples(self):
     """Return this object's ``het_samples`` entry, loading it on first use.

     A value already present in the instance ``__dict__`` is returned
     directly. Otherwise ``vcf.vcf_query`` is called for
     ``self.variant_id``, the whole result is merged into the instance
     ``__dict__``, the return value of ``self.save()`` is printed, and the
     stored ``het_samples`` is returned.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``het_samples`` key.
     """
     if 'het_samples' in self.__dict__: return self.__dict__['het_samples']
     q=vcf.vcf_query(variant_str=self.variant_id)
     # Fail with the variant id attached, matching HOM_COUNT / allele_num,
     # instead of the opaque TypeError that dict.update(None) would raise.
     if q is None: raise Exception('ERROR',self.variant_id)
     self.__dict__.update(q)
     print(self.save())
     return self.__dict__['het_samples']
Exemplo n.º 9
0
 def het_samples(self):
     """Look up ``het_samples`` on this object, querying the VCF if absent.

     The instance ``__dict__`` is consulted first. On a miss,
     ``vcf.vcf_query`` is invoked with ``self.variant_id``; its result is
     merged into the instance ``__dict__``, ``self.save()`` is called and
     its return value printed, then ``het_samples`` is read back.

     Raises:
         Exception: ``('ERROR', variant_id)`` when the query returns ``None``.
         KeyError: when the query result carries no ``het_samples`` key.
     """
     if 'het_samples' in self.__dict__: return self.__dict__['het_samples']
     q=vcf.vcf_query(variant_str=self.variant_id)
     # Guard added for parity with HOM_COUNT / allele_num: a missing VCF
     # record would otherwise surface as TypeError from dict.update(None).
     if q is None: raise Exception('ERROR',self.variant_id)
     self.__dict__.update(q)
     print(self.save())
     return self.__dict__['het_samples']