def load():
    '''
    Upload one Sequence resource per tracked SNP for the session's sample.

    The sample's variants are fetched with ga4gh.search_variants, a Sequence
    payload is built for every rsid from snps.COORDINATES, and each payload is
    handed to upload_seq in its own gevent greenlet so uploads run
    concurrently.

    Returns a redirect to '<config.API_BASE>/Sequence' once every upload
    greenlet has finished.  Lookup errors for missing session keys or for an
    rsid absent from snps.COORDINATES are not caught here.
    '''
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')

    # NOTE(review): this payload carries no patient reference and no risk
    # analysis; the locals that used to compute them were never used and have
    # been removed.  Confirm that omitting them from the resource is intended.
    uploads = []
    for rsid, _ in variants:
        coord = snps.COORDINATES[rsid]
        seq = {
            'resourceType': 'Sequence',
            # NOTE(review): DNA is a bare name here, not the string 'DNA' --
            # confirm it is defined at module level.
            'type': DNA,
            'coordinate': [{
                # presumably a 0-based, half-open interval around the SNP
                # position -- TODO confirm against the consumer.
                "start": coord['pos'] - 1,
                "end": coord['pos'],
                "chromosome": {'text': coord['chromosome']},
                "genomeBuild": {'text': 'GRCh37'},
            }],
            'variation': {'text': rsid},
            # Fixed: the literal previously read 'H**o sapiens'.
            'species': {'text': 'Homo sapiens'},
            "repository": [{
                "url": "https://www.googleapis.com/genomics/v1beta2",
                "variantId": variantset_ids,
            }],
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has finished before redirecting.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
def get_snps(sample_id):
    '''
    Return the called alleles of one sample for every SNP in GENOTYPES
    (per the original docstring, the sequences mentioned in SNPData.csv).

    sample_id -- call set id; only calls whose 'callSetId' equals it are used.

    The response is jsonify() of a dict mapping each rsid to a list of
    alleles, one entry per value in the call's 'genotype'.  Genotype 0 selects
    'referenceBases' and k > 0 selects alternateBases[k - 1].  A negative
    genotype value (no-call) is reported as None (JSON null) instead of being
    used as a Python index, which would silently pick the last alternate
    allele.  An rsid with no call for the sample is omitted.
    '''
    # NOTE(review): the return value is iterated directly as (rsid, variant)
    # pairs -- confirm search_variants does not return a
    # (variants, variantset_ids) tuple for this call.
    variants = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')

    # Named so it does not shadow the module-level name `snps`.
    alleles_by_rsid = {}
    for rsid, variant in variants:
        # Index 0 is the reference allele, followed by the alternates.
        alleles = [variant['referenceBases']]
        alleles.extend(variant['alternateBases'])
        for call in variant['calls']:
            if call.get('callSetId') != sample_id:
                continue
            alleles_by_rsid[rsid] = [
                alleles[i] if i >= 0 else None for i in call['genotype']]
    return jsonify(alleles_by_rsid)
def get_snps(sample_id):
    '''
    Return the called alleles of one sample for every SNP in GENOTYPES
    (per the original docstring, the sequences mentioned in SNPData.csv).

    sample_id -- call set id; only calls whose 'callSetId' equals it are used.

    The response is jsonify() of a dict mapping each rsid to a list of
    alleles, one entry per value in the call's 'genotype'.  Genotype 0 selects
    'referenceBases' and k > 0 selects alternateBases[k - 1].  A negative
    genotype value (no-call) is reported as None (JSON null) instead of being
    used as a Python index, which would silently pick the last alternate
    allele.  An rsid with no call for the sample is omitted.
    '''
    # NOTE(review): the return value is iterated directly as (rsid, variant)
    # pairs -- confirm search_variants does not return a
    # (variants, variantset_ids) tuple for this call.
    variants = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')

    # Named so it does not shadow the module-level name `snps`.
    alleles_by_rsid = {}
    for rsid, variant in variants:
        # Index 0 is the reference allele, followed by the alternates.
        alleles = [variant['referenceBases']]
        alleles.extend(variant['alternateBases'])
        for call in variant['calls']:
            if call.get('callSetId') != sample_id:
                continue
            alleles_by_rsid[rsid] = [
                alleles[i] if i >= 0 else None for i in call['genotype']]
    return jsonify(alleles_by_rsid)
def load():
    '''
    Upload one Sequence resource per tracked SNP for the session's sample.

    The sample's variants are fetched with ga4gh.search_variants, a Sequence
    payload is built for every rsid from snps.COORDINATES, and each payload is
    handed to upload_seq in its own gevent greenlet so uploads run
    concurrently.

    Returns a redirect to '<config.API_BASE>/Sequence' once every upload
    greenlet has finished.  Lookup errors for missing session keys or for an
    rsid absent from snps.COORDINATES are not caught here.
    '''
    sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')

    # NOTE(review): this payload carries no patient reference and no risk
    # analysis; the locals that used to compute them were never used and have
    # been removed.  Confirm that omitting them from the resource is intended.
    uploads = []
    for rsid, _ in variants:
        coord = snps.COORDINATES[rsid]
        seq = {
            'resourceType': 'Sequence',
            # NOTE(review): DNA is a bare name here, not the string 'DNA' --
            # confirm it is defined at module level.
            'type': DNA,
            'coordinate': [{
                # presumably a 0-based, half-open interval around the SNP
                # position -- TODO confirm against the consumer.
                "start": coord['pos'] - 1,
                "end": coord['pos'],
                "chromosome": {'text': coord['chromosome']},
                "genomeBuild": {'text': 'GRCh37'},
            }],
            'variation': {'text': rsid},
            # Fixed: the literal previously read 'H**o sapiens'.
            'species': {'text': 'Homo sapiens'},
            "repository": [{
                "url": "https://www.googleapis.com/genomics/v1beta2",
                "variantId": variantset_ids,
            }],
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has finished before redirecting.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
def load():
    '''
    Upload one Sequence resource per tracked SNP for the session's sample
    and patient.

    The sample's variants are fetched with ga4gh.search_variants.  For every
    rsid a Sequence payload is built from snps.COORDINATES and snps.DATA,
    including a disease-risk analysis entry, and handed to upload_seq in its
    own gevent greenlet so uploads run concurrently.

    Returns a redirect to '<config.API_BASE>/Sequence' once every upload
    greenlet has finished.  Lookup errors for missing session keys or for an
    rsid absent from the snps tables are not caught here.
    '''
    # The session's sample id doubles as the call set id.
    callset_id = sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')
    pid = session['patient']

    uploads = []
    for rsid, _ in variants:
        coord = snps.COORDINATES[rsid]
        # A 'Risk' value above 1 is reported as a positive interpretation.
        risk = float(snps.DATA[rsid]['Risk'])
        interp = 'positive' if risk > 1 else 'negative'
        seq = {
            'resourceType': 'Sequence',
            'chromosome': {'text': coord['chromosome']},
            # presumably a 0-based, half-open interval around the SNP
            # position -- TODO confirm against the consumer.
            'start': coord['pos'] - 1,
            'end': coord['pos'],
            'genomeBuild': {'text': 'GRCh37'},
            'type': 'DNA',
            'source': {'text': 'somatic'},
            'patient': {'reference': '/Patient/%s' % pid},
            'variation': {'text': rsid},
            # Fixed: the literal previously read 'H**o sapiens'.
            'species': {'text': 'Homo sapiens'},
            'analysis': [{
                'target': {'text': snps.DATA[rsid]['disease']},
                'type': {'text': 'Disease Risk Analysis'},
                'interpretation': {'text': interp},
                'confidence': 'reviewing',
            }],
            'gaRepository': ga4gh.REPOSITORIES['google'],
            'gaVariantSet': variantset_ids,
            'gaCallSet': callset_id,
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has finished before redirecting.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)
# Collect chromosome/position for every SNP that has risk data (snps) or
# drug data (drug_info).  Each fetched row unpacks into four fields: an
# ignored first column, the SNP id, the chromosome and the position.
f = TabixFile('snps.sorted.txt.gz', parser=asTuple())
snp_table = {}
try:
    for row in f.fetch():
        _, snp, chrom, pos = row
        if snp in snps or snp in drug_info:
            snp_table[snp] = {'chromosome': chrom, 'pos': int(pos)}
finally:
    # Release the Tabix handle even if a row fails to parse.
    f.close()

# Dump the lookup tables as Python literals into a generated module.
with open('snps.py', 'w') as dump:
    dump.write(WARNING)
    dump.write('COORDINATES = %s\n' % snp_table)
    dump.write('DATA = %s\n' % snps)
    dump.write('DRUG_INFO = %s\n' % drug_info)
# Parenthesized single-argument print behaves the same on Python 2.
print('Data written to snps.py')

print('Determining allele frequencies (using data from 1000 Genomes)')
genotypes = {snp: snp_data['Code'] for snp, snp_data in snps.iteritems()}
# Materialize the results so they can be reused for every population below.
variants = list(
    ga4gh.search_variants(genotypes, dataset=ga4gh.OKG, repo_id='google'))

# determine allele frequencies for different population
freqs = {
    pop: ga4gh.get_frequencies(
        variants, genotypes,
        # Bind pop as a default so the predicate keeps this iteration's
        # population even if get_frequencies evaluates it lazily.
        population=lambda call, pop=pop:
            populations.get(call.get('callSetName')) == pop)
    for pop in set(populations.values())
}
# allele frequencies for 1000 Genomes' whole population
freqs['1kg'] = ga4gh.get_frequencies(variants, genotypes)

with open('freq.py', 'w') as dump:
    dump.write(WARNING)
    dump.write('FREQUENCIES = %s\n' % freqs)
print('Data written to freq.py.')
# Collect chromosome/position for every SNP that has risk data (snps) or
# drug data (drug_info); `f` is the row source opened before this fragment.
snp_table = {}
for _, snp, chrom, pos in f.fetch():
    if snp not in snps and snp not in drug_info:
        continue
    snp_table[snp] = {'chromosome': chrom, 'pos': int(pos)}

# Dump the lookup tables as Python literals into a generated module.
with open('snps.py', 'w') as out:
    out.write(WARNING)
    out.write('COORDINATES = %s\n' % snp_table)
    out.write('DATA = %s\n' % snps)
    out.write('DRUG_INFO = %s\n' % drug_info)
print('Data written to snps.py')

print('Determining allele frequencies (using data from 1000 Genomes)')
genotypes = dict(
    (rsid, info['Code']) for rsid, info in snps.iteritems())
variant_results = ga4gh.search_variants(
    genotypes, dataset=ga4gh.OKG, repo_id='google')
variants = list(variant_results)

# determine allele frequencies for different population
freqs = {}
for pop in set(populations.values()):
    def in_population(call):
        # Compares against the loop's current `pop`; this assumes
        # get_frequencies evaluates the predicate before it returns.
        return populations.get(call.get('callSetName')) == pop

    freqs[pop] = ga4gh.get_frequencies(
        variants, genotypes, population=in_population)

# allele frequencies for 1000 Genomes' whole population
freqs['1kg'] = ga4gh.get_frequencies(variants, genotypes)

with open('freq.py', 'w') as out:
    out.write(WARNING)
    out.write('FREQUENCIES = %s\n' % freqs)
print('Data written to freq.py.')
def load():
    '''
    Upload one Sequence resource per tracked SNP for the session's sample
    and patient.

    The sample's variants are fetched with ga4gh.search_variants.  For every
    rsid a Sequence payload is built from snps.COORDINATES and snps.DATA,
    including a disease-risk analysis entry, and handed to upload_seq in its
    own gevent greenlet so uploads run concurrently.

    Returns a redirect to '<config.API_BASE>/Sequence' once every upload
    greenlet has finished.  Lookup errors for missing session keys or for an
    rsid absent from the snps tables are not caught here.
    '''
    # The session's sample id doubles as the call set id.
    callset_id = sample_id = session['sample_id']
    variants, variantset_ids = ga4gh.search_variants(
        GENOTYPES, ga4gh.OKG, callSetIds=[sample_id], repo_id='google')
    pid = session['patient']

    uploads = []
    for rsid, _ in variants:
        coord = snps.COORDINATES[rsid]
        # A 'Risk' value above 1 is reported as a positive interpretation.
        risk = float(snps.DATA[rsid]['Risk'])
        interp = 'positive' if risk > 1 else 'negative'
        seq = {
            'resourceType': 'Sequence',
            'chromosome': {'text': coord['chromosome']},
            # presumably a 0-based, half-open interval around the SNP
            # position -- TODO confirm against the consumer.
            'start': coord['pos'] - 1,
            'end': coord['pos'],
            'genomeBuild': {'text': 'GRCh37'},
            'type': 'DNA',
            'source': {'text': 'somatic'},
            'patient': {'reference': '/Patient/%s' % pid},
            'variation': {'text': rsid},
            # Fixed: the literal previously read 'H**o sapiens'.
            'species': {'text': 'Homo sapiens'},
            'analysis': [{
                'target': {'text': snps.DATA[rsid]['disease']},
                'type': {'text': 'Disease Risk Analysis'},
                'interpretation': {'text': interp},
                'confidence': 'reviewing',
            }],
            'gaRepository': ga4gh.REPOSITORIES['google'],
            'gaVariantSet': variantset_ids,
            'gaCallSet': callset_id,
        }
        uploads.append(gevent.spawn(upload_seq, seq, session['access_token']))

    # Block until every upload greenlet has finished before redirecting.
    gevent.joinall(uploads)
    return redirect('%s/Sequence' % config.API_BASE)