''' # snps reader = TsvReader(snpfile, cnames=False) snplist = list(set(r[snpcol] for r in reader)) reader.close() from cruzdb import Genome g = Genome(genome) outfiletmp = outfile + '.tmp' writer = TsvWriter(outfiletmp) for i in range(0, len(snplist), 1000): chunk = snplist[i:i + 1000] sql = 'SELECT chrom, chromStart, chromEnd, name, score, strand, refUCSC, alleles, alleleFreqs FROM snp{dbsnpver} WHERE name in ({snps})'.format( dbsnpver=dbsnpver, snps=', '.join("'{}'".format(s) for s in chunk)) result = g.sql(sql) for r in result: allfreqs = dict(zip(r.alleles.split(','), r.alleleFreqs.split(','))) reffreq = allfreqs.get(r.refUCSC, '0') if r.refUCSC in allfreqs: del allfreqs[r.refUCSC] if '' in allfreqs: del allfreqs[''] writer.write([ r.chrom, r.chromStart, r.chromEnd, r.name, r.score, r.strand, r.refUCSC, ','.join(allfreqs.keys()), ','.join([reffreq] + list(allfreqs.values())) ]) writer.close()