def testFromVCF(self):
    '''Check that connecting to the gzipped temp file depends on the encoding.

    The first connect call passes "ascii" positionally and is expected to
    raise UnicodeDecodeError; the second passes encoding="utf-8" and is
    expected to succeed.
    '''
    parser = pysam.VCF()
    self.vcf = parser
    compressed = self.tmpfilename + ".gz"

    # "ascii" is passed positionally, exactly as the callable form of
    # assertRaises forwards it (presumably it is the encoding argument).
    self.assertRaises(UnicodeDecodeError, parser.connect, compressed, "ascii")

    parser.connect(compressed, encoding="utf-8")

    # Indexing fails if the file declares no samples; the value itself is
    # not used further in this block.
    first_sample = parser.getsamples()[0]
def __init__(self, filename, sample):
    '''Connect a VCF parser to *filename* and remember *sample*.

    Raises KeyError if *sample* is not among the samples reported by
    the parser for that file.
    '''
    self.sample = sample

    reader = pysam.VCF()
    self.vcf = reader
    reader.connect(filename)

    # Guard clause: nothing more to do when the sample is present.
    if sample in reader.getsamples():
        return
    raise KeyError("sample %s not vcf file" % sample)
def load(self, filename, exclude=None):
    '''load tracks from a vcf file.

    One track is built with ``self.factory(sample)`` for every sample
    reported by the VCF parser, except for samples whose name matches
    (``re.search``) any of the regular expressions in *exclude*.

    filename
        Path of the vcf file. If it does not exist, ``self.tracks`` is
        set to an empty list.
    exclude
        Optional iterable of regular expression patterns. ``None`` or an
        empty collection excludes nothing.

    Returns ``self`` so that calls can be chained.
    '''
    tracks = []

    # A missing file means "no tracks". Checking this before building the
    # parser keeps this path free of any parser work.
    if not os.path.exists(filename):
        self.tracks = tracks
        return self

    v = pysam.VCF()
    v.setversion(40)
    v.connect(filename)

    # Compile the patterns once, outside the sample loop.
    to_exclude = [re.compile(x) for x in exclude] if exclude else []

    for sample in v.getsamples():
        # Decide per sample. The previous implementation used a flag that
        # was never initialised or reset, which raised UnboundLocalError
        # when nothing matched and dropped every sample after the first
        # match.
        if any(x.search(sample) for x in to_exclude):
            continue
        tracks.append(self.factory(sample))

    self.tracks = tracks
    return self
def iterate_from_vcf(infile, sample):
    '''iterate over a vcf-formatted file.

    *infile* is handed to ``pysam.VCF.connect``. The original note
    describes it as "any iterator over lines" -- NOTE(review): confirm
    that ``connect`` accepts that.

    The function yields the results of ``vcf2pileup(row, sample)``,
    described as named tuples of the type
    :class:`pysam.Pileup.PileupSubstitution` or
    :class:`pysam.Pileup.PileupIndel`.

    Positions without a snp (rows for which ``vcf2pileup`` returns a
    false value) will be skipped.

    This method is wasteful and written to support some legacy code
    that expects samtools pileup output.

    Better use the vcf parser directly.

    Raises KeyError if *sample* is not among the samples of the file.
    As this is a generator, the error surfaces when iteration starts,
    not when the function is called.
    '''
    vcf = pysam.VCF()
    vcf.connect(infile)

    if sample not in vcf.getsamples():
        # Interpolate the sample name; previously the "%s" placeholder
        # was emitted verbatim.
        raise KeyError("sample %s not vcf file" % sample)

    for row in vcf.fetch():
        result = vcf2pileup(row, sample)
        if result:
            yield result
def testRead(self): self.vcf = pysam.VCF() self.vcf.connect(self.filename) for x in self.vcf.fetch(): print str(x) print x.pos print x.alt print x.id print x.qual print x.filter print x.info print x.format for s in x.samples: print s, x[s]
def setUp(self):
    '''Prepare the fixture: base set-up, a fresh parser and reference data.

    Runs ``TestVCF.setUp`` first, then stores a new ``pysam.VCF`` in
    ``self.vcf`` and the result of ``loadAndConvert`` (called with
    ``encode=False``) for ``self.filename`` in ``self.compare``.
    '''
    TestVCF.setUp(self)

    parser = pysam.VCF()
    self.vcf = parser

    reference = loadAndConvert(self.filename, encode=False)
    self.compare = reference