def _next(self, line):
    """Build a GenomeElement from one tab-separated line.

    The first column is validated through ``self._checkValidChr`` and stored
    as the chromosome, the second and third columns are converted to ``int``
    and stored as start and end, and the fourth column is handed to
    ``self._parseVal`` together with the element.

    Raises whatever ``int()`` or the indexing raises if the line has fewer
    than four columns or non-integer coordinates.
    """
    fields = line.split("\t")
    element = GenomeElement(self._genome)

    # Only the first four columns are consumed; any further columns are ignored.
    chrField, startField, endField, valField = fields[0], fields[1], fields[2], fields[3]

    element.chr = self._checkValidChr(chrField)
    element.start = int(startField)
    element.end = int(endField)
    self._parseVal(element, valField)
    return element
def _next(self, line):
    """Handle one line of a FASTA-style file.

    A line starting with ``>`` is a header: the bounding region collected so
    far is flushed via ``self._appendBoundingRegionTuple()``, the element
    counter is reset, and the first whitespace-separated token after ``>``
    becomes the current chromosome (after ``self._checkValidChr``). Nothing
    is returned for header lines.

    Any other line is treated as sequence data: its length is added to the
    element counter and a GenomeElement is returned whose ``val`` is a numpy
    array of the line's single characters (dtype ``'S1'``).

    Raises:
        InvalidFormatError: if sequence data is seen before any header line.
    """
    if line.startswith('>'):
        self._appendBoundingRegionTuple()
        self._elCount = 0
        headerTokens = line[1:].split()
        self._chr = self._checkValidChr(headerTokens[0])
        return None

    # Sequence data is only meaningful once a header has set the chromosome.
    if self._chr is None:
        raise InvalidFormatError('FASTA file does not start with the ">" character.')

    self._elCount += len(line)
    element = GenomeElement(self._genome, self._chr)
    element.val = np.fromstring(line, dtype='S1')
    return element
def testAssignAndRetrieve(self):
    """Values given to a GenomeElement can be read back as attributes."""
    expectedExtra = {'a': 1, 'b': 2}
    expectedKeyOrder = ['a', 'b']

    # Case 1: everything supplied through the constructor, extras passed
    # explicitly via 'extra' and 'orderedExtraKeys'.
    el = GenomeElement('TestGenome', start=5, val=1.0,
                       extra={'a': 1, 'b': 2}, orderedExtraKeys=['a', 'b'])
    constructorExpectations = [
        ('genome', 'TestGenome'),
        ('chr', None),
        ('start', 5),
        ('end', None),
        ('val', 1.0),
        ('strand', None),
        ('a', 1),
        ('b', 2),
        ('extra', expectedExtra),
        ('orderedExtraKeys', expectedKeyOrder),
    ]
    for attrName, expected in constructorExpectations:
        self.assertEqual(getattr(el, attrName), expected)

    # Case 2: one extra given as an unknown keyword argument, a second one
    # assigned afterwards as a plain attribute.
    el = GenomeElement('TestGenome', a=1)
    el.b = 2
    assignmentExpectations = [
        ('genome', 'TestGenome'),
        ('a', 1),
        ('b', 2),
        ('extra', expectedExtra),
        ('orderedExtraKeys', expectedKeyOrder),
    ]
    for attrName, expected in assignmentExpectations:
        self.assertEqual(getattr(el, attrName), expected)

    # Reading an attribute that was never set must fail.
    self.assertRaises(AttributeError, getattr, el, 'nonExisting')
def _next(self, line):
    """Parse one line of a GFF file into a GenomeElement.

    Returns ``None`` for lines starting with ``#``. A line starting with
    ``##FASTA`` ends iteration by raising ``StopIteration``.

    Every other line must split into exactly nine tab-separated columns,
    each of which is passed through ``unquote``. The columns are stored on
    the element in file order; ``id`` and ``name`` are additionally set from
    ``key=value`` pairs (keys matched case-insensitively) found in the raw
    ninth column.

    Raises:
        StopIteration: on a ``##FASTA`` line.
        InvalidFormatError: if the line does not have nine columns.
    """
    # The '##FASTA' test must come first: such a line also starts with '#'.
    if line.startswith('##FASTA'):
        raise StopIteration
    if line.startswith('#'):
        return None

    rawCols = line.split('\t')
    cols = [unquote(rawCol) for rawCol in rawCols]
    if len(cols) != 9:
        raise InvalidFormatError("Error: GFF files must contain 9 tab-separated columns")

    (seqName, source, thirdCol, startCol, endCol,
     sixthCol, strandCol, phase, attributes) = cols

    # Assignments stay in column order so that any ordering the element
    # keeps for its attributes is unchanged.
    element = GenomeElement(self._genome)
    element.chr = self._checkValidChr(seqName)
    element.source = source
    self._parseThirdCol(element, thirdCol)
    # One is subtracted from the start column; presumably this converts
    # GFF's 1-based start to a 0-based one -- confirm against the callers.
    element.start = self._checkValidStart(element.chr, int(startCol) - 1)
    element.end = self._checkValidEnd(element.chr, int(endCol), start=element.start)
    self._parseSixthCol(element, sixthCol)
    element.strand = self._getStrandFromString(strandCol)
    element.phase = phase
    element.attributes = attributes

    # The attribute pairs are split from the raw (still quoted) column, and
    # only the value is unquoted afterwards.
    for pair in rawCols[8].split(';'):
        pieces = pair.split('=')
        if len(pieces) != 2:
            continue
        key, val = pieces
        loweredKey = key.lower()
        if loweredKey == 'id':
            element.id = unquote(val)
        elif loweredKey == 'name':
            element.name = unquote(val)

    return element
def _wrappedTrackElsGenerator(self):
    """Yield one converted element per track element, region by region.

    For each region in ``self._boundingRegions`` a track view is fetched with
    ``self._getTrackView`` and every element of that view is converted via
    ``GenomeElement.createGeFromTrackEl``, passing the view's track format
    and ``self._globalCoords``.
    """
    track = self._getTrack()
    for region in self._boundingRegions:
        # The view for a region is only fetched once iteration reaches it.
        trackView = self._getTrackView(track, region)
        for trackEl in trackView:
            yield GenomeElement.createGeFromTrackEl(
                trackEl, trackView.trackFormat, globalCoords=self._globalCoords)
def next(self):
    """Return the next track element converted by GenomeElement.createGeFromTrackEl.

    The element is pulled from ``self._tvIter``; whatever that iterator
    raises when exhausted propagates to the caller.
    """
    return GenomeElement.createGeFromTrackEl(self._tvIter.next(), self._tv.trackFormat)