def testAssignAndRetrieve(self):
    """Core and extra attributes must be readable after construction/assignment."""
    # Extras passed as a dict together with an explicit key order.
    el = GenomeElement('TestGenome', start=5, val=1.0,
                       extra={'a': 1, 'b': 2},
                       orderedExtraKeys=['a', 'b'])
    expectedAttrs = [
        ('genome', 'TestGenome'),
        ('chr', None),
        ('start', 5),
        ('end', None),
        ('val', 1.0),
        ('strand', None),
        ('a', 1),
        ('b', 2),
        ('extra', {'a': 1, 'b': 2}),
        ('orderedExtraKeys', ['a', 'b']),
    ]
    for attrName, expected in expectedAttrs:
        self.assertEqual(getattr(el, attrName), expected)

    # Extras passed as a keyword argument and by plain attribute assignment.
    el = GenomeElement('TestGenome', a=1)
    el.b = 2
    expectedAttrs = [
        ('genome', 'TestGenome'),
        ('a', 1),
        ('b', 2),
        ('extra', {'a': 1, 'b': 2}),
        ('orderedExtraKeys', ['a', 'b']),
    ]
    for attrName, expected in expectedAttrs:
        self.assertEqual(getattr(el, attrName), expected)

    # Unknown attributes must raise instead of silently returning a value.
    self.assertRaises(AttributeError, getattr, el, 'nonExisting')
def _next(self, line):
    """Parse one tab-separated BED line into a GenomeElement.

    Returns None for lines starting with '#'. Raises InvalidFormatError when
    the number of columns differs from the first data line, or falls outside
    [MIN_NUM_COLS, MAX_NUM_COLS].
    """
    if line.startswith('#'):
        return

    ge = GenomeElement(self._genome)
    cols = line.split('\t')
    numColsInLine = len(cols)

    if self._numCols is None:
        # First data line fixes the column count for the rest of the file.
        self._numCols = numColsInLine
    elif numColsInLine != self._numCols:
        raise InvalidFormatError(
            'Error: BED files must have the same number of columns in each data line.')

    if not (self.MIN_NUM_COLS <= self._numCols <= self.MAX_NUM_COLS):
        raise InvalidFormatError(
            'Error: BED file contains %s columns, but must contain between %s and %s columns.'
            % (self._numCols, self.MIN_NUM_COLS, self.MAX_NUM_COLS))

    ge.chr = self._checkValidChr(cols[0])
    ge.start = self._checkValidStart(ge.chr, int(cols[1]))
    validEnd = self._checkValidEnd(ge.chr, int(cols[2]), start=ge.start)
    self._parseEnd(ge, validEnd)
    self._parseName(ge, cols)
    self._parseVal(ge, cols)

    if self._numCols >= 6:
        ge.strand = self._getStrandFromString(cols[5])

    # Columns from index 6 onwards are stored under the names in BED_EXTRA_COLUMNS.
    for colIdx, attrName in enumerate(self.BED_EXTRA_COLUMNS, 6):
        if self._numCols > colIdx:
            setattr(ge, attrName, cols[colIdx])

    return ge
def _next(self, line):
    """Turn a single BED data line into a GenomeElement.

    Comment lines (leading '#') give None. An InvalidFormatError is raised if
    the line has a different column count than earlier data lines, or if the
    count is not between MIN_NUM_COLS and MAX_NUM_COLS.
    """
    if line.startswith('#'):
        return

    ge = GenomeElement(self._genome)
    fields = line.split('\t')

    if self._numCols is None:
        # No data line seen yet: remember how many columns this file uses.
        self._numCols = len(fields)
    else:
        if len(fields) != self._numCols:
            raise InvalidFormatError('Error: BED files must have the same number of columns in each data line.')

    tooFew = self._numCols < self.MIN_NUM_COLS
    tooMany = self._numCols > self.MAX_NUM_COLS
    if tooFew or tooMany:
        raise InvalidFormatError('Error: BED file must contain between %s and %s columns.' % (self.MIN_NUM_COLS, self.MAX_NUM_COLS))

    ge.chr = self._checkValidChr(fields[0])
    ge.start = self._checkValidStart(ge.chr, int(fields[1]))
    self._parseEnd(ge, self._checkValidEnd(ge.chr, int(fields[2]), start=ge.start))
    self._parseName(ge, fields)
    self._parseVal(ge, fields)

    if self._numCols >= 6:
        ge.strand = self._getStrandFromString(fields[5])

    # self._numCols equals len(fields) here, so pairing the extra column names
    # with the fields after the sixth stops at whichever runs out first.
    for extraCol, extraVal in zip(self.BED_EXTRA_COLUMNS, fields[6:]):
        setattr(ge, extraCol, extraVal)

    return ge
def _next(self, line):
    """Build a GenomeElement from a tab-separated line: chr, start, end, value column."""
    fields = line.split('\t')

    element = GenomeElement(self._genome)
    element.chr = self._checkValidChr(fields[0])
    element.start = int(fields[1])
    element.end = int(fields[2])
    # The fourth column is handed to _parseVal, which sets the value on the element.
    self._parseVal(element, fields[3])
    return element
def _next(self, line):
    """Handle one FASTA line.

    Header lines ('>' prefix) close the previous bounding region, reset the
    element count and set the current chromosome; they yield None. Any other
    line yields a GenomeElement whose val is the line as an array of single
    characters. Raises InvalidFormatError if data precedes the first header.
    """
    if line.startswith('>'):
        self._appendBoundingRegionTuple()
        self._elCount = 0
        # Only the first whitespace-delimited word after '>' names the sequence.
        headerName = line[1:].split()[0]
        self._chr = self._checkValidChr(headerName)
        return None

    if self._chr is None:
        raise InvalidFormatError(
            'FASTA file does not start with the ">" character.')

    self._elCount += len(line)
    seqElement = GenomeElement(self._genome, self._chr)
    seqElement.val = np.fromstring(line, dtype='S1')
    return seqElement
def _compute(self):
    """Return the union of the covered segments as a list of GenomeElements.

    The second child supplies a sorted array of coded events, where
    ``code % 4 - 2`` is the change in cover count and ``code // 4`` is the
    position. A union segment opens when the cumulative cover count becomes 1
    and closes when it returns to 0.

    Returns:
        list of GenomeElement(start=..., end=...), one per union segment.
    """
    # The first child's result is not used below; it is still requested, as in
    # the original code, in case computing it matters to the caller.
    self._children[0].getResult()
    allSortedCodedEvents = self._children[1].getResult()

    allEventCodes = (allSortedCodedEvents % 4) - 2
    # Explicit floor division: positions must stay integral regardless of the
    # division semantics in effect.
    allSortedDecodedEvents = allSortedCodedEvents // 4

    from numpy.ma import add
    cumulativeCoverStatus = add.accumulate(allEventCodes)
    assert len(cumulativeCoverStatus) == len(allSortedDecodedEvents), str(
        len(allSortedDecodedEvents))

    unionStartList = []
    unionEndList = []
    # None means "no segment currently open". Testing against None (and not
    # truthiness) keeps segments that start at position 0, and initialising it
    # here avoids an unbound local if the first cumulative value is 0.
    startPos = None
    for i, cumVal in enumerate(cumulativeCoverStatus):
        if cumVal == 1 and startPos is None:
            startPos = allSortedDecodedEvents[i]
        elif cumVal == 0:
            if startPos is not None:
                unionStartList.append(startPos)
                unionEndList.append(allSortedDecodedEvents[i])
            startPos = None

    return [
        GenomeElement(start=x, end=y)
        for x, y in zip(unionStartList, unionEndList)
    ]
# Parse a declaration line and update the parser state derived from it.
#
# Reads chr, start, step and span through _getDeclarationLineAttrValues and
# stores _fixedStep, _span and _isPoints (span == 1). For fixed-step data it
# also stores _step, _isStepFunction (step == span and step > 1) and
# _isFunction (step == span and step == 1). Depending on
# _shouldExpandBoundingRegion it may close the previous bounding region via
# _appendBoundingRegionTuple and reset _start / _curElCountInBoundingRegion.
# self._chr is updated to the new chromosome.
#
# Returns None, or a blank GenomeElement (val=NaN, isBlankElement=True,
# end=self._start) that is created when _isStepFunction is set.
#
# NOTE(review): the indentation of this block is lost in this view, so the
# exact nesting of the conditionals cannot be confirmed from here; verify
# against the upstream file before restructuring.
def _parseDeclarationLine(self, line): returnGE = None chr, start, step, span = self._getDeclarationLineAttrValues(line) self._fixedStep = self._checkFixedStep(line, start, step) chr = self._handleChr(chr) self._span = self._handleSpan(span) self._isPoints = self._span == 1 if self._fixedStep: start = self._handleStart(chr, start) self._step = self._handleStep(step) self._isStepFunction = (self._step == self._span and self._step > 1) self._isFunction = (self._step == self._span and self._step == 1) if self._isFunction: self._genomeElement.chr = chr if not self._shouldExpandBoundingRegion(chr, start): if self._chr is not None: #self._chr is still the chromosome of the previous decl. line self._appendBoundingRegionTuple() self._start = start self._curElCountInBoundingRegion = 0 if self._isStepFunction: returnGE = GenomeElement(genome=self._genome, chr=chr, end=self._start, \ val=numpy.nan, isBlankElement=True) self._chr = chr return returnGE
def _wrappedTrackElsGenerator(self):
    """Yield one GenomeElement per track element, bounding region by bounding region."""
    track = self._getTrack()
    for region in self._boundingRegions:
        # The track view is fetched lazily, right before its region is iterated.
        trackView = self._getTrackView(track, region)
        for trackEl in trackView:
            yield GenomeElement.createGeFromTrackEl(
                trackEl, trackView.trackFormat, globalCoords=self._globalCoords)
def next(self): self._curPos += 1 if self._curPos % 10e6 == 0: print '.', if self._curPos >= len(self._tv.genomeAnchor): raise StopIteration if self._exhausted: return None if self._curEl is None: try: self._curEl = self._tvIter.next() except StopIteration: self._exhausted = True return None if self._curPos == self._curEl.start(): trackEl = self._curEl genome = self._tv.genomeAnchor.genome chr = self._tv.genomeAnchor.chr #print 'EL: ',GenomeElement(genome,chr, trackEl.start(), trackEl.end(), trackEl.val(), trackEl.strand()) outEl = GenomeElement(genome, chr, trackEl.start(), trackEl.end(), trackEl.val(), trackEl.strand()) self._curEl = None return outEl else: #print self._curPos,' AND ', self._curEl.start() #print 'None' return None
def testWriteElement(self):
    """writeElement must pass the element on to every file of the output dir."""
    prefixes = ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights', 'cat']
    setup = SetupDir(self.path, prefixes)

    element = GenomeElement()
    setup.od.writeElement(element)

    # Each file object is expected to expose the element it received as .ge.
    for outFile in setup.od._files.values():
        self.assertEqual(element, outFile.ge)
def next(self):
    """Return the single GenomeElement of this iterator; StopIteration afterwards."""
    if not self._returnedOneElement:
        # Flag is flipped before building the element, as in the original order.
        self._returnedOneElement = True
        return GenomeElement(genome=self._genome,
                             chr=self._region.chr,
                             val=self._valSlice)
    raise StopIteration
def _compute(self):
    """Count element starts per micro-bin of the region.

    Returns a list with one GenomeElement per micro-bin; the bin's count is
    passed as the fifth positional argument. The last bin's end is clipped to
    the region end.
    """
    trackView = self._children[0].getResult()
    region = self._region
    binSize = self.microBin

    # Bin index of every start position, then occurrences per bin index.
    binIndexPerStart = trackView.startsAsNumpyArray() / binSize
    counts = np.bincount(binIndexPerStart)

    # bincount only reaches the highest occupied bin; pad with zeros up to the
    # number of micro-bins needed to cover the whole region.
    numMicroBins = int(math.ceil(float(len(region)) / binSize))
    padding = np.zeros(numMicroBins - len(counts), dtype='int')
    counts = np.concatenate([counts, padding])

    binElements = []
    for i in xrange(len(counts)):
        binStart = region.start + i * binSize
        binEnd = min(region.start + (i + 1) * binSize, region.end)
        binElements.append(
            GenomeElement(region.genome, region.chr, binStart, binEnd, counts[i]))
    return binElements
def next(self):
    """Return the next GenomeElement assembled from the parallel value lists.

    A field is None when its list is too short to hold the current index.
    Raises StopIteration when the index reaches len(self).
    """
    self._index += 1
    idx = self._index
    if idx >= len(self):
        raise StopIteration

    def valueAt(values):
        # Lists may be shorter than the iterator; missing entries become None.
        if idx < len(values):
            return values[idx]
        return None

    return GenomeElement(start=valueAt(self._startList),
                         end=valueAt(self._endList),
                         strand=valueAt(self._strandList),
                         val=valueAt(self._valList),
                         id=valueAt(self._idList),
                         edges=valueAt(self._edgesList),
                         weights=valueAt(self._weightsList),
                         extra=valueAt(self._extraList))
def testAssignAndRetrieve(self):
    """Attributes set via constructor, keyword extras or assignment are retrievable."""
    check = self.assertEqual

    # Case 1: extras given as a dict plus an explicit ordering of their keys.
    elem = GenomeElement('hg18', start=5, val=1.0,
                         extra={'a': 1, 'b': 2}, orderedExtraKeys=['a', 'b'])
    check(elem.genome, 'hg18')
    check(elem.chr, None)
    check(elem.start, 5)
    check(elem.end, None)
    check(elem.val, 1.0)
    check(elem.strand, None)
    check(elem.a, 1)
    check(elem.b, 2)
    check(elem.extra, {'a': 1, 'b': 2})
    check(elem.orderedExtraKeys, ['a', 'b'])

    # Case 2: one extra given as keyword argument, one assigned afterwards.
    elem = GenomeElement('hg18', a=1)
    elem.b = 2
    check(elem.genome, 'hg18')
    check(elem.a, 1)
    check(elem.b, 2)
    check(elem.extra, {'a': 1, 'b': 2})
    check(elem.orderedExtraKeys, ['a', 'b'])

    # Reading an attribute that was never set must raise AttributeError.
    self.assertRaises(AttributeError, getattr, elem, 'nonExisting')
def _getIter(elList, valDataType, valDim, edgeWeightDataType, edgeWeightDim, brList=None):
    """Build a MyGeIter filled with genome elements and bounding regions.

    Args:
        elList: sequence of element specs (genome, chr, start, end[, attrs]).
            When a spec has exactly five items, the fifth is a mapping of
            attribute name -> value that is set on the element.
        valDataType, valDim, edgeWeightDataType, edgeWeightDim: passed
            unchanged to the MyGeIter constructor.
        brList: optional sequence of bounding region specs
            (genome, chr, start, end, fifthItem); the fifth item is passed as
            the second argument of BoundingRegionTuple. Defaults to no
            bounding regions.

    Returns:
        The populated MyGeIter.
    """
    # None instead of a shared mutable default list.
    if brList is None:
        brList = []

    geIter = MyGeIter(valDataType, valDim, edgeWeightDataType, edgeWeightDim)

    for elSpec in elList:
        ge = GenomeElement(genome=elSpec[0], chr=elSpec[1],
                           start=elSpec[2], end=elSpec[3])
        if len(elSpec) == 5:
            attrs = elSpec[4]
            for prefix in attrs:
                setattr(ge, prefix, attrs[prefix])
        geIter.iter.append(ge)

    for brSpec in brList:
        br = GenomeRegion(genome=brSpec[0], chr=brSpec[1],
                          start=brSpec[2], end=brSpec[3])
        geIter.boundingRegionTuples.append(BoundingRegionTuple(br, brSpec[4]))

    return geIter
def __init__(self, fn, genome=None, trackName=None, suffix=None, external=False,
             printWarnings=True, strToUseInsteadOfFn='', *args, **kwArgs):
    """Store the constructor arguments and initialise the per-source state.

    Extra positional and keyword arguments are accepted and ignored here.
    """
    # Where the data comes from.
    self._fn = fn
    self._strToUseInsteadOfFn = strToUseInsteadOfFn
    self._suffix = suffix
    self._external = external

    # What the data belongs to.
    self._genome = genome
    self._trackName = trackName
    self._genomeElement = GenomeElement(genome)

    # Warning handling.
    self._printWarnings = printWarnings
    self._lastWarning = None

    # Not known at construction time.
    self._prefixList = None
# Parse one line and return the resulting GenomeElement, if any.
#
# Declaration lines are delegated to _parseDeclarationLine, and its result is
# returned when it is not None. Lines starting with '#' yield None. Other
# lines are split on whitespace and checked with _checkDataLineCols:
#   * fixed-step: the value comes from the first column and
#     _curElCountInBoundingRegion is incremented. When _isFunction is set, the
#     value is stored on the shared self._genomeElement, which is returned
#     (the same object is reused between calls). Otherwise the start comes
#     from _getFixedStepCurElStart.
#   * otherwise: the start is int(first column) - 1 and the value comes from
#     the second column.
# An end is computed via _getEnd/_checkValidEnd unless _isPoints is set, and
# the start is set to None when _isStepFunction is set.
#
# NOTE(review): the indentation of this block is lost in this view; in
# particular, whether "if self._isStepFunction" is nested inside
# "if not self._isPoints" cannot be confirmed from here.
# NOTE(review): numpy.float was a plain alias of the builtin float and has
# been removed from recent NumPy releases (1.24+); consider float() instead.
def _next(self, line): if self._isDeclarationLine(line): ge = self._parseDeclarationLine(line) if ge is not None: return ge else: if line.startswith('#'): return None cols = line.split() self._checkDataLineCols(cols) if self._fixedStep: self._curElCountInBoundingRegion += 1 val = numpy.float(self._handleNan(cols[0])) if self._isFunction: self._genomeElement.val = val return self._genomeElement else: start = self._checkValidStart( self._chr, self._getFixedStepCurElStart()) else: start = self._checkValidStart(self._chr, int(cols[0]) - 1) val = numpy.float(self._handleNan(cols[1])) end = None if not self._isPoints: end = self._checkValidEnd(self._chr, self._getEnd(start), start) if self._isStepFunction: start = None return GenomeElement(genome=self._genome, chr=self._chr, start=start, end=end, val=val)
def _next(self, line):
    """Parse one GFF line into a GenomeElement.

    Args:
        line: a single line of the file, without trailing newline.

    Returns:
        None for lines starting with '#', otherwise a GenomeElement with chr,
        source, type, start (column 4 minus 1), end, val, strand, phase and
        attributes set, plus id/name when the attribute column holds an
        'ID=' / 'Name=' entry (key compared case-insensitively).

    Raises:
        InvalidFormatError: if the line does not have exactly 9
            tab-separated columns.
    """
    if line.startswith('#'):
        return None

    origCols = line.split('\t')
    cols = [unquote(x) for x in origCols]
    if len(cols) != 9:
        raise InvalidFormatError("Error: GFF files must contain 9 tab-separated columns")

    ge = GenomeElement(self._genome)
    ge.chr = self._checkValidChr(cols[0])
    ge.source = cols[1]
    ge.type = cols[2]
    ge.start = self._checkValidStart(ge.chr, int(cols[3]) - 1)
    ge.end = self._checkValidEnd(ge.chr, int(cols[4]), start=ge.start)
    # numpy.float was only an alias of the builtin float and no longer exists
    # in NumPy >= 1.24; the builtin gives the same result on every version.
    ge.val = float(self._handleNan(cols[5]))
    ge.strand = self._getStrandFromString(cols[6])
    ge.phase = cols[7]
    ge.attributes = cols[8]

    # The attribute column is split on its raw text and each value is unquoted
    # afterwards - presumably so that escaped ';' or '=' inside a value do not
    # break the split.
    for attr in origCols[8].split(';'):
        attrSplitted = attr.split('=')
        if len(attrSplitted) != 2:
            continue
        key, attrVal = attrSplitted
        loweredKey = key.lower()
        if loweredKey == 'id':
            ge.id = unquote(attrVal)
        elif loweredKey == 'name':
            ge.name = unquote(attrVal)

    return ge
def _wrappedTrackElsGenerator(self):
    """Generate GenomeElements for all track elements of all bounding regions."""
    track = self._getTrack()
    for boundingRegion in self._boundingRegions:
        # One track view per bounding region, created just before it is walked.
        tv = self._getTrackView(track, boundingRegion)
        for te in tv:
            ge = GenomeElement.createGeFromTrackEl(te, tv.trackFormat,
                                                   globalCoords=self._globalCoords)
            yield ge
def next(self):
    """Return the next track element of the track view as a GenomeElement.

    StopIteration from the underlying iterator propagates to the caller.
    """
    return GenomeElement.createGeFromTrackEl(self._tvIter.next(),
                                             self._tv.trackFormat)
def testExclude(self):
    """exclude() returns the parts of chr21:100-200 not covered by the argument."""
    def element(start, end, chrom='chr21'):
        return GenomeElement('TestGenome', chrom, start, end)

    # (excluded chr, start, end) -> expected remaining (start, end) pieces
    cases = [
        (('chr21', 90, 100), [(100, 200)]),    # adjacent on the left
        (('chr21', 200, 210), [(100, 200)]),   # adjacent on the right
        (('chrM', 100, 110), [(100, 200)]),    # other chromosome
        (('chr21', 100, 110), [(110, 200)]),
        (('chr21', 90, 110), [(110, 200)]),
        (('chr21', 190, 200), [(100, 190)]),
        (('chr21', 190, 210), [(100, 190)]),
        (('chr21', 90, 210), []),              # fully covered
        (('chr21', 140, 160), [(100, 140), (160, 200)]),  # split in two
    ]

    for (exChr, exStart, exEnd), remaining in cases:
        expected = [element(start, end) for start, end in remaining]
        actual = element(100, 200).exclude(element(exStart, exEnd, exChr))
        self.assertEqual(expected, actual)
def testExtend(self):
    """extend() on chr21:100-200 with various sizes and ensureValidity settings."""
    def element(start, end):
        return GenomeElement('TestGenome', 'chr21', start, end)

    # (expected start, expected end, extend size, keyword arguments)
    cases = [
        (100, 200, 0, {}),
        (0, 200, -100, {}),
        (-100, 200, -200, {'ensureValidity': False}),
        (0, 200, -200, {'ensureValidity': True}),
        (100, 300, 100, {}),
        (100, 50000200, 50000000, {'ensureValidity': False}),
        (100, 46944323, 50000000, {'ensureValidity': True}),
    ]

    for expStart, expEnd, size, kwArgs in cases:
        expected = element(expStart, expEnd)
        actual = element(100, 200).extend(size, **kwArgs)
        self.assertEqual(expected, actual)
def testEqual(self):
    """Elements compare equal only when every field, including extras, matches."""
    def element(genome='TestGenome', chrom='chr21', start=10, end=100, val=5,
                strand=True, elId='id', edges=None, weights=None, extra=None):
        # Fresh list/dict objects are created for every element, as in
        # literal constructor calls.
        if edges is None:
            edges = ['id2', 'id3']
        if weights is None:
            weights = [5, 6]
        if extra is None:
            extra = {'source': 'source'}
        return GenomeElement(genome, chrom, start, end, val, strand, elId,
                             edges, weights, extra=extra)

    self.assertEqual(element(), element())

    # Each variant differs from the baseline in exactly one field.
    variants = [
        dict(genome='hg18'),
        dict(chrom='chrM'),
        dict(start=20),
        dict(end=110),
        dict(val=6),
        dict(strand=False),
        dict(elId='id4'),
        dict(edges=['id2', 'id4']),
        dict(weights=[5, 7]),
        dict(extra={'source': 'source', 'other': 'value'}),
    ]
    for changedField in variants:
        self.assertNotEqual(element(), element(**changedField))
def _compute(self):
    """Return two fixed one-position elements: [0, 1) and [10, 11)."""
    from gold.origdata.GenomeElement import GenomeElement
    return [GenomeElement(start=0, end=1), GenomeElement(start=10, end=11)]
def testContains(self):
    """contains() is true only for elements lying fully inside chr21:10-100."""
    def element(start, end, chrom='chr21'):
        return GenomeElement('TestGenome', chrom, start, end)

    # (chr, start, end of the other element, expected result)
    cases = [
        ('chr21', 10, 100, True),
        ('chr21', 20, 80, True),
        ('chr21', 10, 101, False),
        ('chr21', 9, 100, False),
        ('chr21', 9, 101, False),
        ('chr21', 0, 10, False),
        ('chrM', 20, 80, False),
    ]

    for chrom, start, end, expected in cases:
        result = element(10, 100).contains(element(start, end, chrom))
        if expected:
            self.assertTrue(result)
        else:
            self.assertFalse(result)
def printGSuite(cls, choices, cols, rows, colListString, outFile):
    """Write one GTrack file per disease/trait and a GSuite referencing them.

    Args:
        choices: not used by this method.
        cols: list of column names of the rows.
        rows: sequence of rows, each indexable by column position.
        colListString: not used by this method.
        outFile: destination handed to GSuiteComposer.composeToFile.

    Rows are grouped on the disease column. The chromosome, start and value
    columns become the core fields of each GenomeElement; every remaining
    column is stored as an extra field (empty cells become '.'). When
    DATABASE_GENOME differs from OUTPUT_GENOME, positions are lifted over with
    pyliftover and a message is printed for positions that cannot be lifted.
    """
    from quick.extra.ProgressViewer import ProgressViewer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
    from gold.origdata.GtrackComposer import ExtendedGtrackComposer
    from gold.origdata.GESourceWrapper import ListGESourceWrapper
    from gold.origdata.GenomeElement import GenomeElement
    from collections import defaultdict
    from copy import copy
    from unidecode import unidecode
    from pyliftover import LiftOver

    gSuite = GSuite()

    diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
    chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
    startColIndex = cols.index(cls.START_COLUMN_NAME)
    valColIndex = cols.index(cls.VAL_COLUMN_NAME)

    # Every column that is not a core column is kept as an extra field.
    orderedExtraKeys = copy(cols)
    extraIndexes = list(range(len(cols)))
    for colName in [cls.DISEASE_COLUMN_NAME, cls.CHR_COLUMN_NAME,
                    cls.START_COLUMN_NAME, cls.VAL_COLUMN_NAME]:
        extraIndexes.remove(cols.index(colName))
        orderedExtraKeys.remove(colName)
    orderedExtraKeys = [cls._fixColNameForGTrack(key) for key in orderedExtraKeys]

    diseaseToRowsDict = defaultdict(list)
    for row in rows:
        disease = row[diseaseColIndex]
        if isinstance(disease, unicode):
            disease = unidecode(disease).replace('\x00', '')
        diseaseToRowsDict[disease].append(row)

    progressViewer = ProgressViewer(
        [('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

    shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
    # Built once, on first use, instead of once per disease; with no diseases
    # it is never built, as before.
    liftOver = None

    for disease in sorted(diseaseToRowsDict.keys()):
        uri = GalaxyGSuiteTrack.generateURI(
            galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
            extraFileName=disease.replace('/', '_') + '.gtrack')
        gSuiteTrack = GSuiteTrack(uri, title=disease, genome=cls.OUTPUT_GENOME)
        gSuite.addTrack(gSuiteTrack)

        if shouldLiftOver and liftOver is None:
            liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME)

        geList = []
        for row in diseaseToRowsDict[disease]:
            extra = {}
            for col, index in zip(orderedExtraKeys, extraIndexes):
                cell = row[index].strip()
                if isinstance(cell, unicode):
                    cell = unidecode(cell)
                extra[col] = cell if cell != '' else '.'

            # Translate numeric / alternative chromosome names.
            chrom = 'chr' + row[chrColIndex]
            if chrom == 'chr23':
                chrom = 'chrX'
            if chrom == 'chr24':
                chrom = 'chrY'
            if chrom == 'chrMT':
                chrom = 'chrM'

            start = int(row[startColIndex])

            if shouldLiftOver:
                newPosList = liftOver.convert_coordinate(chrom, start)
                if newPosList is None or len(newPosList) != 1:
                    # Arguments are given in the order the message names
                    # them: position first, then chromosome.
                    msg = 'SNP with position %s on chromosome %s ' % (start, chrom) + \
                          'could not be lifted over from reference genome ' + \
                          '%s to %s (for disease/trait "%s")' % \
                          (cls.DATABASE_GENOME, cls.OUTPUT_GENOME, disease)
                    print(msg)
                else:
                    chrom, start = newPosList[0][0:2]

            # NOTE(review): the append is kept at row level, since otherwise
            # nothing would be written when no lift-over is needed. As a
            # consequence, rows that could not be lifted over are still
            # written with their original coordinates - confirm against the
            # upstream file that they are not meant to be skipped.
            geList.append(GenomeElement(chr=chrom, start=start,
                                        val=row[valColIndex],
                                        orderedExtraKeys=orderedExtraKeys,
                                        extra=extra))

        geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
        wrappedGeSource = ListGESourceWrapper(geSource, geList)
        composer = ExtendedGtrackComposer(wrappedGeSource)
        composer.composeToFile(gSuiteTrack.path)

        progressViewer.update()

    GSuiteComposer.composeToFile(gSuite, outFile)
def testOverlaps(self):
    """overlaps() is true when the other element shares positions with chr21:10-100."""
    def element(start, end, chrom='chr21'):
        return GenomeElement('TestGenome', chrom, start, end)

    overlapping = [
        ('chr21', 10, 100),
        ('chr21', 20, 80),
        ('chr21', 10, 101),
        ('chr21', 9, 100),
        ('chr21', 9, 101),
    ]
    nonOverlapping = [
        ('chr21', 0, 10),      # ends where the reference starts
        ('chr21', 100, 110),   # starts where the reference ends
        ('chrM', 20, 80),      # other chromosome
    ]

    for chrom, start, end in overlapping:
        self.assertTrue(element(10, 100).overlaps(element(start, end, chrom)))
    for chrom, start, end in nonOverlapping:
        self.assertFalse(element(10, 100).overlaps(element(start, end, chrom)))
def _next(self, line):
    """Parse one GFF line into a GenomeElement.

    Raises StopIteration at a '##FASTA' line and returns None for any other
    line starting with '#'. Raises InvalidFormatError when the line does not
    have exactly 9 tab-separated columns.
    """
    if line.startswith('##FASTA'):
        raise StopIteration
    if line.startswith('#'):
        return None

    rawCols = line.split('\t')
    cols = [unquote(col) for col in rawCols]
    if len(cols) != 9:
        raise InvalidFormatError(
            "Error: GFF files must contain 9 tab-separated columns")

    ge = GenomeElement(self._genome)
    ge.chr = self._checkValidChr(cols[0])
    ge.source = cols[1]
    self._parseThirdCol(ge, cols[2])
    ge.start = self._checkValidStart(ge.chr, int(cols[3]) - 1)
    ge.end = self._checkValidEnd(ge.chr, int(cols[4]), start=ge.start)
    self._parseSixthCol(ge, cols[5])
    ge.strand = self._getStrandFromString(cols[6])
    ge.phase = cols[7]
    ge.attributes = cols[8]

    # id / name are picked from 'key=value' entries of the raw attribute
    # column; the key is compared case-insensitively.
    for entry in rawCols[8].split(';'):
        parts = entry.split('=')
        if len(parts) != 2:
            continue
        key, rawValue = parts
        loweredKey = key.lower()
        if loweredKey == 'id':
            ge.id = unquote(rawValue)
        elif loweredKey == 'name':
            ge.name = unquote(rawValue)

    return ge
def testTouches(self):
    """touches() is true only for elements directly adjacent to chr21:10-100."""
    def element(start, end, chrom='chr21'):
        return GenomeElement('TestGenome', chrom, start, end)

    # (chr, start, end of the other element, expected result)
    cases = [
        ('chr21', 10, 100, False),
        ('chr21', 20, 80, False),
        ('chr21', 10, 101, False),
        ('chr21', 9, 100, False),
        ('chr21', 9, 101, False),
        ('chr21', 0, 10, True),       # ends exactly at the reference start
        ('chr21', 100, 110, True),    # starts exactly at the reference end
        ('chr21', 0, 9, False),       # gap of one position on the left
        ('chr21', 101, 110, False),   # gap of one position on the right
        ('chrM', 20, 80, False),
    ]

    for chrom, start, end, expected in cases:
        result = element(10, 100).touches(element(start, end, chrom))
        if expected:
            self.assertTrue(result)
        else:
            self.assertFalse(result)