def run(self):
    """Parse the GFF3 file and load its features into the SQLite database.

    Parses ``self.gff3File`` with ``gff3.Gff3Parser``, opens ``self.dbFile``,
    executes ``_dbTableSQL`` to create the table, queues one row per parsed
    feature through ``self._batchInsertValues`` and flushes the remainder
    with ``self._insertValues``. Two indexes are created on the ``feature``
    table once all rows have been handed over.

    The cursor and the connection are closed even when a statement or an
    insert raises, so a failed run does not leak the database handle.
    """
    gff3Data = gff3.Gff3Parser(self.gff3File).parse()
    dbconn = sqlite3.connect(self.dbFile)
    try:
        dbcur = dbconn.cursor()
        try:
            dbcur.execute(_dbTableSQL)  # create table
            dbcur.execute("PRAGMA journal_mode=WAL")
            dbconn.commit()
            for featureName in gff3Data.byFeatureName.keys():
                for feature in gff3Data.byFeatureName[featureName]:
                    # Ignores any parent IDs besides the first one.
                    parentIds = [
                        parent.uniqueId for parent in feature.parents]
                    parentId = parentIds[0] if parentIds else ''
                    # FIXME: No current code to ensure childIDs
                    # in correct order.
                    childIds = [
                        child.uniqueId for child in feature.children]
                    values = (
                        feature.uniqueId,
                        parentId,
                        _db_serialize(childIds),
                        feature.seqname,
                        feature.source,
                        feature.type,
                        feature.start,
                        feature.end,
                        feature.score,
                        feature.strand,
                        feature.featureName,
                        # Only the first gene/transcript name is stored;
                        # a missing attribute is written as NULL.
                        feature.attributes.get("gene_name", [None])[0],
                        feature.attributes.get(
                            "transcript_name", [None])[0],
                        _db_serialize(feature.attributes))
                    self._batchInsertValues(values, dbcur, dbconn)
            # Hand over whatever is still queued after the last feature.
            self._insertValues(dbcur, dbconn)
            dbcur.execute(("create INDEX idx1 "
                           "on feature(start, end, reference_name)"))
            dbcur.execute(
                "CREATE INDEX name_type_index ON FEATURE (gene_name, type)")
            # The result of this pragma is not read here.
            dbcur.execute("PRAGMA INDEX_LIST('feature')")
            # Commit explicitly so nothing pending is discarded when the
            # connection is closed; this is a no-op if no transaction is
            # open.
            dbconn.commit()
        finally:
            dbcur.close()
    finally:
        dbconn.close()
def setUp(self):
    """Parse the ``specialCasesTest.gff3`` fixture for each test.

    The parser is kept on ``self.gff3Parser`` and the value returned by
    its ``parse()`` call on ``self.gff3Data``.
    """
    parser = gff3.Gff3Parser(_testDataDir + "specialCasesTest.gff3")
    self.gff3Parser = parser
    self.gff3Data = parser.parse()
def setUp(self):
    """Parse the ``sacCerTest.gff3`` fixture for each test.

    The parser is kept on ``self.gff3Parser`` and the value returned by
    its ``parse()`` call on ``self.gff3Data``.
    """
    parser = gff3.Gff3Parser(_testDataDir + "sacCerTest.gff3")
    self.gff3Parser = parser
    self.gff3Data = parser.parse()
def setUp(self):
    """Parse the ``discontinuous.gff3`` fixture for each test.

    The parser is kept on ``self.gff3Parser`` and the value returned by
    its ``parse()`` call on ``self.gff3Data``.
    """
    parser = gff3.Gff3Parser(_testDataDir + "discontinuous.gff3")
    self.gff3Parser = parser
    self.gff3Data = parser.parse()
def setUp(self):
    """Parse the ``gencodeV21Set1.gff3`` fixture for each test.

    The parser is kept on ``self.gff3Parser`` and the value returned by
    its ``parse()`` call on ``self.gff3Data``.
    """
    parser = gff3.Gff3Parser(_testDataDir + "gencodeV21Set1.gff3")
    self.gff3Parser = parser
    self.gff3Data = parser.parse()