def oneRegionTest(self, regionID, chrommod):
    """Check one region's stored overlap values against the Oracle reference.

    Loads the ``chrommod_<chrommod>`` dataset description, reads the region's
    coordinates from the local ``regions`` table and its stored
    ``overlap_ratio`` / ``overlap_count`` from the dataset's ``_data`` table,
    then queries Oracle for the reference intervals intersecting the region
    and asserts that both the count and the ratio agree.

    Parameters:
        regionID: identifier of the region to verify.
        chrommod: chromatin-modification name; used to build the dataset
            name / ini file name and as the ``chrommod`` filter in Oracle.

    Side effects:
        Sets ``self.chmodName`` and ``self.chrommod`` and opens the dataset's
        DB connections. Reads ``self.genome``.
    """
    self.chmodName = chrommod
    self.chrommod = readDatasetDescriptions.readDataset([
        "chrommod_" + self.chmodName,
        "hg18_Barski_chrommod_" + self.chmodName + ".ini",
        "D:/Projects/Integrated_Genome_Profile_Search_Engine/Cosgen/Datasets/",
    ])
    self.chrommod.datasetCollectionName = "main"
    self.chrommod.openDBConnections()

    # Region coordinates: row layout is (regionID, chrom, start, stop).
    self.chrommod.cR.execute(
        "SELECT regionID,chrom,start,stop FROM regions WHERE regionID="
        + str(regionID))
    regionData = list(self.chrommod.cR.fetchone())
    chrom = regionData[1]
    start = regionData[2]
    stop = regionData[3]
    print("%s %s" % (regionID, regionData))

    # Stored result for the region.
    sqlQuery = ("SELECT overlap_ratio, overlap_count FROM "
                + self.chrommod.datasetSimpleName
                + "_data WHERE regionID = " + str(regionID))
    self.chrommod.cD.execute(sqlQuery)
    storedRow = self.chrommod.cD.fetchone()
    if storedRow is None:
        # Same convention as mainTest: a region without a row in the _data
        # table is compared as "no overlap" instead of crashing on
        # list(None).
        regionResult = [0, 0]
    else:
        regionResult = list(storedRow)
    print("%s %s" % (regionID, regionResult))

    # NOTE(review): the Oracle credentials are hard-coded here; consider
    # moving them to configuration.
    dbcon = cx_Oracle.connect("epigraph_admin", "epigraph123", "bioinfo")
    try:
        dbcur = dbcon.cursor()
        try:
            oracleSQLQuery = (
                "SELECT chromstart, chromend FROM hg18_EPIGRAPH_#romatin WHERE chrom = '"
                + str(utilities.convertIntToChrom(self.genome, chrom))
                + "' AND chromstart <= " + str(stop)
                + " AND chromend > " + str(start)
                + " AND chrommod = '" + self.chmodName
                + "' ORDER BY chromstart")
            dbcur.execute(oracleSQLQuery)
            oracleD = [list(row) for row in dbcur.fetchall()]
        finally:
            dbcur.close()
    finally:
        # Always release the Oracle connection, including when the query
        # itself raises.
        dbcon.close()
    print("%s %s" % (regionID, oracleD))

    # Presumably gr_reduceRegionSet merges overlapping intervals and
    # gr_Coverage returns the covered length inside [start, stop] -- confirm
    # against utilities. A copy is passed so oracleD keeps its length.
    reducedGR = utilities.gr_reduceRegionSet(list(oracleD))
    oracle_overlap_ratio = utilities.gr_Coverage(
        reducedGR, start, stop) / float(stop - start)

    self.assertEqual(len(oracleD), regionResult[1])
    self.assertEqual(regionResult[0], oracle_overlap_ratio)
def readDatasetFromFileObject(f,genome,chromIndex,chromStartIndex,chromEndIndex,otherScoreIndeces):
    """Read tab-separated region rows from *f* and return them sorted.

    Each line is split on tabs. The chromosome column is converted with
    ``utilities.convertChromToInt`` so that sorting orders chromosomes
    correctly, the start/end columns are parsed as integers, and any columns
    listed in *otherScoreIndeces* are appended unchanged (as strings). After
    sorting with the module's ``quicksort``, the chromosome is converted back
    with ``utilities.convertIntToChrom``.

    Parameters:
        f: file-like object providing ``readlines()``.
        genome: genome identifier passed through to the chromosome converters.
        chromIndex: column index of the chromosome name.
        chromStartIndex: column index of the region start.
        chromEndIndex: column index of the region end.
        otherScoreIndeces: iterable of extra column indices to keep, or a
            falsy value for none.

    Returns:
        A list of ``[chrom, start, end, <extra columns...>]`` lists.

    Raises:
        ValueError: if a start or end coordinate is negative, or is not an
            integer (from ``int()``).
        utilities.GDMException: if a chromosome name cannot be converted and
            the row is not one of the tolerated cases (see below).
    """
    ds = []
    for i, line in enumerate(f.readlines()):
        d = line.strip().split("\t")
        try:
            # Convert the chrom to a sortable value so that the sorting gives
            # the correct order.
            nd = [utilities.convertChromToInt(genome, d[chromIndex]),
                  int(d[chromStartIndex]),
                  int(d[chromEndIndex])]
        except utilities.GDMException:
            chromName = d[chromIndex]
            skippable = (
                "_random" in chromName
                or "chrM" in chromName
                or "hap" in chromName
                or chromName.startswith("chrUn")
                or i == 0  # potential title line, allow it to pass
            )
            if not skippable:
                raise
            continue

        # Validate the parsed integers. The previous check compared the raw
        # string fields d[1]/d[2] with 0, which never fires on Python 2 and
        # ignored the configured start/end columns.
        if nd[1] < 0 or nd[2] < 0:
            raise ValueError(
                "negative region coordinate on line %d: start=%d, end=%d"
                % (i + 1, nd[1], nd[2]))

        if otherScoreIndeces:
            nd.extend([d[e] for e in otherScoreIndeces])
        ds.append(nd)

    ds = quicksort(ds)
    return [[utilities.convertIntToChrom(genome, row[0])] + row[1:]
            for row in ds]
def mainTest(self, chrommod, n):
    """Spot-check stored overlap values for random regions against Oracle.

    Loads the ``chrommod_<chrommod>`` dataset description, draws *n* random
    region IDs (duplicates collapse, so fewer than *n* distinct regions may
    be checked), and for every selected region asserts that the stored
    ``overlap_count`` and ``overlap_ratio`` match values recomputed from the
    Oracle reference table.

    Parameters:
        chrommod: chromatin-modification name; used to build the dataset
            name / ini file name and as the ``chrommod`` filter in Oracle.
        n: number of random draws.

    Raises:
        Exception: if the ``regions`` table is not numbered 1..N without
            gaps (the row at position ``regionID - 1`` has a different ID).

    Side effects:
        Sets ``self.chmodName`` and ``self.chrommod`` and opens the dataset's
        DB connections. Reads ``self.genome``.
    """
    self.chmodName = chrommod
    self.chrommod = readDatasetDescriptions.readDataset([
        "chrommod_" + self.chmodName,
        "hg18_Barski_chrommod_" + self.chmodName + ".ini",
        "D:/Projects/Integrated_Genome_Profile_Search_Engine/Cosgen/Datasets/",
    ])
    self.chrommod.datasetCollectionName = "main"

    # Load n random regions.
    self.chrommod.openDBConnections()
    self.chrommod.cR.execute("SELECT COUNT(*) FROM regions")
    nRows = self.chrommod.cR.fetchone()[0]
    print(nRows)
    self.chrommod.cR.execute(
        "SELECT regionID,chrom,start,stop,datasetID FROM regions ORDER BY regionID"
    )
    dR = self.chrommod.cR.fetchall()

    selectedRegionsData = {}
    for _ in range(n):
        # Region IDs are 1-based (row regionID - 1 must carry regionID), so
        # draw from 1..nRows. Drawing from 0..nRows-1 hit dR[-1] for 0 and
        # could never select the last region.
        candidateID = random.randint(1, nRows)
        row = dR[candidateID - 1]
        if candidateID != row[0]:
            print("%s %s" % (candidateID, row))
            raise Exception(
                "regions table is not contiguously numbered at regionID %s"
                % candidateID)
        # Keep (chrom, start, stop, datasetID).
        selectedRegionsData[candidateID] = row[1:]
    selectedRegionIDs = sorted(selectedRegionsData)
    print(selectedRegionIDs)

    # NOTE(review): the Oracle credentials are hard-coded here; consider
    # moving them to configuration.
    dbcon = cx_Oracle.connect("epigraph_admin", "epigraph123", "bioinfo")
    try:
        dbcur = dbcon.cursor()
        try:
            for regionID in selectedRegionIDs:
                sqlQuery = ("SELECT overlap_ratio, overlap_count FROM "
                            + self.chrommod.datasetSimpleName
                            + "_data WHERE regionID = " + str(regionID))
                self.chrommod.cD.execute(sqlQuery)
                storedRow = self.chrommod.cD.fetchone()
                if storedRow is None:
                    # No row in the _data table is treated as "no overlap".
                    overlap_ratio = 0
                    overlap_count = 0
                else:
                    overlap_ratio = storedRow[0]
                    overlap_count = storedRow[1]

                chrom = selectedRegionsData[regionID][0]
                start = selectedRegionsData[regionID][1]
                stop = selectedRegionsData[regionID][2]
                print("%s %s %s" % (regionID, overlap_ratio, overlap_count))

                oracleSQLQuery = (
                    "SELECT chromstart, chromend FROM hg18_EPIGRAPH_#romatin WHERE chrom = '"
                    + str(utilities.convertIntToChrom(self.genome, chrom))
                    + "' AND chromstart <= " + str(stop)
                    + " AND chromend > " + str(start)
                    + " AND chrommod = '" + self.chmodName
                    + "' ORDER BY chromstart")
                print(oracleSQLQuery)
                dbcur.execute(oracleSQLQuery)
                oracleD = [list(r) for r in dbcur.fetchall()]

                self.assertEqual(overlap_count, len(oracleD))
                # Presumably gr_reduceRegionSet merges overlapping intervals
                # and gr_Coverage returns the covered length inside
                # [start, stop] -- confirm against utilities.
                reducedGR = utilities.gr_reduceRegionSet(list(oracleD))
                oracle_overlap_ratio = utilities.gr_Coverage(
                    reducedGR, start, stop) / float(stop - start)
                self.assertEqual(overlap_ratio, oracle_overlap_ratio)
        finally:
            dbcur.close()
    finally:
        # Always release the Oracle connection, including when an assertion
        # fails part-way through the loop.
        dbcon.close()
def mainTest(self,chrommod,n):
    """Spot-check stored overlap values for random regions against Oracle.

    Loads the ``chrommod_<chrommod>`` dataset description, draws *n* random
    region IDs (duplicates collapse, so fewer than *n* distinct regions may
    be checked), and for every selected region asserts that the stored
    ``overlap_count`` and ``overlap_ratio`` match values recomputed from the
    Oracle reference table.

    Parameters:
        chrommod: chromatin-modification name; used to build the dataset
            name / ini file name and as the ``chrommod`` filter in Oracle.
        n: number of random draws.

    Raises:
        Exception: if the ``regions`` table is not numbered 1..N without
            gaps (the row at position ``regionID - 1`` has a different ID).

    Side effects:
        Sets ``self.chmodName`` and ``self.chrommod`` and opens the dataset's
        DB connections. Reads ``self.genome``.
    """
    self.chmodName = chrommod
    self.chrommod = readDatasetDescriptions.readDataset([
        "chrommod_" + self.chmodName,
        "hg18_Barski_chrommod_" + self.chmodName + ".ini",
        "D:/Projects/Integrated_Genome_Profile_Search_Engine/Cosgen/Datasets/",
    ])
    self.chrommod.datasetCollectionName = "main"

    # Load n random regions.
    self.chrommod.openDBConnections()
    self.chrommod.cR.execute("SELECT COUNT(*) FROM regions")
    nRows = self.chrommod.cR.fetchone()[0]
    print(nRows)
    self.chrommod.cR.execute(
        "SELECT regionID,chrom,start,stop,datasetID FROM regions ORDER BY regionID")
    dR = self.chrommod.cR.fetchall()

    selectedRegionsData = {}
    for _ in range(n):
        # Region IDs are 1-based (row regionID - 1 must carry regionID), so
        # draw from 1..nRows. Drawing from 0..nRows-1 hit dR[-1] for 0 and
        # could never select the last region.
        candidateID = random.randint(1, nRows)
        row = dR[candidateID - 1]
        if candidateID != row[0]:
            print("%s %s" % (candidateID, row))
            raise Exception(
                "regions table is not contiguously numbered at regionID %s"
                % candidateID)
        # Keep (chrom, start, stop, datasetID).
        selectedRegionsData[candidateID] = row[1:]
    selectedRegionIDs = sorted(selectedRegionsData)
    print(selectedRegionIDs)

    # NOTE(review): the Oracle credentials are hard-coded here; consider
    # moving them to configuration.
    dbcon = cx_Oracle.connect("epigraph_admin", "epigraph123", "bioinfo")
    try:
        dbcur = dbcon.cursor()
        try:
            for regionID in selectedRegionIDs:
                sqlQuery = ("SELECT overlap_ratio, overlap_count FROM "
                            + self.chrommod.datasetSimpleName
                            + "_data WHERE regionID = " + str(regionID))
                self.chrommod.cD.execute(sqlQuery)
                storedRow = self.chrommod.cD.fetchone()
                if storedRow is None:
                    # No row in the _data table is treated as "no overlap".
                    overlap_ratio = 0
                    overlap_count = 0
                else:
                    overlap_ratio = storedRow[0]
                    overlap_count = storedRow[1]

                chrom = selectedRegionsData[regionID][0]
                start = selectedRegionsData[regionID][1]
                stop = selectedRegionsData[regionID][2]
                print("%s %s %s" % (regionID, overlap_ratio, overlap_count))

                oracleSQLQuery = (
                    "SELECT chromstart, chromend FROM hg18_EPIGRAPH_#romatin WHERE chrom = '"
                    + str(utilities.convertIntToChrom(self.genome, chrom))
                    + "' AND chromstart <= " + str(stop)
                    + " AND chromend > " + str(start)
                    + " AND chrommod = '" + self.chmodName
                    + "' ORDER BY chromstart")
                print(oracleSQLQuery)
                dbcur.execute(oracleSQLQuery)
                oracleD = [list(r) for r in dbcur.fetchall()]

                self.assertEqual(overlap_count, len(oracleD))
                # Presumably gr_reduceRegionSet merges overlapping intervals
                # and gr_Coverage returns the covered length inside
                # [start, stop] -- confirm against utilities.
                reducedGR = utilities.gr_reduceRegionSet(list(oracleD))
                oracle_overlap_ratio = utilities.gr_Coverage(
                    reducedGR, start, stop) / float(stop - start)
                self.assertEqual(overlap_ratio, oracle_overlap_ratio)
        finally:
            dbcur.close()
    finally:
        # Always release the Oracle connection, including when an assertion
        # fails part-way through the loop.
        dbcon.close()
def oneRegionTest(self,regionID,chrommod):
    """Check one region's stored overlap values against the Oracle reference.

    Loads the ``chrommod_<chrommod>`` dataset description, reads the region's
    coordinates from the local ``regions`` table and its stored
    ``overlap_ratio`` / ``overlap_count`` from the dataset's ``_data`` table,
    then queries Oracle for the reference intervals intersecting the region
    and asserts that both the count and the ratio agree.

    Parameters:
        regionID: identifier of the region to verify.
        chrommod: chromatin-modification name; used to build the dataset
            name / ini file name and as the ``chrommod`` filter in Oracle.

    Side effects:
        Sets ``self.chmodName`` and ``self.chrommod`` and opens the dataset's
        DB connections. Reads ``self.genome``.
    """
    self.chmodName = chrommod
    self.chrommod = readDatasetDescriptions.readDataset([
        "chrommod_" + self.chmodName,
        "hg18_Barski_chrommod_" + self.chmodName + ".ini",
        "D:/Projects/Integrated_Genome_Profile_Search_Engine/Cosgen/Datasets/",
    ])
    self.chrommod.datasetCollectionName = "main"
    self.chrommod.openDBConnections()

    # Region coordinates: row layout is (regionID, chrom, start, stop).
    self.chrommod.cR.execute(
        "SELECT regionID,chrom,start,stop FROM regions WHERE regionID="
        + str(regionID))
    regionData = list(self.chrommod.cR.fetchone())
    chrom = regionData[1]
    start = regionData[2]
    stop = regionData[3]
    print("%s %s" % (regionID, regionData))

    # Stored result for the region.
    sqlQuery = ("SELECT overlap_ratio, overlap_count FROM "
                + self.chrommod.datasetSimpleName
                + "_data WHERE regionID = " + str(regionID))
    self.chrommod.cD.execute(sqlQuery)
    storedRow = self.chrommod.cD.fetchone()
    if storedRow is None:
        # Same convention as mainTest: a region without a row in the _data
        # table is compared as "no overlap" instead of crashing on
        # list(None).
        regionResult = [0, 0]
    else:
        regionResult = list(storedRow)
    print("%s %s" % (regionID, regionResult))

    # NOTE(review): the Oracle credentials are hard-coded here; consider
    # moving them to configuration.
    dbcon = cx_Oracle.connect("epigraph_admin", "epigraph123", "bioinfo")
    try:
        dbcur = dbcon.cursor()
        try:
            oracleSQLQuery = (
                "SELECT chromstart, chromend FROM hg18_EPIGRAPH_#romatin WHERE chrom = '"
                + str(utilities.convertIntToChrom(self.genome, chrom))
                + "' AND chromstart <= " + str(stop)
                + " AND chromend > " + str(start)
                + " AND chrommod = '" + self.chmodName
                + "' ORDER BY chromstart")
            dbcur.execute(oracleSQLQuery)
            oracleD = [list(row) for row in dbcur.fetchall()]
        finally:
            dbcur.close()
    finally:
        # Always release the Oracle connection, including when the query
        # itself raises.
        dbcon.close()
    print("%s %s" % (regionID, oracleD))

    # Presumably gr_reduceRegionSet merges overlapping intervals and
    # gr_Coverage returns the covered length inside [start, stop] -- confirm
    # against utilities. A copy is passed so oracleD keeps its length.
    reducedGR = utilities.gr_reduceRegionSet(list(oracleD))
    oracle_overlap_ratio = utilities.gr_Coverage(
        reducedGR, start, stop) / float(stop - start)

    self.assertEqual(len(oracleD), regionResult[1])
    self.assertEqual(regionResult[0], oracle_overlap_ratio)