def extractReads(self, bams=None):
    """Extract reads from sam files and write to file

    Loads the bins selected by self.bids from the database named by
    self.dbFileName, builds a contig-name -> bin-id lookup and passes it,
    together with the BAM files, to the BTBP parser's extractReads(), which
    is given self.outDir as its output folder.

    bams: list of BAM file names to pull reads from; None (the default)
          is treated as an empty list

    Side effects: (re)assigns self.BM and self.PM.
    """
    # Use a None sentinel rather than a literal [] default: a default list
    # is created once and shared by every call, so any mutation by the
    # downstream parser would leak into later calls.
    if bams is None:
        bams = []

    # load data
    self.BM = binManager.BinManager(dbFileName=self.dbFileName)  # bins
    self.BM.loadBins(makeBins=True, silent=False, bids=self.bids)
    self.PM = self.BM.PM

    # single-argument parenthesised form prints identically on Python 2 and 3
    print("Extracting reads")

    # work out a set of targets to pass to the parser:
    # every contig that belongs to a loaded bin maps to that bin's id
    targets = {}
    contig_names = self.PM.contigNames
    for bid in self.BM.getBids():
        current_bin = self.BM.getBin(bid)
        for row_index in current_bin.rowIndices:
            targets[contig_names[row_index]] = bid

    # get something to parse the bams with
    bam_parser = BTBP()
    bam_parser.extractReads(bams,
                            '',
                            targets,
                            combineBams=False,
                            headersOnly=True,
                            dontTrustSamFlags=False,
                            folder=self.outDir,
                            verbose=True
                            )
def parse(self, bamFiles, stoitColNames, covTable, contigNames, cid2Indicies):
    """Read coverage and link data from BAM files and store the coverage

    One row per contig is appended to covTable (a table in an open h5
    file), visiting the contigs in sorted name order. Contigs with no
    coverage entry for a given BAM get 0.0 in that column.

    bamFiles:      BAM files handed to the BamParser
    stoitColNames: names of the COV_x columns, one per coverage profile
    covTable:      destination table; must offer .row and .flush()
    contigNames:   mapping whose keys are the contig ids to write
    cid2Indicies:  mapping from contig id to its row index

    Returns a list of [contig 1 index, contig 2 index, numReads, linkType,
    gap] lists. Links that touch a contig missing from cid2Indicies are
    left out.
    """
    print("Importing BAM files")
    from bamtyper.utilities import BamParser as BTBP
    parser = BTBP()
    links, _ref_lengths, coverages = parser.getLinks(bamFiles,
                                                     full=False,
                                                     verbose=True,
                                                     doCoverage=True,
                                                     minJoin=5)

    # write the coverage rows, one contig at a time, ordered by contig name
    try:
        for contig_id in sorted(contigNames.keys()):
            new_row = covTable.row
            for col_index, col_name in enumerate(stoitColNames):
                try:
                    depth = coverages[col_index][contig_id]
                except KeyError:
                    # nothing mapped to this contig in this BAM
                    depth = 0.0
                new_row[col_name] = depth
            new_row.append()
        covTable.flush()
    except:
        print("Error saving results to DB")
        raise

    # flatten the per-contig link lists into rows keyed by row index
    flat_links = []
    for source_cid in links:
        for link in links[source_cid]:
            try:
                entry = [cid2Indicies[source_cid],  # contig 1
                         cid2Indicies[link[0]],     # contig 2
                         int(link[1]),              # numReads
                         int(link[2]),              # linkType
                         int(link[3])]              # gap
            except KeyError:
                # one end of the link is not a known contig
                continue
            flat_links.append(entry)

    return flat_links