示例#1
0
    def extractReads(self, bams=None):
        """Extract reads for the contigs of the loaded bins and write to file

        Loads the bins selected by self.bids from self.dbFileName, builds a
        map of contig name -> bin id, and hands that map together with
        `bams` to the BTBP parser's extractReads, which is given
        self.outDir as its output folder.

        bams: inputs passed unchanged to the parser (presumably BAM file
              names -- confirm against BTBP.extractReads). Defaults to an
              empty list.

        Side effects: sets self.BM and self.PM.
        """
        # Build a fresh list per call: a shared mutable default would carry
        # any change the parser makes to it over into later calls.
        if bams is None:
            bams = []

        # load data
        self.BM = binManager.BinManager(dbFileName=self.dbFileName)   # bins
        self.BM.loadBins(makeBins=True, silent=False, bids=self.bids)
        self.PM = self.BM.PM

        # Parenthesised single-argument form prints identically under
        # Python 2 and stays valid syntax under Python 3.
        print("Extracting reads")

        # work out a set of targets to pass to the parser:
        # every contig in every loaded bin maps to the id of its bin
        targets = {}
        for bid in self.BM.getBids():
            members = self.BM.getBin(bid)
            for row_index in members.rowIndices:
                targets[self.PM.contigNames[row_index]] = bid

        # get something to parse the bams with
        bam_parser = BTBP()
        # NOTE(review): the meaning of the empty-string second argument is
        # defined by BTBP.extractReads and is not visible here -- confirm.
        bam_parser.extractReads(bams,
                                '',
                                targets,
                                combineBams=False,
                                headersOnly=True,
                                dontTrustSamFlags=False,
                                folder=self.outDir,
                                verbose=True
                                )
示例#2
0
文件: mstore.py 项目: jnesme/GroopM-1
    def parse(self, bamFiles, stoitColNames, covTable, contigNames, cid2Indicies):
        """Parse multiple bam files and store the results in the main DB

        bamFiles: passed straight to bamtyper's BamParser.getLinks
        stoitColNames: names of the COV_x columns; the i-th name receives
                       the value looked up in the i-th entry of the
                       coverages returned by the parser
        covTable: a table in an open h5 file like "CID,COV_1,...,COV_n,length"
        contigNames: mapping whose keys are contig ids; one coverage row is
                     appended per key, in sorted key order
        cid2Indicies: mapping of contig id -> index, used to rewrite links

        Returns a list of
        [contig 1 index, contig 2 index, numReads, linkType, gap] lists.

        Any error raised while writing the coverage rows is reported on
        stdout and then re-raised.
        """
        # Parenthesised single-argument form prints identically under
        # Python 2 and stays valid syntax under Python 3.
        print("Importing BAM files")
        from bamtyper.utilities import BamParser as BTBP
        BP = BTBP()
        # the reference lengths are returned by the parser but not used here
        (links, _ref_lengths, coverages) = BP.getLinks(bamFiles,
                                                       full=False,
                                                       verbose=True,
                                                       doCoverage=True,
                                                       minJoin=5)

        # go through all the contigs sorted by name and write to the DB
        try:
            for cid in sorted(contigNames.keys()):
                # make a new row
                cov_row = covTable.row
                # punch in the data
                for i, col_name in enumerate(stoitColNames):
                    try:
                        cov = coverages[i][cid]
                    except KeyError:
                        # may be no coverage for this contig
                        cov = 0.0
                    cov_row[col_name] = cov
                cov_row.append()
            covTable.flush()
        except Exception:
            # Only real errors are reported as DB failures; KeyboardInterrupt
            # and SystemExit propagate without the misleading message.
            print("Error saving results to DB")
            raise

        # transform the links into something a little easier to parse later
        rowwise_links = []
        for cid in links:
            for link in links[cid]:
                try:
                    rowwise_links.append([cid2Indicies[cid],          # contig 1
                                          cid2Indicies[link[0]],      # contig 2
                                          int(link[1]),               # numReads
                                          int(link[2]),               # linkType
                                          int(link[3])                # gap
                                          ])
                except KeyError:
                    # One end of the link has no entry in cid2Indicies, so
                    # the link is dropped. NOTE(review): presumably these
                    # are contigs that were never stored in the DB -- confirm
                    # that skipping silently is intended.
                    pass
        return rowwise_links