示例#1
0
 def accuracy(self, xcoverage):  ### Return cached accuracy at xcoverage, extending the cache if required
     '''Return the accuracy stored at index xcoverage of self.list['Accuracy'].

     The list is used as a cache indexed by X coverage. While it is too short to
     contain xcoverage, one value per missing depth is appended:
       - 1.0 when the previous value, scaled by GenomeSize, leaves no whole
         erroneous base (the int() of the product is zero);
       - otherwise the value from rje.logBinomial(), falling back to
         rje.logPoisson() if the binomial call raises anything.
     NOTE(review): logBinomial/logPoisson are presumably cumulative probabilities
     of getting at least `needed` correct reads - confirm against the rje module.

     Returns None (after calling self.errorLog) if any step raises.
     '''
     try:  ### ~ [1] Extend cache up to xcoverage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         cached = self.list['Accuracy']
         while len(cached) <= xcoverage:
             expected_errors = int(
                 (1.0 - cached[-1]) * self.getNum('GenomeSize'))
             if expected_errors:
                 depth = len(cached)  # X coverage of the value about to be added
                 needed = int(depth / 2.0) + 1  # Number of correct reads needed for majority
                 try:
                     value = rje.logBinomial(needed,
                                             depth,
                                             1.0 - self.getNum('ErrPerBase'),
                                             exact=False,
                                             callobj=self)
                 except:
                     value = rje.logPoisson(
                         needed,
                         depth * (1.0 - self.getNum('ErrPerBase')),
                         exact=False,
                         callobj=self)
                 cached.append(value)
                 self.debug(cached)
             else:
                 # Previous depth already leaves less than one erroneous base.
                 cached.append(1.0)
         return cached[xcoverage]
     except:
         self.errorLog('%s.accuracy error' % self.prog())
示例#2
0
 def accuracy(self, xcoverage):  ### Look up accuracy at xcoverage, calculating missing values first
     '''Look up the accuracy at xcoverage, calculating any missing values first.

     self.list['Accuracy'] is indexed by X coverage. Each pass of the loop adds
     the value for the next depth: 1.0 once (1 - previous value) * GenomeSize
     truncates to zero, otherwise the result of rje.logBinomial() (or of
     rje.logPoisson() if the binomial call raises).
     NOTE(review): the exact meaning of the rje.log* return values is not
     visible here - presumably P(at least `majority` correct reads); confirm.

     Any exception is passed to self.errorLog() and None is returned.
     '''
     try:
         ### ~ [1] Calculate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         table = self.list['Accuracy']
         while len(table) <= xcoverage:
             residual = (1.0 - table[-1]) * self.getNum('GenomeSize')
             if not int(residual):
                 # No whole base is expected to be wrong any more.
                 table.append(1.0)
                 continue
             depth = len(table)
             majority = int(depth / 2.0) + 1  # Number of correct reads needed for majority
             try:
                 estimate = rje.logBinomial(
                     majority, depth, 1.0 - self.getNum('ErrPerBase'),
                     exact=False, callobj=self)
             except:
                 estimate = rje.logPoisson(
                     majority, depth * (1.0 - self.getNum('ErrPerBase')),
                     exact=False, callobj=self)
             table.append(estimate)
             self.debug(table)
         return table[xcoverage]
     except:
         self.errorLog('%s.accuracy error' % self.prog())
示例#3
0
    def coverage(
        self
    ):  ### Calculates estimated % coverage and accuracy of genome sequencing.
        '''Calculates estimated % coverage and accuracy of genome sequencing.

        Creates an empty 'coverage' table (fields XCoverage, SMRT, %Coverage,
        Accuracy and one %Xn field per value in self.list['XnList']), keyed on
        'SMRT' when BySMRT is set and on 'XCoverage' otherwise, adds one entry
        per sequencing round until the accumulated X coverage reaches MaxCov,
        converts the table keys to floats and saves the table to file.

        Returns None. Any exception is reported via self.errorLog() instead of
        being raised.
        '''
        try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # XCoverage, SMRT, %Cov, Accuracy
            if self.getBool('BySMRT'): ckey = 'SMRT'
            else: ckey = 'XCoverage'
            cfields = ['XCoverage', 'SMRT', '%Coverage', 'Accuracy']
            for xn in self.list['XnList']:
                cfields.append('%%X%d' % xn)
            cdb = self.db().addEmptyTable('coverage', cfields, [ckey])

            ### ~ [2] Calculate stats for one round ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.progLog('\r#XCOV', 'Calculating coverage stats...')
            cov_per_base_per_read = self.getNum('AvRead') / self.getNum(
                'GenomeSize')
            if self.getBool('BySMRT'):
                reads = self.getInt('SMRTReads')  # If going per SMRT cell
            else:
                reads = int(0.5 + self.getNum('GenomeSize') /
                            self.getNum('AvRead'))  # if going per X coverage
            # Calculate X coverage counts using binomial (Poisson if the
            # binomial call raises).
            # NOTE(review): presumably exact=True gives P(exactly k reads) - confirm in rje.
            bases = int(self.getNum('GenomeSize'))
            xcov = [
            ]  # List where index is X coverage and number is proportion of reads
            while bases > 1:  # Stop once less than one base remains unaccounted for
                try:
                    xcov.append(
                        rje.logBinomial(len(xcov),
                                        reads,
                                        cov_per_base_per_read,
                                        exact=True,
                                        callobj=self))
                except:
                    xcov.append(
                        rje.logPoisson(len(xcov),
                                       reads * cov_per_base_per_read,
                                       exact=True,
                                       callobj=self))
                bases -= self.getNum('GenomeSize') * xcov[-1]
                if len(xcov) > reads:
                    raise ValueError('XCoverage cannot exceed read count!')
            cyccov = xcov[0:]  # Running distribution; starts as a copy of one round
            self.debug(xcov)

            ### ~ [3] Cycle through rounds, multiplying by X coverage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            cx = 0.0
            ctot = self.getInt('MaxCov')
            xcoverage = 0.0
            while xcoverage < self.getInt('MaxCov'):
                self.progLog(
                    '\r#XCOV', 'Calculating coverage stats: %.1f%% (%d|%d)' %
                    ((cx / ctot, cdb.entryNum() + 1, len(cyccov))))
                cx += 100.0
                # Update xcov: calculate %bases at different X coverage
                if cdb.entryNum(
                ):  # First round uses xcov as is (equivalent of starting with 100% at 0X)
                    prevcov = cyccov[0:]
                    # Convolve the running distribution with one more round.
                    # The largest index written is
                    # (len(prevcov) - 1) + (len(xcov) - 1), so the buffer must
                    # hold len(prevcov) + len(xcov) - 1 values. Sizing it from
                    # prevcov alone raised IndexError whenever trimming had
                    # made prevcov shorter than xcov.
                    cyccov = [0.0] * (len(prevcov) + len(xcov) - 1)
                    for xi in range(len(prevcov)):
                        for xj in range(len(xcov)):
                            x = xi + xj
                            cyccov[x] += (prevcov[xi] * xcov[xj])
                # Trim depths expected to cover less than one base
                while (cyccov[-1]) < 1.0 / self.getNum('GenomeSize'):
                    cyccov.pop(-1)
                # Calculate accuracy: For each X coverage, calculate % bases with >50% correct.
                # The sum runs over covered depths (1X and above) so that it
                # matches the sum(cyccov[1:]) normalisation below; previously
                # it covered 0..len-2, i.e. included 0X and dropped the
                # deepest bin.
                accuracy = 0.0
                for x in range(1, len(cyccov)):
                    accuracy += cyccov[x] * self.accuracy(x)
                accuracy = 100.0 * accuracy / sum(cyccov[1:])
                # SMRT cells versus coverage
                xcoverage += self.getNum('AvRead') * reads / self.getNum(
                    'GenomeSize')
                smrt = (self.getNum('GenomeSize') * xcoverage) / (
                    self.getNum('AvRead') * self.getNum('SMRTReads'))
                # Update cdb
                centry = {
                    'XCoverage': rje.sf(xcoverage, 3),
                    'SMRT': rje.sf(smrt, 3),
                    '%Coverage': 100.0 * (1.0 - cyccov[0]),
                    'Accuracy': accuracy
                }
                for xn in self.list['XnList']:
                    if xn <= len(cyccov):
                        # Percentage of bases at xn-fold coverage or deeper
                        centry['%%X%d' % xn] = rje.sf(100.0 * sum(cyccov[xn:]),
                                                      4)
                    else:
                        centry['%%X%d' % xn] = 0.000
                cdb.addEntry(centry)
            self.progLog(
                '\r#XCOV', 'Calculated coverage stats upto %dX coverage.' %
                self.getInt('MaxCov'))

            ### ~ [4] Save results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Re-key entries on float values before saving
            # (presumably so that they sort numerically - TODO confirm).
            for xkey in cdb.dataKeys():
                cdb.dict['Data'][float(xkey)] = cdb.dict['Data'].pop(xkey)
            cdb.saveToFile()

            return
        except:
            self.errorLog('%s.coverage error' % self.prog())
示例#4
0
    def coverage(self): ### Calculates estimated % coverage and accuracy of genome sequencing.
        '''Calculates estimated % coverage and accuracy of genome sequencing.

        Creates an empty 'coverage' table (fields XCoverage, SMRT, %Coverage,
        Accuracy and one %Xn field per value in self.list['XnList']), keyed on
        'SMRT' when BySMRT is set and on 'XCoverage' otherwise, adds one entry
        per sequencing round until the accumulated X coverage reaches MaxCov,
        converts the table keys to floats and saves the table to file.

        Returns None. Any exception is reported via self.errorLog() instead of
        being raised.
        '''
        try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # XCoverage, SMRT, %Cov, Accuracy
            if self.getBool('BySMRT'): ckey = 'SMRT'
            else: ckey = 'XCoverage'
            cfields = ['XCoverage','SMRT','%Coverage','Accuracy']
            for xn in self.list['XnList']: cfields.append('%%X%d' % xn)
            cdb = self.db().addEmptyTable('coverage',cfields,[ckey])

            ### ~ [2] Calculate stats for one round ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.progLog('\r#XCOV','Calculating coverage stats...')
            cov_per_base_per_read = self.getNum('AvRead') / self.getNum('GenomeSize')
            if self.getBool('BySMRT'): reads = self.getInt('SMRTReads')    # If going per SMRT cell
            else: reads = int(0.5 + self.getNum('GenomeSize') / self.getNum('AvRead')) # if going per X coverage
            # Calculate X coverage counts using binomial (Poisson if the binomial call raises).
            # NOTE(review): presumably exact=True gives P(exactly k reads) - confirm in rje.
            bases = int(self.getNum('GenomeSize'))
            xcov = []   # List where index is X coverage and number is proportion of reads
            while bases > 1:    # Stop once less than one base remains unaccounted for
                try: xcov.append(rje.logBinomial(len(xcov),reads,cov_per_base_per_read,exact=True,callobj=self))
                except: xcov.append(rje.logPoisson(len(xcov),reads*cov_per_base_per_read,exact=True,callobj=self))
                bases -= self.getNum('GenomeSize') * xcov[-1]
                if len(xcov) > reads: raise ValueError('XCoverage cannot exceed read count!')
            cyccov = xcov[0:]   # Running distribution; starts as a copy of one round
            self.debug(xcov)

            ### ~ [3] Cycle through rounds, multiplying by X coverage ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            cx = 0.0; ctot = self.getInt('MaxCov'); xcoverage = 0.0
            while xcoverage < self.getInt('MaxCov'):
                self.progLog('\r#XCOV','Calculating coverage stats: %.1f%% (%d|%d)' % ((cx/ctot,cdb.entryNum()+1,len(cyccov)))); cx += 100.0
                # Update xcov: calculate %bases at different X coverage
                if cdb.entryNum():  # First round uses xcov as is (equivalent of starting with 100% at 0X)
                    prevcov = cyccov[0:]
                    # Convolve the running distribution with one more round. The largest index written is
                    # (len(prevcov)-1) + (len(xcov)-1), so the buffer must hold len(prevcov)+len(xcov)-1 values.
                    # Sizing it from prevcov alone raised IndexError whenever trimming had made prevcov
                    # shorter than xcov.
                    cyccov = [0.0] * (len(prevcov) + len(xcov) - 1)
                    for xi in range(len(prevcov)):
                        for xj in range(len(xcov)):
                            x = xi + xj
                            cyccov[x] += (prevcov[xi] * xcov[xj])
                # Trim depths expected to cover less than one base
                while(cyccov[-1]) < 1.0 / self.getNum('GenomeSize'): cyccov.pop(-1)
                # Calculate accuracy: For each X coverage, calculate % bases with >50% correct.
                # The sum runs over covered depths (1X and above) so that it matches the sum(cyccov[1:])
                # normalisation below; previously it covered 0..len-2, i.e. included 0X and dropped the deepest bin.
                accuracy = 0.0
                for x in range(1,len(cyccov)): accuracy += cyccov[x] * self.accuracy(x)
                accuracy = 100.0 * accuracy / sum(cyccov[1:])
                # SMRT cells versus coverage
                xcoverage += self.getNum('AvRead') * reads / self.getNum('GenomeSize')
                smrt = (self.getNum('GenomeSize') * xcoverage) / (self.getNum('AvRead') * self.getNum('SMRTReads'))
                # Update cdb
                centry = {'XCoverage':rje.sf(xcoverage,3),'SMRT':rje.sf(smrt,3),'%Coverage':100.0 * (1.0-cyccov[0]),'Accuracy':accuracy}
                for xn in self.list['XnList']:
                    # Percentage of bases at xn-fold coverage or deeper
                    if xn <= len(cyccov): centry['%%X%d' % xn] = rje.sf(100.0*sum(cyccov[xn:]),4)
                    else: centry['%%X%d' % xn] = 0.000
                cdb.addEntry(centry)
            self.progLog('\r#XCOV','Calculated coverage stats upto %dX coverage.' % self.getInt('MaxCov'))

            ### ~ [4] Save results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Re-key entries on float values before saving (presumably so that they sort numerically - TODO confirm).
            for xkey in cdb.dataKeys():
                cdb.dict['Data'][float(xkey)] = cdb.dict['Data'].pop(xkey)
            cdb.saveToFile()

            return
        except: self.errorLog('%s.coverage error' % self.prog())