示例#1
0
	def random_mutations(self):
		"""Place random indels and SNPs on every ploid, apply them, and report them.

		Steps, in order:
		  1. For each ploid i, make self.indelsToAdd[i] attempts to place a random
		     insertion or deletion; positions it covers are marked 1 in self.blackList.
		  2. Merge in the pre-specified indels from self.indelList.
		  3. For each ploid i, make self.snpsToAdd[i] attempts to place a random SNP;
		     its position is marked 2 in self.blackList.
		  4. Merge in self.snpList and drop SNPs whose position is blacklisted as indel (1).
		  5. Apply SNPs, then indels, to self.sequences.
		  6. Rebuild self.adj[i] (cumulative length offset per position of the mutated
		     sequence) and, unless self.onlyVCF is set, append per-start-position entries
		     to self.allCigar[i], self.FM_pos[i] and self.FM_span[i].

		Returns:
		  A sorted list of tuples
		  (pos + self.x, ref, alt, fraction_of_ploids_carrying_it, 'WP=<0|1>/<0|1>/...').

		Prints a message and calls exit(1) if a variant's ref allele does not match the
		sequence content at its position.
		"""
		
		#	add random indels
		all_indels  = [[] for n in self.sequences]
		for i in xrange(self.ploidy):
			for j in xrange(self.indelsToAdd[i]):
				# models[i][1] is used as the probability that the event hits every ploid
				if random.random() <= self.models[i][1]:	# insert homozygous indel
					whichPloid = range(self.ploidy)
				else:								# insert heterozygous indel
					whichPloid = [self.ploidMutPrior.sample()]

				# try to find suitable places to insert indels
				eventPos = -1
				for attempt in xrange(MAX_ATTEMPTS):
					eventPos = random.randint(self.winBuffer,self.seqLen-1)
					# NOTE(review): after a rejection eventPos is -1, so any remaining ploids
					# are probed at index -1 (the last blackList entry); the outcome is still -1.
					for p in whichPloid:
						if self.blackList[p][eventPos]:
							eventPos = -1
					if eventPos != -1:
						break
				if eventPos == -1:
					# no free position found within MAX_ATTEMPTS; give up on this event
					continue

				# NOTE(review): the ref base is read from ploid i even though the event is applied
				# to every ploid in whichPloid -- presumably all ploids still hold identical
				# sequence at this point; confirm.
				if random.random() <= self.models[i][3]:	# insertion
					inLen   = self.models[i][4].sample()
					# sequence content of random insertions is uniformly random (change this later)
					inSeq   = ''.join([random.choice(NUCL) for n in xrange(inLen)])
					refNucl = chr(self.sequences[i][eventPos])
					# indel tuples are (position, ref allele, alt allele)
					myIndel = (eventPos,refNucl,refNucl+inSeq)
				else:										# deletion
					inLen   = self.models[i][5].sample()
					if eventPos+inLen+1 >= len(self.sequences[i]):	# skip if deletion too close to boundary
						continue
					if inLen == 1:
						inSeq = chr(self.sequences[i][eventPos+1])
					else:
						inSeq = str(self.sequences[i][eventPos+1:eventPos+inLen+1])
					refNucl = chr(self.sequences[i][eventPos])
					myIndel = (eventPos,refNucl+inSeq,refNucl)

				# if event too close to boundary, skip. if event conflicts with other indel, skip.
				# (len(myIndel[1]) is the ref allele length: 1 for insertions, inLen+1 for deletions)
				skipEvent = False
				if eventPos+len(myIndel[1]) >= self.seqLen-self.winBuffer-1:
					skipEvent = True
				if skipEvent:
					continue
				# NOTE(review): for insertions inLen is the inserted length, which the boundary
				# check above does not bound -- confirm blackList[p] is long enough for
				# index eventPos+inLen.
				for p in whichPloid:
					for k in xrange(eventPos,eventPos+inLen+1):
						if self.blackList[p][k]:
							skipEvent = True
				if skipEvent:
					continue

				# accept the event: reserve its span (value 1 = indel) and record it per ploid
				for p in whichPloid:
					for k in xrange(eventPos,eventPos+inLen+1):
						self.blackList[p][k] = 1
					all_indels[p].append(myIndel)

		# merge in pre-specified indels; sort descending so that applying an indel below
		# never shifts the coordinates of indels that are still to be applied
		for i in xrange(len(all_indels)):
			all_indels[i].extend(self.indelList[i])
		all_indels = [sorted(n,reverse=True) for n in all_indels]
		#print all_indels

		#	add random snps
		all_snps  = [[] for n in self.sequences]
		for i in xrange(self.ploidy):
			for j in xrange(self.snpsToAdd[i]):
				if random.random() <= self.models[i][1]:	# insert homozygous SNP
					whichPloid = range(self.ploidy)
				else:								# insert heterozygous SNP
					whichPloid = [self.ploidMutPrior.sample()]

				# try to find suitable places to insert snps
				eventPos = -1
				for attempt in xrange(MAX_ATTEMPTS):
					# based on the mutation model for the specified ploid, choose a SNP location based on trinuc bias
					# (if there are multiple ploids, choose one at random)
					if IGNORE_TRINUC:
						eventPos = random.randint(self.winBuffer+1,self.seqLen-2)
					else:
						ploid_to_use = whichPloid[random.randint(0,len(whichPloid)-1)]
						eventPos     = self.trinuc_bias[ploid_to_use].sample()
					for p in whichPloid:
						if self.blackList[p][eventPos]:
							eventPos = -1
					if eventPos != -1:
						break
				if eventPos == -1:
					continue

				refNucl = chr(self.sequences[i][eventPos])
				# context = the two bases flanking the SNP position (left + right)
				context = str(chr(self.sequences[i][eventPos-1])+chr(self.sequences[i][eventPos+1]))
				# sample from tri-nucleotide substitution matrices to get SNP alt allele
				newNucl = self.models[i][6][TRI_IND[context]][NUC_IND[refNucl]].sample()
				mySNP   = (eventPos,refNucl,newNucl)

				# record the SNP and reserve its position (value 2 = SNP)
				for p in whichPloid:
					all_snps[p].append(mySNP)
					self.blackList[p][mySNP[0]] = 2

		# combine random snps with inserted snps, remove any snps that overlap indels
		for p in xrange(len(all_snps)):
			all_snps[p].extend(self.snpList[p])
			all_snps[p] = [n for n in all_snps[p] if self.blackList[p][n[0]] != 1]

		# modify reference sequences
		# SNPs first: they do not change sequence length, so indel coordinates stay valid
		for i in xrange(len(all_snps)):
			for j in xrange(len(all_snps[i])):
				# sanity checking (for debugging purposes)
				vPos = all_snps[i][j][0]
				if all_snps[i][j][1] != chr(self.sequences[i][vPos]):
					print '\nError: Something went wrong!\n', all_snps[i][j], chr(self.sequences[i][vPos]),'\n'
					exit(1)
				else:
					self.sequences[i][vPos] = all_snps[i][j][2]

		# adjToAdd[i] collects (indel position, length change) pairs for ploid i
		adjToAdd = [[] for n in xrange(self.ploidy)]
		for i in xrange(len(all_indels)):
			for j in xrange(len(all_indels[i])):
				# sanity checking (for debugging purposes)
				vPos  = all_indels[i][j][0]
				vPos2 = vPos + len(all_indels[i][j][1])
				#print all_indels[i][j], str(self.sequences[i][vPos:vPos2])
				#print len(self.sequences[i]),'-->',
				if all_indels[i][j][1] != str(self.sequences[i][vPos:vPos2]):
					print '\nError: Something went wrong!\n', all_indels[i][j], str(self.sequences[i][vPos:vPos2]),'\n'
					exit(1)
				else:
					# splice the alt allele in place of the ref allele
					self.sequences[i] = self.sequences[i][:vPos] + bytearray(all_indels[i][j][2]) + self.sequences[i][vPos2:]
					adjToAdd[i].append((all_indels[i][j][0],len(all_indels[i][j][2])-len(all_indels[i][j][1])))
				#print len(self.sequences[i])
			adjToAdd[i].sort()
			#print adjToAdd[i]

			# adj[i][j] = running sum of length changes of the indels consumed so far; an indel's
			# change is added once j has passed its anchor position (at most one indel per step)
			self.adj[i] = np.zeros(len(self.sequences[i]),dtype='<i4')
			indSoFar = 0
			valSoFar = 0
			for j in xrange(len(self.adj[i])):
				if indSoFar < len(adjToAdd[i]) and j >= adjToAdd[i][indSoFar][0]+1:
					valSoFar += adjToAdd[i][indSoFar][1]
					indSoFar += 1
				self.adj[i][j] = valSoFar

			# precompute cigar strings (we can skip this if going for only vcf output)
			if not self.onlyVCF:
				# one symbol per emitted position: 'M', 'I', or 'D...DM' (deletion before a match)
				tempSymbolString = ['M']
				prevVal = self.adj[i][0]
				j = 1
				while j < len(self.adj[i]):
					diff = self.adj[i][j] - prevVal
					prevVal = self.adj[i][j]
					if diff > 0:	# insertion
						# emit one 'I' per inserted base and jump over those positions
						tempSymbolString.extend(['I']*abs(diff))
						j += abs(diff)
					elif diff < 0:	# deletion
						tempSymbolString.append('D'*abs(diff)+'M')
						j += 1
					else:
						tempSymbolString.append('M')
						j += 1

				# one cigar string per read start position j (window of readLen symbols)
				for j in xrange(len(tempSymbolString)-self.readLen):
					self.allCigar[i].append(CigarString(listIn=tempSymbolString[j:j+self.readLen]).getString())
					# pre-compute reference position of first matching base
					my_fm_pos = None
					for k in xrange(self.readLen):
						if 'M' in tempSymbolString[j+k]:
							my_fm_pos = j+k
							break
					if my_fm_pos == None:
						# window contains no matching base at all
						self.FM_pos[i].append(None)
						self.FM_span[i].append(None)
					else:
						self.FM_pos[i].append(my_fm_pos-self.adj[i][my_fm_pos])
						# span end = first-match position + number of symbols containing 'M' in the window
						span_dif = len([nnn for nnn in tempSymbolString[j:j+self.readLen] if 'M' in nnn])
						self.FM_span[i].append(self.FM_pos[i][-1] + span_dif)

		# tally up variants implemented
		# countDict maps a variant tuple (with position shifted by self.x) to the ploid indices carrying it
		countDict = {}
		all_variants = [sorted(all_snps[i]+all_indels[i]) for i in xrange(self.ploidy)]
		for i in xrange(len(all_variants)):
			for j in xrange(len(all_variants[i])):
				all_variants[i][j] = tuple([all_variants[i][j][0]+self.x])+all_variants[i][j][1:]
				t = tuple(all_variants[i][j])
				if t not in countDict:
					countDict[t] = []
				countDict[t].append(i)

		#
		#	TODO: combine multiple variants that happened to occur at same position into single vcf entry
		#

		# each output entry: variant tuple + (fraction of ploids carrying it, 'WP=' ploid mask)
		output_variants = []
		for k in sorted(countDict.keys()):
			output_variants.append(k+tuple([len(countDict[k])/float(self.ploidy)]))
			ploid_string = ['0' for n in xrange(self.ploidy)]
			for k2 in [n for n in countDict[k]]:
				ploid_string[k2] = '1'
			output_variants[-1] += tuple(['WP='+'/'.join(ploid_string)])
		return output_variants
示例#2
0
	def sample_read(self, sequencingModel, fragLen=None):
		"""Sample one read (or one read pair) from a random ploid and apply sequencing errors.

		Parameters:
		  sequencingModel: must provide getSequencingErrors(readData[, isReverseStrand=True]),
		                   returning (qualities, errors), and errP[3], whose maximum is used
		                   to size the per-position offset table.
		  fragLen:         None to sample a single read; otherwise the fragment length of a
		                   read pair (also the key into self.fraglens_indMap).

		Returns:
		  A list with one entry per read: [pos, cigar, read_string, qual_string], where
		  pos is self.FM_pos[ploid][read_start].

		Each sequencing error is a tuple (type, len, pos, ref, alt) with type 'D', 'I' or 'S'.
		Prints diagnostics and calls exit(1) if no cigar string exists for the sampled
		position, or if an error's ref allele does not match the read content.
		"""

		# choose a ploid
		myPloid = random.randint(0,self.ploidy-1)

		# choose a start position from the coverage distribution, and generate quality scores / sequencing errors
		readsToSample = []
		if fragLen is None:
			rPos = self.coverage_distribution[myPloid].sample()
			rDat = self.sequences[myPloid][rPos:rPos+self.readLen]
			(myQual, myErrors) = sequencingModel.getSequencingErrors(rDat)
			readsToSample.append([rPos,myQual,myErrors,rDat])
		else:
			rPos1 = self.coverage_distribution[myPloid][self.fraglens_indMap[fragLen]].sample()
			# second read ends where the fragment ends
			rPos2 = rPos1 + fragLen - self.readLen
			rDat1 = self.sequences[myPloid][rPos1:rPos1+self.readLen]
			rDat2 = self.sequences[myPloid][rPos2:rPos2+self.readLen]
			(myQual1, myErrors1) = sequencingModel.getSequencingErrors(rDat1)
			(myQual2, myErrors2) = sequencingModel.getSequencingErrors(rDat2,isReverseStrand=True)
			readsToSample.append([rPos1,myQual1,myErrors1,rDat1])
			readsToSample.append([rPos2,myQual2,myErrors2,rDat2])

		# examine sequencing errors to-be-inserted.
		#	- remove deletions that don't have enough bordering sequence content to "fill in"
		# if error is valid, make the changes to the read data
		rOut = []
		for r in readsToSample:
			# r = [start position, qualities, errors, read data]
			try:
				myCigar = self.allCigar[myPloid][r[0]]
			except IndexError:
				print 'Index error when attempting to find cigar string.'
				print len(self.allCigar[myPloid]), r[0]
				if fragLen is None:
					# single-read mode has no fragment-length index; looking one up here
					# would raise a second error and hide this diagnostic
					print myPloid, fragLen
				else:
					print (rPos1, rPos2)
					print myPloid, fragLen, self.fraglens_indMap[fragLen]
				exit(1)
			totalD  = sum([error[1] for error in r[2] if error[0] == 'D'])
			totalI  = sum([error[1] for error in r[2] if error[0] == 'I'])
			availB  = len(self.sequences[myPloid]) - r[0] - self.readLen - 1
			# add buffer sequence to fill in positions that get deleted
			r[3] += self.sequences[myPloid][r[0]+self.readLen:r[0]+self.readLen+totalD]
			expandedCigar = []
			# cigar symbols appended after the read's own cigar to compensate for deleted bases.
			# Computed once (together with expandedCigar) and kept for the whole read: resetting
			# it per indel error would drop the padding whenever a read has several indel errors.
			extraCigarVal = []
			# sse_adj[p] = how far original read position p has moved due to indel errors applied so far
			sse_adj       = [0 for n in xrange(self.readLen + max(sequencingModel.errP[3]))]
			anyIndelErr   = False

			# sort by letter (D > I > S) such that we introduce all indel errors before substitution errors
			# secondarily, sort by index
			arrangedErrors = {'D':[],'I':[],'S':[]}
			for error in r[2]:
				arrangedErrors[error[0]].append((error[2],error))
			sortedErrors = []
			for k in sorted(arrangedErrors.keys()):
				sortedErrors.extend([n[1] for n in sorted(arrangedErrors[k])])

			skipIndels = False

			for error in sortedErrors:
				eLen = error[1]
				ePos = error[2]
				if error[0] == 'D' or error[0] == 'I':
					anyIndelErr = True
					if totalD > availB:	# if not enough bases to fill-in deletions, skip all indel errors
						continue
					if expandedCigar == []:
						# first indel error of this read: expand the cigar and fetch the fill symbols
						expandedCigar = CigarString(stringIn=myCigar).getList()
						fillToGo = totalD - totalI + 1
						if fillToGo > 0:
							try:
								extraCigarVal = CigarString(stringIn=self.allCigar[myPloid][r[0]+fillToGo]).getList()[-fillToGo:]
							except IndexError:	# applying the deletions we want requires going beyond region boundaries. skip all indel errors
								skipIndels = True

					if skipIndels:
						continue

					# insert deletion error into read and update cigar string accordingly
					if error[0] == 'D':
						myadj = sse_adj[ePos]
						pi = ePos+myadj
						pf = ePos+myadj+eLen+1
						if str(r[3][pi:pf]) == str(error[3]):
							# keep the anchor base at pi, drop the eLen bases after it
							r[3] = r[3][:pi+1] + r[3][pf:]
							expandedCigar = expandedCigar[:pi+1] + expandedCigar[pf:]
							if pi+1 == len(expandedCigar):	# weird edge case with del at very end of region. Make a guess and add a "M"
								expandedCigar.append('M')
							expandedCigar[pi+1] = 'D'*eLen + expandedCigar[pi+1]
						else:
							print '\nError, ref does not match alt while attempting to insert deletion error!\n'
							exit(1)
						for i in xrange(ePos,len(sse_adj)):
							sse_adj[i] -= eLen

					# insert insertion error into read and update cigar string accordingly
					else:
						myadj = sse_adj[ePos]
						if chr(r[3][ePos+myadj]) == error[3]:
							# replace the single ref base with the alt allele
							r[3] = r[3][:ePos+myadj] + error[4] + r[3][ePos+myadj+1:]
							expandedCigar = expandedCigar[:ePos+myadj] + ['I']*eLen + expandedCigar[ePos+myadj:]
						else:
							print '\nError, ref does not match alt while attempting to insert insertion error!\n'
							print '---',chr(r[3][ePos+myadj]), '!=', error[3]
							exit(1)
						for i in xrange(ePos,len(sse_adj)):
							sse_adj[i] += eLen

				else:	# substitution errors, much easier by comparison...
					if chr(r[3][ePos+sse_adj[ePos]]) == error[3]:
						r[3][ePos+sse_adj[ePos]] = error[4]
					else:
						print '\nError, ref does not match alt while attempting to insert substitution error!\n'
						exit(1)

			if anyIndelErr:
				if len(expandedCigar):
					relevantCigar = (expandedCigar+extraCigarVal)[:self.readLen]
					myCigar = CigarString(listIn=relevantCigar).getString()

				# trim the fill-in buffer (and any net growth) back to the read length
				r[3] = r[3][:self.readLen]

			rOut.append([self.FM_pos[myPloid][r[0]],myCigar,str(r[3]),str(r[1])])

		# rOut[i] = (pos, cigar, read_string, qual_string)
		return rOut
示例#3
0
    def sample_read(self, sequencingModel, fragLen=None):
        """Sample one read (or one read pair) from a random ploid and apply sequencing errors.

        Parameters:
          sequencingModel: must provide getSequencingErrors(readData[, isReverseStrand=True]),
                           returning (qualities, errors).
          fragLen:         None to sample a single read; otherwise the fragment length of
                           a read pair.

        Returns:
          A list with one entry per read: [pos, cigar, read_string, qual_string], where
          pos is the read start minus self.adj[ploid][start].

        Each sequencing error is a tuple (type, len, pos, ref, alt) with type 'D', 'I' or 'S'.
        Prints a message and calls exit(1) if an error's ref allele does not match the
        read content.
        """

        # choose a ploid
        myPloid = random.randint(0, self.ploidy - 1)

        # choose a random position within the ploid, and generate quality scores / sequencing errors
        readsToSample = []
        if fragLen is None:
            # decide which subsection of the sequence to sample from using coverage probabilities
            # NOTE(review): this retry loop has no attempt limit and indexes self.adj at the
            # window end without a bounds check -- confirm callers guarantee both are safe.
            coords_bad = True
            while coords_bad:
                myBucket = max(
                    [self.which_bucket.sample() - self.win_per_read, 0])
                coords_to_select_from = [
                    myBucket * self.windowSize,
                    (myBucket + 1) * self.windowSize
                ]
                coords_to_select_from[0] += self.adj[myPloid][
                    coords_to_select_from[0]]
                coords_to_select_from[1] += self.adj[myPloid][
                    coords_to_select_from[1]]
                if coords_to_select_from[1] < len(
                        self.sequences[myPloid]) - self.readLen:
                    coords_bad = False
            rPos = random.randint(coords_to_select_from[0],
                                  coords_to_select_from[1] - 1)

            # sample read position and call function to compute quality scores / sequencing errors
            rDat = self.sequences[myPloid][rPos:rPos + self.readLen]
            (myQual, myErrors) = sequencingModel.getSequencingErrors(rDat)
            readsToSample.append([rPos, myQual, myErrors, rDat])

        else:
            # decide which subsection of the sequence to sample from using coverage probabilities
            # NOTE(review): this retry loop has no attempt limit -- confirm it always terminates.
            coords_bad = True
            while coords_bad:
                myBucket = max(
                    [self.which_bucket.sample() - self.win_per_read, 0])
                coords_to_select_from = [
                    myBucket * self.windowSize,
                    (myBucket + 1) * self.windowSize
                ]
                coords_to_select_from[0] += self.adj[myPloid][
                    coords_to_select_from[0]]
                # both ends use index of starting position to avoid issues with
                # reads spanning breakpoints of large events
                coords_to_select_from[1] += self.adj[myPloid][
                    coords_to_select_from[0]]
                rPos1 = random.randint(coords_to_select_from[0],
                                       coords_to_select_from[1] - 1)
                # for PE-reads, flip a coin to decide if R1 or R2 will be the "covering" read
                if random.randint(1,
                                  2) == 1 and rPos1 > fragLen - self.readLen:
                    rPos1 -= fragLen - self.readLen
                if rPos1 < len(self.sequences[myPloid]) - fragLen:
                    coords_bad = False

            # second read ends where the fragment ends
            rPos2 = rPos1 + fragLen - self.readLen
            rDat1 = self.sequences[myPloid][rPos1:rPos1 + self.readLen]
            rDat2 = self.sequences[myPloid][rPos2:rPos2 + self.readLen]
            (myQual1, myErrors1) = sequencingModel.getSequencingErrors(rDat1)
            (myQual2, myErrors2) = sequencingModel.getSequencingErrors(
                rDat2, isReverseStrand=True)
            readsToSample.append([rPos1, myQual1, myErrors1, rDat1])
            readsToSample.append([rPos2, myQual2, myErrors2, rDat2])

        # examine sequencing errors to-be-inserted.
        #	- remove deletions that don't have enough bordering sequence content to "fill in"
        # if error is valid, make the changes to the read data
        rOut = []
        for r in readsToSample:
            # r = [start position, qualities, errors, read data]
            myCigar = self.allCigar[myPloid][r[0]]
            totalD = sum([error[1] for error in r[2] if error[0] == 'D'])
            totalI = sum([error[1] for error in r[2] if error[0] == 'I'])
            availB = len(self.sequences[myPloid]) - r[0] - self.readLen - 1
            # add buffer sequence to fill in positions that get deleted
            r[3] += self.sequences[myPloid][r[0] + self.readLen:r[0] +
                                            self.readLen + totalD]
            expandedCigar = []
            # cigar symbols appended after the read's own cigar to compensate for deleted
            # bases. Computed once and kept for the whole read: resetting it per indel error
            # would drop the padding whenever a read has several indel errors.
            extraCigarVal = []
            skipIndels = False
            # sse_adj[p] = how far original read position p has moved due to indel errors applied so far
            sse_adj = [0 for n in xrange(self.readLen)]
            anyIndelErr = False

            # sort by letter (D > I > S) such that we introduce all indel errors before substitution errors
            # secondarily, sort by index
            arrangedErrors = {'D': [], 'I': [], 'S': []}
            for error in r[2]:
                arrangedErrors[error[0]].append((error[2], error))
            sortedErrors = []
            for k in sorted(arrangedErrors.keys()):
                sortedErrors.extend([n[1] for n in sorted(arrangedErrors[k])])

            for error in sortedErrors:
                eLen = error[1]
                ePos = error[2]
                if error[0] == 'D' or error[0] == 'I':
                    anyIndelErr = True
                    if totalD > availB:  # if not enough bases to fill-in deletions, skip all indel errors
                        continue
                    if expandedCigar == []:
                        # first indel error of this read: expand the cigar and fetch the fill symbols
                        expandedCigar = CigarString(stringIn=myCigar).getList()

                        fillToGo = totalD - totalI
                        if fillToGo > 0:
                            try:
                                fillCigar = self.allCigar[myPloid][r[0] +
                                                                   fillToGo]
                            except IndexError:
                                # filling in the deleted bases would require going beyond
                                # the region boundary: skip all indel errors for this read
                                skipIndels = True
                            else:
                                extraCigarVal = CigarString(
                                    stringIn=fillCigar).getList()[-fillToGo:]

                    if skipIndels:
                        continue

                    # Use the per-position shift rather than one running total: deletions are
                    # all applied before insertions, so a running total would wrongly shift an
                    # insertion that lies before an already-applied deletion.
                    shift = sse_adj[ePos]

                    # insert deletion error into read and update cigar string accordingly
                    if error[0] == 'D':
                        pi = ePos + shift
                        pf = pi + eLen + 1
                        if str(r[3][pi:pf]) == str(error[3]):
                            # keep the anchor base at pi, drop the eLen bases after it
                            r[3] = r[3][:pi + 1] + r[3][pf:]
                            expandedCigar = (expandedCigar[:pi + 1] +
                                             expandedCigar[pf:])
                            if pi + 1 == len(expandedCigar):
                                # deletion at the very end of the cigar list: there is no
                                # following symbol to attach the 'D's to, so add an 'M'
                                expandedCigar.append('M')
                            expandedCigar[pi + 1] = ('D' * eLen +
                                                     expandedCigar[pi + 1])
                        else:
                            print '\nError, ref does not match alt while attempting to insert deletion error!\n'
                            exit(1)
                        for i in xrange(ePos, len(sse_adj)):
                            sse_adj[i] -= eLen

                    # insert insertion error into read and update cigar string accordingly
                    else:
                        pi = ePos + shift
                        if chr(r[3][pi]) == error[3]:
                            # replace the single ref base with the alt allele
                            r[3] = r[3][:pi] + error[4] + r[3][pi + 1:]
                            # later positions move by eLen, so the cigar must grow by exactly
                            # eLen symbols: splice the 'I's in without dropping an entry
                            expandedCigar = (expandedCigar[:pi] +
                                             ['I'] * eLen +
                                             expandedCigar[pi:])
                        else:
                            print '\nError, ref does not match alt while attempting to insert insertion error!\n'
                            exit(1)
                        for i in xrange(ePos, len(sse_adj)):
                            sse_adj[i] += eLen

                else:  # substitution errors, much easier by comparison...
                    if chr(r[3][ePos + sse_adj[ePos]]) == error[3]:
                        r[3][ePos + sse_adj[ePos]] = error[4]
                    else:
                        print '\nError, ref does not match alt while attempting to insert substitution error!\n'
                        exit(1)

            if anyIndelErr:
                if len(expandedCigar):
                    relevantCigar = (expandedCigar +
                                     extraCigarVal)[:self.readLen]
                    myCigar = CigarString(listIn=relevantCigar).getString()

                # trim the fill-in buffer (and any net growth) back to the read length
                r[3] = r[3][:self.readLen]

            rOut.append([
                r[0] - self.adj[myPloid][r[0]], myCigar,
                str(r[3]),
                str(r[1])
            ])

        # rOut[i] = (pos, cigar, read_string, qual_string)
        return rOut