Exemplo n.º 1
0
	def sheet6(self,ws, book):
		infolog("Start sheet6")
		col = 'B'
		for i in range(0, len(self.s1)):
			ws[col+'2'] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' # if self.s2[i] != 51.0 else str(self.s1[i]) + '%~'

			# get basepair between s1 and s2
			mxr = []
			for n in range(0, book.nsheets):
				_, _, minor_ = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i])
				mxr.append(book.BP35[n].loc[minor_.index])

			for x in range(-1, book.nsheets):
				ws[col+'3'] = book.sheet_list[x] if x != -1 else "Major/Minor"
				rows = 4
				for a in range(0, len(book.col_basepair)):
					for b in range(0, len(book.col_basepair)):
						if a==b:
							continue
						ws[col+str(rows)] = len(mxr[x][logical_and( mxr[x]['major_idx']==a, 
							mxr[x]['minor_idx']==b )]) if x != -1 else book.col_basepair[a] + '/' + book.col_basepair[b]
						rows = rows + 1
				col = next_col(col)
			col = next_col(col)
		infolog("End sheet6")
Exemplo n.º 2
0
	def sheet2(self, ws, book):
		infolog("Start Sheet2")
		
		ws['A1'] = "strain"
		ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2)
		ws['C1'] = "Genome length (bp)"
		ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3)
		ws['D1'] = "Range of minor allele frequency(%)"
		ws.merge_cells(start_row=1, start_column=4, end_row=1, end_column=4+len(self.s1))

		col = 'D'
		for ss1, ss2 in zip(self.s1, self.s2):
			ws[col+'2'] = str(ss1) + '≤f<' + str(ss2)
			col = next_col(col)

		for i in range(book.nsheets):
			infolog("Writing {0} sheet".format(book.sheet_list[i]))	

			ws['A' + str(i+3)] = book.sheet_list[i]
			ws['C' + str(i+3)] = book.BPRawLength[i]
			ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2)
			
			col = 'D'
			for r1, r2 in zip(self.s1, self.s2):
				s, l, _ = book.get_Number_of_GPS(book.BP35[i], r1, r2) if len(book.BPxMinor[i])!=0 else (0,0)
				ws[col + str(i+3)] = str(round(s/l, 3))+'%' if l is not 0 else '-'
				col = next_col(col)
		infolog("End Sheet2")
Exemplo n.º 3
0
	def sheet1(self,ws, book):
		infolog("Start Sheet1")

		ws.title = "Polymorphic site"
		ws['A1'] = "strain"
		ws['C1'] = "Genome length (bp)"
		ws['D1'] = "Average of MAF"
		ws['E1'] = "Number of polymorphic site"
		ws.merge_cells(start_row=1, start_column=5, end_row=1, end_column=5+len(self.s1))
		ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2)
		ws.merge_cells(start_row=1, start_column=3, end_row=2, end_column=3)
		ws.merge_cells(start_row=1, start_column=4, end_row=2, end_column=4)

		col = 'E'
		for ci in range(len(self.s1)):
			ws[col+'2'] = str(self.s1[ci]) + '≤n<' + str(self.s2[ci])
			col = next_col(col)

		for ni in range(book.nsheets):
			infolog("Writing {} sheet".format(book.sheet_list[ni]))
			ws['A' + str(ni+3)] = book.sheet_list[ni]
			ws.merge_cells(start_row=3+ni, start_column=1, end_row=3+ni, end_column=2)
			ws['C' + str(ni+3)] = book.BPRawLength[ni]
			s, l, _ = book.get_Number_of_GPS(book.BP35[ni], 5.0, 51.0)
			ws['D' + str(ni+3)] = str(round(s / l, 3)) + '%'

			col = 'E'
			for ss1, ss2 in zip(self.s1, self.s2):
				ws[col + str(ni+3)] = book.get_Number_of_GPS(book.BP35[ni], ss1, ss2)[1]
				col = next_col(col)
		# ws['E2'] = "2.5≤f<5"
		# ws['F2'] = "5≤f<15"
		# ws['G2'] = "15≤f<25"
		# ws['H2'] = "25≤f"
		# ws['I2'] = "sum"
		# # A col
		
		# for i in range(book.nsheets):
		# 	infolog("{0} Writing {1} sheet".format(time.time(), book.sheet_list[i]))
			
		# 	ws['A' + str(i+3)] = book.sheet_list[i]
		# 	ws.merge_cells(start_row=3+i, start_column=1, end_row=3+i, end_column=2)
		# 	ws['C' + str(i+3)] = book.BPRawLength[i]
		# 	s, l, _ = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0)
		# 	ws['D' + str(i+3)] = str(round(s / l, 3)) + '%'
		# 	ws['E' + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 2.5, 5.0)[1]
		# 	ws["F" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 15.0)[1]
		# 	ws["G" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 15.0, 25.0)[1]
		# 	ws["H" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 25.0, 51.0)[1]
		# 	ws["I" + str(i+3)] = book.get_Number_of_GPS(book.BP35[i], 5.0, 51.0)[1]

		infolog("End Sheet1")		
Exemplo n.º 4
0
	def BaseComp(self, ws, types_, book):
		"""
			major가 같은 것, minor도 같은 것만 추출
			minor x -> 증가 포함
			1. major_idx_x == major_idx_y - 치환 제거
			2. (minor_idx_x == minor_idx_y) or ((minor_idx_x != minor_idx_y) and minor_x == 0)
		"""
		infolog("lowhigh BaseComp start")
		ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
		ws['B1'], ws['B2'] = 'Major', 'Minor'
		ws['D1'], ws['H1'], ws['L1'], ws['P1'] = 'A', 'G', 'C', 'T'
		ws['G2'], ws['K2'], ws['O2'] = 'a', 'a', 'a'
		ws['C2'], ws['L2'], ws['P2'] = 'g', 'g', 'g'
		ws['D2'], ws['H2'], ws['Q2'] = 'c', 'c', 'c'
		ws['E2'], ws['I2'], ws['M2'] = 't', 't', 't'
		cols = ['A','G','C','T']		

		bp = self.BPmergedinc if types_ is "INC" else self.BPmergeddec

		for r in range(book.nsheets):
			ws['A'+str(r+3)] = book.sheet_list[r]

		for r in range(book.nsheets):
			col = 'C'
			cond1 = bp[r]['major_idx_x'] == bp[r]['major_idx_y']
			cond2 = bp[r]['minor_idx_x'] == bp[r]['minor_idx_y']
			cond3 = logical_and( bp[r]['minor_idx_x'] != bp[r]['minor_idx_y'], bp[r]['minor_x'] == 0 )

			idx = logical_and(cond1, logical_or(cond2, cond3))
			tmp = bp[r][ idx.values ]

			for a in range(len(cols)):
				for b in range(len(cols)):
					if a == b:
						continue
					# major는 _x, _y 가 이미 같고, minor는 _y기준으로 하면 _x에서 minor가 0이던값 무시됨
					ws[col+str(r+3)] = len(tmp[ logical_and(tmp['major_idx_y'] == a, tmp['minor_idx_y'] == b).values ])
					col = next_col(col)
				col = next_col(col)

		infolog("lowhigh BaseComp end")
Exemplo n.º 5
0
	def PolymorphicSite(self, ws, types_, book):
		infolog("lowhigh PolymorphicSite start")
		ws.title = "INC-Polymorphic site"
		# A col
		ws['A1'] = types_
		ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=2)
		
		# C col
		ws['C1'] = "Increase in genetic polymrphism (%)"
		ws.merge_cells(start_row=1, start_column=3, end_row=1, end_column=6)

		from numpy import logical_and
		for i in range(book.nsheets):
			ws['A' + str(i+3)] = book.sheet_list[i]
			ws.merge_cells(start_row=i+3, start_column=1, end_row=i+3, end_column=2)
			col = 'C'
			bp = self.BPmergedinc[i][['diffofinc']] if types_ == 'INC' else self.BPmergeddec[i][['diffofdec']]
			for s1_, s2_ in zip(self.s1, self.s2):
				ws[col+'2'] = str(s1_) + '≤n<' + str(s2_) # 'Sum' if s1_ == 5.0 and s2_ == 51.0 else str(s1_) + '≤n<' + str(s2_) 
				ws[col+str(i+3)] = len(bp[ logical_and(bp >= s1_, bp < s2_).values]) if len(bp) is not 0 else 0
				col = next_col(col)			
		infolog("lowhigh PolymorphicSite end")
Exemplo n.º 6
0
	def ORFNCR(self, ws, types_, title, book):
		infolog("lowhigh {0} start".format(title))

		ws['A1'] = title
		ws.merge_cells(start_row=1, start_column=1, end_row=2, end_column=1)
		ws['B1'] = 'Length'
		ws.merge_cells(start_row=1, start_column=2, end_row=2, end_column=2)

		orfncr = book.ORF if title is 'ORF' else book.NCR
		col_diff = "diffofinc" if types_ == 'INC' else "diffofdec"
		bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec

		sum_, col = 0, 'B'
		for si in range(-1, book.nsheets):
			sum_ = 0
			if si != -1:
				ws[col+'2'] = book.sheet_list[si] 
			for r in range(len(orfncr)):
				if si == -1:
					ws['A'+str(r+3)] = orfncr[r]
					cnt = len(book.Dumas[ book.Dumas[book.col_ORF] == orfncr[r] ])
					ws[col+str(r+3)] = cnt
					sum_ += cnt
				else:
					cnt = len( bp[si][ bp[si][book.col_ORF] == orfncr[r] ] )
					ws[col+str(r+3)] = cnt
					sum_ += cnt
			ws[col+str(3+len(orfncr))] = sum_
			col = next_col(col)
		ws['A'+str(3+len(orfncr))] = 'Total'

		col = next_col(col)
		for i in range(len(self.s1)):			
			# ORF or CNR 종류
			ws[col+'1'] = str(self.s1[i])+'~'+str(self.s2[i]) #if self.s2[i]!=51.0 else str(self.s1[i])+'~'
			for r in range(len(orfncr)):
				ws[col+str(r+3)] = orfncr[r]
			col = next_col(col)

			col_GPS = col
			ws[col+'1'] = '{0} in genetic polymorphism'.format("Increase" if types_ == 'INC' else "Decrease")
			for s in range(book.nsheets):
				ws[col+'2'] = book.sheet_list[s]
				bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ]

				sum_ = 0
				for r in range(3, 3+len(orfncr)):
					cnt = len(bpx[ bpx[book.col_ORF] == orfncr[r-3]])
					ws[col+str(r)] = cnt	
					sum_ += cnt					
				ws[col+str(3+len(orfncr))] = sum_
				col = next_col(col)			

			ws[col+'1'] = 'Average {0}'.format("Increase" if types_ == 'INC' else "Decrease")
			for s in range(book.nsheets):			
				ws[col+'2'] = book.sheet_list[s]
				bpx = bp[s][ logical_and(bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i]).values ]
				sum_ = 0
				for r in range(3, 3+len(orfncr)):
					cnt = bpx[ bpx[book.col_ORF] == orfncr[r-3]][[col_diff]]					
					ws[col+str(r)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A'
					sum_ += cnt.sum()[0]
				col = next_col(col)

			ws[col+'1'] = 'Number of GPS / Length'
			for s in range(book.nsheets):			
				ws[col+'2'] = book.sheet_list[s]
				for r in range(3, 3+len(orfncr)):
					s, l = float(ws[col_GPS+str(r)].value), float(ws['B'+str(r)].value)
					ws[col+str(r)] = str( round(s/l, 6) ) if l != 0 else 'N/A'
				col_GPS = next_col(col_GPS)
				col = next_col(col)
			col = next_col(col)

		infolog("lowhigh {0} end".format(title))
Exemplo n.º 7
0
	def GenomeStr(self, ws, types_, book):
		infolog("lowhigh GenomeStr start")
		bp = self.BPmergedinc if types_ == 'INC' else self.BPmergeddec		
		col_diff = "diffofinc" if types_ == 'INC' else "diffofdec"

		# s1 ~ s2 
		for i in range(len(self.s1)):
			rows = i*(14+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1

			ws['A'+str(rows)] = 'Region'
			ws.merge_cells(start_row=rows, start_column=1, end_row=rows+1, end_column=1)
			ws['B'+str(rows)] = 'Dumas Length'
			ws.merge_cells(start_row=rows, start_column=2, end_row=rows+1, end_column=2)

			for r in range(2, 2+len(book.GenomeStructure)):
				ws['A' + str(rows+r)] = book.GenomeStructure[r-2]
				ws['B' + str(rows+r)] = len(book.Dumas[ book.Dumas[book.col_GenomeStructure] == book.GenomeStructure[r-2] ])

			ws['A' + str(rows+2+len(book.GenomeStructure))] = 'Total'
			ws['B' + str(rows+2+len(book.GenomeStructure))] = len(book.Dumas)

			sum_ = 0
			for r in range(len(book.RepeatRegion)):
				row = rows+r+3+len(book.GenomeStructure)
				ws['A' + str(row)] = book.RepeatRegion[r]
				cnt = len(book.Dumas[ book.Dumas[book.col_RepeatRegion] == book.RepeatRegion[r] ])
				ws['B' + str(row)] = cnt
				sum_ += cnt

			leng = rows+ len(book.GenomeStructure) + len(book.RepeatRegion)
			ws['A' + str(leng+3)] = 'Total'
			ws['B' + str(leng+3)] = sum_
			ws['A' + str(leng+4)] = 'ORF'
			ws['B' + str(leng+4)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)])	
			ws['A' + str(leng+5)] = 'NCR'
			ws['B' + str(leng+5)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)])

			col = 'C'
			ws[col+str(rows)] = '{0} in genetic polymorphism'.format("Increase" if types_ is 'INC' else "Decrease")
			ws.merge_cells(start_row=rows, start_column=3, end_row=rows, end_column=2+book.nsheets)
			ws[chr(ord(col)+book.nsheets)+str(rows)] = "Average {0}".format("Increase" if types_ is 'INC' else "Decrease")
			ws.merge_cells(start_row=rows, start_column=3+book.nsheets, end_row=rows, end_column=2+2*book.nsheets)
			for s in range(book.nsheets):
				idx = logical_and( bp[s][col_diff] >= self.s1[i], bp[s][col_diff] < self.s2[i] )
				bpx = bp[s][idx.values]
				
				ncol = chr(ord(col)+book.nsheets)
				ws[col+str(rows+1)] = book.sheet_list[s]
				ws[chr(ord(col)+book.nsheets)+str(rows+1)] = book.sheet_list[s]

				sum_, avg_ = 0, 0
				for r in range(2, 2+len(book.GenomeStructure)):
					cnt = bpx[ bpx[book.col_GenomeStructure] == book.GenomeStructure[r-2]][[col_diff]]
					ws[col+str(rows+r)] = len(cnt)
					ws[ncol+str(rows+r)] = str(round((cnt.sum()[0] / len(cnt)), 3))+'%' if len(cnt) is not 0 else 'N/A'
					avg_ += cnt.sum()[0]
					sum_ += len(cnt)
				# Total
				ws[col+str(rows+2+len(book.GenomeStructure))] = sum_
				ws[chr(ord(col)+book.nsheets)+str(rows+2+len(book.GenomeStructure))] = str(round(avg_/sum_, 3))+'%' if sum_ is not 0 else 'N/A'

				sum_, avg_ = 0, 0
				for r in range(len(book.RepeatRegion)):
					row = rows+r+3+len(book.GenomeStructure)
					cnt = bpx[ bpx[book.col_RepeatRegion] == book.RepeatRegion[r]][[col_diff]]
					ws[col+str(row)] = len(cnt)
					ws[ncol+str(row)] = str(round((cnt.sum()[0] / len(cnt)),3))+'%' if len(cnt) is not 0 else 'N/A'
					sum_ += len(cnt)
					avg_ += cnt.sum()[0]
				ws[col+str(3+leng)] = sum_
				ws[ncol+str(3+leng)] = str(round(avg_/sum_,3))+'%' if sum_ is not 0 else 'N/A'
				# ORF & NCR
				cnt = bpx[ bpx[book.col_ORF].isin(book.ORF)][[col_diff]]
				ws[col+str(4+leng)] = len(cnt)
				ws[ncol+str(4+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A'
				cnt = bpx[ bpx[book.col_ORF].isin(book.NCR)][[col_diff]]
				ws[col+str(5+leng)] = len(cnt)
				ws[ncol+str(5+leng)] = str(round(cnt.sum()[0] / len(cnt), 3))+'%' if len(cnt) is not 0 else 'N/A'
				col = next_col(col)

			ncol = chr(ord(ncol)+book.nsheets)
			ws[ncol+str(rows+2)] = str(self.s1[i])+"~"+str(self.s2[i]) #if self.s2[i] != 51.0 else str(self.s1[i])+"~"
		infolog("lowhigh GenomeStr end")
Exemplo n.º 8
0
	def sheet7(self, ws, book):
		infolog("Start sheet7")
		for i in range(0, len(self.s1)):
			r = i * (book.nsheets + 12) + 2
			ws["A"+str(r)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "%~"
			
			ws["B"+str(r)] = "Virus"
			ws.merge_cells(start_row=r, start_column=2, end_row=r+2, end_column=2)

			ws["C"+str(r)] = "Number of GPS"
			ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3)

			ws["D"+str(r)] = "GPS Mean"
			ws.merge_cells(start_row=r, start_column=4, end_row=r+2, end_column=4)

			ws["E"+str(r)] = "Major"
			ws.merge_cells(start_row=r, start_column=5, end_row=r+1, end_column=5)
			ws["E"+str(r+2)] = "Minor"

			ws['F'+str(r)] = 'A'
			ws.merge_cells(start_row=r, start_column=6, end_row=r+1, end_column=8)
			ws['I'+str(r)] = 'G'
			ws.merge_cells(start_row=r, start_column=9, end_row=r+1, end_column=11)
			ws['L'+str(r)] = 'C'
			ws.merge_cells(start_row=r, start_column=12, end_row=r+1, end_column=14)
			ws['O'+str(r)] = 'T'
			ws.merge_cells(start_row=r, start_column=15, end_row=r+1, end_column=17)	

			ws['E'+str(r+3+book.nsheets)] = 'sum'
			for x in range(0, book.nsheets):
				row, mafrow = r+x+3, r+book.nsheets+x+5
				s, l, minor_ = book.get_Number_of_GPS(book.BP35[x], self.s1[i], self.s2[i])
				mxr = book.BP35[x].loc[minor_.index]
				# s, l = self.book.get_Number_of_GPS(self.PxMinor[x], self.PxSum[x], self.s1[i], self.s2[i]) if len(self.PxMinor[x])!=0 else (0,0)
				ws["B"+str(row)] = book.sheet_list[x]
				ws['C'+str(row)] = len(minor_)
				ws['D'+str(row)] = str(round(s/l, 3)) + '%' if l != 0 else '-'
				
				col = 'F'
				for a in range(0, 4):
					for b in range(0, 4):
						if a == b:
							continue
						else:
							ws[col+str(r+2)] = book.col_basepair[b].lower()
							ext_ = mxr[logical_and( mxr['major_idx']==a, mxr['minor_idx']==b )]

							ws[col+str(row)] = len(ext_)

							if len(ext_) is not 0:
								# maf들의 평균
								maf_ =  (divide( ext_[['minor']], ext_[['sum']]) * 100).sum()[0]						
								ws[col+str(mafrow)] = str(round( maf_ / len(ext_), 3)) + '%'
							else:
								ws[col+str(mafrow)] = 'N/A'

							# 전체 개수의 maf
							if ws[col+str(r+3+book.nsheets)].value == None:							
								ws[col+str(r+3+book.nsheets)] =  len(ext_)
							else:
								ws[col+str(r+3+book.nsheets)].value += len(ext_)

							if ws[col+str(r+5+ 2*book.nsheets)].value == None:
								ws[col+str(r+5+ 2*book.nsheets)] = maf_
								# ws[col+str(r+5+ 2*book.nsheets+1)] = ext_[['sum']].sum()[0]
							else:
								# print(i, "mj ", a, "mn ", b, ext_[['minor']].sum()[0])
								ws[col+str(r+5+ 2*book.nsheets)].value += maf_
								# ws[col+str(r+5+ 2*book.nsheets+1)].value += ext_[['sum']].sum()[0]


						col = next_col(col)
			# total maf
			col = 'F'
			numrow, mnrow = r+3+book.nsheets, r+5+2*book.nsheets
			for c in range(0, 12):
				ws[col+str(mnrow)] = str(round( float(ws[col+str(mnrow)].value) / float(ws[col+str(numrow)].value), 3))+'%' if ws[col+str(numrow)].value != 0 else 'N/A'
				col = next_col(col)

		infolog("End sheet7")
Exemplo n.º 9
0
	def sheet4_5(self, ws, title, book):
		infolog("Start sheet {0}".format(4 if title is "ORF" else 5))
		ws['B2'] = book.filename
		ws["C2"] = "(BP_full)Length"
		ws.merge_cells(start_row=2, start_column=3, end_row=2, end_column=3+book.nsheets-1)
		ws["B3"] = title
		col = 'C'
		for i in range(0, book.nsheets):
			ws[col + str(3)] = book.sheet_list[i]
			col = next_col(col)		

		# Full - 35이하 포함
		col = 'B'
		orf_ = book.ORF if title is 'ORF' else book.NCR
		rows = list(range(4, len(orf_) + 4))
		for c in range(0, book.nsheets+1):
			cnt_ = 0
			for i in range(0, len(orf_)):
				if c is 0:
					ws[col+str(rows[i])] = orf_[i]
				else:
					tmp = len(book.BPRaw[c-1][ book.BPRaw[c-1][book.col_ORF] == orf_[i] ])
					ws[col+str(rows[i])] = tmp
					cnt_ += tmp
			ws[col + str(4+len(orf_))] = cnt_ if c is not 0 else 'total'
			col = next_col(col)

		# s1~s2 범위
		col = chr(ord('A')+3+book.nsheets)
		for i in range(0, len(self.s1)):
			co = (5+3*book.nsheets) * i + (5+book.nsheets)
			ws[col+str(2)] = str(self.s1[i]) + '~' + str(self.s2[i]) +'%' if self.s2[i] != 51.0 else str(self.s1[i]) + '%~'
			ws[col+str(3)] = title # NCR or ORF
			for ix in range(0, len(orf_)):
				ws[col+str(rows[ix])] = orf_[ix]
			ws[col+str(len(orf_) + 4)] = 'total'

			# Number of GPS of each sheets
			gps = list()
			col = next_col(col)			
			ws[col+str(2)] = "Number of GPS"
			ws.merge_cells(start_row=2, start_column=co, end_row=2, end_column=co+book.nsheets)

			dcol = next_n_col(col, book.nsheets)
			for n in range(0, book.nsheets):
				ws[col+str(3)] = book.sheet_list[n]
				_, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i])
				tx_rows = book.BP35[n].loc[ tx_minor.index ]

				g = []
				cnt_ = 0
				for ix in range(0, len(orf_)):
					mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ]
					ws[col+str(rows[ix])] = len(mrows)
					cnt_ += len(mrows)
					g.append(len(mrows))

					# 180612
					ws[dcol+str(rows[ix])] = float(ws[dcol+str(rows[ix])].value) + len(mrows) if ws[dcol+str(rows[ix])].value != None else len(mrows)

				ws[col+str(len(orf_) + 4)] = cnt_
				ws[dcol+str(len(orf_) + 4)] = float(ws[dcol+str(len(orf_) + 4)].value) + cnt_ if ws[dcol+str(len(orf_) + 4)].value != None else cnt_
				g.append(cnt_)
				gps.append(g)
				col = next_col(col)
			col = next_col(col)

			gcol = next_n_col(col, book.nsheets)
			# Average MAF at GPS  of each sheets
			ws[col+str(2)] = "Average MAF at GPS"
			ws.merge_cells(start_row=2, start_column=co+book.nsheets+1, end_row=2, end_column=co+2*book.nsheets+1)
			for n in range(0, book.nsheets):
				val, cnt_ = 0, 0
				ws[col+str(3)] = book.sheet_list[n]
				_, _, tx_minor = book.get_Number_of_GPS(book.BP35[n], self.s1[i], self.s2[i])
				tx_rows = book.BP35[n].loc[ tx_minor.index ]
				for ix in range(0, len(orf_)):
					mrows = tx_rows[ tx_rows[book.col_ORF] == orf_[ix] ]
					if len(mrows) is 0:
						ws[col+str(rows[ix])] = '-'

						# 180612
						ws[gcol+str(rows[ix])] = ws[gcol+str(rows[ix])].value if ws[gcol+str(rows[ix])].value != None else 0
					else:
						maf_ = divide(mrows[["minor"]], mrows[["sum"]]) * 100
						val += maf_.sum()[0]
						cnt_ += len(maf_)
						ws[col+str(rows[ix])] = str(round(maf_.sum()[0] / len(maf_), 3)) + '%'

						# 180612
						ws[gcol+str(rows[ix])] = float(ws[gcol+str(rows[ix])].value) + maf_.sum()[0] if ws[gcol+str(rows[ix])].value != None else maf_.sum()[0]
				# total
				ws[col+str(len(orf_)+4)] = str(round(val / cnt_, 3)) + '%' if cnt_ is not 0 else '-'
				ws[gcol+str(len(orf_)+4)] = float(ws[gcol+str(len(orf_)+4)].value) + val if ws[gcol+str(len(orf_)+4)].value != None else val
				col = next_col(col)
			col = next_col(col)
			# "Number of GPS / Length" of each sheets
			ws[col+str(2)] = "Number of GPS / Length * 100"
			ws.merge_cells(start_row=2, start_column=co+2*book.nsheets+2, end_row=2, end_column=co+3*book.nsheets+2)
			ncol = 'C'
			d2col = next_n_col(col, book.nsheets)
			for n in range(0, book.nsheets):
				ws[col+str(3)] = book.sheet_list[n]
				for ix in range(0, len(orf_)):
					l_ = int(str(ws[ncol+str(rows[ix])].value))
					ws[col+str(rows[ix])] = (gps[n][ix] / l_)*100 if l_ is not 0 else '-'
					ws[d2col+str(rows[ix])] = float(ws[d2col+str(rows[ix])].value) + l_ if ws[d2col+str(rows[ix])].value != None else l_
				# total
				l_ = int(str(ws[ncol+str(len(orf_)+4)].value))
				ws[col+str(len(orf_)+4)] = (gps[n][-1] / l_)*100 if l_ is not 0 else '-'
				ws[d2col+str(len(orf_)+4)] = float(ws[d2col+str(len(orf_)+4)].value) + l_ if ws[d2col+str(len(orf_)+4)].value != None else l_

				ncol = next_col(ncol)
				col = next_col(col)
			
			for row_ in range(4, len(orf_)+5):
				ws[dcol+str(row_)] = float(ws[dcol+str(row_)].value) / book.nsheets if float(ws[dcol+str(row_)].value) != 0 else 0
				ws[gcol+str(row_)] = str(round(float(ws[gcol+str(row_)].value) / float(ws[dcol+str(row_)].value), 3)) + '%' if float(ws[dcol+str(row_)].value) != 0 else '-'
				ws[d2col+str(row_)] = float(ws[dcol+str(row_)].value) / (float(ws[d2col+str(row_)].value) / 3) * 100 if float(ws[d2col+str(row_)].value) != 0 else 0

			# end for s1
			col = next_col(next_col(col))
		infolog("End sheet {0}".format( 4 if title is "ORF" else 5))
Exemplo n.º 10
0
	def sheet3(self,ws, book):
		infolog("Start sheet3")
		
		for i in range(0, len(self.s1)):
			infolog("Writing {0} ~ {1}".format(self.s1[i], self.s2[i]))

			# r : (s1,s2)의 범위 별 데이터의 row 변수
			r = i*(15+len(book.GenomeStructure) + len(book.RepeatRegion))+ 1
			ws['A'+str(r)] = "Region"
			ws.merge_cells(start_row=r, start_column=1, end_row=r+2, end_column=2)
			ws['C'+str(r)] = "Dumas Length (bp)"
			ws.merge_cells(start_row=r, start_column=3, end_row=r+2, end_column=3)

			ws['A'+str(r+3)] = "Genome Structure"
			ws.merge_cells(start_row=r+3, start_column=1, end_row=r+3+len(book.GenomeStructure), end_column=1)
			ws['A'+str(r+10)] = "Repeat region"
			ws.merge_cells(start_row=r+4+len(book.GenomeStructure), start_column=1, end_row=r+4+len(book.GenomeStructure)+len(book.RepeatRegion), end_column=1)
			
			# B, C 컬럼 데이터 : GenomeStructure, RepeatRegion 종류와 길이
			# G_rows : GenomeStructure데이터를 넣기 위한 row 범위
			# R_rows : RepeatRegion데이터를 넣기 위한 row 범위
			G_rows = list(range(r+3, r+3+len(book.GenomeStructure)))
			cnt_num = 0
			for ix in range(0, len(book.GenomeStructure)):
				ws['B' + str(G_rows[ix])] = book.GenomeStructure[ix]
				nlen = len(book.Dumas[ book.Dumas[ book.col_GenomeStructure ] == book.GenomeStructure[ix]]) # dumas length 기준
				ws['C' + str(G_rows[ix])] = nlen
				cnt_num += nlen
			ws['B' + str(r+3+len(book.GenomeStructure))] = "Total"
			ws['C' + str(r+3+len(book.GenomeStructure))] = cnt_num
			cnt_num = 0

			nr = r+4+len(book.GenomeStructure)+len(book.RepeatRegion)
			R_rows = list(range(r+4+len(book.GenomeStructure), nr))
			for ix in range(0, len(book.RepeatRegion)):
				ws['B' + str(R_rows[ix])] = book.RepeatRegion[ix]
				nlen = len(book.Dumas[ book.Dumas[ book.col_RepeatRegion ] == book.RepeatRegion[ix]])
				ws['C'+str(R_rows[ix])] = nlen
				cnt_num += nlen
			ws['B'+str(nr)] = 'Total'
			ws['C' + str(nr)] = cnt_num

			## ORF NCR
			ws["A"+str(nr+1)] = "ORF"
			ws["A"+str(nr+2)] = "NCR"
			# ORF NCR Full Length
			ws['C'+str(nr+1)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.ORF)])
			ws['C'+str(nr+2)] = len(book.Dumas[ book.Dumas[book.col_ORF].isin(book.NCR)])
			# end B, C columns
			
			# ncol = 'D'
			# ncol : Column of 'Number of GPS' is need when calculating 'Number of GPS / Length'
			ncol = col = 'D'

			ws[col+str(r)] = "Number of GPS"
			gcol = next_n_col(col, book.nsheets)
			for c in range(0, book.nsheets):
				ws[col+str(r+2)] = book.sheet_list[c]
				# 35 이상 
				s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i])
				# maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-'

				tx_rows = book.BP35[c].loc[ minor_.index ]
				tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)]
				tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)]

				ws.merge_cells(start_row=r, start_column=4, end_row=r, end_column=4 + book.nsheets)
				ws[col+str(r+1)] = book.filename
				ws.merge_cells(start_row=r+1, start_column=4, end_row=r+1, end_column=4 + book.nsheets)
				self.insert_value_in_cell(ws, book, G_rows, col, gcol, book.BP35[c], minor_, book.col_GenomeStructure , "GPS")
				self.insert_value_in_cell(ws, book, R_rows, col, gcol, book.BP35[c], minor_, book.col_RepeatRegion , "GPS")
				ws[col+str(nr+1)] = len(tx_orf)
				ws[col+str(nr+2)] = len(tx_ncr)
				#180612
				ws[gcol+str(nr+1)] = float(ws[gcol+str(nr+1)].value) + len(tx_orf) if ws[gcol+str(nr+1)].value != None else len(tx_orf)
				ws[gcol+str(nr+2)] = float(ws[gcol+str(nr+2)].value) + len(tx_ncr) if ws[gcol+str(nr+2)].value != None else len(tx_ncr)
				col = next_col(col)
			col = next_col(col)

			# "Average MAF at GPS"
			ws[col+str(r)] = "Average MAF at GPS"
			dcol = next_n_col(col, book.nsheets)
			for c in range(0, book.nsheets):
				ws[col+str(r+2)] = book.sheet_list[c]
				ws.merge_cells(start_row=r, start_column=5 + book.nsheets, end_row=r, end_column=5 + 2 * book.nsheets)
				ws[col+str(r+1)] = book.filename
				ws.merge_cells(start_row=r+1, start_column=5 + book.nsheets, end_row=r+1, end_column=5 + 2 * book.nsheets)

				s, l, minor_ = book.get_Number_of_GPS(book.BP35[c], self.s1[i], self.s2[i])
				# maf_ = str(round(s/l, 3)) + '%' if l is not 0 else '-'
				
				tx_rows = book.BP35[c].loc[ minor_.index ]
				tx_orf = tx_rows[ tx_rows[ book.col_ORF ].isin(book.ORF)]
				tx_ncr = tx_rows[ tx_rows[ book.col_ORF ].isin(book.NCR)]

				self.insert_value_in_cell(ws, book, G_rows, col, dcol, book.BP35[c], minor_, book.col_GenomeStructure , "MAF")
				self.insert_value_in_cell(ws, book, R_rows, col, dcol, book.BP35[c], minor_, book.col_RepeatRegion , "MAF")

				# 180612
				maf_ = (divide(tx_orf[["minor"]], book.BP35[c][['sum']].loc[ tx_orf.index]) * 100).sum()[0]
				ws[col+str(nr+1)] = str(round( maf_ /len(tx_orf), 3)) +'%' if len(tx_orf) is not 0 else '-'
				ws[dcol+str(nr+1)] = float(ws[dcol+str(nr+1)].value) + maf_ if ws[dcol+str(nr+1)].value != None else maf_

				maf_ = (divide(tx_ncr[["minor"]], book.BP35[c][['sum']].loc[ tx_ncr.index]) * 100).sum()[0]
				ws[col+str(nr+2)] = str(round( maf_ /len(tx_ncr), 3)) +'%' if len(tx_ncr) is not 0 else '-'
				ws[dcol+str(nr+2)] = float(ws[dcol+str(nr+2)].value) + maf_ if ws[dcol+str(nr+2)].value != None else maf_

				col = next_col(col)

			col = next_col(col)
			# "Number of GPS / length"
			d2col = next_n_col(col, book.nsheets)
			ws[col+str(r)] = "Number of GPS / length * 100"

			for c in range(0, book.nsheets):
				ws[col+str(r+2)] = book.sheet_list[c]
				ws.merge_cells(start_row=r, start_column=6 + 2 *book.nsheets, end_row=r, end_column=6 + 3 *book.nsheets)
				ws[col+str(r+1)] = book.filename
				ws.merge_cells(start_row=r+1, start_column=6 + 2 *book.nsheets, end_row=r+1, end_column=6 + 3 *book.nsheets)
				ridx = sum_ = 0
				
				for rs in range(r+3, r+5+len(book.GenomeStructure)+len(book.RepeatRegion)):
					if ridx < len(book.GenomeStructure):
						llen_ = len(book.BP35[c][ book.BP35[c][ book.col_GenomeStructure] == book.GenomeStructure[ridx]])
						sum_ += llen_
					elif ridx == len(book.GenomeStructure):
						llen_ = sum_
						sum_ = 0
					elif len(book.GenomeStructure) < ridx and ridx < len(book.GenomeStructure)+1+len(book.RepeatRegion):
						llen_ = len(book.BP35[c][ book.BP35[c][ book.col_RepeatRegion] == book.RepeatRegion[ridx-1-len(book.GenomeStructure)]])
						sum_ += llen_
					else:
						llen_ = sum_

					ws[col + str(rs)] = (ws[ncol+str(rs)].value / llen_) * 100 if llen_ is not 0 else '-'
					ridx = ridx + 1

					# 180612 강제지정
					ws[d2col + str(rs)] = ws[d2col + str(rs)].value + llen_ if ws[d2col + str(rs)].value != None else llen_

				# ORF & NCR
				for rs in range(2):
					tmp, rs_ = 0, r+5+len(book.GenomeStructure)+len(book.RepeatRegion) + rs
					if rs == 0:
						tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.ORF)])
						ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-'
						
					else:
						tmp = len(book.BP35[c][ book.BP35[c][book.col_ORF].isin(book.NCR)])
						ws[col+str(rs_)] = (ws[ncol+str(rs_)].value / tmp) * 100 if tmp != 0 else '-'

					# 180612 강제지정
					ws[d2col + str(rs_)] = ws[d2col + str(rs_)].value + tmp if ws[d2col + str(rs_)].value != None else tmp

				ncol = next_col(ncol)
				col = next_col(col)

			# 180612 강제지정 
			for rs in range(r+3, r+7+len(book.GenomeStructure)+len(book.RepeatRegion)):
				# 수정   : 위치를 올림, 원래 위치 [수정2]
				ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-'
				ws[gcol+str(rs)].value = float(ws[gcol+str(rs)].value) / book.nsheets
				# 수정2   :  ws[dcol+str(rs)] = str(round(float(ws[dcol+str(rs)].value) / float(ws[gcol+str(rs)].value), 3)) + '%' if float(ws[dcol+str(rs)].value) != 0 else '-'
				ws[d2col + str(rs)] = (float(ws[gcol + str(rs)].value) / ( float(ws[d2col + str(rs)].value) / book.nsheets)) * 100 if float(ws[d2col + str(rs)].value) != 0.0 else 0

			col = next_col(col)
			ws[col+str(r+3)] = str(self.s1[i]) + "~" + str(self.s2[i]) + "%" #if self.s2[i] != 51.0 else str(self.s1[i]) + "% 이상"			
		infolog("finished writing GenomeStructure & RepeatRegion")

		#### Full Sequence				
		col = next_col(next_col(next_col(col)))
		self.Full_Seq_in_sheet3(ws, col, book)
		infolog("End sheet3")
Exemplo n.º 11
0
	def Full_Seq_in_sheet3(self, ws, col, book):
		infolog("Start Full_Seq_in_sheet3")
		
		# Writing list of GenomeStructure & RepeatRegion, NCR & ORF
		for r in range(4, 4+len(book.GenomeStructure)):
			ws[col+str(r)] = book.GenomeStructure[r-4]
		ws[col+str(4+len(book.GenomeStructure))] = 'Total'
		for r in range(0, len(book.RepeatRegion)):
			row = r+5+len(book.GenomeStructure)
			ws[col+str(row)] = book.RepeatRegion[r]
		ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = 'Total'

		rs_ = 5+len(book.GenomeStructure)+len(book.RepeatRegion)
		ws[col+str(rs_+1)], ws[col+str(rs_+2)] = 'ORF', 'NCR'

		# Writing the count of each sheets
		col = next_col(col)
		for i in range(0, book.nsheets):
			infolog("{0} Writing {1} sheet".format(time.time(), i))
			ws[col+str(2)] = book.sheet_list[i]

			# 35 이하 포함
			ws[col+str(3)] = '(BP_full)Length'
			sum_ = 0
			for r in range(4, 4+len(book.GenomeStructure)):
				tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0
				ws[col+str(r)] = tmp
				sum_ += tmp
			ws[col+str(4+len(book.GenomeStructure))] = sum_
			sum_=0
			for r in range(0, len(book.RepeatRegion)):
				row = r+5+len(book.GenomeStructure)
				tmp = len(book.BPRaw[i][ book.BPRaw[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BPRaw[i])!=0 else 0
				ws[col+str(row)] = tmp
				sum_ += tmp
			ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_

			# ORF & NCR
			ws[col+str(rs_+1)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.ORF)]) if len(book.BPRaw[i])!=0 else 0
			ws[col+str(rs_+2)] = len(book.BPRaw[i][ book.BPRaw[i][ book.col_ORF].isin(book.NCR)]) if len(book.BPRaw[i])!=0 else 0

			# sum of basepair 35 이상
			col = next_col(col)
			ws[col+str(3)] = '(BP_35이상)Length'
			sum_ = 0
			for r in range(4, 4+len(book.GenomeStructure)):
				tmp = len(book.BP35[i][ book.BP35[i][ book.col_GenomeStructure] == book.GenomeStructure[r-4]]) if len(book.BPRaw[i])!=0 else 0
				ws[col+str(r)] = tmp
				sum_ += tmp
			ws[col+str(4+len(book.GenomeStructure))] = sum_
			sum_=0
			for r in range(0, len(book.RepeatRegion)):
				row = r+5+len(book.GenomeStructure)
				tmp = len(book.BP35[i][ book.BP35[i][ book.col_RepeatRegion] == book.RepeatRegion[r]]) if len(book.BP35[i])!=0 else 0
				ws[col+str(row)] = tmp
				sum_ += tmp
			ws[col+str(5+len(book.GenomeStructure)+len(book.RepeatRegion))] = sum_

			ws[col+str(rs_+1)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.ORF)]) if len(book.BP35[i])!=0 else 0
			ws[col+str(rs_+2)] = len(book.BP35[i][ book.BP35[i][ book.col_ORF ].isin(book.NCR)]) if len(book.BP35[i])!=0 else 0
			col = next_col(col)
		infolog("End Full_Seq_in_sheet3")