def snv_pattern(self, iFileList, xLabel):
    """Plot per-file counts of the six strand-collapsed SNV classes.

    Each input file is read as tab-separated text.  The 5th and 6th
    columns (indices 4 and 5) are concatenated into a two-character key
    such as ``'TG'`` (presumably reference base + alternate base -- confirm
    against the producer of these files) and occurrences of each key are
    counted per file.  Keys are then merged pairwise into the classes
    T>G/A>C, T>C/A>G, T>A/A>T, C>A/G>T, C>G/G>C and C>T/G>A, and the
    per-file totals plus the six per-class series are handed to
    ``PyPlot.single_bar_multi_bar_vertical_proportion``.

    Args:
        iFileList: Sequence of paths to tab-separated files with at least
            six columns per non-empty line.
        xLabel: Passed through unchanged to the plotting call.

    Raises:
        IndexError: If a non-empty line has fewer than six columns.
    """
    import numpy as np

    file_count = len(iFileList)
    counts = {}
    for idx, path in enumerate(iFileList):
        # ``with`` guarantees the handle is closed even if a line is malformed.
        with open(path) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                fields = line.split('\t')
                change = fields[4] + fields[5]
                counts.setdefault(change, [0] * file_count)[idx] += 1

    def per_file(key):
        # A substitution that never occurs in any file contributes zeros
        # instead of raising KeyError.
        return np.array(counts.get(key, [0] * file_count))

    # (forward key, reverse-complement key) for each plotted class, in the
    # order the series are passed to the plot.
    class_pairs = [
        ('TG', 'AC'),  # T>G/A>C
        ('TC', 'AG'),  # T>C/A>G
        ('TA', 'AT'),  # T>A/A>T
        ('CA', 'GT'),  # C>A/G>T
        ('CG', 'GC'),  # C>G/G>C
        ('CT', 'GA'),  # C>T/G>A
    ]
    class_totals = [per_file(fwd) + per_file(rev) for fwd, rev in class_pairs]

    # Overall SNV count per file is the element-wise sum of the six classes.
    snv_total = class_totals[0]
    for series in class_totals[1:]:
        snv_total = snv_total + series

    pp = PyPlot()
    pp.single_bar_multi_bar_vertical_proportion(
        list(snv_total), [list(series) for series in class_totals], xLabel)
def __legacy_box_plot(self, iFileList, filename, xLabel=0, yLabel=0):
    """Box-plot the line counts of several groups of files.

    Args:
        iFileList: Iterable of groups; each group is an iterable of file
            paths.  Every file is reduced to its number of lines, so each
            group becomes one list of counts.
        filename: Passed to the ``PyPlot`` constructor.
        xLabel: Passed through to ``PyPlot.box_plot``.
        yLabel: Accepted for interface compatibility; not used here.
    """
    pp = PyPlot(filename)
    counts_per_group = []
    for group in iFileList:
        group_counts = []
        for path in group:
            # ``with`` closes the handle even if reading fails part-way.
            with open(path) as handle:
                group_counts.append(sum(1 for _ in handle))
        counts_per_group.append(group_counts)
    pp.box_plot(counts_per_group, xLabel)
def snv_region_based_annotation(self, iFile):
    """Draw a multi-series bar chart from a region-annotation table.

    The file is tab-separated with one header line (skipped).  In every
    following line the first column is the row name and the remaining
    columns are integers.  The first four values of the rows named
    ``Coding``, ``Intronic``, ``NonCoding``, ``Intergenic`` and ``Genomic``
    are passed, in that order, to ``PyPlot.multi_bar``.

    Args:
        iFile: Path to the tab-separated table.

    Raises:
        KeyError: If one of the five expected row names is absent.
        ValueError: If a value column is not an integer.
    """
    rows = {}
    # ``with`` closes the handle even when a value fails to parse.
    with open(iFile) as handle:
        handle.readline()  # discard the header line
        for line in handle:
            fields = line.strip().split('\t')
            rows[fields[0]] = [int(value) for value in fields[1:]]

    region_names = ('Coding', 'Intronic', 'NonCoding', 'Intergenic', 'Genomic')
    pp = PyPlot()
    pp.multi_bar([rows[name][0:4] for name in region_names])
def box_plot(self, iFile, xLabel):
    """Produce one two-group box plot per data row of a table.

    The file is tab-separated with one header line (skipped).  For each
    following line, the last eight columns are parsed as integers and split
    into two groups of four (columns -8..-5 and -4..-1).  Before plotting,
    ``pp.filename`` is set to ``<iFile>.<first column>.pdf``.

    Args:
        iFile: Path to the tab-separated table; also the prefix of each
            output file name.
        xLabel: Indexable with at least two string entries; entry 0 labels
            the first group and entry 1 the second, each suffixed with
            ``':' + <first column>``.

    Raises:
        ValueError: If one of the last eight columns is not an integer.
    """
    pp = PyPlot()
    # ``with`` closes the handle even if parsing or plotting raises.
    with open(iFile) as handle:
        handle.readline()  # discard the header line
        for line in handle:
            fields = line.strip().split('\t')
            row_name = fields[0]
            pp.filename = iFile + '.' + row_name + '.pdf'
            group1 = [int(value) for value in fields[-8:-4]]
            group2 = [int(value) for value in fields[-4:]]
            pp.box_plot(
                [group1, group2],
                [xLabel[0] + ':' + row_name, xLabel[1] + ':' + row_name])
def gene_two_group_ranksum_test_matshow(self, iFile, geneNum, sampleNameList):
    """Show the first ``geneNum`` rows of a table via ``PyPlot.heatmap_matshow``.

    Each line is split on whitespace: the first token is collected as the
    row (gene) name and the remaining tokens are parsed as integers to form
    one matrix row.

    Args:
        iFile: Path to the whitespace-separated table (no header is skipped).
        geneNum: Maximum number of leading lines to use; values <= 0 yield
            an empty matrix.
        sampleNameList: Passed through to ``PyPlot.heatmap_matshow`` as the
            second argument.

    Raises:
        IndexError: If one of the used lines is blank.
        ValueError: If a value token is not an integer.
    """
    gene_names = []
    matrix = []
    # ``with`` closes the handle even when a value fails to parse.
    with open(iFile) as handle:
        for row_number, line in enumerate(handle, 1):
            if row_number > geneNum:
                # Everything past the requested rows is ignored, so stop
                # reading instead of scanning the rest of the file.
                break
            fields = line.strip().split()
            gene_names.append(fields[0])
            matrix.append([int(value) for value in fields[1:]])

    pp = PyPlot()
    pp.heatmap_matshow(matrix, sampleNameList, gene_names)
def venn_diagram(self, iFile1, iFile2, iFile3=0, setname1='A', setname2='B',
                 setname3='C', filename='test.pdf'):
    """Draw a two- or three-set Venn diagram from line-oriented files.

    Every file is turned into the set of its whitespace-stripped lines.
    The sets and their names are passed to ``PyPlot.venn_diagram``.

    Args:
        iFile1: Path to the file providing the first set.
        iFile2: Path to the file providing the second set.
        iFile3: Optional path to a third file.  ``0`` (the default) or
            ``None`` means "two sets only".
        setname1: Name passed for the first set.
        setname2: Name passed for the second set.
        setname3: Name passed for the third set (three-set case only).
        filename: Passed to the ``PyPlot`` constructor.
    """
    def read_line_set(path):
        # ``with`` closes the handle even if reading fails part-way.
        with open(path) as handle:
            return {line.strip() for line in handle}

    pp = PyPlot(filename)
    sets = [read_line_set(iFile1), read_line_set(iFile2)]
    if iFile3 in (0, None):
        pp.venn_diagram(sets, setname1, setname2)
    else:
        sets.append(read_line_set(iFile3))
        pp.venn_diagram(sets, setname1, setname2, setname3)
def _plot_snv_number(self, yList):
    """Forward ``yList`` to ``single_bar`` on a default-constructed ``PyPlot``.

    Args:
        yList: Values handed unchanged to ``PyPlot.single_bar``.
    """
    PyPlot().single_bar(yList)