def snv_pattern(self, iFileList, xLabel):
    """Plot, per input file, the share of each strand-collapsed SNV class.

    Every line of every file is split on tabs, and the concatenation of
    columns 5 and 6 (indices 4 and 5) is used as a two-letter substitution
    code (presumably reference allele + alternate allele -- confirm against
    the input format). Occurrences are counted per file, complementary codes
    are pooled into six classes, and the totals plus the six class series are
    handed to ``PyPlot.single_bar_multi_bar_vertical_proportion``.

    Args:
        iFileList: Sequence of paths to tab-separated files, one per sample.
        xLabel: Passed through unchanged to the plotting call.

    Raises:
        IndexError: If a line has fewer than six tab-separated columns.
    """
    import numpy as np

    n_files = len(iFileList)
    counts = {}
    for idx, path in enumerate(iFileList):
        # ``with`` guarantees the handle is closed even if a line is malformed.
        with open(path) as handle:
            for line in handle:
                fields = line.strip().split('\t')
                change = fields[4] + fields[5]
                counts.setdefault(change, [0] * n_files)[idx] += 1

    def pooled(first, second):
        # A code that never occurred in any file contributes zeros instead of
        # raising KeyError.
        zeros = [0] * n_files
        return list(np.array(counts.get(first, zeros)) +
                    np.array(counts.get(second, zeros)))

    # Order matters: it is the order in which the series reach the plot.
    class_series = [
        pooled('TG', 'AC'),  # T>G/A>C
        pooled('TC', 'AG'),  # T>C/A>G
        pooled('TA', 'AT'),  # T>A/A>T
        pooled('CA', 'GT'),  # C>A/G>T
        pooled('CG', 'GC'),  # C>G/G>C
        pooled('CT', 'GA'),  # C>T/G>A
    ]
    totals = list(np.sum(np.array(class_series), axis=0))

    pp = PyPlot()
    pp.single_bar_multi_bar_vertical_proportion(totals, class_series, xLabel)
def venn_diagram(self, iFile1, iFile2, iFile3=0, setname1="A", setname2="B", setname3="C", filename="test.pdf"):
    """Draw a two- or three-set Venn diagram from line-oriented files.

    Each file contributes the set of its whitespace-stripped lines. The sets
    are passed to ``PyPlot.venn_diagram`` together with the set names.

    Args:
        iFile1: Path of the first file.
        iFile2: Path of the second file.
        iFile3: Optional path of a third file. ``0`` (the default) or ``None``
            selects the two-set diagram.
        setname1: Name passed for the first set.
        setname2: Name passed for the second set.
        setname3: Name passed for the third set (three-set diagram only).
        filename: Passed to the ``PyPlot`` constructor.
    """

    def read_lines(path):
        # The context manager closes the file even if reading fails.
        with open(path) as handle:
            return {line.strip() for line in handle}

    pp = PyPlot(filename)
    first = read_lines(iFile1)
    second = read_lines(iFile2)

    if iFile3 is None or iFile3 == 0:
        pp.venn_diagram([first, second], setname1, setname2)
    else:
        third = read_lines(iFile3)
        pp.venn_diagram([first, second, third], setname1, setname2, setname3)
def box_plot(self, iFile, xLabel):
    """Call ``PyPlot.box_plot`` once per data row of a tab-separated file.

    The first line of ``iFile`` is skipped as a header. For each remaining
    row, the last eight columns are parsed as integers: the first four of
    them form group one and the last four form group two. Before each plot
    call ``pp.filename`` is set to ``<iFile>.<first column>.pdf``.

    Args:
        iFile: Path of the tab-separated input table.
        xLabel: Indexable holding two label prefixes; each is extended with
            ``":" + <first column>`` to label the two groups.

    Raises:
        ValueError: If one of the parsed columns is not an integer.
    """
    pp = PyPlot()
    # ``with`` closes the file even when int() or the plot call raises.
    with open(iFile) as handle:
        handle.readline()  # discard the header line
        for line in handle:
            fields = line.strip().split("\t")
            row_name = fields[0]
            pp.filename = iFile + "." + row_name + ".pdf"
            group1 = [int(x) for x in fields[-8:-4]]
            group2 = [int(x) for x in fields[-4:]]
            pp.box_plot(
                [group1, group2],
                [xLabel[0] + ":" + row_name, xLabel[1] + ":" + row_name],
            )
def __legacy_box_plot(self, iFileList, filename, xLabel=0, yLabel=0):
    """Box-plot the line counts of grouped files.

    ``iFileList`` is an iterable of groups, each group being an iterable of
    file paths. Every file is reduced to its number of lines, and the
    resulting list of per-group count lists is passed to
    ``PyPlot.box_plot`` along with ``xLabel``.

    Args:
        iFileList: Iterable of iterables of file paths.
        filename: Passed to the ``PyPlot`` constructor.
        xLabel: Passed through to ``PyPlot.box_plot``.
        yLabel: Accepted for interface compatibility; not used here.
    """
    pp = PyPlot(filename)
    groups = []
    for paths in iFileList:
        line_counts = []
        for path in paths:
            # ``with`` closes the handle even if reading fails midway.
            with open(path) as handle:
                line_counts.append(sum(1 for _ in handle))
        groups.append(line_counts)
    pp.box_plot(groups, xLabel)
def gene_two_group_ranksum_test_matshow(self, iFile, geneNum, sampleNameList):
    """Show the first ``geneNum`` rows of a whitespace-separated table as a heatmap.

    Each of the first ``geneNum`` lines is split on whitespace; the first
    token is kept as the row label and the remaining tokens are parsed as
    integers. The matrix, ``sampleNameList`` and the row labels are passed
    to ``PyPlot.heatmap_matshow``.

    Args:
        iFile: Path of the input table (no header line is skipped).
        geneNum: Maximum number of leading lines to read.
        sampleNameList: Passed through to ``PyPlot.heatmap_matshow``.

    Raises:
        IndexError: If one of the lines read is blank.
        ValueError: If a value token is not an integer.
    """
    gene_names = []
    matrix = []
    with open(iFile) as handle:
        for row, line in enumerate(handle, start=1):
            if row > geneNum:
                # Nothing past this point is used, so stop reading the file.
                break
            fields = line.strip().split()
            gene_names.append(fields[0])
            matrix.append([int(x) for x in fields[1:]])

    pp = PyPlot()
    pp.heatmap_matshow(matrix, sampleNameList, gene_names)
def snv_region_based_annotation(self, iFile):
    """Plot the first four counts of five named rows as a multi-bar chart.

    The first line of ``iFile`` is skipped as a header. Each remaining line
    is split on tabs; the first column is the row name and the other columns
    are parsed as integers. The first four values of the rows ``Coding``,
    ``Intronic``, ``NonCoding``, ``Intergenic`` and ``Genomic`` (in that
    order) are passed to ``PyPlot.multi_bar``.

    Args:
        iFile: Path of the tab-separated annotation summary.

    Raises:
        KeyError: If one of the five expected row names is absent.
        ValueError: If a value column is not an integer.
    """
    counts_by_region = {}
    # ``with`` closes the file even when int() rejects a column.
    with open(iFile) as handle:
        handle.readline()  # discard the header line
        for line in handle:
            fields = line.strip().split('\t')
            counts_by_region[fields[0]] = [int(x) for x in fields[1:]]

    regions = ('Coding', 'Intronic', 'NonCoding', 'Intergenic', 'Genomic')
    pp = PyPlot()
    pp.multi_bar([counts_by_region[region][0:4] for region in regions])
def box_plot(self, iFile, xLabel):
    """Produce one two-group box plot for every data row of ``iFile``.

    The header line is skipped. Each following line is split on tabs; its
    last eight columns are converted to integers, the first four of those
    forming the first group and the final four the second group.
    ``pp.filename`` is set to ``<iFile>.<first column>.pdf`` before each
    ``PyPlot.box_plot`` call.

    Args:
        iFile: Path of the tab-separated input table.
        xLabel: Indexable with two label prefixes; ``':' + <first column>``
            is appended to each to label the groups.

    Raises:
        ValueError: If one of the parsed columns is not an integer.
    """
    pp = PyPlot()
    # The context manager releases the handle even if a row fails to parse.
    with open(iFile) as handle:
        handle.readline()  # header is not used
        for line in handle:
            fields = line.strip().split('\t')
            tag = fields[0]
            pp.filename = iFile + '.' + tag + '.pdf'
            first_group = [int(value) for value in fields[-8:-4]]
            second_group = [int(value) for value in fields[-4:]]
            labels = [xLabel[0] + ':' + tag, xLabel[1] + ':' + tag]
            pp.box_plot([first_group, second_group], labels)
def snv_pattern(self, iFileList, xLabel):
    """Plot the per-file proportions of six pooled SNV substitution classes.

    For each file, every line is split on tabs and columns 5 and 6
    (indices 4 and 5) are joined into a two-letter substitution code
    (presumably reference + alternate allele -- confirm against the input
    format). Per-file counts of complementary codes are added together to
    give six classes; their element-wise sum is the per-file total. Totals,
    class series and ``xLabel`` are passed to
    ``PyPlot.single_bar_multi_bar_vertical_proportion``.

    Args:
        iFileList: Sequence of paths to tab-separated files, one per sample.
        xLabel: Passed through unchanged to the plotting call.

    Raises:
        IndexError: If a line has fewer than six tab-separated columns.
    """
    import numpy as np

    n_files = len(iFileList)
    counts = {}
    for idx, path in enumerate(iFileList):
        # Context manager: the handle is released even on a malformed line.
        with open(path) as handle:
            for line in handle:
                fields = line.strip().split("\t")
                change = fields[4] + fields[5]
                counts.setdefault(change, [0] * n_files)[idx] += 1

    # Complementary code pairs, in the order the plot receives them.
    code_pairs = [
        ("TG", "AC"),  # T>G/A>C
        ("TC", "AG"),  # T>C/A>G
        ("TA", "AT"),  # T>A/A>T
        ("CA", "GT"),  # C>A/G>T
        ("CG", "GC"),  # C>G/G>C
        ("CT", "GA"),  # C>T/G>A
    ]
    zeros = [0] * n_files
    # ``.get`` keeps a substitution that never occurs from raising KeyError.
    class_series = [
        list(np.array(counts.get(first, zeros)) + np.array(counts.get(second, zeros)))
        for first, second in code_pairs
    ]
    totals = list(np.sum(np.array(class_series), axis=0))

    pp = PyPlot()
    pp.single_bar_multi_bar_vertical_proportion(totals, class_series, xLabel)
def snv_region_based_annotation(self, iFile):
    """Draw a multi-bar chart from a region-by-count summary table.

    After skipping the header line, every row of ``iFile`` is split on tabs
    into a name (first column) and integer values (remaining columns). The
    first four values of the ``Coding``, ``Intronic``, ``NonCoding``,
    ``Intergenic`` and ``Genomic`` rows are passed, in that order, to
    ``PyPlot.multi_bar``.

    Args:
        iFile: Path of the tab-separated annotation summary.

    Raises:
        KeyError: If one of the five expected row names is absent.
        ValueError: If a value column is not an integer.
    """
    with open(iFile) as handle:  # closed automatically, even on a parse error
        handle.readline()  # header is not used
        rows = (line.strip().split("\t") for line in handle)
        # A repeated row name keeps the values of its last occurrence.
        table = {fields[0]: [int(x) for x in fields[1:]] for fields in rows}

    pp = PyPlot()
    pp.multi_bar(
        [
            table["Coding"][0:4],
            table["Intronic"][0:4],
            table["NonCoding"][0:4],
            table["Intergenic"][0:4],
            table["Genomic"][0:4],
        ]
    )
def venn_diagram(self, iFile1, iFile2, iFile3=0, setname1='A', setname2='B', setname3='C', filename='test.pdf'):
    """Plot the overlap of two or three files' lines as a Venn diagram.

    Every file is reduced to the set of its whitespace-stripped lines, and
    the sets are given to ``PyPlot.venn_diagram`` with their names.

    Args:
        iFile1: Path of the first file.
        iFile2: Path of the second file.
        iFile3: Optional path of a third file; ``0`` (default) or ``None``
            means only two sets are drawn.
        setname1: Name passed for the first set.
        setname2: Name passed for the second set.
        setname3: Name passed for the third set (three-set diagram only).
        filename: Passed to the ``PyPlot`` constructor.
    """
    pp = PyPlot(filename)

    groups = []
    names = [setname1, setname2]
    paths = [iFile1, iFile2]
    if not (iFile3 is None or iFile3 == 0):
        paths.append(iFile3)
        names.append(setname3)

    for path in paths:
        # Each handle is closed as soon as its lines are collected, even if
        # reading raises.
        with open(path) as handle:
            groups.append({line.strip() for line in handle})

    pp.venn_diagram(groups, *names)
def _plot_snv_number(self, yList):
    """Draw ``yList`` as a single bar series using a default ``PyPlot``.

    Args:
        yList: Values passed unchanged to ``PyPlot.single_bar``.
    """
    plotter = PyPlot()
    plotter.single_bar(yList)