def plotExpBox_Main(inputFile,header,cols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes):
    """Read numeric columns from one delimited file, print tests, save a box plot.

    The selected columns are parsed as floats; cells that are missing or not
    numeric are skipped.  A one-sample t-test against 0 and three pairwise
    p-value tables (Student t, Welch t, Mann-Whitney U) are printed to stdout.
    Every table is passed to makePValueRawPlot and, when plotPvalueCluster is
    true, to makePValueClusterPlot.  The data are then drawn with plotExpBox
    and saved to outputFile.

    inputFile   -- handed to generic_istream to obtain an iterable of lines
    header      -- indexable by column number; supplies the x tick labels
    cols        -- column indices read from every data line
    outputFile  -- figure path; also used as the title when titl is empty
    sep         -- field separator given to str.split
    startRow    -- 1-based number of the first line treated as data
    outputClusterPrefix, methodCluster -- forwarded to the p-value plotters
    figsz       -- passed to figure(figsize=...)
    remaining arguments are forwarded unchanged to plotExpBox
    """
    plotData = []
    xtickLabels = []
    for col in cols:
        plotData.append([])
        xtickLabels.append(header[col])

    fin = generic_istream(inputFile)
    try:
        lino = 0
        for lin in fin:
            lino += 1
            if lino < startRow:
                continue
            fields = lin.rstrip("\r\n").split(sep)
            for idx, col in enumerate(cols):
                try:
                    plotData[idx].append(float(fields[col]))
                except (ValueError, IndexError):
                    # non-numeric or missing cell: best-effort skip
                    pass
    finally:
        # close the stream even when reading fails part-way
        fin.close()

    def printPairwiseTable(pvalueOf):
        """Print the upper-triangular p-value table; return the full symmetric matrix."""
        print >> stdout, "p-val",
        for x in range(0, len(plotData)):
            print >> stdout, xtickLabels[x],
        print >> stdout, ""
        pvalueM = []
        for x in range(0, len(plotData)):
            pvalueRow = []
            pvalueM.append(pvalueRow)
            print >> stdout, xtickLabels[x],
            for y in range(0, len(plotData)):
                if y < x:
                    # lower triangle: blank cell on screen, mirrored value in the matrix
                    print >> stdout, "",
                    pvalueRow.append(pvalueM[y][x])
                elif y == x:
                    print >> stdout, "",
                    pvalueRow.append(1.0)
                else:
                    pvalue = pvalueOf(plotData[x], plotData[y])
                    print >> stdout, str(pvalue),
                    pvalueRow.append(pvalue)
            print >> stdout, ""
        return pvalueM

    def plotPvalues(suffix, pvalueM):
        """Raw plot is always produced; the clustered plot only on request."""
        makePValueRawPlot(outputClusterPrefix + suffix + "_raw", xtickLabels, pvalueM)
        if plotPvalueCluster:
            makePValueClusterPlot(outputClusterPrefix + suffix, xtickLabels, pvalueM, methodCluster)

    print >> stdout, "student t-test (1 sample; mean=0)"
    print >> stdout, "sample", "mean", "p-val"
    for x in range(0, len(plotData)):
        print >> stdout, xtickLabels[x], mean(plotData[x]), ttest_1samp(plotData[x], 0)[1]

    print >> stdout, ""
    print >> stdout, "student t-test (2 samples)"
    pvalueM = printPairwiseTable(lambda a, b: ttest_ind(a, b)[1])
    print >> stdout, ""
    plotPvalues("_t", pvalueM)

    print >> stdout, "welch t-test"
    pvalueM = printPairwiseTable(lambda a, b: welchs_approximate_ttest_arr(a, b)[3])
    plotPvalues("_Welch", pvalueM)

    print >> stdout, ""
    print >> stdout, "non-parametric (Mann-Whitney U)"
    # The factor 2 turns a one-tailed p-value into a two-tailed one.
    # NOTE(review): confirm the installed mannwhitneyu still returns a one-tailed value.
    pvalueM = printPairwiseTable(lambda a, b: mannwhitneyu(a, b)[1] * 2)
    plotPvalues("_U", pvalueM)

    figure(figsize=figsz)
    if len(titl) == 0:
        titl = outputFile
    plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes)
    savefig(outputFile, bbox_inches="tight")
def plotExpBox_Main(inputFiles,headers,valcols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes,trimToMinSize,relabels,logb,plotHistogramToFile,plotMedianForGroups,botta):
    """Read numeric columns from several files, print tests, save a box plot.

    inputFiles, headers and valcols are walked in lockstep: for each file the
    listed columns are parsed as floats (optionally log-transformed) and
    appended as new samples.  Samples left empty are discarded.  A one-sample
    t-test against 0 and three pairwise p-value tables (Student t, Welch t,
    Mann-Whitney U) are printed to stdout; with plotPvalueCluster the tables
    are also plotted.  The box plot is saved to outputFile.

    sep, startRow       -- field separator and 1-based first data line
    trimToMinSize       -- when true, trimData(plotData, minSize) is called
                           with the length of the shortest column read
    relabels            -- replacement labels for the first len(relabels) samples
    logb                -- when non-zero, values become log(value)/logb
    plotHistogramToFile -- when non-empty, drawHistogram is called with it
    plotMedianForGroups -- list whose items are resolved IN PLACE through
                           getCol0ListFromCol1ListStringAdv into sample index
                           lists; one dotted median line is drawn per group
    botta               -- bottom margin for subplots_adjust
    figsz               -- passed to figure(figsize=...)
    remaining arguments are forwarded unchanged to plotExpBox or the p-value
    plotters
    """
    plotData = []
    xtickLabels = []
    minSize = -1

    for inputFile, header, cols in zip(inputFiles, headers, valcols):
        fin = generic_istream(inputFile)
        try:
            startIdx = len(plotData)
            for col in cols:
                plotData.append([])
                xtickLabels.append(header[col])
            colIndices = range(startIdx, startIdx + len(cols))

            lino = 0
            for lin in fin:
                lino += 1
                if lino < startRow:
                    continue
                fields = lin.rstrip("\r\n").split(sep)
                for idx, col in zip(colIndices, cols):
                    try:
                        value = float(fields[col])
                        if logb != 0:
                            value = log(value) / logb
                            # NOTE(review): guard read as belonging to the log
                            # branch (rejects the result of log(0)) -- confirm.
                            if value < -100000:
                                raise ValueError
                        plotData[idx].append(value)
                    except (ValueError, IndexError):
                        # non-numeric, missing or non-loggable cell: skip it
                        pass
        finally:
            fin.close()

        # Track the shortest column over EVERY column of this file.  The old
        # code reused the loop variable left over from the last parsed line,
        # which ignored all but the last column and was unbound (or stale
        # from the previous file) when a file had no data lines.
        for idx in colIndices:
            nValues = len(plotData[idx])
            if minSize == -1 or nValues < minSize:
                minSize = nValues

    if trimToMinSize:
        print >> stderr, "trimming to min size =", minSize
        trimData(plotData, minSize)

    if len(relabels) > 0:
        for i, relabel in zip(range(0, len(relabels)), relabels):
            xtickLabels[i] = relabel

    for i in range(0, len(plotMedianForGroups)):
        plotMedianForGroups[i] = getCol0ListFromCol1ListStringAdv(xtickLabels, plotMedianForGroups[i])

    # medians are computed before empty samples are dropped, so the group
    # indices still refer to the original sample positions
    medianToDraw = []
    for mediangrouper in plotMedianForGroups:
        curD = []
        for c in mediangrouper:
            curD.extend(plotData[c])
        medianToDraw.append(median(curD))

    # walk backwards so deletions do not shift indices still to be visited
    for c in range(len(plotData) - 1, -1, -1):
        if len(plotData[c]) == 0:
            print >> stderr, xtickLabels[c], "discarded"
            del plotData[c]
            del xtickLabels[c]

    def printPairwiseTable(pvalueOf, fallback=None):
        """Print the upper-triangular p-value table; return the symmetric matrix.

        When fallback is not None it replaces the p-value of a failing test;
        with fallback=None a failing test propagates its exception.
        """
        print >> stdout, "p-val",
        for x in range(0, len(plotData)):
            print >> stdout, xtickLabels[x],
        print >> stdout, ""
        pvalueM = []
        for x in range(0, len(plotData)):
            pvalueRow = []
            pvalueM.append(pvalueRow)
            print >> stdout, xtickLabels[x],
            for y in range(0, len(plotData)):
                if y < x:
                    print >> stdout, "",
                    pvalueRow.append(pvalueM[y][x])
                elif y == x:
                    print >> stdout, "",
                    pvalueRow.append(1.0)
                else:
                    if fallback is None:
                        pvalue = pvalueOf(plotData[x], plotData[y])
                    else:
                        try:
                            pvalue = pvalueOf(plotData[x], plotData[y])
                        except Exception:
                            pvalue = fallback
                    print >> stdout, str(pvalue),
                    pvalueRow.append(pvalue)
            print >> stdout, ""
        return pvalueM

    def plotPvalues(suffix, pvalueM):
        """Raw and clustered p-value plots, only when clustering is requested."""
        if plotPvalueCluster:
            makePValueRawPlot(outputClusterPrefix + suffix + "_raw", xtickLabels, pvalueM)
            makePValueClusterPlot(outputClusterPrefix + suffix, xtickLabels, pvalueM, methodCluster)

    print >> stdout, "student t-test (1 sample; mean=0)"
    print >> stdout, "sample", "mean", "p-val"
    for x in range(0, len(plotData)):
        # compute first so a failing test cannot leave a half-printed row
        # in front of the "NA" row
        try:
            sampleMean = mean(plotData[x])
            pvalue = ttest_1samp(plotData[x], 0)[1]
        except Exception:
            sampleMean = "NA"
            pvalue = "NA"
        print >> stdout, xtickLabels[x], sampleMean, pvalue

    print >> stdout, ""
    print >> stdout, "student t-test (2 samples)"
    pvalueM = printPairwiseTable(lambda a, b: ttest_ind(a, b)[1], 1.0)
    print >> stdout, ""
    plotPvalues("_t", pvalueM)

    print >> stdout, "welch t-test"
    # no fallback here: a failing Welch test is reported as an error
    pvalueM = printPairwiseTable(lambda a, b: welchs_approximate_ttest_arr(a, b)[3])
    plotPvalues("_Welch", pvalueM)

    print >> stdout, ""
    print >> stdout, "non-parametric (Mann-Whitney U)"
    # The factor 2 turns a one-tailed p-value into a two-tailed one.
    # NOTE(review): confirm the installed mannwhitneyu still returns a one-tailed value.
    pvalueM = printPairwiseTable(lambda a, b: mannwhitneyu(a, b)[1] * 2, 1.0)
    plotPvalues("_U", pvalueM)

    figure(figsize=figsz)
    subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8)
    if len(titl) == 0:
        titl = outputFile
    plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes)
    for m in medianToDraw:
        axhline(y=m, linestyle=':', color='gray')
    savefig(outputFile, bbox_inches="tight")

    if len(plotHistogramToFile) > 0:
        drawHistogram(plotHistogramToFile, plotData, xtickLabels)
def plotExpBox_Main(inputFiles,headers,valcols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes,trimToMinSize,relabels,logb,plotHistogramToFile,plotMedianForGroups,botta):
    """Read numeric columns from several files, print tests, save a box plot.

    inputFiles, headers and valcols are walked in lockstep: for each file the
    listed columns are parsed as floats (optionally log-transformed, zeros
    being skipped) and appended as new samples.  Samples left empty are
    discarded.  A one-sample t-test against 0 and three pairwise p-value
    tables (Student t, Welch t, Mann-Whitney U) are printed to stdout; with
    plotPvalueCluster the tables are also plotted.  The box plot is saved to
    outputFile.

    sep, startRow       -- field separator and 1-based first data line
    trimToMinSize       -- when true, trimData(plotData, minSize) is called
                           with the length of the shortest column read
    relabels            -- replacement labels for the first len(relabels)
                           samples; old and new labels are echoed to stderr
    logb                -- when non-zero, values become log(value)/logb
    plotHistogramToFile -- when non-empty, drawHistogram is called with it
    plotMedianForGroups -- list whose items are resolved IN PLACE through
                           getCol0ListFromCol1ListStringAdv into sample index
                           lists; one dotted median line is drawn per group
    botta               -- bottom margin for subplots_adjust
    figsz               -- passed to figure(figsize=...)
    remaining arguments are forwarded unchanged to plotExpBox or the p-value
    plotters
    """
    plotData = []
    xtickLabels = []
    minSize = -1

    for inputFile, header, cols in zip(inputFiles, headers, valcols):
        fin = generic_istream(inputFile)
        try:
            startIdx = len(plotData)
            for col in cols:
                plotData.append([])
                xtickLabels.append(header[col])
            colIndices = range(startIdx, startIdx + len(cols))

            lino = 0
            for lin in fin:
                lino += 1
                if lino < startRow:
                    continue
                fields = lin.rstrip("\r\n").split(sep)
                for idx, col in zip(colIndices, cols):
                    try:
                        value = float(fields[col])
                        if logb != 0:
                            # zero has no logarithm: treat it like a bad cell
                            if value == 0.0:
                                raise ValueError
                            value = log(value) / logb
                        plotData[idx].append(value)
                    except (ValueError, IndexError):
                        # non-numeric, missing or non-loggable cell: skip it
                        pass
        finally:
            fin.close()

        # Track the shortest column over EVERY column of this file.  The old
        # code reused the loop variable left over from the last parsed line,
        # which ignored all but the last column and was unbound (or stale
        # from the previous file) when a file had no data lines.
        for idx in colIndices:
            nValues = len(plotData[idx])
            if minSize == -1 or nValues < minSize:
                minSize = nValues

    if trimToMinSize:
        print >> stderr, "trimming to min size =", minSize
        trimData(plotData, minSize)

    if len(relabels) > 0:
        print >> stderr, xtickLabels
        print >> stderr, relabels
        for i, relabel in zip(range(0, len(relabels)), relabels):
            xtickLabels[i] = relabel

    for i in range(0, len(plotMedianForGroups)):
        plotMedianForGroups[i] = getCol0ListFromCol1ListStringAdv(xtickLabels, plotMedianForGroups[i])

    # medians are computed before empty samples are dropped, so the group
    # indices still refer to the original sample positions
    medianToDraw = []
    for mediangrouper in plotMedianForGroups:
        curD = []
        for c in mediangrouper:
            curD.extend(plotData[c])
        medianToDraw.append(median(curD))

    # walk backwards so deletions do not shift indices still to be visited
    for c in range(len(plotData) - 1, -1, -1):
        if len(plotData[c]) == 0:
            print >> stderr, xtickLabels[c], "discarded"
            del plotData[c]
            del xtickLabels[c]

    def printPairwiseTable(pvalueOf, fallback=None):
        """Print the upper-triangular p-value table; return the symmetric matrix.

        When fallback is not None it replaces the p-value of a failing test;
        with fallback=None a failing test propagates its exception.
        """
        print >> stdout, "p-val",
        for x in range(0, len(plotData)):
            print >> stdout, xtickLabels[x],
        print >> stdout, ""
        pvalueM = []
        for x in range(0, len(plotData)):
            pvalueRow = []
            pvalueM.append(pvalueRow)
            print >> stdout, xtickLabels[x],
            for y in range(0, len(plotData)):
                if y < x:
                    print >> stdout, "",
                    pvalueRow.append(pvalueM[y][x])
                elif y == x:
                    print >> stdout, "",
                    pvalueRow.append(1.0)
                else:
                    if fallback is None:
                        pvalue = pvalueOf(plotData[x], plotData[y])
                    else:
                        try:
                            pvalue = pvalueOf(plotData[x], plotData[y])
                        except Exception:
                            pvalue = fallback
                    print >> stdout, str(pvalue),
                    pvalueRow.append(pvalue)
            print >> stdout, ""
        return pvalueM

    def plotPvalues(suffix, pvalueM):
        """Raw and clustered p-value plots, only when clustering is requested."""
        if plotPvalueCluster:
            makePValueRawPlot(outputClusterPrefix + suffix + "_raw", xtickLabels, pvalueM)
            makePValueClusterPlot(outputClusterPrefix + suffix, xtickLabels, pvalueM, methodCluster)

    print >> stdout, "student t-test (1 sample; mean=0)"
    print >> stdout, "sample", "mean", "p-val"
    for x in range(0, len(plotData)):
        # compute first so a failing test cannot leave a half-printed row
        # in front of the "NA" row
        try:
            sampleMean = mean(plotData[x])
            pvalue = ttest_1samp(plotData[x], 0)[1]
        except Exception:
            sampleMean = "NA"
            pvalue = "NA"
        print >> stdout, xtickLabels[x], sampleMean, pvalue

    print >> stdout, ""
    print >> stdout, "student t-test (2 samples)"
    pvalueM = printPairwiseTable(lambda a, b: ttest_ind(a, b)[1], 1.0)
    print >> stdout, ""
    plotPvalues("_t", pvalueM)

    print >> stdout, "welch t-test"
    # no fallback here: a failing Welch test is reported as an error
    pvalueM = printPairwiseTable(lambda a, b: welchs_approximate_ttest_arr(a, b)[3])
    plotPvalues("_Welch", pvalueM)

    print >> stdout, ""
    print >> stdout, "non-parametric (Mann-Whitney U)"
    # The factor 2 turns a one-tailed p-value into a two-tailed one.
    # NOTE(review): confirm the installed mannwhitneyu still returns a one-tailed value.
    pvalueM = printPairwiseTable(lambda a, b: mannwhitneyu(a, b)[1] * 2, 1.0)
    plotPvalues("_U", pvalueM)

    figure(figsize=figsz)
    subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8)
    if len(titl) == 0:
        titl = outputFile
    plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes)
    for m in medianToDraw:
        axhline(y=m, linestyle=':', color='gray')
    savefig(outputFile, bbox_inches="tight")

    if len(plotHistogramToFile) > 0:
        drawHistogram(plotHistogramToFile, plotData, xtickLabels)
def attachWelchpValue(filename, cols1, cols2, startRow1, sortByFDR):
    """Append a Welch t-test p-value and an FDR column to a tab-delimited file.

    Lines before startRow1 (1-based) are treated as header lines and echoed to
    stdout immediately, followed by " \\tWelch p-value\\tWelch FDR".  For every
    other line the cells at cols1 and cols2 are parsed as floats (cells that
    are missing or not numeric are ignored) and compared with
    welchttest.welchs_approximate_ttest_arr; element [3] of its result is
    used as the p-value, and 1.0 is used when the test raises.

    The FDR of a row is total_rows * p / rank, where rank is the number of
    rows whose p-value is less than or equal to p (tied p-values share one
    FDR).  No monotonicity adjustment is applied.

    filename  -- path of the tab-delimited input file
    cols1     -- 0-based column indices of the first group
    cols2     -- 0-based column indices of the second group
    startRow1 -- 1-based number of the first data line
    sortByFDR -- when true, rows are written in ascending p-value order
                 (ties keep file order); otherwise in file order

    Output goes to sys.stdout; nothing is returned.
    """
    def numericCells(fields, cols):
        """Return the float value of every parseable cell among cols."""
        values = []
        for i0 in cols:
            try:
                values.append(float(fields[i0]))
            except (ValueError, IndexError):
                pass
        return values

    pvaluesMap = {}       # p-value -> entries sharing that p-value
    orderedAsInFile = []  # entries in file order; each is [line, p-value, FDR]

    with open(filename) as fin:
        for lino, line in enumerate(fin, 1):
            # Strip only the line terminator.  A full strip() would also eat
            # leading/trailing tabs, i.e. empty first or last cells, which
            # shifts the column indices and misplaces the appended columns.
            line = line.rstrip("\r\n")
            if lino < startRow1:
                sys.stdout.write(line + " \tWelch p-value\tWelch FDR\n")
                continue

            spliton = line.split("\t")
            arr1 = numericCells(spliton, cols1)
            arr2 = numericCells(spliton, cols2)
            try:
                pval = welchttest.welchs_approximate_ttest_arr(arr1, arr2)[3]
            except Exception:
                # test not computable for this row (e.g. too few values)
                pval = 1.0

            thisEntry = [line, pval, 0]
            pvaluesMap.setdefault(pval, []).append(thisEntry)
            orderedAsInFile.append(thisEntry)

    # FDR: walk the distinct p-values in ascending order, counting how many
    # rows have been covered so far (the rank shared by all ties).
    totalEntry = len(orderedAsInFile)
    nAlready = 0
    sortedpvalues = sorted(pvaluesMap)
    for pval in sortedpvalues:
        copvalues = pvaluesMap[pval]
        nAlready += len(copvalues)
        FDR = totalEntry * float(pval) / nAlready
        for copvalue in copvalues:
            copvalue[2] = FDR

    if sortByFDR:
        entries = []
        for pval in sortedpvalues:
            entries.extend(pvaluesMap[pval])
    else:
        entries = orderedAsInFile

    for line, pval, FDR in entries:
        sys.stdout.write(line + "\t" + str(pval) + "\t" + str(FDR) + "\n")
def plotExpBox_Main(inputFiles,headers,valcols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes,trimToMinSize,relabels):
    """Read numeric columns from several files, print tests, save a box plot.

    inputFiles, headers and valcols are walked in lockstep: for each file the
    listed columns are parsed as floats (missing or non-numeric cells are
    skipped) and appended as new samples.  A one-sample t-test against 0 and
    three pairwise p-value tables (Student t, Welch t, Mann-Whitney U) are
    printed to stdout; with plotPvalueCluster the tables are also plotted.
    The box plot is saved to outputFile.

    sep, startRow -- field separator and 1-based first data line
    trimToMinSize -- when true, trimData(plotData, minSize) is called with
                     the length of the shortest column read
    relabels      -- when non-empty, replaces all labels; a length mismatch
                     is reported on stderr and the program exits
    figsz         -- passed to figure(figsize=...)
    remaining arguments are forwarded unchanged to plotExpBox or the p-value
    plotters
    """
    plotData = []
    xtickLabels = []
    minSize = -1

    for inputFile, header, cols in zip(inputFiles, headers, valcols):
        fin = generic_istream(inputFile)
        try:
            startIdx = len(plotData)
            for col in cols:
                plotData.append([])
                xtickLabels.append(header[col])
            colIndices = range(startIdx, startIdx + len(cols))

            lino = 0
            for lin in fin:
                lino += 1
                if lino < startRow:
                    continue
                fields = lin.rstrip("\r\n").split(sep)
                for idx, col in zip(colIndices, cols):
                    try:
                        plotData[idx].append(float(fields[col]))
                    except (ValueError, IndexError):
                        # non-numeric or missing cell: best-effort skip
                        pass
        finally:
            fin.close()

        # Track the shortest column over EVERY column of this file.  The old
        # code reused the loop variable left over from the last parsed line,
        # which ignored all but the last column and was unbound (or stale
        # from the previous file) when a file had no data lines.
        for idx in colIndices:
            nValues = len(plotData[idx])
            if minSize == -1 or nValues < minSize:
                minSize = nValues

    if trimToMinSize:
        print >> stderr, "trimming to min size =", minSize
        trimData(plotData, minSize)

    if len(relabels) > 0:
        if len(relabels) != len(xtickLabels):
            print >> stderr, "relabels doesn't have the same length as original label vectors", xtickLabels, "=>", relabels
            exit()
        xtickLabels = relabels

    def printPairwiseTable(pvalueOf):
        """Print the upper-triangular p-value table; return the full symmetric matrix."""
        print >> stdout, "p-val",
        for x in range(0, len(plotData)):
            print >> stdout, xtickLabels[x],
        print >> stdout, ""
        pvalueM = []
        for x in range(0, len(plotData)):
            pvalueRow = []
            pvalueM.append(pvalueRow)
            print >> stdout, xtickLabels[x],
            for y in range(0, len(plotData)):
                if y < x:
                    # lower triangle: blank cell on screen, mirrored value in the matrix
                    print >> stdout, "",
                    pvalueRow.append(pvalueM[y][x])
                elif y == x:
                    print >> stdout, "",
                    pvalueRow.append(1.0)
                else:
                    pvalue = pvalueOf(plotData[x], plotData[y])
                    print >> stdout, str(pvalue),
                    pvalueRow.append(pvalue)
            print >> stdout, ""
        return pvalueM

    def plotPvalues(suffix, pvalueM):
        """Raw and clustered p-value plots, only when clustering is requested."""
        if plotPvalueCluster:
            makePValueRawPlot(outputClusterPrefix + suffix + "_raw", xtickLabels, pvalueM)
            makePValueClusterPlot(outputClusterPrefix + suffix, xtickLabels, pvalueM, methodCluster)

    print >> stdout, "student t-test (1 sample; mean=0)"
    print >> stdout, "sample", "mean", "p-val"
    for x in range(0, len(plotData)):
        print >> stdout, xtickLabels[x], mean(plotData[x]), ttest_1samp(plotData[x], 0)[1]

    print >> stdout, ""
    print >> stdout, "student t-test (2 samples)"
    pvalueM = printPairwiseTable(lambda a, b: ttest_ind(a, b)[1])
    print >> stdout, ""
    plotPvalues("_t", pvalueM)

    print >> stdout, "welch t-test"
    pvalueM = printPairwiseTable(lambda a, b: welchs_approximate_ttest_arr(a, b)[3])
    plotPvalues("_Welch", pvalueM)

    print >> stdout, ""
    print >> stdout, "non-parametric (Mann-Whitney U)"
    # The factor 2 turns a one-tailed p-value into a two-tailed one.
    # NOTE(review): confirm the installed mannwhitneyu still returns a one-tailed value.
    pvalueM = printPairwiseTable(lambda a, b: mannwhitneyu(a, b)[1] * 2)
    plotPvalues("_U", pvalueM)

    figure(figsize=figsz)
    if len(titl) == 0:
        titl = outputFile
    plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes)
    savefig(outputFile, bbox_inches="tight")
def plotExpBox_Main(inputFiles, headers, valcols, outputFile, sep, startRow, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, plotPvalueCluster, outputClusterPrefix, methodCluster, xlegendrotation, xlabe, ylabe, figsz, titl, showSampleSizes, trimToMinSize, relabels, logb, plotHistogramToFile, plotMedianForGroups, botta, showViolin, showBox, firstColAnnot, plotTrend, showLegend, makePzfxFile, makeBinMatrix, writeDataSummaryStat, summaryStatRange, minuslog10pvalue, minNDataToKeep, vfacecolor, valpha, outXYZPvalues, dividePlots):
    """Read numeric columns from several files, print tests, save a box/violin plot.

    inputFiles, headers and valcols are walked in lockstep: for each file the
    listed columns are parsed as floats (optionally log-transformed, zeros
    being skipped) and appended as new samples.  Samples with fewer than
    minNDataToKeep values are discarded.  Unless the module-level flag
    skipStat is set, a one-sample t-test table and seven pairwise p-value
    tables (Student t, Welch t, Mann-Whitney U, Ansari-Bradley, Fligner,
    Levene, Bartlett) are printed to stdout.  The plot is saved to outputFile.

    sep, startRow        -- field separator and 1-based first data line
    trimToMinSize        -- when true, trimData(plotData, minSize) is called
                            with the length of the shortest column read
    relabels             -- replacement labels for the first len(relabels)
                            samples; old and new labels are echoed to stderr
    logb                 -- when non-zero, values become log(value)/logb
    plotHistogramToFile  -- when non-empty, drawHistogram and drawDensigram
                            are called with it
    plotMedianForGroups  -- list whose items are resolved IN PLACE through
                            getCol0ListFromCol1ListStringAdv into sample index
                            lists; one dotted median line is drawn per group
    botta                -- bottom margin for subplots_adjust
    firstColAnnot        -- when true, the first entry of each cols list is an
                            annotation column collected per data line
    plotTrend            -- when true, per-line value lists are collected
                            (None for lines with any unusable cell)
    writeDataSummaryStat -- when truthy, path of a summary table to write
    summaryStatRange     -- (low, high) given to filterDataInRangeInclusive
    minuslog10pvalue     -- when true, tables hold -log10(p) and diagonal 0.0
    outXYZPvalues        -- when truthy, prefix for writeXYZPvalues output
                            (Welch and Mann-Whitney tables only)
    vfacecolor, valpha   -- accepted but not used by this function
    remaining arguments are forwarded unchanged to plotExpBox or the p-value
    plotters
    """
    plotData = []
    xtickLabels = []
    trendData = {}   # first sample index of a file -> per-line value lists
    annot = {}       # first sample index of a file -> annotation cells
    minSize = -1

    for inputFile, header, cols in zip(inputFiles, headers, valcols):
        fin = generic_istream(inputFile)
        try:
            startIdx = len(plotData)

            if firstColAnnot:
                colAnnot = cols[0]
                cols = cols[1:]
                annotThisFile = []
                annot[startIdx] = annotThisFile
            else:
                colAnnot = -1
                annotThisFile = None

            for col in cols:
                plotData.append([])
                xtickLabels.append(header[col])
            colIndices = range(startIdx, startIdx + len(cols))

            if plotTrend:
                trendDataThisFile = []
                trendData[startIdx] = trendDataThisFile
            else:
                trendDataThisFile = None

            lino = 0
            for lin in fin:
                lino += 1
                if lino < startRow:
                    continue
                fields = lin.rstrip("\r\n").split(sep)

                if plotTrend:
                    trendDataThisLine = []
                else:
                    trendDataThisLine = None
                allDataOKThisLine = True

                if colAnnot >= 0:
                    annotThisFile.append(fields[colAnnot])

                for idx, col in zip(colIndices, cols):
                    try:
                        value = float(fields[col])
                        if logb != 0:
                            # zero has no logarithm: treat it like a bad cell
                            if value == 0.0:
                                raise ValueError
                            value = log(value) / logb
                        plotData[idx].append(value)
                        if plotTrend:
                            trendDataThisLine.append(value)
                    except (ValueError, IndexError):
                        allDataOKThisLine = False

                if plotTrend:
                    # a line with any unusable cell contributes a None placeholder
                    if allDataOKThisLine:
                        trendDataThisFile.append(trendDataThisLine)
                    else:
                        trendDataThisFile.append(None)
        finally:
            fin.close()

        # Track the shortest column over EVERY column of this file.  The old
        # code reused the loop variable left over from the last parsed line,
        # which ignored all but the last column and was unbound (or stale
        # from the previous file) when a file had no data lines.
        for idx in colIndices:
            nValues = len(plotData[idx])
            if minSize == -1 or nValues < minSize:
                minSize = nValues

    if trimToMinSize:
        print >> stderr, "trimming to min size =", minSize
        trimData(plotData, minSize)

    if len(relabels) > 0:
        print >> stderr, xtickLabels
        print >> stderr, relabels
        for i, relabel in zip(range(0, len(relabels)), relabels):
            xtickLabels[i] = relabel

    for i in range(0, len(plotMedianForGroups)):
        plotMedianForGroups[i] = getCol0ListFromCol1ListStringAdv(xtickLabels, plotMedianForGroups[i])

    # medians are computed before small samples are dropped, so the group
    # indices still refer to the original sample positions
    medianToDraw = []
    for mediangrouper in plotMedianForGroups:
        curD = []
        for c in mediangrouper:
            curD.extend(plotData[c])
        medianToDraw.append(median(curD))

    # walk backwards so deletions do not shift indices still to be visited
    for c in range(len(plotData) - 1, -1, -1):
        if len(plotData[c]) < minNDataToKeep:
            print >> stderr, xtickLabels[c], "discarded because has only", len(plotData[c]), "data points <", minNDataToKeep
            del plotData[c]
            del xtickLabels[c]

    def printPairwiseTable(pvalueOf, fallback):
        """Print the upper-triangular table; return the full symmetric matrix.

        fallback replaces the p-value of a failing test.  With
        minuslog10pvalue every numeric p-value becomes -log10(p); the
        fallback "NA" is left untouched.
        """
        if minuslog10pvalue:
            diagonal = 0.0
        else:
            diagonal = 1.0
        print >> stdout, "p-val",
        for x in range(0, len(plotData)):
            print >> stdout, xtickLabels[x],
        print >> stdout, ""
        pvalueM = []
        for x in range(0, len(plotData)):
            pvalueRow = []
            pvalueM.append(pvalueRow)
            print >> stdout, xtickLabels[x],
            for y in range(0, len(plotData)):
                if y < x:
                    print >> stdout, "",
                    pvalueRow.append(pvalueM[y][x])
                elif y == x:
                    print >> stdout, "",
                    pvalueRow.append(diagonal)
                else:
                    try:
                        pvalue = pvalueOf(plotData[x], plotData[y])
                    except Exception:
                        pvalue = fallback
                    if minuslog10pvalue and str(pvalue) != "NA":
                        try:
                            pvalue = -1 * log(pvalue, 10)
                        except Exception:
                            # NOTE(review): reached e.g. for p == 0; the
                            # negative sentinel is kept as found -- confirm
                            # the sign is intended.
                            pvalue = -1000.0
                    print >> stdout, str(pvalue),
                    pvalueRow.append(pvalue)
            print >> stdout, ""
        return pvalueM

    def plotPvalues(suffix, pvalueM):
        """Raw and clustered p-value plots, only when clustering is requested."""
        if plotPvalueCluster:
            makePValueRawPlot(outputClusterPrefix + suffix + "_raw", xtickLabels, pvalueM)
            makePValueClusterPlot(outputClusterPrefix + suffix, xtickLabels, pvalueM, methodCluster)

    def writeSummaryRow(fDSS, label, data):
        """Write one row of the summary-statistics table for a sample."""
        sumData, N, NIN, NBelow, NAbove = filterDataInRangeInclusive(data, summaryStatRange[0], summaryStatRange[1])
        if NIN > 1:
            mea = mean2(sumData)
            sd = std(sumData, ddof=1)
            var = sd * sd
            mi = min(sumData)
            ma = max(sumData)
        else:
            # fewer than two in-range values: no spread can be computed
            mea = sd = var = mi = ma = "NA"
        cells = [mea, var, sd, mi, ma, N, NIN, float(NIN) * 100 / N, NBelow, float(NBelow) * 100 / N, NAbove, float(NAbove) * 100 / N]
        print >> fDSS, label + "\t" + "\t".join([str(cell) for cell in cells])

    # NOTE(review): skipStat is neither a parameter nor a local; it is
    # presumably a module-level flag.  The source layout does not show how far
    # this branch extends; it is read here as covering every statistics table.
    # Confirm against the original file.
    if not skipStat:
        print >> stdout, "student t-test (1 sample; mean=0)"
        print >> stdout, "sample", "mean", "p-val", "median"

        fDSS = None
        if writeDataSummaryStat:
            fDSS = open(writeDataSummaryStat, "w")
        try:
            if fDSS is not None:
                print >> fDSS, "sample\tmean\tvar\tsd\tmin\tmax\tN\tNInRange[" + str(summaryStatRange[0]) + "," + str(summaryStatRange[1]) + "]\t%NInRange\tNbelowRange\t%Nbelow\tNAboveRange\t%NAbove"
            for x in range(0, len(plotData)):
                # compute first so a failing test cannot leave a half-printed
                # row in front of the "NA" row
                sampleMean = mean(plotData[x])
                sampleMedian = median(plotData[x])
                try:
                    pvalue = ttest_1samp(plotData[x], 0)[1]
                except Exception:
                    pvalue = "NA"
                print >> stdout, xtickLabels[x], sampleMean, pvalue, sampleMedian
                if fDSS is not None:
                    writeSummaryRow(fDSS, xtickLabels[x], plotData[x])
        finally:
            # close the summary file even when a row fails
            if fDSS is not None:
                fDSS.close()

        print >> stdout, ""
        print >> stdout, "student t-test (2 samples)"
        pvalueM = printPairwiseTable(lambda a, b: ttest_ind(a, b)[1], 1.0)
        print >> stdout, ""
        plotPvalues("_t", pvalueM)

        print >> stdout, "welch t-test"
        pvalueM = printPairwiseTable(lambda a, b: welchs_approximate_ttest_arr(a, b)[3], 1.0)
        if outXYZPvalues:
            writeXYZPvalues(outXYZPvalues + "_Welch.xyz", xtickLabels, pvalueM)
        plotPvalues("_Welch", pvalueM)

        print >> stdout, ""
        print >> stdout, "non-parametric (Mann-Whitney U)"
        # The factor 2 turns a one-tailed p-value into a two-tailed one.
        # NOTE(review): confirm the installed mannwhitneyu still returns a one-tailed value.
        pvalueM = printPairwiseTable(lambda a, b: mannwhitneyu(a, b)[1] * 2, 1.0)
        if outXYZPvalues:
            writeXYZPvalues(outXYZPvalues + "_U.xyz", xtickLabels, pvalueM)
        plotPvalues("_U", pvalueM)

        # variance / scale tests: a failing test is reported as "NA"
        print >> stdout, ""
        print >> stdout, "Ansari-Bradley Two-sample Test for difference in scale parameters "
        pvalueM = printPairwiseTable(lambda a, b: ansari(a, b)[1], "NA")
        plotPvalues("_Ansari", pvalueM)

        print >> stdout, ""
        print >> stdout, "Fligner's Two-sample Test for equal variance (non-parametrics)"
        pvalueM = printPairwiseTable(lambda a, b: fligner(a, b)[1], "NA")
        plotPvalues("_fligner", pvalueM)

        print >> stdout, ""
        print >> stdout, "Levene's Two-sample Test for equal variance"
        pvalueM = printPairwiseTable(lambda a, b: levene(a, b)[1], "NA")
        plotPvalues("_levene", pvalueM)

        print >> stdout, ""
        print >> stdout, "Bartlett's Two-sample Test for equal variance (for normal distributions)"
        pvalueM = printPairwiseTable(lambda a, b: bartlett(a, b)[1], "NA")
        plotPvalues("_bartlett", pvalueM)

    figure(figsize=figsz)
    subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8)
    if len(titl) == 0:
        titl = outputFile
    plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes, showViolin, showBox, annot, trendData, showLegend, makePzfxFile, makeBinMatrix, dividePlots)
    for m in medianToDraw:
        axhline(y=m, linestyle=':', color='gray')
    savefig(outputFile, bbox_inches="tight")

    if len(plotHistogramToFile) > 0:
        drawHistogram(plotHistogramToFile, plotData, xtickLabels)
        drawDensigram(plotHistogramToFile + ".density.png", plotData, xtickLabels)
# NOTE(review): the statement below looks like the tail of a loop that fills
# candtomoney (candidate name -> list of donation amounts); the loop header is
# not visible in this chunk, so confirm its indentation against the full script.
candtomoney[name].append(amount)

# Pull out the two samples that are compared in the rest of the script.
obama = candtomoney["Obama Barack"]
mccain = candtomoney["McCain John S"]
print len(obama), len(mccain)

# certainly the means look different...
print "Obama mean, stdev", numpy.mean(obama), numpy.std(obama)
print "McCain mean, stdev", numpy.mean(mccain), numpy.std(mccain)

# running a t-test of independent samples suggests that they don't share the
# same mean
print "ttest, equal variances", scipy.stats.ttest_ind(obama, mccain)

# small white lie: there is no reason to believe that these two samples have
# equal variance, so let's use a Welch test.
print "welch", welchttest.welchs_approximate_ttest_arr(obama, mccain)

# but we've been lying to you. you should only run t-tests on normal data, so
# run the Shapiro-Wilk test of normality
print "obama shapiro", scipy.stats.shapiro(obama)
print "mccain shapiro", scipy.stats.shapiro(mccain)

# ooops...we have to reject the null hypothesis: it's very unlikely these two
# are normally distributed. it's actually not that bad for a t-test, but just
# to be sure, let's run a non-parametric test called the Mann-Whitney U test.
print "mann-whitney U", scipy.stats.mannwhitneyu(obama, mccain)

# cool! the p-value is 0. So if our alpha was .05 or .01, we'd still be below
# it. it's unlikely these two are from the same distribution, so we can
# safely reject the null hypothesis that they are. Obama's donations were
# really smaller than McCain's!
def attachWelchpValue(filename,cols1,cols2,startRow1,sortByFDR):
    """Append a Welch t-test p-value and an FDR column to a tab-delimited file.

    The result is written to sys.stdout; the input file is not modified.

    filename  -- path of the tab-delimited input file
    cols1     -- 0-based column indices forming the first sample of each row
    cols2     -- 0-based column indices forming the second sample of each row
    startRow1 -- 1-based number of the first data row; earlier lines are
                 treated as header lines and get the two new column names
    sortByFDR -- when true, rows are emitted in ascending p-value order (rows
                 sharing a p-value keep their file order); otherwise rows are
                 emitted in file order

    Cells that are missing or not numeric are left out of the sample.  When
    the Welch test itself fails for a row, its p-value is reported as 1.0.

    The FDR of a row is  nRows * p / (number of rows with p-value <= p).
    It is neither capped at 1 nor forced to be monotone in p.
    """

    def numericValues(fields,cols):
        # Best effort on purpose: a short row or a non-numeric cell just
        # contributes no value to the sample.
        values=[]
        for col in cols:
            try:
                values.append(float(fields[col]))
            except (ValueError,IndexError):
                pass
        return values

    out=sys.stdout
    rowsByPvalue={}      # p-value -> list of [line,p-value,FDR] sharing it
    rowsInFileOrder=[]   # the same [line,p-value,FDR] objects, in file order

    with open(filename) as fin:
        for lino,line in enumerate(fin,1):
            # Only strip the line terminator: strip() would also eat leading
            # and trailing tabs, i.e. drop empty edge cells and shift columns.
            line=line.rstrip("\r\n")

            if lino<startRow1:
                # Written as a single string so that no space sneaks in
                # between the last original column and the new ones.
                out.write(line+"\tWelch p-value\tWelch FDR\n")
                continue

            fields=line.split("\t")
            arr1=numericValues(fields,cols1)
            arr2=numericValues(fields,cols2)

            try:
                pval=welchttest.welchs_approximate_ttest_arr(arr1,arr2)[3]
            except Exception:
                # The test cannot be computed for this row: call it
                # "not significant" rather than abort the whole file.
                pval=1.0

            entry=[line,pval,0]
            rowsByPvalue.setdefault(pval,[]).append(entry)
            rowsInFileOrder.append(entry)

    totalEntry=len(rowsInFileOrder)
    sortedPvalues=sorted(rowsByPvalue)

    # Walk the p-values from smallest to largest; nAlready is the number of
    # rows whose p-value is <= the current one (ties share the same count).
    nAlready=0
    for pval in sortedPvalues:
        tiedRows=rowsByPvalue[pval]
        nAlready+=len(tiedRows)
        FDR=totalEntry*float(pval)/nAlready
        for entry in tiedRows:
            entry[2]=FDR

    if sortByFDR:
        rowsToWrite=[entry for pval in sortedPvalues for entry in rowsByPvalue[pval]]
    else:
        rowsToWrite=rowsInFileOrder

    for line,pval,FDR in rowsToWrite:
        out.write(line+"\t"+str(pval)+"\t"+str(FDR)+"\n")
def _percentOfTotal(count,total):
    """Return count as a percentage of total (as text), or "NA" when total is 0."""
    if total==0:
        return "NA"
    return str(float(count)*100/total)


def _pairwisePvalueMatrix(title,plotData,xtickLabels,pvalueOfPair,failValue,minuslog10pvalue):
    """Print one pairwise test table to stdout and return it as a square matrix.

    title            -- heading line printed above the table
    plotData         -- list of samples (one list of values per sample)
    xtickLabels      -- sample names, parallel to plotData
    pvalueOfPair     -- callable(sampleA,sampleB) returning the p-value
    failValue        -- value stored when pvalueOfPair raises (1.0 or "NA")
    minuslog10pvalue -- when true, report -log10(p) instead of p

    Only the upper triangle is computed and printed.  In the returned matrix
    the lower triangle mirrors the upper one and the diagonal holds 1.0
    (0.0 when minuslog10pvalue is set).
    """
    nSamples=len(plotData)
    if minuslog10pvalue:
        selfValue=0.0
    else:
        selfValue=1.0

    print >> stdout,title
    print >> stdout,"p-val",
    for x in range(0,nSamples):
        print >> stdout,xtickLabels[x],
    print >> stdout,""

    pvalueM=[]
    for x in range(0,nSamples):
        pvalueRow=[]
        pvalueM.append(pvalueRow)
        print >> stdout,xtickLabels[x],
        for y in range(0,nSamples):
            if y<=x:
                # diagonal and lower triangle are printed as empty cells
                print >> stdout,"",
                if x==y:
                    pvalueRow.append(selfValue)
                else:
                    pvalueRow.append(pvalueM[y][x])
                continue

            try:
                pvalue=pvalueOfPair(plotData[x],plotData[y])
            except Exception:
                # best effort: a test that cannot be computed must not abort the report
                pvalue=failValue

            if minuslog10pvalue and str(pvalue)!="NA":
                try:
                    pvalue=-1*log(pvalue,10)
                except Exception:
                    # e.g. p == 0: the original sentinel value is kept
                    pvalue=-1000.0

            print >> stdout,str(pvalue),
            pvalueRow.append(pvalue)
        print >> stdout,""

    return pvalueM


def plotExpBox_Main(inputFiles,headers,valcols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes,trimToMinSize,relabels,logb,plotHistogramToFile,plotMedianForGroups,botta,showViolin,showBox,firstColAnnot,plotTrend,showLegend,makePzfxFile,makeBinMatrix,writeDataSummaryStat,summaryStatRange,minuslog10pvalue,minNDataToKeep,vfacecolor,valpha,outXYZPvalues,dividePlots):
    """Read value columns from several files, print test tables, draw the box plot.

    Data selection
      inputFiles, headers, valcols -- parallel lists, one entry per file: the
          input name, the header row (indexed by column to get the sample
          label) and the list of column indices to read.
      sep, startRow -- field separator and 1-based number of the first data row.
      firstColAnnot -- when true, the first entry of each file's column list
          is an annotation column collected per row instead of a value column.
      plotTrend -- when true, per-row value lists are collected for each file
          (None for rows where any cell could not be used).
      logb -- when non-zero every value becomes log(value)/logb and zero
          values are dropped.  NOTE(review): presumably the log of the wanted
          base -- confirm against the caller.
      trimToMinSize -- when true, trimData(plotData,minSize) is applied, where
          minSize is the shortest column read.
      relabels -- replacement labels, applied positionally.
      plotMedianForGroups -- column-group specifications; converted IN PLACE
          with getCol0ListFromCol1ListStringAdv and drawn as dotted lines at
          each group's median.
      minNDataToKeep -- samples with fewer values are discarded.

    Statistics (skipped entirely when the module-level skipStat is true;
    NOTE(review): skipStat is not a parameter and must exist as a global)
      writeDataSummaryStat -- output path for the summary table, or false.
      summaryStatRange -- (low,high) passed to filterDataInRangeInclusive.
      minuslog10pvalue -- report -log10(p) in the pairwise tables.
      outXYZPvalues -- file prefix for writeXYZPvalues (Welch and U tables).
      plotPvalueCluster, outputClusterPrefix, methodCluster -- when
          plotPvalueCluster is true each pairwise table is also drawn with
          makePValueRawPlot / makePValueClusterPlot.

    Plotting
      outputFile -- passed to savefig; also used as title when titl is empty.
      figsz, botta -- figure size and bottom margin.
      plotHistogramToFile -- when non-empty, histogram and density plots are
          written as well.
      showIndPoints, mark, markMean, showMean, notch, whisker, outliers,
      xlegendrotation, xlabe, ylabe, showSampleSizes, showViolin, showBox,
      showLegend, makePzfxFile, makeBinMatrix, dividePlots -- forwarded
          unchanged to plotExpBox.
      vfacecolor, valpha -- accepted but not used by this function.

    NOTE(review): annot and trendData are keyed by the column index a file
    started at *before* short samples are discarded -- confirm plotExpBox
    copes with the shifted indices.
    """
    plotData=[]
    xtickLabels=[]
    trendData={}
    annot={}
    minSize=-1

    for inputFile,header,cols in zip(inputFiles,headers,valcols):
        fin=generic_istream(inputFile)
        try:
            startIdx=len(plotData)

            if firstColAnnot:
                colAnnot=cols[0]
                cols=cols[1:]
                annotThisFile=[]
                annot[startIdx]=annotThisFile
            else:
                colAnnot=-1
                annotThisFile=None

            for col in cols:
                plotData.append([])
                xtickLabels.append(header[col])

            colIndices=range(startIdx,startIdx+len(cols))

            if plotTrend:
                trendDataThisFile=[]
                trendData[startIdx]=trendDataThisFile
            else:
                trendDataThisFile=None

            for lino,lin in enumerate(fin,1):
                if lino<startRow:
                    continue

                fields=lin.rstrip("\r\n").split(sep)

                if plotTrend:
                    trendDataThisLine=[]
                else:
                    trendDataThisLine=None

                allDataOKThisLine=True

                if colAnnot>=0:
                    annotThisFile.append(fields[colAnnot])

                for idx,col in zip(colIndices,cols):
                    try:
                        value=float(fields[col])
                        if logb!=0:
                            if value==0.0:
                                # log(0) is undefined: treat like an unparsable cell
                                raise ValueError
                            value=log(value)/logb
                    except Exception:
                        # best effort: missing / non-numeric cells are skipped
                        allDataOKThisLine=False
                        continue

                    plotData[idx].append(value)
                    if plotTrend:
                        trendDataThisLine.append(value)

                if plotTrend:
                    if allDataOKThisLine:
                        trendDataThisFile.append(trendDataThisLine)
                    else:
                        trendDataThisFile.append(None)
        finally:
            fin.close()

        # The size used to be taken from plotData[idx], i.e. from the loop
        # variable left over by the row loop: undefined (or stale from the
        # previous file) when no data row was read, and blind to every column
        # but the last.  Look at all columns of this file instead.
        for i in colIndices:
            if minSize==-1 or len(plotData[i])<minSize:
                minSize=len(plotData[i])

    if trimToMinSize:
        print >> stderr,"trimming to min size =",minSize
        trimData(plotData,minSize)

    if len(relabels)>0:
        print >> stderr,xtickLabels
        print >> stderr,relabels
        for i,relabel in enumerate(relabels):
            xtickLabels[i]=relabel

    for i in range(0,len(plotMedianForGroups)):
        plotMedianForGroups[i]=getCol0ListFromCol1ListStringAdv(xtickLabels,plotMedianForGroups[i])

    # one median per group, pooled over all columns of the group
    medianToDraw=[]
    for mediangrouper in plotMedianForGroups:
        curD=[]
        for c in mediangrouper:
            curD.extend(plotData[c])
        medianToDraw.append(median(curD))

    # walk backwards so that deleting does not disturb the remaining indices
    for c in range(len(plotData)-1,-1,-1):
        if len(plotData[c])<minNDataToKeep:
            print >> stderr,xtickLabels[c],"discarded because has only",len(plotData[c]),"data points <",minNDataToKeep
            del plotData[c]
            del xtickLabels[c]

    if not skipStat:
        print >> stdout,"student t-test (1 sample; mean=0)"
        print >> stdout,"sample","mean","p-val","median"

        fDSS=None
        if writeDataSummaryStat:
            fDSS=open(writeDataSummaryStat,"w")
        try:
            if fDSS is not None:
                print >> fDSS,"sample\tmean\tvar\tsd\tmin\tmax\tN\tNInRange["+str(summaryStatRange[0])+","+str(summaryStatRange[1])+"]\t%NInRange\tNbelowRange\t%Nbelow\tNAboveRange\t%NAbove"

            for x in range(0,len(plotData)):
                # Compute the p-value before printing: the print statement
                # writes its items one by one, so a failing test inside the
                # print used to leave a half-written row behind.
                try:
                    pvalueOneSample=ttest_1samp(plotData[x],0)[1]
                except Exception:
                    pvalueOneSample="NA"
                print >> stdout,xtickLabels[x],mean(plotData[x]),pvalueOneSample,median(plotData[x])

                if fDSS is not None:
                    sumData,N,NIN,NBelow,NAbove=filterDataInRangeInclusive(plotData[x],summaryStatRange[0],summaryStatRange[1])
                    if NIN>1:
                        mea=mean2(sumData)
                        sd=std(sumData,ddof=1)
                        var=sd*sd
                        mi=min(sumData)
                        ma=max(sumData)
                    else:
                        mea=sd=var=mi=ma="NA"

                    # percentages are "NA" instead of a ZeroDivisionError when N is 0
                    print >> fDSS,"\t".join([xtickLabels[x],str(mea),str(var),str(sd),str(mi),str(ma),str(N),str(NIN),_percentOfTotal(NIN,N),str(NBelow),_percentOfTotal(NBelow,N),str(NAbove),_percentOfTotal(NAbove,N)])
        finally:
            if fDSS is not None:
                fDSS.close()

        # (title, plot-name suffix, XYZ-file suffix or None, p-value function, value on failure)
        # The tests are wrapped in lambdas so that their names are looked up
        # only when called, inside the helper's error handling.
        # NOTE(review): mannwhitneyu is doubled to turn a one-tailed p-value
        # into a two-tailed one; that only holds for SciPy versions whose
        # default is one-sided -- confirm against the installed SciPy.
        pairwiseTests=[
            ("student t-test (2 samples)","_t",None,lambda a,b: ttest_ind(a,b)[1],1.0),
            ("welch t-test","_Welch","_Welch.xyz",lambda a,b: welchs_approximate_ttest_arr(a,b)[3],1.0),
            ("non-parametric (Mann-Whitney U)","_U","_U.xyz",lambda a,b: mannwhitneyu(a,b)[1]*2,1.0),
            ("Ansari-Bradley Two-sample Test for difference in scale parameters ","_Ansari",None,lambda a,b: ansari(a,b)[1],"NA"),
            ("Fligner's Two-sample Test for equal variance (non-parametrics)","_fligner",None,lambda a,b: fligner(a,b)[1],"NA"),
            ("Levene's Two-sample Test for equal variance","_levene",None,lambda a,b: levene(a,b)[1],"NA"),
            ("Bartlett's Two-sample Test for equal variance (for normal distributions)","_bartlett",None,lambda a,b: bartlett(a,b)[1],"NA"),
        ]

        for title,plotSuffix,xyzSuffix,pvalueOfPair,failValue in pairwiseTests:
            print >> stdout,""   # blank line separating the tables
            pvalueM=_pairwisePvalueMatrix(title,plotData,xtickLabels,pvalueOfPair,failValue,minuslog10pvalue)

            if xyzSuffix is not None and outXYZPvalues:
                writeXYZPvalues(outXYZPvalues+xyzSuffix,xtickLabels,pvalueM)

            if plotPvalueCluster:
                makePValueRawPlot(outputClusterPrefix+plotSuffix+"_raw",xtickLabels,pvalueM)
                makePValueClusterPlot(outputClusterPrefix+plotSuffix,xtickLabels,pvalueM,methodCluster)

    figure(figsize=figsz)
    subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8)

    if len(titl)==0:
        titl=outputFile

    plotExpBox(plotData,xtickLabels,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,xlegendrotation,xlabe,ylabe,titl,showSampleSizes,showViolin,showBox,annot,trendData,showLegend,makePzfxFile,makeBinMatrix,dividePlots)

    for m in medianToDraw:
        axhline(y=m,linestyle=':',color='gray')

    savefig(outputFile,bbox_inches="tight")

    if len(plotHistogramToFile)>0:
        drawHistogram(plotHistogramToFile,plotData,xtickLabels)
        drawDensigram(plotHistogramToFile+".density.png",plotData,xtickLabels)