def makePlots(truthFile, predFile, method, outBase, measure):
    """Plot one method's abundance estimates against the simulated truth.

    Reads the predictions with the ParsingUtils reader for ``method`` and the
    ground truth with ``ParsingUtils.readProFile``, derives TPM and read-count
    columns for the truth, filters low values, joins both tables on their
    index and draws the correlation, relative-difference and relative-error
    plots.

    Args:
        truthFile: Passed to ``ParsingUtils.readProFile``.
        predFile: Passed to the reader selected by ``method``.
        method: One of ``"salmon"``, ``"kallisto"`` or ``"express"``. It is
            also used as the column suffix of the prediction table.
        outBase: Forwarded unchanged to the three plotting helpers.
        measure: ``"tpm"`` or ``"num_reads"``; selects the compared columns.

    Raises:
        ValueError: If ``measure`` or ``method`` is not one of the supported
            values. This is checked before anything is loaded, so a typo no
            longer surfaces as a NameError after all the parsing work.
    """
    columnPrefixes = {"tpm": "TPM", "num_reads": "NumReads"}
    readerNames = {
        "salmon": "readSalmon",
        "kallisto": "readKallisto",
        "express": "readExpress",
    }

    if measure not in columnPrefixes:
        raise ValueError(
            "unsupported measure {!r}; expected one of {}".format(
                measure, sorted(columnPrefixes)))
    if method not in readerNames:
        raise ValueError(
            "unsupported method {!r}; expected one of {}".format(
                method, sorted(readerNames)))

    predSuffix = "_{}".format(method)
    trueColName = "{}_truth".format(columnPrefixes[measure])
    predColName = "{}{}".format(columnPrefixes[measure], predSuffix)

    # Load the data; first the predictions
    readPredictions = getattr(ParsingUtils, readerNames[method])
    p = readPredictions(predFile, predSuffix)

    # Now the ground truth
    g = ParsingUtils.readProFile(truthFile, "_truth")

    # Convert the expression fractions to TPM (normalised to sum to 1e6)
    g["TPM_truth"] = 1000000.0 * (g["ExpFrac_truth"] / g["ExpFrac_truth"].sum())
    # Flux sim thinks paired-end = 2 reads . . . sigh
    g["NumReads_truth"] = g["SeqNum_truth"] * 0.5

    # Filter out low TPM and low read counts. The return value is ignored, so
    # filterValues is presumably expected to modify the table in place.
    AnalysisUtils.filterValues("TPM_truth", g, 0.01)
    AnalysisUtils.filterValues("TPM{}".format(predSuffix), p, 0.01)
    AnalysisUtils.filterValues("NumReads_truth", g, 1.0)
    AnalysisUtils.filterValues("NumReads{}".format(predSuffix), p, 1.0)

    # Merge the dataframes on their index, truth on the left
    m = g.join(p)

    setPlotProperties()
    makeCorrPlot(trueColName, predColName, m, outBase, method, measure)
    makeRelDiffPlot(trueColName, predColName, m, outBase, method, measure)
    makeRelErrorPlot(trueColName, predColName, m, outBase, method, measure)
def _readerNameForMethod(methodName):
    """Return the name of the ParsingUtils reader for a methodDict key."""
    upperName = methodName.upper()
    # "Sailfish (quasi)" output is read with the salmon reader.
    if upperName.startswith("SALMON") or upperName == "SAILFISH (QUASI)":
        return "readSalmon"
    if upperName.startswith("KALLISTO"):
        return "readKallisto"
    if upperName.startswith("EXPRESS"):
        return "readExpress"
    if upperName == "SAILFISH":
        return "readSailfish"
    if upperName.startswith("TRUTH"):
        return "readProFile"
    raise ValueError("unrecognized method name {!r}".format(methodName))


def makeTable(methodDict, outpath, outfile, measure, annotPath):
    """Build the accuracy summary table for a set of quantification methods.

    Every entry of ``methodDict`` is loaded with the ParsingUtils reader
    matching its key and all tables are joined on their index. For each
    method other than ``"Truth"`` the table records the Spearman correlation,
    the absolute mean relative difference, the proportionality correlation
    and two error statistics restricted to rows whose true value is >= 1.
    The result is printed, written as CSV to ``outfile + ".csv"`` and as
    LaTeX to ``outfile``.

    Args:
        methodDict: Maps a method name to the file holding its results. It
            must contain the key ``"Truth"``. Keys are also used as column
            suffixes (``"<measure>_<key>"``).
        outpath: Forwarded to ``plotStratifiedDiffs`` and echoed in the final
            status message.
        outfile: Path of the LaTeX table. The part of its base name before
            the first ``_`` names the organism; stratified plots are only
            drawn when that is ``"human"``.
        measure: Column prefix to evaluate; the code derives ``"TPM"`` and
            ``"NumReads"`` columns for the truth table.
        annotPath: Forwarded to ``plotStratifiedDiffs``.

    Raises:
        KeyError: If ``methodDict`` has no ``"Truth"`` entry.
        ValueError: If a key of ``methodDict`` matches no known method.
    """
    truthKey = "Truth"

    # Validate before the heavy imports and file parsing so that bad input
    # fails fast with a clear message. Previously an unknown key either
    # raised NameError or silently re-appended the previous method's table.
    if truthKey not in methodDict:
        raise KeyError("methodDict must contain a 'Truth' entry")
    readerNames = {name: _readerNameForMethod(name) for name in methodDict}

    import numpy as np
    import pandas as pd
    import scipy.stats
    import seaborn as sns  # noqa: F401 -- unused here; kept from the original
    import ParsingUtils
    import AnalysisUtils

    dframes = []
    for k, v in methodDict.items():
        suffix = '_{}'.format(k)
        d = getattr(ParsingUtils, readerNames[k])(v, suffix)
        if readerNames[k] == "readProFile":
            fracCol = "ExpFrac{}".format(suffix)
            d["TPM{}".format(suffix)] = 1000000.0 * (d[fracCol] / d[fracCol].sum())
            # Flux sim thinks paired-end = 2 reads . . . sigh
            d["NumReads{}".format(suffix)] = d["SeqNum{}".format(suffix)] * 0.5
        # Add this dataframe to the list
        dframes.append(d)

    # The first table is the left side of the join; the rest join on index.
    M = dframes[0].join(dframes[1:])

    # Filter eXpress results: cut them off at the smallest positive value
    # reported by any other method. Skipped when there is no eXpress entry.
    if "eXpress" in methodDict:
        minVal = np.inf
        for mn in methodDict:
            if mn in (truthKey, "eXpress"):
                continue
            col = "{}_{}".format(measure, mn)
            newMin = M.loc[M[col] > 0, col].min()
            minVal = min(minVal, newMin)
        print("filtering eXpress results < {} {}".format(minVal, measure))
        AnalysisUtils.filterValues("{}_{}".format(measure, "eXpress"), M, minVal)

    org = outfile.split('/')[-1].split('_')[0]
    print("org = {}".format(org))
    if org == 'human':
        plotStratifiedDiffs(M, methodDict, annotPath, outpath, measure)

    mrdName = 'abs. mean rel. diff.'
    corrName = 'Spearman corr.'
    propName = 'Proportionality corr.'
    tpefName = 'TP error fraction'
    tpMedErrorName = 'TP median per. error'

    # One column per method, excluding the truth itself. The original
    # subtracted set('Truth') -- a set of single characters -- and had to
    # drop the leftover 'Truth' column afterwards.
    methods = [k for k in methodDict if k != truthKey]
    metricNames = [tpMedErrorName, tpefName, mrdName, corrName, propName]
    res = pd.DataFrame(np.nan, index=metricNames, columns=methods)

    truthCol = "{}_{}".format(measure, truthKey)
    errorThreshold = 10.0  # percent
    for k in methods:
        predCol = "{}_{}".format(measure, k)

        # .loc writes into ``res`` itself; chained ``res[k][name] = ...`` may
        # write to a temporary copy and is a no-op under Copy-on-Write.
        res.loc[corrName, k] = scipy.stats.spearmanr(M[truthCol], M[predCol])[0]

        mrd, _ = AnalysisUtils.relDiff(truthCol, predCol, M)
        res.loc[mrdName, k] = mrd["relDiff"].abs().mean()

        res.loc[propName, k] = AnalysisUtils.proportionalityCorrelation(
            truthCol, predCol, M)

        # "True positives": rows whose true value is at least 1.
        tpind = M[M[truthCol] >= 1]
        y = tpind[predCol]
        x = tpind[truthCol]
        relErr = (y - x) / x
        absPctErr = 100.0 * (y - x).abs() / x

        # Fraction of true positives whose absolute error exceeds the
        # threshold; NaN rather than ZeroDivisionError when there are none.
        if len(absPctErr) > 0:
            tpef = len(absPctErr[absPctErr > errorThreshold]) / float(len(absPctErr))
        else:
            tpef = np.nan
        res.loc[tpefName, k] = tpef
        # NOTE(review): the label says "per. error" but this is the median of
        # the signed relative error as a fraction, not a percentage -- confirm.
        res.loc[tpMedErrorName, k] = relErr.median()

    print(res)
    res.to_csv(outfile + ".csv")
    with open(outfile, 'w') as ofile:
        ofile.write(res.to_latex(float_format="{0:.2f}".format))
    # NOTE(review): this reports outpath although the files written above are
    # outfile and outfile + ".csv" -- confirm which was intended.
    print("wrote {}".format(outpath))