def test_models(version=(30, -1), resample_flag=False, patients=-1, printReg=True):
    """Fit the regression models for one data version and return them.

    Args:
        version: Version tuple forwarded to ``read_complete``,
            ``DataUtilities.get_version`` and the regression runners.
        resample_flag: Accepted for interface compatibility; this function
            never reads it.
        patients: Patient collection to analyse. The sentinel ``-1`` means
            "load it via ``DataUtilities.getDictReadofPatientsFilled()``".
        printReg: When true, run ``run_regression_and_print`` (which also
            receives the reference ``means``); otherwise run the silent
            ``run_regression``.

    Returns:
        Whatever the selected regression runner returns.
    """
    # Reference values handed to the printing runner only.
    reference_means = dict(educ_from_12=4, born_from_1960=10)

    if patients == -1:
        patients = DataUtilities.getDictReadofPatientsFilled()

    # read the survey; the second element of the pair is not used here.
    survey, complete = read_complete(version, patients)
    print(DataUtilities.get_version(version))

    # run the models
    if not printReg:
        return run_regression(survey, version)
    return run_regression_and_print(survey, version=version, means=reference_means)
def errorTotal():
    """Print LaTeX odds tables and plot mutation odds over 100 resampled fits.

    Each of the 100 rounds obtains a filled patient collection from
    ``DataUtilities.getDictReadofPatientsFilled``, draws 91 entries from it
    with ``sample_wr`` (presumably sampling with replacement -- confirm),
    fits the models through ``TCGAlogReg.test_models`` with printing
    disabled, and files every odds value yielded by ``report_odds`` under
    its term name.

    Output, in order:
      1. The literal line ``ErrorTotal``.
      2. A four-column table (type, mean odds, lower, upper) for terms that
         are NOT in ``mutationDict.getMutationsGreaterThan(1)``.
      3. An empty line.
      4. A three-column table (type, mean odds, confidence interval) with a
         caption, for terms that ARE in that collection.
      5. A ``plotRegMult`` call for the terms of the second table.

    Rows are ordered by the sum of the collected odds, ascending.

    Returns:
        None. Results go to standard output and ``plotRegMult``.
    """
    means = {}
    version = (30, -1)
    cumDict = {}
    patientDict = DataUtilities.getDictReadofPatients()
    for _ in range(100):
        patients = DataUtilities.getDictReadofPatientsFilled(patientDict=patientDict)
        # resample
        sPatients = sample_wr(patients, 91)
        regs = TCGAlogReg.test_models(version=version, patients=sPatients, printReg=False)
        for reg in regs.regs:
            for name, odds, _p in reg.report_odds(means, printCum=False):
                # setdefault avoids materialising cumDict.keys() per lookup.
                cumDict.setdefault(name, []).append(odds)

    def _summarize(oddsList):
        # Mean of the collected odds plus its lower/upper confidence bounds.
        average = sum(oddsList) / float(len(oddsList))
        return average, getLowerConfidence(oddsList), getUpperConfidence(oddsList)

    # Both tables share one ordering and one mutation collection, so compute
    # them once. NOTE(review): assumes getMutationsGreaterThan has no side
    # effects that depend on being called once per row -- confirm.
    ranked = sorted(cumDict.items(), key=lambda e: sum(e[1]))
    mutations = mutationDict.getMutationsGreaterThan(1)

    print("ErrorTotal")

    # Table 1: non-mutation terms, bounds in separate columns.
    print(r"\begin{table}[h]")
    print(r"\begin{tabular}{|l|l|l|l|}")
    print(r"\hline")
    print(r"\textbf{Type} & \textbf{Odds} & \textbf{Odds Lower} & \textbf{Odds Upper}\\ \hline")
    for key, oddsList in ranked:
        if key not in mutations:
            average, lower, upper = _summarize(oddsList)
            print(key + " & {:.3f} & {:.3f} & {:.3f}".format(average, lower, upper) + r"\\ \hline")
    print(r"\end{tabular}")
    print(r"\end{table}")

    print("")

    # Table 2: mutation terms. Each row has three cells (the bounds are one
    # "(lower, upper)" cell), so the column spec and header declare three.
    print(r"\begin{table}[h]")
    print(r"\begin{tabular}{|l|l|l|}")
    print(r"\hline")
    print(r"\textbf{Type} & \textbf{Odds} & \textbf{Confidence Interval}\\ \hline")
    names = []
    averages = []
    lowers = []
    uppers = []
    for key, oddsList in ranked:
        if key in mutations:
            average, lower, upper = _summarize(oddsList)
            print(key + " & {:.3f} & ({:.3f}, {:.3f})".format(average, lower, upper) + r"\\ \hline")
            names.append(key)
            averages.append(average)
            lowers.append(lower)
            uppers.append(upper)
    print(r"\end{tabular}")
    print(r"\caption{The above table shows the effect each mutation has when tested in conjunction with the other mutations}")
    print(r"\end{table}")

    plotRegMult(names, averages, lowers, uppers, "Mutation Log Odds With Total Error", "Mutations", "Log Odds")
def errorMissing():
    """Print LaTeX odds tables and plots over 100 refits on re-filled data.

    Each of the 100 rounds obtains a freshly filled patient collection from
    ``DataUtilities.getDictReadofPatientsFilled`` (no resampling is done
    here), fits the models through ``TCGAlogReg.test_models`` with printing
    disabled, and files every odds value yielded by ``report_odds`` under
    its term name.

    Output, in order:
      1. The literal line ``ErrorMissing``.
      2. A table for terms NOT in ``mutationDict.getMutationsGreaterThan(1)``,
         followed by a ``plotRegMult`` call for those terms.
      3. An empty line.
      4. A captioned table for terms that ARE in that collection, followed
         by a ``plotRegMult`` call for those terms.
      5. A final ``plotRegMult`` call over both groups concatenated.

    Rows are ordered by the sum of the collected odds, ascending.

    Returns:
        None. Results go to standard output and ``plotRegMult``.
    """
    means = {}
    version = (30, -1)
    cumDict = {}
    patientDict = DataUtilities.getDictReadofPatients()
    for _ in range(100):
        patients = DataUtilities.getDictReadofPatientsFilled(patientDict=patientDict)
        regs = TCGAlogReg.test_models(version=version, patients=patients, printReg=False)
        for reg in regs.regs:
            for name, odds, _p in reg.report_odds(means, printCum=False):
                # setdefault avoids materialising cumDict.keys() per lookup.
                cumDict.setdefault(name, []).append(odds)

    def _print_table(rows, caption=None):
        # Print one three-column LaTeX table for ``rows`` and return the
        # parallel (names, averages, lowers, uppers) lists used for plotting.
        print(r"\begin{table}[h]")
        print(r"\begin{tabular}{|l|l|l|}")
        print(r"\hline")
        print(r"\textbf{Type} & \textbf{Odds} & \textbf{Confidence Interval}\\ \hline")
        names, averages, lowers, uppers = [], [], [], []
        for key, oddsList in rows:
            average = sum(oddsList) / float(len(oddsList))
            lower = getLowerConfidence(oddsList)
            upper = getUpperConfidence(oddsList)
            print(key + " & {:.3f} & ({:.3f}, {:.3f})".format(average, lower, upper) + r"\\ \hline")
            names.append(key)
            averages.append(average)
            lowers.append(lower)
            uppers.append(upper)
        print(r"\end{tabular}")
        if caption is not None:
            print(caption)
        print(r"\end{table}")
        return names, averages, lowers, uppers

    # Both tables share one ordering and one mutation collection, so compute
    # them once. NOTE(review): assumes getMutationsGreaterThan has no side
    # effects that depend on being called once per row -- confirm.
    ranked = sorted(cumDict.items(), key=lambda e: sum(e[1]))
    mutations = mutationDict.getMutationsGreaterThan(1)
    lifeRows = [item for item in ranked if item[0] not in mutations]
    mutationRows = [item for item in ranked if item[0] in mutations]

    print("ErrorMissing")

    # NOTE(review): the plot titles say "Sampling Error" although this
    # function does not resample; titles are left unchanged -- confirm intent.
    namesL, averagesL, lowersL, uppersL = _print_table(lifeRows)
    plotRegMult(namesL, averagesL, lowersL, uppersL, "Life Factor Log Odds With Sampling Error", "Factor", "Log Odds")

    print("")

    # NOTE(review): the caption reads "independently the other mutations";
    # the emitted text is left unchanged -- confirm the intended wording.
    namesM, averagesM, lowersM, uppersM = _print_table(
        mutationRows,
        caption=r"\caption{The above table shows the effect each mutation has when tested independently the other mutations. The confidence intervals are shown for the error due to missing data.}",
    )
    plotRegMult(namesM, averagesM, lowersM, uppersM, "Mutation Log Odds With Sampling Error", "Mutation", "Log Odds")

    plotRegMult(namesL + namesM, averagesL + averagesM, lowersL + lowersM, uppersL + uppersM, "Log Odds With Sampling Error", "Factor", "Log Odds")