def main():
    """Plot figures for one disease pair, or for every filtered disease pair.

    Reads the module-level ``options`` object (only read, never rebound, so
    no ``global`` declaration is required).

    * If both ``options.disease1`` and ``options.disease2`` are set, the
      figure for that single pair is built and shown with ``show()``.
    * Otherwise a figure is built for every ordered pair of distinct
      diseases returned by ``__filter_diseases`` and closed immediately.
      NOTE(review): presumably ``__plot_figure`` persists the figure itself
      before it is closed here -- confirm.
    """
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for specified disease pair or for all disease pairs with
    # non-zero patient counts.
    if options.disease1 is not None and options.disease2 is not None:
        funcs = __get_funcs(database, options.disease1, options.disease2,
                            options.verbose)
        plt = __plot_figure(funcs, options.disease1, options.disease2,
                            options.verbose)
        plt.show()
        return

    filtered_diseases = __filter_diseases(diseases, database)
    for D1 in filtered_diseases:
        for D2 in filtered_diseases:
            # Both (D1, D2) and (D2, D1) are processed; only the diagonal
            # is skipped.
            if D1 == D2:
                continue

            funcs = __get_funcs(database, D1, D2, options.verbose)
            plt = __plot_figure(funcs, D1, D2, options.verbose)
            plt.close()  # close current figure
def main():
    """Plot the male/female figure for one disease, or for all filtered ones.

    Reads the module-level ``options`` object (only read, never rebound, so
    no ``global`` declaration is required).

    * If ``options.disease`` is set, the figure for that disease is built
      and shown with ``show()``.
    * Otherwise a figure is built for every disease returned by
      ``__filter_diseases`` and closed immediately.
      NOTE(review): presumably ``__plot_figure`` persists the figure itself
      before it is closed here -- confirm.
    """
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for the specified disease or all diseases with non-zero
    # patient counts.
    if options.disease is not None:
        M_func, F_func, M_count, F_count = __get_funcs(database,
                                                       options.disease)
        plt = __plot_figure(M_func, F_func, M_count, F_count,
                            options.disease, options.verbose)
        plt.show()
        return

    filtered_diseases = __filter_diseases(diseases, database)
    for D in filtered_diseases:
        M_func, F_func, M_count, F_count = __get_funcs(database, D)
        plt = __plot_figure(M_func, F_func, M_count, F_count,
                            D, options.verbose)
        plt.close()  # close current figure
def main():
    """Plot the male/female figure for one disease, or for all filtered ones.

    Reads the module-level ``options`` object (only read, never rebound, so
    no ``global`` declaration is required).

    * If ``options.disease`` is set, the figure for that disease is built
      and shown with ``show()``.
    * Otherwise a figure is built for every disease returned by
      ``__filter_diseases`` and closed immediately.
      NOTE(review): presumably ``__plot_figure`` persists the figure itself
      before it is closed here -- confirm.
    """
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for the specified disease or all diseases with non-zero
    # patient counts.
    if options.disease is not None:
        M_func, F_func, M_count, F_count = __get_funcs(database,
                                                       options.disease)
        plt = __plot_figure(M_func, F_func, M_count, F_count,
                            options.disease, options.verbose)
        plt.show()
        return

    filtered_diseases = __filter_diseases(diseases, database)
    for D in filtered_diseases:
        M_func, F_func, M_count, F_count = __get_funcs(database, D)
        plt = __plot_figure(M_func, F_func, M_count, F_count,
                            D, options.verbose)
        plt.close()  # close current figure
def main():
    """Compute likelihood-ratio-test p-values for disease pairs.

    Reads the module-level ``options`` object (only read, never rebound, so
    no ``global`` declaration is required).

    For each disease pair, the log-likelihood of the "independent" model and
    of the ``options.overlap_type`` model are obtained from
    ``__compute_log_likelihood_wrapper``. The statistic
    ``2 * (overlap_LL - indep_LL)`` is compared against a chi-square
    distribution with one degree of freedom.

    Pairs processed:

    * only ``(options.disease1, options.disease2)`` when both are set;
    * otherwise every pair ``D1 < D2`` drawn from the diseases returned by
      ``__filter_diseases``.

    One line per pair is written to ``p_value.txt`` in the current working
    directory, and a summary is printed to stdout.
    """
    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Loop-invariant: True when the user asked for one specific pair.
    single_pair = (options.disease1 is not None
                   and options.disease2 is not None)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    separator = "-" * 80

    # The context manager guarantees the results file is flushed and closed
    # even if an optimisation below raises part-way through the run.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:
                # In all-pairs mode visit each unordered pair once and skip
                # the diagonal.
                if not single_pair and D1 >= D2:
                    continue

                # Single-argument print(...) emits the same bytes whether
                # print is a statement (Python 2) or a function (Python 3).
                print(separator)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    "independent",
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                )

                # NOTE(review): presumably a floor the overlap fit is
                # expected to reach -- confirm against the wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    options.overlap_type,
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                    min_log_likelihood,
                )

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                              - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # 1 - cdf because the subtraction rounds to exactly 0 once
                # the tail drops below double precision.
                # NOTE(review): assumes chi2 is scipy.stats.chi2 -- confirm.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, " % (D1, D2, options.overlap_type)
                text += "%.3E, " % overlap_log_likelihood
                text += "%.3E, " % indep_log_likelihood
                text += "%.3E, %.3E" % (log_likelihood_ratio, p_value)

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print(separator)
                print("")

                f.write(text + "\n")
def main():
    """Compute likelihood-ratio-test p-values for disease pairs.

    Reads the module-level ``options`` object (only read, never rebound, so
    no ``global`` declaration is required).

    For each disease pair, the log-likelihood of the "independent" model and
    of the ``options.overlap_type`` model are obtained from
    ``__compute_log_likelihood_wrapper``. The statistic
    ``2 * (overlap_LL - indep_LL)`` is compared against a chi-square
    distribution with one degree of freedom.

    Pairs processed:

    * only ``(options.disease1, options.disease2)`` when both are set;
    * otherwise every pair ``D1 < D2`` drawn from the diseases returned by
      ``__filter_diseases``.

    One line per pair is written to ``p_value.txt`` in the current working
    directory, and a summary is printed to stdout.
    """
    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Loop-invariant: True when the user asked for one specific pair.
    single_pair = (options.disease1 is not None
                   and options.disease2 is not None)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    rule = "-" * 80

    # The context manager guarantees the results file is flushed and closed
    # even if an optimisation below raises part-way through the run.
    with open("p_value.txt", 'w') as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:
                # In all-pairs mode visit each unordered pair once and skip
                # the diagonal.
                if not single_pair and D1 >= D2:
                    continue

                # Single-argument print(...) emits the same bytes whether
                # print is a statement (Python 2) or a function (Python 3).
                print(rule)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2,
                    options.tau1, options.tau2,
                    "independent", options.threshold_type,
                    options.prevalence_file, options.norm_prval_method)

                # NOTE(review): presumably a floor the overlap fit is
                # expected to reach -- confirm against the wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2,
                    options.tau1, options.tau2,
                    options.overlap_type, options.threshold_type,
                    options.prevalence_file, options.norm_prval_method,
                    min_log_likelihood)

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                              - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # 1 - cdf because the subtraction rounds to exactly 0 once
                # the tail drops below double precision.
                # NOTE(review): assumes chi2 is scipy.stats.chi2 -- confirm.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, " % (D1, D2, options.overlap_type)
                text += "%.3E, " % overlap_log_likelihood
                text += "%.3E, " % indep_log_likelihood
                text += "%.3E, %.3E" % (log_likelihood_ratio, p_value)

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print(rule)
                print("")

                f.write(text + '\n')