Exemplo n.º 1
0
def main():
    """Plot one disease pair, or every pair of filtered diseases.

    Settings are read from the module-level ``options`` object (it is only
    read here, so no ``global`` declaration is required).

    The EMR data is imported into a fresh ``EMRDatabase``. When both
    ``options.disease1`` and ``options.disease2`` are set, one plot is built
    for that pair and displayed with ``show()``. Otherwise a plot is built
    for every ordered pair of distinct diseases returned by
    ``__filter_diseases`` and closed immediately after it is created.
    """
    database = EMRDatabase()

    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    # None is a singleton: test it by identity so the result cannot depend
    # on a custom __eq__/__ne__ of whatever the option value happens to be.
    pair_given = (options.disease1 is not None
                  and options.disease2 is not None)

    # Single, explicitly requested disease pair: build the plot and show it.
    if pair_given:
        funcs = __get_funcs(database,
                            options.disease1,
                            options.disease2,
                            options.verbose)

        plot = __plot_figure(funcs,
                             options.disease1,
                             options.disease2,
                             options.verbose)

        plot.show()
        return

    # No pair requested: plot all disease pairs with non-zero patient
    # counts. Both (D1, D2) and (D2, D1) are visited; only D1 == D2 is
    # skipped.
    filtered_diseases = __filter_diseases(diseases, database)

    for D1 in filtered_diseases:
        for D2 in filtered_diseases:

            if D1 == D2:
                continue

            funcs = __get_funcs(database,
                                D1, D2,
                                options.verbose)

            plot = __plot_figure(funcs,
                                 D1, D2,
                                 options.verbose)

            # Close the current figure before moving on to the next pair.
            plot.close()
def main():
    """Plot the requested disease, or every filtered disease in turn.

    Settings are read from the module-level ``options`` object (read-only
    here, so no ``global`` declaration is needed).

    After importing the EMR data into a fresh ``EMRDatabase``:

    * if ``options.disease`` is set, the four values returned by
      ``__get_funcs`` for that disease are passed to ``__plot_figure`` and
      the resulting plot is displayed with ``show()``;
    * otherwise the same is done for each disease returned by
      ``__filter_diseases``, and each plot is closed right after it is
      created instead of being shown.
    """
    database = EMRDatabase()

    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for the specified disease or all diseases with non-zero
    # patient counts. ``is not None`` is the identity test recommended for
    # the None singleton; ``!= None`` would defer to the operand's __ne__.
    if options.disease is not None:

        male_func, female_func, male_count, female_count = __get_funcs(
            database, options.disease)

        plot = __plot_figure(male_func, female_func,
                             male_count, female_count,
                             options.disease, options.verbose)

        plot.show()

    else:

        filtered_diseases = __filter_diseases(diseases, database)

        for D in filtered_diseases:

            male_func, female_func, male_count, female_count = __get_funcs(
                database, D)

            plot = __plot_figure(male_func, female_func,
                                 male_count, female_count,
                                 D, options.verbose)

            # Close the plot before the next disease is processed.
            plot.close()
def main():
    """Create the per-disease plot(s) described by the command-line options.

    Reads the module-level ``options`` object; nothing is assigned to it, so
    it is accessed without a ``global`` declaration.

    Behaviour:
        * ``options.disease`` set: build one plot for that disease and
          display it with ``show()``.
        * ``options.disease`` unset: build one plot for every disease
          returned by ``__filter_diseases`` and close each one as soon as
          it has been created.
    """
    database = EMRDatabase()
    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    def plot_disease(disease):
        # Fetch the four values __get_funcs returns for this disease and
        # hand them straight to __plot_figure.
        M_func, F_func, M_count, F_count = __get_funcs(database, disease)
        return __plot_figure(M_func, F_func, M_count, F_count, disease,
                             options.verbose)

    # Explicitly requested disease. Compare against None by identity: it
    # is a singleton, and ``!=`` would go through the operand's __ne__.
    if options.disease is not None:
        plot_disease(options.disease).show()
        return

    # Otherwise cover all diseases with non-zero patient counts.
    for D in __filter_diseases(diseases, database):
        plot_disease(D).close()
Exemplo n.º 4
0
def main():
    """Compute likelihood-ratio p-values for disease pairs and record them.

    Settings are read from the module-level ``options`` object.

    For each disease pair the log-likelihood of the "independent" model and
    of the ``options.overlap_type`` model is obtained from
    ``__compute_log_likelihood_wrapper``. Twice their difference is used as
    the likelihood-ratio statistic, and the p-value is the right-tail area
    of a chi-square distribution with one degree of freedom.

    Pairs processed:
        * only ``(options.disease1, options.disease2)`` when both are set;
        * otherwise every pair ``D1 < D2`` drawn from the diseases returned
          by ``__filter_diseases``.

    Each result is printed to stdout and written as one line to
    ``p_value.txt`` (opened in "w" mode, so an existing file is replaced).
    The file is closed even if a computation raises.
    """
    # Import EMR data into database
    database = EMRDatabase()

    database.import_data(options.emr_data_file, options.diseases_file, options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    # Identity tests against the None singleton; evaluated once because the
    # answer does not change while looping.
    single_pair = options.disease1 is not None and options.disease2 is not None

    # The context manager guarantees the file is flushed and closed even if
    # one of the likelihood computations below raises.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:

                # In all-pairs mode visit each unordered pair once and skip
                # a disease paired with itself.
                if not single_pair and D1 >= D2:
                    continue

                # Single-argument parenthesised prints produce the same
                # output under Python 2 and also parse under Python 3.
                print("-" * 80)
                print(" ".join((
                    "D1= %s, D2= %s," % (D1, D2),
                    "overlap_type= %s" % options.overlap_type,
                )))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    "independent",
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                )

                # NOTE(review): passed as the wrapper's extra trailing
                # argument for the overlap model only; presumably a lower
                # bound for the optimizer -- confirm against the wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    options.overlap_type,
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                    min_log_likelihood,
                )

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function computes that tail
                # directly; "1.0 - cdf" rounds to exactly 0 once the tail
                # drops below double-precision resolution.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "".join((
                    "%s, %s, %s, " % (D1, D2, options.overlap_type),
                    "%.3E, " % overlap_log_likelihood,
                    "%.3E, " % indep_log_likelihood,
                    "%.3E, %.3E" % (log_likelihood_ratio, p_value),
                ))

                print(" ".join((
                    "overlap_LL= %.2E," % overlap_log_likelihood,
                    "indep_LL= %.2E," % indep_log_likelihood,
                    "LLR= %.2E," % log_likelihood_ratio,
                    "p_value= %.2E" % p_value,
                )))
                print("-" * 80)
                print("")

                f.write(text + "\n")
Exemplo n.º 5
0
def main():
    """Run the genetic-overlap likelihood-ratio test and save the p-values.

    Settings are read from the module-level ``options`` object.

    Two models are evaluated per disease pair through
    ``__compute_log_likelihood_wrapper``: the "independent" model and the
    model named by ``options.overlap_type``. The statistic is
    ``2 * (overlap_LL - indep_LL)`` and its p-value is the right tail of a
    chi-square distribution with one degree of freedom.

    When ``options.disease1`` and ``options.disease2`` are both set only
    that pair is tested; otherwise every pair with ``D1 < D2`` from the
    diseases returned by ``__filter_diseases`` is tested.

    Output goes to stdout and, one line per pair, to ``p_value.txt`` (opened
    with mode 'w', replacing any existing file). The file is closed even
    when a computation fails part-way through.
    """
    # Import EMR data into database
    database = EMRDatabase()

    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # None is a singleton, so test it by identity. The mode is fixed for
    # the whole run and is therefore computed once, outside the loops.
    all_pairs = options.disease1 is None or options.disease2 is None

    # Degree of freedoms of the chi-square distribution.
    dof = 1

    separator = "-" * 80

    # "with" closes the file on every exit path, including exceptions
    # raised by the likelihood computations.
    with open("p_value.txt", 'w') as f:
        f.writelines([
            "# emr_data_file = %s\n" % options.emr_data_file,
            "# norm_prval_method = %s\n" % options.norm_prval_method,
            "# threshold_type = %s\n" % options.threshold_type,
            "\n",
            "# D1, D2, overlap_type, ",
            "overlap_LL, indepedent_LL, ",
            "LLR, p_value\n",
        ])

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if all_pairs:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases
        else:
            D1_list = [options.disease1]
            D2_list = [options.disease2]

        for D1 in D1_list:
            for D2 in D2_list:

                # All-pairs mode: keep only D1 < D2 so that each unordered
                # pair is tested once and self-pairs are dropped.
                if all_pairs and D1 >= D2:
                    continue

                # The prints below use the single-argument parenthesised
                # form: same output in Python 2, valid syntax in Python 3.
                print(separator)
                print(" ".join(["D1= %s, D2= %s," % (D1, D2),
                                "overlap_type= %s" % options.overlap_type]))

                # Independent (no genetic overlap) model
                indep_log_likelihood = (__compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, "independent", options.threshold_type,
                    options.prevalence_file, options.norm_prval_method))

                # NOTE(review): only the overlap model receives this extra
                # argument; presumably a floor for the optimizer's result
                # -- verify against __compute_log_likelihood_wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = (__compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, options.overlap_type,
                    options.threshold_type, options.prevalence_file,
                    options.norm_prval_method, min_log_likelihood))

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood -
                                              indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. sf() evaluates the tail directly, whereas
                # "1.0 - cdf()" collapses to 0 for very small p-values.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, " % (D1, D2, options.overlap_type)
                text += "%.3E, " % overlap_log_likelihood
                text += "%.3E, " % indep_log_likelihood
                text += "%.3E, %.3E" % (log_likelihood_ratio, p_value)

                print(" ".join(["overlap_LL= %.2E," % overlap_log_likelihood,
                                "indep_LL= %.2E," % indep_log_likelihood,
                                "LLR= %.2E," % log_likelihood_ratio,
                                "p_value= %.2E" % p_value]))
                print(separator)
                print("")

                f.write(text + '\n')