def main():
    """Optimize the log-likelihood for one disease pair and plot its contour.

    All settings come from the module-level ``options`` object. The EMR data
    is imported, the optimizer is configured for the pair
    (``options.disease1``, ``options.disease2``), one run supplies the
    optimized parameters and ``options.num_paths`` further runs (with
    ``save_path=True``) supply the optimization paths. Everything is passed
    to ``__plot_contour`` and the result is displayed with ``plt.show()``.
    """
    global options

    first_disease = options.disease1
    second_disease = options.disease2

    # Load the EMR records.
    emr_db = EMRDatabase()
    emr_db.import_data(options.emr_data_file, options.diseases_file,
                       options.code2disease_file)

    # Configure the optimizer before building the likelihood function.
    optimizer = OptimizeLogLikelihood(options.verbose)
    optimizer.set_opt_method(options.opt_method)
    optimizer.set_use_random_seed(options.use_random_seed)
    optimizer.setup_log_likelihood_func(
        emr_db, first_disease, second_disease,
        options.tau1, options.tau2,
        options.overlap_type, options.threshold_type,
        options.prevalence_file, options.norm_prval_method)

    # Only the middle element of the run result (the optimized parameters)
    # is needed from the first run.
    _, best_param, _ = optimizer.run()

    # Every extra run contributes the path it recorded.
    paths = []
    for _run in range(options.num_paths):
        _, _, recorded_path = optimizer.run(save_path=True)
        paths.append(recorded_path)

    likelihood_func = optimizer.get_log_likelihood_func()

    contour = __plot_contour(
        likelihood_func, best_param, paths,
        options.tau1, options.tau2,
        options.overlap_type, options.threshold_type,
        options.norm_prval_method, options.verbose)

    plt.show()
Exemplo n.º 2
0
def main():
    """Plot figures for one disease pair or for every filtered disease pair.

    Settings are read from the module-level ``options`` object. When both
    ``options.disease1`` and ``options.disease2`` are set, the figure for
    that pair is built and shown. Otherwise a figure is built for every
    ordered pair of distinct diseases returned by ``__filter_diseases`` and
    closed immediately after it has been built.
    """
    database = EMRDatabase()

    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for specified disease pair or for all disease pairs with
    # non-zero patient counts.
    if options.disease1 is not None and options.disease2 is not None:

        funcs = __get_funcs(database,
                            options.disease1,
                            options.disease2,
                            options.verbose)

        # Named `plot` rather than `plt` so the conventional module alias is
        # not shadowed inside this function.
        plot = __plot_figure(funcs,
                             options.disease1,
                             options.disease2,
                             options.verbose)

        plot.show()

    else:

        filtered_diseases = __filter_diseases(diseases, database)

        for D1 in filtered_diseases:
            for D2 in filtered_diseases:

                # A disease is never paired with itself.
                if D1 == D2:
                    continue

                funcs = __get_funcs(database, D1, D2, options.verbose)

                plot = __plot_figure(funcs, D1, D2, options.verbose)

                # Close the current figure before building the next one.
                plot.close()
Exemplo n.º 3
0
def main():
    """Run the log-likelihood optimization for a disease pair and plot it.

    Driven entirely by the module-level ``options`` object: imports the EMR
    data, prepares an ``OptimizeLogLikelihood`` instance for
    ``options.disease1`` / ``options.disease2``, obtains the optimized
    parameters from one run, gathers ``options.num_paths`` optimization
    paths from further runs, and passes the results to ``__plot_contour``
    before calling ``plt.show()``.
    """
    global options

    disease_a = options.disease1
    disease_b = options.disease2

    # Read the EMR data files into a fresh database object.
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Build and configure the optimizer.
    optimizer = OptimizeLogLikelihood(options.verbose)
    optimizer.set_opt_method(options.opt_method)
    optimizer.set_use_random_seed(options.use_random_seed)

    optimizer.setup_log_likelihood_func(
        database,
        disease_a,
        disease_b,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.prevalence_file,
        options.norm_prval_method,
    )

    # The first run is used only for its optimized parameters.
    _, optimized_param, _ = optimizer.run()

    # The following runs are used only for their saved paths.
    optimization_paths = []
    for _path_index in range(options.num_paths):
        _, _, saved_path = optimizer.run(save_path=True)
        optimization_paths.append(saved_path)

    log_likelihood_func = optimizer.get_log_likelihood_func()

    contour_plot = __plot_contour(
        log_likelihood_func,
        optimized_param,
        optimization_paths,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.norm_prval_method,
        options.verbose,
    )

    plt.show()
def main():
    """Plot the figure for one disease or for every filtered disease.

    Settings are read from the module-level ``options`` object. When
    ``options.disease`` is set, its figure is built and shown. Otherwise a
    figure is built for each disease returned by ``__filter_diseases`` and
    closed immediately after it has been built.
    """
    database = EMRDatabase()

    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for the specified disease or all diseases with non-zero
    # patient counts.
    if options.disease is not None:

        # NOTE(review): the M_/F_ prefixes presumably mean male/female --
        # confirm against __get_funcs.
        M_func, F_func, M_count, F_count = __get_funcs(database,
                                                       options.disease)

        # Named `plot` rather than `plt` so the conventional module alias is
        # not shadowed inside this function.
        plot = __plot_figure(M_func, F_func, M_count, F_count,
                             options.disease, options.verbose)

        plot.show()

    else:

        filtered_diseases = __filter_diseases(diseases, database)

        for D in filtered_diseases:

            M_func, F_func, M_count, F_count = __get_funcs(database, D)

            plot = __plot_figure(M_func, F_func, M_count, F_count,
                                 D, options.verbose)

            # Close the current figure before building the next one.
            plot.close()
def main():
    """Build the per-disease figure for one disease or for all filtered ones.

    Reads its settings from the module-level ``options`` object. With
    ``options.disease`` set, that disease's figure is built and shown;
    without it, one figure is built per disease returned by
    ``__filter_diseases`` and closed immediately after it has been built.
    """
    database = EMRDatabase()

    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    diseases = database.get_diseases()
    diseases.sort()

    # Create plot for the specified disease or all diseases with non-zero
    # patient counts.
    if options.disease is not None:

        # NOTE(review): the M_/F_ prefixes presumably mean male/female --
        # confirm against __get_funcs.
        M_func, F_func, M_count, F_count = __get_funcs(database,
                                                       options.disease)

        # Named `plot` rather than `plt` so the conventional module alias is
        # not shadowed inside this function.
        plot = __plot_figure(M_func, F_func, M_count, F_count,
                             options.disease, options.verbose)

        plot.show()

    else:

        for D in __filter_diseases(diseases, database):

            M_func, F_func, M_count, F_count = __get_funcs(database, D)

            plot = __plot_figure(M_func, F_func, M_count, F_count, D,
                                 options.verbose)

            # Close the current figure before building the next one.
            plot.close()
Exemplo n.º 6
0
def main():
    """Compute likelihood-ratio p-values for disease pairs and record them.

    Settings are read from the module-level ``options`` object. For each
    disease pair the log-likelihood is computed twice through
    ``__compute_log_likelihood_wrapper``: once with the ``"independent"``
    overlap type and once with ``options.overlap_type``. Twice the
    difference is compared against a chi-square distribution with one degree
    of freedom to obtain the p-value.

    If both ``options.disease1`` and ``options.disease2`` are set, only that
    pair is processed. Otherwise every pair ``D1 < D2`` of the diseases
    returned by ``__filter_diseases`` is processed.

    Results are printed to stdout and written to ``p_value.txt`` in the
    current working directory. The print calls use the single-argument
    parenthesized form, which produces the same output on Python 2 and 3.
    """
    # Import EMR data into database
    database = EMRDatabase()

    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    # The context manager closes the file even if a computation below raises.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease pairs
        # with non-zero patient counts.
        single_pair = (options.disease1 is not None
                       and options.disease2 is not None)

        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:

                # When scanning all diseases, visit each unordered pair once
                # and never pair a disease with itself.
                if not single_pair and D1 >= D2:
                    continue

                print("-" * 80)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    "independent",
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                )

                # Passed as an extra positional argument to the overlap run
                # only; its exact meaning is defined by the wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    options.overlap_type,
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                    min_log_likelihood,
                )

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                              - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # `1.0 - cdf` because the subtraction rounds every p-value
                # below about 1e-16 to exactly 0.
                # NOTE(review): assumes `chi2` is scipy.stats.chi2 -- confirm
                # against the file's imports.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, %.3E, %.3E, %.3E, %.3E" % (
                    D1, D2, options.overlap_type,
                    overlap_log_likelihood, indep_log_likelihood,
                    log_likelihood_ratio, p_value)

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print("-" * 80)
                print("")

                f.write(text + "\n")
Exemplo n.º 7
0
def _print_patients(title, patients):
    """Print ``title``, one line per patient in ``patients``, then a blank line.

    ``patients`` is iterated for its keys and indexed by each key, so any
    mapping-like object works.
    """
    print(title)
    for patient in patients:
        print("patient %s --> %s" % (patient, patients[patient]))
    print("")


def main():
    """Exercise ``EMRDatabase.query_emr_data`` and print the patient counts.

    Settings are read from the module-level ``options`` object. After the
    EMR data is imported, the function runs single-disease, AND, OR, "not"
    and gender-filtered queries for a fixed set of diseases and prints the
    size of each result. It then prints the per-disease counts and the
    pairwise AND counts for a hard-coded list of six diseases. With
    ``options.verbose`` set, every result is also dumped patient by patient.

    The print calls use the single-argument parenthesized form, which
    produces the same output on Python 2 and 3.
    """
    database = EMRDatabase(options.verbose)

    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    diseases = [
        "Alzheimer's disease", "Attention deficit", "Autism",
        "Breast cancer (female)", "Epilepsy", "Schizophrenia"
    ]

    D1 = "Autism"
    D2 = "Epilepsy"
    D3 = "Breast cancer (female)"

    # Query for patients that have D1
    D1_patients = database.query_emr_data(D1)

    # Query for patients that have D2
    D2_patients = database.query_emr_data(D2)

    # Query for patients that have both D1 and D2
    D1andD2_patients = database.query_emr_data([D1, D2])

    # Query for patients that have either D1 or D2
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)

    # Query for patients that have D1 and not D2
    D1notD2_patients = database.query_emr_data([D1, "not " + D2])

    # Query for patients that have D2 and not D1
    D2notD1_patients = database.query_emr_data(["not " + D1, D2])

    # Query for patients that have neither D2 nor D1
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Query for male patients with female Breast cancer
    D3_male_patients = database.query_emr_data(D3, gender_filters="M")

    # Query for female patients with female Breast cancer
    D3_female_patients = database.query_emr_data(D3, gender_filters="F")

    print("Test EMR_database queries:")
    print("%-6d patients with %s" % (len(D1_patients), D1))
    print("%-6d patients with %s" % (len(D2_patients), D2))
    print("%-6d patients with %s and %s" % (len(D1andD2_patients), D1, D2))
    print("%-6d patients with %s or %s" % (len(D1orD2_patients), D1, D2))
    print("%-6d patients with %s and no %s"
          % (len(D1notD2_patients), D1, D2))
    print("%-6d patients with %s and no %s"
          % (len(D2notD1_patients), D2, D1))
    print("%-6d patients with no %s and no %s"
          % (len(noD1D2_patients), D1, D2))
    print("%-6d male patients with %s" % (len(D3_male_patients), D3))
    print("%-6d female patients with %s" % (len(D3_female_patients), D3))

    if options.verbose:
        # The order of the sections is part of the program's output and is
        # kept as it was (the "D2 and no D1" dump precedes "D1 and no D2").
        verbose_sections = [
            ("Patients with %s:" % D1, D1_patients),
            ("Patients with %s:" % D2, D2_patients),
            ("Patients with %s and %s:" % (D1, D2), D1andD2_patients),
            ("Patients with %s or %s:" % (D1, D2), D1orD2_patients),
            ("Patients with %s and no %s:" % (D2, D1), D2notD1_patients),
            ("Patients with %s and no %s:" % (D1, D2), D1notD2_patients),
            ("Patients with no %s and no %s:" % (D1, D2), noD1D2_patients),
            ("Male patients with %s:" % (D3), D3_male_patients),
            ("Female patients with %s:" % (D3), D3_female_patients),
        ]
        for title, patients in verbose_sections:
            _print_patients(title, patients)

    # Find occurrences of 6 diseases.
    print("")
    print("Occurrences of the 6 diseases:")
    for disease in diseases:
        patients = database.query_emr_data(disease)
        print("%-6d patients with %s" % (len(patients), disease))
        if options.verbose:
            _print_patients("Patients with %s:" % disease, patients)

    # Find comorbidities between the 6 diseases.
    print("")
    print("Comorbidities between the 6 diseases:")

    # Pair every disease only with the ones listed after it, so each
    # unordered pair is queried exactly once.
    for index, first in enumerate(diseases):
        for second in diseases[index + 1:]:

            both_patients = database.query_emr_data([first, second])

            print("%-6d patients with %s and %s"
                  % (len(both_patients), first, second))

            if options.verbose:
                _print_patients(
                    "Patients with both %s and %s:" % (first, second),
                    both_patients)
def _print_patients(title, patients):
    """Print ``title``, one line per patient in ``patients``, then a blank line.

    ``patients`` is iterated for its keys and indexed by each key, so any
    mapping-like object works.
    """
    print(title)
    for patient in patients:
        print("patient %s --> %s" % (patient, patients[patient]))
    print("")


def main():
    """Run a set of sample ``EMRDatabase.query_emr_data`` queries and report.

    Settings are read from the module-level ``options`` object. The EMR data
    is imported, then single-disease, AND, OR, "not" and gender-filtered
    queries are issued for a fixed set of diseases and the size of each
    result is printed. Afterwards the per-disease counts and the pairwise
    AND counts are printed for a hard-coded list of six diseases. With
    ``options.verbose`` set, each result is also listed patient by patient.

    The print calls use the single-argument parenthesized form, which
    produces the same output on Python 2 and 3.
    """
    database = EMRDatabase(options.verbose)

    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    diseases = ["Alzheimer's disease", "Attention deficit", "Autism",
                "Breast cancer (female)", "Epilepsy", "Schizophrenia"]

    D1 = "Autism"
    D2 = "Epilepsy"
    D3 = "Breast cancer (female)"

    # Query for patients that have D1
    D1_patients = database.query_emr_data(D1)

    # Query for patients that have D2
    D2_patients = database.query_emr_data(D2)

    # Query for patients that have both D1 and D2
    D1andD2_patients = database.query_emr_data([D1, D2])

    # Query for patients that have either D1 or D2
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)

    # Query for patients that have D1 and not D2
    D1notD2_patients = database.query_emr_data([D1, "not " + D2])

    # Query for patients that have D2 and not D1
    D2notD1_patients = database.query_emr_data(["not " + D1, D2])

    # Query for patients that have neither D2 nor D1
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Query for male patients with female Breast cancer
    D3_male_patients = database.query_emr_data(D3, gender_filters="M")

    # Query for female patients with female Breast cancer
    D3_female_patients = database.query_emr_data(D3, gender_filters="F")

    print("Test EMR_database queries:")
    print("%-6d patients with %s" % (len(D1_patients), D1))
    print("%-6d patients with %s" % (len(D2_patients), D2))
    print("%-6d patients with %s and %s" % (len(D1andD2_patients), D1, D2))
    print("%-6d patients with %s or %s" % (len(D1orD2_patients), D1, D2))
    print("%-6d patients with %s and no %s"
          % (len(D1notD2_patients), D1, D2))
    print("%-6d patients with %s and no %s"
          % (len(D2notD1_patients), D2, D1))
    print("%-6d patients with no %s and no %s"
          % (len(noD1D2_patients), D1, D2))
    print("%-6d male patients with %s" % (len(D3_male_patients), D3))
    print("%-6d female patients with %s" % (len(D3_female_patients), D3))

    if options.verbose:
        # The order of the sections is part of the program's output and is
        # kept as it was (the "D2 and no D1" dump precedes "D1 and no D2").
        verbose_sections = [
            ("Patients with %s:" % D1, D1_patients),
            ("Patients with %s:" % D2, D2_patients),
            ("Patients with %s and %s:" % (D1, D2), D1andD2_patients),
            ("Patients with %s or %s:" % (D1, D2), D1orD2_patients),
            ("Patients with %s and no %s:" % (D2, D1), D2notD1_patients),
            ("Patients with %s and no %s:" % (D1, D2), D1notD2_patients),
            ("Patients with no %s and no %s:" % (D1, D2), noD1D2_patients),
            ("Male patients with %s:" % (D3), D3_male_patients),
            ("Female patients with %s:" % (D3), D3_female_patients),
        ]
        for title, patients in verbose_sections:
            _print_patients(title, patients)

    # Find occurrences of 6 diseases.
    print("")
    print("Occurrences of the 6 diseases:")
    for disease in diseases:
        patients = database.query_emr_data(disease)
        print("%-6d patients with %s" % (len(patients), disease))
        if options.verbose:
            _print_patients("Patients with %s:" % disease, patients)

    # Find comorbidities between the 6 diseases.
    print("")
    print("Comorbidities between the 6 diseases:")

    # Pair every disease only with the ones listed after it, so each
    # unordered pair is queried exactly once.
    for index, first in enumerate(diseases):
        for second in diseases[index + 1:]:

            both_patients = database.query_emr_data([first, second])

            print("%-6d patients with %s and %s"
                  % (len(both_patients), first, second))

            if options.verbose:
                _print_patients(
                    "Patients with both %s and %s:" % (first, second),
                    both_patients)
Exemplo n.º 9
0
def main():
    """Run the likelihood-ratio test for disease pairs and save the p-values.

    Settings are read from the module-level ``options`` object. For each
    disease pair ``__compute_log_likelihood_wrapper`` is called with the
    ``"independent"`` overlap type and again with ``options.overlap_type``.
    Twice the difference of the two log-likelihoods is compared against a
    chi-square distribution with one degree of freedom to get the p-value.

    With both ``options.disease1`` and ``options.disease2`` set, only that
    pair is processed. Otherwise every pair ``D1 < D2`` drawn from the
    diseases returned by ``__filter_diseases`` is processed.

    Results are printed to stdout and written to ``p_value.txt`` in the
    current working directory. The print calls use the single-argument
    parenthesized form, which produces the same output on Python 2 and 3.
    """
    # Import EMR data into database
    database = EMRDatabase()

    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    # The context manager closes the file even if a computation below raises.
    with open("p_value.txt", 'w') as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease pairs
        # with non-zero patient counts.
        single_pair = (options.disease1 is not None
                       and options.disease2 is not None)

        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:

                # When scanning all diseases, visit each unordered pair once
                # and never pair a disease with itself.
                if not single_pair and D1 >= D2:
                    continue

                print("-" * 80)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, "independent", options.threshold_type,
                    options.prevalence_file, options.norm_prval_method)

                # Passed as an extra positional argument to the overlap run
                # only; its exact meaning is defined by the wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, options.overlap_type,
                    options.threshold_type, options.prevalence_file,
                    options.norm_prval_method, min_log_likelihood)

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood -
                                              indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # `1.0 - cdf` because the subtraction rounds every p-value
                # below about 1e-16 to exactly 0.
                # NOTE(review): assumes `chi2` is scipy.stats.chi2 -- confirm
                # against the file's imports.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, %.3E, %.3E, %.3E, %.3E" % (
                    D1, D2, options.overlap_type,
                    overlap_log_likelihood, indep_log_likelihood,
                    log_likelihood_ratio, p_value)

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print("-" * 80)
                print("")

                f.write(text + '\n')