def get_nih(db):
    """Stage the cleaned NIH test data and its annotated counterpart.

    The cleaned NIH CSV is read with pandas and inserted into the
    ``tmp_nih`` table. The same frame is then passed through ``annotate``
    and the result is both inserted into ``tmp_nih_metamap`` and written
    back out as a CSV file.

    Args:
        db: Object exposing ``insert(frame, table_name, "dbo")``; the third
            argument is presumably the schema name -- confirm against the
            database wrapper.

    Returns:
        None. The function is used purely for its side effects.
    """
    source_path = from_root("temp\\test_data\\nih_cleaned.csv")
    cleaned = pd.read_csv(source_path)
    db.insert(cleaned, "tmp_nih", "dbo")

    # NOTE(review): `annotate` is defined elsewhere; judging by the target
    # table name it presumably adds MetaMap annotations -- confirm.
    annotated = annotate(cleaned)
    db.insert(annotated, "tmp_nih_metamap", "dbo")

    target_path = from_root("temp\\test_data\\nih_metamap.csv")
    write_df(target_path, annotated)
def classify_culture(db, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the culture test set with each module and store the result.

    The frame extracted via the ``test_culture.sql`` file is handed to each
    module's ``classify`` method in turn. The per-module outputs are then
    inner-joined on ``test_key`` / ``result_key``, inserted into the
    ``tmp_culture_predictions`` table, and written out as a CSV file.

    Args:
        db: Object exposing ``extract(path)`` and
            ``insert(frame, table_name, "dbo")``.
        to_module: First classifier; its output is the left-most frame of
            the join.
        l1ml_module: Second classifier.
        l1s_module: Third classifier.
        l2_module: Fourth classifier.

    Returns:
        None. The function is used purely for its side effects.
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_culture.sql"))

    # Classify in the same fixed order the results are joined in.
    classifiers = (to_module, l1ml_module, l1s_module, l2_module)
    outputs = [classifier.classify(frame) for classifier in classifiers]

    merged = outputs[0]
    for output in outputs[1:]:
        merged = merged.merge(output, how="inner", on=join_keys)

    db.insert(merged, "tmp_culture_predictions", "dbo")
    write_df(from_root("temp\\predictions\\culture.csv"), merged)
def classify_random(db, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the random test set with each module and store the result.

    The frame extracted via the ``test_random.sql`` file is handed to each
    module's ``classify`` method in turn. The per-module outputs are then
    inner-joined on ``test_key`` / ``result_key``, inserted into the
    ``tmp_random_predictions`` table, and written out as a CSV file.

    Args:
        db: Object exposing ``extract(path)`` and
            ``insert(frame, table_name, "dbo")``.
        to_module: First classifier; its output is the left-most frame of
            the join.
        l1ml_module: Second classifier.
        l1s_module: Third classifier.
        l2_module: Fourth classifier.

    Returns:
        None. The function is used purely for its side effects.
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_random.sql"))

    # Classify in the same fixed order the results are joined in.
    classifiers = (to_module, l1ml_module, l1s_module, l2_module)
    outputs = [classifier.classify(frame) for classifier in classifiers]

    merged = outputs[0]
    for output in outputs[1:]:
        merged = merged.merge(output, how="inner", on=join_keys)

    db.insert(merged, "tmp_random_predictions", "dbo")
    write_df(from_root("temp\\predictions\\random.csv"), merged)
def classify_nih(db, tp_module, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the NIH test set with each module and store the result.

    The frame extracted via the ``test_nih.sql`` file is handed to each
    module's ``classify`` method in turn. The per-module outputs are then
    inner-joined on ``test_key`` / ``result_key``, inserted into the
    ``tmp_nih_predictions`` table, and written out as a CSV file.

    Args:
        db: Object exposing ``extract(path)`` and
            ``insert(frame, table_name, "dbo")``.
        tp_module: First classifier; its output is the left-most frame of
            the join.
        to_module: Second classifier.
        l1ml_module: Third classifier.
        l1s_module: Fourth classifier.
        l2_module: Fifth classifier.

    Returns:
        None. The function is used purely for its side effects.
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_nih.sql"))

    # Classify in the same fixed order the results are joined in.
    classifiers = (tp_module, to_module, l1ml_module, l1s_module, l2_module)
    outputs = [classifier.classify(frame) for classifier in classifiers]

    merged = outputs[0]
    for output in outputs[1:]:
        merged = merged.merge(output, how="inner", on=join_keys)

    db.insert(merged, "tmp_nih_predictions", "dbo")
    write_df(from_root("temp\\predictions\\nih.csv"), merged)
# Example #5
# 0
def main():
    """Classify the test queries with every saved module and persist the output.

    Steps, in order:

    1. Extract four frames through the database singleton, one per ``.sql``
       file under ``sql/test``.
    2. Load seven pickled modules from the ``pkl`` directory.
    3. Classify the frames: once with the regular modules, once with the
       two "organisms false" modules, and once with ``return_all=True`` for
       the level 1 symbolic and level 2 modules.
    4. Outer-join each group of outputs on ``test_key`` / ``result_key``,
       write the three joined frames to CSV, and insert the main one into
       the ``predictions`` table.

    Returns:
        None. Progress is reported with ``print``.
    """
    join_keys = ["test_key", "result_key"]

    def outer_join(first, *rest):
        """Outer-join each frame in ``rest`` onto ``first`` on ``join_keys``."""
        joined = first
        for frame in rest:
            joined = joined.merge(frame, how="outer", on=join_keys)
        return joined

    # --------------------------------------------------------------------------
    # Input frames

    db = Database.get_instance()

    performed_frame = db.extract(from_root("sql\\test\\test_performed.sql"))
    outcome_frame = db.extract(from_root("sql\\test\\test_outcome.sql"))
    level_1_frame = db.extract(from_root("sql\\test\\level_1.sql"))
    level_2_frame = db.extract(from_root("sql\\test\\level_2.sql"))

    print("Finished loading the DataFrames.")

    # --------------------------------------------------------------------------
    # Pickled modules

    tp_module = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_module.pkl"))
    to_module = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_module.pkl"))
    l1ml_module = Level1MLModule.load_from_file(
        from_root("pkl\\level_1_ml_module.pkl"))

    # The next two are loaded through an instance built around an already
    # loaded module rather than through the class itself.
    l1s_seed = Level1SymbolicModule(to_module)
    l1s_module = l1s_seed.load_from_file(
        from_root("pkl\\level_1_symbolic_module.pkl"))
    l2_seed = Level2Module(l1ml_module)
    l2_module = l2_seed.load_from_file(
        from_root("pkl\\level_2_module.pkl"))

    tp_module_org_false = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_organisms_false_module.pkl"))
    to_module_org_false = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_organisms_false_module.pkl"))

    print("Finished loading modules.")

    # --------------------------------------------------------------------------
    # Classification

    tp_out = tp_module.classify(performed_frame)
    to_out = to_module.classify(outcome_frame)
    l1ml_out = l1ml_module.classify(level_1_frame)
    l1s_out = l1s_module.classify(level_1_frame)
    l2_out = l2_module.classify(level_2_frame)

    tp_org_false_out = tp_module_org_false.classify(performed_frame)
    to_org_false_out = to_module_org_false.classify(outcome_frame)

    l1s_retall_out = l1s_module.classify(level_1_frame, return_all=True)
    l2_retall_out = l2_module.classify(level_2_frame, return_all=True)

    print("Finished classifying the DataFrames.")

    # --------------------------------------------------------------------------
    # Join, then write to CSV and to the database

    results = outer_join(tp_out, to_out, l1ml_out, l1s_out, l2_out)
    org_false_results = outer_join(tp_org_false_out, to_org_false_out)
    retall_results = outer_join(l1s_retall_out, l2_retall_out)

    write_df(from_root("results\\predictions.csv"), results)
    write_df(from_root("results\\predictions_org_false.csv"), org_false_results)
    write_df(from_root("results\\predictions_retall.csv"), retall_results)

    db.insert(results, "predictions", "dbo")

    print("Finished writing results to CSV and database.")
def get_culture(db, n=100):
    """Sample culture rows and stage them as test data.

    The frame extracted via the ``get_test_culture.sql`` file is randomly
    sampled, inserted into the ``tmp_culture`` table, and written out as a
    CSV file.

    Args:
        db: Object exposing ``extract(path)`` and
            ``insert(frame, table_name, "dbo")``. ``extract`` presumably
            returns a pandas DataFrame, since ``.sample`` is called on the
            result -- confirm against the database wrapper.
        n: Maximum number of rows to sample. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        None. The function is used purely for its side effects.
    """
    candidates = db.extract(from_root("temp\\sql\\get_test_culture.sql"))

    # Sampling without replacement raises ValueError when more rows are
    # requested than exist, so cap the request at the available row count.
    culture_df = candidates.sample(n=min(n, len(candidates)))

    db.insert(culture_df, "tmp_culture", "dbo")
    write_df(from_root("temp\\test_data\\culture.csv"), culture_df)
def get_random(db, n=100):
    """Sample random rows and stage them as test data.

    The frame extracted via the ``get_test_random.sql`` file is randomly
    sampled, inserted into the ``tmp_random`` table, and written out as a
    CSV file.

    Args:
        db: Object exposing ``extract(path)`` and
            ``insert(frame, table_name, "dbo")``. ``extract`` presumably
            returns a pandas DataFrame, since ``.sample`` is called on the
            result -- confirm against the database wrapper.
        n: Maximum number of rows to sample. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        None. The function is used purely for its side effects.
    """
    candidates = db.extract(from_root("temp\\sql\\get_test_random.sql"))

    # Sampling without replacement raises ValueError when more rows are
    # requested than exist, so cap the request at the available row count.
    random_df = candidates.sample(n=min(n, len(candidates)))

    db.insert(random_df, "tmp_random", "dbo")
    write_df(from_root("temp\\test_data\\random.csv"), random_df)