示例#1
0
def test_get_titanic():
    """Load the Titanic data via ``real_data.get_titanic`` and print it.

    Prints the first ten rows as a table, followed by the value dictionary
    and the parameter types returned by the loader. Nothing is asserted.
    """
    titanic_df, value_dict, param_types = real_data.get_titanic()

    preview = titanic_df.head(10)
    io.print_pretty_table(preview)

    # Emit the two metadata objects in the order the loader returned them.
    for metadata in (value_dict, param_types):
        print(metadata)
示例#2
0
def __print_items(freq_candidates):
    """Print frequent-set candidates as a two-column table.

    Every element of ``freq_candidates`` is unpacked as
    ``(support, conditions)``. Each condition is indexed at positions 0 and 1
    and rendered as ``<cond[0]>=<cond[1]>`` by string concatenation; the
    conditions of one candidate are joined with ``", "`` and wrapped in
    parentheses. Rows keep the input order (no sorting by support is applied).
    """
    table_rows = [
        [
            "(" + ", ".join([cond[0] + "=" + cond[1] for cond in conds]) + ")",
            sup,
        ]
        for sup, conds in freq_candidates
    ]

    column_names = ["frequent set", "s_support"]
    io.print_pretty_table(pd.DataFrame(table_rows, columns=column_names))
def test_value_dict():
    """Run ``fn.transform_dataset`` on a Titanic column subset and print it.

    Reads ``train.csv`` from a path relative to this module's directory,
    keeps five columns, passes them through ``fn.transform_dataset`` and
    prints the resulting table, value dictionary and parameter types.
    Nothing is asserted.
    """
    import os
    import pandas as pd
    from util import io

    module_dir = os.path.dirname(os.path.realpath(__file__))
    csv_path = module_dir + "/../../_data/titanic/train.csv"

    selected_columns = ["Survived", "Sex", "Age", "Fare", "Pclass"]
    raw_df = pd.read_csv(csv_path)
    subset_df = raw_df[selected_columns]

    transformed_df, val_dict, param_types = fn.transform_dataset(subset_df)

    io.print_pretty_table(transformed_df)
    for metadata in (val_dict, param_types):
        print(metadata)
示例#4
0
def _print_items(freq_items):
    """Print frequent items as a table with human-readable condition labels.

    Every element of ``freq_items`` is unpacked as ``(support, conditions)``.
    A condition is indexed as ``cond[0]`` (feature index) and ``cond[1]``
    (value index) and rendered through a hard-coded lookup that covers
    feature indices 0-3 with two values each. Rows keep the input order.
    """
    feature_dict = {
        0: ("g", ("m  ", "w  ")),
        1: ("c", ("no ", "yes")),
        2: ("s", ("no ", "yes")),
        3: ("w", ("no ", "yes")),
    }

    def _label(cond):
        # Short feature name followed by the display text of the chosen value.
        short_name, value_names = feature_dict[cond[0]]
        return short_name + "=" + value_names[cond[1]]

    table_rows = []
    for sup, conds in freq_items:
        joined = ", ".join([_label(cond) for cond in conds])
        table_rows.append(["(" + joined + ")", sup])

    column_names = ["frequent set", "s_support"]
    io.print_pretty_table(pd.DataFrame(table_rows, columns=column_names))
示例#5
0
def get_rki_export():
    """Read the RKI export JSON and print a preview of four column groups.

    The file path is built relative to this module's directory. For each
    column group the first ten rows are printed as a table. The function
    returns nothing.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    path = module_dir + "/../../_data/ed/rki_pre_process/epias_of_rki.2018-11.350000.json"

    df = pd.read_json(path)

    # Column groups in print order: circumstances, patient, pain, measures.
    column_groups = (
        ["aufnahmezeitpunkt_datum", "aufnahmezeitpunkt_stunde", "behandlung_fachabteilung", "id_einrichtung", "zuweisungsart"],
        ["geschlecht", "altersklasse", "plzbereich"],
        ["schmerz", 'diagnosen', "leitsymptom", "leitsymptom_gruppe", 'tetanus', 'triagesystem'],
        ["untersuchung_bga", "untersuchung_echokardiographie", "untersuchung_ekg", 'untersuchung_roentgen_thorax', "vitalwerte"],
    )

    for columns in column_groups:
        io.print_pretty_table(df[columns].head(10))
示例#6
0
def get_eseg_export():
    """Read the ESEG export sample JSON and print a preview of four column groups.

    The file path is built relative to this module's directory. For each
    column group the first ten rows are printed as a table. The function
    returns nothing.
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    path = module_dir + "/../../_data/ed/eseg_export_sample/example2019-09.json"

    df = pd.read_json(path)

    # Column groups in print order: circumstances, patient, pain, measures.
    column_groups = (
        ["sys_date", "sys_hour", "sys_department", "sys_transport", 'sys_disposition', 'sys_ed', 'sys_lab', 'sys_labinfections'],
        ["sys_gender", "sys_age16", "sys_plz3"],
        ["sys_complaint", 'sys_diagnosis_icd4', "sys_triage", "sys_isolation", 'sys_ecg', 'sys_echo', 'sys_xraythorax'],
        ["sys_heartrate", "sys_temperature", "sys_respiratoryrate", 'sys_bloodpressuresystolic'],
    )

    for columns in column_groups:
        io.print_pretty_table(df[columns].head(10))
def create_p_value_dataset():
    """Build one combined p-value table from generated time series.

    Steps, as performed by the code below:

    1. Generate time series with
       ``R_wrapper_data.get_noufaily_configuration``.
    2. Evaluate five scoring methods on them (EARS C1/C2/C3, Bayes, RKI).
    3. Create one ``pValue_dataset`` per time series (with the ``peak``
       pre-processing) and concatenate their data frames row-wise.
    4. Drop the ``ground_truth`` column and map the string labels
       ``"False"``/``"True"`` in ``target`` to ``0``/``1``.
    5. Print the first 100 rows.

    Returns:
        A pair ``(values, leaf_types)``: the combined frame as a NumPy array
        and a list of five ``Gaussian`` followed by one ``Categorical``
        (presumably one leaf type per remaining column -- verify against
        ``dataset_creator.pValue_dataset``).

    Raises:
        ValueError: If no time series were generated, so there is nothing
            to combine.
    """
    import pandas as pd

    from data import R_wrapper_data
    from evaluation import evaluator
    from methods import R_wrapper
    from ml import dataset_creator, pre_processing
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
    from util import io

    tss = R_wrapper_data.get_noufaily_configuration(25,
                                                    num_ts=10,
                                                    num_weeks=624,
                                                    k=5,
                                                    random_seed=100)

    # 7 time points; passed as ``baseline`` (EARS) or ``w`` (Bayes, RKI).
    baseline = 7

    # The three EARS variants differ only in the ``method`` parameter.
    # (An EARS "C4" variant is deliberately not part of this list.)
    ears_descriptions = [
        {
            "method": R_wrapper.get_EARS_score,
            "parameters": {
                "method": variant,
                "baseline": baseline,
                "alpha": 0.005,
            },
        }
        for variant in ("C1", "C2", "C3")
    ]
    method_descriptions1 = ears_descriptions + [
        {
            "method": R_wrapper.get_Bayes_score,
            "parameters": {
                "b": 0,
                "w": baseline,
                "actY": True,
                "alpha": 0.005,
            },
        },
        {
            "method": R_wrapper.get_RKI_score,
            "parameters": {
                "b": 0,
                "w": baseline,
                "actY": True,
            },
        },
    ]
    method_results = evaluator.evaluate_method_results(tss,
                                                       method_descriptions1)

    datasets = [
        dataset_creator.pValue_dataset(ts,
                                       method_results,
                                       pre_process=pre_processing.peak)
        for ts in tss
    ]
    if not datasets:
        raise ValueError("no time series available to build the p-value dataset")

    # Reuse the first dataset object as the container for the combined frame.
    # A single concat replaces the per-iteration DataFrame.append, which was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    ds = datasets[0]
    ds.df = pd.concat([dataset.df for dataset in datasets])

    ds.df.drop(["ground_truth"], inplace=True, axis=1)

    # Convert the string labels of the target column to integers.
    ds.df.replace({"target": "False"}, {"target": 0}, inplace=True)
    ds.df.replace({"target": "True"}, {"target": 1}, inplace=True)

    io.print_pretty_table(ds.df.head(100))

    return ds.df.values, [
        Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Categorical
    ]
示例#8
0
def extract_rules(spn, feature_id=1):
    """Print frequent item sets together with their likelihood under the SPN.

    Frequent items come from ``get_frequent_items(spn, min_support=0.0)`` and
    are sorted by their first element (unpacked below as ``sup``), descending.
    For every item a one-row range query is built: a ``NominalRange`` at each
    condition's feature index and ``None`` at all other scope positions. The
    query is evaluated with ``Inference.likelihood`` using range-aware node
    likelihoods. The resulting rows are sorted by that likelihood, descending,
    and the first 400 are printed.

    NOTE(review): ``exit()`` is called right after the table is printed, so
    the rule-building section that follows is never reached. ``feature_id``
    is only used in that unreachable section (and in a commented-out filter).

    Args:
        spn: SPN object handed to ``get_frequent_items`` and
            ``Inference.likelihood``; ``len(spn.scope)`` sizes the query row.
        feature_id: Feature index treated as the rule head by the currently
            unreachable rule section.
    """
    
    from spn.experiments.AQP.Ranges import NominalRange
    from spn.algorithms import Inference
    from simple_spn.internal.InferenceRange import categorical_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Categorical
    
    # Node-type -> likelihood function mapping passed to Inference.likelihood;
    # categorical leaves use the range-based likelihood.
    inference_support_ranges = {Categorical     : categorical_likelihood_range,
                                    Sum             : sum_likelihood,
                                    Product         : prod_likelihood}
    
    
    
    
    
    freq_items = get_frequent_items(spn, min_support=0.0)
    # No filtering is applied at the moment; the feature_id filter is disabled.
    freq_items_filtered = freq_items#filter(lambda x : any(cond[0] == feature_id for cond in x[1]), freq_items)
    freq_items_sorted = sorted(freq_items_filtered, key=lambda x: x[0], reverse=True)
    
    #evidence = numpy.empty((3,3,)
    
    
    # Hard-coded display names: feature index -> (short name, value labels).
    # Only feature indices 0-3 with value indices 0/1 are covered.
    feature_dict = {0: ("g", ("m  ", "w  ")), 1: ("c", ("no ", "yes")), 2: ("s", ("no ", "yes")), 3: ("w", ("no ", "yes"))}
    freq_sets = []
    for (sup, conds) in freq_items_sorted:
        
        str_conds=[]
        # One slot per scope entry; None is used for positions without a condition.
        ranges = [None] * len(spn.scope)
        for cond in conds:
            ranges[cond[0]] = NominalRange([cond[1]])
            str_conds.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            
        # Wrap in an outer list so the query is a single-row 2-D array.
        ranges = np.array([ranges])
        # Take the scalar likelihood of that single row.
        sup_spn = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        

        freq_sets.append(["(" + ", ".join(str_conds) + ")", sup, sup_spn]) 
        
        
    # Order rows by the SPN likelihood (third entry), highest first.
    rules = sorted(freq_sets, key=lambda x : x[2], reverse=True)
    rule_df = pd.DataFrame(rules, columns=["frequent set", "s_support", "g_support"])
    
    io.print_pretty_table(rule_df.head(400))
    
    
    # NOTE(review): unconditional exit -- assuming this is the built-in
    # ``exit``, the interpreter terminates here and everything below is dead
    # code. Confirm whether this is a debugging leftover.
    exit()
    
    
    
    
    # ---- Unreachable while the exit() above is in place ----
    # Builds "body-->head" rules where conditions on ``feature_id`` form the
    # head and all other conditions form the body.
    rules = []
    for (sup, conds) in freq_items_sorted:
        
        rule_body = []
        rule_head = []
        # Never updated below, so the "c_Confidence" column would be NaN.
        conf = np.nan
        
        ranges = [None] * len(spn.scope)
        
        
        
        
        for cond in conds:
            if cond[0] == feature_id:
                rule_head.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            else:
                rule_body.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            
            ranges[cond[0]] = NominalRange([cond[1]])
        
        
        #Optimization possible
        ranges = np.array([ranges])
        # Likelihood of the full condition set (body and head).
        prob_with_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        
        # Clear the head feature's slot and evaluate the body alone.
        ranges[0][feature_id] = None
        prob_without_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        
        spn_sup = prob_without_feature
        spn_conf = prob_with_feature / prob_without_feature
        
        
        # Last entry (spn_sup * spn_conf) is the score used for sorting.
        rules.append([" AND ".join(rule_body) + "-->" + " AND ".join(rule_head), sup, conf, spn_sup, spn_conf, spn_sup*spn_conf])
    
    
    rules = sorted(rules, key=lambda x : x[5], reverse=True)
    
    
     
    rule_df = pd.DataFrame(rules, columns=["Rule", "c_Support", "c_Confidence", "spn_Support", "spn_Confidence", "score"])
    
    #rule_df.drop_duplicates(subset=["Rule"], keep = True, inplace = True) 
    
    io.print_pretty_table(rule_df.head(400))
    

    
    pass
示例#9
0
def test_get_lending():
    """Load the 'lending' data set and check the value dict covers every column.

    Requests 10000 rows with seed 5 from ``real_data.get_real_data``, prints
    the first ten rows, and asserts that the value dictionary has as many
    entries as the frame has columns.
    """
    lending_df, value_dict, param_types = real_data.get_real_data(
        'lending', only_n_rows=10000, seed=5)

    io.print_pretty_table(lending_df.head(10))

    n_columns = len(lending_df.columns)
    assert n_columns == len(value_dict)
示例#10
0
def test_get_Ecommerce():
    """Load the 'Ecommerce' data set and check the value dict covers every column.

    Prints the first ten rows and asserts that the value dictionary has as
    many entries as the frame has columns.
    """
    ecommerce_df, value_dict, param_types = real_data.get_real_data('Ecommerce')

    io.print_pretty_table(ecommerce_df.head(10))

    n_columns = len(ecommerce_df.columns)
    assert n_columns == len(value_dict)
示例#11
0
def test_get_OnlineRetail():
    """Load the 'OnlineRetail' data set and print its first ten rows.

    Nothing is asserted; the test only checks that loading and printing
    complete without raising.
    """
    loaded = real_data.get_real_data('OnlineRetail')
    retail_df, value_dict, param_types = loaded

    preview = retail_df.head(10)
    io.print_pretty_table(preview)
示例#12
0
    dataset_name = "titanic"

    #parameters for the construction
    rdc_threshold = 0.3
    min_instances_slice = 0.01

    if not spn_handler.exist_spn(dataset_name, rdc_threshold,
                                 min_instances_slice):
        print("Creating SPN ...")

        #get data
        df, value_dict, parametric_types = real_data.get_titanic()

        #print data (top 5 rows)
        io.print_pretty_table(df.head(5))

        #print value-dict
        print(value_dict)

        #Creates the SPN and saves to a file
        spn_handler.create_parametric_spns(df.values, parametric_types,
                                           dataset_name)

    #Load SPN
    spn, value_dict, _ = spn_handler.load_spn(dataset_name, rdc_threshold,
                                              min_instances_slice)

    #Print some statistics
    fn.print_statistics(spn)