예제 #1
0
def cmd_display_histo():
    """Display probability histograms of the model for both classes.

    The testing set is taken from the module-level ``X_test`` / ``Y_test``
    cache; when either of them is ``None`` it is loaded for the configured
    dataset, passed through ``sync_features`` and stored back in the
    globals.  ``display_prob_histogram`` is then called once for class 0
    ("not vulnerable") and once for class 1 ("vulnerable").
    """
    global X_test, Y_test

    print_banner("Displaying histogram")

    model_type = config.get_str('model', 'Model')
    sel_ds = config.get_str('dataset', 'SelectedDataset')

    have_cached_test_set = X_test is not None and Y_test is not None
    if not have_cached_test_set:
        X_test, Y_test = transform.get_xy(
            sel_ds, 'testing_set', language, vuln_type, selected_features)
        X_test = sync_features(X_test)

    # One histogram per class label, in the order 0 then 1.
    title_formats = (
        (0, "%s %s (class: not vulnerable)"),
        (1, "%s %s (class: vulnerable)"),
    )
    for class_label, title_format in title_formats:
        display_prob_histogram(title=title_format % (vuln_type, model_type),
                               model=model,
                               X=X_test,
                               Y=Y_test,
                               cls=class_label)
예제 #2
0
def cmd_select_features():
    """Run feature selection on the training set and store the outcome.

    Loads the training split of the configured dataset (no feature subset
    is passed to the loader) and rebinds the module-level
    ``selected_features`` to whatever ``train.select_features`` returns.
    """
    global selected_features

    print_banner("Selecting features")

    dataset_name = config.get_str('dataset', 'SelectedDataset')
    train_x, train_y = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type)

    selected_features = train.select_features(train_x, train_y)
예제 #3
0
def cmd_calibrate_model():
    """Calibrate the current model on the tuning set and replace it.

    The module-level ``model`` is wrapped in a ``CalibratedClassifierCV``
    (isotonic method, ``cv='prefit'``) which is fitted on the tuning split
    of the configured dataset after the features went through
    ``sync_features``.

    The global ``model`` is only rebound once ``fit`` has returned, so a
    failing calibration keeps the previous model available instead of
    leaving an unfitted wrapper behind.
    """
    global model

    sel_ds = config.get_str('dataset', 'SelectedDataset')

    X, Y = transform.get_xy(sel_ds, 'tuning_set', language, vuln_type,
                            selected_features)

    X = sync_features(X)

    # NOTE(review): recent scikit-learn releases deprecate cv='prefit' --
    # confirm against the version this project pins before upgrading.
    calibrated = CalibratedClassifierCV(model, method='isotonic', cv='prefit')
    calibrated.fit(X, Y)

    # Publish the calibrated model only after a successful fit.
    model = calibrated
예제 #4
0
def cmd_tune_params():
    """Search for the best model parameters using training and tuning sets.

    Loads the training split, sorts its columns in place and, if the
    module-level ``train_features`` has not been set yet, initialises it
    with those columns.  The tuning split is loaded and passed through
    ``sync_features`` before both splits are handed to
    ``train.select_best_model``.  The return value of that call is not
    used here.
    """
    global train_features

    print_banner("Tuning model parameters")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    train_x, train_y = transform.get_xy(
        dataset_name, 'training_set', language, vuln_type, selected_features)
    train_x.sort_index(axis=1, inplace=True)

    # Lazily remember the training columns; an existing value is kept.
    if train_features is None:
        train_features = train_x.columns

    tuning_x, tuning_y = transform.get_xy(
        dataset_name, 'tuning_set', language, vuln_type, selected_features)
    tuning_x = sync_features(tuning_x)

    train.select_best_model(train_x, train_y, tuning_x, tuning_y)
예제 #5
0
def cmd_test_model():
    """Evaluate the current model on the testing set and print its metrics.

    Always reloads the testing split of the configured dataset into the
    module-level ``X_test`` / ``Y_test`` (replacing any cached values),
    passes the features through ``sync_features`` and calls
    ``print_metrics`` with the current ``model``.
    """
    global X_test, Y_test

    print_banner("Testing model")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    X_test, Y_test = transform.get_xy(
        dataset_name, 'testing_set', language, vuln_type, selected_features)
    X_test = sync_features(X_test)

    print_metrics(model=model, X=X_test, Y=Y_test)
예제 #6
0
def cmd_count_sets():
    """Print how many labels equal 0 and 1 in each split, plus the totals.

    The label part of the training, tuning and testing splits is loaded
    (with ``None`` as the feature selection) before anything is printed.
    A label of 0 is reported as a non-vulnerable line and a label of 1 as
    a vulnerable line.
    """
    sel_ds = config.get_str('dataset', 'SelectedDataset')

    # Load every split up front so nothing is printed if a load fails.
    labels_per_set = []
    for setname in ('training', 'tuning', 'testing'):
        _, labels = transform.get_xy(sel_ds, setname + '_set', language,
                                     vuln_type, None)
        labels_per_set.append((setname, labels))

    total_non_vuln = 0
    total_vuln = 0

    for setname, labels in labels_per_set:
        # Summing a boolean mask counts the entries that match.
        set_non_vuln = int((labels == 0).sum())
        set_vuln = int((labels == 1).sum())

        total_non_vuln += set_non_vuln
        total_vuln += set_vuln

        print_notice("%s set: non-vulnerable lines %d, vulnerable lines %d" %
                     (setname, set_non_vuln, set_vuln))

    print_notice("total: non-vulnerable lines %d, vulnerable lines %d" %
                 (total_non_vuln, total_vuln))
예제 #7
0
def cmd_create_model():
    """Train a new model on the training set and record its feature columns.

    Loads the training split of the configured dataset restricted to
    ``selected_features``, sorts its columns in place and passes it to
    ``train.select_model``.  On success the module-level ``model`` is
    replaced and ``train_features`` is set to the columns the model was
    trained on.

    ``train_features`` is refreshed on every successful call rather than
    only when it is ``None``: otherwise re-creating the model after
    ``selected_features`` changed would leave ``train_features``
    describing the columns of an earlier run.
    """
    global model, train_features

    print_banner("Creating model")

    sel_ds = config.get_str('dataset', 'SelectedDataset')

    X, Y = transform.get_xy(sel_ds, 'training_set', language, vuln_type,
                            selected_features)

    X.sort_index(axis=1, inplace=True)

    # Capture the column index before training, as the original code did.
    columns = X.columns

    new_model = train.select_model(language, vuln_type, X, Y)

    # Update both globals together, and only once training succeeded, so
    # the stored feature list always belongs to the stored model.
    model = new_model
    train_features = columns

    # NOTE(review): a cached X_test/Y_test is not invalidated here;
    # cmd_test_model reloads it -- confirm whether the display commands
    # should do so as well after the feature set changes.
예제 #8
0
def cmd_display_model():
    """Call ``display_pr_curve`` for the current model on the testing set.

    Uses the module-level ``X_test`` / ``Y_test`` cache; when either is
    ``None`` the testing split of the configured dataset is loaded, passed
    through ``sync_features`` and stored back in the globals.  The plot
    title is built from the vulnerability type and the configured model
    name.
    """
    global X_test, Y_test

    print_banner("Displaying model")

    model_type = config.get_str('model', 'Model')
    sel_ds = config.get_str('dataset', 'SelectedDataset')

    have_cached_test_set = X_test is not None and Y_test is not None
    if not have_cached_test_set:
        X_test, Y_test = transform.get_xy(
            sel_ds, 'testing_set', language, vuln_type, selected_features)
        X_test = sync_features(X_test)

    curve_title = "%s %s" % (vuln_type, model_type)
    display_pr_curve(title=curve_title, model=model, X=X_test, Y=Y_test)
예제 #9
0
def cmd_filter_features():
    """Drop features whose name starts with the configured prefix.

    If the module-level ``selected_features`` is ``None`` it is first
    initialised with all column names of the training split.  Every
    feature whose name starts with the ``FeatureFilterStartString``
    setting is then removed, the result is stored back as a list, and the
    remaining features are printed as a numbered list starting at 1.
    """
    global selected_features

    print_banner("Filtering features")

    start_string = config.get_str('model', 'FeatureFilterStartString')

    if selected_features is None:
        dataset_name = config.get_str('dataset', 'SelectedDataset')
        train_x, _ = transform.get_xy(dataset_name, 'training_set', language,
                                      vuln_type)
        selected_features = train_x.columns.values

    selected_features = [
        name for name in selected_features
        if not name.startswith(start_string)
    ]

    for position, name in enumerate(selected_features, start=1):
        print_notice("%d. %s" % (position, name))
예제 #10
0
def cmd_compare_tools():
    """Print the model's testing-set results next to those of other tools.

    Steps:

    1. If the module-level ``train_features`` is not set yet, initialise
       it with the sorted columns of the training split.
    2. Determine a decision threshold ``c`` with ``find_best_threshold``
       on the tuning split.
    3. Print the model's results on the testing split for that threshold.
    4. For every ``(tool, file_name)`` entry in the ``tools`` config
       section, call ``compare_results`` against the same testing data.

    The testing features are passed through ``sync_features`` before they
    reach the model, exactly like the tuning features here and the testing
    features in the other commands.
    """
    global train_features

    print_banner("Comparing results")

    sel_ds = config.get_str('dataset', 'SelectedDataset')
    sel_vt = config.get_str('dataset', 'SelectedVulnerabilityType')

    if train_features is None:
        X, _ = transform.get_xy(sel_ds, 'training_set', language, vuln_type,
                                selected_features)
        X.sort_index(axis=1, inplace=True)

        train_features = X.columns

    orig_tuning, X_tuning, _ = transform.get_xy_with_orig(
        sel_ds, 'tuning_set', language, vuln_type, selected_features)

    X_tuning = sync_features(X_tuning)

    c = find_best_threshold(model, orig_tuning, X_tuning)

    print_notice("Preferred threshold (Y > c): %.2f" % c)

    orig, X, _ = transform.get_xy_with_orig(sel_ds, 'testing_set', language,
                                            vuln_type, selected_features)

    # Previously the testing features were handed to the model without
    # sync_features, unlike every other place that feeds data to `model`.
    # NOTE(review): presumably sync_features aligns columns with
    # train_features -- confirm print_model_results does not already do it.
    X = sync_features(X)

    print_notice('-' * 55)
    print_notice("Our results")

    print_model_results(model, orig, X, c)

    for tool, file_name in config.get_items('tools'):
        print_notice('-' * 55)
        print_notice('Comparing against tool: %s' % tool)
        compare_results(file_name, orig, sel_vt)