Пример #1
0
def cmd_select_features():
    """Run feature selection on the training set.

    Reads the ``SelectedDataset`` option from the ``dataset`` config section,
    loads the training set for the module-level ``language`` and
    ``vuln_type``, and rebinds the module-level ``selected_features`` to
    whatever ``train.select_features`` returns.
    """
    global selected_features

    print_banner("Selecting features")

    dataset_name = config.get_str('dataset', 'SelectedDataset')
    features, labels = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type)

    selected_features = train.select_features(features, labels)
Пример #2
0
def create_png(graph):
    """Write ``graph`` as a DOT file and render it to a PNG with ``dot``.

    Both files are placed in the directory given by the ``GraphDirectory``
    option of the ``CFG`` config section and are named after the base name
    of ``graph.name``.

    Args:
        graph: Graph object accepted by ``nx.nx_pydot.write_dot``; its
            ``name`` attribute determines the output file names.

    Raises:
        OSError: If the ``dot`` executable cannot be started.
    """
    # Imported locally so the function does not rely on the module header.
    import subprocess

    graph_dir = config.get_str('CFG', 'GraphDirectory')
    basename = os.path.basename(graph.name)

    dot_file = os.path.join(graph_dir, '%s.dot' % basename)
    png_file = os.path.join(graph_dir, '%s.png' % basename)

    # Write DOT file
    nx.nx_pydot.write_dot(graph, dot_file)

    # Convert DOT to PNG. The arguments are passed as a list, without a
    # shell, so spaces or shell metacharacters in the paths can neither
    # break nor hijack the command. As before, a non-zero exit status from
    # ``dot`` is not treated as an error.
    subprocess.call(["dot", "-Tpng", dot_file, "-o", png_file])
Пример #3
0
def cmd_calibrate_model():
    """Replace the current model with an isotonic-calibrated wrapper.

    Loads the tuning set of the configured dataset, passes its features
    through ``sync_features``, rebinds the module-level ``model`` to a
    ``CalibratedClassifierCV`` built around the previous model with
    ``cv='prefit'``, and fits that wrapper on the tuning data.
    """
    global model

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    X_tuning, Y_tuning = transform.get_xy(dataset_name, 'tuning_set',
                                          language, vuln_type,
                                          selected_features)
    X_tuning = sync_features(X_tuning)

    # Rebind the global first, then fit, exactly in that order.
    model = CalibratedClassifierCV(model, method='isotonic', cv='prefit')
    model.fit(X_tuning, Y_tuning)
Пример #4
0
def cmd_test_model():
    """Load the testing set and print metrics for the current model.

    The testing features and labels are stored in the module-level
    ``X_test`` and ``Y_test``; the features are passed through
    ``sync_features`` before ``print_metrics`` is called with the
    module-level ``model``.
    """
    global X_test, Y_test

    print_banner("Testing model")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    X_test, Y_test = transform.get_xy(
        dataset_name, 'testing_set', language, vuln_type, selected_features)
    X_test = sync_features(X_test)

    print_metrics(model=model, X=X_test, Y=Y_test)
Пример #5
0
def cmd_store_all():
    """Store the testing-set data together with the current model.

    Loads the original records, features and labels of the testing set,
    passes the features through ``sync_features`` and hands everything to
    ``data.store_data`` with ``just_outliers=False``.
    """
    print_banner("Store all")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    originals, features, labels = transform.get_xy_with_orig(
        dataset_name, 'testing_set', language, vuln_type, selected_features)
    features = sync_features(features)

    # ``model`` is the module-level model; it is only read here.
    data.store_data(model, originals, features, labels, just_outliers=False)
Пример #6
0
def cmd_filter_features():
    """Drop features whose name starts with the configured prefix.

    The prefix is the ``FeatureFilterStartString`` option of the ``model``
    config section. When the module-level ``selected_features`` is ``None``
    it is first initialised from the column names of the training set.
    The surviving feature names are stored back in ``selected_features`` as
    a list and printed as a numbered listing starting at 1.
    """
    global selected_features

    print_banner("Filtering features")

    excluded_prefix = config.get_str('model', 'FeatureFilterStartString')

    if selected_features is None:
        dataset_name = config.get_str('dataset', 'SelectedDataset')
        X, _ = transform.get_xy(dataset_name, 'training_set', language,
                                vuln_type)
        selected_features = X.columns.values

    kept = []
    for name in selected_features:
        if name.startswith(excluded_prefix):
            continue
        kept.append(name)
    selected_features = kept

    for position, name in enumerate(selected_features, start=1):
        print_notice("%d. %s" % (position, name))
Пример #7
0
def cmd_compare_tools():
    """Print the model's results on the testing set next to other tools.

    Steps:
      1. If the module-level ``train_features`` is ``None``, initialise it
         from the (column-sorted) training set.
      2. Pick a decision threshold with ``find_best_threshold`` on the
         tuning set and print it.
      3. Print the model's results on the testing set at that threshold.
      4. For every ``(tool, file_name)`` entry of the ``tools`` config
         section, call ``compare_results`` against the testing-set records.
    """
    global train_features

    print_banner("Comparing results")

    sel_ds = config.get_str('dataset', 'SelectedDataset')
    sel_vt = config.get_str('dataset', 'SelectedVulnerabilityType')

    if train_features is None:
        X_train, _ = transform.get_xy(sel_ds, 'training_set', language,
                                      vuln_type, selected_features)
        X_train.sort_index(axis=1, inplace=True)

        train_features = X_train.columns

    orig_tuning, X_tuning, _ = transform.get_xy_with_orig(
        sel_ds, 'tuning_set', language, vuln_type, selected_features)

    X_tuning = sync_features(X_tuning)

    c = find_best_threshold(model, orig_tuning, X_tuning)

    print_notice("Preferred threshold (Y > c): %.2f" % c)

    orig, X, _ = transform.get_xy_with_orig(sel_ds, 'testing_set', language,
                                            vuln_type, selected_features)

    # Pass the testing features through sync_features before the model sees
    # them, as is done for the tuning set above and for the testing set in
    # the other commands. Previously they were handed over unsynchronised.
    # NOTE(review): assumes sync_features is safe to apply here even if
    # print_model_results synchronises internally -- confirm.
    X = sync_features(X)

    print_notice('-' * 55)
    print_notice("Our results")

    print_model_results(model, orig, X, c)

    for (tool, file_name) in config.get_items('tools'):
        print_notice('-' * 55)
        print_notice('Comparing against tool: %s' % tool)
        compare_results(file_name, orig, sel_vt)
Пример #8
0
def select_model(language, vuln_type, X, Y):
    """Create the configured model, fit it on ``X``/``Y`` and return it.

    The model type is the ``Model`` option of the ``model`` config section;
    its parameters come from the optional option named
    ``<model type><vuln_type>Params``. When the type is
    ``DecisionTreeClassifier`` and ``GenerateDecisionTreeGraph`` is enabled,
    a graph titled ``<language>_<vuln_type>`` is created as well.

    Args:
        language: Used for the decision-tree graph title.
        vuln_type: Used for the parameter option name and the graph title.
        X: Training features; ``X.columns.values`` supplies feature names
            for the graph.
        Y: Training labels.

    Returns:
        The fitted model.
    """
    model_type = config.get_str('model', 'Model')
    params_option = model_type + vuln_type + 'Params'
    params = config.get_dict('model', params_option, optional=True)

    model = create_model(model_type, params)
    model.fit(X, Y)

    if model_type != "DecisionTreeClassifier":
        return model

    # Only consult the graph option for decision trees.
    if config.get_boolean('model', 'GenerateDecisionTreeGraph'):
        graph_title = "%s_%s" % (language, vuln_type)
        create_dt_graph(graph_title, model, X.columns.values)

    return model
Пример #9
0
def cmd_create_model():
    """Train a model on the training set and store it globally.

    The training features are sorted by column label in place. If the
    module-level ``train_features`` is still ``None`` it is set to those
    sorted columns. The module-level ``model`` is rebound to the result of
    ``train.select_model``.
    """
    global model, train_features

    print_banner("Creating model")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    features, labels = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type,
                                        selected_features)
    features.sort_index(axis=1, inplace=True)

    if train_features is None:
        train_features = features.columns

    model = train.select_model(language, vuln_type, features, labels)
Пример #10
0
def create_dt_graph(title, model, features):
    """Export a decision tree to DOT and render it to a PNG with ``dot``.

    Both files are placed in the directory given by the
    ``DecisionTreeGraphDirectory`` option of the ``model`` config section
    and are named ``<title>.dot`` and ``<title>.png``.

    Args:
        title: Base name (without extension) of the generated files.
        model: Fitted tree passed to ``tree.export_graphviz``.
        features: Feature names passed as ``feature_names``.

    Raises:
        OSError: If the ``dot`` executable cannot be started.
    """
    # Imported locally so the function does not rely on the module header.
    import subprocess

    graph_dir = config.get_str('model', 'DecisionTreeGraphDirectory')

    dot_file = os.path.join(graph_dir, '%s.dot' % title)
    png_file = os.path.join(graph_dir, '%s.png' % title)

    print_notice("Creating Decision Tree graph in %s" % png_file)

    # Write DOT file
    tree.export_graphviz(model,
                         out_file=dot_file,
                         feature_names=features,
                         filled=True,
                         rounded=True,
                         proportion=True,
                         node_ids=True)

    # Convert DOT to PNG. The arguments are passed as a list, without a
    # shell, so spaces or shell metacharacters in the title or directory can
    # neither break nor hijack the command. As before, a non-zero exit
    # status from ``dot`` is not treated as an error.
    subprocess.call(["dot", "-Tpng", dot_file, "-o", png_file])
Пример #11
0
def cmd_tune_params():
    """Run model parameter tuning on the training and tuning sets.

    The training features are sorted by column label in place, and the
    module-level ``train_features`` is initialised from them when it is
    still ``None``. The tuning features are passed through
    ``sync_features`` before both sets are handed to
    ``train.select_best_model``.
    """
    # NOTE(review): ``model`` is declared global but never assigned here, and
    # the return value of ``train.select_best_model`` is discarded -- confirm
    # this is intended.
    global model, train_features

    print_banner("Tuning model parameters")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    X_train, Y_train = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type,
                                        selected_features)
    X_train.sort_index(axis=1, inplace=True)

    if train_features is None:
        train_features = X_train.columns

    X_tuning, Y_tuning = transform.get_xy(dataset_name, 'tuning_set',
                                          language, vuln_type,
                                          selected_features)
    X_tuning = sync_features(X_tuning)

    train.select_best_model(X_train, Y_train, X_tuning, Y_tuning)
Пример #12
0
def cmd_count_sets():
    """Print per-set and total counts of labels equal to 0 and to 1.

    The labels of the training, tuning and testing sets of the configured
    dataset are loaded (without a feature selection), and for each set the
    number of entries equal to 0 ("non-vulnerable lines") and equal to 1
    ("vulnerable lines") is reported, followed by the totals.
    """
    dataset_name = config.get_str('dataset', 'SelectedDataset')

    # Load all three label sets up front, in training/tuning/testing order.
    _, training_labels = transform.get_xy(dataset_name, 'training_set',
                                          language, vuln_type, None)
    _, tuning_labels = transform.get_xy(dataset_name, 'tuning_set',
                                        language, vuln_type, None)
    _, testing_labels = transform.get_xy(dataset_name, 'testing_set',
                                         language, vuln_type, None)

    labelled_sets = (
        ('training', training_labels),
        ('tuning', tuning_labels),
        ('testing', testing_labels),
    )

    total_non_vuln = 0
    total_vuln = 0

    for set_name, labels in labelled_sets:
        set_non_vuln = len(labels.loc[labels[0:] == 0])
        set_vuln = len(labels.loc[labels[0:] == 1])

        total_non_vuln += set_non_vuln
        total_vuln += set_vuln

        print_notice("%s set: non-vulnerable lines %d, vulnerable lines %d" %
                     (set_name, set_non_vuln, set_vuln))

    print_notice("total: non-vulnerable lines %d, vulnerable lines %d" %
                 (total_non_vuln, total_vuln))
Пример #13
0
 def __init__(self):
     """Initialise the parent with the configured ``CustomPickle`` value.

     The value is read from the ``analysis`` config section and passed as
     the single argument to the parent class initialiser.
     """
     pickle_setting = config.get_str('analysis', 'CustomPickle')
     super(CustomDataset, self).__init__(pickle_setting)
Пример #14
0
def cmd_clean_set():
    """Delete the sets of the currently selected dataset.

    Looks up the dataset named by the ``SelectedDataset`` option of the
    ``dataset`` config section via ``dataset_factory.get_dataset`` and calls
    its ``delete_sets`` method.
    """
    print_banner("Cleaning sets")

    dataset_name = config.get_str('dataset', 'SelectedDataset')
    dataset = dataset_factory.get_dataset(dataset_name)

    dataset.delete_sets()
Пример #15
0
def cmd_create_transform():
    """Transform the sets of the currently selected dataset.

    Passes the configured dataset name together with the module-level
    ``sets`` and ``language`` to ``transform.transform_sets``.
    """
    print_banner("Transforming sets")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    transform.transform_sets(dataset_name, sets, language)
Пример #16
0
def get_transform_filename(dataset, language, vuln_type):
    """Build the transform file name for a dataset/language/vulnerability.

    The ``TransformFilenameFormat`` option of the ``dataset`` config section
    is used as a ``%``-format string and filled with the three arguments in
    the order given.

    Returns:
        The formatted file name.
    """
    template = config.get_str('dataset', 'TransformFilenameFormat')
    substitutions = (dataset, language, vuln_type)

    return template % substitutions
Пример #17
0
def get_features_filename(dataset, language, vuln_type):
    """Build the features file name for a dataset/language/vulnerability.

    The ``FeaturesFilenameFormat`` option of the ``dataset`` config section
    is used as a ``%``-format string and filled with the three arguments in
    the order given.

    Returns:
        The formatted file name.
    """
    template = config.get_str('dataset', 'FeaturesFilenameFormat')
    substitutions = (dataset, language, vuln_type)

    return template % substitutions
Пример #18
0
 def __init__(self):
     """Initialise the parent with the configured ``SamatePickle`` value.

     The value is read from the ``SAMATE`` config section and passed as the
     single argument to the parent class initialiser.
     """
     pickle_setting = config.get_str('SAMATE', 'SamatePickle')
     super(SamateDataset, self).__init__(pickle_setting)