Example #1
def kfold_results(y_true, y_pred, nfolds):
    # shuffle=True is required for random_state to have any effect in KFold
    kf = KFold(n_splits=nfolds, shuffle=True, random_state=0)
    accuracy, precision, recall, f1, auc = list(), list(), list(), list(), list()
    for train_index, test_index in kf.split(y_true):
        y_true_test_index, y_pred_test_index = get_items(items=y_true, indexes=test_index), \
                                               get_items(items=y_pred, indexes=test_index)
        accuracy.append(
            accuracy_score(y_true=y_true_test_index, y_pred=y_pred_test_index))
        precision.append(
            precision_score(y_true=y_true_test_index,
                            y_pred=y_pred_test_index))
        recall.append(
            recall_score(y_true=y_true_test_index, y_pred=y_pred_test_index))
        f1.append(f1_score(y_true=y_true_test_index, y_pred=y_pred_test_index))
        auc.append(
            auc_score(y_true=y_true_test_index, y_pred=y_pred_test_index))

    algorithm = ""
    print "Accuracy and std of %s: %f %f" % (
        algorithm, np.mean(np.array(accuracy)), np.std(np.array(accuracy)))
    print "Precision and std of %s: %f %f" % (algorithm, np.mean(
        np.array(precision)), np.std(np.array(precision)))
    print "Recall and std of %s: %f %f" % (algorithm, np.mean(
        np.array(recall)), np.std(np.array(recall)))
    print "F1 and std of %s: %f %f" % (algorithm, np.mean(
        np.array(f1)), np.std(np.array(f1)))
    print "AUC and std of %s: %f %f" % (algorithm, np.mean(
        np.array(auc)), np.std(np.array(auc)))
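These snippets come from a single research codebase and lean on small project helpers (get_items, auc_score, write_file, make_dictionary, ...) that are defined elsewhere and not shown here. As a rough, hypothetical reconstruction (not the original code), get_items presumably selects items by index and auc_score presumably wraps sklearn's ROC-AUC:

# Hypothetical sketches of two project helpers used above -- assumptions, not the original code.
from sklearn.metrics import roc_auc_score

def get_items(items, indexes):
    # assumed behaviour: pick the elements of `items` at the given positions
    return [items[i] for i in indexes]

def auc_score(y_true, y_pred):
    # assumed to be a thin wrapper around sklearn's ROC-AUC
    return roc_auc_score(y_true, y_pred)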
Example #2
def baseline_testing(X_train, y_train, X_test, y_test, algorithm, type):
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(X_train)
    X_test = vectorizer.transform(X_test)
    print X_train.shape, X_test.shape

    if algorithm == "svm":
        clf = LinearSVC(random_state=0)
    elif algorithm == "lr":
        clf = LogisticRegression()
    elif algorithm == "dt":
        clf = DecisionTreeClassifier()
    elif algorithm == "nb":
        clf = GaussianNB()
    else:
        print "Wrong algorithm name -- please retype again"
        exit()

    # GaussianNB needs dense input, so convert both matrices before fitting and predicting
    clf.fit(X=X_train.toarray(), y=y_train)
    y_pred = clf.predict(X_test.toarray())
    path_write = "./data_test_data_pred_results/cnn_" + type + ".txt"
    write_file(path_file=path_write, data=y_pred)
    print "Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred)
    print "Precision: ", precision_score(y_true=y_test, y_pred=y_pred)
    print "Recall: ", recall_score(y_true=y_test, y_pred=y_pred)
    print "F1: ", f1_score(y_true=y_test, y_pred=y_pred)
    print "AUC: ", auc_score(y_true=y_test, y_pred=y_pred)
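write_file is another project helper that is not shown; a plausible minimal version, assuming it simply writes one item per line, could look like this:

# Hypothetical sketch of write_file -- the real implementation in the repository may differ.
def write_file(path_file, data):
    with open(path_file, "w") as f:
        for item in data:
            f.write(str(item) + "\n")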
Example #3
def checking_performance_v3(id_gt, label_gt, patches, threshold):
    new_labels, pred_labels = list(), list()
    new_patches_id, new_patches_label = list(), list()
    for p in patches:
        id_, label_ = p[0], p[1]
        new_patches_id.append(id_)
        new_patches_label.append(label_)
    # the score of the first patch is used below to normalise every patch score
    prob_patches = patches[0][1]

    origin_prob = list()
    true_positive, false_negative = list(), list()
    for id_, true_label in zip(id_gt, label_gt):
        if true_label == "true":
            new_labels.append(1)
        else:
            new_labels.append(0)
        if id_ in new_patches_id:
            index_ = new_patches_id.index(id_)
            origin_prob.append(id_ + "\t" +
                               str(float(new_patches_label[index_])))
            if new_patches_label[index_] / prob_patches >= threshold:
                pred_labels.append(1)
                if true_label == "true":
                    true_positive.append(id_)
            else:
                pred_labels.append(0)
                if true_label == "true":
                    false_negative.append(id_)
        else:
            # pred_labels.append(random.choice((1, 10)))
            pred_labels.append(0)
            origin_prob.append(id_ + "\t" + str(float(0)))
            if true_label == "true":
                false_negative.append(id_)

    print len(new_labels), len(pred_labels)
    true_list, false_list = [], []
    for l in new_labels:
        if l == 1:
            true_list.append(l)
        else:
            false_list.append(l)
    print len(true_list), len(false_list)

    acc = accuracy_score(y_true=new_labels, y_pred=pred_labels)
    prc = precision_score(y_true=new_labels, y_pred=pred_labels)
    rc = recall_score(y_true=new_labels, y_pred=pred_labels)
    f1 = f1_score(y_true=new_labels, y_pred=pred_labels)
    auc = auc_score(y_true=new_labels, y_pred=pred_labels)
    print acc, prc, rc, f1, auc

    # dl_acc = accuracy_score(y_true=new_labels, y_pred=test_dl_labels)
    # dl_prc = precision_score(y_true=new_labels, y_pred=test_dl_labels)
    # dl_rc = recall_score(y_true=new_labels, y_pred=test_dl_labels)
    # dl_f1 = f1_score(y_true=new_labels, y_pred=test_dl_labels)
    # dl_auc = auc_score(y_true=new_labels, y_pred=test_dl_labels)
    # print dl_acc, dl_prc, dl_rc, dl_f1, dl_auc

    return acc, prc, rc, f1, auc, origin_prob, true_positive, false_negative
Example #4
def checking_performance(id_gt, label_gt, patches):
    prob_patches = patches[0][1]
    new_labels, pred_labels = list(), list()

    new_test_dl_labels = list()
    for k, v in patches:
        if v / prob_patches >= 0.5:
            pred_labels.append(1)
        else:
            pred_labels.append(0)
        index = id_gt.index(k)
        true_label = label_gt[index]
        # test_dl_labels is expected to be defined in the enclosing scope
        new_test_dl_labels.append(test_dl_labels[index])
        if true_label == "true":
            new_labels.append(1)
        else:
            new_labels.append(0)
    print len(new_labels), len(pred_labels), len(new_test_dl_labels)
    true_list, false_list = [], []
    for l in new_labels:
        if l == 1:
            true_list.append(l)
        else:
            false_list.append(l)
    print len(true_list), len(false_list)
    acc = accuracy_score(y_true=new_labels, y_pred=pred_labels)
    prc = precision_score(y_true=new_labels, y_pred=pred_labels)
    rc = recall_score(y_true=new_labels, y_pred=pred_labels)
    f1 = f1_score(y_true=new_labels, y_pred=pred_labels)
    auc = auc_score(y_true=new_labels, y_pred=pred_labels)
    print acc, prc, rc, f1, auc

    dl_acc = accuracy_score(y_true=new_labels, y_pred=new_test_dl_labels)
    dl_prc = precision_score(y_true=new_labels, y_pred=new_test_dl_labels)
    dl_rc = recall_score(y_true=new_labels, y_pred=new_test_dl_labels)
    dl_f1 = f1_score(y_true=new_labels, y_pred=new_test_dl_labels)
    dl_auc = auc_score(y_true=new_labels, y_pred=new_test_dl_labels)
    print dl_acc, dl_prc, dl_rc, dl_f1, dl_auc

    return acc, prc, rc, f1, auc
Example #5
def get_predict_ICSE(name, X, y, algorithm, folds):
    kf = KFold(n_splits=folds, random_state=None)
    # kf.get_n_splits(X=X)
    # kf = StratifiedKFold(n_splits=folds)
    accuracy, precision, recall, f1, auc = list(), list(), list(), list(), list()
    X = preprocessing.normalize(X)
    pred_dict = dict()
    for train_index, test_index in kf.split(X, y):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        if algorithm == "svm":
            clf = LinearSVC(random_state=0)
        elif algorithm == "lr":
            clf = LogisticRegression()
        elif algorithm == "dt":
            clf = DecisionTreeClassifier()
        else:
            print "Wrong algorithm name -- please retype again"
            exit()

        clf.fit(X=X_train, y=y_train)
        y_pred = clf.predict(X_test)
        pred_dict.update(make_dictionary(y_pred=y_pred, y_index=test_index))
        accuracy.append(accuracy_score(y_true=y_test, y_pred=y_pred))
        precision.append(precision_score(y_true=y_test, y_pred=y_pred))
        recall.append(recall_score(y_true=y_test, y_pred=y_pred))
        f1.append(f1_score(y_true=y_test, y_pred=y_pred))
        auc.append(auc_score(y_true=y_test, y_pred=y_pred))

        # y_pred = clf.predict(X)
        # path_file = "./data/3_mar7/" + "new_features_ver1_pred.txt"
        # write_file(path_file, y_pred)
        # break
    # print "Accuracy of %s: %f" % (algorithm, avg_list(accuracy))
    # print "Precision of %s: %f" % (algorithm, avg_list(precision))
    # print "Recall of %s: %f" % (algorithm, avg_list(recall))
    # print "F1 of %s: %f" % (algorithm, avg_list(f1))

    # path_file = "./data/3_mar7/" + "new_features_ver2_pred.txt"
    # write_file(path_file=path_file, data=sorted_dict(dict=pred_dict))
    print accuracy, "Accuracy and std of %s: %f %f" % (
        algorithm, np.mean(np.array(accuracy)), np.std(np.array(accuracy)))
    print precision, "Precision and std of %s: %f %f" % (
        algorithm, np.mean(np.array(precision)), np.std(np.array(precision)))
    print recall, "Recall and std of %s: %f %f" % (
        algorithm, np.mean(np.array(recall)), np.std(np.array(recall)))
    print f1, "F1 and std of %s: %f %f" % (
        algorithm, np.mean(np.array(f1)), np.std(np.array(f1)))
    print auc, "AUC and std of %s: %f %f" % (
        algorithm, np.mean(np.array(auc)), np.std(np.array(auc)))
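make_dictionary, used above to collect the out-of-fold predictions, is also a project helper; assuming it simply maps each test index to its prediction, a minimal sketch is:

# Hypothetical sketch of make_dictionary -- assumed to pair each test index with its prediction.
def make_dictionary(y_pred, y_index):
    return dict(zip(y_index, y_pred))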
Example #6
def get_predict_ICSE_new(X_train, y_train, X_test, y_test, algorithm):
    if algorithm == "svm":
        clf = LinearSVC()
    elif algorithm == "lr":
        clf = LogisticRegression()
    elif algorithm == "dt":
        clf = DecisionTreeClassifier()
    else:
        print "Wrong algorithm name -- please retype again"
        exit()

    clf.fit(X=X_train, y=y_train)
    y_pred = clf.predict(X_test)
    print "Accuracy of %s: %f" % (algorithm, accuracy_score(y_true=y_test, y_pred=y_pred))
    print "Precision of %s: %f" % (algorithm, precision_score(y_true=y_test, y_pred=y_pred))
    print "Recall of %s: %f" % (algorithm, recall_score(y_true=y_test, y_pred=y_pred))
    print "F1 of %s: %f" % (algorithm, f1_score(y_true=y_test, y_pred=y_pred))
    print "AUC of %s: %f" % (algorithm, auc_score(y_true=y_test, y_pred=y_pred))
Example #7
def baseline_ver3(id, train, label, algorithm):
    X_train, y_train = train, label
    X_test, y_test = train, label
    id_train, id_test = id, id

    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(X_train)
    X_test = vectorizer.transform(X_test)
    # X = vectorizer.transform(X)

    # eval_train, eval_labels = loading_data("./data/3_mar7/typeaddres.out")
    # eval_train = vectorizer.transform(eval_train)

    if algorithm == "svm":
        clf = LinearSVC(random_state=0)
    elif algorithm == "lr":
        clf = LogisticRegression()
    elif algorithm == "dt":
        clf = DecisionTreeClassifier()
    else:
        print "Wrong algorithm name -- please retype again"
        exit()

    clf.fit(X=X_train, y=y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    precision = precision_score(y_true=y_test, y_pred=y_pred)
    recall = recall_score(y_true=y_test, y_pred=y_pred)
    f1 = f1_score(y_true=y_test, y_pred=y_pred)
    auc = auc_score(y_true=y_test, y_pred=y_pred)

    print "Accuracy:", accuracy
    print "Precision:", precision
    print "Recall:", recall
    print "F1:", f1
    print "AUC:", auc

    # LinearSVC has no predict_proba; fall back to decision_function scores in that case
    if hasattr(clf, "predict_proba"):
        probs = clf.predict_proba(X_test)[:, 1]
    else:
        probs = clf.decision_function(X_test)
    path_write = "./statistical_test_ver2/%s.txt" % (algorithm)
    write_file(path_file=path_write, data=probs)
Example #8
def eval_patchNet_train_test(tf, checkpoint_dir, test):
    FLAGS = tf.flags.FLAGS
    allow_soft_placement = True  # "Allow device soft device placement"
    log_device_placement = False  # "Log placement of ops on devices"
    dirs = get_all_checkpoints(checkpoint_dir=checkpoint_dir)
    graph = tf.Graph()

    X_test_msg, X_test_added_code, X_test_removed_code, y_test = \
        test[0], test[1], test[2], test[3]

    for checkpoint_file in dirs:
        with graph.as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=allow_soft_placement,
                log_device_placement=log_device_placement)
            sess = tf.Session(config=session_conf)

            with sess.as_default():
                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                # Get the placeholders from the graph by name
                input_msg = graph.get_operation_by_name("input_msg").outputs[0]
                input_addedcode = graph.get_operation_by_name(
                    "input_addedcode").outputs[0]
                input_removedcode = graph.get_operation_by_name(
                    "input_removedcode").outputs[0]
                dropout_keep_prob = graph.get_operation_by_name(
                    "dropout_keep_prob").outputs[0]

                # Tensors we want to evaluate
                predictions = graph.get_operation_by_name(
                    "output/predictions").outputs[0]
                scores = graph.get_operation_by_name(
                    "output/scores").outputs[0]

                # Generate batches for one epoch
                batches = mini_batches(X_msg=X_test_msg,
                                       X_added_code=X_test_added_code,
                                       X_removed_code=X_test_removed_code,
                                       Y=y_test,
                                       mini_batch_size=FLAGS.batch_size)

                # Collect the predictions here
                all_predictions, all_scores = [], []

                for batch in batches:
                    batch_input_msg, batch_input_added_code, batch_input_removed_code, batch_input_labels = batch
                    batch_predictions = sess.run(
                        predictions, {
                            input_msg: batch_input_msg,
                            input_addedcode: batch_input_added_code,
                            input_removedcode: batch_input_removed_code,
                            dropout_keep_prob: 1.0
                        })
                    # print batch_predictions.shape
                    all_predictions = np.concatenate(
                        [all_predictions, batch_predictions])

                    batch_scores = sess.run(
                        scores, {
                            input_msg: batch_input_msg,
                            input_addedcode: batch_input_added_code,
                            input_removedcode: batch_input_removed_code,
                            dropout_keep_prob: 1.0
                        })
                    batch_scores = np.ravel(softmax(batch_scores)[:, [1]])
                    # print batch_scores.shape
                    all_scores = np.concatenate([all_scores, batch_scores])
        split_checkpoint_file = checkpoint_file.split("/")
        path_write = "./patchNet_results/%s_%s.txt" % (
            split_checkpoint_file[-3], split_checkpoint_file[-1])
        write_file(path_file=path_write, data=all_scores)
        print checkpoint_file, "Accuracy:", accuracy_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "Precision:", precision_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "Recall:", recall_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "F1:", f1_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "AUC:", auc_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print "\n"
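Example #8 also calls softmax and convert_to_binary, which are project utilities rather than TensorFlow API. Below is a minimal sketch of what they appear to do (a row-wise softmax over the score matrix, and mapping one-hot or probabilistic labels to 0/1 class ids); these are assumptions, not the original implementations:

# Hypothetical sketches of the utilities used in Example #8 -- assumptions, not the original code.
import numpy as np

def softmax(scores):
    # numerically stable row-wise softmax over a 2-D score matrix
    e = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

def convert_to_binary(labels):
    # assumed behaviour: one-hot label rows -> 0/1 class ids; 1-D probabilities -> thresholded at 0.5
    labels = np.asarray(labels)
    if labels.ndim == 2:
        return np.argmax(labels, axis=1)
    return (labels > 0.5).astype(int)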
Example #9
def evaluation_metrics(root, target):
    print "Accuracy: %f" % (accuracy_score(y_true=root, y_pred=target))
    print "Precision: %f" % (precision_score(y_true=root, y_pred=target))
    print "Recall: %f" % (recall_score(y_true=root, y_pred=target))
    print "F1: %f" % (f1_score(y_true=root, y_pred=target))
    print "AUC: %f" % (auc_score(y_true=root, y_pred=target))
Example #10
    # path_pred = "./statistical_test_prob/lstm_cnn_all_fold_0.txt"
    # path_pred = "./statistical_test_prob/lstm_cnn_all_check_fold_0.txt"
    # path_pred = "./statistical_test_prob/lstm_cnn_all_checking.txt"
    # path_pred = "./statistical_test/lstm_cnn_all_ver2.txt"
    # path_pred = "./statistical_test_prob_ver3/PatchNet.txt"
    # path_pred = "./statistical_test_prob_ver3/LPU-SVM.txt"
    path_pred = "./statistical_test_prob_ver3/LS-CNN.txt"
    # path_pred, threshold = "./statistical_test_prob_ver3/sasha_results.txt", 50
    y_pred = load_file(path_file=path_pred)
    if "sasha" in path_pred:
        y_pred = np.array([1 if float(y) > threshold else 0 for y in y_pred])
    else:
        y_pred = np.array([1 if float(y) > 0.5 else 0 for y in y_pred])
        # y_pred = np.array([float(y) for y in y_pred])
        # y_pred[y_pred > 0.5] = 1
        # y_pred[y_pred <= 0.5] = 0

    split_data = split_train_test(data=y_true, folds=folds, random_state=None)

    for i in xrange(len(split_data)):
        train_index, test_index = split_data[i]["train"], split_data[i]["test"]
        y_true_, y_pred_ = get_items(
            items=y_true, indexes=test_index), get_items(items=y_pred,
                                                         indexes=test_index)
        acc = accuracy_score(y_true=y_true_, y_pred=y_pred_)
        prc = precision_score(y_true=y_true_, y_pred=y_pred_)
        rc = recall_score(y_true=y_true_, y_pred=y_pred_)
        f1 = f1_score(y_true=y_true_, y_pred=y_pred_)
        auc = auc_score(y_true=y_true_, y_pred=y_pred_)
        print acc, prc, rc, f1, auc
Example #11
            # Collect the predictions here
            all_predictions = []

            for batch in batches:
                batch_input_msg, batch_input_added_code, batch_input_removed_code, batch_input_labels = batch
                batch_predictions = sess.run(
                    predictions, {
                        input_msg: batch_input_msg,
                        input_addedcode: batch_input_added_code,
                        input_removedcode: batch_input_removed_code,
                        dropout_keep_prob: 1.0
                    })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])
        print checkpoint_file, "Accuracy:", accuracy_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "Precision:", precision_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "Recall:", recall_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "F1:", f1_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        print checkpoint_file, "AUC:", auc_score(
            y_true=convert_to_binary(y_test), y_pred=all_predictions)
        # y_pred = all_predictions
        # split_checkpoint = checkpoint_file.split("/")
        # path_file = "./statistical_test_ver2/3_mar7/" + split_checkpoint[2] \
        #             + "_" + split_checkpoint[-1] + ".txt"
        # write_file(path_file, y_pred)
        exit()
Example #12
            commits=filter_commits)
        msgs_ = codes_
    else:
        print "You need to type correct model"
        exit()

    dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
    pad_msg = mapping_commit_msg(msgs=msgs_,
                                 max_length=FLAGS.msg_length,
                                 dict_msg=dict_msg_)
    labels = load_label_commits(commits=filter_commits)
    labels = convert_to_binary(labels)
    print pad_msg.shape, labels.shape, len(dict_msg_)

    y_pred = model.predict(pad_msg, batch_size=FLAGS.batch_size)
    y_pred = np.ravel(y_pred)
    y_pred[y_pred > 0.5] = 1
    y_pred[y_pred <= 0.5] = 0

    accuracy = accuracy_score(y_true=labels, y_pred=y_pred)
    precision = precision_score(y_true=labels, y_pred=y_pred)
    recall = recall_score(y_true=labels, y_pred=y_pred)
    f1 = f1_score(y_true=labels, y_pred=y_pred)
    auc = auc_score(y_true=labels, y_pred=y_pred)

    print "Accuracy of %s: %f" % (FLAGS.model, accuracy)
    print "Precision of %s: %f" % (FLAGS.model, precision)
    print "Recall of %s: %f" % (FLAGS.model, recall)
    print "F1 of %s: %f" % (FLAGS.model, f1)
    print "AUC of %s: %f" % (FLAGS.model, auc)
Example #13
        # f1.append(f1_score(y_true=Y_test, y_pred=y_pred))
        # auc.append(auc_score(y_true=Y_test, y_pred=y_pred))

        model.save("./lstm_model_ver2/rerun_" + FLAGS.model + "_" +
                   str(cntfold) + ".h5")
        cntfold += 1
        y_pred = model.predict(X_test_msg, batch_size=FLAGS.batch_size)
        y_pred = np.ravel(y_pred)
        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <= 0.5] = 0
        pred_dict.update(make_dictionary(y_pred=y_pred, y_index=test_index))
        accuracy.append(accuracy_score(y_true=Y_test, y_pred=y_pred))
        precision.append(precision_score(y_true=Y_test, y_pred=y_pred))
        recall.append(recall_score(y_true=Y_test, y_pred=y_pred))
        f1.append(f1_score(y_true=Y_test, y_pred=y_pred))
        auc.append(auc_score(y_true=Y_test, y_pred=y_pred))

        # print "Accuracy of %s: %f" % (FLAGS.model, avg_list(accuracy))
        # print "Precision of %s: %f" % (FLAGS.model, avg_list(precision))
        # print "Recall of %s: %f" % (FLAGS.model, avg_list(recall))
        # print "F1 of %s: %f" % (FLAGS.model, avg_list(f1))
        # print "AUC of %s: %f" % (FLAGS.model, avg_list(auc))

        # path_file = "./statistical_test/3_mar7/" + FLAGS.model + ".txt"
        # write_file(path_file, y_pred)
        # print "Accuracy of %s: %f" % (FLAGS.model, avg_list(accuracy))
        # print "Precision of %s: %f" % (FLAGS.model, avg_list(precision))
        # print "Recall of %s: %f" % (FLAGS.model, avg_list(recall))
        # print "F1 of %s: %f" % (FLAGS.model, avg_list(f1))
        # cntfold += 1
        # exit()
Example #14
def running_baseline_july(tf, folds, random_state):
    FLAGS = tf.flags.FLAGS
    commits_ = extract_commit_july(path_file=FLAGS.path)
    filter_commits = commits_
    print len(commits_)
    # note: recent scikit-learn requires shuffle=True when random_state is not None
    kf = KFold(n_splits=folds, random_state=random_state)
    idx_folds = list()
    for train_index, test_index in kf.split(filter_commits):
        idx = dict()
        idx["train"], idx["test"] = train_index, test_index
        idx_folds.append(idx)

    if "msg" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
    elif "all" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        all_lines = add_two_list(list1=msgs_, list2=codes_)
        msgs_ = all_lines
    elif "code" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        msgs_ = codes_
    else:
        print "You need to type correct model"
        exit()

    dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
    pad_msg = mapping_commit_msg(msgs=msgs_, max_length=FLAGS.msg_length, dict_msg=dict_msg_)
    labels = load_label_commits(commits=filter_commits)
    labels = convert_to_binary(labels)
    print pad_msg.shape, labels.shape, len(dict_msg_)
    # exit()

    timestamp = str(int(time.time()))
    accuracy, precision, recall, f1, auc = list(), list(), list(), list(), list()
    cntfold = 0
    pred_dict, pred_dict_prob = dict(), dict()
    for i in xrange(cntfold, len(idx_folds)):
        idx = idx_folds[i]
        train_index, test_index = idx["train"], idx["test"]
        X_train_msg, X_test_msg = np.array(get_items(items=pad_msg, indexes=train_index)), \
                                  np.array(get_items(items=pad_msg, indexes=test_index))
        Y_train, Y_test = np.array(get_items(items=labels, indexes=train_index)), \
                          np.array(get_items(items=labels, indexes=test_index))
        if FLAGS.model == "lstm_cnn_msg" or FLAGS.model == "lstm_cnn_code" or FLAGS.model == "lstm_cnn_all":
            model = lstm_cnn(x_train=X_train_msg, y_train=Y_train, x_test=X_test_msg,
                             y_test=Y_test, dictionary_size=len(dict_msg_), FLAGS=FLAGS)
        elif FLAGS.model == "cnn_msg" or FLAGS.model == "cnn_code" or FLAGS.model == "cnn_all":
            model = cnn_model(x_train=X_train_msg, y_train=Y_train, x_test=X_test_msg,
                              y_test=Y_test, dictionary_size=len(dict_msg_), FLAGS=FLAGS)
        else:
            print "You need to give correct model name"
            exit()

        # model.save("./keras_model/" + FLAGS.model + "_" + str(cntfold) + ".h5")
        # model.save("./keras_model/" + FLAGS.model + "_" + str(cntfold) + "_testing.h5")
        # model.save("./keras_model/test_" + FLAGS.model + "_" + str(cntfold) + ".h5")
        model.save("./keras_model/newres_funcalls_" + FLAGS.model + "_" + str(cntfold) + ".h5")

        y_pred = model.predict(X_test_msg, batch_size=FLAGS.batch_size)
        y_pred = np.ravel(y_pred)

        y_pred_tolist = y_pred.tolist()
        data_fold = [str(ind) + "\t" + str(prob) for ind, prob in zip(test_index, y_pred)]
        path_file = "./statistical_test/newres_funcalls_%s_fold_%s.txt" % (FLAGS.model, str(cntfold))
        write_file(path_file=path_file, data=data_fold)

        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <= 0.5] = 0

        pred_dict.update(make_dictionary(y_pred=y_pred, y_index=test_index))
        accuracy.append(accuracy_score(y_true=Y_test, y_pred=y_pred))
        precision.append(precision_score(y_true=Y_test, y_pred=y_pred))
        recall.append(recall_score(y_true=Y_test, y_pred=y_pred))
        f1.append(f1_score(y_true=Y_test, y_pred=y_pred))
        auc.append(auc_score(y_true=Y_test, y_pred=y_pred))
        print "accuracy", accuracy_score(y_true=Y_test, y_pred=y_pred)
        print "precision", precision_score(y_true=Y_test, y_pred=y_pred)
        print "recall", recall_score(y_true=Y_test, y_pred=y_pred)
        print "f1", f1_score(y_true=Y_test, y_pred=y_pred)

        cntfold += 1
        break