def create_features_ICSE_new(commits_train, ids_train, commits_test, ids_test, type):
    """Build bag-of-words count features for selected train and test commits.

    For every id in ``ids_train`` / ``ids_test`` (order and repeats kept) the
    first commit whose ``"id"`` equals it is taken from the matching commit
    list; ids without a matching commit are skipped.  A ``CountVectorizer`` is
    fitted on the training texts only and then applied to the test texts.

    Args:
        commits_train: commit dicts, each carrying an ``"id"`` key.
        ids_train: ids of the training commits to keep.
        commits_test: commit dicts, each carrying an ``"id"`` key.
        ids_test: ids of the test commits to keep.
        type: ``"msg"`` (texts from ``extract_msg``), ``"code"`` (texts from
            ``extract_code``) or ``"all"`` (both, merged by ``add_two_list``).

    Returns:
        ``(X_train, X_test)``, the two count matrices converted with
        ``toarray()``.

    Any other ``type`` prints a message and terminates through ``exit()``.
    """

    def _pick(commits, ids):
        # A single pass builds an id -> first-commit index so each lookup is
        # O(1) instead of rescanning the whole commit list for every id.
        # Assumes commit ids are hashable (e.g. strings) -- TODO confirm.
        first_by_id = {}
        for commit in commits:
            first_by_id.setdefault(commit["id"], commit)
        return [first_by_id[id_] for id_ in ids if id_ in first_by_id]

    new_commits_train = _pick(commits_train, ids_train)
    new_commits_test = _pick(commits_test, ids_test)

    if type == "msg":
        docs_train = extract_msg(commits=new_commits_train)
        docs_test = extract_msg(commits=new_commits_test)
    elif type == "code":
        docs_train = extract_code(commits=new_commits_train)
        docs_test = extract_code(commits=new_commits_test)
    elif type == "all":
        msg_train = extract_msg(commits=new_commits_train)
        msg_test = extract_msg(commits=new_commits_test)
        codes_train = extract_code(commits=new_commits_train)
        codes_test = extract_code(commits=new_commits_test)
        docs_train = add_two_list(list1=msg_train, list2=codes_train)
        docs_test = add_two_list(list1=msg_test, list2=codes_test)
    else:
        print("Your type is uncorrect")
        exit()

    # The vocabulary comes from the training texts only; the test texts are
    # projected onto it.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(docs_train)
    X_test = vectorizer.transform(docs_test)
    return X_train.toarray(), X_test.toarray()
示例#2
0
def loading_data_all(FLAGS):
    """Load, filter and encode every commit found at ``FLAGS.path``.

    The commits returned by ``extract_commit`` are filtered with
    ``filtering_commit`` using ``FLAGS.code_file``, ``FLAGS.code_hunk``,
    ``FLAGS.code_line`` and ``FLAGS.code_length``.  Dictionaries are built from
    the extracted messages and code, and the commits are then encoded with
    ``mapping_commit_msg`` / ``mapping_commit_code``.  No train/test split is
    performed here.

    Returns:
        ``(pad_msg, pad_added_code, pad_removed_code, labels)``: the outputs
        of ``mapping_commit_msg``, ``mapping_commit_code`` for ``"added"`` and
        ``"removed"``, and ``load_label_commits``.
    """
    kept = filtering_commit(
        commits=extract_commit(path_file=FLAGS.path),
        num_file=FLAGS.code_file,
        num_hunk=FLAGS.code_hunk,
        num_loc=FLAGS.code_line,
        size_line=FLAGS.code_length,
    )
    messages = extract_msg(commits=kept)
    code_texts = extract_code(commits=kept)
    msg_vocab = dictionary(data=messages)
    code_vocab = dictionary(data=code_texts)

    encoded_msg = mapping_commit_msg(
        msgs=messages, max_length=FLAGS.msg_length, dict_msg=msg_vocab
    )

    # Added and removed code are encoded with the same limits and dictionary;
    # only the ``type`` argument differs.
    code_options = dict(
        commits=kept,
        max_hunk=FLAGS.code_hunk,
        max_code_line=FLAGS.code_line,
        max_code_length=FLAGS.code_length,
        dict_code=code_vocab,
    )
    encoded_added = mapping_commit_code(type="added", **code_options)
    encoded_removed = mapping_commit_code(type="removed", **code_options)

    return encoded_msg, encoded_added, encoded_removed, load_label_commits(commits=kept)
def create_features_ICSE(commits, ids, type):
    """Build bag-of-words count features for the commits selected by ``ids``.

    For every id in ``ids`` (order and repeats kept) the first commit whose
    ``"id"`` equals it is taken; ids without a matching commit are skipped.
    The selected texts are vectorised with a freshly fitted
    ``CountVectorizer``.

    Args:
        commits: commit dicts, each carrying an ``"id"`` key.
        ids: ids of the commits to keep.
        type: ``"msg"`` (texts from ``extract_msg``), ``"code"`` (texts from
            ``extract_code``) or ``"all"`` (both, merged by ``add_two_list``).

    Returns:
        The count matrix converted with ``toarray()``.

    Any other ``type`` prints a message and terminates through ``exit()``.
    """
    # A single pass builds an id -> first-commit index so each lookup is O(1)
    # instead of rescanning the whole commit list for every id.
    # Assumes commit ids are hashable (e.g. strings) -- TODO confirm.
    first_by_id = {}
    for commit in commits:
        first_by_id.setdefault(commit["id"], commit)
    new_commits = [first_by_id[id_] for id_ in ids if id_ in first_by_id]

    if type == "msg":
        docs = extract_msg(commits=new_commits)
    elif type == "code":
        docs = extract_code(commits=new_commits)
    elif type == "all":
        msgs = extract_msg(commits=new_commits)
        codes = extract_code(commits=new_commits)
        docs = add_two_list(list1=msgs, list2=codes)
    else:
        print("Your type is uncorrect")
        exit()

    X = CountVectorizer().fit_transform(docs)
    return X.toarray()
示例#4
0
def loading_testing_data(FLAGS, path_file, type):
    """Load and filter the commits in ``path_file`` and pick the text to use.

    Args:
        FLAGS: provides ``code_file``, ``code_hunk``, ``code_line`` and
            ``code_length``, which are forwarded to ``filtering_commit``.
        path_file: path handed to ``extract_commit``.
        type: ``"msg"``, ``"all"`` or ``"code"``.  Any other value prints a
            message and terminates through ``exit()`` before anything is
            loaded.

    Returns:
        ``(msgs_, codes_, filter_commits)``.  ``msgs_`` is the
        ``extract_msg`` output for ``"msg"``, the ``add_two_list`` merge of
        messages and code for ``"all"``, and the ``extract_code`` output for
        ``"code"``.  ``codes_`` is always the ``extract_code`` output.
    """
    if type not in ("msg", "all", "code"):
        print("You need to type correct model")
        exit()

    # Loading and filtering are the same for every supported type.
    loaded = extract_commit(path_file=path_file)
    filter_commits = filtering_commit(
        commits=loaded,
        num_file=FLAGS.code_file,
        num_hunk=FLAGS.code_hunk,
        num_loc=FLAGS.code_line,
        size_line=FLAGS.code_length,
    )
    plain_msgs = extract_msg(commits=filter_commits)
    codes_ = extract_code(commits=filter_commits)

    if type == "all":
        msgs_ = add_two_list(list1=plain_msgs, list2=codes_)
    elif type == "code":
        msgs_ = codes_
    else:
        msgs_ = plain_msgs
    return msgs_, codes_, filter_commits
示例#5
0
def load_data_type(path, FLAGS):
    """Load the commits at ``path`` and encode messages, code and labels.

    Commits come from ``extract_commit_july`` and are used unfiltered.  The
    number of commits and the sizes of the message and code dictionaries are
    printed on one line.

    Args:
        path: path handed to ``extract_commit_july``.
        FLAGS: provides ``msg_length``, ``code_hunk``, ``code_line`` and
            ``code_length`` for the encoding helpers.

    Returns:
        ``(pad_msg, pad_added_code, pad_removed_code, labels, dict_msg_,
        dict_code_)``: the outputs of ``mapping_commit_msg``,
        ``mapping_commit_code`` for ``"added"`` and ``"removed"``,
        ``load_label_commits``, and the two dictionaries.
    """
    commits = extract_commit_july(path_file=path)
    messages = extract_msg(commits=commits)
    code_texts = extract_code(commits=commits)
    msg_vocab = dictionary(data=messages)
    code_vocab = dictionary(data=code_texts)
    print("%d %d %d" % (len(commits), len(msg_vocab), len(code_vocab)))

    encoded_msg = mapping_commit_msg(
        msgs=messages, max_length=FLAGS.msg_length, dict_msg=msg_vocab
    )

    # Added and removed code are encoded with the same limits and dictionary;
    # only the ``type`` argument differs.
    code_options = dict(
        commits=commits,
        max_hunk=FLAGS.code_hunk,
        max_code_line=FLAGS.code_line,
        max_code_length=FLAGS.code_length,
        dict_code=code_vocab,
    )
    encoded_added = mapping_commit_code(type="added", **code_options)
    encoded_removed = mapping_commit_code(type="removed", **code_options)
    labels = load_label_commits(commits=commits)

    return encoded_msg, encoded_added, encoded_removed, labels, msg_vocab, code_vocab
示例#6
0
def loading_data_lstm(FLAGS):
    """Load and encode the commits and return the test part of the first fold.

    ``FLAGS.model`` is printed and then inspected by substring, in this order:
    ``"msg"`` uses the commit messages, ``"all"`` uses messages merged with
    code through ``add_two_list``, ``"code"`` uses the code texts.  Any other
    model name prints a message and terminates through ``exit()`` before
    anything is loaded.

    Args:
        FLAGS: provides ``model``, ``path``, ``code_file``, ``code_hunk``,
            ``code_line``, ``code_length``, ``msg_length`` and ``folds``.
            ``FLAGS.seed`` is no longer read: the folds are not shuffled, so a
            seed cannot influence them, and recent scikit-learn releases
            reject a ``random_state`` combined with ``shuffle=False``.

    Returns:
        ``(X_test_msg, X_test_added_code, X_test_removed_code, y_test)`` as
        numpy arrays holding the test items of the first ``KFold`` split.
    """
    print(FLAGS.model)
    if "msg" in FLAGS.model:
        text_source = "msg"
    elif "all" in FLAGS.model:
        text_source = "all"
    elif "code" in FLAGS.model:
        text_source = "code"
    else:
        print("You need to type correct model")
        exit()

    commits_ = extract_commit(path_file=FLAGS.path)
    filter_commits = filtering_commit(commits=commits_,
                                      num_file=FLAGS.code_file,
                                      num_hunk=FLAGS.code_hunk,
                                      num_loc=FLAGS.code_line,
                                      size_line=FLAGS.code_length)
    msgs_ = extract_msg(commits=filter_commits)
    codes_ = extract_code(commits=filter_commits)
    if text_source == "all":
        msgs_ = add_two_list(list1=msgs_, list2=codes_)
    elif text_source == "code":
        msgs_ = codes_

    # The message dictionary is built from whichever text was selected above.
    dict_msg_ = dictionary(data=msgs_)
    dict_code_ = dictionary(data=codes_)
    pad_msg = mapping_commit_msg(msgs=msgs_,
                                 max_length=FLAGS.msg_length,
                                 dict_msg=dict_msg_)
    code_options = dict(commits=filter_commits,
                        max_hunk=FLAGS.code_hunk,
                        max_code_line=FLAGS.code_line,
                        max_code_length=FLAGS.code_length,
                        dict_code=dict_code_)
    pad_added_code = mapping_commit_code(type="added", **code_options)
    pad_removed_code = mapping_commit_code(type="removed", **code_options)
    labels = load_label_commits(commits=filter_commits)

    # Only the first split is used, and only its test indices.
    kf = KFold(n_splits=FLAGS.folds)
    _, test_index = next(iter(kf.split(filter_commits)))
    X_test_msg = np.array(get_items(items=pad_msg, indexes=test_index))
    X_test_added_code = np.array(get_items(items=pad_added_code, indexes=test_index))
    X_test_removed_code = np.array(get_items(items=pad_removed_code, indexes=test_index))
    y_test = np.array(get_items(items=labels, indexes=test_index))
    return X_test_msg, X_test_added_code, X_test_removed_code, y_test
示例#7
0
    path_test.append("./data/test_data/nicholask_translated.out")
    path_test.append("./data/test_data/sasha_translated.out")

    path_dict = "./data/3_mar7/newres.simplified.dict"
    dict_index = load_file(path_file=path_dict)
    new_dict = {}
    for d in dict_index:
        split_d = d.strip().split(":")
        new_dict[int(split_d[0])] = split_d[1]

    data = list()
    for p in path_test:
        p_data = load_file(path_file=p)
        data += p_data
    commits_ = extract_commit_new(commits=data)
    msgs = extract_msg(commits=commits_)
    codes = extract_code(commits=commits_)
    all_lines = add_two_list(list1=msgs, list2=codes)
    labels = extract_label(commits=commits_)

    # pos_label = len([1 for l in labels if l == 1])
    # neg_label = len([0 for l in labels if l == 0])
    print len(labels), np.count_nonzero(np.array(labels))

    cnt = 1
    for i in all_lines:
        split_i = i.split()
        for j in split_i:
            if int(j) == 0:
                print i
                break
示例#8
0
            cnt += 1
    return write_data
    # exit()


if __name__ == "__main__":
    # path_data = "./satisfy_typediff_sorted.out"
    path_data = "./newres_funcalls_jul28.out.sorted.satisfy"
    commits_ = extract_commit_july(path_file=path_data)
    filter_commits = commits_
    print len(filter_commits), type(filter_commits)
    commits_id = [c["id"] for c in commits_]
    print len(commits_id)
    # load_model_labels(id=commits_id)

    msgs = extract_msg(commits=filter_commits)
    labels = extract_label(commits=filter_commits)
    codes = extract_code(commits=filter_commits)
    all_lines = add_two_list(list1=msgs, list2=codes)
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(all_lines)
    print X.shape
    # exit()

    path_good_commits = "./statistical_test_prob_ver3/good_commits.txt"
    good_commits = load_file(path_file=path_good_commits)
    print "Leng of good commits: %s" % (str(len(good_commits)))

    write_data = []
    for g in good_commits:
        write_data += similarity_good_commit(id=commits_id,
示例#9
0
    print "Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred)
    print "Precision: ", precision_score(y_true=y_test, y_pred=y_pred)
    print "Recall: ", recall_score(y_true=y_test, y_pred=y_pred)
    print "F1: ", f1_score(y_true=y_test, y_pred=y_pred)
    print "AUC: ", auc_score(y_true=y_test, y_pred=y_pred)


if __name__ == "__main__":
    # Script entry point: prepares the training texts/labels and starts
    # preparing the test texts.
    # Filtering limits, passed to filtering_commit as num_file, num_hunk,
    # num_loc and size_line respectively.
    nfile, nhunk, nline, nleng = 1, 8, 10, 120

    # Training side: load, filter, then extract messages, labels and code.
    path_data = "./data/3_mar7/typediff.out"
    commits_train = extract_commit(path_file=path_data)
    filter_commits_train = filtering_commit(commits=commits_train, num_file=nfile,
                                            num_hunk=nhunk, num_loc=nline,
                                            size_line=nleng)
    msgs_train = extract_msg(commits=filter_commits_train)
    labels_train = extract_label(commits=filter_commits_train)
    codes_train = extract_code(commits=filter_commits_train)
    # Messages and code merged by add_two_list.
    all_lines_train = add_two_list(list1=msgs_train, list2=codes_train)

    # Test side: filtered with the same limits as the training data.
    # path_test = "./data/test_data/sasha_translated.out"
    path_test = "./data/test_data/merging_markus_sasha.txt"
    # NOTE(review): this name shadows the builtin ``type`` for the rest of the
    # script.
    type = "all"
    # type = "msg"
    # type = "code"
    commits_test = extract_commit(path_file=path_test)
    filter_commits_test = filtering_commit(commits=commits_test,
                                           num_file=nfile, num_hunk=nhunk,
                                           num_loc=nline, size_line=nleng)
    if type == "all":
        msgs_test = extract_msg(commits=filter_commits_test)
示例#10
0
        mini_batches.append(mini_batch)
    return mini_batches


if __name__ == "__main__":
    # Script entry point: loads one data file, prints a few statistics and
    # encodes the messages and the removed code.
    # path_data = "./data/1_oct5/sample_eq100_line_oct5.out"
    path_data = "./data/1_oct5/eq100_line_oct5.out"
    # path_data = "./data/3_mar7/typediff.out"
    commits_ = extract_commit(path_file=path_data)
    # Filtering limits, passed to filtering_commit as num_file, num_hunk,
    # num_loc and size_line; the last three are reused below as the
    # mapping_commit_code limits.
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    filter_commits = filtering_commit(commits=commits_,
                                      num_file=nfile,
                                      num_hunk=nhunk,
                                      num_loc=nline,
                                      size_line=nleng)
    msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
        commits=filter_commits)
    dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
    # Message length is measured in pieces obtained by splitting on single
    # spaces.
    print "Max length of commit msg: %i" % max(
        [len(m.split(" ")) for m in msgs_])
    print "Size of message and code dictionary: %i, %i" % (len(dict_msg_),
                                                           len(dict_code_))
    # The message length limit is hard-coded to 128 here.
    pad_msg = mapping_commit_msg(msgs=msgs_,
                                 max_length=128,
                                 dict_msg=dict_msg_)
    pad_removed_code = mapping_commit_code(type="removed",
                                           commits=filter_commits,
                                           max_hunk=nhunk,
                                           max_code_line=nline,
                                           max_code_length=nleng,
                                           dict_code=dict_code_)
示例#11
0
def loading_baseline_july(tf, folds, random_state):
    """Score every cross-validation test split with a saved Keras model.

    Commits are read from ``FLAGS.path`` with ``extract_commit_july`` and the
    text selected by ``FLAGS.model`` is encoded with ``mapping_commit_msg``.
    The substring checks run in this order: ``"msg"`` uses the messages,
    ``"all"`` uses messages merged with code through ``add_two_list``,
    ``"code"`` uses the code texts.  For each ``KFold`` test split a model is
    loaded from ``./keras_model/test_<model>_<cntfold>.h5`` and its flattened
    predictions are collected.  The predictions of all folds, in fold order,
    are passed to ``write_file`` with the path
    ``./statistical_test_prob/<model>_checking.txt``.

    Args:
        tf: object exposing the parsed flags as ``tf.flags.FLAGS``; the flags
            ``path``, ``model``, ``msg_length`` and ``batch_size`` are read.
        folds: number of ``KFold`` splits.
        random_state: accepted for backward compatibility only.  The splits
            are not shuffled, so a seed cannot influence them, and recent
            scikit-learn releases reject a seed combined with
            ``shuffle=False``.

    An unsupported model name prints a message and terminates through
    ``exit()``.
    """
    FLAGS = tf.flags.FLAGS
    commits_ = extract_commit_july(path_file=FLAGS.path)
    filter_commits = commits_
    print(len(commits_))

    kf = KFold(n_splits=folds)
    test_folds = [test_index for _, test_index in kf.split(filter_commits)]

    model_name = FLAGS.model
    if "msg" in model_name:
        text_source = "msg"
    elif "all" in model_name:
        text_source = "all"
    elif "code" in model_name:
        text_source = "code"
    else:
        print("You need to type correct model")
        exit()

    msgs_ = extract_msg(commits=filter_commits)
    codes_ = extract_code(commits=filter_commits)
    if text_source == "all":
        msgs_ = add_two_list(list1=msgs_, list2=codes_)
    elif text_source == "code":
        msgs_ = codes_

    dict_msg_ = dictionary(data=msgs_)
    pad_msg = mapping_commit_msg(msgs=msgs_,
                                 max_length=FLAGS.msg_length,
                                 dict_msg=dict_msg_)
    labels = load_label_commits(commits=filter_commits)
    labels = convert_to_binary(labels)

    print("%s %s %s" % (pad_msg.shape, labels.shape, len(dict_msg_)))

    known_models = ("lstm_cnn_all", "lstm_cnn_msg", "lstm_cnn_code",
                    "cnn_all", "cnn_msg", "cnn_code")
    # NOTE(review): cntfold never advances, so every fold is scored with the
    # model file saved for fold 0 -- confirm this is intended.
    cntfold = 0
    # Collected per test index but not written out by this function.
    pred_dict = dict()
    pred_dict_list = list()
    for test_index in test_folds:
        X_test_msg = np.array(get_items(items=pad_msg, indexes=test_index))
        if FLAGS.model not in known_models:
            print("You need to give correct model name")
            exit()
        path_model = "./keras_model/test_%s_%s.h5" % (FLAGS.model,
                                                      str(cntfold))
        model = load_model(path_model)

        y_pred = np.ravel(model.predict(X_test_msg, batch_size=FLAGS.batch_size))
        pred_dict.update(make_dictionary(y_pred=y_pred, y_index=test_index))
        pred_dict_list.extend(y_pred.tolist())

    path_file = "./statistical_test_prob/" + FLAGS.model + "_checking.txt"
    write_file(path_file=path_file, data=pred_dict_list)
    code_file = 1  # "Max file of code in one in commits")

    path_train = "./data/3_mar7/typediff.out"
    # data_train = load_file(path_file=path_train)
    # train_pad_msg, train_pad_added_code, train_pad_removed_code, train_labels, dict_msg_, dict_code_ = \
    #     load_commit_train_data(commits=data_train, msg_length_=msg_length, code_length_=code_length,
    #                            code_line_=code_line, code_hunk_=code_hunk, code_file_=code_file)
    # print train_pad_msg.shape, train_pad_added_code.shape, train_pad_removed_code.shape, train_labels.shape

    commits_train = extract_commit(path_file=path_train)
    filter_commits_train = filtering_commit(commits=commits_train,
                                            num_file=code_file,
                                            num_hunk=code_hunk,
                                            num_loc=code_line,
                                            size_line=code_length)
    msgs_train, codes_train = extract_msg(
        commits=filter_commits_train), extract_code(
            commits=filter_commits_train)
    dict_msg_train, dict_code_train = dictionary(data=msgs_train), dictionary(
        data=codes_train)

    path_test = "./data/test_data/markus_translated.out"
    # path_test = "./data/test_data/sasha_translated.out"
    commits_test = extract_commit(path_file=path_test)
    filter_commits_test = filtering_commit(commits=commits_test,
                                           num_file=code_file,
                                           num_hunk=code_hunk,
                                           num_loc=code_line,
                                           size_line=code_length)
    msgs_test, codes_test = extract_msg(
        commits=filter_commits_test), extract_code(commits=filter_commits_test)
    all_lines_test = add_two_list(list1=msgs_test, list2=codes_test)
示例#13
0
def running_baseline_july(tf, folds, random_state):
    # Trains and evaluates a baseline model on cross-validation folds of the
    # commits read from FLAGS.path.
    #   tf           -- object exposing the parsed flags as tf.flags.FLAGS
    #   folds        -- number of KFold splits
    #   random_state -- forwarded to KFold
    # Inside the loop below, the model is saved under ./keras_model/ and the
    # raw per-sample predictions are passed to write_file with a path under
    # ./statistical_test/.
    FLAGS = tf.flags.FLAGS
    commits_ = extract_commit_july(path_file=FLAGS.path)
    filter_commits = commits_
    print len(commits_)
    # NOTE(review): random_state is passed without shuffle=True; recent
    # scikit-learn releases reject that combination -- confirm the installed
    # version.
    kf = KFold(n_splits=folds, random_state=random_state)
    # Materialise the train/test index pairs of every split.
    idx_folds = list()
    for train_index, test_index in kf.split(filter_commits):
        idx = dict()
        idx["train"], idx["test"] = train_index, test_index
        idx_folds.append(idx)

    # Choose the text fed to the model by substring of FLAGS.model: messages,
    # messages merged with code (add_two_list), or code only.
    if "msg" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
    elif "all" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        all_lines = add_two_list(list1=msgs_, list2=codes_)
        msgs_ = all_lines
    elif "code" in FLAGS.model:
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        msgs_ = codes_
    else:
        print "You need to type correct model"
        exit()

    # The message dictionary is built from whichever text was selected above.
    dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
    pad_msg = mapping_commit_msg(msgs=msgs_, max_length=FLAGS.msg_length, dict_msg=dict_msg_)
    labels = load_label_commits(commits=filter_commits)
    labels = convert_to_binary(labels)
    print pad_msg.shape, labels.shape, len(dict_msg_)
    # exit()

    # timestamp is not used in the visible part of this function.
    timestamp = str(int(time.time()))
    accuracy, precision, recall, f1, auc = list(), list(), list(), list(), list()
    cntfold = 0
    pred_dict, pred_dict_prob = dict(), dict()
    # The loop ends with an unconditional break, so only one fold (the one at
    # index cntfold) is trained and evaluated.
    for i in xrange(cntfold, len(idx_folds)):
        idx = idx_folds[i]
        train_index, test_index = idx["train"], idx["test"]
        X_train_msg, X_test_msg = np.array(get_items(items=pad_msg, indexes=train_index)), \
                                  np.array(get_items(items=pad_msg, indexes=test_index))
        Y_train, Y_test = np.array(get_items(items=labels, indexes=train_index)), \
                          np.array(get_items(items=labels, indexes=test_index))
        # The exact model name selects the builder; both receive the same
        # arguments.
        if FLAGS.model == "lstm_cnn_msg" or FLAGS.model == "lstm_cnn_code" or FLAGS.model == "lstm_cnn_all":
            model = lstm_cnn(x_train=X_train_msg, y_train=Y_train, x_test=X_test_msg,
                             y_test=Y_test, dictionary_size=len(dict_msg_), FLAGS=FLAGS)
        elif FLAGS.model == "cnn_msg" or FLAGS.model == "cnn_code" or FLAGS.model == "cnn_all":
            model = cnn_model(x_train=X_train_msg, y_train=Y_train, x_test=X_test_msg,
                              y_test=Y_test, dictionary_size=len(dict_msg_), FLAGS=FLAGS)
        else:
            print "You need to give correct model name"
            exit()

        # model.save("./keras_model/" + FLAGS.model + "_" + str(cntfold) + ".h5")
        # model.save("./keras_model/" + FLAGS.model + "_" + str(cntfold) + "_testing.h5")
        # model.save("./keras_model/test_" + FLAGS.model + "_" + str(cntfold) + ".h5")
        model.save("./keras_model/newres_funcalls_" + FLAGS.model + "_" + str(cntfold) + ".h5")

        y_pred = model.predict(X_test_msg, batch_size=FLAGS.batch_size)
        y_pred = np.ravel(y_pred)

        # y_pred_tolist is not used in the visible part of this function.
        y_pred_tolist = y_pred.tolist()
        # One "<test index>\t<raw prediction>" line per test sample, built
        # before the predictions are thresholded below.
        # NOTE(review): under Python 2 the comprehension variable ``i`` rebinds
        # the outer loop variable; it is not read again afterwards.
        data_fold = [str(i) + "\t" + str(l) for i, l in zip(test_index, y_pred)]
        path_file = "./statistical_test/newres_funcalls_%s_fold_%s.txt" % (FLAGS.model, str(cntfold))
        write_file(path_file=path_file, data=data_fold)

        # Threshold the predictions in place at 0.5.
        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <= 0.5] = 0

        pred_dict.update(make_dictionary(y_pred=y_pred, y_index=test_index))
        # NOTE(review): every metric below, including auc_score, receives the
        # thresholded 0/1 predictions rather than the raw scores -- confirm
        # this is intended for the AUC.
        accuracy.append(accuracy_score(y_true=Y_test, y_pred=y_pred))
        precision.append(precision_score(y_true=Y_test, y_pred=y_pred))
        recall.append(recall_score(y_true=Y_test, y_pred=y_pred))
        f1.append(f1_score(y_true=Y_test, y_pred=y_pred))
        auc.append(auc_score(y_true=Y_test, y_pred=y_pred))
        print "accuracy", accuracy_score(y_true=Y_test, y_pred=y_pred)
        print "precision", precision_score(y_true=Y_test, y_pred=y_pred)
        print "recall", recall_score(y_true=Y_test, y_pred=y_pred)
        print "f1", f1_score(y_true=Y_test, y_pred=y_pred)

        cntfold += 1
        break