def loading_testing_data():
    """Load the test features, the matching commits and their labels.

    Feature lines are read with ``load_file`` from every path in ``paths``
    and parsed by ``load_data_ICSE_new``.  Ground-truth lines are read from
    every path in ``path_gt``, parsed by ``extract_commit_new``, passed
    through ``filtering_commit`` (limits: 1 file, 8 hunks, 10 lines,
    120 characters per line) and then through ``get_commits`` together with
    the feature ids.  Finally the ids and the feature rows are re-ordered so
    that position ``i`` of each returned sequence belongs to ``commits_[i]``.

    Returns:
        tuple: ``(commits_, ids_, X_, y_)`` where ``commits_`` is the result
        of ``get_commits``, ``ids_`` and ``X_`` are the ids / feature rows
        aligned with it, and ``y_`` is a NumPy array holding 1 for commits
        whose ``"stable"`` field equals ``"true"`` and 0 otherwise.

    Raises:
        ValueError: if a commit id is not present among the feature ids.
    """
    # Feature files; the commented-out entries are alternative inputs.
    paths = ["./data/test_data/features_markusinfo.txt"]
    # paths.append("./data/test_data/features_nicholaskinfo.txt")
    # paths.append("./data/test_data/features_sashainfo.txt")
    data = []
    for p in paths:
        data.extend(load_file(path_file=p))
    ids_, X_ = load_data_ICSE_new(data=data)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s" % (len(ids_), X_.shape))

    # Ground-truth files; the commented-out entries are alternative inputs.
    # NOTE(review): features come from the "markus" file while the ground
    # truth comes from the "sasha" file -- confirm this pairing is intended.
    path_gt = []
    # path_gt.append("./data/test_data/markus_translated.out")
    # path_gt.append("./data/test_data/nicholask_translated.out")
    path_gt.append("./data/test_data/sasha_translated.out")
    print(path_gt)

    data_gt = []
    for p in path_gt:
        data_gt.extend(load_file(path_file=p))
    commits = extract_commit_new(commits=data_gt)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    commits_ = get_commits(commits=filtering_commit(commits=commits,
                                                    num_file=nfile,
                                                    num_hunk=nhunk,
                                                    num_loc=nline,
                                                    size_line=nleng), ids=ids_)

    # Record the first position of every id once instead of rescanning the
    # id list with list.index() for each commit.  setdefault keeps the first
    # occurrence, which is the position list.index() would have returned.
    first_pos = {}
    for pos, commit_id in enumerate(ids_):
        first_pos.setdefault(commit_id, pos)

    ids_index = []
    for c in commits_:
        commit_id = c["id"]
        if commit_id not in first_pos:
            # Same exception type list.index() raised for a missing id.
            raise ValueError("%r is not in list" % (commit_id,))
        ids_index.append(first_pos[commit_id])

    # Re-order ids and feature rows to follow the order of commits_.
    ids_ = [ids_[i] for i in ids_index]
    X_ = X_[ids_index, :]
    y_ = [1 if c["stable"] == "true" else 0 for c in commits_]
    return commits_, ids_, X_, np.array(y_)
def loading_training_data():
    """Load the training features, labels and the matching commits.

    Ids, features and labels are read by ``load_data_ICSE`` from
    ``./data/3_mar7/new_features_ver1.txt``.  Commits are read by
    ``extract_commit`` from ``./data/3_mar7/typediff.out``, passed through
    ``filtering_commit`` (limits: 1 file, 8 hunks, 10 lines, 120 characters
    per line) and then through ``get_commits`` together with the ids.

    Returns:
        tuple: ``(commits_, ids_, X_, y_)`` -- the result of ``get_commits``
        followed by the ids, features and labels exactly as returned by
        ``load_data_ICSE`` (they are not re-ordered here).
    """
    path_ftr = "./data/3_mar7/new_features_ver1.txt"
    ids_, X_, y_ = load_data_ICSE(path=path_ftr)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s %s" % (len(ids_), X_.shape, y_.shape))

    path_data = "./data/3_mar7/typediff.out"
    commits_ = extract_commit(path_file=path_data)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    commits_ = get_commits(commits=filtering_commit(commits=commits_,
                                                    num_file=nfile,
                                                    num_hunk=nhunk,
                                                    num_loc=nline,
                                                    size_line=nleng), ids=ids_)
    return commits_, ids_, X_, y_
Exemplo n.º 3
0
    return commits_, ids_, X_, y_


def clean_merging_data(ids, ftrs):
    """Return, for each id in ``ids``, the feature line that starts with it.

    Every entry of ``ftrs`` is a comma-separated string whose first field is
    treated as its id.  The result follows the order of ``ids``; when several
    lines share an id, the first such line is used.

    Args:
        ids: iterable of (hashable) ids to look up, in the desired order.
        ftrs: iterable of comma-separated feature strings.

    Returns:
        list: one entry of ``ftrs`` per element of ``ids``.

    Raises:
        ValueError: if an id does not match the first field of any line.
    """
    # Index the first line seen for each id once, rather than rescanning
    # the whole list with list.index() for every requested id.
    first_line = {}
    for line in ftrs:
        first_line.setdefault(line.split(",", 1)[0], line)

    new_ftr = []
    for i in ids:
        if i not in first_line:
            # Same exception type list.index() raised for a missing id.
            raise ValueError("%r is not in list" % (i,))
        new_ftr.append(first_line[i])
    return new_ftr


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Step 1: read the merged markus/sasha commits, filter them, and keep
    # their ids plus a 0/1 label derived from the "stable" field.
    # ------------------------------------------------------------------
    path_data = "./data/test_data/merging_markus_sasha.txt"
    commits_ = extract_commit(path_file=path_data)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    filter_commits = filtering_commit(commits=commits_,
                                      num_file=nfile,
                                      num_hunk=nhunk,
                                      num_loc=nline,
                                      size_line=nleng)
    ids_ = [c["id"] for c in filter_commits]
    labels_ = [1 if c["stable"] == "true" else 0 for c in filter_commits]

    # Keep only the feature lines of the filtered commits, in their order.
    path_ftr = "./data/test_data/features_merging_markus_sasha.txt"
    ftr = load_file(path_file=path_ftr)
    new_ftr = clean_merging_data(ids=ids_, ftrs=ftr)

    # NOTE(review): this call passes ftr_data/commit_data keyword arguments;
    # confirm that the loading_testing_data in scope accepts them.
    commits_test, ids_test, X_ftr_test, y_test = loading_testing_data(
        ftr_data=new_ftr, commit_data=filter_commits)
    commits_train, ids_train, X_ftr_train, y_train = loading_training_data()

    # type = "msg"
    # type = "code"
    # -----------------------------------------------------------------------------
    # path_file = "./data/3_mar7/new_features_ver1.txt"
    # ids, X_, y_ = load_data_ICSE(path=path_file)
    # get_predict_ICSE(name="", X=X_, y=y_, algorithm="svm", folds=10)
    # -----------------------------------------------------------------------------
    # -----------------------------------------------------------------------------
    # ------------------------------------------------------------------
    # Step 2: load the training features and commits, append the features
    # built from the commits with type="msg", and run the SVM evaluation
    # with 5 folds.
    # ------------------------------------------------------------------
    path_ftr = "./data/3_mar7/new_features_ver1.txt"
    ids_, X_, y_ = load_data_ICSE(path=path_ftr)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s %s" % (len(ids_), X_.shape, y_.shape))

    path_data = "./data/3_mar7/typediff.out"
    commits_ = extract_commit(path_file=path_data)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    commits_ = get_commits(commits=filtering_commit(commits=commits_,
                                                    num_file=nfile,
                                                    num_hunk=nhunk,
                                                    num_loc=nline,
                                                    size_line=nleng), ids=ids_)
    X_data = create_features_ICSE(commits=commits_, ids=ids_, type="msg")
    # Place the extra feature columns to the right of the loaded ones.
    new_X_ = np.column_stack((X_, X_data))
    print("%s %s %s" % (len(ids_), new_X_.shape, y_.shape))
    get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="svm", folds=5)
    # get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="lr", folds=5)
    # get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="dt", folds=5)

    # X_data = create_features_ICSE(commits=commits_, ids=ids_, type="code")
    # new_X_ = np.column_stack((X_, X_data))
    # print len(ids_), new_X_.shape, y_.shape
    # get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="svm", folds=10)
    # get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="lr", folds=10)
    # get_predict_ICSE(name="", X=new_X_, y=y_, algorithm="dt", folds=10)