示例#1
0
def split_into(chunk, BAseq_path):
    """Randomly distribute the values of a loaded dict over ``chunk`` files.

    The dict is read with ``load_dict(BAseq_path)``. Each value is appended
    to one of ``chunk`` lists, chosen uniformly at random, and every list is
    then written with ``write2file`` to ``D:\\APIMU\\Data\\raw_l5\\chunk<i>``.

    Args:
        chunk: Number of output chunks. Must be at least 1 when the dict is
            non-empty, otherwise ``random.randint(0, chunk - 1)`` raises
            ``ValueError``.
        BAseq_path: Path handed to ``load_dict``.
    """
    BAseq = load_dict(BAseq_path)
    chunks = [[] for _ in range(chunk)]
    for key in BAseq.keys():
        ind = random.randint(0, chunk - 1)
        chunks[ind].append(BAseq[key])
    # Backslashes are escaped explicitly: the previous literal relied on the
    # invalid escapes "\A" and "\D", which newer Python versions warn about.
    # The resulting path is unchanged.
    out_prefix = "D:\\APIMU\\Data\\raw_l5\\" + "chunk"
    for i, bucket in enumerate(chunks):
        write2file(bucket, out_prefix + str(i))
示例#2
0
def build_errorLabels(BAseq_path, outputpre):
    """Write the "before" sequences and their labels to ``mu.seq`` / ``mu.label``.

    The dict loaded from ``BAseq_path`` maps each key to an entry with a
    ``"before"`` and an ``"after"`` sequence. Entries whose ``"before"``
    sequence has fewer than two items are skipped. For every remaining entry
    the space-joined ``"before"`` sequence and the space-joined result of
    ``build_label(before, after)`` are collected; line *i* of ``mu.seq``
    therefore corresponds to line *i* of ``mu.label``.

    Args:
        BAseq_path: Path handed to ``load_dict``.
        outputpre: Output directory prefix; ``/mu.seq`` and ``/mu.label``
            are appended to it.
    """
    BAdict = load_dict(BAseq_path)
    seqlist = []
    labellist = []
    ind = 0
    for key in BAdict.keys():
        before_seq = BAdict[key]["before"]
        if len(before_seq) < 2:
            continue
        after_seq = BAdict[key]["after"]
        labellist.append(' '.join(build_label(before_seq, after_seq)))
        seqlist.append(' '.join(before_seq))
        print(ind)  # progress: index of the entry just processed
        ind += 1
    # Write the accumulated sequences. The previous code passed the loop
    # variable ``before_seq`` here, i.e. only the last entry's raw sequence
    # (and raised NameError when the dict was empty).
    write_lines(outputpre + "/mu.seq", seqlist)
    write_lines(outputpre + "/mu.label", labellist)
示例#3
0
def Split_TrnValTest(BAseq_path, trn_p, val_p, test_p):
    """Randomly split the values of a loaded dict into train/val/test files.

    For every key a uniform random number in ``[0, 1)`` is drawn:

    * ``seed <= trn_p``                  -> training set
    * ``trn_p < seed <= trn_p + val_p``  -> validation set
    * everything else                    -> test set

    The three lists are written with ``write2file`` to the hard-coded
    ``D:\\APIMU\\Data\\raw_l5`` directory as ``train``, ``val`` and ``test``.

    Args:
        BAseq_path: Path handed to ``load_dict``.
        trn_p: Fraction of samples for the training set.
        val_p: Fraction of samples for the validation set.
        test_p: Kept for interface compatibility. The test set receives the
            remainder ``1 - trn_p - val_p``, so the three fractions are
            expected to sum to 1.
    """
    BAseq = load_dict(BAseq_path)
    trnset = []
    valset = []
    testset = []
    val_upper = trn_p + val_p
    for key in BAseq.keys():
        seed = random.random()
        if seed <= trn_p:
            trnset.append(BAseq[key])
        elif seed <= val_upper:
            valset.append(BAseq[key])
        else:
            # Previously guarded by ``seed > test_p``, which silently dropped
            # samples with trn_p + val_p < seed <= test_p whenever
            # test_p exceeded trn_p + val_p.
            testset.append(BAseq[key])
    write2file(trnset, r"D:\APIMU\Data\raw_l5/train")
    write2file(valset, r"D:\APIMU\Data\raw_l5/val")
    write2file(testset, r"D:\APIMU\Data\raw_l5/test")
示例#4
0
def Count_APIMU_APIPercent(BAdict_path, FixAPICount_path):
    """Count how often each API appears only on the "after" side of an entry.

    The dict loaded from ``BAdict_path`` maps each key to an entry with a
    ``"before"`` and an ``"after"`` collection of APIs. For every entry, each
    item of ``"after"`` that is not contained in ``"before"`` adds one to
    that API's tally (repeated occurrences in ``"after"`` count repeatedly).
    The resulting ``{api: count}`` dict is written with ``write_dict`` to
    ``FixAPICount_path``. The index of each processed entry is printed as a
    progress indicator.
    """
    entries = load_dict(BAdict_path)
    introduced_counts = {}
    for position, entry_key in enumerate(entries.keys()):
        before_apis = entries[entry_key]["before"]
        after_apis = entries[entry_key]["after"]
        # APIs present after the change but absent before it.
        introduced = [api for api in after_apis if api not in before_apis]
        for api in introduced:
            introduced_counts[api] = introduced_counts.get(api, 0) + 1
        print(position)
    write_dict(introduced_counts, FixAPICount_path)
示例#5
0
def drawScatter(dict_path):
    """Scatter-plot the counts stored in the dict at ``dict_path`` as three series.

    Keys found in ``CONTROL_NODES`` form the 'CONTROL_NODE' series, keys
    whose string form starts with "java" form the 'JDK_API' series, and all
    remaining keys form 'OTHER_API'. Only the y value of a point carries the
    count; x positions are random draws from a normal distribution
    (mean 5000, std 1500) used purely to spread the points out. The axes are
    limited to x in [0, 10000] and y in [50, 200]. The figure is saved to a
    hard-coded PNG path and then displayed.
    """
    counts = load_dict(dict_path)
    jdk_counts, control_counts, other_counts = [], [], []
    for api in counts.keys():
        if api in CONTROL_NODES:
            target = control_counts
        elif str(api).startswith("java"):
            target = jdk_counts
        else:
            target = other_counts
        target.append(int(counts[api]))

    plt.xlabel('api')
    plt.ylabel('count')
    plt.xlim(xmax=10000, xmin=0)
    plt.ylim(ymin=50, ymax=200)

    # (values, point colour, legend label), listed in drawing order.
    series = (
        (jdk_counts, '#00CED1', 'JDK_API'),
        (control_counts, '#DC143C', 'CONTROL_NODE'),
        (other_counts, 'grey', 'OTHER_API'),
    )
    # Draw every series' x positions up front, in series order, so the
    # global NumPy random stream is consumed jdk -> control -> other.
    x_positions = [
        np.random.normal(5000, 1500, len(values)) for values, _, _ in series
    ]
    for xs, (values, colour, label) in zip(x_positions, series):
        plt.scatter(xs, np.array(values), c=colour, alpha=0.6, label=label)
    plt.legend()
    plt.savefig(r'D:\apirep\Picture\50-200.png', dpi=300)
    plt.show()
示例#6
0
def Count_AMUpercent(logfile):
    """Log which before/after code changes also appear in the filtered API-sequence dict.

    Steps:

    1. Read every document of ``APISeq.method_info`` from the local MongoDB,
       joined with ``jdk_api`` and with the listed fields projected away.
    2. Build a path key ``filepath + r"\\" + methodName + r"\\" + in_out``
       for each document (``in_out`` comes from ``getInOutparam(code)``) and
       store its code in a "before" or "after" dict according to ``status``.
    3. Pair each "before" key with the "after" key obtained by replacing
       ``"P_dir"`` with ``"F_dir"``. When both exist and the code differs,
       record the pair and note whether the key is present in the filtered
       before/after dict loaded from ``filtered_BA.txt``.
    4. Write one log line per changed pair plus a summary line to
       ``logfile``, and write the changed code pairs with ``write_dict`` to
       ``filtered_BA_rawcode.txt``.

    Args:
        logfile: Path of the UTF-8 text log to create or overwrite.
    """
    myclient = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
    try:
        mydb = myclient["APISeq"]
        methodCol = mydb['method_info']
        results = methodCol.aggregate([
            {
                '$lookup': {
                    "from": "jdk_api",  # the other collection (B) to join against
                    "localField": "apiSeq.$id",  # field of collection A
                    "foreignField": "_id",  # field of collection B
                    "as": "task_docs"  # name of the new field holding the joined documents
                },
            },
            {
                '$project': {
                    "task_docs._id": 0,
                    "task_docs.apiName": 0,
                    "task_docs.className": 0,
                    "task_docs._class": 0,
                    'task_docs.inParams': 0,
                    'task_docs.outParams': 0,
                    'commithash': 0,
                    'project_info': 0,
                    'inParams': 0,
                    'apiSeq': 0,
                    'className': 0,
                    '_class': 0,
                }
            },
        ])
        beforedict = {}
        afterdict = {}
        # Raw strings: the previous literals relied on the invalid escapes
        # "\P", "\A" and "\D". The resulting paths are unchanged.
        BASeqDict = load_dict(
            r"E:\PyCharmProjects\APIRepair\Data\filtered_BA.txt")
        # ``record`` rather than ``re`` so the stdlib module name is not shadowed.
        for record in results:
            codes = record['code']
            in_out = getInOutparam(codes)
            status = record['status']
            path = (record['filepath'] + r"\\" + record['methodName'] +
                    r"\\" + (in_out))

            if status == "after":
                afterdict[path] = codes
                print("after", len(afterdict))
            elif status == "before":
                beforedict[path] = codes
                print("before", len(beforedict))
    finally:
        # The cursor has been fully consumed (or an error occurred), so
        # release the connection instead of leaking it.
        myclient.close()

    ind = 0
    print("Counting APIMU percent......")
    log_info = []
    BA_CodeDict = {}
    apichangecount = 0
    apiunchangecount = 0
    for key in beforedict.keys():
        afterkey = key.replace("P_dir", "F_dir")
        if afterkey not in afterdict:
            continue
        before_code = beforedict[key]
        after_code = afterdict[afterkey]
        if before_code == after_code:
            continue
        BA_CodeDict[key] = {"before": before_code, "after": after_code}
        if key in BASeqDict.keys():
            log_info.append(key + " " + "code changed " +
                            " apiseq changed")
            apichangecount += 1
        else:
            log_info.append(key + " " + "code changed " +
                            " apiseq unchange")
            apiunchangecount += 1
        ind += 1
        print(ind, log_info[-1])

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(logfile, 'w', encoding='utf8') as f:
        for line in log_info:
            f.write(line + '\n')
        f.write("Total: APISeq changed: " + str(apichangecount) +
                " , unchange: " + str(apiunchangecount) +
                " Total Code changed: " + str(len(BA_CodeDict)))
    print(len(BASeqDict))
    write_dict(BA_CodeDict,
               r"E:\PyCharmProjects\APIRepair\Data\filtered_BA_rawcode.txt")
示例#7
0
from Analyze.DataAnalyze import Analyze_API4Fix, Analyze_JDKAPI_percent
from DataProcess.ReadMongo import load_dict
if __name__ == "__main__":
    #Count_APIMU_APIPercent("D:\\apirep\Data\\BAdif.dict","D:\\apirep\Data\\API4FixCount.dict")
    #Analyze_JDKAPI_percent("D:\\apirep\Data\\API4FixCount.dict")
    # Count the vocabulary keys whose string form starts with "java" and
    # print the total. Raw string: the previous literal relied on the
    # invalid escape "\D"; the resulting path is unchanged.
    APIvocab = load_dict(r"D:\apirep\Data\APIVocab.dict")
    count = sum(1 for key in APIvocab.keys() if str(key).startswith("java"))
    print(count)