def split_into(chunk, BAseq_path):
    """Randomly partition the before/after-sequence dict at *BAseq_path*
    into `chunk` buckets and write each bucket to its own chunk file.

    Assignment is uniform-random per entry, so bucket sizes are only
    approximately equal.
    """
    BAseq = load_dict(BAseq_path)
    buckets = [[] for _ in range(chunk)]
    for entry in BAseq.values():
        # pick a destination bucket uniformly at random
        buckets[random.randint(0, chunk - 1)].append(entry)
    for idx, bucket in enumerate(buckets):
        write2file(bucket, "D:\APIMU\Data\\raw_l5\\" + "chunk" + str(idx))
def build_errorLabels(BAseq_path, outputpre):
    """Build parallel sequence/label files for error-localization training.

    Reads the before/after dict at *BAseq_path*, skips entries whose
    'before' sequence has fewer than 2 tokens, and writes:
      - <outputpre>/mu.seq   : space-joined 'before' sequences
      - <outputpre>/mu.label : space-joined labels from build_label()
    """
    BAdict = load_dict(BAseq_path)
    seqlist = []
    labellist = []
    ind = 0
    for key in BAdict.keys():
        before_seq = BAdict[key]["before"]
        # sequences shorter than 2 tokens carry no usable context
        if len(before_seq) < 2:
            continue
        after_seq = BAdict[key]["after"]
        labellist.append(' '.join(build_label(before_seq, after_seq)))
        seqlist.append(' '.join(before_seq))
        print(ind)
        ind += 1
    # BUG FIX: original wrote `before_seq` (only the last raw sequence,
    # and a NameError on an empty dict) instead of the accumulated list.
    write_lines(outputpre + "/mu.seq", seqlist)
    write_lines(outputpre + "/mu.label", labellist)
def Split_TrnValTest(BAseq_path, trn_p, val_p, test_p):
    """Randomly split the dataset at *BAseq_path* into train/val/test files.

    Each entry is assigned by a single uniform draw: train with
    probability *trn_p*, validation with *val_p*, and test otherwise.
    (*test_p* is kept for interface compatibility; the remainder mass
    1 - trn_p - val_p goes to the test set.)
    """
    BAseq = load_dict(BAseq_path)
    trnset = []
    valset = []
    testset = []
    for key in BAseq.keys():
        seed = random.random()
        if seed <= trn_p:
            trnset.append(BAseq[key])
        elif seed <= (trn_p + val_p):
            valset.append(BAseq[key])
        else:
            # BUG FIX: original condition `elif seed > test_p` could drop
            # entries entirely depending on the probability values; the
            # remainder must always land in the test split.
            testset.append(BAseq[key])
    write2file(trnset, r"D:\APIMU\Data\raw_l5/train")
    write2file(valset, r"D:\APIMU\Data\raw_l5/val")
    write2file(testset, r"D:\APIMU\Data\raw_l5/test")
def Count_APIMU_APIPercent(BAdict_path, FixAPICount_path):
    """Count, across all before/after pairs, how often each API appears in
    the 'after' sequence without appearing in the 'before' sequence, and
    write the resulting count dict to *FixAPICount_path*.

    NOTE: an API occurring multiple times in a single 'after' sequence is
    counted once per occurrence (matching the original behaviour).
    """
    BAdict = load_dict(BAdict_path)
    APICount = {}
    ind = 0
    for entry in BAdict.values():
        bef_api = entry["before"]
        aft_api = entry["after"]
        # APIs introduced by the fix (duplicates preserved)
        introduced = [api for api in aft_api if api not in bef_api]
        for api in introduced:
            APICount[api] = APICount.get(api, 0) + 1
        print(ind)
        ind += 1
    write_dict(APICount, FixAPICount_path)
def drawScatter(dict_path):
    """Scatter-plot API fix counts, coloured by API category.

    Categories: control-flow nodes (CONTROL_NODES), JDK APIs (name starts
    with "java"), and everything else.  X positions are random jitter so
    overlapping counts remain visible; Y is the count value.  The figure
    is saved to disk and shown.
    """
    apicount_dict = load_dict(dict_path)
    jdk_var, control_var, other_var = [], [], []
    for api, cnt in apicount_dict.items():
        # control-node check takes precedence over the "java" prefix check
        if api in CONTROL_NODES:
            control_var.append(int(cnt))
        elif str(api).startswith("java"):
            jdk_var.append(int(cnt))
        else:
            other_var.append(int(cnt))
    plt.xlabel('api')
    plt.ylabel('count')
    plt.xlim(xmax=10000, xmin=0)
    plt.ylim(ymin=50, ymax=200)
    # random x jitter; draws happen in the same order as the original
    # (jdk, control, other) so the RNG stream is consumed identically
    jdk_x = np.random.normal(5000, 1500, len(jdk_var))
    control_x = np.random.normal(5000, 1500, len(control_var))
    other_x = np.random.normal(5000, 1500, len(other_var))
    plt.scatter(jdk_x, np.array(jdk_var), c='#00CED1', alpha=0.6, label='JDK_API')
    plt.scatter(control_x, np.array(control_var), c='#DC143C', alpha=0.6, label='CONTROL_NODE')
    plt.scatter(other_x, np.array(other_var), c='grey', alpha=0.6, label='OTHER_API')
    plt.legend()
    plt.savefig(r'D:\apirep\Picture\50-200.png', dpi=300)
    plt.show()
def Count_AMUpercent(logfile):
    """Measure how often a code change also changes the extracted API sequence.

    Pulls every method document from MongoDB (joined against the jdk_api
    collection), splits them into 'before'/'after' snapshots keyed by
    filepath\\methodName\\signature, and for each before/after pair whose
    code differs checks whether the pair also appears in the pre-filtered
    API-sequence dict.  Writes a per-method log plus summary counts to
    *logfile* and dumps the changed-code pairs to a dict file.
    """
    myclient = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
    mydb = myclient["APISeq"]
    methodCol = mydb['method_info']
    results = methodCol.aggregate([
        {
            '$lookup': {
                "from": "jdk_api",          # collection to join against
                "localField": "apiSeq.$id",  # field on method_info
                "foreignField": "_id",       # field on jdk_api
                "as": "task_docs"            # name of the joined array
            },
        },
        {
            # strip everything we don't need from the joined documents
            '$project': {
                "task_docs._id": 0,
                "task_docs.apiName": 0,
                "task_docs.className": 0,
                "task_docs._class": 0,
                'task_docs.inParams': 0,
                'task_docs.outParams': 0,
                'commithash': 0,
                'project_info': 0,
                'inParams': 0,
                'apiSeq': 0,
                'className': 0,
                '_class': 0,
            }
        },
    ])
    beforedict = {}
    afterdict = {}
    BASeqDict = load_dict("E:\PyCharmProjects\APIRepair\Data\\filtered_BA.txt")
    # FIX: loop variable renamed from `re`, which shadowed the stdlib
    # regex module name.
    for doc in results:
        codes = doc['code']
        in_out = getInOutparam(codes)
        status = doc['status']
        # unique key: path + method name + parameter signature
        path = doc['filepath'] + r"\\" + doc['methodName'] + r"\\" + (in_out)
        if status == "after":
            afterdict[path] = codes
            print("after", len(afterdict))
        elif status == "before":
            beforedict[path] = codes
            print("before", len(beforedict))
    ind = 0
    print("Counting APIMU percent......")
    log_info = []
    BA_CodeDict = {}
    apichangecount = 0
    apiunchangecount = 0
    for key in beforedict.keys():
        # before snapshots live under P_dir, after snapshots under F_dir
        afterkey = key.replace("P_dir", "F_dir")
        if afterkey in afterdict.keys():
            before_code = beforedict[key]
            after_code = afterdict[afterkey]
            if before_code != after_code:
                BA_CodeDict[key] = {"before": before_code, "after": after_code}
                if key in BASeqDict.keys():
                    log_info.append(key + " " + "code changed " + " apiseq changed")
                    apichangecount += 1
                else:
                    log_info.append(key + " " + "code changed " + " apiseq unchange")
                    apiunchangecount += 1
                ind += 1
                print(ind, log_info[-1])
    with open(logfile, 'w', encoding='utf8') as f:
        for line in log_info:
            f.write(line + '\n')
        f.write("Total: APISeq changed: " + str(apichangecount) +
                " , unchange: " + str(apiunchangecount) +
                " Total Code changed: " + str(len(BA_CodeDict)))
        # FIX: removed redundant f.close() — the `with` block closes the file.
    print(len(BASeqDict))
    write_dict(BA_CodeDict, "E:\PyCharmProjects\APIRepair\Data\\filtered_BA_rawcode.txt")
from Analyze.DataAnalyze import Analyze_API4Fix, Analyze_JDKAPI_percent
from DataProcess.ReadMongo import load_dict

if __name__ == "__main__":
    # Alternate entry points, toggled by hand:
    # Count_APIMU_APIPercent("D:\\apirep\Data\\BAdif.dict", "D:\\apirep\Data\\API4FixCount.dict")
    # Analyze_JDKAPI_percent("D:\\apirep\Data\\API4FixCount.dict")
    APIvocab = load_dict("D:\\apirep\Data\\APIVocab.dict")
    # count vocabulary entries that are JDK ("java…") APIs
    count = sum(1 for key in APIvocab if str(key).startswith("java"))
    print(count)