def text2feat(api, api_descriptions, w2v, idf, query_matrix, query_idf_vector):
    """Build the query-similarity features for one API and its description.

    Both ``api`` and ``api_descriptions`` are turned into a matrix / IDF-vector
    pair with ``feedback.load_matrix`` and compared against the already
    prepared query representation with ``similarity.sim_doc_pair``.

    Args:
        api: The API identifier; also used as the key of the second result.
        api_descriptions: Description of the API. The literal string
            ``'null'`` means "no description" and yields a similarity of 0.
        w2v: Forwarded unchanged to ``feedback.load_matrix``.
        idf: Forwarded unchanged to ``feedback.load_matrix``.
        query_matrix: Query matrix passed to ``similarity.sim_doc_pair``.
        query_idf_vector: Query IDF vector passed to
            ``similarity.sim_doc_pair``.

    Returns:
        A 3-tuple ``(sum_inf, api_inf, api_desc_inf)``:
        ``sum_inf`` is the list ``[api_sim, api_desc_sim]``;
        ``api_inf`` is ``{api: api_sim}``;
        ``api_desc_inf`` is ``{api_descriptions: api_desc_sim}``.
        The dict forms exist so the caller can print the top-n features
        directly once they have been computed.
    """
    name_matrix, name_idf_vector = feedback.load_matrix(api, w2v, idf)
    # The description is loaded unconditionally, even when it is 'null'.
    desc_matrix, desc_idf_vector = feedback.load_matrix(api_descriptions, w2v, idf)

    # Similarity of the query to the API itself and to its description; the
    # related questions were already collected during recommendation.
    api_sim = similarity.sim_doc_pair(
        query_matrix, name_matrix, query_idf_vector, name_idf_vector)
    api_desc_sim = (
        0
        if api_descriptions == 'null'
        else similarity.sim_doc_pair(
            query_matrix, desc_matrix, query_idf_vector, desc_idf_vector)
    )

    # One feature row per API: [api similarity, description similarity].
    sum_inf = [api_sim, api_desc_sim]

    return sum_inf, {api: api_sim}, {api_descriptions: api_desc_sim}
def _read_csv_rows(path):
    """Return the non-empty rows of the CSV file at *path* as lists of strings."""
    # newline='' is the mode the csv module asks for; the with-block guarantees
    # the handle is closed. Blank lines (csv yields [] for them) are skipped so
    # a trailing empty line cannot raise IndexError in the callers.
    with open(path, 'r', newline='') as handle:
        return [row for row in csv.reader(handle) if row]


def process_input(msg='how to convert int to string?'):
    """Recommend APIs for a query and re-rank them with the LTR and AL scores.

    Steps, in order:
      1. Build the query matrix, fetch the top 50 questions and the
         recommended APIs; at most the first ten APIs are kept.
      2. For each kept API compute its similarity features (``text2feat``)
         and append the feedback features from ``feedback.get_feedback_inf``.
      3. Read the feedback / feature CSV files under ``../data``.
      4. Obtain ``pred2`` from ``get_AL_predict`` and ``pred1`` from
         ``braid_LTR.get_LTR_predict`` and blend them into one score per API.
      5. Order the APIs by blended score (descending) and build the response.

    Args:
        msg: The user query text.

    Returns:
        The list ``responseToClient``: one dict per API in ranked order with
        keys ``'id'`` (1-based rank), ``'api'`` and ``'desc'``.

    Side effects:
        Rebinds the module globals ``responseToClient``, ``feedback_inf``,
        ``api_feature`` and ``sort``; appends the ranked APIs to the global
        ``rerank``; prints the ranking to stdout.
    """
    global rerank
    global responseToClient
    global feedback_inf
    global api_feature
    global sort

    query = msg
    query_matrix, query_idf_vector = feedback.load_matrix(query, w2v, idf)
    top_questions = recommendation.get_topk_questions(
        query, query_matrix, query_idf_vector, questions, 50, parent)
    recommended_api = recommendation.recommend_api(
        query_matrix, query_idf_vector, top_questions, questions,
        javadoc, javadoc_dict_methods, -1)

    # combine api_relevant feature with FF
    rec_api = []
    api_dict_desc = {}
    x, api_feature, responseToClient = [], [], []
    for i, api in enumerate(recommended_api):
        rec_api.append(api)
        api_descriptions, _ = recommendation.summarize_api_method(
            api, top_questions, questions, javadoc, javadoc_dict_methods)
        api_dict_desc[api] = api_descriptions
        sum_inf, _, _ = text2feat(
            api, api_descriptions, w2v, idf, query_matrix, query_idf_vector)
        api_feature.append(sum_inf)
        if i == 9:
            # Only the first ten recommended APIs are re-ranked.
            break

    # feedback info of user query from SO
    so_rows = _read_csv_rows('../data/feedback_all.csv')
    so_query = [row[0] for row in so_rows]
    so_answer = [row[1:] for row in so_rows]

    # feedback info of user query from FR
    fr_rows = _read_csv_rows('../data/feedback_rec.csv')
    choose_query = [row[0] for row in fr_rows]
    choose_answer = [row[1:] for row in fr_rows]

    feedback_inf = feedback.get_feedback_inf(
        query, choose_query, choose_answer, rec_api, w2v, idf)

    # FV = RF+FF
    # Each row is extended in place, so the rows of the global api_feature
    # and of x are the same list objects.
    for i, feature_row in enumerate(api_feature):
        feature_row.extend(feedback_inf[i])
        x.append(feature_row)

    # feature info of FR: last column is the API, the rest are features
    labelled_rows = _read_csv_rows('../data/feedback_feature_rec.csv')
    labelled_feature = [row[:-1] for row in labelled_rows]
    rec_api_choose = [row[-1] for row in labelled_rows]

    # feature info of SO: same column layout as above
    unlabel_rows = _read_csv_rows('../data/get_feature_method.csv')
    unlabel_feature = [row[:-1] for row in unlabel_rows]
    rec_api_unlabel = [row[-1] for row in unlabel_rows]

    pred2, add_x_FR, add_x_FV, add_y_FV = get_AL_predict(
        x, labelled_feature, unlabel_feature, query, choose_query,
        choose_answer, so_query, so_answer, rec_api, rec_api_choose,
        rec_api_unlabel, w2v, idf)
    pred1 = braid_LTR.get_LTR_predict(add_x_FR, add_x_FV, add_y_FV)

    n_api = len(api_feature)

    # 1-based descending rank of every pred2 score; equal scores receive
    # consecutive ranks in the order they are encountered.
    al_idx = []
    rerank_al = sorted(pred2, reverse=True)
    for i in range(n_api):
        rank = rerank_al.index(pred2[i]) + 1
        while rank in al_idx:
            rank += 1
        al_idx.append(rank)

    # Blend: pred1 shifted by +5 and divided by the number of candidates,
    # plus pred2 weighted by m and divided by its rank.
    m = 0.6
    pred = [
        (pred1[k] + 5) / n_api + m * pred2[k] / al_idx[k]
        for k in range(n_api)
    ]

    # 1-based API positions ordered by blended score, best first. A stable
    # descending sort keeps the lower index first on ties (the same order the
    # former repeated index(max(...)) produced) and needs no sentinel value,
    # so arbitrarily low scores cannot produce duplicate entries.
    sort = [
        k + 1
        for k in sorted(range(n_api), key=lambda k: pred[k], reverse=True)
    ]

    # Re-order the APIs and output the results.
    # NOTE(review): the global rerank is appended to but never cleared in this
    # function, so it grows across calls unless reset elsewhere - confirm.
    for position, api_index in enumerate(sort, start=1):
        api_mod = rec_api[api_index - 1]
        print(position, api_mod)
        api_obj = {'id': position, 'api': api_mod, 'desc': api_dict_desc[api_mod]}
        rerank.append(api_mod)
        responseToClient.append(api_obj)

    print(type(rerank))
    print(json.dumps(rerank))
    print(rerank)
    print(responseToClient)
    return responseToClient
except ValueError: predict = [0.0 for n in range(length)] # print('Input the error query') else: predict.append(float(y_pre[0, 1])) return predict, X, new_X_feedback, new_y_feedback while True: print('Please input your query:') query = input() # query = 'how to convert int to string?' if not query: continue query_matrix, query_idf_vector = feedback.load_matrix(query, w2v, idf) top_questions = recommendation.get_topk_questions(query, query_matrix, query_idf_vector, questions, 50, parent) recommended_api = recommendation.recommend_api(query_matrix, query_idf_vector, top_questions, questions, javadoc, javadoc_dict_methods, -1) # recommended_api = recommendation.recommend_api_class(query_matrix, query_idf_vector, top_questions, questions, javadoc,javadoc_dict_classes,-1) # combine api_relevant feature with FF pos = -1 rec_api = [] api_dict_desc = {}