def summarize_api_class(api_class, top_questions, questions, javadoc, javadoc_dict_classes):
    """Print the JavaDoc entry of an API class and rank the questions mentioning it.

    Args:
        api_class: Fully qualified class name; compared against
            ``api.package_name + '.' + api.class_name``.
        top_questions: Dict keyed by question id. Only questions whose id is a
            key are inspected, and the mapped value is used as ranking score.
        questions: Iterable of question objects exposing ``id``, ``title`` and
            ``answers``; every answer exposes an HTML ``body``.
        javadoc: Iterable of API class objects exposing ``package_name``,
            ``class_name``, ``class_description`` and ``methods``.
        javadoc_dict_classes: Dict mapping the text of a ``<code>`` element
            (cut at the first '(') to a fully qualified class name.

    Returns:
        A list of ``(question title, score)`` tuples sorted by descending score.
    """
    for api in javadoc:
        if api.package_name + '.' + api.class_name != api_class:
            continue
        # The summary is only printed for classes that declare at least one
        # method (the previous implementation printed from inside a loop over
        # api.methods and stopped at the first iteration).
        if not api.methods:
            continue
        print('>>>JavaDoc<<<')
        print(api.class_name)
        print(api.class_description)
        # NOTE(review): only the first element of class_description is echoed
        # here, exactly as before -- confirm that this is the intended output.
        print(api.class_description[0])

    titles = dict()
    for question in questions:
        if question.id not in top_questions:
            continue
        score = top_questions[question.id]
        for answer in question.answers:
            soup = BeautifulSoup(answer.body, 'html.parser', from_encoding='utf-8')

            for anchor in soup.find_all('a'):
                # Anchors without an href attribute used to raise KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if 'docs.oracle.com/javase/' in href and '/api/' in href and 'html' in href:
                    # pair[0] is class name, pair[1] is method name
                    pair = util.parse_api_link(href)
                    if pair[0] == api_class:
                        titles[question.title] = score

            for code_tag in soup.find_all('code'):
                code = code_tag.get_text()
                pos = code.find('(')
                if pos != -1:
                    # Drop the argument list so "Foo.bar(x)" is looked up as "Foo.bar".
                    code = code[:pos]
                if code in javadoc_dict_classes and javadoc_dict_classes[code] == api_class:
                    titles[question.title] = score

    return sorted(titles.items(), key=lambda item: item[1], reverse=True)
def recommend_api(query_matrix, query_idf_vector, top_questions, questions, javadoc,
                  javadoc_dict_methods, topk):
    """Rank API methods for a query by combining StackOverflow and JavaDoc evidence.

    Args:
        query_matrix: Query representation forwarded to ``similarity.sim_doc_pair``.
        query_idf_vector: Query IDF vector forwarded to ``similarity.sim_doc_pair``.
        top_questions: Dict of the top-k most relevant questions of the query;
            the key is the question id, the value is the similarity between
            the question and the query.
        questions: List of all (API related) StackOverflow questions; each
            exposes ``id`` and ``answers`` (answers expose ``score`` and ``body``).
        javadoc: List of all API classes; each exposes ``package_name``,
            ``class_name``, ``methods``, ``methods_matrix`` and
            ``methods_idf_vector``.
        javadoc_dict_methods: Dict mapping the text of a ``<code>`` element
            (cut at the first '(') to a fully qualified method name.
        topk: Maximum number of methods to return; ``-1`` returns all of them.

    Returns:
        A list of fully qualified method names ordered by descending score.
    """
    api_methods = dict()  # stores the SO_sim of api method and the query
    api_methods_count = dict()

    for question in questions:
        if question.id not in top_questions:
            continue
        question_sim = top_questions[question.id]
        # A method is credited at most once per question, however many
        # answers mention it.
        seen_in_question = set()
        for answer in question.answers:
            if int(answer.score) < 0:
                continue
            soup = BeautifulSoup(answer.body, 'html.parser')

            # Collect mentions in the original order: links first, then <code>.
            mentioned = []
            for anchor in soup.find_all('a'):
                # Anchors without an href attribute used to raise KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if 'docs.oracle.com/javase/' in href and '/api/' in href and 'html' in href:
                    # pair[0] is class name, pair[1] is method name
                    pair = util.parse_api_link(href)
                    if pair[1] != '':
                        mentioned.append(pair[0] + '.' + pair[1])
            for code_tag in soup.find_all('code'):
                code = code_tag.get_text()
                pos = code.find('(')
                if pos != -1:
                    code = code[:pos]
                if code in javadoc_dict_methods:
                    mentioned.append(javadoc_dict_methods[code])

            for method_name in mentioned:
                if method_name in seen_in_question:
                    continue
                seen_in_question.add(method_name)
                if method_name in api_methods:
                    api_methods[method_name] += question_sim
                    api_methods_count[method_name] += 1
                else:
                    api_methods[method_name] = question_sim
                    api_methods_count[method_name] = 1.0

    # Mean question similarity, boosted by log2 of the mention count, capped at 1.
    for key, value in api_methods.items():
        count = api_methods_count[key]
        api_methods[key] = min(1.0, value / count * (1.0 + math.log(count, 2) / 10))

    api_sim = {}
    for api in javadoc:
        class_name = api.package_name + '.' + api.class_name
        for i, method in enumerate(api.methods):
            method_name = class_name + '.' + method
            if method_name not in api_methods:
                continue
            doc_sim = similarity.sim_doc_pair(query_matrix, api.methods_matrix[i],
                                              query_idf_vector, api.methods_idf_vector[i])
            so_sim = api_methods[method_name]
            # Harmonic mean of both similarities; 0.0 when both are zero
            # instead of raising ZeroDivisionError.
            total = doc_sim + so_sim
            combined = 2 * doc_sim * so_sim / total if total else 0.0
            if method_name in api_sim:
                # The same qualified name can occur several times in a class;
                # keep the best score.
                api_sim[method_name] = max(api_sim[method_name], combined)
            else:
                api_sim[method_name] = combined

    ranked = sorted(api_sim.items(), key=lambda item: item[1], reverse=True)

    recommended_api = list()
    for name, _ in ranked:
        recommended_api.append(name)
        if topk != -1 and len(recommended_api) >= topk:
            break
    return recommended_api
def recommend_api_class(query_matrix, query_idf_vector, top_questions, questions, javadoc,
                        javadoc_dict_classes, topk):
    """Rank API classes for a query by combining StackOverflow and JavaDoc evidence.

    Args:
        query_matrix: Query representation forwarded to ``similarity.sim_doc_pair``.
        query_idf_vector: Query IDF vector forwarded to ``similarity.sim_doc_pair``.
        top_questions: Dict of the top-k most relevant questions of the query;
            the key is the question id, the value is the similarity between
            the question and the query.
        questions: List of all (API related) StackOverflow questions; each
            exposes ``id`` and ``answers`` (answers expose ``score`` and ``body``).
        javadoc: List of all API classes; each exposes ``package_name``,
            ``class_name``, ``methods_matrix`` and ``methods_idf_vector``.
        javadoc_dict_classes: Dict mapping the text of a ``<code>`` element
            (cut at the first '(') to a fully qualified class name.
        topk: Maximum number of classes to return; ``-1`` returns all of them.

    Returns:
        A list of fully qualified class names ordered by descending score.
    """
    api_classes_count = dict()
    # stores the similarity between the question (whose answer contains the
    # API class) and the query
    api_classes = dict()

    for question in questions:
        if question.id not in top_questions:
            continue
        question_sim = top_questions[question.id]
        for answer in question.answers:
            if int(answer.score) < 0:
                continue
            soup = BeautifulSoup(answer.body, 'html.parser', from_encoding='utf-8')

            # Collect mentions in the original order: links first, then <code>.
            mentioned = []
            for anchor in soup.find_all('a'):
                # Anchors without an href attribute used to raise KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if 'docs.oracle.com/javase/' in href and '/api/' in href and 'html' in href:
                    # pair[0] is class name, pair[1] is method name; the class
                    # name already contains the package, i.e. java.util.Calendar
                    pair = util.parse_api_link(href)
                    mentioned.append(pair[0])
            for code_tag in soup.find_all('code'):
                code = code_tag.get_text()
                pos = code.find('(')
                if pos != -1:
                    code = code[:pos]
                if code in javadoc_dict_classes:
                    mentioned.append(javadoc_dict_classes[code])

            # Every mention counts here (no per-question de-duplication).
            for class_name in mentioned:
                if class_name in api_classes:
                    api_classes[class_name] += question_sim
                    api_classes_count[class_name] += 1
                else:
                    api_classes[class_name] = question_sim
                    api_classes_count[class_name] = 1

    # Mean question similarity, boosted by log2 of the mention count, capped at 1.
    for key, value in api_classes.items():
        count = api_classes_count[key]
        api_classes[key] = min(1.0, value / count * (1.0 + math.log(count, 2) / 10))

    api_sim = {}
    for api in javadoc:
        class_name = api.package_name + '.' + api.class_name
        if class_name not in api_classes:
            continue
        # JavaDoc similarity of a class is that of its best matching method.
        doc_sim = 0.0
        for i, method_matrix in enumerate(api.methods_matrix):
            doc_sim = max(doc_sim,
                          similarity.sim_doc_pair(query_matrix, method_matrix,
                                                  query_idf_vector, api.methods_idf_vector[i]))
        so_sim = api_classes[class_name]
        # Harmonic mean of both similarities; 0.0 when both are zero instead
        # of raising ZeroDivisionError.
        total = doc_sim + so_sim
        api_sim[class_name] = 2 * doc_sim * so_sim / total if total else 0.0

    ranked = sorted(api_sim.items(), key=lambda item: item[1], reverse=True)

    recommended_api = list()
    for name, _ in ranked:
        recommended_api.append(name)
        if topk != -1 and len(recommended_api) >= topk:
            break
    return recommended_api
def summarize_api_method(api_method, top_questions, questions, javadoc, javadoc_dict_methods):
    """Print a summary of an API method and return its supporting material.

    The summary consists of the first sentence of the JavaDoc description, up
    to three relevant question titles (questions that offer a short code
    snippet are listed first) and up to three short code snippets.

    Args:
        api_method: Fully qualified method name (package.Class.method).
        top_questions: Dict keyed by question id. Only questions whose id is a
            key are inspected, and the mapped value is used as ranking score.
        questions: Iterable of question objects exposing ``id``, ``title`` and
            ``answers``; every answer exposes an HTML ``body``.
        javadoc: Iterable of API class objects exposing ``package_name``,
            ``class_name``, ``methods`` and ``methods_descriptions_pure_text``.
        javadoc_dict_methods: Dict mapping the text of a ``<code>`` element
            (cut at the first '(') to a fully qualified method name.

    Returns:
        A tuple ``(description, titles, code_snippets)``: the first sentence of
        the JavaDoc description ('' when the method is not found in
        ``javadoc``), a list of ``(question title, score)`` tuples sorted by
        descending score, and a dict mapping each of those titles to its list
        of snippet texts.
    """
    # Default for methods missing from javadoc; previously the name stayed
    # unbound and the final return raised UnboundLocalError.
    methods_descriptions_pure_text_changed = ''
    for api in javadoc:
        class_name = api.package_name + '.' + api.class_name
        for i, method in enumerate(api.methods):
            if class_name + '.' + method == api_method:
                print('>>>JavaDoc<<<')
                # First sentence only, with line breaks flattened and the
                # double spaces they leave behind collapsed.
                flattened = api.methods_descriptions_pure_text[i].replace('\n', ' ').replace('  ', ' ')
                methods_descriptions_pure_text_changed = flattened.split('.')[0] + '.'
                print(methods_descriptions_pure_text_changed)
                break

    titles = dict()
    code_snippets = dict()
    method_pure_name = api_method.split('.')[-1]

    for question in questions:
        if question.id not in top_questions:
            continue
        score = top_questions[question.id]
        contains_api = False
        for answer in question.answers:
            soup = BeautifulSoup(answer.body, 'html.parser', from_encoding='utf-8')

            for anchor in soup.find_all('a'):
                # Anchors without an href attribute used to raise KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                if 'docs.oracle.com/javase/' in href and '/api/' in href and 'html' in href:
                    # pair[0] is class name, pair[1] is method name
                    pair = util.parse_api_link(href)
                    if pair[1] != '' and pair[0] + '.' + pair[1] == api_method:
                        titles[question.title] = score
                        contains_api = True

            for code_tag in soup.find_all('code'):
                code = code_tag.get_text()
                pos = code.find('(')
                if pos != -1:
                    code = code[:pos]
                if code in javadoc_dict_methods and javadoc_dict_methods[code] == api_method:
                    titles[question.title] = score
                    contains_api = True

        if contains_api:
            # Keep the first <pre> of each answer when it is short and
            # actually calls the method.
            snippet_list = list()
            for answer in question.answers:
                soup = BeautifulSoup(answer.body, 'html.parser', from_encoding='utf-8')
                code_snippet = soup.find('pre')
                if code_snippet is None:
                    continue
                snippet_text = code_snippet.get_text()
                if snippet_text.count('\n') <= 5 and '.' + method_pure_name + '(' in snippet_text:
                    snippet_list.append(snippet_text)
            code_snippets[question.title] = snippet_list

    titles = sorted(titles.items(), key=lambda item: item[1], reverse=True)

    with_snippets = [title for title, _ in titles if len(code_snippets[title]) > 0]
    without_snippets = [title for title, _ in titles if len(code_snippets[title]) == 0]

    print('>>>Relevant Questions<<<')
    # Questions with a snippet come first; the rest only fill up to three.
    for rank, title in enumerate((with_snippets + without_snippets)[:3], 1):
        print(str(rank) + '.' + title)

    shown = with_snippets[:3]
    for rank, title in enumerate(shown, 1):
        if rank == 1:
            print('>>>Code Snippets<<<')
        print('/**********code snippet %d **********/' % rank)
        print(code_snippets[title][0])

    if not shown:
        print('\n-----------------------------------------------\n')
    else:
        print('-----------------------------------------------\n')

    return methods_descriptions_pure_text_changed, titles, code_snippets