def __init__(self, project_name):
    """Store the project name and create the scoring helpers.

    :param project_name: project identifier; kept on the instance and
        passed to ``CosineSimilarity``.
    """
    self.project_name = project_name
    # The similarity helper is constructed for this project's name.
    self.cs = CosineSimilarity(self.project_name)
    self.tf = TermFrequency()
class Process(object):
    """Rank the words of each request and score reviewers by similarity.

    Instance attributes (set in ``__init__``):

    * ``project_name`` -- identifier handed to the project-level data helpers.
    * ``cs`` -- ``CosineSimilarity`` instance built for ``project_name``.
    * ``tf`` -- ``TermFrequency`` instance used for ``tf_idf`` scoring.
    """

    def __init__(self, project_name):
        """Store the project name and create the scoring helpers.

        :param project_name: project identifier; kept on the instance and
            passed to ``CosineSimilarity``.
        """
        self.project_name = project_name
        self.cs = CosineSimilarity(self.project_name)
        self.tf = TermFrequency()

    def get_corpus(self, train_time):
        """Load the request records and count the words of each one.

        Every record returned by ``get_request_title_description`` is read as
        a mapping whose first ``(key, value)`` pair is ``(number, data)``.
        ``data`` is passed through ``delete_stop_words`` and the result is
        counted with ``Counter``.

        :param train_time: forwarded unchanged to
            ``get_request_title_description``.
        :return: ``(key_list, word_list)`` -- two parallel lists holding the
            record numbers and the matching ``Counter`` objects.
        """
        request_infos = get_request_title_description(
            self.project_name, train_time)
        key_list = []
        word_list = []
        for request_info in request_infos:
            if not request_info:
                # An empty record has no number to index; skip it instead of
                # failing on the first-pair lookup below.
                continue
            number, data = next(iter(request_info.items()))
            key_list.append(number)
            word_list.append(Counter(delete_stop_words(data)))
        return key_list, word_list

    def get_corpus_result(self, train_time):
        """Return, per record, its words ordered by descending TF-IDF score.

        The elapsed scoring time is printed to stdout.

        :param train_time: forwarded unchanged to :meth:`get_corpus`.
        :return: dict mapping each record number to the list of its words,
            highest ``tf_idf`` score first; equally scored words keep the
            iteration order of their counter.
        """
        key_list, count_list = self.get_corpus(train_time)
        start = time.time()
        score_dict = {}
        for key, count in zip(key_list, count_list):
            word_score = {
                word: self.tf.tf_idf(word, count, count_list)
                for word in count
            }
            # Sort the words directly; building an intermediate dict from the
            # sorted pairs would only work where dicts keep insertion order.
            score_dict[key] = sorted(
                word_score, key=word_score.get, reverse=True)
        end = time.time()
        print('spend time is {}'.format(end - start))
        return score_dict

    def cos_score(self, new, past, reviewer, k):
        """Accumulate, per reviewer, the similarity of ``new`` to past requests.

        For every entry of ``past`` that has words and at least one reviewer,
        ``self.cs.score(new, past_word, number)`` is computed and added to the
        running total of each reviewer of that entry.

        :param new: words of the request being scored; when empty or ``None``
            the result is ``[]``.
        :param past: dict mapping a request number to that request's words.
        :param reviewer: dict mapping a request number to its reviewers.
        :param k: the number of pull request; currently unused and kept only
            so existing call sites stay valid.
        :return: list of ``(reviewer, total_score)`` tuples sorted by total
            score, highest first, or ``[]`` when no total is above zero.
        """
        # ``new`` does not change inside the loop, so test it only once.
        if not new:
            return []

        totals = {}
        for number, past_word in past.items():
            past_reviewer = reviewer.get(number)
            if not past_word or not past_reviewer:
                continue
            score = self.cs.score(new, past_word, number)
            for name in past_reviewer:
                if name in totals:
                    totals[name] += score
                else:
                    totals[name] = score

        ranked = sorted(
            totals.items(), key=lambda item: item[1], reverse=True)
        # NOTE(review): the zero-score test only chooses between "all
        # candidates" and "none"; zero-score candidates are still returned
        # when any positive score exists, and ``k`` is not applied. Confirm
        # whether the list was meant to be filtered and cut to ``k`` entries.
        if not any(total > 0 for _, total in ranked):
            return []
        return ranked

    def cos_test_info(self, train_time):
        """Collect the inputs needed for similarity scoring.

        The reviewer data is printed to stdout.

        :param train_time: forwarded unchanged to :meth:`get_corpus_result`.
        :return: ``(info_dict, reviewer)`` -- the ranked words per record and
            the result of ``get_reviewer`` for this project.
        """
        # Ranked word lists, keyed by record number.
        info_dict = self.get_corpus_result(train_time)
        # Reviewer data for this project, as returned by ``get_reviewer``.
        reviewer = get_reviewer(self.project_name)
        print(reviewer)
        return info_dict, reviewer