    def load_data(self):
        ref_miner_dir = os.path.join(get_dataset_dir(), "ref_miner")
        print("Loading packages efforts...")
        self._pkg_analyzer.load(
            os.path.join(ref_miner_dir, "ref_package_ex.csv"))
        print("Done")
        self._release_mgr.load(
            os.path.join(ref_miner_dir, "ref_release_summ.csv"))
            pkt_efforts.total_ph = float(ll[6])
            pkt_efforts.percent = float(ll[7])

    def get_pkg_efforts(self, proj, rel, pkg):
        return self.efforts_mapping.get((proj, rel, pkg))

    def get_pkgs_ranks(self, proj, release, packages):
        pkgs_data = dict()
        for pkg in packages:
            key = proj, release, pkg
            pkg_info = self.efforts_mapping.get(key)
            if not pkg_info:
                ph = 0
            else:
                ph = pkg_info.ref_ph
            pkgs_data[pkg] = [ph, 0]
        sorted_rank = sorted(
            pkgs_data.items(), key=lambda x: x[1][0], reverse=True)
        rank = 1
        for pkg, _ in sorted_rank:
            pkgs_data[pkg][1] = rank
            rank += 1
        return pkgs_data


if __name__ == "__main__":
    ref_miner_dir = os.path.join(get_dataset_dir(), "ref_miner")
    g_input_file = os.path.join(ref_miner_dir, "ref_package.csv")
    g_output_file = os.path.join(ref_miner_dir, "ref_package_ex.csv")
    PackageEffortsAnalyzer.generate(g_input_file, g_output_file)
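# A self-contained sketch of the ranking scheme get_pkgs_ranks implements:
# packages are sorted by refactoring person-hours (descending) and assigned
# consecutive ranks starting at 1; packages absent from the efforts mapping
# rank last with 0 hours. The helper and package names below are
# hypothetical, for illustration only.
def _rank_by_hours(hours_by_pkg):
    ranked = dict()
    ordered = sorted(hours_by_pkg.items(), key=lambda x: x[1], reverse=True)
    for rank, (pkg, hours) in enumerate(ordered, start=1):
        ranked[pkg] = [hours, rank]
    return ranked

# _rank_by_hours({"org.app.core": 12.5, "org.app.io": 3.1, "org.app.ui": 0})
# -> {"org.app.core": [12.5, 1], "org.app.io": [3.1, 2], "org.app.ui": [0, 3]}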
            req = self._auth_req(commit_url)
            try:
                with self.opener.open(req) as response:
                    data_str = response.read()
            except URLError as exc:
                print("Request failed for %s: %s" % (commit_url, str(exc)))
                continue
            commit_data = json.loads(data_str)
            commit_date = commit_data["commit"]["author"]["date"]
            tags.append((project, tag_name, commit_date))
        if not data:
            done = True
        else:
            page += 1
    return tags


if __name__ == "__main__":
    data_dir = get_dataset_dir()
    passwd = getpass.getpass("Please enter github password:")
    # NOTE: the following lines were redacted in the source; the
    # communicator object (comm) and the project list (projects) used
    # below are created here.
    filep = open(os.path.join(data_dir, "project_releases.csv"), "a")
    for proj in projects:
        print("handling project:", proj)
        g_tags = comm.get_tags(proj)
        for _, g_tag_name, g_commit_date in g_tags:
            filep.write("%s,%s,%s\n" % (proj, g_tag_name, g_commit_date))
        filep.flush()
    filep.close()
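# A minimal sketch of the request flow get_tags relies on, using only the
# standard library and no authentication (unauthenticated calls are heavily
# rate-limited by GitHub). It walks the paginated /repos/{owner}/{repo}/tags
# endpoint and resolves each tag's commit date from the per-commit URL that
# each tag entry carries; the repo argument is a placeholder.
import json
import urllib.request


def fetch_tag_dates(repo, per_page=100):
    tags = []
    page = 1
    while True:
        url = ("https://api.github.com/repos/%s/tags?per_page=%d&page=%d"
               % (repo, per_page, page))
        with urllib.request.urlopen(url) as resp:
            data = json.loads(resp.read())
        if not data:
            break
        for tag in data:
            with urllib.request.urlopen(tag["commit"]["url"]) as resp:
                commit = json.loads(resp.read())
            tags.append((tag["name"], commit["commit"]["author"]["date"]))
        page += 1
    return tags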
    def analyze_projects(self):
        work_book = Workbook()
        ws1 = work_book.active
        ws1.title = "All Projects"
        ws2 = work_book.create_sheet("Filtered Projects")
        parent_designite_path = os.path.join(get_dataset_dir(), "designite")
        out_folder = os.path.join(get_dataset_dir(), "designite_analysis")
        if not os.path.isdir(out_folder):
            os.mkdir(out_folder)
        ws1.append(ProjectInfo.get_headers())
        ws2.append(ProjectInfo.get_headers())
        cc_list = list()
        classes_list = list()
        pkg_list = list()
        arch_smells_list = list()
        design_smells_list = list()
        # ARFF datasets: 3- and 5-level effort labels, each in a plain and a
        # normal-distribution ("_nd") variant, plus an extended CSV.
        fp3 = open(os.path.join(out_folder, "pkg_efforts_3.arff"), "w")
        fp3_nd = open(os.path.join(out_folder, "pkg_efforts_3_nd.arff"), "w")
        fp5 = open(os.path.join(out_folder, "pkg_efforts_5.arff"), "w")
        fp5_nd = open(os.path.join(out_folder, "pkg_efforts_5_nd.arff"), "w")
        fp_ext = open(os.path.join(out_folder, "pkg_efforts_ext.csv"), "w")
        fp3.write(PackageInfo.get_arff_header("Efforts-3-Levels"))
        fp_ext.write(PackageInfo.get_extended_header())
        fp3_nd.write(
            PackageInfo.get_arff_header(
                "Efforts-3-Levels-Normal-Distribution"))
        fp5.write(PackageInfo.get_arff_header("Efforts-5-Levels"))
        fp5_nd.write(
            PackageInfo.get_arff_header(
                "Efforts-5-Levels-Normal-Distribution"))
        for proj, releases in DesigniteProjects.PROJECTS.items():
            for release, next_release in releases.items():
                print("Analyzing project:", proj, "release:", release)
                proj_path = "%s-%s" % (proj, release)
                designite_path = os.path.join(
                    parent_designite_path, proj, release)
                proj_out_path = os.path.join(out_folder, proj_path)
                if not os.path.isdir(proj_out_path):
                    os.mkdir(proj_out_path)
                proj_analyzer = ProjectSmellsAnalyzer(
                    proj, release, next_release, designite_path,
                    proj_out_path, self._pkg_analyzer, self._release_mgr)
                proj_analyzer.analyze_smells()
                for pkg, pkg_info in proj_analyzer.packages_info.items():
                    if pkg == "<All packages>":
                        continue
                    fp3.write(pkg_info.get_arff_3())
                    fp3_nd.write(pkg_info.get_arff_3_nd())
                    fp5.write(pkg_info.get_arff_5())
                    fp5_nd.write(pkg_info.get_arff_5_nd())
                    fp_ext.write(
                        pkg_info.get_extended_data(proj, release, pkg))
                proj_analyzer.save()
                # Only statistically significant correlations (p < 0.05)
                # go to the filtered sheet and summary statistics.
                if proj_analyzer.proj_info.rank_p < 0.05:
                    ws2.append(proj_analyzer.proj_info.to_tuple())
                    cc_list.append(proj_analyzer.proj_info.rank_cc)
                    design_smells_list.append(
                        proj_analyzer.proj_info.design_smells)
                    arch_smells_list.append(
                        proj_analyzer.proj_info.arch_smells)
                    classes_list.append(proj_analyzer.proj_info.classes)
                    pkg_list.append(proj_analyzer.proj_info.packages)
                ws1.append(proj_analyzer.proj_info.to_tuple())
        fp3.close()
        fp3_nd.close()
        fp5.close()
        fp5_nd.close()
        fp_ext.close()
        ws2.append(("", ))
        ws2.append(("Median", numpy.median(cc_list)))
        ws2.append(("Mean", numpy.mean(cc_list)))
        ws2.append(("Stdev", numpy.std(cc_list)))
        ws2.append(("Min", numpy.min(cc_list)))
        ws2.append(("Max", numpy.max(cc_list)))
        # Flag values above 0.05 (presumably the rank p-value column) in red.
        data_range = "H2:H%d" % (len(cc_list) + 1)
        red_fill = PatternFill(start_color='EE1111', end_color='EE1111',
                               fill_type='solid')
        ws1.conditional_formatting.add(
            data_range,
            CellIsRule(operator='greaterThan', formula=['0.05'],
                       stopIfTrue=False, fill=red_fill))
        filename = os.path.join(out_folder, "projects_analysis.xlsx")
        col_pos = ProjectInfo.get_headers().index("Rank CC") + 1
        _add_chart(ws2, (col_pos, ), len(cc_list) + 1,
                   "Spearman Correlation", len(cc_list) + 10)
        self._analyze_correlation(cc_list, classes_list, pkg_list,
                                  arch_smells_list, design_smells_list)
        work_book.save(filename)
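# The rank_cc / rank_p fields consumed above look like a Spearman rank
# correlation coefficient and its p-value (the 0.05 threshold and the
# "Spearman Correlation" chart title both point that way). A minimal sketch
# of how such a pair is typically computed; scipy is an assumption here,
# since the visible code does not show what ProjectSmellsAnalyzer uses:
from scipy import stats


def rank_correlation(smell_ranks, effort_ranks):
    # Returns (correlation coefficient, two-sided p-value).
    cc, p_value = stats.spearmanr(smell_ranks, effort_ranks)
    return cc, p_value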
    def main(cls):
        dataset_dir = get_dataset_dir()
        ref_miner_dir = os.path.join(dataset_dir, "ref_miner")
        ref_miner_file = open(
            os.path.join(ref_miner_dir, "ref_miner.csv"), "w")
        ref_release_file = open(
            os.path.join(ref_miner_dir, "ref_release.csv"), "w")
        ref_package_file = open(
            os.path.join(ref_miner_dir, "ref_package.csv"), "w")
        ref_release_summ_file = open(
            os.path.join(ref_miner_dir, "ref_release_summ.csv"), "w")
        proj_man = ProjectsMgr()
        for proj in proj_man:
            ref_data = dict()
            commit_summ = dict()
            release_sum = dict()
            release_total = dict()
            commits_files = dict()
            file_package_map = dict()
            print(proj)
            ref_tbl = RefactoringMinerTable(proj)
            commits_tbl = CommitChangesTable(proj)
            release_tbl = CommitReleaseMgr(proj)
            print("data loaded")
            # Match every mined refactoring to the concrete file change in
            # its commit, keyed by the change's size and complexity metrics.
            for commit_hash, refactoring_type, file_name, package in ref_tbl:
                commit_details = commits_tbl.get_commit_details(commit_hash)
                if not commit_details:
                    continue
                commit_files = commits_files.setdefault(commit_hash, set())
                key = None
                for change in commit_details.changes:
                    if file_name in change.file_path:
                        key = (commit_hash, change.file_path,
                               change.change_type, change.lines_added,
                               change.nloc, change.complexity, package)
                        ref_data_item = ref_data.setdefault(key, list())
                        ref_data_item.append(refactoring_type)
                        commit_files.add(change.file_path)
                        file_package_map[change.file_path] = package
                        break
                if not key:
                    print("File %s not found in commit %s"
                          % (file_name, commit_hash))
            # Aggregate added lines per commit and per (release, package).
            release_packages = dict()
            for (commit_hash, file_path, change_type, lines_added, nloc,
                    complexity, package), ref_types in ref_data.items():
                release = release_tbl.get_commit_release(commit_hash)
                rel_pkg_data = release_packages.setdefault(release, dict())
                pkg_data = rel_pkg_data.setdefault(package, 0)
                commit_loc = commit_summ.setdefault(commit_hash, 0)
                lines_added = int(lines_added)
                commit_loc += lines_added
                pkg_data += lines_added
                rel_pkg_data[package] = pkg_data
                commit_summ[commit_hash] = commit_loc
                ref_miner_file.write(
                    "%s,%s,%s,%s,%s,%s,%s,%s\n" %
                    (proj, commit_hash, file_path, change_type, lines_added,
                     nloc, complexity, ";".join(ref_types)))
            # Total added lines per release, across all commits.
            for commit, commit_details in commits_tbl.items():
                release = release_tbl.get_commit_release(commit)
                curr_change = release_total.setdefault(release, 0)
                for commit_change in commit_details.changes:
                    curr_change += commit_change.lines_added
                    # file_path = commit_change[0]
                release_total[release] = curr_change
            # Convert LOC to effort: 2.94 person-months per KLOC
            # (COCOMO-style), at 176 working hours per month.
            for commit_hash, commit_loc in commit_summ.items():
                release = release_tbl.get_commit_release(commit_hash)
                hours = 2.94 * commit_loc / 1000. * 176
                release_hours = release_sum.setdefault(release, 0)
                release_hours += hours
                release_sum[release] = release_hours
                ref_release_file.write(
                    "%s,%s,%s,%d,%0.2f\n" %
                    (proj, commit_hash, release, commit_loc, hours))
            for release, release_hours in release_sum.items():
                release_loc = release_total.get(release)
                months = release_hours / 176
                release_total_months = 2.94 * release_loc / 1000.
                percent = months / release_total_months * 100.
                ref_release_summ_file.write(
                    "%s,%s,%0.1f,%0.1f,%0.1f\n" %
                    (proj, release, months, release_total_months, percent))
            for release, release_data in release_packages.items():
                for pkg, pkg_data in release_data.items():
                    pkg_hours = 2.94 * pkg_data / 1000. * 176
                    ref_package_file.write(
                        "%s,%s,%s,%s,%0.1f\n" %
                        (proj, release, pkg, pkg_data, pkg_hours))
        ref_miner_file.close()
        ref_release_file.close()
        ref_release_summ_file.close()
        ref_package_file.close()
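# The effort conversions above appear to follow a COCOMO-style heuristic:
# 2.94 person-months per KLOC, at 176 working hours per month. A minimal
# worked example of the same arithmetic (the helper names are illustrative):
def loc_to_person_months(loc, pm_per_kloc=2.94):
    return pm_per_kloc * loc / 1000.0


def loc_to_person_hours(loc, pm_per_kloc=2.94, hours_per_month=176):
    return loc_to_person_months(loc, pm_per_kloc) * hours_per_month


# 500 changed lines -> 1.47 person-months -> 258.72 person-hours.
assert abs(loc_to_person_months(500) - 1.47) < 1e-6
assert abs(loc_to_person_hours(500) - 258.72) < 1e-6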