Пример #1
0
 def load_data(self):
     """Load the package-efforts and release-summary CSVs from the
     ref_miner dataset directory into the two analyzer components."""
     base_dir = os.path.join(get_dataset_dir(), "ref_miner")
     print("Loading packages efforts...")
     self._pkg_analyzer.load(os.path.join(base_dir, "ref_package_ex.csv"))
     print("Done")
     self._release_mgr.load(os.path.join(base_dir, "ref_release_summ.csv"))
Пример #2
0
                pkt_efforts.total_ph = float(ll[6])
                pkt_efforts.percent = float(ll[7])

    def get_pkg_efforts(self, proj, rel, pkg):
        """Return the efforts record for (proj, rel, pkg), or None if the
        triple is not present in the mapping."""
        lookup_key = (proj, rel, pkg)
        return self.efforts_mapping.get(lookup_key)

    def get_pkgs_ranks(self, proj, release, packages):
        """Rank the given packages of (proj, release) by refactoring effort.

        Returns a dict mapping each package to [effort, rank], where effort
        is the package's ref_ph (0 when the package has no record) and rank
        is 1 for the highest effort, 2 for the next, and so on.
        """
        pkgs_data = {}
        for pkg in packages:
            info = self.efforts_mapping.get((proj, release, pkg))
            effort = info.ref_ph if info else 0
            pkgs_data[pkg] = [effort, 0]
        # Highest effort first; then write the 1-based position back in.
        ordered = sorted(
            pkgs_data.items(), key=lambda item: item[1][0], reverse=True)
        for position, (pkg, _) in enumerate(ordered, start=1):
            pkgs_data[pkg][1] = position
        return pkgs_data


if __name__ == "__main__":
    # Regenerate the extended package-efforts CSV from the raw
    # RefactoringMiner package output.
    g_ref_miner_dir = os.path.join(get_dataset_dir(), "ref_miner")
    g_input_file = os.path.join(g_ref_miner_dir, "ref_package.csv")
    g_output_file = os.path.join(g_ref_miner_dir, "ref_package_ex.csv")
    PackageEffortsAnalyzer.generate(g_input_file, g_output_file)
Пример #3
0
                req = self._auth_req(commit_url)
                try:
                    with self.opener.open(req) as response:
                        data_str = response.read()
                except URLError as exc:
                    print("Request failed for %s: %s" % (commit_url, str(exc)))
                    continue
                commit_data = json.loads(data_str)
                commit_date = commit_data["commit"]["author"]["date"]
                tags.append((project, tag_name, commit_date))
            if not data:
                done = True
            else:
                page += 1
        return tags


if __name__ == "__main__":
    # Collect release tags (with commit dates) for every project and append
    # them to project_releases.csv.
    data_dir = get_dataset_dir()
    # NOTE(review): the original line here was corrupted by credential
    # redaction ('"******"' fused the getpass call with the open() call).
    # Reconstructed as a password prompt plus the output-file open it was
    # merged with -- confirm against the original source. `comm` and
    # `projects` are presumably set up in the redacted portion as well.
    passwd = getpass.getpass("Please enter github password:")
    # `with` guarantees the CSV is closed even if a request loop raises
    # (the original relied on an explicit close()).
    with open(os.path.join(data_dir, "project_releases.csv"), "a") as filep:
        for proj in projects:
            print("handing project:", proj)
            g_tags = comm.get_tags(proj)
            for _, g_tag_name, g_commit_date in g_tags:
                filep.write("%s,%s,%s\n" % (proj, g_tag_name, g_commit_date))
            filep.flush()
Пример #4
0
    def analyze_projects(self):
        """Run the smells analysis over every configured project release.

        Writes, under <dataset>/designite_analysis:
          * four ARFF effort files (3/5 levels, raw and
            normal-distribution variants),
          * an extended per-package CSV,
          * projects_analysis.xlsx with an "All Projects" sheet and a
            "Filtered Projects" sheet restricted to projects whose rank
            p-value is below 0.05 (plus summary statistics and a chart).
        Finally feeds the collected per-project metrics to
        self._analyze_correlation().
        """
        import contextlib  # local import: keeps file-level imports untouched

        work_book = Workbook()
        ws1 = work_book.active
        ws1.title = "All Projects"
        ws2 = work_book.create_sheet("Filtered Projects")
        parent_designite_path = os.path.join(get_dataset_dir(), "designite")
        out_folder = os.path.join(get_dataset_dir(), "designite_analysis")
        if not os.path.isdir(out_folder):
            os.mkdir(out_folder)
        ws1.append(ProjectInfo.get_headers())
        ws2.append(ProjectInfo.get_headers())
        # Metrics of the statistically significant projects only; used for
        # the summary rows on ws2 and the correlation analysis at the end.
        cc_list = []
        classes_list = []
        pkg_list = []
        arch_smells_list = []
        design_smells_list = []

        # ExitStack closes every output file even if an analyzer raises
        # mid-loop (the original open()/close() pairs leaked on error).
        with contextlib.ExitStack() as stack:

            def _out(name):
                # Open an output file inside out_folder, registered for
                # cleanup on the stack.
                return stack.enter_context(
                    open(os.path.join(out_folder, name), "w"))

            fp3 = _out("pkg_efforts_3.arff")
            fp3_nd = _out("pkg_efforts_3_nd.arff")
            fp5 = _out("pkg_efforts_5.arff")
            fp5_nd = _out("pkg_efforts_5_nd.arff")
            fp_ext = _out("pkg_efforts_ext.csv")

            fp3.write(PackageInfo.get_arff_header("Efforts-3-Levels"))
            fp_ext.write(PackageInfo.get_extended_header())
            fp3_nd.write(
                PackageInfo.get_arff_header(
                    "Efforts-3-Levels-Normal-Distribution"))
            fp5.write(PackageInfo.get_arff_header("Efforts-5-Levels"))
            fp5_nd.write(
                PackageInfo.get_arff_header(
                    "Efforts-5-Levels-Normal-Distribution"))
            for proj, releases in DesigniteProjects.PROJECTS.items():
                for release, next_release in releases.items():
                    print("Analyzing project:", proj, "release:", release)
                    proj_path = "%s-%s" % (proj, release)
                    designite_path = os.path.join(parent_designite_path, proj,
                                                  release)
                    proj_out_path = os.path.join(out_folder, proj_path)
                    if not os.path.isdir(proj_out_path):
                        os.mkdir(proj_out_path)
                    proj_analyzer = ProjectSmellsAnalyzer(
                        proj, release, next_release, designite_path,
                        proj_out_path, self._pkg_analyzer, self._release_mgr)

                    proj_analyzer.analyze_smells()
                    for pkg, pkg_info in proj_analyzer.packages_info.items():
                        # Synthetic aggregate entry, not a real package.
                        if pkg == "<All packages>":
                            continue
                        fp3.write(pkg_info.get_arff_3())
                        fp3_nd.write(pkg_info.get_arff_3_nd())
                        fp5.write(pkg_info.get_arff_5())
                        fp5_nd.write(pkg_info.get_arff_5_nd())
                        fp_ext.write(pkg_info.get_extended_data(
                            proj, release, pkg))
                    proj_analyzer.save()
                    # Only statistically significant correlations (p < 0.05)
                    # go to the filtered sheet and the metric lists.
                    if proj_analyzer.proj_info.rank_p < 0.05:
                        ws2.append(proj_analyzer.proj_info.to_tuple())
                        cc_list.append(proj_analyzer.proj_info.rank_cc)
                        design_smells_list.append(
                            proj_analyzer.proj_info.design_smells)
                        arch_smells_list.append(
                            proj_analyzer.proj_info.arch_smells)
                        classes_list.append(proj_analyzer.proj_info.classes)
                        pkg_list.append(proj_analyzer.proj_info.packages)
                    ws1.append(proj_analyzer.proj_info.to_tuple())
        # Summary statistics of the rank-correlation coefficients.
        # NOTE(review): numpy raises on an empty cc_list (i.e. no project
        # with p < 0.05) -- confirm that case cannot occur.
        ws2.append(("", ))
        ws2.append(("Median", numpy.median(cc_list)))
        ws2.append(("Mean", numpy.mean(cc_list)))
        ws2.append(("Stdev", numpy.std(cc_list)))
        ws2.append(("Min", numpy.min(cc_list)))
        ws2.append(("Max", numpy.max(cc_list)))
        # Highlight cells above 0.05 in column H of the all-projects sheet.
        # NOTE(review): the range length comes from cc_list (filtered
        # projects) but the rule is applied to ws1 (all projects) -- confirm
        # this is intended.
        data_range = "H2:H%d" % (len(cc_list) + 1)
        red_fill = PatternFill(start_color='EE1111',
                               end_color='EE1111',
                               fill_type='solid')
        ws1.conditional_formatting.add(
            data_range,
            CellIsRule(operator='greaterThan',
                       formula=[0.05],
                       stopIfTrue=False,
                       fill=red_fill))
        filename = os.path.join(out_folder, "projects_analysis.xlsx")
        col_pos = ProjectInfo.get_headers().index("Rank CC") + 1
        _add_chart(ws2, (col_pos, ),
                   len(cc_list) + 1, "Spearman Correlation",
                   len(cc_list) + 10)
        self._analyze_correlation(cc_list, classes_list, pkg_list,
                                  arch_smells_list, design_smells_list)
        work_book.save(filename)
Пример #5
0
    def main(cls):
        """Generate the RefactoringMiner effort CSVs for every project.

        For each project, joins the refactoring table with per-commit change
        details and the commit-to-release mapping, then writes under
        <dataset>/ref_miner:
          * ref_miner.csv        -- one row per refactored file change,
          * ref_release.csv      -- per-commit effort (hours) with release,
          * ref_release_summ.csv -- per-release effort summary (months, %),
          * ref_package.csv      -- per-release, per-package effort.
        """
        dataset_dir = get_dataset_dir()
        ref_miner_dir = os.path.join(dataset_dir, "ref_miner")
        # NOTE(review): the four output files are opened without try/finally
        # (or `with`), so the handles leak if any project raises mid-run.
        ref_miner_file = open(os.path.join(ref_miner_dir, "ref_miner.csv"),
                              "w")
        ref_release_file = open(os.path.join(ref_miner_dir, "ref_release.csv"),
                                "w")
        ref_package_file = open(os.path.join(ref_miner_dir, "ref_package.csv"),
                                "w")
        ref_release_summ_file = open(
            os.path.join(ref_miner_dir, "ref_release_summ.csv"), "w")

        proj_man = ProjectsMgr()

        for proj in proj_man:
            # Per-project accumulators, rebuilt from scratch each project.
            ref_data = dict()        # (commit, file, change, ...) -> ref types
            commit_summ = dict()     # commit hash -> refactored lines added
            release_sum = dict()     # release -> refactoring effort in hours
            release_total = dict()   # release -> total lines added (all commits)
            commits_files = dict()   # commit hash -> set of refactored files
            file_package_map = dict()
            print(proj)
            ref_tbl = RefactoringMinerTable(proj)
            commits_tbl = CommitChangesTable(proj)
            release_tbl = CommitReleaseMgr(proj)
            print('data loaded')
            # Join each refactoring record with the matching file change in
            # its commit; records whose commit or file cannot be resolved are
            # skipped (with a warning for unresolved files).
            for commit_hash, refactoring_type, file_name, package in ref_tbl:
                commit_details = commits_tbl.get_commit_details(commit_hash)
                if not commit_details:
                    continue
                commit_files = commits_files.setdefault(commit_hash, set())
                key = None
                for change in commit_details.changes:
                    # Substring match: file_name from RefMiner may be a
                    # suffix of the full repository path.
                    if file_name in change.file_path:
                        key = (commit_hash, change.file_path,
                               change.change_type, change.lines_added,
                               change.nloc, change.complexity, package)
                        ref_data_item = ref_data.setdefault(key, list())
                        ref_data_item.append(refactoring_type)
                        commit_files.add(change.file_path)
                        file_package_map[change.file_path] = package
                        break
                if not key:
                    print("File %s not found in commit %s" %
                          (file_name, commit_hash))
            # Aggregate refactored lines per commit and per (release, package)
            # while emitting one ref_miner.csv row per joined record.
            release_packages = dict()
            for (commit_hash, file_path, change_type, lines_added, nloc,
                 complexity, package), ref_types in ref_data.items():
                release = release_tbl.get_commit_release(commit_hash)
                rel_pkg_data = release_packages.setdefault(release, dict())
                pkg_data = rel_pkg_data.setdefault(package, 0)
                commit_loc = commit_summ.setdefault(commit_hash, 0)
                # lines_added arrives as a string field; normalize once.
                lines_added = int(lines_added)
                commit_loc += lines_added
                pkg_data += lines_added
                rel_pkg_data[package] = pkg_data
                commit_summ[commit_hash] = commit_loc
                ref_miner_file.write(
                    "%s,%s,%s,%s,%s,%s,%s,%s\n" %
                    (proj, commit_hash, file_path, change_type, lines_added,
                     nloc, complexity, ";".join(ref_types)))
            # Total lines added per release across ALL commits (not only
            # refactoring commits) -- the denominator for the percentages.
            for commit, commit_details in commits_tbl.items():
                release = release_tbl.get_commit_release(commit)
                curr_change = release_total.setdefault(release, 0)
                for commit_change in commit_details.changes:
                    curr_change += commit_change.lines_added
                release_total[release] = curr_change

            # Effort model: 2.94 * KLOC appears to be a COCOMO-style
            # person-month estimate, and 176 the work hours per person-month
            # -- presumably; confirm the constants against the paper/model.
            for commit_hash, commit_loc in commit_summ.items():
                release = release_tbl.get_commit_release(commit_hash)
                hours = 2.94 * commit_loc / 1000. * 176
                release_hours = release_sum.setdefault(release, 0)
                release_hours += hours
                release_sum[release] = release_hours
                ref_release_file.write(
                    "%s,%s,%s,%d,%0.2f\n" %
                    (proj, commit_hash, release, commit_loc, hours))
            for release, release_hours in release_sum.items():
                release_loc = release_total.get(release)
                months = release_hours / 176
                # NOTE(review): release_loc is None when the release has no
                # entry in release_total (TypeError), and percent divides by
                # zero when release_loc == 0 -- confirm inputs preclude both.
                release_total_months = 2.94 * release_loc / 1000.
                percent = months / release_total_months * 100.
                ref_release_summ_file.write(
                    "%s,%s,%0.1f,%0.1f,%0.1f\n" %
                    (proj, release, months, release_total_months, percent))
            for release, release_data in release_packages.items():
                for pkg, pkg_data in release_data.items():
                    # Same hours formula as above, applied per package.
                    pkh_hours = 2.94 * pkg_data / 1000. * 176
                    ref_package_file.write(
                        "%s,%s,%s,%s,%0.1f\n" %
                        (proj, release, pkg, pkg_data, pkh_hours))

        ref_miner_file.close()
        ref_release_file.close()
        ref_release_summ_file.close()
        ref_package_file.close()