Example #1
def organize_bin_versions(project: Project):
    general_log = logging.getLogger(__name__)
    success_log = logging.getLogger("success")
    failure_log = logging.getLogger("failure")
    failure_verbose_log = logging.getLogger("failure_verbose")

    general_log.info("organizing bin versions for {0}".format(project.github()))
    try:
        data = Config().config['CACHING']['RepositoryData']
        selected = Config().config['DATA_EXTRACTION']['SelectedVersionsBin']
        path = os.path.join(data, selected, project.github() + ".csv")
        in_path = Config.get_work_dir_path(path)
        versions = "versions"
        Path(versions).mkdir(parents=True, exist_ok=True)
        dest_path = os.path.join(versions, project.github() + ".csv")
        if os.path.exists(in_path):
            df = pd.read_csv(in_path)
            df.head(8).drop(columns=["start", "step", "stop"]).to_csv(dest_path, index=False)

        success_log.info("organize | bin | Succeeded to organize {0}".format(project.github()))
        return path

    except Exception as e:
        failure_log.error("organize | bin | Failed to organize {0}".format(project.github()))
        failure_verbose_log.exception("organize | bin | Failed to organize {0}".format(project.github()))
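The snippet logs through the named loggers success, failure, and failure_verbose but never configures them; the setup below is a minimal sketch of the handler wiring it presumably relies on (the log file names and format are assumptions, not taken from the project).

import logging

def setup_loggers():
    # Hypothetical setup: route each named logger used above to its own file.
    # The logger names match the snippet; file names are assumptions.
    for name, path in [("success", "success.log"),
                       ("failure", "failure.log"),
                       ("failure_verbose", "failure_verbose.log")]:
        logger = logging.getLogger(name)
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(path)
        handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
        logger.addHandler(handler)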
Example #2
 def test_project_update(self):
     a = Project('Project A', 'Work on A')
     project_id = self._project_manager.add_project(a)
     b = Project('Project B', 'Work on B')
     self._project_manager.update_project(project_id, b)
     updated_project = self._project_manager.find_project_by_id(project_id)
     self.assertEqual(updated_project.name, b.name)
     self.assertEqual(updated_project.description, b.description)
Example #3
 def project(context, l, c, namespace, name):
     activeGitlab = context.obj['active_gitlab']
     project = Project(projects=activeGitlab.projects, namespaces=activeGitlab.namespaces)
     if l:  # -l flag: list the paths of all projects
         projects = project.listProjectsPaths()
         for proj in projects:
             print(proj)
     if c:  # -c flag: create a new project
         project.createProject(namespace=namespace, name=name)
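The signature and the context.obj lookup suggest a Click command; the decorator stack below is only a sketch of the wiring this command assumes, with flag and option names guessed from the parameters.

import click

@click.command()
@click.option('-l', is_flag=True, help='List the paths of all projects.')
@click.option('-c', is_flag=True, help='Create a new project.')
@click.option('--namespace', default=None)
@click.option('--name', default=None)
@click.pass_context
def project(context, l, c, namespace, name):
    ...  # body as in the example above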
Example #4
    def test_listProjectsPaths(self, list):
        print('starting now to auth')
        auth = Auth(username=logindetails.user, token=logindetails.token)
        activeGitlab = auth.authorizeUser()

        # create a Project object called project
        project = Project(projects=activeGitlab.projects, namespaces=activeGitlab.namespaces)
        # call the function to listProject paths - The API list function is mocked to return the Mockproject objects
        projpaths = project.listProjectsPaths()
        print(projpaths)
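The extra list parameter is the tell-tale of a mock.patch decorator, which injects the patched callable into the test after self. The decorator below is a guess at what sits above this test; the patch target is an assumption, not taken from the source.

from unittest import mock

# Assumed decorator: patch the API call that returns the project list so
# listProjectsPaths() sees the mocked Mockproject objects.
@mock.patch('gitlab.v4.objects.ProjectManager.list')
def test_listProjectsPaths(self, list):
    ...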
Example #5
 def test_project_remove(self):
     a = Project('Project A', 'Work on A')
     a_id = self._project_manager.add_project(a)
     b = Project('Project B', 'Work on B')
     b_id = self._project_manager.add_project(b)
     self.assertEqual(self._project_manager.count_projects(), 2)
     self._project_manager.remove_project(a_id)
     project = self._project_manager.find_project_by_id(b_id)
     self.assertEqual(project.name, 'Project B')
     self.assertEqual(self._project_manager.count_projects(), 1)
Example #6
 def test_find_project_by_id(self):
     a = Project('Project A', 'Work on A')
     b = Project('Project B', 'Work on B')
     a_id = self._project_manager.add_project(a)
     b_id = self._project_manager.add_project(b)
     project = self._project_manager.find_project_by_id(a_id)
     self.assertEqual(project.name, 'Project A')
     self.assertEqual(project.description, 'Work on A')
     project = self._project_manager.find_project_by_id(b_id)
     self.assertEqual(project.name, 'Project B')
     self.assertEqual(project.description, 'Work on B')
Example #7
 def test_find_projects_by_name(self):
     a = Project('Initialization', 'Work on A')
     b = Project('Progress', 'Work on B')
     c = Project('Final steps', 'Work on C')
     self._project_manager.add_project(a)
     self._project_manager.add_project(b)
     self._project_manager.add_project(c)
     result = self._project_manager.find_projects_by_name('in')
     self.assertEqual(len(result), 2)
     result = self._project_manager.find_projects_by_name('in progress')
     self.assertEqual(len(result), 0)
     result = self._project_manager.find_projects_by_name('PROGRESS')
     self.assertEqual(len(result), 1)
Example #8
 def __init__(self, extractor_name, project: Project, version, repo=None):
     self.extractor_name = extractor_name
     self.project = project
     self.project_name = project.github()
     self.version = version
     self.config = Config().config
     self.runner = self._get_runner(self.config, extractor_name)
     if repo is None:
         repo = Repo(project.jira(), project.github(), project.path(),
                     version)
     self.local_path = os.path.realpath(repo.local_path)
     self.file_analyser = JavaParserFileAnalyser(self.local_path,
                                                 self.project_name,
                                                 self.version)
     self.data: Data = None
Example #9
def main():
    print("Loading data. This may take some time.")
    records = [Record(project) for project in Project.available_projects()]
    show_rates_table(records)
    method_category_score(records)
    method_score_distributions(records)
    plt.show()
Example #10
File: gaudi.py Project: brettviren/garpi
 def download(self):
     url = self.url()
     print('gaudi url: "%s"' % url)
     print('gaudi tag: "%s"' % self.tag())
     # Gaudi has a peculiar repository
     if url[:3] == 'git': return self._download_git_monolithic()
     if url[:3] == 'svn' and 'cern.ch' in url: self._download_cern_svn()
     return Project.download(self)
Example #11
 def __init__(self, server):
     super(WorkbookManager, self).__init__(server)
     self.sample_project_id = None
     sample_project_entry = Project.get_by_name(
                                             self.server.environment.envid,
                                             'Tableau Samples')
     if sample_project_entry:
         self.sample_project_id = sample_project_entry.id
Example #12
File: app.py Project: raikel/anvid
 def loadProjectFile(self, filePath):
     try:
         projectSettings = Project.load(filePath)
     except Exception as err:
         logger.error(err)
         self.ui.errorMsg(f'Error loading project {filePath}.')
         self.updateRecentProjectsActions()
     else:
         self.loadProject(projectSettings)
Example #13
def main():
    print('Loading data. This may take a while...')
    projects = [(p.name, Record(p.descartes), Record(p.gregor)) for p in Project.available_projects()]
    print('Execution time:')
    time_table(projects)
    plot_times(projects)
    print('Number of mutants created:')
    mutants_table(projects)
    plot_mutants(projects)
    plt.show()
Example #14
 def add_project(self):
     user_name = input("User name: ")
     for account in Account.objects:
         if account.username == user_name:
             user_account = account
     self.title = input("Project title: ")
     self.contact = input("Project contact: ")
     self.results = input("Results: ")
     self.nsf_Aggreement = input("NSF Agreement (Yes or No): ")
     self.slide_collection_aggreement = input(
         "Slide Collection Agreement (Yes or No): ")
     self.other = input("Other: ")
     project = Project(project_title=self.title,
                       lead=user_account, manager=user_account,
                       contact=self.contact, results=self.results,
                       nsf_Aggreement=self.nsf_Aggreement,
                       slide_collection_aggreement=self.slide_collection_aggreement,
                       other=self.other)
     project.save()
     print(project.project_title)
Example #15
 def test_properties(self):
     members = [1, 2, 3]
     documents = [4, 5, 6]
     project = Project('project_1',
                       'The first project',
                       members=members,
                       documents=documents)
     self.assertEqual(project.name, 'project_1')
     self.assertEqual(project.description, 'The first project')
     self.assertEqual(project.members, [1, 2, 3])
     self.assertEqual(project.documents, [4, 5, 6])
Example #16
    def do_create(self, arg):
        """\nCreate a new project from scratch.

        create name=<string> [target=<string>] [version=<string>] [author=<string>]

            name     = new project name. Default = New Project.
            target   = project target FPGA/CPLD. Default = APF9328.
            version  = project version. Default = 1.0.
            author   = project author. Default = User Component.
        """
        args = CREATION_ARGS.parse(arg)
        if args:
            proj = Project()
            proj.name = args.name
            proj.version = args.version
            proj.category = args.category
            proj.target = args.target
            self.write("New project created.\n")
            settings.active_project = proj
        else:
            self.stdout.write("*** Arguments extraction error, creation canceled.\n")
Example #17
def get_data():
    for project in Project.available_projects():
        record = Record(project)
        non_accessible_methods = set(
            method_id(m) for m in project.methods
            if 'ACCESSIBLE' not in m['classifications'])
        all_methods = set(method_id(m) for m in project.methods)

        def ratio(a_set):
            # proportion of the set that is non-accessible (assumes a_set is non-empty)
            return len(a_set.intersection(non_accessible_methods)) / len(a_set)

        yield (project.name, ratio(record.pseudo_tested),
               ratio(record.methods_under_analysis), ratio(all_methods))
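get_data() yields one (name, pseudo-tested ratio, analysed ratio, overall ratio) tuple per project; a minimal consumer, with column labels that are mine rather than the project's:

print(f"{'project':<24} {'pseudo-tested':>14} {'analysed':>10} {'all':>6}")
for name, pseudo, analysed, overall in get_data():
    print(f"{name:<24} {pseudo:>14.2f} {analysed:>10.2f} {overall:>6.2f}")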
Example #18
File: app.py Project: raikel/anvid
    def __init__(self):
        super(MainWindow, self).__init__()

        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.connectActions()

        self.videoAnnoWidget = VideoAnnoWidget(self)
        self.setCentralWidget(self.videoAnnoWidget)

        self.project: Project = Project()
        self.appSettings: AppSettings = AppSettings()
        self.loadAppSettings()
Example #19
 def add_project(self):
     user_name = input("User name: ")
     for account in Account.objects:
         if account.username == user_name:
             user_account = account
     self.title = input("Project title: ")
     self.contact = input("Project contact: ")
     self.results = input("Results: ")
     self.nsf_Aggreement = input("NSF Agreement (Yes or No): ")
     self.slide_collection_aggreement = input(
         "Slide Collection Agreement (Yes or No): ")
     self.other = input("Other: ")
     project = Project(
         project_title=self.title,
         lead=user_account,
         manager=user_account,
         contact=self.contact,
         results=self.results,
         nsf_Aggreement=self.nsf_Aggreement,
         slide_collection_aggreement=self.slide_collection_aggreement,
         other=self.other)
     project.save()
     print(project.project_title)
Example #20
def main():
    projects = list(Project.available_projects())
    # Compute the scores
    print('Computing the scores. This may take a while...')
    scores = [get_both_scores(p) for p in projects]
    descartes_scores = [c[0].score for c in scores]
    gregor_scores = [c[1].score for c in scores]
    # Show the table
    render_table([p.name for p in projects], scores)
    # Show the correlation
    correlation = spearmanr(descartes_scores, gregor_scores)
    print(f'The Spearman correlation coefficient is {correlation.correlation} with a p-value of {correlation.pvalue}')
    # Show plots
    show_plot(descartes_scores, gregor_scores)
    bland_altman_plot(descartes_scores, gregor_scores)
    plt.show()
Example #21
    def read_file(self, month, year, file):
        # open the workbook and the sheet named "<month><year>"
        wb_r = xlrd.open_workbook(file)
        sheet1 = wb_r.sheet_by_name(month + year)

        nrow = sheet1.nrows
        ncol = sheet1.ncols
        self.project_dates_col = sheet1.col_values(0)

        # column 0 holds the dates; every other column is one project
        for i in range(1, ncol):
            project_obj = Project(sheet1.cell_value(0, i))
            for j in range(1, nrow):
                date_row = sheet1.cell_value(j, 0)
                words = sheet1.cell_value(j, i)
                project_obj.list_of_occurrences.append([date_row, words])

            self.main_list.append(project_obj)
Example #22
    def update_file(self, day, month, new_project, new_words):

        today = day + " " + month

        temp_project = Project(new_project)

        if today not in self.project_dates_col:
            self.project_dates_col.append(today)

        index_proj = 9999
        project_exists = False
        project_date_exists = False
        index_date = 9999

        # check whether the project already exists and record its index in the main list
        for index, project in enumerate(self.main_list):
            if project.get_name() == new_project:
                index_proj = index
                project_exists = True
                print(index_proj)

        if project_exists:
            for date_proj, data_pair in enumerate(self.main_list[index_proj].list_of_occurrences):
                print(data_pair[0])
                if data_pair[0] == today:
                    index_date = date_proj
                    project_date_exists = True
                    print(index_date)

            if project_date_exists:
                self.main_list[index_proj].list_of_occurrences[index_date][1] = new_words
            else:
                self.main_list[index_proj].list_of_occurrences.append([today, new_words])
        else:
            temp_project.list_of_occurrences.append([today, new_words])
            print(temp_project.list_of_occurrences)
            self.main_list.append(temp_project)
Example #23
File: gaudi.py Project: brettviren/garpi
 def __init__(self):
     Project.__init__(self,"gaudi")
     return
Example #24
class user_project():

    accounts = Account.objects()
    projects = Project.objects()

    title = ""
    category = ""
    #keywords = ""
    contact = ""
    #members = ""
    #alumni = ""
    #nsf_grant_number = ""
    #nsf_grant_url = ""
    results = ""
    nsf_Aggreement = ""  # Yes/No
    slide_collection_aggreement = ""  # Yes/No
    other = ""

    def add_project(self):
        user_name = input("User name: ")
        for account in Account.objects:
            if account.username == user_name:
                user_account = account
        self.title = input("Project title: ")
        self.contact = input("Project contact: ")
        self.results = input("Results: ")
        self.nsf_Aggreement = input("NSF Agreement (Yes or No): ")
        self.slide_collection_aggreement = input(
            "Slide Collection Agreement (Yes or No): ")
        self.other = input("Other: ")
        project = Project(
            project_title=self.title,
            lead=user_account,
            manager=user_account,
            contact=self.contact,
            results=self.results,
            nsf_Aggreement=self.nsf_Aggreement,
            slide_collection_aggreement=self.slide_collection_aggreement,
            other=self.other)
        project.save()
        print(project.project_title)

        # account.project = self.title  (reference and list field)

    def generate_random(self):
        pass

    def generate_project(self):
        pass

    def list_project(self):
        for project in Project.objects():
            print()
            print(project.project_title, ":", project)
            print()

    def del_project(self):
        user_name = input("User name: ")
        for project in Project.objects:
            if project.lead.username == user_name:
                project.delete()
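Account.objects and Project(...).save() follow the MongoEngine Document pattern; the models below are a minimal sketch of what this class appears to assume, with field types guessed (the misspelled field names are kept exactly as the code uses them).

from mongoengine import Document, StringField, ReferenceField

class Account(Document):
    username = StringField(required=True)

class Project(Document):
    project_title = StringField()
    lead = ReferenceField(Account)
    manager = ReferenceField(Account)
    contact = StringField()
    results = StringField()
    nsf_Aggreement = StringField()               # "Yes"/"No"
    slide_collection_aggreement = StringField()  # "Yes"/"No"
    other = StringField()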
Example #25
 def test_creation(self):
     project = Project('project_1', 'The first project')
Example #26
 def set_project(self, github_name, github_user, jira_name, jira_url):
     self.project = Project(github_name, github_user, '', [jira_name], [],
                            jira_url, '')
     self.set_extractor()
Example #27
 def test_creation_with_documents(self):
     documents = [1, 2, 3]
     project = Project('project_1',
                       'The first project',
                       documents=documents)
Example #28
 def test_creation_with_members(self):
     members = [1, 2, 3]
     project = Project('project_1', 'The first project', members=members)
Example #29
 def list_project(self):
     for project in Project.objects():
         print()
         print(project.project_title, ":", project)
         print()
Example #30
 def __init__(self):
     Project.__init__(self,"lcgcmt")
     return
Example #31
 def set_project(self, github, jira):
     self.project = Project(github.lower(), jira.upper())
     self.set_extractor()
Example #32
class Main():
    def __init__(self):
        self.project = None
        self.extractor = None
        self.save_data_names()
        self.jira_url = None
        self.github_user_name = None

    def list_projects(self):
        print("\n".join(
            list(
                map(lambda e: "{0}: {1}".format(e.name, e.value.description()),
                    ProjectName))))

    def extract(self):
        self.extractor.extract(True)

    def set_project(self, github, jira):
        self.project = Project(github.lower(), jira.upper())
        self.set_extractor()

    def set_project_enum(self, name):
        self.project = ProjectName[name].value
        self.set_extractor()

    def set_extractor(self):
        self.extractor = DataExtractor(self.project, self.jira_url,
                                       self.github_user_name)

    def extract_metrics(self):
        classes_data = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['ClassesData'],
                         self.project.github()))
        Path(classes_data).mkdir(parents=True, exist_ok=True)
        method_data = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['MethodData'],
                         self.project.github()))
        Path(method_data).mkdir(parents=True, exist_ok=True)

        classes_datasets = []
        methods_datasets = []

        for version in self.extractor.get_selected_versions()[:-1]:
            self.extractor.checkout_version(version)
            classes_df, methods_df = self.extract_features_to_version(
                classes_data, method_data, version)
            classes_datasets.append(classes_df)
            methods_datasets.append(methods_df)

        classes_instance = self.extract_classes_datasets(classes_datasets)
        classes_instance.predict()

        methods_instance = self.extract_methods_datasets(methods_datasets)
        methods_instance.predict()

    def aggrate_methods_df(self, df):
        ids = df['Method_ids'].iteritems()
        files_id, classes_id = tee(ids, 2)
        files = pd.Series(list(map(lambda x: x[1].split('@')[0],
                                   files_id))).values
        classes = pd.Series(
            list(
                map(lambda x: x[1].split('@')[1].split('.')[:-1][-1],
                    classes_id))).values
        df.insert(0, 'File', files)
        df.insert(0, 'Class', classes)
        groupby = ['File', 'Class']
        columns_filter = [
            'File', 'Class', 'BuggedMethods', 'Method', 'Method_ids'
        ]
        columns = list(
            filter(lambda x: x not in columns_filter,
                   df.columns.values.tolist()))
        data = list()
        for key, group in df.groupby(groupby):
            key_data = {}
            key_data.update(dict(zip(groupby, key)))
            for feature in columns:
                pt = pd.DataFrame(group[feature].describe()).T
                cols = [
                    "{0}_{1}".format(feature, c)
                    for c in pt.columns.values.tolist()
                ]
                pt.columns = cols
                key_data.update(list(pt.iterrows())[0][1].to_dict())
            data.append(key_data)
        return pd.DataFrame(data)

    def fillna(self, df):
        for col in df:
            dt = df[col].dtype
            if dt == int or dt == float:
                df[col].fillna(0, inplace=True)
            else:
                df[col].fillna(False, inplace=True)
        return df

    def extract_features_to_version(self, classes_data, method_data, version):
        extractors = Extractor.get_all_extractors(self.project, version)
        for extractor in extractors:
            extractor.extract()
        db = DataBuilder(self.project, version)
        list(map(lambda d: db.append(d), DataNameEnum))
        classes_df, methods_df = db.build()
        intermediate_dir = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['Intermediate'],
                         self.project.github()))
        classes_intermediate_dir = os.path.join(intermediate_dir, "classes")
        methods_intermediate_dir = os.path.join(intermediate_dir, "methods")
        Path(classes_intermediate_dir).mkdir(parents=True, exist_ok=True)
        Path(methods_intermediate_dir).mkdir(parents=True, exist_ok=True)
        classes_df.to_csv(os.path.join(classes_intermediate_dir,
                                       version + ".csv"),
                          index=False,
                          sep=';')
        methods_df.to_csv(os.path.join(methods_intermediate_dir,
                                       version + ".csv"),
                          index=False,
                          sep=';')

        methods_df = self.fillna(methods_df)
        aggregated_methods_df = self.aggrate_methods_df(methods_df)

        classes_df.dropna(inplace=True)

        classes_df.to_csv(os.path.join(intermediate_dir, "classes_df.csv"),
                          index=False,
                          sep=';')
        aggregated_methods_df.to_csv(os.path.join(intermediate_dir,
                                                  "aggregated_methods_df.csv"),
                                     index=False,
                                     sep=';')

        if 'Class' in classes_df.columns and 'Class' in aggregated_methods_df.columns:
            classes_df = classes_df.merge(aggregated_methods_df,
                                          on=['File', 'Class'],
                                          how='outer')
        else:
            classes_df = classes_df.merge(aggregated_methods_df,
                                          on=['File'],
                                          how='outer')

        classes_df.to_csv(os.path.join(intermediate_dir,
                                       "classes_df_afterMerge.csv"),
                          index=False,
                          sep=';')

        classes_df = self.fillna(classes_df)
        classes_df.to_csv(os.path.join(classes_data, version + ".csv"),
                          index=False,
                          sep=';')

        methods_df = methods_df.drop('File', axis=1, errors='ignore')
        methods_df = methods_df.drop('Class', axis=1, errors='ignore')
        methods_df = methods_df.drop('Method', axis=1, errors='ignore')
        methods_df.to_csv(os.path.join(method_data, version + ".csv"),
                          index=False,
                          sep=';')

        return classes_df, methods_df

    def extract_classes_datasets(self, classes_datasets):
        dataset_dir = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['Dataset'],
                         self.project.github()))
        classes_dataset_dir = os.path.join(dataset_dir, "classes")
        Path(classes_dataset_dir).mkdir(parents=True, exist_ok=True)

        classes_training = pd.concat(classes_datasets[:-1],
                                     ignore_index=True).drop(
                                         ["File", "Class", "Method_ids"],
                                         axis=1,
                                         errors='ignore')
        classes_testing = classes_datasets[-1].drop("Method_ids",
                                                    axis=1,
                                                    errors='ignore')
        file_names = classes_testing.pop("File").values.tolist()
        classes_names = classes_testing.pop("Class").values.tolist()
        classes_testing_names = list(
            map("@".join, zip(file_names, classes_names)))
        return ClassificationInstance(classes_training, classes_testing,
                                      classes_testing_names,
                                      classes_dataset_dir)

    def extract_methods_datasets(self, methods_datasets):
        dataset_dir = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['Dataset'],
                         self.project.github()))
        methods_dataset_dir = os.path.join(dataset_dir, "methods")
        Path(methods_dataset_dir).mkdir(parents=True, exist_ok=True)
        methods_training = pd.concat(methods_datasets[:-1],
                                     ignore_index=True).drop("Method_ids",
                                                             axis=1,
                                                             errors='ignore')
        methods_testing = methods_datasets[-1]
        methods_testing_names = methods_testing.pop(
            "Method_ids").values.tolist()
        return ClassificationInstance(methods_training,
                                      methods_testing,
                                      methods_testing_names,
                                      methods_dataset_dir,
                                      label="BuggedMethods")

    def choose_versions(self,
                        version_num=5,
                        algorithm="bin",
                        version_type=VersionType.Untyped,
                        strict=True):
        self.extractor.choose_versions(version_num=version_num,
                                       algorithm=algorithm,
                                       strict=strict,
                                       version_type=version_type)

    def set_version_selection(self,
                              version_num=5,
                              algorithm="bin",
                              version_type=VersionType.Untyped,
                              strict=True,
                              selected_config=0):
        self.extractor.choose_versions(version_num=version_num,
                                       algorithm=algorithm,
                                       strict=strict,
                                       version_type=version_type,
                                       selected_config=selected_config)
        self.extractor.selected_config = selected_config
        assert self.extractor.get_selected_versions()

    def save_data_names(self):
        j = list()
        out_path = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         "dataname.json"))
        for d in DataNameEnum:
            j.append(d.value.as_description_dict())
        with open(out_path, "w") as f:
            json.dump(j, f)

    def main(self):
        parser = argparse.ArgumentParser(description='Execute project data')
        parser.add_argument('-p',
                            '--projects',
                            dest='projects',
                            action='store_const',
                            const=True,
                            default=False,
                            help='list all already defined projects')
        parser.add_argument('-c',
                            '--choose',
                            dest='choose',
                            action='store',
                            help='choose a project to extract')
        parser.add_argument(
            '-g',
            '--github_repo_name',
            dest='github',
            action='store',
            help=
            'the github repository name to the project to extract (lowercase)')
        parser.add_argument(
            '-j',
            '--jira_name',
            dest='jira',
            action='store',
            help='the jira name to the project to extract (uppercase)')
        parser.add_argument(
            '-u',
            '--github_user_name',
            dest='github_user_name',
            action='store',
            help='the github user name to the project to extract (lowercase)',
            default="apache")
        parser.add_argument('-jl',
                            '--jira_url',
                            dest='jira_url',
                            action='store',
                            help='the link to jira',
                            default="http://issues.apache.org/jira")
        parser.add_argument(
            '-l',
            '--list_select_verions',
            dest='list_selected',
            action='store',
            help='the algorithm to select the versions: [bin]',
            default='bin')
        parser.add_argument('-s',
                            '--select_verions',
                            dest='select',
                            action='store',
                            help='the configuration to choose',
                            default=-1,
                            type=int)
        parser.add_argument('-n',
                            '--num_verions',
                            dest='num_versions',
                            action='store',
                            help='the number of versions to select',
                            default=5,
                            type=int)
        parser.add_argument('-t',
                            '--versions_type',
                            dest='versions_type',
                            action='store',
                            help='the versions type to select',
                            default="Untyped")
        parser.add_argument('-f',
                            '--free_choose',
                            dest='free_choose',
                            action='store_true',
                            help='choose versions freely')
        args = parser.parse_args()
        self.github_user_name = args.github_user_name
        self.jira_url = args.jira_url
        if args.projects:
            self.list_projects()
        if args.choose:
            self.set_project_enum(args.choose)
        if args.github and args.jira:
            self.set_project(args.github, args.jira)
        if args.list_selected:
            self.choose_versions(version_num=args.num_versions,
                                 algorithm=args.list_selected,
                                 version_type=VersionType[args.versions_type],
                                 strict=args.free_choose)
        if args.select != -1:
            self.set_version_selection(
                version_num=args.num_versions,
                algorithm='bin',
                version_type=VersionType[args.versions_type],
                strict=args.free_choose,
                selected_config=args.select)
            self.extract()
            self.extract_metrics()
Example #33
def stats():
    projects = Project("SteveKipp")
    projects.get_repo_activity()
    return render_template("stats.html", projects=projects)
Example #34
        x = entries[i]
        kinds = [my_dict['kind'] for my_dict in x[4]]
        actions = [my_dict['action'] for my_dict in x[4]]
        projects = []
        # use a separate index so the outer entry index `i` is not clobbered
        for j in range(len(x[3])):
            curr_path = x[3][j]
            size_to_add = 0
            if kinds[j] == 'file':
                size_to_add = parse_list(list_file,
                                         curr_path.replace('/mjschau2/', ''))
            svn_link = str(
                'https://subversion.ews.illinois.edu/svn/sp17-cs242' +
                curr_path + '/?p=' + x[0]['revision'])
            temp_proj = proj.Project(curr_path,
                                     size_to_add,
                                     actions[j],
                                     kinds[j],
                                     text=svn_link,
                                     file_id=curr_id)
            result = files.insert_one(temp_proj.__dict__)
            #print(result)
            curr_id += 1
            projects.append(temp_proj.__dict__)
        temp_obj = le.log_entry(int(x[0]['revision']), x[1], x[2], x[5],
                                projects)
        entry_objs.append(temp_obj.__dict__)

    project_data = entry_objs

    #now put up on mongodb database

    result = logs.insert_many(project_data)
Example #36
def build(repositories, theme, all_languages, output_dir):
    termlangs = get_termlangs(repositories, all_languages)

    print_info("Copying assets")
    copydir(html_assets, output_dir)
    css_dir = os.path.join(output_dir, "css")
    makedirs(css_dir)
    make_css(css_assets, theme, css_dir)

    languages = {}
    project_count = {}

    for language_code, terms in termlangs.items():
        if language_code not in all_languages:
            all_languages[language_code] = Language(
                code=language_code,
                name=language_code,
                legal={},
                translations={}
            )
        language = all_languages[language_code]
        print_info("Language "+language.name)
        out_terms = []
        count = 0
        lang_dir = os.path.join(output_dir, language.code)

        for term in terms:
            term_dir = os.path.join(lang_dir, "%s.%d"%(term.id, term.number))
            makedirs(term_dir)
            print_info("Building Term:\t\t" + str(term.title))
            projects = []

            for p in term.projects:
                built_project = Project.build_from_resource(
                    p, term, term_dir, language, theme)
                if built_project:
                    count += 1
                    projects.append(built_project)
                else:
                    continue

            extras = []
            for r in term.extras:
                extras.append(build_extra(term, r, language, theme, term_dir))

            term = Term(
                id=term.id,
                manifest=term.manifest,
                number=term.number, language=term.language,
                title=term.title, description=term.description,
                projects=projects,
                extras=extras,
            )

            out_terms.append(make_term_index(term, language, theme, term_dir))

            print_info("Term built!")

        print_info("Building " + language.name +" index")

        languages[language_code] = \
            make_lang_index(language, out_terms, theme, lang_dir)
        project_count[language_code] = count

    print_info("Building " + theme.name + " index: " + output_dir)

    sorted_languages = []
    for lang in sorted(project_count.keys(),
                       key=lambda x: project_count[x], reverse=True):
        sorted_languages.append((all_languages[lang], languages[lang]))

    make_index(sorted_languages,
               all_languages[theme.language],
               theme,
               output_dir)
    print_info("Complete")
Example #37
File: app.py Project: raikel/anvid
 def newProject(self):
     self.saveProject()
     se = Project()
     self.loadProject(se)
Example #38
class Main():
    def __init__(self):
        self.project = None
        self.extractor = None
        self.save_data_names()
        self.jira_url = None
        self.github_user_name = None

    def list_projects(self):
        print("\n".join(list(map(lambda e: "{0}: {1}".format(e.name, e.value.description()), ProjectName))))

    def extract(self):
        self.extractor.extract(True)

    def set_project(self, github, jira):
        self.project = Project(github.lower(), jira.upper())
        self.set_extractor()

    def set_project_enum(self, name):
        self.project = ProjectName[name].value
        self.set_extractor()

    def set_extractor(self):
        self.extractor = DataExtractor(self.project, self.jira_url, self.github_user_name)

    def extract_metrics(self, rest_versions, rest_only, data_types):
        classes_datasets = []
        aggregated_classes_datasets = []
        methods_datasets = []
        if not rest_only:
            for version in self.extractor.get_selected_versions()[:-1]:
                classes_df, methods_df, aggregated_classes_df = self.extract_features_to_version(version, True, data_types)
                classes_datasets.append(classes_df)
                methods_datasets.append(methods_df)
                aggregated_classes_datasets.append(aggregated_classes_df)
        for version in rest_versions:
            try:
                self.extract_features_to_version(version, False, data_types)
            except Exception:
                # best-effort: skip versions whose features cannot be extracted
                pass
        if rest_only:
            return
        self.extract_classes_datasets(aggregated_classes_datasets[:-1], aggregated_classes_datasets[-1]).predict()
        # self.extract_classes_datasets(classes_datasets[:-1], classes_datasets[-1], "classes_no_aggregate").predict()
        self.extract_methods_datasets(methods_datasets[:-1], methods_datasets[-1]).predict()

    def create_all_but_one_dataset(self, data_types):
        alls = {}
        ones = {}
        detailed = {}
        for d in DataNameEnum:
            if d.value.data_type.value in data_types:
                detailed.setdefault(d.value.data_type.value, set()).add(d.value.name)
        for d in detailed:
            ones[d] = detailed[d]
            all_but_d = list(detailed.keys())
            all_but_d.remove(d)
            alls[d] = reduce(set.__or__, list(map(detailed.get, all_but_d)), set())
        for sub_dir, label in [("methods", "BuggedMethods"), ("classes", "Bugged")]:
            scores = []
            training_df = pd.read_csv(os.path.join(self.get_dataset_path(sub_dir), "training.csv"), sep=';')
            testing_df = pd.read_csv(os.path.join(self.get_dataset_path(sub_dir), "testing.csv"), sep=';')
            dataset_cols = set(training_df.columns.to_list()).intersection(set(testing_df.columns.to_list()))
            names = pd.read_csv(os.path.join(self.get_dataset_path(sub_dir), "prediction.csv"), sep=';')['name'].to_list()
            for dir_name, columns in (('one', ones), ('all', alls)):
                for d in columns:
                    cols = set(filter(lambda dc: any(map(lambda c: c in dc, columns[d])), dataset_cols))
                    if len(cols) == 0:
                        continue
                    cols.add(label)
                    cols = list(cols)
                    train = training_df[cols]
                    test = testing_df[cols]
                    ci = ClassificationInstance(train, test, names, self.get_dataset_path(os.path.join(dir_name, sub_dir, d)), label=label)
                    try:
                        ci.predict()
                        ci_scores = dict(ci.scores)
                        ci_scores.update({"type": dir_name, "data_type": d})
                        scores.append(ci_scores)
                    except Exception as e:
                        print(e)
            pd.DataFrame(scores).to_csv(self.get_dataset_path(sub_dir + "_metrics.csv", False), index=False, sep=';')

    def get_data_dirs(self):
        classes_data = Config.get_work_dir_path(os.path.join(Config().config['CACHING']['RepositoryData'],
                                                             Config().config['VERSION_METRICS']['ClassesData'],
                                                             self.project.github()))
        method_data = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'], Config().config['VERSION_METRICS']['MethodData'],
                         self.project.github()))
        intermediate_dir = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'],
                         Config().config['VERSION_METRICS']['Intermediate'],
                         self.project.github()))
        classes_intermediate_dir = os.path.join(intermediate_dir, "classes")
        methods_intermediate_dir = os.path.join(intermediate_dir, "methods")
        Path(classes_intermediate_dir).mkdir(parents=True, exist_ok=True)
        Path(methods_intermediate_dir).mkdir(parents=True, exist_ok=True)
        Path(classes_data).mkdir(parents=True, exist_ok=True)
        Path(method_data).mkdir(parents=True, exist_ok=True)
        return classes_data, method_data, classes_intermediate_dir, methods_intermediate_dir, intermediate_dir

    def aggrate_methods_df(self, df):
        def clean(s):
            # s is an (index, value) pair from iteritems(); inspect the value
            if "@" in s[1]:
                return s[1].split('@')[1].split('.')[:-1][-1]
            return s[1].split('.')[:-1][-1]
        ids = df['Method_ids'].iteritems()
        files_id, classes_id = tee(ids, 2)
        files = pd.Series(list(map(lambda x: x[1].split('@')[0], files_id))).values
        classes = pd.Series(list(map(clean, classes_id))).values
        df.insert(0, 'File', files)
        df.insert(0, 'Class', classes)
        groupby = ['File', 'Class']
        columns_filter = ['File', 'Class', 'BuggedMethods', 'Method', 'Method_ids']
        columns = list(
            filter(lambda x: x not in columns_filter, df.columns.values.tolist()))
        data = list()
        for key, group in df.groupby(groupby):
            key_data = {}
            key_data.update(dict(zip(groupby, key)))
            for feature in columns:
                pt = pd.DataFrame(group[feature].describe(include='all')).T
                cols = ["{0}_{1}".format(feature, c) for c in pt.columns.values.tolist()]
                pt.columns = cols
                key_data.update(list(pt.iterrows())[0][1].to_dict())
            data.append(key_data)
        return pd.DataFrame(data)

    def fillna(self, df, default=False):
        if 'Bugged' in df:
            df = df[df['Bugged'].notna()]
        if 'BuggedMethods' in df:
            df = df[df['BuggedMethods'].notna()]
        for col in df:
            dt = df[col].dtype
            if dt == int or dt == float:
                df[col].fillna(0, inplace=True)
            else:
                df[col].fillna(default, inplace=True)
        return df

    def extract_features_to_version(self, version, extract_bugs, data_types):
        self.extractor.checkout_version(version)
        db, extractors_to_run = self.get_extractors(data_types, extract_bugs, version)
        for extractor in extractors_to_run:
            start = time.time()
            extractor.extract()
            print(time.time() - start, extractor.__class__.__name__)
        classes_df, methods_df = db.build()
        aggregated_methods_df = self.aggrate_methods_df(methods_df)
        methods_df = self.fillna(methods_df)
        aggregated_classes_df = self.merge_aggregated_methods_to_class(aggregated_methods_df, classes_df)
        classes_df = self.fillna(classes_df)
        methods_df = methods_df.drop('File', axis=1, errors='ignore')
        methods_df = methods_df.drop('Class', axis=1, errors='ignore')
        methods_df = methods_df.drop('Method', axis=1, errors='ignore')
        self.save_dfs(classes_df, methods_df, aggregated_classes_df, aggregated_methods_df, version)
        return classes_df, methods_df, aggregated_classes_df

    def merge_aggregated_methods_to_class(self, aggregated_methods_df, classes_df):
        aggregated_classes_df = classes_df.copy(deep=True)
        if 'Class' in aggregated_classes_df.columns and 'Class' in aggregated_methods_df.columns:
            aggregated_classes_df = aggregated_classes_df.merge(aggregated_methods_df, on=['File', 'Class'],
                                                                how='outer')
        else:
            aggregated_classes_df = aggregated_classes_df.merge(aggregated_methods_df, on=['File'], how='outer')
        return self.fillna(aggregated_classes_df)

    def save_dfs(self, classes_df, methods_df, aggregated_classes_df, aggregated_methods_df, version):
        classes_data, method_data, classes_intermediate_dir, methods_intermediate_dir, intermediate_dir = self.get_data_dirs()
        classes_df.to_csv(os.path.join(classes_intermediate_dir, version + ".csv"), index=False, sep=';')
        aggregated_classes_df.to_csv(os.path.join(classes_intermediate_dir, version + "_aggregated_classes.csv"), index=False, sep=';')
        methods_df.to_csv(os.path.join(methods_intermediate_dir, version + ".csv"), index=False, sep=';')
        aggregated_methods_df.to_csv(os.path.join(intermediate_dir, version + "_aggregated_methods_df.csv"), index=False, sep=';')
        classes_df.to_csv(os.path.join(classes_data, version + ".csv"), index=False, sep=';')
        aggregated_classes_df.to_csv(os.path.join(classes_data, version + "_aggregated_classes_.csv"), index=False, sep=';')
        methods_df.to_csv(os.path.join(method_data, version + ".csv"), index=False, sep=';')

    def get_extractors(self, data_types, extract_bugs, version):
        db = DataBuilder(self.project, version)
        if extract_bugs:
            data_types.add("bugged")
            data_types.add("bugged_methods")
        extractors_to_run = set()
        for extractor in Extractor.get_all_extractors(self.project, version):
            if not extract_bugs and "bugged" in extractor.__class__.__name__.lower():
                continue
            extractor_data_types = []
            for dt in extractor.data_types:
                if dt.value in data_types:
                    extractor_data_types.append(dt)
                    extractors_to_run.add(extractor)
            db.add_data_types(extractor_data_types)
        return db, extractors_to_run

    def extract_classes_datasets(self, training_datasets, testing_dataset, sub_dir="classes"):
        training = pd.concat(training_datasets, ignore_index=True).drop(["File", "Class", "Method_ids"], axis=1, errors='ignore')
        training = self.fillna(training)
        testing = testing_dataset.drop(["Method_ids", "Class"], axis=1, errors='ignore')
        testing = self.fillna(testing, default='')
        file_names = testing.pop("File").values.tolist()
        # classes_names = testing.pop("Class").values.tolist()
        # classes_testing_names = list(map("@".join, zip(file_names, ['' if x in (False, True) else x for x in classes_names])))
        return ClassificationInstance(training, testing, file_names, self.get_dataset_path(sub_dir))

    def get_dataset_path(self, name, is_dir=True):
        dataset_dir = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'], Config().config['VERSION_METRICS']['Dataset'],
                         self.project.github()))
        path = os.path.join(dataset_dir, name)
        if is_dir:
            Path(path).mkdir(parents=True, exist_ok=True)
        return path

    def extract_methods_datasets(self, training_datasets, testing_dataset):
        training = pd.concat(training_datasets, ignore_index=True).drop("Method_ids", axis=1, errors='ignore')
        training = self.fillna(training)
        testing = testing_dataset
        testing = self.fillna(testing)
        methods_testing_names = testing.pop("Method_ids").values.tolist()
        return ClassificationInstance(training, testing, methods_testing_names, self.get_dataset_path("methods"), label="BuggedMethods")

    def choose_versions(self, version_num=5, algorithm="bin", version_type=VersionType.Untyped, strict=True):
        self.extractor.init_jira_commits()
        self.extractor.choose_versions(version_num=version_num, algorithm=algorithm, strict=strict, version_type=version_type)

    def set_version_selection(self, version_num=5, algorithm="bin", version_type=VersionType.Untyped, strict=True, selected_config=0):
        self.extractor.set_selected_config(selected_config)
        self.extractor.choose_versions(version_num=version_num, algorithm=algorithm, strict=strict, version_type=version_type)
        assert self.extractor.get_selected_versions()

    def save_data_names(self):
        j = list()
        out_path = Config.get_work_dir_path(
            os.path.join(Config().config['CACHING']['RepositoryData'], "dataname.json"))
        for d in DataNameEnum:
            j.append(d.value.as_description_dict())
        with open(out_path, "w") as f:
            json.dump(j, f)

    def main(self):
        parser = argparse.ArgumentParser(description='Execute project data')
        parser.add_argument('-p', '--projects', dest='projects', action='store_const', const=True, default=False,
                            help='list all already defined projects')
        parser.add_argument('-c', '--choose', dest='choose', action='store', help='choose a project to extract')
        parser.add_argument('-g', '--github_repo_name', dest='github', action='store', help='the github repository name to the project to extract (lowercase)')
        parser.add_argument('-j', '--jira_name', dest='jira', action='store', help='the jira name to the project to extract (uppercase)')
        parser.add_argument('-u', '--github_user_name', dest='github_user_name', action='store', help='the github user name to the project to extract (lowercase)', default="apache")
        parser.add_argument('-jl', '--jira_url', dest='jira_url', action='store', help='the link to jira', default="http://issues.apache.org/jira")
        parser.add_argument('-l', '--list_select_verions', dest='list_selected', action='store', help='the algorithm to select the versions: [bin]', default='bin')
        parser.add_argument('-d', '--data_types_to_extract', dest='data_types', action='store', help='Json file of the data types to extract as features. Choose a sublist of '
                                                                                                    '[checkstyle, designite_design, designite_implementation, '
                                                                                                    'designite_type_organic, designite_method_organic, designite_type_metrics,'
                                                                                                    'designite_method_metrics, source_monitor_files, source_monitor, ck, mood, halstead,'
                                                                                                    'jasome_files, jasome_methods, process_files, issues_files]. You can use the files under externals\configurations', default=r"externals\configurations\default.json")
        parser.add_argument('-s', '--select_verions', dest='select', action='store', help='the configuration to choose', default=0, type=int)
        parser.add_argument('-n', '--num_verions', dest='num_versions', action='store', help='the number of versions to select', default=5, type=int)
        parser.add_argument('-t', '--versions_type', dest='versions_type', action='store', help='the versions type to select', default="Untyped")
        parser.add_argument('-f', '--free_choose', dest='free_choose', action='store_true', help='choose versions freely')
        parser.add_argument('-r', '--only_rest', dest='only_rest', action='store_true', help='extract only rest versions')
        parser.add_argument('rest', nargs=argparse.REMAINDER)
        args = parser.parse_args()
        self.github_user_name = args.github_user_name
        self.jira_url = args.jira_url
        if args.projects:
            self.list_projects()
        if args.choose:
            self.set_project_enum(args.choose)
        if args.github and args.jira:
            self.set_project(args.github, args.jira)
        if args.list_selected:
            self.choose_versions(version_num=args.num_versions, algorithm=args.list_selected, version_type=VersionType[args.versions_type], strict=args.free_choose)
        # if args.select != -1:
        self.set_version_selection(version_num=args.num_versions, algorithm='bin',
                                   version_type=VersionType[args.versions_type],
                                   strict=args.free_choose, selected_config=args.select)
        self.extract()
        data_types = None
        if os.path.exists(args.data_types):
            with open(args.data_types) as f:
                data_types = set(json.loads(f.read()))
        self.extract_metrics(args.rest, args.only_rest, data_types)
        self.create_all_but_one_dataset(data_types)