def comparing_two_repository_from_url(main_url, fork_url):
    """
    Log the commits that two repositories have in common.

    All commits of ``main_url`` and of ``fork_url`` are collected (keyed by
    commit hash). Each repository is then walked once: a running commit
    counter is logged for every commit, and every commit whose hash also
    exists in the other repository is logged with its id, author date and
    message. Commits that exist on one side only are not reported.

    :param main_url: location handed to ``RepositoryMining`` for the main repository
    :param fork_url: location handed to ``RepositoryMining`` for the fork
    :return: None; all results are written to ``logger``
    """

    def collect_commits(url):
        # hash -> (author date, message)
        return {commit.hash: (commit.author_date, commit.msg)
                for commit in RepositoryMining(url).traverse_commits()}

    def log_shared_commits(lower_label, title_label, own_commits, other_commits):
        for commit_count, (commit_hash, (author_date, message)) in enumerate(
                own_commits.items(), start=1):
            logger.info('Commit count {}: {}'.format(lower_label, commit_count))
            if commit_hash in other_commits:
                logger.info('{} Commit count: {}, ID: {}, Date: {}, Message: {}'.format(
                    title_label, commit_count, commit_hash, author_date, message))

    main_url_commits = collect_commits(main_url)
    fork_url_commits = collect_commits(fork_url)

    log_shared_commits('main', 'Main', main_url_commits, fork_url_commits)
    # The fork pass used to reuse the "main" labels, which made the two
    # passes indistinguishable in the log output.
    log_shared_commits('fork', 'Fork', fork_url_commits, main_url_commits)
Пример #2
0
def test_diff_histogram():
    """Parse the diff of one commit without and with ``histogram_diff``.

    The same commit of ``test-repos/test13`` is mined twice (the second time
    with ``histogram_diff=True``); the ``added`` and ``deleted`` entries
    produced by ``GitRepository.parse_diff`` are checked against the
    expected ``(number, text)`` pairs.
    """
    repo_path = 'test-repos/test13'
    commit_hash = "93df8676e6fab70d9677e94fd0f6b17db095e890"

    # without histogram
    miner = RepositoryMining(repo_path, single=commit_hash)
    commit = list(miner.traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository(repo_path)
    diff = gr.parse_diff(mod.diff)

    expected_added = [
        (3, '    if (path == null)'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (8, ''),
        (9, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (10, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (14, '        return null;'),
        (16, '    else'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    assert len(diff['added']) == 11
    for entry in expected_added:
        assert entry in diff['added']

    expected_deleted = [
        (3, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (4, ''),
        (5, '    if (imgURL != null)'),
        (7, '        return new ImageIcon(imgURL);'),
        (9, '    else'),
        (10, '    {'),
        (13, '    return null;'),
    ]
    assert len(diff['deleted']) == 7
    for entry in expected_deleted:
        assert entry in diff['deleted']

    # with histogram
    miner = RepositoryMining(repo_path,
                             single=commit_hash,
                             histogram_diff=True)
    commit = list(miner.traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository(repo_path)
    diff = gr.parse_diff(mod.diff)

    histogram_added = [
        (4, '    {'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (7, '    }'),
        (8, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (13, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (14, '        return null;'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    for entry in histogram_added:
        assert entry in diff["added"]

    histogram_deleted = [
        (6, '    {'),
        (7, '        return new ImageIcon(imgURL);'),
        (10, '    {'),
        (11, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (12, '    }'),
        (13, '    return null;'),
    ]
    for entry in histogram_deleted:
        assert entry in diff["deleted"]
Пример #3
0
    def _get_commit_refs(self, repo_url, local_path, from_tag, to_tag):
        """
        Scan all commits between the two tags [`from_tag` .. `to_tag`],
        extract every reference of the form `#{number}` from the commit
        messages and return the referenced numbers.

        :param repo_url: GitHub URL, used for finding issues/Pull requests;
                         mined directly when no local path is given
        :type  repo_url: str

        :param local_path: (Optional) path to a local repository; takes
                           precedence over ``repo_url`` when truthy
        :type  local_path: Path

        :param from_tag: Git start tag
        :type  from_tag: str

        :param to_tag: Git end tag
        :type  to_tag: str

        :return: GitHub reference numbers found in the commit messages
        :rtype:  Set of ints
        """
        self.logger.info("Fetching commits between tags {}...{} ".format(from_tag, to_tag))

        repo = RepositoryMining(local_path or repo_url, from_tag=from_tag, to_tag=to_tag)

        # Build the set directly; the former ``sum(lists, [])`` flattening
        # re-copied the accumulated list for every commit.
        return {
            int(reference[1:])  # drop the leading '#'
            for commit in repo.traverse_commits()
            for reference in re.findall(r'#\d+', commit.msg)
        }
Пример #4
0
    def __init__(self, path_to_repo: str,
                 since: datetime = None,
                 to: datetime = None,
                 from_commit: str = None,
                 to_commit: str = None):
        """
        Build the ``RepositoryMining`` instance stored in ``self.repo_miner``.

        :param str path_to_repo: path to a single repo

        :param datetime since: starting date

        :param datetime to: ending date

        :param str from_commit: starting commit (only if `since` is None)

        :param str to_commit: ending commit (only if `to` is None)

        :raises TypeError: when neither a start (`since`/`from_commit`) nor
            an end (`to`/`to_commit`) boundary is supplied
        """
        if not (since or from_commit):
            raise TypeError('You must pass one between since and from_commit')
        if not (to or to_commit):
            raise TypeError('You must pass one between to and to_commit')

        if not (from_commit and to_commit and from_commit == to_commit):
            # General case: mine the requested range in reverse order.
            range_options = dict(since=since,
                                 to=to,
                                 from_commit=from_commit,
                                 to_commit=to_commit,
                                 order='reverse')
            self.repo_miner = RepositoryMining(path_to_repo=path_to_repo,
                                               **range_options)
            return

        # Start and end are the same commit: use 'single' to avoid a Warning.
        self.repo_miner = RepositoryMining(path_to_repo, single=from_commit)
Пример #5
0
 def __init__(self, repo_path, name, fromSerializer=None):
     """Create a repository either from a database row or by mining it.

     When ``fromSerializer`` is not None the instance is filled from the
     ``repository`` table: ``repo_path`` is used as the row ID and ``name``
     is the object the query is executed on. Otherwise ``repo_path`` is
     mined with ``RepositoryMining`` and ``name`` is stored as the
     repository name.

     :param repo_path: path of the repository to mine, or the row ID when
         loading from the database
     :param name: repository name, or the object ``execute`` is called on
         when loading from the database
     :param fromSerializer: any non-None value selects the database branch
     """
     if fromSerializer is not None:
         # NOTE(review): the query is built by string concatenation; switch
         # to a parameterized query once the DB driver's placeholder style
         # is confirmed.
         name.execute("SELECT * FROM repository WHERE ID == " +
                      str(repo_path))
         # NOTE(review): `cursor` is not defined in this method - presumably
         # a module-level cursor or meant to be `name`; confirm.
         rows = cursor.fetchall()
         row = rows[0]
         self.id = row[0]
         self.path = row[1]
         self.name = row[2]
         self.creation = row[3]
         self.commits = self.getCommits(repo_path, cursor)
     else:
         self.path = repo_path
         self.name = name
         self.commits = []
         # Default so the attribute exists even when the lookup below fails.
         self.creation = None
         try:
             self.creation = next(
                 RepositoryMining(
                     repo_path).traverse_commits()).committer_date
         except Exception:
             # `except Exception` (not a bare except) so that
             # KeyboardInterrupt/SystemExit still abort the run.
             logging.warning("Could not get committer date for " + name +
                             " in " + repo_path)
         for commit in RepositoryMining(repo_path).traverse_commits():
             try:
                 self.commits.append(Commit.Commit(commit))
             except Exception:
                 logging.warning("Could not create commit with hash " +
                                 commit.hash)
Пример #6
0
def test_badly_formatted_url():
    """Mining the malformed URL and the bare path ``'test'`` must both raise."""
    bad_locations = (
        'https://github.com/ishepard.git/test',
        'test',
    )
    for location in bad_locations:
        with pytest.raises(Exception):
            miner = RepositoryMining(path_to_repo=location)
            list(miner.traverse_commits())
Пример #7
0
def test_ignore_deleted_whitespaces():
    """The commit reports one modification, and none with ``skip_whitespaces=True``."""
    commit_hash = "e6e429f6b485e18fb856019d9953370fd5420b20"

    def mine_single(**options):
        # Mine only `commit_hash` from the whitespace test repository.
        miner = RepositoryMining('test-repos/whitespace',
                                 single=commit_hash,
                                 **options)
        return list(miner.traverse_commits())[0]

    assert len(mine_single().modifications) == 1
    assert len(mine_single(skip_whitespaces=True).modifications) == 0
Пример #8
0
def test_ignore_add_whitespaces():
    """The commit reports one modification, and none with ``skip_whitespaces=True``."""
    commit_hash = "338a74ceae164784e216555d930210371279ba8e"

    def mine_single(**options):
        # Mine only `commit_hash` from the whitespace test repository.
        miner = RepositoryMining('test-repos/whitespace',
                                 single=commit_hash,
                                 **options)
        return list(miner.traverse_commits())[0]

    assert len(mine_single().modifications) == 1
    assert len(mine_single(skip_whitespaces=True).modifications) == 0
Пример #9
0
def test_ignore_add_whitespaces_and_changed_file():
    """The commit reports two modifications, and one with ``skip_whitespaces=True``."""
    commit_hash = "532068e9d64b8a86e07eea93de3a57bf9e5b4ae0"

    def mine_single(**options):
        # Mine only `commit_hash` from the whitespace test repository.
        miner = RepositoryMining('test-repos/whitespace',
                                 single=commit_hash,
                                 **options)
        return list(miner.traverse_commits())[0]

    assert len(mine_single().modifications) == 2
    assert len(mine_single(skip_whitespaces=True).modifications) == 1
Пример #10
0
def store_commit_data(git_directory_path, devranker_dir, output_file_path):
    """Mine every commit of a repository and export the collected data as CSV.

    Each commit is handed to ``process_commit`` together with ``doclist`` and
    ``completed_commits``. ``doclist`` is then dumped as JSON to a temporary
    ``mod_data.json`` inside ``devranker_dir``, re-read with pandas, written
    to ``output_file_path`` as CSV, and the temporary file is removed.
    Finally ``dict_callback_start_mining`` is updated and printed as JSON.

    Side effects visible here: sets the multiprocessing start method to
    "spawn", assigns the module-level ``total_commits_count`` and mutates
    the module-level ``dict_callback_start_mining``.

    :param git_directory_path: repository location passed to ``RepositoryMining``
    :param devranker_dir: directory in which the temporary JSON file is created
    :param output_file_path: destination path of the CSV file
    """
    # Why 'set_start_method("spawn")'?
    # Because getting Multiple windows unnecessarily and window became unresponsive after Mining is done
    # Ref: https://pythonspeed.com/articles/python-multiprocessing/
    mp.set_start_method("spawn")

    # Creating empty lists for carrying commit data
    doclist = []
    # Using list to update progress bar because it's thread-safe
    completed_commits = []

    # Create a multiprocessing pool with one worker per CPU
    # Ref: https://pythonspeed.com/articles/python-multiprocessing/
    pool = mp.Pool(mp.cpu_count())

    # If the Repo has just been cloned, the program will traverse the whole Repo
    # https://dzone.com/articles/shared-counter-python%E2%80%99s
    commits = RepositoryMining(git_directory_path).traverse_commits()
    global total_commits_count
    # 'more_itertools' used here to find commits count as 'commits' is Iterable
    # Note: ilen(commits) consumes the iterable 'commits'
    total_commits_count = more_itertools.ilen(commits)

    # The repository is traversed a second time because the counting above
    # exhausted 'commits'.
    # NOTE(review): process_commit(...) is *called* here, in this process,
    # and its return value (not the function) is what gets submitted to
    # apply_async - so the commits are processed sequentially and the pool
    # receives whatever process_commit returns. Confirm whether
    # pool.apply_async(process_commit, (commit, ...)) was intended; note that
    # plain lists would then not be shared with the worker processes.
    [
        pool.apply_async(process_commit(commit, doclist, completed_commits))
        for commit in RepositoryMining(git_directory_path).traverse_commits()
    ]
    # Close Multiprocessing pool
    pool.close()
    pool.join()

    # We have data in json format but we need output as csv.
    # There are many approaches to doing this including using dictionaries and stuff.
    # But the easiest way is to write json to file using json.dump and using pandas to read json file.
    # Write data to temp file since pandas.read_json expects file. We can probably optimise without having to
    #     create a new file.
    temp_file = os.path.join(devranker_dir, 'mod_data.json')
    with open(temp_file, 'w') as temp_out_file:
        # json.dump cannot handle python datetime object. We should convert this object to 'str'
        # https://stackoverflow.com/questions/11875770/how-to-overcome-datetime-datetime-not-json-serializable
        # https://code-maven.com/serialize-datetime-object-as-json-in-python
        json.dump(doclist, temp_out_file, default=str)

    # Use pandas to read json and write to csv.
    df = pandas.read_json(temp_file)
    df.to_csv(output_file_path)

    # Remove the temp file
    # NOTE(review): not reached if read_json/to_csv raises, leaving the
    # temporary file behind.
    os.remove(temp_file)
    # display_data_file_location_path()
    # Inform user that mining is complete

    dict_callback_start_mining["msg"] = "Done"
    dict_callback_start_mining["tc"] = 0
    dict_callback_start_mining["cc"] = 0
    print(json.dumps(dict_callback_start_mining))
def recupFromRepo(s_repoPath, s_starting_date, s_ending_date, filename):
    """Dump one tab-separated line per commit of a repository to ``filename``.

    The commits are selected by the two dates (format ``dd/mm/YYYY``) and by
    the positional ``"master"`` argument passed to ``RepositoryMining``.
    Each output line holds: commit date (day only), number of modifications,
    message title (first line), message detail (remaining lines) and the
    ``in_main_branch`` flag. The commit count and the elapsed time are
    printed.

    Side effects: appends every commit date to the module-level
    ``commitDate`` and every modification count to the module-level
    ``commitModification``.

    :param s_repoPath: repository location passed to ``RepositoryMining``
    :param s_starting_date: first date, as ``dd/mm/YYYY`` text
    :param s_ending_date: last date, as ``dd/mm/YYYY`` text
    :param filename: path of the output file (overwritten, UTF-8)
    :raises ValueError: if a date does not match ``dd/mm/YYYY``
    """
    s_date_format = "%d/%m/%Y"
    d_starting_date = datetime.strptime(s_starting_date, s_date_format)
    d_ending_date = datetime.strptime(s_ending_date, s_date_format)

    def traverse():
        # Positional arguments are kept exactly as in the original call;
        # their meaning depends on the installed RepositoryMining signature.
        return RepositoryMining(s_repoPath, None, d_starting_date,
                                d_ending_date, None, None, None, None, None,
                                "master").traverse_commits()

    # First pass only counts; the traversal is a one-shot iterable.
    nbCommits = sum(1 for _ in traverse())

    # `with` guarantees the file is closed even if mining fails midway.
    with open(filename, "w", encoding="utf8") as out_file:
        print("Number of commits : {}".format(nbCommits))

        start = time.time()

        for commit in traverse():
            day = str(commit.committer_date).split(" ")[0]
            commitDate.append(day)
            mainBranch = commit.in_main_branch

            message_lines = commit.msg.split('\n')
            title = message_lines[0]
            # Everything after the title, collapsed onto one line so the
            # record stays a single row. (This used to repeat the title.)
            detail = " ".join(line.strip() for line in message_lines[1:]
                              if line.strip())

            nbModification = len(commit.modifications)
            commitModification.append(nbModification)

            out_file.write("{}\t{}\t{}\t{}\t{}\n".format(
                day, nbModification, title, detail, mainBranch))

    end = time.time()
    print("")
    print("time : {}".format(end - start))
Пример #12
0
    def __init__(self,
                 repoURL,
                 first=None,
                 second=None,
                 fromCommit=None,
                 since=None,
                 to=None):
        """Select the commits to mine, run the analysis and record its duration.

        Selection, in order of precedence:

        * ``first`` and ``second`` without dates: mined with
          ``from_commit=first, to_commit=second``
        * only ``first`` without dates: mined with ``single=first``
        * only ``since`` and ``to``: both parsed with ``parser.parse`` and
          used as the date range
        * ``fromCommit``: mined with ``from_commit=fromCommit``
        * otherwise: the whole repository

        :raises Exception: ("Entered Datetime is not valid.") when anything
            fails while building the date-range miner
        """
        started_at = time.perf_counter()
        self.__gitRepo = GitRepository(repoURL)

        has_first = first is not None
        has_second = second is not None
        no_dates = since is None and to is None
        both_dates = since is not None and to is not None

        if has_first and no_dates:
            if has_second:
                self.repo = RepositoryMining(repoURL,
                                             from_commit=first,
                                             to_commit=second)
                self.__repo_type = RepoType.BETWEEN_COMMITS
            else:
                self.repo = RepositoryMining(repoURL, single=first)
                self.__repo_type = RepoType.SINGLE_COMMIT
        elif not has_first and not has_second and both_dates:
            try:
                range_start = parser.parse(since)
                range_end = parser.parse(to)
                self.repo = RepositoryMining(repoURL,
                                             since=range_start,
                                             to=range_end)
                self.__repo_type = RepoType.DATETIME
            except Exception:
                raise Exception("Entered Datetime is not valid.")
        elif fromCommit is not None:
            self.repo = RepositoryMining(path_to_repo=repoURL,
                                         from_commit=fromCommit)
            self.__repo_type = RepoType.FROM_COMMIT
        else:
            self.repo = RepositoryMining(path_to_repo=repoURL)
            self.__repo_type = RepoType.ALL

        print("repoMiner was created")

        # Result containers, all empty until the analysis below fills them.
        self.__files = []  # analyzed files
        self.__files_with_methods = []
        self.__test_files = []  # test files
        self.__production_files = []  # production files
        self.__commits = []  # List[str]: hashes of analysed commits
        # hashes of analysed commits with modified methods
        self.__commits_with_modified_methods = set()
        self.__production_methods = []  # List[ModifiedMethods]
        self.__test_methods = []  # List[ModifiedMethods]
        self.__modified_methods = []  # List[ModifiedMethods]
        self.__moved_files_without_changes = []  # files without changes
        self.__analyzed_commits = []  # List[AnalyzedCommits]
        self.__matched_files = []  # matched files
        self.__not_matched_files = None  # instance of NotMatchedFiles

        self.__GetModifications()  # performs analysis

        finished_at = time.perf_counter()
        elapsed_minutes = (finished_at - started_at) / 60
        # analysis duration in minutes, formatted with two decimals
        self.__analyse_time = "{:.2f}".format(elapsed_minutes)
Пример #13
0
def test_ignore_add_whitespaces_and_modified_normal_line(git_repo):
    """Compare the parsed diff of one commit without and with ``skip_whitespaces``."""
    repo_path = 'test-repos/whitespace'
    commit_hash = "52716ef1f11e07308b5df1b313aec5496d5e91ce"

    miner = RepositoryMining(repo_path, single=commit_hash)
    commit = list(miner.traverse_commits())[0]
    assert len(commit.modifications) == 1
    parsed_normal_diff = commit.modifications[0].diff_parsed

    miner = RepositoryMining(repo_path,
                             skip_whitespaces=True,
                             single=commit_hash)
    commit = list(miner.traverse_commits())[0]
    assert len(commit.modifications) == 1
    parsed_wo_whitespaces_diff = commit.modifications[0].diff_parsed

    # (parsed diff, section, expected number of entries)
    expectations = (
        (parsed_normal_diff, 'added', 2),
        (parsed_wo_whitespaces_diff, 'added', 1),
        (parsed_normal_diff, 'deleted', 1),
        (parsed_wo_whitespaces_diff, 'deleted', 0),
    )
    for parsed, section, expected_count in expectations:
        assert len(parsed[section]) == expected_count
    def getListOfModifiedFiles(self):
        """Return the files modified by ``self.commitHash`` as ``{'path': ...}`` dicts.

        The commit is mined from ``self.repositoryURL``. When
        ``self.gitRootDir`` is non-empty, only paths containing it are kept
        and ``gitRootDir + '/'`` is removed from them; otherwise every
        ``new_path`` is returned unchanged.

        :return: list of dicts, each with a single ``'path'`` key
        """
        commitList = RepositoryMining(self.repositoryURL,
                                      only_commits=[self.commitHash
                                                    ]).traverse_commits()

        # If the traversal yields several commits, the last one wins.
        miningFileList = []
        for commitObject in commitList:
            miningFileList = commitObject.modifications

        if not self.gitRootDir:
            return [{'path': modifiedFile.new_path}
                    for modifiedFile in miningFileList]

        gitModifiedFileList = []
        for modifiedFile in miningFileList:
            modifiedFilePath = modifiedFile.new_path
            # Substring test replaces `rfind(...) is not -1`, which compared
            # identity with an int literal. A missing new_path (None) is
            # skipped instead of raising.
            if modifiedFilePath is not None and self.gitRootDir in modifiedFilePath:
                gitModifiedFileList.append(
                    {'path': modifiedFilePath.replace(self.gitRootDir + '/', '')})

        return gitModifiedFileList
Пример #15
0
def test_projectname_multiple_repos_remote():
    """Every commit mined from the remote and the local location is named ``pydriller``."""
    remote_repo = 'https://github.com/ishepard/pydriller'
    local_repo = 'test-repos/pydriller'
    miner = RepositoryMining(path_to_repo=[remote_repo, local_repo])
    for commit in miner.traverse_commits():
        assert commit.project_name == 'pydriller'
Пример #16
0
    def get_meta_changes(self, commit_hash: str,
                         current_file: str) -> Set[str]:
        """Return the hashes of commits that are only a meta-change of ``current_file``.

        A commit is added when ``_is_git_mode_change`` accepts its
        ``git show --summary`` output, or when one of its modifications
        touches ``current_file`` with a change type listed in
        ``self.change_types_to_ignore``. Errors raised while inspecting the
        modifications are logged and do not abort the scan.
        """
        meta_changes = set()
        miner = RepositoryMining(path_to_repo=self.repository_path,
                                 single=commit_hash)
        for commit in miner.traverse_commits():
            summary_lines = self.repository.git.show(commit.hash,
                                                     '--summary').splitlines()
            if summary_lines and self._is_git_mode_change(summary_lines,
                                                          current_file):
                log.info(
                    f'exclude meta-change (file mode change): {current_file} {commit.hash}'
                )
                meta_changes.add(commit.hash)
                continue

            try:
                for m in commit.modifications:
                    touches_file = (current_file == m.new_path
                                    or current_file == m.old_path)
                    if touches_file and m.change_type in self.change_types_to_ignore:
                        log.info(
                            f'exclude meta-change ({m.change_type}): {current_file} {commit.hash}'
                        )
                        meta_changes.add(commit.hash)
            except Exception:
                log.error(
                    f'unable to analyze commit: {self.repository_path} {commit.hash}'
                )

        return meta_changes
Пример #17
0
def get_repository_info():
    """Print message, author e-mail, author name and project name of each commit.

    The repository at ``~/code/other/selfblog`` is mined with
    ``only_no_merge=True``.
    """
    miner = RepositoryMining("~/code/other/selfblog", only_no_merge=True)
    for commit in miner.traverse_commits():
        print(commit.msg)
        author = commit.author
        print(author.email)
        print(author.name)
        print(commit.project_name)
Пример #18
0
def explore_commits(repo_input):
    """Write one worksheet row per repository with per-year occurrence totals.

    For each repository, commits touching ``.java`` files and dated after
    2010 are scanned with ``find_occurence_in_commit(commit, word)``; the
    result is accumulated in the bucket selected by the last digit of the
    commit year. Column 0 receives the repository name slice, columns 1-9
    receive buckets 1-9 (bucket 0 is accumulated but not written).
    """
    row = 1

    # for every GitHub project
    for repo in repo_input:
        occurences = [0] * 10
        worksheet.write(row, 0, repo[repo.rfind('/'):repo.rfind('.')])

        # for every commit of the project
        print(repo)
        miner = RepositoryMining(repo,
                                 only_modifications_with_file_types=['.java'])
        for commit in miner.traverse_commits():
            year_text = str(commit.committer_date)[0:4]
            if int(year_text) > 2010:
                # add the occurrences found in this commit to the total of
                # the bucket picked by the last digit of the year
                bucket = int(year_text[3])
                occurences[bucket] = occurences[bucket] + find_occurence_in_commit(commit, word)

        for column in range(1, 10):
            worksheet.write(row, column, occurences[column])

        row += 1
Пример #19
0
def test_diff_without_histogram(git_repo):
    """Check ``diff_parsed`` of the first modification when mined without histogram diff."""
    # without histogram
    miner = RepositoryMining('test-repos/histogram',
                             single="93df8676e6fab70d9677e94fd0f6b17db095e890")
    commit = list(miner.traverse_commits())[0]

    diff = commit.modifications[0].diff_parsed

    expected_added = [
        (3, '    if (path == null)'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (8, ''),
        (9, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (10, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (14, '        return null;'),
        (16, '    else'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    assert len(diff['added']) == 11
    for entry in expected_added:
        assert entry in diff['added']

    expected_deleted = [
        (3, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (4, ''),
        (5, '    if (imgURL != null)'),
        (7, '        return new ImageIcon(imgURL);'),
        (9, '    else'),
        (10, '    {'),
        (13, '    return null;'),
    ]
    assert len(diff['deleted']) == 7
    for entry in expected_deleted:
        assert entry in diff['deleted']
Пример #20
0
def analyze_repo(repo):
    """Collect per-file statistics over the whole history of ``repo``.

    Walks every commit of the repository at ``repo.path`` and keeps three
    dictionaries keyed by file path:

    * ``counts``: 1 when the path first appears under a new name, then +1
      for each later modification with an unchanged path
    * ``nloc``: ``mod.nloc`` of the latest modification (0 when falsy)
    * ``cyclomatic``: ``mod.complexity`` of the latest modification (0 when falsy)

    When a modification has no ``new_path`` the entries of ``old_path`` are
    dropped. Every modification whose old and new path differ is printed.

    :param repo: object whose ``path`` attribute offers ``absolute().as_posix()``
    :return: tuple ``(counts, nloc, cyclomatic)`` of ``defaultdict(int)``
    """
    path = repo.path.absolute().as_posix()

    counts = collections.defaultdict(int)
    nloc = collections.defaultdict(int)
    cyclomatic = collections.defaultdict(int)

    for commit in RepositoryMining(path).traverse_commits():
        for mod in commit.modifications:
            if mod.old_path != mod.new_path:
                print(mod.old_path, mod.new_path)
                if mod.new_path is None:
                    # Default of None so an untracked path does not raise
                    # KeyError.
                    counts.pop(mod.old_path, None)
                    nloc.pop(mod.old_path, None)
                    cyclomatic.pop(mod.old_path, None)
                    continue
                counts[mod.new_path] = 1
            else:
                counts[mod.new_path] += 1

            nloc[mod.new_path] = mod.nloc or 0
            cyclomatic[mod.new_path] = mod.complexity or 0

    return counts, nloc, cyclomatic
Пример #21
0
 def __init__(self, repository: str, owner: str):
     """Store owner and repository name and open the repository for mining.

     The random generator is seeded with 42.
     """
     self.owner = owner
     self.pairs = []
     # keep only the last path component as the repository name
     self.repository = os.path.basename(repository)
     self.repo = GitRepository(repository)
     self.mining = RepositoryMining(repository)
     random.seed(42)
Пример #22
0
    def test_author_count(self):
        """Check the summary table rows of selected ``Foo.java`` methods.

        A repository summary is created for ``../test-repos/method-test5``
        and, for each expected (commit, method) pair,
        ``method_in_commit_has_properties`` must confirm the expected
        properties.
        """
        repo_path = "../test-repos/method-test5"
        repo_summary = RepositorySummary()
        mining = RepositoryMining(path_to_repo=repo_path,
                                  from_commit=None,
                                  to_commit=None)
        repoMiner = RepositoryMiner(mining, repo_summary)
        repoMiner.create_repository_summary(repo_path, None, None)

        table = repo_summary.get_table()

        # (commit hash, method, expected properties)
        expectations = (
            ("d8df83f6437a33ded055527b43115ca8f3bc14eb",
             "Foo.java:Foo::noConditionInMethod()", NO_COND),
            ("d8df83f6437a33ded055527b43115ca8f3bc14eb",
             "Foo.java:Foo::neverAConditionChange()", NEVER_CHANGE),
            ("1353faee0fc24aa4c4c3e090003b98f7843c6ab0",
             "Foo.java:Foo::conditionChangedOnce()", ONCE_CHANGE),
            ("aaf2f93fa56cb71e0d6970f8f2d961e6d0128ee4",
             "Foo.java:Foo::conditionChangedTwice()", TWICE_CHANGE),
            ("3881c503d85a2232a49c95096ca21b84e746fa01",
             "Foo.java:Foo::twoDifferentConditionStatementsAdded()",
             TWO_DIFF_CONDITION_ADD),
            ("f29dfb7b552b1194d237098deb766e40f1ac009c",
             "Foo.java:Foo::oneConditionAddedAndRemovedAfter()",
             ONE_ADD_ONE_REMOVE),
            ("2965bf66ca6e60988997a2d3680e2543f19ec50e",
             "Foo.java:Foo::oneElseNeverChanged()", ONE_ELSE_NEVER_CHANGED),
            ("06dfd8d033338b5308a9e71f37df0e75462958d4",
             "Foo.java:Foo::oneDeletedAfter()", ONE_DEL_AFTER),
            ("7948cbcbf381db5eb36c39029b1a7389e355b3db",
             "Foo.java:Foo::twoElseOneDeleted()", TWO_ELSE_ONE_DELETED),
            ("0ff25aee20224f71cf0565bcb6f5da5de7514791",
             "Foo.java:Foo::fourElseAddedNoDeleted()", FOUR_ELSE_NO_DEL),
            ("a5298525a6f98c2e8d090ea5ae73d3676449b97b",
             "Foo.java:Foo::fourElseAddedAndDeleted()", FOUR_ELSE_DEL),
            ("0093c5ef33d5c8156865cac1faa8e55877046725",
             "Foo.java:Foo::twoElseIfAddedOneDeleted( int a)", TOW_ELSEIF),
        )
        for commit_hash, method_name, properties in expectations:
            self.assertTrue(
                method_in_commit_has_properties(table, commit_hash,
                                                method_name, properties))
Пример #23
0
def test_simple_remote_url():
    """Mining the remote pydriller repository up to 2018-10-20 yields 159 commits."""
    dt2 = datetime(2018, 10, 20)
    miner = RepositoryMining(
        path_to_repo="https://github.com/ishepard/pydriller.git",
        to=dt2)
    commits = list(miner.traverse_commits())
    assert len(commits) == 159
Пример #24
0
def mine(_type):
    """Time a traversal of the hadoop test repository and sample memory usage.

    Commits between 2017-01-01 and 2017-07-01 are traversed. For every
    commit the first field of ``memory_info()`` of the current process,
    divided by 2**20, is recorded. How much of each commit is touched
    depends on ``_type``:

    * ``0``: only the author name
    * ``1``: additionally the diff of every modification
    * anything else: additionally the complexity of every ``.java`` file

    :return: tuple ``(elapsed, samples)`` - the elapsed ``timedelta`` and
        the list of memory samples, one per commit
    """
    process = psutil.Process(os.getpid())
    dt1 = datetime(2017, 1, 1)
    dt2 = datetime(2017, 7, 1)
    memory_samples = []

    started_at = datetime.now()
    miner = RepositoryMining('test-repos-hadoop/hadoop', since=dt1, to=dt2)
    for commit in miner.traverse_commits():
        memory_samples.append(process.memory_info()[0] / (2**20))

        # The attribute reads below are kept only for their evaluation;
        # the values themselves are discarded.
        _ = commit.author.name

        if _type != 0:
            for mod in commit.modifications:
                _ = mod.diff

                if _type != 1 and mod.filename.endswith('.java'):
                    _ = mod.complexity

    finished_at = datetime.now()

    return finished_at - started_at, memory_samples
Пример #25
0
    def analyze(self, repo_path, repo_type):
        """Prepare mining of one repository unless a result file already exists.

        If ``./results/<repo name>.json`` exists, ``"?"`` is printed and
        nothing else happens. Otherwise ``self.repo_type`` is stored, the
        branch to mine is chosen from the repository name (``master`` by
        default), ``self.repo`` is set to a ``RepositoryMining`` over
        ``./repos/<repo name>`` restricted to that branch and to
        ``repo_type`` files, and an empty result file is created.

        :param repo_path: value from which ``extractRepoName`` derives the name
        :param repo_type: file type passed as the only entry of
            ``only_modifications_with_file_types``
        """
        repo_name = extractRepoName(repo_path)
        result_path = './results/' + repo_name + '.json'
        if os.path.isfile(result_path):
            print("?")
            return
        self.repo_type = repo_type

        # Some names are matched case-insensitively, the others exactly.
        lowered_name = repo_name.lower()
        if repo_name in ('ansible', 'dbeaver'):
            branch = 'devel'
        elif repo_name in ('home-assistant', 'HikariCP'):
            branch = 'dev'
        elif repo_name in ('glances', 'androidannotations', 'zaproxy', 'fescar'):
            branch = 'develop'
        elif lowered_name == 'rxjava' or repo_name == 'dex2jar':
            branch = '2.x'
        elif lowered_name == 'exoplayer':
            branch = 'release-v2'
        elif repo_name == 'hadoop':
            branch = 'trunk'
        elif repo_name == 'mockito':
            branch = 'release/2.x'
        else:
            branch = 'master'

        self.repo = RepositoryMining('./repos/{}'.format(repo_name), only_in_branch=branch, only_modifications_with_file_types=[repo_type])

        # Create (or truncate) the result file so later runs skip this repo.
        with open(result_path, 'w'):
            pass
Пример #26
0
def fetch_keyword_introduce(repo, keyword):
    """Print every added ``@Conditional...(`` annotation with its commit date.

    Commits touching ``.java`` files are traversed and each one is handed
    to its own thread, which scans the diffs of its modifications. The
    threads are started but not joined. ``keyword`` is accepted but not
    used by the search.
    """
    conditional_tag = re.compile(r'\+@Conditional.*\(')

    def process(data):
        # Scan one commit; a TypeError raised while scanning is ignored.
        try:
            for m in data.modifications:
                for e in conditional_tag.findall(m.diff):
                    # strip the leading "+@" and the trailing "("
                    print(e[2:-1], data.committer_date, sep=" ; ")
        except TypeError:
            pass

    miner = RepositoryMining(repo, only_modifications_with_file_types=['.java'])
    for commit in miner.traverse_commits():
        Thread(target=process, args=(commit,)).start()
# Stemmed search terms for non-functional fixes in commit messages.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated, which previously fused "perf"+"reliab", "replac"+"memory"
# and "leak"+"attack" into terms that never matched.
NON_FUNCTIONAL_SEARCH_TERMS = (
    "fix", "bug", "error", "secur", "maint",
    "stab", "portab", "efficien", "usab", "perf",
    "reliab", "testab", "changeab", "replac",
    "memory", "resource", "runtime", "#", "crash", "leak",
    "attack", "authenticat", "authoriz", "cipher", "crack",
    "decrypt", "encrypt", "vulnerab", "minimize", "optimize",
    "slow", "fast",
)


def main():
    """Write, per repository given on the command line, a CSV of matching commits.

    Each command-line argument is mined for commits touching ``.java`` or
    ``.py`` files. A commit is written (hash and lower-cased message) when
    its message contains one of ``NON_FUNCTIONAL_SEARCH_TERMS`` and
    ``filter(msg)`` is truthy. The output file is named after the argument
    with ``'../'`` and ``'/'`` removed, plus ``.csv``.
    """
    # the program is run with command line arguments representing
    # github repos
    for repo_arg in sys.argv[1:]:
        csv_name = repo_arg.replace('../', '').replace('/', '') + ".csv"

        # NB: using the with keyword will close the file automatically
        with open(csv_name, "w") as new_file:
            new_file.write('{:^40},{:^40}\n'.format('Commit ID:',
                                                    'Commit Message:'))

            miner = RepositoryMining(
                repo_arg,
                only_modifications_with_file_types=['.java', '.py'])
            for commit in miner.traverse_commits():
                msg = commit.msg.lower()
                # any() writes each commit at most once, however many terms
                # match.
                # NOTE(review): `filter` is presumably a project function
                # shadowing the builtin - the builtin would raise TypeError
                # when called with a single argument; confirm.
                if any(term in msg for term in NON_FUNCTIONAL_SEARCH_TERMS) \
                        and filter(msg):
                    # write the commit ID and committer message
                    new_file.write('{:^40},{:^40}\n'.format(
                        commit.hash, msg))
Пример #28
0
def test_projectname_multiple_repos():
    """Check that each commit mined from a list of repo paths reports the project name."""
    # The same repository path is passed three times on purpose.
    repo_path = 'test-repos/files_in_directories'
    miner = RepositoryMining(path_to_repo=[repo_path, repo_path, repo_path])

    for mined_commit in miner.traverse_commits():
        assert mined_commit.project_name == 'files_in_directories'
Пример #29
0
def test_diff_with_histogram(git_repo):
    """Check the parsed diff of a single commit mined with ``histogram_diff=True``.

    The first modification of the commit must contain every expected
    (line number, text) pair in its ``added`` and ``deleted`` lists.
    """
    miner = RepositoryMining(
        'test-repos/histogram',
        single="93df8676e6fab70d9677e94fd0f6b17db095e890",
        histogram_diff=True)
    first_commit = list(miner.traverse_commits())[0]
    parsed = first_commit.modifications[0].diff_parsed

    expected_added = [
        (4, '    {'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (7, '    }'),
        (8, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (13, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (14, '        return null;'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    for entry in expected_added:
        assert entry in parsed["added"]

    expected_deleted = [
        (6, '    {'),
        (7, '        return new ImageIcon(imgURL);'),
        (10, '    {'),
        (11, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (12, '    }'),
        (13, '    return null;'),
    ]
    for entry in expected_deleted:
        assert entry in parsed["deleted"]
Пример #30
0
    def clone_repo(self, repo_path, since, repo):
        """Mine a remote repository's commits and dump them to a CSV report.

        ``RepositoryMining`` is run on ``repo['remote_repo']`` with
        ``clone_repo_to=repo_path``, ``only_in_branch=repo['branch']``,
        ``since=since`` and the file-type filter taken from
        ``self.cfg['filters']['file_types']``.  One row is collected per
        commit, and the report at
        ``self.cfg['paths']['commit_report'] + repo['commit_file']`` is
        rewritten after every commit and once more after the loop.

        Args:
            repo_path: Forwarded as ``clone_repo_to``; also used as the
                prefix of the progress message.
            since: Forwarded unchanged as ``RepositoryMining``'s ``since``.
            repo: Mapping read for the keys ``'name'``, ``'remote_repo'``,
                ``'branch'`` and ``'commit_file'``.
        """
        # cloning repository from remote
        print('cloning to {}'.format(repo_path + repo['name']))
        data = pd.DataFrame(columns=self.cfg['commit_columns'])

        for commit in RepositoryMining(
                repo['remote_repo'],
                only_modifications_with_file_types=self.cfg['filters']
                ['file_types'],
                only_in_branch=repo['branch'],
                since=since,
                clone_repo_to=repo_path).traverse_commits():
            # build dictionary containing modifications
            mdf_detail = {}
            mdf_files = []
            mdf_methods = []
            for modif in commit.modifications:
                # TODO remove print
                # A repeated filename overwrites the earlier entry in
                # mdf_detail below; this only reports that it happened.
                if modif.filename in mdf_detail:
                    print(f'filename{modif.filename} en double')
                mdf_files.append(modif.filename)

                changed_names = [
                    method.name for method in modif.changed_methods
                ]
                mdf_detail[modif.filename] = {
                    "old_path": modif.old_path,
                    "new_path": modif.new_path,
                    "change_type": modif.change_type,
                    "cyclomatic": modif.complexity,
                    "changed_methods": changed_names
                }
                mdf_methods.extend(changed_names)

            row = {
                "hash": commit.hash,
                "author_date": commit.author_date,
                "is_merge": commit.merge,
                "nb_files": commit.files,
                "files": mdf_files,
                "methods": mdf_methods,
                "nb_deletions": commit.deletions,
                # NOTE(review): this key has a trailing space, so it will not
                # line up with a "nb_insertions" column -- confirm against
                # cfg['commit_columns'] before changing it.
                "nb_insertions ": commit.insertions,
                "modifications": str(mdf_detail)
            }
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported way to add a row.  sort=False keeps the existing
            # column order and puts any new columns at the end.
            data = pd.concat([data, pd.DataFrame([row])],
                             ignore_index=True,
                             sort=False)

            # Best-effort checkpoint after each commit: a failed write is
            # reported and mining continues.
            try:
                data.to_csv(self.cfg['paths']['commit_report'] +
                            repo['commit_file'],
                            index=False)
            except Exception as e:
                print(e)

        # Final write; unlike the checkpoints above, errors propagate here.
        data.to_csv(self.cfg['paths']['commit_report'] + repo['commit_file'],
                    index=False)