def test_diff_histogram():
    """Parse the same commit's diff with and without ``histogram_diff=True``.

    The first half checks the added/deleted entries produced when the
    commit is mined with default options; the second half repeats the
    exercise with ``histogram_diff=True`` and checks the entries that
    variant is expected to contain.
    """
    repo_path = 'test-repos/test13'
    sha = "93df8676e6fab70d9677e94fd0f6b17db095e890"

    # --- mined with default options -------------------------------------
    plain_commit = list(
        RepositoryMining(repo_path, single=sha).traverse_commits())[0]
    plain_mod = plain_commit.modifications[0]
    plain = GitRepository(repo_path).parse_diff(plain_mod.diff)

    plain_added = [
        (3, '    if (path == null)'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (8, ''),
        (9, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (10, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (14, '        return null;'),
        (16, '    else'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    assert len(plain['added']) == 11
    for entry in plain_added:
        assert entry in plain['added']

    plain_deleted = [
        (3, '    java.net.URL imgURL = GuiImporter.class.getResource(path);'),
        (4, ''),
        (5, '    if (imgURL != null)'),
        (7, '        return new ImageIcon(imgURL);'),
        (9, '    else'),
        (10, '    {'),
        (13, '    return null;'),
    ]
    assert len(plain['deleted']) == 7
    for entry in plain_deleted:
        assert entry in plain['deleted']

    # --- mined with histogram_diff=True ---------------------------------
    histogram_commit = list(
        RepositoryMining(repo_path, single=sha,
                         histogram_diff=True).traverse_commits())[0]
    histogram_mod = histogram_commit.modifications[0]
    histogram = GitRepository(repo_path).parse_diff(histogram_mod.diff)

    histogram_added = [
        (4, '    {'),
        (5, '        log.error("Icon path is null");'),
        (6, '        return null;'),
        (7, '    }'),
        (8, ''),
        (11, '    if (imgURL == null)'),
        (12, '    {'),
        (13, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (14, '        return null;'),
        (17, '        return new ImageIcon(imgURL);'),
    ]
    for entry in histogram_added:
        assert entry in histogram["added"]

    histogram_deleted = [
        (6, '    {'),
        (7, '        return new ImageIcon(imgURL);'),
        (10, '    {'),
        (11, '        log.error("Couldn\'t find icon: " + imgURL);'),
        (12, '    }'),
        (13, '    return null;'),
    ]
    for entry in histogram_deleted:
        assert entry in histogram["deleted"]
# Example #2
# 0
    def update_has_impact_code(apps, schema_editor):
        """Recompute ``has_impact_loc`` for the modifications of project 2.

        For each ``Modification`` whose commit's tag belongs to the project
        with id 2, the stored diff is parsed, its added and deleted lines are
        rendered as numbered text, and both texts are passed to
        ``detect_impact_loc``. The modification is flagged when either call
        returns a truthy value, then saved.

        Args:
            apps: registry used to look up the historical ``Modification``
                model via ``apps.get_model``.
            schema_editor: unused here.
        """
        # We can't import the Modification model directly as it may be a newer
        # version than this migration expects. We use the historical version.
        Modification = apps.get_model('contributions', 'Modification')

        def numbered_text(lines):
            # Each (line_number, content) entry becomes
            # "\n<line_number>  <content>" (two spaces between the fields).
            # Joining once avoids rebuilding the string on every iteration.
            return "".join(
                "\n" + str(line[0]) + '  ' + line[1] for line in lines)

        for mod in Modification.objects.filter(commit__tag__project_id=2):
            GR = GitRepository(mod.commit.tag.project.project_path)
            diff_text = GR.parse_diff(mod.diff)

            added_uncommented_lines = detect_impact_loc(
                numbered_text(diff_text['added']))
            deleted_uncommented_lines = detect_impact_loc(
                numbered_text(diff_text['deleted']))
            mod.has_impact_loc = added_uncommented_lines or deleted_uncommented_lines

            mod.save()
# Example #3
# 0
def _is_param_removal(old_modifier, old_params, new_modifier, new_params):
    """Return True when the new signature is the old one minus one parameter.

    The modifiers must be equal, the old parameter list must be exactly one
    element longer than the new one, and every new parameter must also
    appear among the old parameters.
    """
    return (old_modifier == new_modifier
            and len(old_params) == len(new_params) + 1
            and all(elem in old_params for elem in new_params))


def main():
    """Report methods that lost a parameter and save them to ``report.csv``.

    The repository path is read from ``sys.argv[1]``. For every modification
    of every commit, the functions found on the added and deleted sides of
    the diff (via ``get_func_info``) are paired by name, and a row
    ``(commit hash, file name, old signature, new signature)`` is recorded
    whenever the added signature equals the deleted one minus exactly one
    parameter. Duplicate rows are written once, in first-seen order.
    """
    print('Running...')
    # Rows are tuples so duplicates can be dropped while keeping their order.
    report_data = []
    # Get the repo path from command line arguments
    path = sys.argv[1]

    # Analyse the commits in the repo
    git_repo = GitRepository(path)
    for commit in git_repo.get_list_commits():
        for mod in commit.modifications:
            diffs = git_repo.parse_diff(mod.diff)
            # Get method info from the modification detail
            (add_func_name, add_func_sig, add_func_modifier,
             add_func_mod_row, add_func_params) = get_func_info(diffs, 'added')
            (del_func_name, del_func_sig, del_func_modifier,
             del_func_mod_row, del_func_params) = get_func_info(diffs, 'deleted')

            # Overloads share a name, so the side with fewer (or equally many)
            # occurrences of a name drives the pairing.
            # 1 - added method number <= deleted method number
            for add_idx, add_func in enumerate(add_func_name):
                if add_func not in del_func_name:
                    continue
                if add_func_name.count(add_func) > del_func_name.count(add_func):
                    continue
                # Find the deleted method paired with this added method
                # (presumably the nearest one by row; see find_mod_func_pair).
                del_idx = find_mod_func_pair(add_func_mod_row[add_idx], add_func,
                                             del_func_name, del_func_mod_row)
                if _is_param_removal(del_func_modifier[del_idx],
                                     del_func_params[del_idx],
                                     add_func_modifier[add_idx],
                                     add_func_params[add_idx]):
                    old_sig = add_func + del_func_sig[del_idx]
                    new_sig = add_func + add_func_sig[add_idx]
                    report_data.append(
                        (commit.hash, mod.filename, old_sig, new_sig))

            # 2 - added method number > deleted method number
            for del_idx, del_func in enumerate(del_func_name):
                if del_func not in add_func_name:
                    continue
                if add_func_name.count(del_func) <= del_func_name.count(del_func):
                    continue
                add_idx = find_mod_func_pair(del_func_mod_row[del_idx], del_func,
                                             add_func_name, add_func_mod_row)
                if _is_param_removal(del_func_modifier[del_idx],
                                     del_func_params[del_idx],
                                     add_func_modifier[add_idx],
                                     add_func_params[add_idx]):
                    old_sig = del_func + del_func_sig[del_idx]
                    new_sig = del_func + add_func_sig[add_idx]
                    report_data.append(
                        (commit.hash, mod.filename, old_sig, new_sig))

    # Save the report data to a CSV. dict.fromkeys drops duplicates while
    # preserving insertion order, so the file content is deterministic.
    unique_rows = list(dict.fromkeys(report_data))
    header = ['Commit SHA', 'Java File', 'Old function signature', 'New function signature']
    with open('report.csv', 'w', newline='') as resultFile:
        wr = csv.writer(resultFile, dialect='excel')
        wr.writerow(header)
        wr.writerows(unique_rows)
    print('Finish!')
def test_ignore_add_whitespaces_and_modified_normal_line():
    """Compare the parsed diff of one commit with and without ``skip_whitespaces``.

    Mined with default options the single modification has two added lines
    and one deleted line; mined with ``skip_whitespaces=True`` it has one
    added line and no deleted line.
    """
    repo_path = 'test-repos/test14'
    sha = "52716ef1f11e07308b5df1b313aec5496d5e91ce"
    repo = GitRepository(repo_path)

    def parse_only_modification(**mining_options):
        # Mine the commit with the given options and parse its sole diff.
        mined = list(
            RepositoryMining(repo_path, single=sha,
                             **mining_options).traverse_commits())[0]
        assert len(mined.modifications) == 1
        return repo.parse_diff(mined.modifications[0].diff)

    normal = parse_only_modification()
    whitespace_free = parse_only_modification(skip_whitespaces=True)

    assert len(normal['added']) == 2
    assert len(whitespace_free['added']) == 1

    assert len(normal['deleted']) == 1
    assert len(whitespace_free['deleted']) == 0
# Example #5
# 0
def test_tabs():
    """Parse a hunk with tab-prefixed and empty added lines.

    The expected entries keep the leading tab in the content and number the
    run of empty added lines consecutively.
    """
    hunk = [
        '@@ -1,4 +1,17 @@',
        ' a',
        ' b',
        '-c',
        '+\td',
        '+cc',
    ]
    hunk += ['+'] * 9  # nine empty added lines (new lines 5..13)
    hunk += ['+\tg', '+', '+j', ' ']
    diff = '\r\n'.join(hunk)

    parsed = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed['added']
    deleted = parsed['deleted']

    assert (3, 'c') in deleted
    assert 1 == len(deleted)

    expected_added = [(3, '\td'), (4, 'cc')]
    expected_added += [(line_no, '') for line_no in range(5, 14)]
    expected_added += [(14, '\tg'), (15, ''), (16, 'j')]
    for entry in expected_added:
        assert entry in added
    assert 14 == len(added)
# Example #6
# 0
def test_diff_no_newline():
    """
    Git flags a file that lacks a trailing newline with an extra diff line,
        \\ No newline at end of file
    and this test checks the parsed result of a diff containing that marker.
    """
    repo = GitRepository('test-repos/no_newline')

    commit = repo.get_commit('52a78c1ee5d100528eccba0a3d67371dbd22d898')
    parsed = repo.parse_diff(commit.modifications[0].diff)

    added_lines = parsed['added']
    deleted_lines = parsed['deleted']

    # 'test1' shows up on both sides: removed without the newline ...
    assert (1, 'test1') in deleted_lines
    # ... and re-added with it, followed by the new second line.
    assert (1, 'test1') in added_lines
    assert (2, 'test2') in added_lines

    repo.clear()
# Example #7
# 0
def test_deletions():
    """Parse a hunk with a single removed line and no added lines."""
    hunk = [
        '@@ -2,6 +2,7 @@ aa',
        ' bb',
        ' cc',
        ' log.info("aa")',
        '-log.debug("b")',
        ' dd',
        ' ee',
        ' ff',
    ]
    diff = '\r\n'.join(hunk)

    parsed = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed['added']
    deleted = parsed['deleted']

    # The removed line is the fourth line of a hunk starting at old line 2.
    assert (5, 'log.debug("b")') in deleted
    assert len(deleted) == 1
    assert len(added) == 0
# Example #8
# 0
def find_occurence_in_commit(commit, word, file):
    """Log modifications whose source contains ``word`` and count changed lines.

    For every modification of ``commit`` whose ``source_code`` (as a string)
    contains ``word``, a dated banner and each source line containing
    ``word`` are written to ``file``, and the number of added plus deleted
    lines of that modification's diff is accumulated.

    Args:
        commit: object exposing ``modifications`` and ``committer_date``.
        word: substring to search for in each modification's source code.
        file: writable text stream receiving the log output.

    Returns:
        Total number of added and deleted diff lines over the matching
        modifications.
    """
    conditional_added = 0
    commit_with_conditional = []

    for m in commit.modifications:
        source = str(m.source_code)
        if word not in source:
            continue

        file.write("************** date : " + str(commit.committer_date) +
                   "*****************\n")

        # Parse the modification's own diff. The previous code passed the
        # search word to parse_diff, so both lists were empty for any word
        # that does not itself look like a diff line.
        gr = GitRepository('test-repos/test1')
        parsed_lines = gr.parse_diff(m.diff)
        conditional_added += (len(parsed_lines['added']) +
                              len(parsed_lines['deleted']))

        commit_with_conditional.append(m.new_path)

        # NOTE(review): the lines reported below come from the full source,
        # not from the diff, and the same lines are emitted under both the
        # "added" and "removed" labels -- confirm whether diff lines were
        # intended here.
        matching_lines = [line for line in source.splitlines() if word in line]
        for line in matching_lines:
            file.write("\t\tligne ajouté : {}\n".format(line))
        for line in matching_lines:
            file.write("\t\tligne retiré : {}\n".format(line))

    if commit_with_conditional:
        file.write(str(commit_with_conditional) + "\n\n")

    return conditional_added
# Example #9
# 0
def test_real_example():
    """Parse a three-hunk diff and check the line numbers of each change.

    Two hunks rename a method (one deletion and one addition each) and the
    third only inserts a line.
    """
    diff = (
        '@@ -72,7 +72,7 @@ public class GitRepository implements SCM {\r\n'
        ' \r\n'
        '        private static Logger log = Logger.getLogger(GitRepository.class);\r\n'
        ' \r\n'
        '-       public GitRepository(String path) {\r\n'
        '+       public GitRepository2(String path) {\r\n'
        '                this.path = path;\r\n'
        '                this.maxNumberFilesInACommit = checkMaxNumberOfFiles();\r\n'
        '                this.maxSizeOfDiff = checkMaxSizeOfDiff();\r\n'
        '@@ -155,7 +155,7 @@ public class GitRepository implements SCM {\r\n'
        '                return git.getRepository().getBranch();\r\n'
        '        }\r\n'
        ' \r\n'
        '-       public ChangeSet getHead() {\r\n'
        '+       public ChangeSet getHead2() {\r\n'
        '                Git git = null;\r\n'
        '                try {\r\n'
        '                        git = openRepository();\r\n'
        '@@ -320,6 +320,7 @@ public class GitRepository implements SCM {\r\n'
        ' \r\n'
        '                return diffs;\r\n'
        '        }\r\n'
        '+       newline\r\n'
        ' \r\n'
        '        private void setContext(DiffFormatter df) {\r\n'
        '                String context = System.getProperty(\"git.diffcontext\");'
    )

    parsed = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed['added']
    deleted = parsed['deleted']

    expected_deleted = [
        (75, '       public GitRepository(String path) {'),
        (158, '       public ChangeSet getHead() {'),
    ]
    for entry in expected_deleted:
        assert entry in deleted
    assert 2 == len(deleted)

    expected_added = [
        (75, '       public GitRepository2(String path) {'),
        (158, '       public ChangeSet getHead2() {'),
        (323, '       newline'),
    ]
    for entry in expected_added:
        assert entry in added
    assert 3 == len(added)
# Example #10
# 0
def test_extract_line_number_and_content():
    """Parse a hunk that replaces seven lines and check numbers and contents."""
    old_lines = ['a', 'b', 'c', 'log.info("a")', 'd', 'e', 'f']
    new_lines = ['aa', 'bb', 'cc', 'log.info("aa")', 'dd', 'ee', 'ff']

    # Header, all removals, all additions, then a final context line holding
    # only the leading space.
    hunk = ['@@ -1,8 +1,8 @@']
    hunk += ['-' + text for text in old_lines]
    hunk += ['+' + text for text in new_lines]
    hunk.append(' ')
    diff = '\r\n'.join(hunk)

    parsed = GitRepository('test-repos/test1').parse_diff(diff)
    added = parsed['added']
    deleted = parsed['deleted']

    for line_no, text in enumerate(old_lines, start=1):
        assert (line_no, text) in deleted

    for line_no, text in enumerate(new_lines, start=1):
        assert (line_no, text) in added
# Example #11
# 0
    def consumeInitialRepositoryAnalyses(self, java_repository_analysis_result: JavaRepositoryAnalysisResult):
        """Mine a locally cloned repository and forward its aggregate statistics.

        When ``java_repository_analysis_result.path`` exists on disk, all
        commits are traversed to collect the number of commits and
        modifications, the distinct committers, the summed ``nloc`` of the
        modified files, the number of added and deleted diff lines, and the
        first and last commit dates. The figures are printed, a
        ``Confirmation`` is sent on the repository sync topic, and a
        ``PythonRepositoryAnalysisResult`` is handed to
        ``sendToRepositoryAnalysisResults``. When the path does not exist the
        job is sent back to the initial-analyses queue instead.

        Args:
            java_repository_analysis_result: job descriptor; its ``path``,
                ``url``, ``size_at_commit`` and ``number_of_files`` are read.
        """
        repo_path = java_repository_analysis_result.path

        # check if repo clone exists on disk
        if not path.exists(repo_path):
            # not found on local machine, sending it back to origin queue
            self.workflow.getInitialRepositoryAnalyses().send(java_repository_analysis_result, "JavaRepositoryAnalyzer")
            return

        noOfCommits = 0
        noOfModifications = 0
        totalLOC = 0

        visitedFiles = set()
        committerList = []
        fileLocCache = {}

        # Placeholders used only when the repository has no commits.
        firstCommitDate = datetime.datetime.now()
        lastCommitDate = datetime.datetime.now()

        totalAddedLOC = 0
        totalDeletedLOC = 0

        # run pydriller analysis and submit result to next queue
        print('path exists: ' + repo_path)

        # One parser instance is enough; it is only used for parse_diff.
        gr = GitRepository(repo_path)

        # iterate through repo commits
        for commit in RepositoryMining(repo_path).traverse_commits():
            noOfCommits += 1
            if noOfCommits == 1:
                firstCommitDate = commit.committer_date
            # Always track the latest commit so a single-commit repository
            # does not keep the datetime.now() placeholder as its last date.
            lastCommitDate = commit.committer_date

            if commit.committer not in committerList:
                committerList.append(commit.committer)

            for modification in commit.modifications:
                noOfModifications += 1
                if modification.new_path is not None:
                    visitedFiles.add(modification.new_path)  # add file to set of visited files
                    if modification.nloc is not None:
                        totalLOC += modification.nloc
                        fileLocCache[modification.new_path] = modification.nloc

                    # parse_diff entries are (line_number, text) tuples, so
                    # the number of changed lines is the length of each list,
                    # not the sum of the line numbers.
                    parsed_lines = gr.parse_diff(modification.diff)
                    totalAddedLOC += len(parsed_lines['added'])
                    totalDeletedLOC += len(parsed_lines['deleted'])
                else:
                    fileLocCache[modification.new_path] = 0

        # remove visited files from cache (already added to totalLOC)
        for file in list(fileLocCache):
            if file in visitedFiles:
                del fileLocCache[file]

        # add remaining to totalLOC
        # NOTE(review): every non-None key stored above is also a visited
        # file, so only the None key can remain and this loop adds nothing --
        # confirm the intended projectLOC definition.
        for file, fileLoc in fileLocCache.items():
            if file is not None and fileLoc is not None:
                totalLOC += fileLoc

        result = PythonRepositoryAnalysisResult()
        result.url = java_repository_analysis_result.url
        result.size_at_commit = java_repository_analysis_result.size_at_commit
        result.number_of_files = java_repository_analysis_result.number_of_files

        result.linesAdded = totalAddedLOC
        result.linesDeleted = totalDeletedLOC
        result.projectLOC = totalLOC
        result.numberOfCommits = noOfCommits
        result.numOfDevs = len(committerList)

        # Project duration expressed in (fractional) months.
        timeDelta = relativedelta.relativedelta(lastCommitDate, firstCommitDate)
        result.projectDuration = timeDelta.years * 12 + timeDelta.months + timeDelta.days / 30.4167 + timeDelta.hours / 730.001 + timeDelta.minutes / 43800

        print('url: ' + str(java_repository_analysis_result.url))
        print('size_at_commit: ' + str(java_repository_analysis_result.size_at_commit))
        print('number_of_files: ' + str(java_repository_analysis_result.number_of_files))

        print('noOfCommits: ' + str(noOfCommits))
        print('noOfModifications: ' + str(noOfModifications))
        print('totalAddedLOC: ' + str(totalAddedLOC))
        print('totalDeletedLOC: ' + str(totalDeletedLOC))
        print('totalLOC: ' + str(totalLOC))
        print('numberOfDevs: ', len(committerList))

        print('firstCommitDate: ' + str(firstCommitDate))
        print('lastCommitDate: ' + str(lastCommitDate))
        print('projectLength: ' + str(result.projectDuration))
        print('timestamp: ' + str(datetime.datetime.now()))
        print('')

        conf = Confirmation()
        conf.repository_name = java_repository_analysis_result.url

        self.workflow.getRepositorySyncTopic().send(conf, "PythonRepositoryAnalyzer")

        self.sendToRepositoryAnalysisResults(result)
# Example #12
# 0
                        				achou=False;
                        				testestring=""
                        			else:
                        				if testestring.isspace():
                        					testestring=""
                        				else:
                        					achou=False
                        					testestring=""

                        testestring=""
                    else :
                        testestring+=teste
                    
                #print(modification.filename)            
                if re.search("except.*:.*\n.*pass", diff):
                    parsed_lines = repo.parse_diff(diff)
                    added = parsed_lines['added']
                    

                    

                    for lineNumber, lineStr in added:
                        if re.search("except.*:.*", auxiliar):
                            if lineStr.endswith('pass') :
                                if(lineNumber==linhaanterior+1):
                                    #contador0+=1
                                    #print(contador0)

                                    #auxiliarParaNomeArquivo = modification.filename
                                    #auxiliarParaNomeArquivo = auxiliarParaNomeArquivo.replace('.py', 'csv')
                                    pasta = 'Testes/'+lista.project_name+"/"
# Example #13
# 0
# keys = np.array(bug_dict.keys())
vals = np.array(list(bug_dict.values()))
# count = 0
for i in vals:
    # print("How many element in the list? \n", len(i))
    if len(i) == 1:
        for candidate_commit in RepositoryMining("~/openstack",
                                                 single=i).traverse_commits():
            print("cand sha is: ", candidate_commit.hash)
            print("cand msg is: ", candidate_commit.msg)

            for modified_files in candidate_commit.modifications:
                print("Modified this file : ", modification_file.filename)
                diff = modified_files.diff
                parsed_diff = gr.parse_diff(diff)
                print("This is the usual diff:  {}".format(diff))
                buggy_induced_commits = gr.get_commits_last_modified_lines(
                    candidate_commit, modified_files)
                print("This is a bug inducing commit :  ",
                      buggy_induced_commits)
                pprint("Parsed diff {} :".format(parsed_diff))
                # "This is this diff of the file :  {}".format(parsed_diff)
    else:
        for x in i:
            for cand_commit in RepositoryMining("~/openstack/",
                                                single=x).traverse_commits():
                print("cand sha is: {}".format(cand_commit.hash))
                print("cand msg is: {}".format(cand_commit.msg))

                for modified_files in cand_commit.modifications:
# Example #14
# 0
    
    kconfig_commit_tags = []
    makefile_commit_tags = []
    am_commit_tags = []
    commitResults = []
    # if(commit.hash in listaC):
    #     print('funfouuuu')

    for modification in commit.modifications:
        #print('entrou nas modss')

        files_changing_tags = []
        if(('kconfig' in modification.filename.lower() or 'makefile' in modification.filename.lower()) and modification.change_type.value == 5):
            print('sou kconfig')
            diff = modification.diff
            parsed_lines = GR.parse_diff(diff)
            added = parsed_lines['added']
            removed = parsed_lines['deleted']
            file_source_code = modification.source_code.split('\n')
            classifier = SPLClassifier(added, removed, file_source_code)
            files_changing_tags = classifier.classify(modification.filename.lower(),features)
        # elif((re.match(r'\S*\.c', modification.filename.lower()) != None) or re.match(r'\S*\.h', modification.filename.lower()) != None):
        else:
            #print("SOU AM")
            if(modification.change_type.value != 1 and modification.change_type.value != 4):
                diff = modification.diff
                parsed_lines = GR.parse_diff(diff)
                added = parsed_lines['added']
                removed = parsed_lines['deleted']
                file_source_code = modification.source_code.split('\n')
                classifier = SPLClassifier(added, removed, file_source_code)
# Example #15
# 0
    def __diff_text__(self):
        """Return ``self.diff`` parsed by a GitRepository for this commit's project.

        The repository is opened on ``self.commit.tag.project.project_path``
        and the value returned by its ``parse_diff`` is passed through
        unchanged.
        """
        project_path = self.commit.tag.project.project_path
        return GitRepository(project_path).parse_diff(self.diff)
# Example #16
# 0
        lastCommitDate = commit.committer_date

    if commit.committer not in committerList:
        committerList.append(commit.committer)

    for modification in commit.modifications:
        noOfModifications += 1
        if modification.new_path != None:
            visitedFile.append(
                modification.new_path)  # add file to list of visited files
            if modification.nloc != None:
                totalLOC += modification.nloc
                fileLocCache.update({modification.new_path: modification.nloc})

            gr = GitRepository(repoPath)
            parsed_lines = gr.parse_diff(modification.diff)
            for item in parsed_lines['added']:
                totalAddedLOC += item[0]
            for item in parsed_lines['deleted']:
                totalDeletedLOC += item[0]

        else:
            fileLocCache.update({modification.new_path: 0})

# remove visited files from cache (already added to totalLOC)
for file in list(fileLocCache):
    if file in visitedFile:
        del fileLocCache[file]

# add remaining to totalLOC
for file, fileLoc in fileLocCache.items():