def test_diff_histogram():
    """Check parse_diff output for the same commit with and without histogram diff."""
    repo_path = 'test-repos/test13'
    sha = "93df8676e6fab70d9677e94fd0f6b17db095e890"

    # --- without histogram ---
    commit = list(
        RepositoryMining(repo_path, single=sha).traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository(repo_path)
    diff = gr.parse_diff(mod.diff)

    assert len(diff['added']) == 11
    for pair in [
            (3, ' if (path == null)'),
            (5, ' log.error("Icon path is null");'),
            (6, ' return null;'),
            (8, ''),
            (9, ' java.net.URL imgURL = GuiImporter.class.getResource(path);'),
            (10, ''),
            (11, ' if (imgURL == null)'),
            (12, ' {'),
            (14, ' return null;'),
            (16, ' else'),
            (17, ' return new ImageIcon(imgURL);')]:
        assert pair in diff['added']

    assert len(diff['deleted']) == 7
    for pair in [
            (3, ' java.net.URL imgURL = GuiImporter.class.getResource(path);'),
            (4, ''),
            (5, ' if (imgURL != null)'),
            (7, ' return new ImageIcon(imgURL);'),
            (9, ' else'),
            (10, ' {'),
            (13, ' return null;')]:
        assert pair in diff['deleted']

    # --- with histogram ---
    commit = list(
        RepositoryMining(repo_path, single=sha,
                         histogram_diff=True).traverse_commits())[0]
    mod = commit.modifications[0]
    gr = GitRepository(repo_path)
    diff = gr.parse_diff(mod.diff)

    for pair in [
            (4, ' {'),
            (5, ' log.error("Icon path is null");'),
            (6, ' return null;'),
            (7, ' }'),
            (8, ''),
            (11, ' if (imgURL == null)'),
            (12, ' {'),
            (13, ' log.error("Couldn\'t find icon: " + imgURL);'),
            (14, ' return null;'),
            (17, ' return new ImageIcon(imgURL);')]:
        assert pair in diff["added"]

    for pair in [
            (6, ' {'),
            (7, ' return new ImageIcon(imgURL);'),
            (10, ' {'),
            (11, ' log.error("Couldn\'t find icon: " + imgURL);'),
            (12, ' }'),
            (13, ' return null;')]:
        assert pair in diff["deleted"]
def update_has_impact_code(apps, schema_editor):
    """Recompute the has_impact_loc flag for every Modification of project 2."""
    # We can't import the Person model directly as it may be a newer
    # version than this migration expects. We use the historical version.
    Modification = apps.get_model('contributions', 'Modification')
    for mod in Modification.objects.filter(commit__tag__project_id=2):
        repo = GitRepository(mod.commit.tag.project.project_path)
        diff_text = repo.parse_diff(mod.diff)
        # Render each (line_number, content) pair as "\n<num>  <content>"
        # (two spaces), matching the previous concatenation output exactly.
        added_text = "".join(
            "\n{}  {}".format(num, content)
            for num, content in diff_text['added'])
        deleted_text = "".join(
            "\n{}  {}".format(num, content)
            for num, content in diff_text['deleted'])
        added_uncommented_lines = detect_impact_loc(added_text)
        deleted_uncommented_lines = detect_impact_loc(deleted_text)
        mod.has_impact_loc = added_uncommented_lines or deleted_uncommented_lines
        mod.save()
def main():
    """Scan a git repository for Java methods that lost exactly one parameter.

    Usage: script <repo_path>. Writes the findings to 'report.csv' in the
    current directory, one row per (commit, file, old signature, new signature).
    """
    print('Running...')
    # Report_data: a list uses to store the result data
    report_data = []
    # Get the repo path from command line arguments
    path = sys.argv[1]
    # Analyse the commit in the repo
    git_repo = GitRepository(path)
    commits = git_repo.get_list_commits()
    # NOTE(review): i and j are unused loop counters.
    for i,commit in enumerate(commits):
        for j,mod in enumerate(commit.modifications):
            diffs = git_repo.parse_diff(mod.diff)
            # Get Method Info From Modification Detail.
            # Presumably get_func_info returns five parallel lists (name,
            # signature, modifier, modified row, parameter list) — TODO confirm.
            add_func_name,add_func_sig,add_func_modifier,add_func_mod_row,add_func_params = get_func_info(diffs, 'added')
            del_func_name,del_func_sig,del_func_modifier,del_func_mod_row,del_func_params = get_func_info(diffs, 'deleted')
            # Find method that have removed a parameter.
            # Consider the Overloading in Java method:
            # 1 - added method number <= deleted method number
            for add_idx,add_func in enumerate(add_func_name):
                if add_func in del_func_name:
                    add_override_count = add_func_name.count(add_func)
                    del_override_count = del_func_name.count(add_func)
                    if (add_override_count <= del_override_count):
                        # Find the deleted method that near the added method
                        line_num = add_func_mod_row[add_idx]
                        del_idx = find_mod_func_pair(line_num, add_func, del_func_name, del_func_mod_row)
                        # Same modifier, old version has exactly one extra
                        # parameter, and all new parameters appear in the old set.
                        if ( del_func_modifier[del_idx] == add_func_modifier[add_idx]):
                            if ( len(del_func_params[del_idx]) == len(add_func_params[add_idx])+1):
                                if (all(elem in del_func_params[del_idx] for elem in add_func_params[add_idx])):
                                    new_sig = add_func + add_func_sig[add_idx]
                                    old_sig = add_func + del_func_sig[del_idx]
                                    report_data.append([commit.hash,mod.filename,old_sig,new_sig])
            # 2 - added method number > deleted method number
            # (mirror of case 1, pairing from the deleted side instead)
            for del_idx,del_func in enumerate(del_func_name):
                if del_func in add_func_name:
                    add_override_count = add_func_name.count(del_func)
                    del_override_count = del_func_name.count(del_func)
                    if (add_override_count > del_override_count):
                        line_num = del_func_mod_row[del_idx]
                        add_idx = find_mod_func_pair(line_num, del_func, add_func_name, add_func_mod_row)
                        if ( del_func_modifier[del_idx] == add_func_modifier[add_idx]):
                            if ( len(del_func_params[del_idx]) == len(add_func_params[add_idx])+1):
                                if (all(elem in del_func_params[del_idx] for elem in add_func_params[add_idx])):
                                    new_sig = del_func + add_func_sig[add_idx]
                                    old_sig = del_func + del_func_sig[del_idx]
                                    report_data.append([commit.hash,mod.filename,old_sig,new_sig])
    # Save the report data to a CSV (deduplicated; row order is not preserved
    # because the rows pass through a set)
    report_data = list(set(tuple(element) for element in report_data))
    header = ['Commit SHA', 'Java File', 'Old function signature', 'New function signature']
    report_data.insert(0,header)
    with open('report.csv', 'w', newline='') as resultFile:
        wr = csv.writer(resultFile, dialect='excel')
        wr.writerows(report_data)
    print('Finish!')
def test_ignore_add_whitespaces_and_modified_normal_line():
    """skip_whitespaces=True must drop whitespace-only lines from the parsed diff."""
    repo_path = 'test-repos/test14'
    sha = "52716ef1f11e07308b5df1b313aec5496d5e91ce"
    gr = GitRepository(repo_path)

    commit = list(
        RepositoryMining(repo_path, single=sha).traverse_commits())[0]
    assert len(commit.modifications) == 1
    parsed_normal_diff = gr.parse_diff(commit.modifications[0].diff)

    commit = list(
        RepositoryMining(repo_path, skip_whitespaces=True,
                         single=sha).traverse_commits())[0]
    assert len(commit.modifications) == 1
    parsed_wo_whitespaces_diff = gr.parse_diff(commit.modifications[0].diff)

    # Whitespace-skipping removes one added line and the only deleted line.
    assert len(parsed_normal_diff['added']) == 2
    assert len(parsed_wo_whitespaces_diff['added']) == 1
    assert len(parsed_normal_diff['deleted']) == 1
    assert len(parsed_wo_whitespaces_diff['deleted']) == 0
def test_tabs():
    """Tabs and blank added lines must survive parse_diff unchanged."""
    diff = '\r\n'.join(
        ['@@ -1,4 +1,17 @@', ' a', ' b', '-c', '+\td', '+cc']
        + ['+'] * 9
        + ['+\tg', '+', '+j', ' '])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']

    assert 1 == len(deleted)
    assert (3, 'c') in deleted

    expected_added = (
        [(3, '\td'), (4, 'cc')]
        + [(n, '') for n in range(5, 14)]
        + [(14, '\tg'), (15, ''), (16, 'j')])
    assert 14 == len(added)
    for pair in expected_added:
        assert pair in added
def test_diff_no_newline():
    """
    If a file ends without a newline git represents this with the additional line
    \\ No newline at end of file
    in diffs. This test asserts these additional lines are parsed correctly.
    """
    gr = GitRepository('test-repos/no_newline')
    mod = gr.get_commit('52a78c1ee5d100528eccba0a3d67371dbd22d898').modifications[0]
    parsed_lines = gr.parse_diff(mod.diff)

    # 'test1' is rewritten with a trailing newline, so it appears on both sides:
    # deleted (old, no newline) and added (new, with newline).
    assert (1, 'test1') in parsed_lines['deleted']
    assert (1, 'test1') in parsed_lines['added']
    assert (2, 'test2') in parsed_lines['added']

    gr.clear()
def test_deletions():
    """A hunk with a single removed line yields one deletion and no additions."""
    diff = '\r\n'.join([
        '@@ -2,6 +2,7 @@ aa',
        ' bb',
        ' cc',
        ' log.info("aa")',
        '-log.debug("b")',
        ' dd',
        ' ee',
        ' ff',
    ])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)

    assert len(parsed_lines['added']) == 0
    assert len(parsed_lines['deleted']) == 1
    assert (5, 'log.debug("b")') in parsed_lines['deleted']
def find_occurence_in_commit(commit, word, file): conditional_added = 0 commit_with_conditional = [] commit_with_removed_conditional = [] for m in commit.modifications: if (str(m.source_code).find(word) != -1): file.write("************** date : " + str(commit.committer_date) + "*****************\n") diff = word gr = GitRepository('test-repos/test1') parsed_lines = gr.parse_diff(diff) if (len(parsed_lines['added']) > 0): conditional_added = conditional_added + len( parsed_lines['added']) lines = str(m.source_code).splitlines() commit_with_conditional.append(m.new_path) for line in lines: if line.find(word) != -1: file.write("\t\tligne ajouté : {}\n".format(line)) if (len(parsed_lines['deleted']) > 0): conditional_added = conditional_added + len( parsed_lines['deleted']) lines = str(m.source_code).splitlines() commit_with_removed_conditional.append(m.new_path) for line in lines: if line.find(word) != -1: file.write("\t\tligne retiré : {}\n".format(line)) if (len(commit_with_conditional) > 0): file.write(str(commit_with_conditional) + "\n\n") return conditional_added
def test_real_example():
    """Multi-hunk diff: line numbers restart from each @@ hunk header."""
    diff = '\r\n'.join([
        '@@ -72,7 +72,7 @@ public class GitRepository implements SCM {',
        ' ',
        ' private static Logger log = Logger.getLogger(GitRepository.class);',
        ' ',
        '- public GitRepository(String path) {',
        '+ public GitRepository2(String path) {',
        ' this.path = path;',
        ' this.maxNumberFilesInACommit = checkMaxNumberOfFiles();',
        ' this.maxSizeOfDiff = checkMaxSizeOfDiff();',
        '@@ -155,7 +155,7 @@ public class GitRepository implements SCM {',
        ' return git.getRepository().getBranch();',
        ' }',
        ' ',
        '- public ChangeSet getHead() {',
        '+ public ChangeSet getHead2() {',
        ' Git git = null;',
        ' try {',
        ' git = openRepository();',
        '@@ -320,6 +320,7 @@ public class GitRepository implements SCM {',
        ' ',
        ' return diffs;',
        ' }',
        '+ newline',
        ' ',
        ' private void setContext(DiffFormatter df) {',
        ' String context = System.getProperty("git.diffcontext");',
    ])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']

    assert 2 == len(deleted)
    assert (75, ' public GitRepository(String path) {') in deleted
    assert (158, ' public ChangeSet getHead() {') in deleted

    assert 3 == len(added)
    assert (75, ' public GitRepository2(String path) {') in added
    assert (158, ' public ChangeSet getHead2() {') in added
    assert (323, ' newline') in added
def test_extract_line_number_and_content():
    """Every changed line is reported as a (line_number, content) pair."""
    old_lines = ['a', 'b', 'c', 'log.info("a")', 'd', 'e', 'f']
    new_lines = ['aa', 'bb', 'cc', 'log.info("aa")', 'dd', 'ee', 'ff']
    diff = '\r\n'.join(
        ['@@ -1,8 +1,8 @@']
        + ['-' + line for line in old_lines]
        + ['+' + line for line in new_lines]
        + [' '])
    gr = GitRepository('test-repos/test1')
    parsed_lines = gr.parse_diff(diff)
    added = parsed_lines['added']
    deleted = parsed_lines['deleted']

    for number, content in enumerate(old_lines, start=1):
        assert (number, content) in deleted
    for number, content in enumerate(new_lines, start=1):
        assert (number, content) in added
def consumeInitialRepositoryAnalyses(self, java_repository_analysis_result: JavaRepositoryAnalysisResult):
    """Mine a locally cloned repository with PyDriller and publish the results.

    Walks every commit of the repo at `java_repository_analysis_result.path`,
    accumulating commit/modification counts, committers, LOC totals and the
    project duration, then sends a PythonRepositoryAnalysisResult downstream.
    If the clone is missing on disk, the message is returned to its origin queue.

    Fixes over the previous revision:
      * added/deleted LOC were accumulated as the parse_diff *line numbers*
        (item[0]) instead of the number of changed lines;
      * GitRepository was re-instantiated for every modification;
      * lastCommitDate was never set for single-commit repositories.
    """
    noOfCommits = 0
    noOfModifications = 0
    totalLOC = 0
    visitedFile = []
    committerList = []
    fileLocCache = {}
    firstCommitDate = datetime.datetime.now()
    lastCommitDate = datetime.datetime.now()
    totalAddedLOC = 0
    totalDeletedLOC = 0

    # check if repo clone exists on disk
    if path.exists(java_repository_analysis_result.path):
        # run pydriller analysis and submit result to next queue
        print('path exists: ' + java_repository_analysis_result.path)
        # One repository handle for the whole walk (was re-created per modification).
        gr = GitRepository(java_repository_analysis_result.path)
        # iterate through repo commits
        for commit in RepositoryMining(java_repository_analysis_result.path).traverse_commits():
            noOfCommits += 1
            if noOfCommits == 1:
                firstCommitDate = commit.committer_date
            # Always track the latest date so single-commit repos get a
            # sensible (zero-length) duration instead of "until now()".
            lastCommitDate = commit.committer_date
            if commit.committer not in committerList:
                committerList.append(commit.committer)
            for modification in commit.modifications:
                noOfModifications += 1
                if modification.new_path is not None:
                    visitedFile.append(modification.new_path)  # add file to list of visited files
                    if modification.nloc is not None:
                        totalLOC += modification.nloc
                        fileLocCache.update({modification.new_path: modification.nloc})
                        parsed_lines = gr.parse_diff(modification.diff)
                        # Count changed lines; the old code summed line numbers.
                        totalAddedLOC += len(parsed_lines['added'])
                        totalDeletedLOC += len(parsed_lines['deleted'])
                    else:
                        fileLocCache.update({modification.new_path: 0})

        # remove visited files from cache (already added to totalLOC)
        for file in list(fileLocCache):
            if file in visitedFile:
                del fileLocCache[file]
        # add remaining to totalLOC
        for file, fileLoc in fileLocCache.items():
            if file is not None and fileLoc is not None:
                totalLOC += fileLoc

        result = PythonRepositoryAnalysisResult()
        result.url = java_repository_analysis_result.url
        result.size_at_commit = java_repository_analysis_result.size_at_commit
        result.number_of_files = java_repository_analysis_result.number_of_files
        result.linesAdded = totalAddedLOC
        result.linesDeleted = totalDeletedLOC
        result.projectLOC = totalLOC
        result.numberOfCommits = noOfCommits
        result.numOfDevs = len(committerList)
        timeDelta = relativedelta.relativedelta(lastCommitDate, firstCommitDate)
        # Project duration expressed in fractional months.
        result.projectDuration = timeDelta.years * 12 + timeDelta.months + timeDelta.days / 30.4167 + timeDelta.hours / 730.001 + timeDelta.minutes / 43800

        print('url: ' + str(java_repository_analysis_result.url))
        print('size_at_commit: ' + str(java_repository_analysis_result.size_at_commit))
        print('number_of_files: ' + str(java_repository_analysis_result.number_of_files))
        print('noOfCommits: ' + str(noOfCommits))
        print('noOfModifications: ' + str(noOfModifications))
        print('totalAddedLOC: ' + str(totalAddedLOC))
        print('totalDeletedLOC: ' + str(totalDeletedLOC))
        print('totalLOC: ' + str(totalLOC))
        print('numberOfDevs: ', len(committerList))
        print('firstCommitDate: ' + str(firstCommitDate))
        print('lastCommitDate: ' + str(lastCommitDate))
        print('projectLength: ' + str(result.projectDuration))
        print('timestamp: ' + str(datetime.datetime.now()))
        print('')

        conf = Confirmation()
        conf.repository_name = java_repository_analysis_result.url
        self.workflow.getRepositorySyncTopic().send(conf, "PythonRepositoryAnalyzer")
        self.sendToRepositoryAnalysisResults(result)
    else:
        # not found on local machine, sending it back to origin queue
        self.workflow.getInitialRepositoryAnalyses().send(java_repository_analysis_result, "JavaRepositoryAnalyzer")
achou=False; testestring="" else: if testestring.isspace(): testestring="" else: achou=False testestring="" testestring="" else : testestring+=teste #print(modification.filename) if re.search("except.*:.*\n.*pass", diff): parsed_lines = repo.parse_diff(diff) added = parsed_lines['added'] for lineNumber, lineStr in added: if re.search("except.*:.*", auxiliar): if lineStr.endswith('pass') : if(lineNumber==linhaanterior+1): #contador0+=1 #print(contador0) #auxiliarParaNomeArquivo = modification.filename #auxiliarParaNomeArquivo = auxiliarParaNomeArquivo.replace('.py', 'csv') pasta = 'Testes/'+lista.project_name+"/"
# keys = np.array(bug_dict.keys()) vals = np.array(list(bug_dict.values())) # count = 0 for i in vals: # print("How many element in the list? \n", len(i)) if len(i) == 1: for candidate_commit in RepositoryMining("~/openstack", single=i).traverse_commits(): print("cand sha is: ", candidate_commit.hash) print("cand msg is: ", candidate_commit.msg) for modified_files in candidate_commit.modifications: print("Modified this file : ", modification_file.filename) diff = modified_files.diff parsed_diff = gr.parse_diff(diff) print("This is the usual diff: {}".format(diff)) buggy_induced_commits = gr.get_commits_last_modified_lines( candidate_commit, modified_files) print("This is a bug inducing commit : ", buggy_induced_commits) pprint("Parsed diff {} :".format(parsed_diff)) # "This is this diff of the file : {}".format(parsed_diff) else: for x in i: for cand_commit in RepositoryMining("~/openstack/", single=x).traverse_commits(): print("cand sha is: {}".format(cand_commit.hash)) print("cand msg is: {}".format(cand_commit.msg)) for modified_files in cand_commit.modifications:
kconfig_commit_tags = [] makefile_commit_tags = [] am_commit_tags = [] commitResults = [] # if(commit.hash in listaC): # print('funfouuuu') for modification in commit.modifications: #print('entrou nas modss') files_changing_tags = [] if(('kconfig' in modification.filename.lower() or 'makefile' in modification.filename.lower()) and modification.change_type.value == 5): print('sou kconfig') diff = modification.diff parsed_lines = GR.parse_diff(diff) added = parsed_lines['added'] removed = parsed_lines['deleted'] file_source_code = modification.source_code.split('\n') classifier = SPLClassifier(added, removed, file_source_code) files_changing_tags = classifier.classify(modification.filename.lower(),features) # elif((re.match(r'\S*\.c', modification.filename.lower()) != None) or re.match(r'\S*\.h', modification.filename.lower()) != None): else: #print("SOU AM") if(modification.change_type.value != 1 and modification.change_type.value != 4): diff = modification.diff parsed_lines = GR.parse_diff(diff) added = parsed_lines['added'] removed = parsed_lines['deleted'] file_source_code = modification.source_code.split('\n') classifier = SPLClassifier(added, removed, file_source_code)
def __diff_text__(self):
    """Parse this object's raw diff into pydriller's added/deleted line mapping."""
    repo = GitRepository(self.commit.tag.project.project_path)
    return repo.parse_diff(self.diff)
lastCommitDate = commit.committer_date if commit.committer not in committerList: committerList.append(commit.committer) for modification in commit.modifications: noOfModifications += 1 if modification.new_path != None: visitedFile.append( modification.new_path) # add file to list of visited files if modification.nloc != None: totalLOC += modification.nloc fileLocCache.update({modification.new_path: modification.nloc}) gr = GitRepository(repoPath) parsed_lines = gr.parse_diff(modification.diff) for item in parsed_lines['added']: totalAddedLOC += item[0] for item in parsed_lines['deleted']: totalDeletedLOC += item[0] else: fileLocCache.update({modification.new_path: 0}) # remove visited files from cache (already added to totalLOC) for file in list(fileLocCache): if file in visitedFile: del fileLocCache[file] # add remaining to totalLOC for file, fileLoc in fileLocCache.items():