def _get_commit_refs(self, repo_url, local_path, from_tag, to_tag):
    """
    Scan all commits between the two tags [`from_tag` .. `to_tag`],
    extract every GitHub reference of the form `#{number}` from the
    commit messages, and return the numbers.

    :param repo_url: GitHub URL, used for finding issues/pull requests
    :type repo_url: str
    :param local_path: (Optional) path to scan a local repository and
        cross-reference with GitHub
    :type local_path: Path
    :param from_tag: git start tag
    :type from_tag: str
    :param to_tag: git end tag
    :type to_tag: str
    :return: GitHub issue/PR references
    :rtype: Set of ints
    """
    self.logger.info("Fetching commits between tags {}...{} ".format(from_tag, to_tag))
    if local_path:
        repo = RepositoryMining(local_path, from_tag=from_tag, to_tag=to_tag)
    else:
        repo = RepositoryMining(repo_url, from_tag=from_tag, to_tag=to_tag)
    commit_list = [re.findall(r'#\d+', commit.msg) for commit in repo.traverse_commits()]
    commit_list = sum(commit_list, [])  # flatten the per-commit lists
    return set(map(lambda cm: int(cm[1:]), commit_list))
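# A minimal, self-contained sketch of the reference-extraction step above,
# using only the standard library; the commit messages here are invented.
import re

msgs = ["Fix crash (#12)", "Merge #12 and #345", "Docs update"]
refs = sum((re.findall(r'#\d+', msg) for msg in msgs), [])  # flatten per-message hits
print(set(int(ref[1:]) for ref in refs))  # the set {12, 345}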
def fetch_keyword_introduce(repo, keyword):
    conditional_tag = re.compile(r'\+@Conditional.*\(')
    conds = dict()
    repo = RepositoryMining(repo, only_modifications_with_file_types=['.java'])
    commits = repo.traverse_commits()
    search = keyword  # note: currently unused

    def process(data):
        try:
            for m in data.modifications:
                matches = re.findall(conditional_tag, m.diff)
                for e in matches:
                    print(e[2:len(e) - 1], data.committer_date, sep=" ; ")
        except TypeError:
            # cannot analyse this commit (e.g. no parsable diff); skip it
            pass

    threads = []
    for commit in commits:
        t = Thread(target=process, args=(commit,))
        t.start()
        threads.append(t)
    # wait for all workers so no output is lost when the function returns
    for t in threads:
        t.join()
def collect_data(path_to_repo, commit_shas):
    rm = RepositoryMining(path_to_repo, only_commits=commit_shas)
    commits = []
    modifications = []
    for commit in rm.traverse_commits():
        commits.append(commit)
        for mod in commit.modifications:
            modifications.append((commit, mod))
    return commits, modifications
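# Hypothetical usage of collect_data(); the repository path and the SHA are
# placeholders.
commits, modifications = collect_data(
    "/tmp/some-repo",
    commit_shas=["1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"],
)
for commit, mod in modifications:
    print(commit.hash[:7], mod.filename, mod.added, mod.removed)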
def run_worker(i: int, repos: List[Path]) -> None:
    idx = 0
    for repo_path in repos:
        commit_count = 0
        start_time = time.time()
        # noinspection PyBroadException
        try:
            repo_mining = RepositoryMining(
                str(repo_path), only_modifications_with_file_types=['.py'])
            Path(f'./result{i}/').mkdir(exist_ok=True)
            with open(f'./result{i}/{repo_path.name}.stats.csv', 'w', newline='') as csv_file:
                writer = csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
                writer.writerow(COLUMNS)
                for commit in repo_mining.traverse_commits():
                    commit_count += 1
                    test_file_modifications = [
                        m for m in commit.modifications
                        if 'test' in m.filename
                    ]
                    if len(test_file_modifications) == 0:
                        continue
                    for modification in test_file_modifications:
                        writer.writerow((commit.project_name, commit.msg,
                                         modification.old_path,
                                         modification.new_path, commit.hash))
        except Exception:
            tb = traceback.format_exc()
            with open(f'error_log{i}.txt', 'a') as fp:
                fp.write(f'repo_path={repo_path}\n')
                fp.write(f'{tb}\n\n')
        finally:
            used_time = time.time() - start_time
            with open(f'progress_log{i}.txt', 'a') as fp:
                fp.write(f'idx={idx} (out of {len(repos)})\n')
                fp.write(f'repo_path={repo_path}\n')
                fp.write(f'commit_count={commit_count}\n')
                fp.write(f'used_time={used_time} seconds\n')
                fp.write(f'timestamp={datetime.datetime.now()}\n\n')
            idx += 1
def mine(self, **kwargs: Any) -> None:
    """Gather data from repository. To be extended in subclasses."""
    miner = RepositoryMining(self.repo, **kwargs)
    for commit in miner.traverse_commits():
        for m in commit.modifications:
            m.committer = commit.committer
            m.committer_date = commit.committer_date
            m.msg = commit.msg
            if self.include(m):
                self.update_stats(m)
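# A minimal subclass sketch for the extension point mentioned in the docstring.
# 'Miner' (the assumed base class name) and the include()/update_stats() bodies
# are illustrative only; the base class is expected to define self.repo and
# self.stats.
class PyFileMiner(Miner):
    def include(self, m):
        # keep only modifications to Python files
        return m.filename.endswith('.py')

    def update_stats(self, m):
        # e.g. count modifications per committer
        self.stats[m.committer.name] = self.stats.get(m.committer.name, 0) + 1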
def parse_by_date(repo, bgn, end):
    lines = []
    miner = RepositoryMining(repo, since=bgn, to=end)
    for commit in miner.traverse_commits():
        for modification in commit.modifications:
            added = modification.diff_parsed['added']
            for item in added:
                line = item[1]
                if should_ignore(line):
                    continue
                lines.append(line)
    return lines
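# Hypothetical usage of parse_by_date(); the repo path and the date range are
# placeholders, and should_ignore() must be defined elsewhere in this module.
from datetime import datetime

added_lines = parse_by_date("/tmp/some-repo",
                            datetime(2021, 1, 1), datetime(2021, 6, 30))
print(len(added_lines), "added lines kept")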
def test_commits_pickling():
    repo_path = os.path.join(settings.get('git_repositories_dir'), 'trfl')
    repo = RepositoryMining(repo_path)
    commits = list(repo.traverse_commits())[:10]
    cnt_before = len(objgraph.by_type("Commit"))
    print(f'Starting with {cnt_before}')
    for n, commit in enumerate(commits):
        pickle.dumps(commit)
        print(f'#{n+1} {len(objgraph.by_type("Commit"))}')
    cnt_after = len(objgraph.by_type("Commit"))
    print(f'Ending with {cnt_after}')
    assert cnt_before == cnt_after  # Track issue on https://github.com/ishepard/pydriller/issues/102
class GitCommitModel(GitModel):
    """
    A model for listing and manipulating git commits.
    """

    def __init__(self):
        super(GitCommitModel, self).__init__()
        self.repository_miner = RepositoryMining(self.dir_path)
        self.current_commit = None
        self.current_commit_file = None

    def list_commits(self):
        """
        Lists commits for the current branch for the commits table.

        :return: An array of arrays with commit information inside.
        """
        _commits_with_info = []
        for commit in reversed(list(self.repository_miner.traverse_commits())):
            _commit_info = [
                commit.hash[:12], commit.msg, commit.author.name,
                commit.author_date.strftime("%d/%m/%Y, %H:%M:%S")
            ]
            _commit_info_with_hash_as_key = [_commit_info, commit]
            _commits_with_info.append(_commit_info_with_hash_as_key)
        return _commits_with_info

    def list_files_in_current_commit(self):
        _file_map = []
        if self.current_commit is not None:
            for diff in self.current_commit.modifications:
                _file_map.append([diff.new_path, diff])
        return _file_map

    def current_file_diff(self):
        _diff = ""
        if self.current_commit_file is not None:
            _diff = self.current_commit_file.diff
        return _diff

    def checkout_commit(self):
        if self.current_commit is not None:
            # self.repo.git.stash()
            self.repo.git.checkout(self.current_commit.hash)
def mining(
    self,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    from_commit: Optional[str] = None,
    to_commit: Optional[str] = None,
) -> None:
    repos = RepositoryMining(
        self.__repos_path,
        since=start_date,
        to=end_date,
        from_commit=from_commit,
        to_commit=to_commit,
    )
    for commit in repos.traverse_commits():
        if self.__recorder.is_record_before(commit.hash):
            continue
        for modification in commit.modifications:
            self.__handle_modification(modification, commit.hash)
        self.__recorder.record_git_commit(commit.hash, commit.author_date)
    return None
def watch_version(self):
    content = requests.get(
        'https://api.github.com/repos/Yokotes/MangaWatcher/contents/version'
    ).json()
    version_and_date = base64.b64decode(
        content['content']).decode('utf-8').split(', ')
    new_version = version_and_date[0]
    new_date = version_and_date[1]
    if self.current_version != new_version:
        repo = RepositoryMining('https://github.com/Yokotes/MangaWatcher',
                                since=self.current_date)
        for commit in repo.traverse_commits():
            for mod in commit.modifications:
                if 'manga.json' not in mod.filename and 'manga_sites.json' not in mod.filename:
                    try:
                        with open('./' + mod.new_path, 'w', encoding='utf-8') as file:
                            file.write(mod.source_code)
                    except (OSError, TypeError):
                        # abort the update on a write error, or when a deleted
                        # file has no new_path/source_code
                        return None
class RepoMiner:
    """
    This class represents a RepoMiner for analysing the historical changes
    in software repositories.
    """

    def __init__(self, repoURL, first=None, second=None, fromCommit=None, since=None, to=None):
        start = time.perf_counter()
        self.__gitRepo = GitRepository(repoURL)
        if first is not None and second is not None and since is None and to is None:
            self.repo = RepositoryMining(repoURL, from_commit=first, to_commit=second)
            self.__repo_type = RepoType.BETWEEN_COMMITS
        elif first is not None and second is None and since is None and to is None:
            self.repo = RepositoryMining(repoURL, single=first)
            self.__repo_type = RepoType.SINGLE_COMMIT
        elif first is None and second is None and since is not None and to is not None:
            try:
                date1 = parser.parse(since)
                date2 = parser.parse(to)
                self.repo = RepositoryMining(repoURL, since=date1, to=date2)
                self.__repo_type = RepoType.DATETIME
            except Exception:
                raise Exception("Entered datetime is not valid.")
        elif fromCommit is not None:
            self.repo = RepositoryMining(path_to_repo=repoURL, from_commit=fromCommit)
            self.__repo_type = RepoType.FROM_COMMIT
        else:
            self.repo = RepositoryMining(path_to_repo=repoURL)
            self.__repo_type = RepoType.ALL
        print("repoMiner was created")
        self.__files = []  # analyzed files
        self.__files_with_methods = []
        self.__test_files = []  # test files
        self.__production_files = []  # production files
        self.__commits = []  # List[str] of analysed commit hashes
        self.__commits_with_modified_methods = set()  # hashes of analysed commits with modified methods
        self.__production_methods = []  # List[ModifiedMethod]
        self.__test_methods = []  # List[ModifiedMethod]
        self.__modified_methods = []  # List[ModifiedMethod]
        self.__moved_files_without_changes = []  # files moved without changes
        self.__analyzed_commits = []  # List[AnalyzedCommit]
        self.__matched_files = []  # list of matched files
        self.__not_matched_files = None  # instance of NotMatchedFiles
        self.__GetModifications()  # performs the analysis
        end = time.perf_counter()
        self.__analyse_time = "{:.2f}".format((end - start) / 60)  # analysis run time in minutes

    @property
    def analyze_time(self):
        """Returns the time taken for the analysis."""
        return self.__analyse_time

    @property
    def repo_type(self) -> RepoType:
        """Returns the repo type."""
        return self.__repo_type

    @property
    def project_name(self) -> str:
        """Returns the project name."""
        return self.__gitRepo.project_name

    @property
    def modified_methods_count(self) -> int:
        """Returns the number of all modified methods, including test methods."""
        return len(self.__modified_methods)

    @property
    def production_methods_count(self) -> int:
        """Returns the number of modified methods, excluding test methods."""
        return len(self.__production_methods)

    @property
    def test_methods_count(self) -> int:
        """Returns the number of test methods."""
        return len(self.__test_methods)

    @property
    def files(self) -> int:
        """Returns the number of analysed files that contain modified methods."""
        return len(self.__files)

    @property
    def unique_files(self):
        files = set(x for x in self.__files)
        return len(files)

    @property
    def test_files(self) -> int:
        """Returns the number of test files."""
        return len(self.__test_files)

    @property
    def unique_test_files(self):
        files = set(x for x in self.__test_files)
        return len(files)

    @property
    def production_files(self) -> int:
        """Returns the number of production files."""
        return len(self.__production_files)

    @property
    def unique_production_files(self):
        files = set(x for x in self.__production_files)
        return len(files)

    @property
    def files_with_methods(self):
        """Returns the number of analyzed files that contain methods."""
        return len(self.__files_with_methods)

    @property
    def unique_files_with_methods(self):
        """Returns the number of unique analyzed files that contain methods."""
        files = set(x for x in self.__files_with_methods)
        return len(files)

    @property
    def commits(self) -> int:
        """Returns the number of commits."""
        count = len(self.__commits)
        return count

    @property
    def commits_with_modified_methods(self) -> int:
        """Returns the number of commits with modified methods."""
        count = len(self.__commits_with_modified_methods)
        return count

    @property
    def moved_files(self):
        """Returns a list of moved files without changes."""
        return self.__moved_files_without_changes

    @property
    def modified_methods(self) -> List[ModifiedMethod]:
        """Returns a list of all modified methods, including test methods."""
        return self.__modified_methods

    @property
    def production_methods(self) -> List[ModifiedMethod]:
        """Returns a list of production methods only."""
        return self.__production_methods

    @property
    def highest_code_churn_methods(self) -> List[ModifiedMethod]:
        """Returns a list of recommended methods, ranked by code churn."""
        recommended_methods = self.GetRecommendedMethods(self.production_methods, 10, True)
        return recommended_methods

    @property
    def highest_change_frequency_methods(self) -> List[ModifiedMethod]:
        """Returns a list of recommended methods, ranked by change frequency."""
        recommended_methods = self.GetRecommendedMethods(self.production_methods, 10, False)
        return recommended_methods

    @property
    def test_methods(self) -> List[ModifiedMethod]:
        """Returns a list of test methods only."""
        return self.__test_methods

    @property
    def matched_files(self) -> List[MatchedFiles]:
        """Returns a list of matched pairs of file and test file."""
        return self.__matched_files

    @property
    def matched_files_count(self) -> int:
        """Returns the number of matched files."""
        return len(self.__matched_files)

    @property
    def not_matched_files(self) -> NotMatchedFiles:
        """Returns the NotMatchedFiles object of the analysis."""
        return self.__not_matched_files

    @property
    def summarized_production_methods(self) -> List[SummarizedMethod]:
        """Summarizes all instances of the same production method.
        Returns summarized production methods."""
        return self.SummarizeMethods(self.production_methods)

    @property
    def summarized_test_methods(self) -> List[SummarizedMethod]:
        """Summarizes all instances of the same test method.
        Returns summarized test methods."""
        return self.SummarizeMethods(self.test_methods, True)

    @property
    def analyzed_commits(self):
        """Returns a list of analyzed commits."""
        return self.__analyzed_commits

    def __GetModifications(self):
        """Collects modified methods as (commit hash, file name, methods)."""
        print("analyze commits")
        for commit in self.repo.traverse_commits():
            commit_hash = commit.hash
            analyzed_files = []
            self.__commits.append(commit.hash)
            for file in commit.modifications:
                if JAVA_FILE_SUFFIX in file.filename:
                    print(commit.hash, file.filename)
                    self.__files.append(file.filename)
                    if file.source_code is None and file.source_code_before is None:
                        self.__moved_files_without_changes.append((commit.hash, file))
                    elif self.IsTestFile(file):
                        self.__test_files.append(file.filename)
                        test_methods = self.__AnalyseFile(file, True, commit_hash)
                        analyzed_files.append(AnalyzedFile(commit.hash, file, True, test_methods))
                        for method in test_methods:
                            self.__test_methods.append(method)
                            self.__modified_methods.append(method)
                    else:
                        self.__production_files.append(file.filename)
                        methods = self.__AnalyseFile(file, False, commit_hash)
                        analyzed_files.append(AnalyzedFile(commit.hash, file, False, methods))
                        for method in methods:
                            self.__production_methods.append(method)
                            self.__modified_methods.append(method)
            self.__analyzed_commits.append(AnalyzedCommit(commit, analyzed_files))
        self.__CalculateFrequencyOfChanges(self.__modified_methods)
        if self.__repo_type == RepoType.BETWEEN_COMMITS or self.__repo_type == RepoType.SINGLE_COMMIT:
            self.__GetMatchedFiles()
        # self.__GetMultipleTimesRenamedMethods()

    @staticmethod
    def IsTestFile(file) -> bool:
        files = re.findall(r'(test.java)|(tests.java)', file.filename.lower())
        imports = re.findall(r'org.junit.*', file.source_code.lower()) \
            if file.source_code is not None else False
        path = re.findall(r'src\\test', file.new_path.lower()) \
            if file.new_path is not None else False
        after = any(annotation in file.source_code for annotation in java_test_annotations) if \
            file.source_code is not None else False
        before = any(annotation in file.source_code_before for annotation in java_test_annotations) if \
            file.source_code_before is not None else False
        if file.source_code is not None and file.source_code_before is not None:
            if files or imports or path or before or after:
                return True
        elif file.source_code is not None and file.source_code_before is None:
            if files or imports or path or after:
                return True
        elif file.source_code is None and file.source_code_before is not None:
            if files or imports or path or before:
                return True
        return False

    def __AnalyseFile(self, file, isTestFile, commit_hash) -> List[ModifiedMethod]:
        """Analyzes a file and returns a list of modified methods."""
        deleted_lines, added_lines = self.GetLinesFromDiff(file)
        lines = [added_lines, deleted_lines]
        sourceCodeAfter = self.StoreSourceCodeAsLines(file.source_code)
        sourceCodeBefore = self.StoreSourceCodeAsLines(file.source_code_before)
        methodsAfter = self.GetMethods(file.methods, sourceCodeAfter, isTestFile, commit_hash)
        methodsBefore = self.GetMethods(file.methods_before, sourceCodeBefore, isTestFile, commit_hash)
        if file.change_type.name == "ADD":
            methods = self.GetAddedOrDeletedMethods(methodsAfter, ModificationType.NEWLY_ADDED, lines)
        elif file.change_type.name == "DELETE":
            methods = self.GetAddedOrDeletedMethods(methodsBefore, ModificationType.COMPLETELY_DELETED, lines)
        elif file.change_type.name == "RENAME":
            methods = self.GetModifiedMethods(methodsBefore, methodsAfter, lines, file.change_type.name)
        elif file.change_type.name == "MODIFY":
            methods = self.GetModifiedMethods(methodsBefore, methodsAfter, lines)
        else:
            methods = []
        if len(methods) > 0:
            self.__files_with_methods.append(file.filename)
            self.__commits_with_modified_methods.add(commit_hash)
        return methods

    @staticmethod
    def GetModifiedMethods(methodsBefore, methodsAfter, lines, fileType=None):
        """Returns a list of modified methods."""
        modifiedMethodsBefore = []
        modifiedMethodsAfter = []
        modifiedMethods = []
        # map deleted lines onto methodsBefore and attach a modification type
        for method in methodsBefore:
            modificationType = RepoMiner.GetTypeOfMethods(method, lines[1], after=False)
            modifiedMethodsBefore.append((method, modificationType))
        # map added lines onto methodsAfter and attach a modification type
        for method in methodsAfter:
            modificationType = RepoMiner.GetTypeOfMethods(method, lines[0], after=True)
            modifiedMethodsAfter.append((method, modificationType))
        # renamed methods in modifiedMethodsBefore
        methodsBeforeRenamed = [x for x in modifiedMethodsBefore if x[1] == ModificationType.RENAMED]
        # renamed methods in modifiedMethodsAfter
        methodsAfterRenamed = [x for x in modifiedMethodsAfter if x[1] == ModificationType.RENAMED]
        # pair up the renamed methods from both sides
        renamedMethods = RepoMiner.GetRenamedMethods(methodsBeforeRenamed, methodsAfterRenamed, lines)
        # added methods (only additions)
        newlyAddedMethods = [method for method in modifiedMethodsAfter
                             if method[1] == ModificationType.NEWLY_ADDED]
        # deleted methods (only deletions)
        completelyDeletedMethods = [method for method in modifiedMethodsBefore
                                    if method[1] == ModificationType.COMPLETELY_DELETED]
        # remove methods that are already accounted for
        modifiedMethodsBefore = RepoMiner.RemoveMethods(modifiedMethodsBefore, completelyDeletedMethods)
        modifiedMethodsBefore = RepoMiner.RemoveMethods(modifiedMethodsBefore, methodsBeforeRenamed)
        modifiedMethodsAfter = RepoMiner.RemoveMethods(modifiedMethodsAfter, newlyAddedMethods)
        modifiedMethodsAfter = RepoMiner.RemoveMethods(modifiedMethodsAfter, methodsAfterRenamed)
        # map methodsBefore onto methodsAfter
        for methodBefore in modifiedMethodsBefore:
            try:
                if fileType:
                    fileNameBefore = methodBefore[0].long_name.split('::')[0]
                    methodNameBefore = methodBefore[0].long_name.lstrip(fileNameBefore).lstrip('::')
                    match = next(x for x in modifiedMethodsAfter
                                 if methodNameBefore in x[0].long_name)
                else:
                    match = next(x for x in modifiedMethodsAfter
                                 if methodBefore[0].long_name == x[0].long_name)
                if match[1] == ModificationType.ADDED and methodBefore[1] == ModificationType.DELETED:
                    modifiedMethods.append(
                        ModifiedMethod(methodBefore=methodBefore[0],
                                       methodAfter=match[0],
                                       modificationType=ModificationType.MODIFIED.name,
                                       lines=lines))
                elif match[1] == ModificationType.UNKNOWN and methodBefore[1] == ModificationType.DELETED:
                    modifiedMethods.append(
                        ModifiedMethod(methodBefore=methodBefore[0],
                                       methodAfter=match[0],
                                       modificationType=ModificationType.DELETED.name,
                                       lines=lines))
                elif match[1] == ModificationType.ADDED and methodBefore[1] == ModificationType.UNKNOWN:
                    modifiedMethods.append(
                        ModifiedMethod(methodBefore=methodBefore[0],
                                       methodAfter=match[0],
                                       modificationType=ModificationType.ADDED.name,
                                       lines=lines))
            except StopIteration:
                pass
        # append newly added methods
        for method in newlyAddedMethods:
            modifiedMethods.append(
                ModifiedMethod(methodAfter=method[0], modificationType=method[1].name, lines=lines))
        # append completely deleted methods
        for method in completelyDeletedMethods:
            modifiedMethods.append(
                ModifiedMethod(methodBefore=method[0], modificationType=method[1].name, lines=lines))
        # append renamed methods
        for method in renamedMethods:
            modifiedMethods.append(method)
        return modifiedMethods

    @staticmethod
    def RemoveMethods(methods, methodsToRemove):
        """Removes certain methods from a list and returns the remaining methods."""
        for method in methodsToRemove:
            methods.remove(method)
        return methods

    @staticmethod
    def GetLinesFromDiff(file):
        """Parses the diff and returns the deleted and added lines."""
        parsed_diff: Any = file.diff_parsed
        return parsed_diff['deleted'], parsed_diff['added']

    @staticmethod
    def StoreSourceCodeAsLines(sourceCode) -> Optional[list]:
        """Splits source code into a list of lines."""
        if sourceCode is not None:
            sourceCodeLines = [line for line in sourceCode.split("\n")]
        else:
            sourceCodeLines = None
        return sourceCodeLines

    @staticmethod
    def GetTypeOfMethods(method, lines, after=True) -> ModificationType:
        """Returns the modification type of a method, depending on the `after` flag."""
        codeLines = method.code_lines
        if all(codeLine in lines for codeLine in codeLines):
            return ModificationType.NEWLY_ADDED if after else ModificationType.COMPLETELY_DELETED
        elif any(codeLine in lines for codeLine in codeLines):
            if not codeLines[0] in lines:
                return ModificationType.ADDED if after else ModificationType.DELETED
            elif codeLines[0] in lines:
                return ModificationType.RENAMED
        elif not any(codeLine in lines for codeLine in codeLines):
            return ModificationType.UNKNOWN

    @staticmethod
    def GetMethods(methods, sourceCode, isTestFile, commit_hash) -> List[Method]:
        """Returns a list of methods."""
        listOfMethods = []
        sourceCodeLines = ''
        for method in methods:
            start = method.start_line - 1
            if isTestFile:
                if any(annotation in sourceCode[method.start_line - 2]
                       for annotation in java_test_annotations):
                    start = method.start_line - 2
            for i in range(start, method.end_line):
                sourceCodeLines += sourceCode[i] + "\n"
            listOfMethods.append(Method(method, sourceCodeLines, commit_hash))
            sourceCodeLines = ''
        return listOfMethods

    @staticmethod
    def GetAddedOrDeletedMethods(methods, modificationType, lines) -> List[ModifiedMethod]:
        """Returns newly added or completely deleted methods, depending on the given type."""
        if modificationType.name == "COMPLETELY_DELETED":
            addedOrDeletedMethods = [
                ModifiedMethod(methodBefore=method, modificationType=modificationType.name, lines=lines)
                for method in methods
            ]
        else:
            addedOrDeletedMethods = [
                ModifiedMethod(methodAfter=method, modificationType=modificationType.name, lines=lines)
                for method in methods
            ]
        return addedOrDeletedMethods

    @staticmethod
    def GetRenamedMethods(methodsBefore, methodsAfter, lines, similarity=0.8) -> List[ModifiedMethod]:
        """Calculates the Levenshtein distance between methods and returns a list of renamed methods."""
        MIN_SIMILARITY_SIGNATURE = similarity
        modifiedMethods = []
        methods = set()
        renamedMethodPairs = []
        notRenamedMethodPairs = []
        # consider the special case of a single unmatched method:
        if len(methodsBefore) != len(methodsAfter):
            if len(methodsBefore) == 0 and len(methodsAfter) == 1:
                modifiedMethods.append(
                    ModifiedMethod(methodAfter=methodsAfter[0][0],
                                   modificationType=ModificationType.ADDED.name,
                                   lines=lines))
                return modifiedMethods
            elif len(methodsBefore) == 1 and len(methodsAfter) == 0:
                modifiedMethods.append(
                    ModifiedMethod(methodBefore=methodsBefore[0][0],
                                   modificationType=ModificationType.DELETED.name,
                                   lines=lines))
                return modifiedMethods
        for methodAfter in methodsAfter:
            for methodBefore in methodsBefore:
                ratio_signature = Levenshtein.ratio(methodBefore[0].signature,
                                                    methodAfter[0].signature)
                ratio_method_body = Levenshtein.ratio(methodBefore[0].method_body,
                                                      methodAfter[0].method_body)
                current_object = (methodBefore[0], methodAfter[0], ratio_signature, ratio_method_body)
                if ratio_signature >= 0.9:
                    MIN_SIMILARITY_METHOD_BODY = similarity
                else:
                    MIN_SIMILARITY_METHOD_BODY = ((1 - similarity) / 2) + similarity
                if len(methodsBefore) == 1 and len(methodsAfter) == 1:
                    renamedMethodPairs.append(current_object)
                elif ratio_signature >= MIN_SIMILARITY_SIGNATURE and ratio_method_body >= MIN_SIMILARITY_METHOD_BODY:
                    if methodBefore[0] in [x[0] for x in renamedMethodPairs]:
                        match = [x for x in renamedMethodPairs if methodBefore[0] == x[0]]
                        if current_object[2] > match[0][2]:
                            index = renamedMethodPairs.index(match[0])
                            renamedMethodPairs[index] = current_object
                    else:
                        renamedMethodPairs.append(current_object)
                else:
                    notRenamedMethodPairs.append(current_object)
        for method in renamedMethodPairs:
            if method[0] not in methods and method[1] not in methods:
                modifiedMethods.append(
                    ModifiedMethod(methodBefore=method[0],
                                   methodAfter=method[1],
                                   modificationType=ModificationType.RENAMED.name,
                                   ratio=(method[2], method[3]),
                                   lines=lines))
                methods.add(method[0])
                methods.add(method[1])
        for method in notRenamedMethodPairs:
            if method[0] not in methods:
                modifiedMethods.append(
                    ModifiedMethod(methodBefore=method[0],
                                   modificationType=ModificationType.DELETED.name,
                                   lines=lines))
                methods.add(method[0])
            if method[1] not in methods:
                modifiedMethods.append(
                    ModifiedMethod(methodAfter=method[1],
                                   modificationType=ModificationType.ADDED.name,
                                   lines=lines))
                methods.add(method[1])
        return modifiedMethods

    @staticmethod
    def __CalculateFrequencyOfChanges(methods):
        """Calculates the change frequency for each method."""
        print("calculate change frequency")
        for x in methods:
            x.change_frequency = sum(
                x.long_name == y.long_name and x.file_name == y.file_name for y in methods)

    @staticmethod
    def SummarizeMethods(methods, isTestfile=None) -> List[SummarizedMethod]:
        """Summarizes instances of the same method."""
        summarized_methods = []
        method_names = set(x.long_name for x in methods)
        for method_name in method_names:
            method = [x for x in methods if x.long_name == method_name]
            if isTestfile:
                summarized_methods.append(SummarizedMethod(method, isTestMethod=True))
            else:
                summarized_methods.append(SummarizedMethod(method, isTestMethod=False))
        return summarized_methods

    @staticmethod
    def __SortMethodsByChangedFrequency(methods) -> List[ModifiedMethod]:
        """Returns the list of methods sorted by change frequency."""
        methods.sort(key=lambda x: x.change_frequency, reverse=True)
        return methods

    @staticmethod
    def GetRecommendedMethods(methods, number, code_churn):
        """Returns a list of recommended methods."""
        recommended_methods = []
        filteredMethod = None
        if number > len(methods):
            print("number is bigger than the number of identified methods")
            number = len(methods)
        for i in range(0, number):
            max_value = 0
            for method in methods:
                value = method.code_churn if code_churn else method.change_frequency
                if value > max_value:
                    max_value = value
                    filteredMethod = method
            methods = [x for x in methods if x.long_name != filteredMethod.long_name]
            recommended_methods.append(filteredMethod)
        return recommended_methods

    def __GetMatchedFiles(self):
        """Determines corresponding files and test files. Returns matched files."""
        print("calculate matched files")
        methods = self.production_methods
        test_methods = self.test_methods
        summarized_methods = self.SummarizeMethods(methods)
        summarized_test_method = self.SummarizeMethods(test_methods, True)
        methods_file_names = set(method.file_name for method in methods)
        test_methods_file_names = set(method.file_name for method in test_methods)
        matched, not_matched = self.GetMatchedFileNames(methods_file_names,
                                                        test_methods_file_names)
        for match in matched:
            sublist_methods = []
            sublist_test_methods = []
            for method in summarized_methods:
                if match[0] == method.summarized_method.file_name:
                    sublist_methods.append(method.summarized_method)
            for method in summarized_test_method:
                if match[1] == method.summarized_test_method.file_name:
                    sublist_test_methods.append(method.summarized_test_method)
            self.__matched_files.append(
                MatchedFiles(match[0], match[1], sublist_methods, sublist_test_methods))
        not_matched_methods = [x for x in methods if x.file_name in not_matched[0]]
        not_matched_test_methods = [x for x in test_methods if x.file_name in not_matched[1]]
        self.__not_matched_files = NotMatchedFiles(not_matched[0], not_matched[1],
                                                   not_matched_methods, not_matched_test_methods)

    @staticmethod
    def GetMatchedFileNames(production_file_names, test_file_names):
        """Finds pairs of corresponding file and test file names. Returns the identified pairs."""
        matched = []
        for file_name in production_file_names:
            name = re.sub('<.*>', '', file_name)
            regEx1 = r"" + name.lower().split(".")[0] + r"test.java"
            regEx2 = r"" + name.lower().split(".")[0] + r"testcase.java"
            regEx3 = r"" + name.lower().split(".")[0] + r"tests.java"
            for test_file_name in test_file_names:
                match1 = re.fullmatch(regEx1, test_file_name.lower())
                match2 = re.fullmatch(regEx2, test_file_name.lower())
                match3 = re.fullmatch(regEx3, test_file_name.lower())
                if match1 or match2 or match3:
                    matched.append((file_name, test_file_name))
        matched_files = [y[0] for y in matched]
        matched_test_files = [y[1] for y in matched]
        not_match_files = [x for x in production_file_names if x not in matched_files]
        not_match_test_files = [x for x in test_file_names if x not in matched_test_files]
        not_matched = [not_match_files, not_match_test_files]
        print("len matched:", len(matched))
        return matched, not_matched

    # TODO: still needs to be tested
    def __GetMultipleTimesRenamedMethods(self):
        """Searches, for each method, for multiple corresponding renamed methods."""
        all_methods = self.modified_methods
        all_renamed_methods = [x for x in all_methods if x.type == "RENAMED"]
        for x in all_renamed_methods:
            found_a_renamed_method = True
            renamed_method = x
            while found_a_renamed_method:
                found_a_renamed_method, renamed_method = self.__FindRenamedMethods(
                    renamed_method, all_renamed_methods)
                if renamed_method is not None:
                    x.renamed_methods.append(renamed_method)
                    x.multiple_renamed = True

    @staticmethod
    def __FindRenamedMethods(method, methods):
        renamed_method = None
        for x in methods:
            if method.signature == x.old_signature and x.ratio_signature < 1.0:
                renamed_method = x
                break
        if renamed_method is not None:
            return True, renamed_method
        else:
            return False, None
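# A self-contained sketch of the signature-similarity rule used in
# GetRenamedMethods above, assuming the python-Levenshtein package;
# the two Java signatures are invented.
import Levenshtein

before_sig = "int computeTotal(List<Item> items)"
after_sig = "int computeSum(List<Item> items)"
ratio = Levenshtein.ratio(before_sig, after_sig)
# With the default threshold of 0.8, this pair would qualify as a rename
# candidate, subject to the additional method-body similarity check.
print(ratio, ratio >= 0.8)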
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("from_tag")
    parser.add_argument("to_tag")
    args = parser.parse_args()

    pr_commits = []
    dependency_updates = []
    migrations = []

    mine = RepositoryMining(".", from_tag=args.from_tag, to_tag=args.to_tag)
    for commit in mine.traverse_commits():
        for mod in commit.modifications:
            if mod.change_type == ModificationType.ADD and "/migrations/" in mod.new_path:
                migrations.append(get_migration_desc(mod))
        if not commit.merge:
            # Not a PR merge
            continue
        commit.prs = get_pr_numbers(commit)
        if not commit.prs:
            continue
        if is_dependency_update(commit):
            dependency_updates.append(commit)
        else:
            pr_commits.append(commit)

    # output
    print(f"# Version {args.to_tag}")
    print()
    print(f"## PRs merged since {args.from_tag}")
    print()
    if not pr_commits:
        print("None")
    for commit in pr_commits:
        for pr in commit.prs:
            print(f"* [PR {pr}]({PR_URL_TMPL.format(pr)}): {get_pr_title(commit, pr)}")
    print()
    print("## Dependency updates")
    print()
    if not dependency_updates:
        print("None")
    for commit in dependency_updates:
        for pr in commit.prs:
            print(f"* [PR {pr}]({PR_URL_TMPL.format(pr)}): {get_pr_title(commit, pr)}")
    print()
    print("## Migrations")
    print()
    if not migrations:
        print("None")
    for migration in migrations:
        print(f"* {migration['path']} - {migration['description']}")
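# A minimal sketch of the tag-to-tag traversal used above, assuming
# pydriller's pre-2.0 API; the tag names are placeholders.
from pydriller import RepositoryMining

for commit in RepositoryMining(".", from_tag="v1.0",
                               to_tag="v1.1").traverse_commits():
    print(commit.hash[:7], commit.msg.splitlines()[0])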
    value += 1
    value %= 13
    return 1 if value == 0 else value


for repo in repos:
    if repo != "generator-jhipster":
        continue
    repository_folder = "{}{}/".format(repo_folder, repo)
    print(repository_folder)
    repository = RepositoryMining(repository_folder,
                                  only_modifications_with_file_types=['.java'])
    selected_commits = dict()
    commits = repository.traverse_commits()
    with open("../out/{}.csv".format(repo.replace(".git", "")), 'w') as output:
        conditional_tag = re.compile(r'\+@Conditional.*\(')
        profile_tag = re.compile(r'\+@Profile.*\(')
        conditional_remove_tag = re.compile(r'-@Conditional.*\(')
        profile_remove_tag = re.compile(r'-@Profile.*\(')
        conds = dict()
        repo = RepositoryMining(repo)
        print("DATE, +@Conditional, +@Profile, -@Conditional, -@Profile")
        print("DATE, +@Conditional, +@Profile, -@Conditional, -@Profile", file=output)
        for commit in commits:
single_java_vuln_commits = []
for repo in repos:
    repo_url = repo["url"]
    clean_local_repo(repo_url)
    print(f'Cloning {repo_url} ...', end=' ')
    try:
        repo_mining = RepositoryMining(repo_url,
                                       clone_repo_to="/tmp",
                                       order='reverse',
                                       only_in_branch='master')
    except GitCommandError as e:
        print('cannot mine. Skipping.')
        logging.error(e)
    else:
        try:
            commits = list(repo_mining.traverse_commits())
            print("done!")
            vuln_commits = [c for c in commits if c.hash in repo["hashes"]]
            if vuln_commits:
                for vc in vuln_commits:
                    if is_single_java(vc):
                        head_msg = vc.msg.split("\n", 1)[0]
                        single_java_vuln_commits.append({
                            "project": repo_url,
                            "hash": vc.hash
                        })
                        print(f'\t{vc.hash} is interesting!')
                    else:
                        print(f'\t{vc.hash} found but not interesting.')
            else:
                print('\tcommit(s) not found :(')
class ExtractSourceFilesInfo:
    # repository_path = name of the repository (same on GitHub and in the local repo)
    # path_to_file = path to the file, in order to avoid the computation of test files
    def __init__(self, repository_path, path_to_file):
        self._repository = RepositoryMining("https://www.github.com/" + repository_path + ".git")
        self._repository_path = repository_path
        self._path_to_file = path_to_file
        self._classNames = []

    # This function creates the file-developers dictionary
    def getFileDevDictionary(self):
        # dictionary instance
        commitDict = dict()
        # Iterating the commits...
        for commit in self._repository.traverse_commits():
            # N.B. Each commit may contain more than one modification: a developer may
            # modify more than one file and commit all of the modified files together.
            # Iterating the modifications in the commit...
            for m in commit.modifications:
                # If the filename of the modification 'm' isn't already in the dictionary,
                # add it as a key of commitDict; the corresponding value is another
                # dictionary: commitDict = {'filename': {}}
                if m.filename not in commitDict:
                    commitDict[m.filename] = dict()
                # If the author modifies the file 'filename' for the FIRST TIME, put the
                # author name as a key of the inner dictionary (which in turn is the value
                # for the corresponding filename in commitDict), with '1' as the value:
                # this value counts how many times the author has modified that file.
                if commit.author.name not in commitDict[m.filename]:
                    commitDict[m.filename][commit.author.name] = 1
                # If the author modifies the file 'filename' a SECOND TIME (or more),
                # increase the corresponding counter.
                else:
                    commitDict[m.filename][commit.author.name] += 1
        # Create the graph
        y = nx.Graph()
        file_name_list = []
        committer_list = []
        for x, committers in commitDict.items():
            file_name_list.append(x)
            for committer, num_commit in committers.items():
                if committer not in committer_list:
                    committer_list.append(committer)
        # Add nodes and edges to the graph
        y.add_nodes_from(file_name_list, bipartite=0)
        y.add_nodes_from(committer_list, bipartite=1)
        list_to_add = []
        for filename, committers in commitDict.items():
            for committer, num_commit in committers.items():
                list_to_add.append((filename, committer))
        y.add_edges_from(list_to_add)
        pos = nx.spring_layout(y, k=0.4, iterations=20)
        nx.draw_networkx_nodes(y, pos, node_size=40)
        nx.draw_networkx_edges(y, pos, edgelist=y.edges, edge_color="b", style="solid")
        nx.draw_networkx_labels(y, pos, font_size=7, font_family="sans-serif")
        # Show the graph
        plt.axis("off")
        plt.figure(figsize=(10, 8), dpi=300)
        plt.show()
        return commitDict

    # This function creates the file-file dependencies dictionary
    def getFileFileDictionary(self):
        repo_dir = self._repository_path + "/" + self._path_to_file
        subprocess.call(
            ['java', '-jar', 'depends/depends.jar', 'java', repo_dir, 'outputDep',
             '--auto-include', '-d=depends'])

    def getFileFileMatrix(self):
        self.getFileFileDictionary()
        with open("depends/outputDep.json") as f:
            data = json.load(f)
        # Get class names of the entire project
        name_of_classes = list()
        for key in data['variables']:
            filename = pathlib.PureWindowsPath(key)
            # Convert path to the right format for the current operating system
            path = pathlib.PurePath(filename)
            name_of_classes.append(path.name)
        self._classNames = name_of_classes
        dependencies = list()
        dependenciesRow = list()
        # Iterating all the pairs of classes that have dependencies:
        # index 'i' goes from 0 to n (the number of classes)
        for i in range(0, len(data["variables"])):
            # Iterating all classes (from 0 to n)
            for j in range(0, len(data["variables"])):
                # Boolean variable that tells us whether any dependencies were found
                noDependencies = True
                # Iterating the dependencies found by "Depends":
                for index in range(0, len(data["cells"])):
                    # If there are dependencies from the class indexed as 'i'...
                    if (data["cells"][index]["src"] == i):
                        # ...to the class indexed as 'j'
                        if (data["cells"][index]["dest"] == j):
                            # DEPENDENCY FOUND! Set the boolean to False and sum the dependencies!
                            noDependencies = False
                            dependenciesRow.append(sum(data["cells"][index]["values"].values()))
                # No dependencies between class 'i' and class 'j': put 0 in the list
                if (noDependencies):
                    dependenciesRow.append(0)
            # We are going to the next row, which means 'i' is about to change (another
            # class will be analyzed): copy the 'dependenciesRow' list into a support
            # list to save the results into the 'dependencies' matrix, and re-use
            # 'dependenciesRow' in the next iteration!
            supportList = deepcopy(dependenciesRow)  # copy
            del dependenciesRow[:]  # empty the list
            dependencies.extend([supportList])  # dependencies matrix filling
        k = 0
        dict_to_return = dict()
        for class_name in name_of_classes:
            j = 0
            dict_to_return[class_name] = dict()
            for class_name_2 in name_of_classes:
                if dependencies[k][j] > 0:
                    dict_to_return[class_name][class_name_2] = dependencies[k][j]
                j = j + 1
            k = k + 1
        # Create the graph
        y = nx.Graph()
        for file, file_dep in dict_to_return.items():
            for file2, val in file_dep.items():
                y.add_edge(file, file2, weight=val)  # Add the edges to the graph
        pos = nx.spring_layout(y)
        nx.draw_networkx_nodes(y, pos, node_size=70)
        nx.draw_networkx_edges(y, pos, edgelist=y.edges, edge_color="b", style="solid")
        nx.draw_networkx_labels(y, pos, font_size=5, font_family="sans-serif")
        # Print the graph
        plt.axis("off")
        plt.show()
        return dependencies, name_of_classes

    def getFileDevMatrix(self):
        # Getting data
        data = self.getFileDevDictionary()
        # Get all file names
        fileNames = list(data.keys())
        devNames = []
        # Get all developers' names
        for file in self._classNames:
            for key in data[file].keys():
                if key not in devNames:
                    devNames.append(key)
        # File-dev matrix
        fileDevMatrix = list()
        # A list used for each row of the matrix: in each iteration it is filled and then
        # emptied, so it can be re-used in the next iteration
        fileDevRow = []
        # Iterating file names
        for i in range(0, len(self._classNames)):
            # Iterating developer names
            for j in range(0, len(devNames)):
                # If a developer name is in the dictionary associated with a certain file
                # (meaning they made at least 1 commit on that file)...
                if (devNames[j] in data[self._classNames[i]]):
                    # ...append the number of commits on that file
                    fileDevRow.append(data[self._classNames[i]][devNames[j]])
                else:
                    # otherwise put 0
                    fileDevRow.append(0)
            # We are going to the next row, which means 'i' is about to change (another
            # file will be analyzed): copy the 'fileDevRow' list into a support list to
            # save the results into the matrix, and re-use 'fileDevRow' in the next
            # iteration!
            supportList = deepcopy(fileDevRow)  # copy
            del fileDevRow[:]  # empty the list
            fileDevMatrix.extend([supportList])  # matrix filling
        return fileDevMatrix, devNames
class ArgumentCommits():
    def __init__(self, urls=None):
        # avoid a mutable default argument; fall back to an empty list
        self.mining_object = RepositoryMining(
            urls if urls is not None else [],
            only_in_main_branch=True,
            only_modifications_with_file_types=['.java'])
        self.commit_records = defaultdict(list)
        self.result_for_csv = ''
        self.result_for_tsv = ''

    def fetch_commit_data(self):
        '''
        This routine fetches the commit data of the specified repository and
        runs static analysis on the code files in the commit.
        '''
        for commit in self.mining_object.traverse_commits():
            for mod in commit.modifications:
                if mod.filename[-5:] == '.java':  # get only Java files from the commit
                    commit_code_details = lizard.analyze_file.analyze_source_code(
                        mod.filename, mod.source_code)
                    for func in commit_code_details.function_list:
                        d = func.__dict__
                        if 'for(' in d["long_name"]:
                            continue  # handle the case where 'for()' is treated as a function by lizard
                        self.commit_records[f'{mod.filename}~{d["name"]}'].append({
                            'hash': commit.hash,
                            'doc': commit.author_date.strftime("%Y-%m-%d %H:%M:%S"),  # doc == date of commit
                            'current_signature': d["long_name"],
                            'args': d["parameters"]
                        })

    def find_commits_with_additional_parameters(self):
        '''
        This routine finds the commits where one or more arguments were added
        to a function.
        '''
        print("Fetching commits and running static analysis: In Progress...")
        self.fetch_commit_data()
        print("Fetching commits and running static analysis: Done!")
        print("Finding Commits with Additional Parameters: In Progress...")
        for key, value in self.commit_records.items():
            value.sort(key=lambda el: parse(el["doc"]))  # sort the commits by date of commit
            file_name = key.split('~')[0]
            l = len(value)
            current_signature = value[0]['current_signature']
            for i in range(0, l - 1):
                if len(value[i + 1]['args']) > len(value[i]['args']) and \
                        value[i + 1]["current_signature"] != current_signature and \
                        value[i + 1]["hash"] != value[i]["hash"]:
                    self.result_for_csv += f'{value[i + 1]["hash"]},{file_name},{value[i]["current_signature"]},{value[i + 1]["current_signature"]}\n'
                    self.result_for_tsv += f'{value[i + 1]["hash"]}\t{file_name}\t{value[i]["current_signature"]}\t{value[i + 1]["current_signature"]}\n'
                    current_signature = value[i + 1]["current_signature"]
        print("Finding Commits with Additional Parameters: Done!")

    def write_to_csv(self, filename="default_file_name"):
        with open(f'{filename}.csv', 'w') as file:
            file.write('Commit SHA,Java File,Old function signature,New function signature\n')
            # result_for_csv is a single string, so write it in one call
            file.write(self.result_for_csv)

    def write_to_tsv(self, filename="default_file_name"):
        with open(f'{filename}.tsv', 'w') as file:
            file.write('Commit SHA\tJava File\tOld function signature\tNew function signature\n')
            file.write(self.result_for_tsv)
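# Hypothetical driver for ArgumentCommits; the repository URL and the output
# file name are placeholders.
ac = ArgumentCommits(urls="https://github.com/apache/commons-lang")
ac.find_commits_with_additional_parameters()
ac.write_to_csv("added_parameters")
ac.write_to_tsv("added_parameters")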
    local directory containing Java project repositories
    """
    home = str(Path.home())
    # @TODO add this to configuration
    # Path containing GitHub repositories of Java projects
    dataset_path = os.path.join(home, "ml4se_dataset", "unique_mining")
    project_names = os.listdir(dataset_path)
    print("Starting to look through projects")
    for project_name in project_names:
        path = os.path.join(dataset_path, project_name)
        repository_mining = RepositoryMining(path)
        # print("Starting to analyze commits for {}".format(path))
        try:
            for commit in repository_mining.traverse_commits():
                gr = GitRepository(repository_mining.path_to_repo[0])
                for modified_file in commit.modifications:
                    if modified_file.filename.endswith(".java"):
                        diff = modified_file.diff
                        parsed_diff = gr.parse_diff(diff)
                        lines_containing_less_or_equal = []
                        lines_containing_less = []
                        lines_containing_greater_or_equal = []
                        lines_containing_greater = []
                        for deletion in parsed_diff['deleted']:
                            line_nr = deletion[0]
                            content = deletion[1]
                            if " <= " in content:
                                lines_containing_less_or_equal.append(line_nr)
def search_repository(repo_mining: pd.RepositoryMining, severity: Level, confidence: Level):
    """
    Iterate through all commits of the given repository from the given revision
    (default: active branch)

    :param repo_mining: The RepositoryMining object
    :param severity: The minimum severity level of vulnerabilities
    :param confidence: The minimum confidence level of vulnerabilities
    """
    output = {}
    gitpython_repo = git.Repo(repo_mining._path_to_repo)
    commit_count = len(list(copy.copy(repo_mining).traverse_commits()))
    for commit in tqdm(repo_mining.traverse_commits(), total=commit_count):
        # Too many files changed will cause the program to hang
        if len(commit.modifications) > 100:
            continue
        commit_message = process_commit_message(commit.msg)
        output[commit.hash] = {'date': str(commit.author_date)}
        # Find matching vulnerabilities
        output[commit.hash]['vulnerabilities'] = [
            {'name': vulnerability.name, 'match': regex_match.group()}
            for vulnerability in vuln.vulnerability_list
            for regex_match in [vulnerability.regex.search(commit_message)]
            if regex_match
        ]
        if not output[commit.hash]['vulnerabilities']:
            output[commit.hash].pop('vulnerabilities')
        # Add files changed; each modification is one changed file
        for modification in commit.modifications:
            # use '==' here: 'is' on a string literal is identity, not equality
            file = modification.old_path if modification.change_type.name == 'DELETE' else modification.new_path
            file_extension = os.path.splitext(file)[1].lower()
            # Skip this file if the file extension is not supported
            if file_extension not in lang.supported_extensions:
                continue
            source_code_dict = get_source_code_dict(gitpython_repo, commit.hash, file,
                                                    modification.source_code)
            # Encoding will be None or not supported by decode() in some cases
            if not source_code_dict:
                continue
            # 'repo' is assumed to be a module-level pydriller GitRepository
            diff = repo.parse_diff(modification.diff)
            # Run Flawfinder for C/C++ files
            if file_extension in c_lang.c_extensions:
                partial_output = run_flawfinder(diff, source_code_dict, severity, confidence)
            # Run bandit for Python files
            elif file_extension in py_lang.py_extensions:
                partial_output = run_bandit(diff, source_code_dict, severity, confidence)
            # Run a 'grep'-like analysis for files in other languages (very noisy)
            else:
                diff['unchanged'] = get_unchanged_lines(diff, source_code_dict)
                partial_output = process_diff(diff, file_extension, severity, confidence)
            # Only add the file if it has useful code changes (comments already removed)
            if partial_output:
                if 'files_changed' not in output[commit.hash]:
                    output[commit.hash]['files_changed'] = []
                output[commit.hash]['files_changed'].append({'file': file, **partial_output})
        # Remove the commit if the regex doesn't match or no vulnerable lines of code are detected
        if 'vulnerabilities' not in output[commit.hash] and 'files_changed' not in output[commit.hash]:
            output.pop(commit.hash)
    return output
def ExtractFromCommits(since, to, url, excludes):
    # limit to the time of writing the script, for reproducible results
    commits = RepositoryMining(path_to_repo=url, since=since, to=to)
    # Data extraction variables
    project_name = ""
    count = 0
    merges = 0
    all_authors = []
    author_commit_dict = dict()
    internal_authors = []
    external_authors = []
    code_changes = []
    iac_changes = []
    excluded_files = []
    for commit in commits.traverse_commits():
        if project_name == "":
            project_name = commit.project_name
        msg = commit.msg
        author = commit.author.email
        org_author = commit.committer.email
        count = count + 1
        if commit.merge:
            merges = merges + 1
        # extract files in this commit
        changedFiles = commit.modifications
        # remove files that match exclude paths
        files_for_analysis = []
        for file in changedFiles:
            if file.new_path is not None:
                path = file.new_path
            else:
                path = file.old_path
            addToCollection = True
            for exclude_path in excludes:
                # maybe handle wildcards here
                if exclude_path in path:
                    addToCollection = False
                    if (file.filename, exclude_path) not in excluded_files:
                        excluded_files.append((file.filename, exclude_path))
            if addToCollection:
                files_for_analysis.append(file)
        for file in files_for_analysis:
            filename = file.filename
            loc = file.nloc
            if loc is not None:
                # code files
                lines_added = file.added
                lines_removed = file.removed
                code_changes.append((commit.hash, author, filename, msg,
                                     lines_added, lines_removed, org_author))
            else:
                # documentation and IaC files
                lines_added = file.added
                lines_removed = file.removed
                iac_changes.append((commit.hash, author, filename, msg,
                                    lines_added, lines_removed, org_author))
        # Create the overall collection of all authors, independent of company
        if author not in all_authors:
            all_authors.append(author)
            author_commit_dict[author] = 1
        else:
            author_commit_dict[author] = author_commit_dict[author] + 1
        # split into internal and external authors
        if "eficode" in author or "praqma" in author:
            if author not in internal_authors:
                internal_authors.append(author)
        else:
            if author not in external_authors:
                external_authors.append(author)
    return (project_name, count, merges, all_authors, author_commit_dict,
            internal_authors, external_authors, code_changes, iac_changes,
            excluded_files)
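# Hypothetical call to ExtractFromCommits(); the URL, date range, and exclude
# paths are placeholders.
import datetime

result = ExtractFromCommits(
    since=datetime.datetime(2020, 1, 1),
    to=datetime.datetime(2020, 12, 31),
    url="https://github.com/praqma/example-repo",
    excludes=["vendor/", "docs/"])
project_name, count, merges = result[0], result[1], result[2]
print(project_name, count, merges)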
class RepositoryProcessor:
    def __init__(self, repository: str, owner: str):
        self.owner = owner
        self.repository = os.path.split(repository)[-1]
        self.repo = GitRepository(repository)
        self.mining = RepositoryMining(repository)
        self.pairs = []
        random.seed(42)

    def run(self):
        self.get_all_filepairs()
        with open(os.path.join('filepairs', self.repository, 'pairs.txt'), 'w') as f:
            f.write('\n'.join(map(lambda x: f'{x[0]} {x[1]} {x[2]}', self.pairs)))
            f.write('\n')

    def get_all_filepairs(self, file_filter=java_file_filter):
        commits = list(filter(lambda x: not x.merge, self.mining.traverse_commits()))
        for commit in commits:
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)

    def get_file_pair(self, commit, modification: Modification):
        parent = commit.parents[0]
        repo = self.repo.project_name
        commit_hash = commit.hash
        filename = modification.filename
        path = os.path.join('filepairs', repo, commit_hash, filename)
        os.makedirs(path, exist_ok=True)
        self.repo.checkout(parent)
        before = os.path.join(self.repository, modification.old_path)
        before_saved = os.path.join(path, 'before_' + commit_hash + '_' + filename)
        copyfile(before, before_saved)
        self.repo.checkout(commit_hash)
        after = os.path.join(self.repository, modification.new_path)
        after_saved = os.path.join(path, 'after__' + commit_hash + '_' + filename)
        copyfile(after, after_saved)
        self.pairs.append((before_saved, after_saved,
                           commit_hash + '.' + self.owner + '.' + before.replace('/', '.')))

    def run_random(self, number):
        self.get_random_filepairs(number)
        with open(os.path.join('filepairs', self.repository, 'pairs.txt'), 'w') as f:
            f.write('\n'.join(map(lambda x: f'{x[0]} {x[1]} {x[2]}', self.pairs)))
            f.write('\n')

    def get_random_filepairs(self, number, file_filter=java_file_filter):
        commits = random.choices(
            list(filter(lambda x: not x.merge, self.mining.traverse_commits())), k=number)
        for idx, commit in enumerate(commits):
            print(f'Processing commit №{idx}: {commit.hash}')
            for modification in commit.modifications:
                if modification.change_type == ModificationType.MODIFY:
                    if file_filter(modification.filename):
                        self.get_file_pair(commit, modification)
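# Hypothetical usage of RepositoryProcessor; the clone path and owner are
# placeholders. run() walks every non-merge MODIFY commit, while run_random()
# samples a fixed number of commits instead.
processor = RepositoryProcessor("/tmp/commons-io", owner="apache")
processor.run()
# processor.run_random(100)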
    if new_path != path:
        path = new_path
    else:
        return None
    return None

if __name__ == "__main__":
    current_repo()

if __name__ == "__main__":
    book_miner = RepositoryMining(current_repo())

if __name__ == "__main__":
    book_commits = book_miner.traverse_commits()
    book_first_commit = next(book_commits)

if __name__ == "__main__":
    [attr for attr in dir(book_first_commit) if not attr.startswith('_')]

if __name__ == "__main__":
    book_first_commit.msg

if __name__ == "__main__":
    [attr for attr in dir(book_first_commit.author) if not attr.startswith('_')]

if __name__ == "__main__":
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("from_tag")
    parser.add_argument("to_tag")
    parser.add_argument("--verbose", "-v", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.WARNING
    if args.verbose >= 2:
        log_level = logging.DEBUG
    elif args.verbose >= 1:
        log_level = logging.INFO

    logging.config.dictConfig({
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "development": {
                "format": "%(levelname)s %(name)s: %(message)s"
            }
        },
        "handlers": {
            "console": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
                "stream": sys.stderr,
                "formatter": "development",
            }
        },
        "root": {
            "handlers": ["console"],
            "level": "WARNING"
        },
        "loggers": {
            "normandy": {
                "propagate": False,
                "handlers": ["console"],
                "level": log_level
            }
        },
    })

    pr_commits = []
    dependency_updates = []
    migrations = []

    mine = RepositoryMining(".", from_tag=args.from_tag, to_tag=args.to_tag)
    num_commits_processed = 0
    for commit in mine.traverse_commits():
        for mod in commit.modifications:
            if mod.change_type == ModificationType.ADD and "/migrations/" in mod.new_path:
                migrations.append(get_migration_desc(mod))
        if not commit.merge:
            # Not a PR merge
            log.debug(f"Skipping {commit.hash[:7]}: Not a merge")
            continue
        commit.prs = get_pr_numbers(commit)
        if not commit.prs:
            log.debug(f"Skipping {commit.hash[:7]}: No PR numbers")
            continue
        if is_dependency_update(commit):
            log.debug(f"Processing commit {commit.hash[:7]} as dependency commit")
            dependency_updates.append(commit)
        else:
            log.debug(f"Processing commit {commit.hash[:7]} as normal commit")
            pr_commits.append(commit)
        num_commits_processed += 1

    if num_commits_processed == 0:
        log.error("No commits processed")
        raise Exception("No commits processed")

    # Accrue output in a buffer and print all at once so that log lines don't pollute it
    output = ""

    def output_line(line=""):
        nonlocal output
        output += line + "\n"

    output_line(f"# Version {args.to_tag}")
    output_line()
    output_line(f"## PRs merged since {args.from_tag}")
    output_line()
    if not pr_commits:
        output_line("None")
    for commit in pr_commits:
        for pr in commit.prs:
            output_line(f"* [PR {pr}]({PR_URL_TMPL.format(pr)}): {get_pr_title(commit, pr)}")
    output_line()
    output_line("## Dependency updates")
    output_line()
    if not dependency_updates:
        output_line("None")
    for commit in dependency_updates:
        for pr in commit.prs:
            # was print(); use the buffer so log lines don't interleave with the output
            output_line(f"* [PR {pr}]({PR_URL_TMPL.format(pr)}): {get_pr_title(commit, pr)}")
    output_line()
    output_line("## Migrations")
    output_line()
    if not migrations:
        output_line("None")
    for migration in migrations:
        output_line(f"* {migration['path']} - {migration['description']}")

    print(f"\n\n{output}")
print(f"detecting branch... ", end='', flush=True) found_default_branch = False for default_branch in branches_search_chain: if found_default_branch: break try: repo = RepositoryMining(repo_path, only_in_branch=default_branch, only_no_merge=True) for commit in repo.traverse_commits(): check = commit.msg if check is not None: found_default_branch = True branch = default_branch print(f"using {branch}", flush=True) break except git.exc.GitCommandError as gce: continue if not found_default_branch: print(f"Can't detect default branch", flush=True) sys.exit(1) else: