def main():
    """Command-line entry point: analyze a repository and export it as JSON.

    Parses the command-line arguments, builds commit entities for every
    branch of the repository, asks the user which of the detected authors
    they are, optionally attaches the libraries reported by
    ``AnalyzeLibraries`` to the commits, obfuscates the result unless
    ``--skip_obfuscation`` is set, and writes it to the ``--output`` file.
    """

    def parse_bool(value):
        """Convert a command-line string into a real boolean."""
        # argparse hands option values over as strings and every non-empty
        # string is truthy, so '--skip_obfuscation False' used to *enable*
        # the flag. Only false-like spellings map to False, which keeps every
        # value that previously enabled a flag working as before.
        return value.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output',
        default='./repo_data.json',
        dest='output',
        help='Path to the JSON file that will contain the result')
    parser.add_argument(
        '--skip_obfuscation',
        default=False,
        dest='skip_obfuscation',
        type=parse_bool,
        help='If true it won\'t obfuscate the sensitive data such as emails and file names. Mostly for testing purpuse')
    parser.add_argument(
        '--parse_libraries',
        default=False,
        dest='parse_libraries',
        type=parse_bool,
        help='If true, used libraries will be parsed')
    args = parser.parse_args()

    repo = git.Repo(args.directory)
    ar = AnalyzeRepo(repo)
    q = Questions()

    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)

    # Ask the user if we cannot find remote URL
    if r.primary_remote_url == '':
        # The return value was never used; presumably the call records the
        # answer on `r` itself -- TODO confirm.
        q.ask_primary_remote_url(r)

    authors = [(c['name'], c['email']) for _, c in r.contributors.items()]

    # Keep asking until the user selected between 1 and max_limit identities.
    max_limit = 50
    identities = q.ask_user_identity(authors, None)
    while not 0 < len(identities['user_identity']) <= max_limit:
        if len(identities['user_identity']) == 0:
            identities_err = 'Please select at least one author'
        else:
            # Build a real string: the previous "'...', MAX_LIMIT" form
            # created a (str, int) tuple instead of a message.
            identities_err = 'You cannot select more than %d' % max_limit
        identities = q.ask_user_identity(authors, identities_err)
    r.local_usernames = identities['user_identity']

    if args.parse_libraries:
        # build authors from the selection
        # NOTE(review): the full author list is passed here, not the user's
        # selection -- confirm this is what AnalyzeLibraries expects.
        al = AnalyzeLibraries(r.commits, authors, repo.working_tree_dir)
        libs = al.get_libraries()
        # combine repo stats with libs used
        for commit in r.commits:
            if commit.hash in libs:
                commit.libraries = libs[commit.hash]

    if not args.skip_obfuscation:
        r = obfuscate(r)

    er = ExportResult(r)
    er.export_to_json(args.output)
def main():
    """Command-line entry point: analyze a repository and export it as JSON.

    Parses the command-line arguments, builds commit entities for every
    branch of the repository, asks the user which identities are theirs and
    writes the resulting repository entity to the ``--output`` file.
    """

    def parse_bool(value):
        """Convert a command-line string into a real boolean."""
        # argparse hands option values over as strings and every non-empty
        # string is truthy, so '--skip_obfuscation False' used to *enable*
        # the flag. Only false-like spellings map to False, which keeps every
        # value that previously enabled the flag working as before.
        return value.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output',
        default='./repo_data.json',
        dest='output',
        help='Path to the JSON file that will contain the result')
    parser.add_argument(
        '--skip_obfuscation',
        default=False,
        dest='skip_obfuscation',
        type=parse_bool,
        help='If true it won\'t obfuscate the sensitive data such as emails and file names. Mostly for testing purpuse')
    args = parser.parse_args()

    repo = git.Repo(args.directory)
    # skip_obfuscation is now a genuine bool rather than the raw string.
    ar = AnalyzeRepo(repo, args.skip_obfuscation)
    q = Questions()

    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)

    # Ask the user if we cannot find remote URL
    if r.primary_remote_url == '':
        # The return value was never used; presumably the call records the
        # answer on `r` itself -- TODO confirm.
        q.ask_primary_remote_url(r)

    # Keep asking until the user selected between 1 and max_limit identities.
    max_limit = 50
    identities = q.ask_user_identity(r)
    while not 0 < len(identities['user_identity']) <= max_limit:
        if len(identities['user_identity']) == 0:
            print('Please select at least one.')
        else:
            print('You cannot select more than', max_limit)
        identities = q.ask_user_identity(r)
    r.local_usernames = identities['user_identity']

    er = ExportResult(r)
    er.export_to_json(args.output)
def initialize(directory, skip_obfuscation, output, parse_libraries, email,
               skip_upload, debug_mode, skip, commit_size_limit,
               file_size_limit):
    """Analyze the git repository under *directory* and export the result.

    The repository is skipped (with a message on stdout) when it has no
    branches, no remotes or no authors. Pressing Ctrl-C during the analysis
    prints "Cancelled by user" and returns.

    Args:
        directory: Path handed to ``git.Repo`` and ``create_repo_entity``.
        skip_obfuscation: When falsy, the repo entity is passed through
            ``obfuscate`` before it is exported.
        output: Forwarded to ``ExportResult.export_to_json_interactive``.
        parse_libraries: When truthy, ``AnalyzeLibraries`` is run for the
            selected authors and its result is attached to the commits.
        email: Forwarded to the first ``Questions.ask_user_identity`` call.
        skip_upload: Forwarded to ``export_to_json_interactive``.
        debug_mode: When truthy, the "main" logger is set to DEBUG and logs
            to ``extractor_debug_info.log``; otherwise it is set to WARNING.
        skip: Forwarded to ``AnalyzeLibraries``.
        commit_size_limit: Forwarded to ``AnalyzeLibraries``.
        file_size_limit: Forwarded to ``AnalyzeLibraries``.

    Returns:
        None.
    """
    debug_log_name = 'extractor_debug_info.log'

    # Initialize logger
    logger = logging.getLogger("main")
    if debug_mode:
        logger.setLevel(logging.DEBUG)
        # getLogger("main") returns the same logger on every call, so only
        # attach the file handler once; otherwise each call would add another
        # handler, duplicating every log line and leaking a file handle.
        has_debug_handler = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename.endswith(debug_log_name)
            for handler in logger.handlers)
        if not has_debug_handler:
            fh = logging.FileHandler(debug_log_name)
            fh.setLevel(logging.DEBUG)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh.setFormatter(formatter)
            logger.addHandler(fh)
    else:
        logger.setLevel(logging.WARNING)
    logger.debug("Initialized main logger.")

    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()

    print('Analyzing repo under %s ...' % (directory))
    try:
        # Stop parsing if there are no branches
        if not repo.branches:
            print('No branches detected, will ignore this repo')
            return

        for branch in repo.branches:
            ar.create_commits_entity_from_branch(branch.name)
        ar.flag_duplicated_commits()
        ar.get_commit_stats()
        r = ar.create_repo_entity(directory)

        # Stop parsing if there are no remotes
        if not r.original_remotes:
            print('No remotes detected, will ignore this repo')
            return

        # Ask the user if we cannot find remote URL
        if r.primary_remote_url == '':
            # The return value was never used; presumably the call records
            # the answer on `r` itself -- TODO confirm.
            q.ask_primary_remote_url(r)

        # Stop parsing if there are no authors. One check on the derived
        # list replaces the two equivalent checks that used to be here.
        authors = [(c['name'], c['email']) for _, c in r.contributors.items()]
        if not authors:
            print('No authors detected, will ignore this repo')
            return

        # Keep asking until the user selected between 1 and max_limit
        # identities.
        max_limit = 50
        identities = q.ask_user_identity(authors, None, email)
        while not 0 < len(identities['user_identity']) <= max_limit:
            if len(identities['user_identity']) == 0:
                identities_err = 'Please select at least one author'
            else:
                # Build a real string: the previous "'...', MAX_LIMIT" form
                # created a (str, int) tuple instead of a message.
                identities_err = 'You cannot select more than %d' % max_limit
            # NOTE(review): the retry does not forward `email`, presumably
            # so the user is asked interactively this time -- confirm.
            identities = q.ask_user_identity(authors, identities_err)
        r.local_usernames = identities['user_identity']

        if parse_libraries:
            # build authors from the selection
            # extract email from name -> email list
            author_emails = [i.split(' -> ', 1)[1] for i in r.local_usernames]
            if author_emails:
                al = AnalyzeLibraries(r.commits, author_emails,
                                      repo.working_tree_dir, skip,
                                      commit_size_limit, file_size_limit)
                libs = al.get_libraries()
                # combine repo stats with libs used
                for commit in r.commits:
                    if commit.hash in libs:
                        commit.libraries = libs[commit.hash]

        if not skip_obfuscation:
            r = obfuscate(r)

        er = ExportResult(r)
        er.export_to_json_interactive(output, skip_upload)
    except KeyboardInterrupt:
        print("Cancelled by user")
        return