def export_to_json(self, file_name):
    """Serialize self.data to *file_name*, zip it, optionally upload it.

    Writes the JSON dump, wraps it in ``file_name + '.zip'``, asks the
    user whether to upload the archive, and finally deletes the plain
    JSON file (the zip is what is kept).
    """
    # Context manager guarantees the file is closed even if dumps() fails.
    with open(file_name, 'w+') as f:
        f.write(json.dumps(self.data.json_ready(), indent=4))
    # Zip the output. compression=8 == zipfile.ZIP_DEFLATED; kept numeric
    # to avoid changing the file's imports. 'archive' avoids shadowing the
    # builtin 'zip', and the 'with' block already closes the archive.
    with ZipFile(file_name + '.zip', 'w', compression=8) as archive:
        archive.write(file_name)
    print('Result has been saved in: ' + file_name + '.zip')
    q = Questions()
    result = q.query_yes_no(
        'Do you want to upload the result to your profile automatically?')
    if result:
        response = uploadRepo(file_name + '.zip')
        if response is not None:
            reponame = self.data.repo_name
            url = ('https://profile.codersrank.io/repo?token='
                   + response['token'] + '&reponame=' + reponame)
            print('Go to this link in the browser => ' + url)
            webbrowser.open(url)
    # The raw JSON is removed; only the zip archive remains on disk.
    os.remove(file_name)
def main():
    """CLI entry point: analyze one git repository and export the result.

    Parses arguments, walks every branch to collect commits, asks the
    user to pick their identities, optionally analyzes used libraries,
    obfuscates sensitive data and writes the JSON export.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output', default='./repo_data.json', dest='output',
        help='Path to the JSON file that will contain the result')
    # action='store_true' makes these real flags; without it the option
    # consumed a string and any value (even "false") was truthy.
    parser.add_argument(
        '--skip_obfuscation', default=False, action='store_true',
        dest='skip_obfuscation',
        help='If true it won\'t obfuscate the sensitive data such as emails '
             'and file names. Mostly for testing purpose')
    parser.add_argument(
        '--parse_libraries', default=False, action='store_true',
        dest='parse_libraries',
        help='If true, used libraries will be parsed')
    args = parser.parse_args()
    repo = git.Repo(args.directory)
    ar = AnalyzeRepo(repo)
    q = Questions()
    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)
    # Ask the user if we cannot find remote URL
    if r.primary_remote_url == '':
        answer = q.ask_primary_remote_url(r)
    authors = [(c['name'], c['email']) for _, c in r.contributors.items()]
    identities_err = None
    identities = q.ask_user_identity(authors, identities_err)
    MAX_LIMIT = 50
    # Re-prompt until the user picks between 1 and MAX_LIMIT identities.
    while (len(identities['user_identity']) == 0
           or len(identities['user_identity']) > MAX_LIMIT):
        if len(identities['user_identity']) == 0:
            identities_err = 'Please select at least one author'
        if len(identities['user_identity']) > MAX_LIMIT:
            # Was: 'You cannot select more than', MAX_LIMIT — a tuple,
            # not a message string.
            identities_err = 'You cannot select more than %d' % MAX_LIMIT
        identities = q.ask_user_identity(authors, identities_err)
    r.local_usernames = identities['user_identity']
    if args.parse_libraries:
        # build authors from the selection
        al = AnalyzeLibraries(r.commits, authors, repo.working_tree_dir)
        libs = al.get_libraries()
        # combine repo stats with libs used
        for c in r.commits:
            if c.hash in libs:
                c.libraries = libs[c.hash]
    if not args.skip_obfuscation:
        r = obfuscate(r)
    er = ExportResult(r)
    er.export_to_json(args.output)
def main():
    """CLI entry point supporting multiple repositories.

    The positional ``directory`` may contain several paths joined with
    the ``|,|`` separator; in that case the user picks which repos to
    analyze and each is exported to ``./<repo_name>.json``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output', default='./repo_data.json', dest='output',
        help='Path to the JSON file that will contain the result')
    parser.add_argument(
        '--skip_obfuscation', default=False, dest='skip_obfuscation',
        action='store_true',
        help='If true it won\'t obfuscate the sensitive data such as emails '
             'and file names. Mostly for testing purpose')
    # NOTE(review): default=True together with store_true makes this flag a
    # no-op (it can never turn library parsing off); kept as-is because
    # changing the default would change behavior for existing callers.
    parser.add_argument(
        '--parse_libraries', default=True, action='store_true',
        dest='parse_libraries',
        help='If true, used libraries will be parsed')
    parser.add_argument(
        '--email', default='', dest='email',
        help='If set, commits from this email are preselected on authors list')
    parser.add_argument(
        '--skip_upload', default=False, action='store_true',
        dest='skip_upload',
        help="If true, don't prompt for immediate upload")
    try:
        args = parser.parse_args()
        # Multiple repos are passed as a single argument joined by '|,|'.
        folders = args.directory.split('|,|')
        if len(folders) > 1:
            q = Questions()
            repos = q.ask_which_repos(folders)
            if 'chosen_repos' not in repos or len(repos['chosen_repos']) == 0:
                print("No repos chosen, will exit")
                # Was missing: without this return the loop below raised
                # KeyError when 'chosen_repos' was absent.
                return
            for repo in repos['chosen_repos']:
                repo_name = os.path.basename(repo).replace(' ', '_')
                output = './%s.json' % repo_name
                initialize(repo, args.skip_obfuscation, output,
                           args.parse_libraries, args.email, args.skip_upload)
                print('Finished analyzing %s ' % repo_name)
        else:
            initialize(args.directory, args.skip_obfuscation, args.output,
                       args.parse_libraries, args.email, args.skip_upload)
    except KeyboardInterrupt:
        print("Cancelled by user")
        os._exit(0)
def init_headless(directory, skip_obfuscation, output, parse_libraries,
                  emails, debug_mode, user_commits, reponame, skip,
                  commit_size_limit, file_size_limit):
    """Analyze a repository without user interaction (headless mode).

    Identities are supplied as *emails* instead of being asked for
    interactively; the result is written with export_to_json_headless().
    Enables DEBUG file logging when *debug_mode* is set.
    """
    # Initialize logger
    logger = logging.getLogger("main")
    if debug_mode:
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('extractor_debug_info.log')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    else:
        logger.setLevel(logging.WARNING)
    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()
    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    print('Analysing the master branch..')
    ar.analyse_master_user_commits(user_commits)
    print('Creating the repo entity..')
    r = ar.create_repo_entity(directory)
    r.local_usernames = list(set(r.local_usernames + emails))
    print('Setting the local user_names ::', r.local_usernames)
    r.repo_name = reponame
    if parse_libraries:
        # In headless mode only emails are provided as identities.
        # TODO! Support both "name -> email" and plain email formats
        author_emails = list(r.local_usernames)
        if author_emails:
            al = AnalyzeLibraries(r.commits, author_emails,
                                  repo.working_tree_dir, skip,
                                  commit_size_limit, file_size_limit)
            libs = al.get_libraries()
            # combine repo stats with libs used
            for c in r.commits:
                if c.hash in libs:
                    c.libraries = libs[c.hash]
    if not skip_obfuscation:
        r = obfuscate(r)
    er = ExportResult(r)
    er.export_to_json_headless(output)
    print('Successfully analysed the repo ==>' + reponame)
def main():
    """CLI entry point: analyze one repository and export it to JSON.

    This variant passes skip_obfuscation into AnalyzeRepo and lets
    Questions.ask_user_identity() work directly on the repo entity.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output', default='./repo_data.json', dest='output',
        help='Path to the JSON file that will contain the result')
    # action='store_true' makes this a real flag; without it the option
    # consumed a string and any value (even "false") was truthy.
    parser.add_argument(
        '--skip_obfuscation', default=False, action='store_true',
        dest='skip_obfuscation',
        help='If true it won\'t obfuscate the sensitive data such as emails '
             'and file names. Mostly for testing purpose')
    args = parser.parse_args()
    repo = git.Repo(args.directory)
    ar = AnalyzeRepo(repo, args.skip_obfuscation)
    q = Questions()
    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)
    # Ask the user if we cannot find remote URL
    if r.primary_remote_url == '':
        answer = q.ask_primary_remote_url(r)
    identities = q.ask_user_identity(r)
    MAX_LIMIT = 50
    # Re-prompt until the user picks between 1 and MAX_LIMIT identities.
    while (len(identities['user_identity']) == 0
           or len(identities['user_identity']) > MAX_LIMIT):
        if len(identities['user_identity']) == 0:
            print('Please select at least one.')
        if len(identities['user_identity']) > MAX_LIMIT:
            print('You cannot select more than', MAX_LIMIT)
        identities = q.ask_user_identity(r)
    r.local_usernames = identities['user_identity']
    er = ExportResult(r)
    er.export_to_json(args.output)
def main():
    """CLI entry point (full option set, multi-repo aware).

    Supports debug logging, commit/file size limits for the library
    analyzer and the ``|,|``-separated multi-repository syntax.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output', default='./repo_data.json', dest='output',
        help='Path to the JSON file that will contain the result. '
             'Use .json extension otherwise it cannot be recognized.')
    parser.add_argument(
        '--skip_obfuscation', default=False, dest='skip_obfuscation',
        action='store_true',
        help='If true it won\'t obfuscate the sensitive data such as emails '
             'and file names. Mostly for testing purpose')
    # NOTE(review): default=True with store_true makes this flag a no-op;
    # kept as-is to preserve existing behavior.
    parser.add_argument(
        '--parse_libraries', default=True, action='store_true',
        dest='parse_libraries',
        help='If true, used libraries will be parsed')
    parser.add_argument(
        '--email', default='', dest='email',
        help='If set, commits from this email are preselected on authors list')
    parser.add_argument(
        '--skip_upload', default=False, action='store_true',
        dest='skip_upload',
        help="If true, don't prompt for immediate upload")
    parser.add_argument(
        '--debug_mode', default=False, action='store_true', dest='debug_mode',
        help='Print additional debug info into extractor_debug_info.log')
    parser.add_argument(
        '--noskip', default=True, dest='skip', action='store_false',
        help='Do not skip any commits in analyze_libraries. '
             'May impact running time.')
    parser.add_argument(
        '--commit_size_limit', default=5, type=int,
        help='If the estimated size of the changed files is bigger than '
             'this, we skip the commit')
    parser.add_argument(
        '--file_size_limit', default=2, type=int,
        help='The library analyzer skips files bigger than this limit')
    try:
        args = parser.parse_args()
        # Multiple repos are passed as a single argument joined by '|,|'.
        folders = args.directory.split('|,|')
        if len(folders) > 1:
            q = Questions()
            repos = q.ask_which_repos(folders)
            if 'chosen_repos' not in repos or len(repos['chosen_repos']) == 0:
                print("No repos chosen, will exit")
                # Was missing: without this return the loop below raised
                # KeyError when 'chosen_repos' was absent.
                return
            for repo in repos['chosen_repos']:
                repo_name = os.path.basename(repo).replace(' ', '_')
                output = './%s.json' % repo_name
                initialize(repo, args.skip_obfuscation, output,
                           args.parse_libraries, args.email, args.skip_upload,
                           args.debug_mode, args.skip, args.commit_size_limit,
                           args.file_size_limit)
                print('Finished analyzing %s ' % repo_name)
        else:
            initialize(args.directory, args.skip_obfuscation, args.output,
                       args.parse_libraries, args.email, args.skip_upload,
                       args.debug_mode, args.skip, args.commit_size_limit,
                       args.file_size_limit)
    except KeyboardInterrupt:
        print("Cancelled by user")
        os._exit(0)
def export_to_json_interactive(self, file_name, skip_upload=False):
    """Dump the result to *file_name*, then optionally upload the zip.

    When *skip_upload* is true the upload prompt is suppressed entirely;
    otherwise the user is asked and, on "yes", the zipped result is
    uploaded and the profile URL opened in a browser. The plain JSON
    file is always removed afterwards (the zip is what is kept).
    """
    self.dump(file_name)
    q = Questions()
    # Idiomatic truthiness test (was: `skip_upload != False`).
    if skip_upload:
        result = False
    else:
        result = q.query_yes_no(
            'Do you want to upload the result to your profile automatically?')
    if result:
        response = uploadRepo(file_name + '.zip')
        if response is not None:
            reponame = self.data.repo_name
            url = ('https://profile.codersrank.io/repo?token='
                   + response['token'] + '&reponame=' + reponame)
            print('Go to this link in the browser => ' + url)
            webbrowser.open(url)
    os.remove(file_name)
def init_headless(directory, skip_obfuscation, output, parse_libraries,
                  emails, user_commits, reponame):
    """Analyze a repository without user interaction (headless mode).

    Identities are supplied as *emails* instead of being asked for
    interactively; the result is written with export_to_json_headless().
    """
    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()
    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    print('Analysing the master branch..')
    ar.analyse_master_user_commits(user_commits)
    print('Creating the repo entity..')
    r = ar.create_repo_entity(directory)
    r.local_usernames = list(set(r.local_usernames + emails))
    print('Setting the local user_names ::', r.local_usernames)
    r.repo_name = reponame
    if parse_libraries:
        # In headless mode only emails are provided as identities.
        # TODO! Support both "name -> email" and plain email formats
        author_emails = list(r.local_usernames)
        if author_emails:
            al = AnalyzeLibraries(r.commits, author_emails,
                                  repo.working_tree_dir)
            libs = al.get_libraries()
            # combine repo stats with libs used
            for c in r.commits:
                if c.hash in libs:
                    c.libraries = libs[c.hash]
    if not skip_obfuscation:
        r = obfuscate(r)
    er = ExportResult(r)
    er.export_to_json_headless(output)
    print('Successfully analysed the repo ==>' + reponame)
def initialize(directory, skip_obfuscation, output, parse_libraries, email,
               skip_upload, debug_mode, skip, commit_size_limit,
               file_size_limit):
    """Interactively analyze one repository and export the result.

    Bails out early (with a message) when the repo has no branches,
    no remotes or no contributors. Asks the user to pick between 1 and
    MAX_LIMIT identities, optionally runs library analysis, obfuscates
    sensitive data and exports via export_to_json_interactive().
    """
    # Initialize logger
    logger = logging.getLogger("main")
    if debug_mode:
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('extractor_debug_info.log')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    else:
        logger.setLevel(logging.WARNING)
    logger.debug("Initialized main logger.")
    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()
    print('Analyzing repo under %s ...' % (directory))
    try:
        # Stop parsing if there are no branches
        if not repo.branches:
            print('No branches detected, will ignore this repo')
            return
        for branch in repo.branches:
            ar.create_commits_entity_from_branch(branch.name)
        ar.flag_duplicated_commits()
        ar.get_commit_stats()
        r = ar.create_repo_entity(directory)
        # Stop parsing if there are no remotes
        if not r.original_remotes:
            print('No remotes detected, will ignore this repo')
            return
        # Ask the user if we cannot find remote URL
        if r.primary_remote_url == '':
            answer = q.ask_primary_remote_url(r)
        # Stop parsing if there are no authors (one check suffices; the
        # original tested both r.contributors and len(authors)).
        if not r.contributors.items():
            print('No authors detected, will ignore this repo')
            return
        authors = [(c['name'], c['email']) for _, c in r.contributors.items()]
        identities_err = None
        identities = q.ask_user_identity(authors, identities_err, email)
        MAX_LIMIT = 50
        while (len(identities['user_identity']) == 0
               or len(identities['user_identity']) > MAX_LIMIT):
            if len(identities['user_identity']) == 0:
                identities_err = 'Please select at least one author'
            if len(identities['user_identity']) > MAX_LIMIT:
                # Was: 'You cannot select more than', MAX_LIMIT — a tuple,
                # not a message string.
                identities_err = 'You cannot select more than %d' % MAX_LIMIT
            # Pass *email* here too so preselection survives a re-prompt
            # (the original dropped it on retries).
            identities = q.ask_user_identity(authors, identities_err, email)
        r.local_usernames = identities['user_identity']
        if parse_libraries:
            # build authors from the selection:
            # extract email from the "name -> email" entries
            author_emails = [i.split(' -> ', 1)[1] for i in r.local_usernames]
            if author_emails:
                al = AnalyzeLibraries(r.commits, author_emails,
                                      repo.working_tree_dir, skip,
                                      commit_size_limit, file_size_limit)
                libs = al.get_libraries()
                # combine repo stats with libs used
                for c in r.commits:
                    if c.hash in libs:
                        c.libraries = libs[c.hash]
        if not skip_obfuscation:
            r = obfuscate(r)
        er = ExportResult(r)
        er.export_to_json_interactive(output, skip_upload)
    except KeyboardInterrupt:
        print("Cancelled by user")
        return
def init_headless(directory, skip_obfuscation, output, parse_libraries,
                  emails, debug_mode, user_commits, reponame, skip,
                  commit_size_limit, file_size_limit, seed,
                  timeout_seconds=600):
    """Headless repository analysis guarded by a watchdog timer.

    A Timer fires timeout() after *timeout_seconds*; the resulting
    KeyboardInterrupt aborts the analysis and deletes the working copy.
    Also runs the v2 email-matching step and stores it on the entity.
    """
    # Initialize logger
    logger = logging.getLogger("main")
    if debug_mode:
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('extractor_debug_info.log')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    else:
        logger.setLevel(logging.WARNING)
    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()
    # Watchdog: timeout() interrupts us after timeout_seconds.
    timer = Timer(timeout_seconds, timeout)
    timer.start()
    try:
        print('Initialization...')
        for branch in repo.branches:
            ar.create_commits_entity_from_branch(branch.name)
        ar.flag_duplicated_commits()
        ar.get_commit_stats()
        print('Analysing the master branch..')
        ar.analyse_master_user_commits(user_commits)
        print('Creating the repo entity..')
        r = ar.create_repo_entity(directory)
        r.local_usernames = list(set(r.local_usernames + emails))
        # Cap the identity list; the backend accepts at most 50 emails.
        MAX_EMAIL_LIMIT = 50
        if len(r.local_usernames) > MAX_EMAIL_LIMIT:
            print("Email count (" + str(len(r.local_usernames))
                  + ") for this repo exceeds the limit of "
                  + str(MAX_EMAIL_LIMIT) + " emails.")
            r.local_usernames = r.local_usernames[0:MAX_EMAIL_LIMIT]
        print('Setting the local user_names ::', r.local_usernames)
        r.repo_name = reponame
        if parse_libraries and len(ar.commit_list) > 0:
            # In headless mode only emails are provided as identities.
            # TODO! Support both "name -> email" and plain email formats
            author_emails = list(r.local_usernames)
            if author_emails:
                al = AnalyzeLibraries(r.commits, author_emails,
                                      repo.working_tree_dir, skip,
                                      commit_size_limit, file_size_limit)
                libs = al.get_libraries()
                # combine repo stats with libs used
                for c in r.commits:
                    if c.hash in libs:
                        c.libraries = libs[c.hash]
        # new email detection — best effort; fall back to an empty list.
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit (defeating the watchdog).
        try:
            emails_v2 = match_emails(directory, seed)
            r.emails_v2 = emails_v2["emails"]
        except Exception:
            r.emails_v2 = list()
        if not skip_obfuscation:
            r = obfuscate(r)
        er = ExportResult(r)
        er.export_to_json_headless(output)
        print('Successfully analysed the repo ==>' + reponame)
    except KeyboardInterrupt:
        print("{} timeouted after {} seconds.".format(repo.working_dir,
                                                      timeout_seconds))
        print("Deleting", repo.working_dir)
        try:
            shutil.rmtree(repo.working_dir)
        except Exception as e:
            # Exception already covers PermissionError/NotADirectoryError,
            # which the original listed redundantly.
            print("Error when deleting {}. Message: {}".format(
                repo.working_dir, str(e)))
    finally:
        timer.cancel()