示例#1
0
def main():
    """Command-line entry point: analyze a git repository and export it as JSON.

    Steps, in order:
      1. Parse the command-line arguments.
      2. Open the repository at ``directory`` and build commit entities for
         every branch, flag duplicated commits and collect commit stats.
      3. Ask the user for the primary remote URL when none was found.
      4. Ask the user which authors are theirs, re-prompting until between
         1 and ``MAX_LIMIT`` entries are selected.
      5. Optionally attach per-commit library information.
      6. Obfuscate the repo entity unless ``--skip_obfuscation`` is true.
      7. Export the result to the ``--output`` path.
    """

    def _flag_value(text):
        """Convert a command-line flag value to a bool.

        Without a converter argparse stores the raw string, so
        ``--skip_obfuscation False`` was truthy. Only explicit negative
        spellings map to False; every other value stays True, matching what
        previously happened for any non-empty string.
        """
        return text.strip().lower() not in (
            '', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory', help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument('--output', default='./repo_data.json', dest='output',
                        help='Path to the JSON file that will contain the result')
    parser.add_argument('--skip_obfuscation', default=False, dest='skip_obfuscation',
                        type=_flag_value,
                        help='If true it won\'t obfuscate the sensitive data such as emails and file names. Mostly for testing purpuse')
    parser.add_argument('--parse_libraries', default=False, type=_flag_value,
                        dest='parse_libraries', help='If true, used libraries will be parsed')

    args = parser.parse_args()

    repo = git.Repo(args.directory)
    ar = AnalyzeRepo(repo)
    q = Questions()

    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)

    # Ask the user if we cannot find remote URL.
    # NOTE(review): the return value is not used here; presumably the helper
    # records the answer on ``r`` itself -- confirm against Questions.
    if r.primary_remote_url == '':
        q.ask_primary_remote_url(r)

    authors = [(c['name'], c['email']) for c in r.contributors.values()]

    MAX_LIMIT = 50
    identities_err = None
    identities = q.ask_user_identity(authors, identities_err)
    while True:
        selected_count = len(identities['user_identity'])
        if selected_count == 0:
            identities_err = 'Please select at least one author'
        elif selected_count > MAX_LIMIT:
            # Build a real message string; the previous
            # "'...', MAX_LIMIT" expression produced a tuple.
            identities_err = 'You cannot select more than %d' % MAX_LIMIT
        else:
            break
        identities = q.ask_user_identity(authors, identities_err)
    r.local_usernames = identities['user_identity']

    if args.parse_libraries:
        # NOTE(review): every author is passed here, not only the ones the
        # user selected above -- confirm this is what AnalyzeLibraries expects.
        al = AnalyzeLibraries(r.commits, authors, repo.working_tree_dir)
        libs = al.get_libraries()

        # combine repo stats with libs used (libs is looked up by commit hash)
        for commit in r.commits:
            if commit.hash in libs:
                commit.libraries = libs[commit.hash]

    if not args.skip_obfuscation:
        r = obfuscate(r)

    er = ExportResult(r)
    er.export_to_json(args.output)
示例#2
0
def main():
    """Command-line entry point: analyze a git repository and export it as JSON.

    Parses the command-line arguments, opens the repository at ``directory``,
    builds commit entities for every branch, flags duplicated commits,
    collects commit stats and creates the repo entity. The user is asked for
    the primary remote URL when none was found. The user is then asked to
    pick their identities, re-prompting until between 1 and ``MAX_LIMIT``
    are selected. The result is exported to the ``--output`` path.
    """

    def _flag_value(text):
        """Convert a command-line flag value to a bool.

        Without a converter argparse stores the raw string, so
        ``--skip_obfuscation False`` was truthy and was forwarded to
        ``AnalyzeRepo`` as such. Only explicit negative spellings map to
        False; every other value stays True, matching what previously
        happened for any non-empty string.
        """
        return text.strip().lower() not in (
            '', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'directory',
        help='Path to the repository. Example usage: run.sh path/to/directory')
    parser.add_argument(
        '--output',
        default='./repo_data.json',
        dest='output',
        help='Path to the JSON file that will contain the result')
    parser.add_argument(
        '--skip_obfuscation',
        default=False,
        dest='skip_obfuscation',
        type=_flag_value,
        help=
        'If true it won\'t obfuscate the sensitive data such as emails and file names. Mostly for testing purpuse'
    )
    args = parser.parse_args()

    repo = git.Repo(args.directory)
    ar = AnalyzeRepo(repo, args.skip_obfuscation)
    q = Questions()

    print('Initialization...')
    for branch in repo.branches:
        ar.create_commits_entity_from_branch(branch.name)
    ar.flag_duplicated_commits()
    ar.get_commit_stats()
    r = ar.create_repo_entity(args.directory)

    # Ask the user if we cannot find remote URL.
    # NOTE(review): the return value is not used here; presumably the helper
    # records the answer on ``r`` itself -- confirm against Questions.
    if r.primary_remote_url == '':
        q.ask_primary_remote_url(r)

    MAX_LIMIT = 50
    identities = q.ask_user_identity(r)
    while True:
        selected_count = len(identities['user_identity'])
        if selected_count == 0:
            print('Please select at least one.')
        elif selected_count > MAX_LIMIT:
            print('You cannot select more than', MAX_LIMIT)
        else:
            break
        identities = q.ask_user_identity(r)
    r.local_usernames = identities['user_identity']

    er = ExportResult(r)
    er.export_to_json(args.output)
示例#3
0
def initialize(directory, skip_obfuscation, output, parse_libraries, email,
               skip_upload, debug_mode, skip, commit_size_limit,
               file_size_limit):
    """Analyze the git repository at ``directory`` and export the result.

    The function returns early, after printing a notice, when the repository
    has no branches, no remotes or no contributors. A ``KeyboardInterrupt``
    raised during analysis or prompting is caught and reported as a
    cancellation.

    Args:
        directory: Path passed to ``git.Repo`` and ``create_repo_entity``.
        skip_obfuscation: When truthy, the repo entity is exported without
            being passed through ``obfuscate``.
        output: Destination forwarded to ``export_to_json_interactive``.
        parse_libraries: When truthy, ``AnalyzeLibraries`` is run for the
            selected authors and its results, looked up by commit hash, are
            attached to the matching commits.
        email: Forwarded to the first ``ask_user_identity`` prompt only.
        skip_upload: Forwarded to ``export_to_json_interactive``.
        debug_mode: When truthy, the "main" logger is set to DEBUG and writes
            to ``extractor_debug_info.log``; otherwise it is set to WARNING.
        skip: Forwarded unchanged to ``AnalyzeLibraries``.
        commit_size_limit: Forwarded unchanged to ``AnalyzeLibraries``.
        file_size_limit: Forwarded unchanged to ``AnalyzeLibraries``.

    Returns:
        None.
    """

    # Initialize logger
    logger = logging.getLogger("main")
    if debug_mode:
        logger.setLevel(logging.DEBUG)
        # The "main" logger is process-wide. Attach the file handler only
        # once so repeated calls do not duplicate every log line or leak
        # file handles.
        if not any(isinstance(h, logging.FileHandler)
                   for h in logger.handlers):
            fh = logging.FileHandler('extractor_debug_info.log')
            fh.setLevel(logging.DEBUG)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh.setFormatter(formatter)
            logger.addHandler(fh)
    else:
        logger.setLevel(logging.WARNING)

    logger.debug("Initialized main logger.")

    repo = git.Repo(directory)
    ar = AnalyzeRepo(repo)
    q = Questions()

    print('Analyzing repo under %s ...' % (directory))

    try:
        # Stop parsing if there are no branches
        if not repo.branches:
            print('No branches detected, will ignore this repo')
            return

        for branch in repo.branches:
            ar.create_commits_entity_from_branch(branch.name)
        ar.flag_duplicated_commits()
        ar.get_commit_stats()
        r = ar.create_repo_entity(directory)

        # Stop parsing if there are no remotes
        if not r.original_remotes:
            print('No remotes detected, will ignore this repo')
            return

        # Ask the user if we cannot find remote URL.
        # NOTE(review): the return value is not used here; presumably the
        # helper records the answer on ``r`` itself -- confirm.
        if r.primary_remote_url == '':
            q.ask_primary_remote_url(r)

        # Stop parsing if there are no authors
        authors = [(c['name'], c['email']) for c in r.contributors.values()]
        if not authors:
            print('No authors detected, will ignore this repo')
            return

        MAX_LIMIT = 50
        identities_err = None
        identities = q.ask_user_identity(authors, identities_err, email)
        while True:
            selected_count = len(identities['user_identity'])
            if selected_count == 0:
                identities_err = 'Please select at least one author'
            elif selected_count > MAX_LIMIT:
                # Build a real message string; the previous
                # "'...', MAX_LIMIT" expression produced a tuple.
                identities_err = 'You cannot select more than %d' % MAX_LIMIT
            else:
                break
            # NOTE(review): ``email`` is not passed on retries, as before;
            # confirm this is intentional.
            identities = q.ask_user_identity(authors, identities_err)
        r.local_usernames = identities['user_identity']

        if parse_libraries:
            # build authors from the selection
            # extract email from name -> email list
            # (assumes every entry contains ' -> '; an entry without it
            # raises IndexError -- TODO confirm the prompt's output format)
            author_emails = [
                entry.split(' -> ', 1)[1] for entry in r.local_usernames
            ]

            if author_emails:
                al = AnalyzeLibraries(r.commits, author_emails,
                                      repo.working_tree_dir, skip,
                                      commit_size_limit, file_size_limit)
                libs = al.get_libraries()
                # combine repo stats with libs used
                for commit in r.commits:
                    if commit.hash in libs:
                        commit.libraries = libs[commit.hash]

        if not skip_obfuscation:
            r = obfuscate(r)
        er = ExportResult(r)
        er.export_to_json_interactive(output, skip_upload)
    except KeyboardInterrupt:
        print("Cancelled by user")
        return