Example #1
0
def __init__(self, save_path: str, frequency):
    self.save_path = save_path
    self.gh_pat_helper = GithubPersonalAccessTokenHelper(get_pats())
    PAT = self._get_access_token()
    self.gh = Github(PAT)
    # churn, commit frequency
    self.frequency = frequency
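The constructor above relies on a `get_pats()` helper and a `GithubPersonalAccessTokenHelper` that are not shown in these examples. Below is a minimal sketch of the interface they appear to expose, inferred from how Example #3 consumes it; the environment variable and the internals are assumptions, not the real implementation.

import os
import time
import requests

def get_pats():
    # Hypothetical: read a comma-separated list of personal access tokens
    # from an environment variable, e.g. GITHUB_PATS="token1,token2"
    return [t for t in os.environ.get("GITHUB_PATS", "").split(",") if t]

class GithubPersonalAccessTokenHelper:
    """Sketch of a PAT rotator; the real implementation lives elsewhere."""

    def __init__(self, pats):
        self.pats = pats

    def get_access_token(self):
        # Return the first token with remaining core rate limit; otherwise
        # report how long the caller should sleep before retrying.
        earliest_reset = None
        for pat in self.pats:
            resp = requests.get("https://api.github.com/rate_limit",
                                headers={"Authorization": "Token " + pat})
            core = resp.json()["resources"]["core"]
            if core["remaining"] > 0:
                return {"token": pat}
            reset = core["reset"]
            earliest_reset = reset if earliest_reset is None else min(earliest_reset, reset)
        sleep_secs = max(earliest_reset - time.time(), 0) if earliest_reset else 60
        return {"token": None, "sleep_time_secs": sleep_secs}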
Example #2
0
def __init__(self, save_path: str):
    self.save_path = save_path
    # TODO: fix this to be an array
    self.gh_pat_helper = GithubPersonalAccessTokenHelper(get_pats())
Example #3
0
class DevOracle:
    def __init__(self, save_path: str, frequency):
        self.save_path = save_path
        self.gh_pat_helper = GithubPersonalAccessTokenHelper(get_pats())
        PAT = self._get_access_token()
        self.gh = Github(PAT)
        # churn, commit frequency
        self.frequency = frequency

    def _get_access_token(self):
        res = self.gh_pat_helper.get_access_token()
        if "token" in res and res["token"] is not None:
            return res["token"]
        print('Going to sleep since no token exists with usable rate limit')
        time.sleep(res["sleep_time_secs"])
        return self._get_access_token()

    def get_and_save_full_stats(self, chain_name: str):
        github_orgs = self._read_orgs_for_chain_from_toml(chain_name)

        stats_counter = Counter()
        hist_data = None

        for org_url in github_orgs:
            if not org_url.startswith("https://github.com/"):
                # TODO: if it is a GitLab repo, use the GitLab APIs
                print("%s is not a GitHub URL... skipping" % org_url)
                continue
            org = org_url.split("https://github.com/")[1]
            print("Fetching repo data for", org)
            org_repo_data_list = self._get_repo_data_for_org(org)
            print("Fetching stats(stargazers, forks, releases, churn_4w) for",
                  org_url)
            stats_counter += self._get_stats_for_org_from_repo_data(
                org_repo_data_list)
            hist_data_for_org = self._get_historical_progress(
                org_repo_data_list)
            print("Combining hist data ...")
            hist_data = self._combine_hist_data(hist_data, hist_data_for_org)

        if hist_data is None or not stats_counter:
            remove_chain_from_config(chain_name)
            print('No data found for organisation in toml file')
            sys.exit(1)

        path_prefix = self.save_path + '/' + chain_name
        with open(path_prefix + '_stats.json', 'w') as outfile:
            outfile.write(json.dumps(dict(stats_counter)))
        with open(path_prefix + '_history.json', 'w') as outfile:
            outfile.write(json.dumps(dict(hist_data)))

    # Read the list of GitHub organizations for a chain from its toml file
    # Ensure chain_name is the same as the name of the toml file
    def _read_orgs_for_chain_from_toml(self, chain_name):
        toml_file_path = path.join(dir_path, 'protocols', chain_name + '.toml')
        if not path.exists(toml_file_path):
            print(".toml file not found for %s in /protocols folder" %
                  chain_name)
            sys.exit(1)
        try:
            with open(toml_file_path, 'r') as f:
                data = f.read()
            print("Fetching organizations for %s from toml file ..." %
                  chain_name)
            github_orgs = toml.loads(data)['github_organizations']
            return github_orgs
        except Exception:
            print('Could not open toml file - check formatting.')
            sys.exit(1)

    # get the data for all the repos of a github organization
    def _get_repo_data_for_org(self, org_name: str):
        org_repos = self._make_org_repo_list(org_name)
        forked_repos = []
        page = 1
        url = f"https://api.github.com/orgs/{org_name}/repos?type=forks&page={page}&per_page=100"
        PAT = self._get_access_token()
        response = requests.get(url, headers={'Authorization': 'Token ' + PAT})
        while len(response.json()) > 0:
            for repo in response.json():
                forked_repos.append(repo["full_name"])
            page += 1
            url = f"https://api.github.com/orgs/{org_name}/repos?type=forks&page={page}&per_page=100"
            response = requests.get(url,
                                    headers={'Authorization': 'Token ' + PAT})
        unforked_repos = list(set(org_repos) - set(forked_repos))
        # GitHub API can hit spam limit
        number_of_hyperthreads = multiprocessing.cpu_count()
        n_jobs = 2 if number_of_hyperthreads > 2 else number_of_hyperthreads
        repo_data_list = Parallel(n_jobs=n_jobs)(
            delayed(self._get_single_repo_data)(repo)
            for repo in unforked_repos)
        return repo_data_list

    # given the org_name, return list of organisation repos
    def _make_org_repo_list(self, org_name: str):
        org_repos = []
        try:
            entity = self.gh.get_organization(org_name)
        except Exception:
            # The name is not an organization; fall back to treating it as a user
            entity = self.gh.get_user(org_name)
        for repo in entity.get_repos():
            org_repos.append(repo.name)
        org_repos = [org_name + '/{0}'.format(repo) for repo in org_repos]
        return org_repos

    # get repo data using a repo URL in the form of `org/repo`
    def _get_single_repo_data(self, org_then_slash_then_repo: str):
        print('Fetching repo data for ', org_then_slash_then_repo)
        try:
            repo = self.gh.get_repo(org_then_slash_then_repo)
            weekly_add_del = repo.get_stats_code_frequency()
            weekly_commits = repo.get_stats_participation().all
            # TODO: Remove contributor specific code
            contributors = repo.get_stats_contributors()
            releases = repo.get_releases()
            return {
                "name": org_then_slash_then_repo,
                "repo": repo,
                "weekly_add_del": weekly_add_del,
                "weekly_commits": weekly_commits,
                "contributors": contributors,
                "releases": releases
            }
        except Exception as e:
            if getattr(e, "status", None) == 403:
                print("Token rate limit reached, switching tokens")
                PAT = self._get_access_token()
                self.gh = Github(PAT)
                return self._get_single_repo_data(org_then_slash_then_repo)
            print('Could not find data for ' + org_then_slash_then_repo)
            return {}

    # given a list of repo_data of org, analyze for churn_4w, commits_4w, stars, releases
    def _get_stats_for_org_from_repo_data(self, org_repo_data_list):
        number_of_hyperthreads = multiprocessing.cpu_count()
        n_jobs = 2 if number_of_hyperthreads > 2 else number_of_hyperthreads
        repo_stats_list = Parallel(n_jobs=n_jobs)(
            delayed(self._analyse_repo_data_for_churn_and_commits_4w)(
                repo_data) for repo_data in org_repo_data_list)
        stats_counter = Counter()
        for repo_stats in repo_stats_list:
            stats_counter += Counter(repo_stats)
        sc_dict = dict(stats_counter)
        # Default num_releases to 0 when no repo reported any releases
        sc_dict.setdefault('num_releases', 0)
        # TODO: remove contributor specific data
        # FIXME: find an efficient way to count distinct devs; this is a reasonable lower bound.
        # Note: the GitHub API only returns up to 100 contributors per repo, so this undercounts.
        max_contributors = 0
        for repo_stats in repo_stats_list:
            this_contributors = repo_stats.get('contributors', 0)
            max_contributors = max(max_contributors, this_contributors)
        sc_dict['contributors'] = max_contributors
        return sc_dict

    # analyse churn, commits from a git repo data for 'self.frequency' number of weeks
    # TODO: change 4w to make it more generic
    # analyses for latest 4w currently
    def _analyse_repo_data_for_churn_and_commits_4w(self, repo_data: dict):
        repo = repo_data["repo"]
        weekly_add_del = repo_data["weekly_add_del"]
        weekly_commits = repo_data["weekly_commits"]
        # TODO: remove contributor specific data
        contributors = repo_data["contributors"]
        releases = repo_data["releases"]

        churn_4w = 0
        commits_4w = 0
        if weekly_add_del and weekly_commits:
            for i in range(1, self.frequency + 1):
                try:
                    # weekly_add_del rows are [<week UNIX timestamp>, <additions>, <deletions (negative)>]
                    # Deletions are negative, so additions - deletions gives total churn
                    churn_4w += (weekly_add_del[-i]._rawData[1] -
                                 weekly_add_del[-i]._rawData[2])
                    commits_4w += weekly_commits[-i]
                except Exception:
                    break
        # TODO: remove contributor specific data
        num_contributors = len(contributors) if contributors else 0
        stats = {
            'churn_4w': churn_4w,
            'commits_4w': commits_4w,
            'contributors': num_contributors,
            'stars': repo.stargazers_count,
            'forks': repo.forks_count,
            'num_releases': releases.totalCount
        }
        return stats

    # given a list of repo_data for org, analyze for
    # weekly_commits and weekly_churn for all weeks till now;
    # Weekly commit, churn serve as indicators for historical progress
    def _get_historical_progress(self, org_repo_data_list: list):
        # GitHub API can hit spam limit
        number_of_hyperthreads = multiprocessing.cpu_count()
        n_jobs = 2 if number_of_hyperthreads > 2 else number_of_hyperthreads
        repo_count_list = Parallel(n_jobs=n_jobs)(
            delayed(self._get_weekly_churn_and_commits_of_repo)(repo_data)
            for repo_data in org_repo_data_list)
        churns = []
        commits = []
        for repo in repo_count_list:
            this_churn = repo['weekly_churn']
            this_commits = repo['weekly_commits']
            # Reverse churn and commits array to show latest week data first
            churns.append(this_churn[::-1])
            commits.append(this_commits[::-1])
        # Element-wise addition of list of lists
        # Re-reverse churn and commits arrays to show oldest week data first
        churns = [sum(x) for x in zip_longest(*churns, fillvalue=0)][::-1]
        commits = [sum(x) for x in zip_longest(*commits, fillvalue=0)][::-1]
        # churns = churns[-52:]
        # TODO: figure out why this assert is failing
        # assert len(churns) == len(commits)

        # Reversed weeks_ago based on the length of churn/commit weeks
        weeks_ago = list(range(len(churns)))[::-1]
        sc_dict = {
            'weekly_churn': churns,
            'weekly_commits': commits,
            'weeks_ago': weeks_ago
        }
        return sc_dict

    def _get_weekly_churn_and_commits_of_repo(self, repo_data: dict):
        org_then_slash_then_repo = repo_data["name"]
        weekly_commits = repo_data["weekly_commits"]
        weekly_add_del = repo_data["weekly_add_del"]
        try:
            # For front-end app use, combining this github API call with that for single_repo_stats would be beneficial
            weekly_churn = []
            if weekly_add_del:
                for i in range(len(weekly_add_del)):
                    # Deletions are negative
                    weekly_churn.append(weekly_add_del[i]._rawData[1] -
                                        weekly_add_del[i]._rawData[2])
            stats = {
                'weekly_churn': weekly_churn,
                'weekly_commits': weekly_commits,
                'repo': org_then_slash_then_repo
            }
            return stats
        except Exception as e:
            print(e)
            stats = {
                'weekly_churn': [],
                'weekly_commits': weekly_commits,
                'repo': org_then_slash_then_repo
            }
            return stats

    # Do element wise addition for `weekly_churn`, `weekly_commits`, `weeks_ago` lists
    # to get the cumulative historical data for a given chain
    def _combine_hist_data(self, cumulative_hist_data, hist_data_for_org):
        if cumulative_hist_data is None:
            cumulative_hist_data = hist_data_for_org
        else:
            cumulative_hist_data["weekly_churn"] = \
                element_wise_addition_lists(
                    cumulative_hist_data["weekly_churn"][::-1],
                    hist_data_for_org["weekly_churn"][::-1]
                )[::-1]
            cumulative_hist_data["weekly_commits"] = \
                element_wise_addition_lists(
                    cumulative_hist_data["weekly_commits"][::-1],
                    hist_data_for_org["weekly_commits"][::-1]
                )[::-1]
            cumulative_hist_data["weeks_ago"] = \
                element_wise_addition_lists(
                    cumulative_hist_data["weeks_ago"][::-1],
                    hist_data_for_org["weeks_ago"][::-1]
                )[::-1]
        return cumulative_hist_data
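Example #3 also calls an `element_wise_addition_lists` helper and a `remove_chain_from_config` function that are defined elsewhere and not shown here. Below is a plausible sketch of the list helper, assuming it pads the shorter list with zeros (the same `zip_longest` idiom the class already uses), followed by a hypothetical driver; the output path and chain name are placeholders.

from itertools import zip_longest

def element_wise_addition_lists(list_a, list_b):
    # Element-wise sum of two lists, padding the shorter one with zeros
    return [a + b for a, b in zip_longest(list_a, list_b, fillvalue=0)]

# Hypothetical usage:
# oracle = DevOracle('./output', frequency=4)
# oracle.get_and_save_full_stats('some_chain')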
Example #4
0
class Contributors:

    def __init__(self, save_path: str):
        self.save_path = save_path
        # TODO: fix this to be an array
        self.gh_pat_helper = GithubPersonalAccessTokenHelper(get_pats())

    # list all the repos of a protocol from toml 
    # Includes all the core github org/user repos and the repo urls listed in toml
    # Ensure protocol is same as name of toml file
    async def get_repos_for_protocol_from_toml(self, protocol):
        pat = await self._get_access_token()
        repos = set()
        toml_file_path = path.join(dir_path, 'protocols', protocol + '.toml')
        if not path.exists(toml_file_path):
            print(".toml file not found for %s in /protocols folder" % chain_name)
            sys.exit(1)
        try:
            with open(toml_file_path, 'r') as f:
                data = f.read()
            github_orgs = toml.loads(data)['github_organizations']
            repos_in_toml = toml.loads(data)['repo']
        except Exception:
            print('Could not open toml file - check formatting.')
            sys.exit(1)
 
        for org in github_orgs:
            if not org.lower().startswith("https://github.com/"):
                continue
            org_name = org[len("https://github.com/"):]
            try:
                # Get all repos 
                all_org_repos = []
                page = 1
                url = f"https://api.github.com/orgs/{org_name}/repos?page={page}&per_page=100"
                response = requests.get(url, headers={'Authorization': 'Token ' + pat})
                while len(response.json()) > 0:
                    for repo in response.json():
                        all_org_repos.append(repo["full_name"])
                    page += 1
                    url = f"https://api.github.com/orgs/{org_name}/repos?page={page}&per_page=100"
                    response = requests.get(url, headers={'Authorization': 'Token ' + pat})
                # Get forked repos
                forked_org_repos = []
                page = 1
                url = f"https://api.github.com/orgs/{org_name}/repos?type=forks&page={page}&per_page=100"
                response = requests.get(url, headers={'Authorization': 'Token ' + pat})
                while len(response.json()) > 0:
                    for repo in response.json():
                        forked_org_repos.append(repo["full_name"])
                    page += 1
                    url = f"https://api.github.com/orgs/{org_name}/repos?type=forks&page={page}&per_page=100"
                    response = requests.get(url, headers={'Authorization': 'Token ' + pat})
                # Find difference
                unforked_repos = list(set(all_org_repos) - set(forked_org_repos))
                for repo in unforked_repos:
                    repos.add(repo.lower())
            except Exception:
                # The "org" is not an organization but a user account
                # Get repos of user
                url = f"https://api.github.com/users/{org_name}/repos"
                response = requests.get(url, headers={'Authorization': 'Token ' + pat})
                for repo in response.json():
                    repos.add(repo["full_name"].lower())
        return list(repos)
    
    async def _get_access_token(self):
        res = self.gh_pat_helper.get_access_token()
        if "token" in res and res["token"] is not None:
            return res["token"]
        print('Going to sleep since no token exists with usable rate limit')
        await asyncio.sleep(res["sleep_time_secs"])
        return await self._get_access_token()

    async def get_contributors_of_repo_in_last_n_years(self, org_then_slash_then_repo: str, n_years: int = 1):
        # Commits are not chronological, so need to pull all and filter
        commits = []

        # get personal access token
        pat = await self._get_access_token()

        async with ClientSession() as session:
            initial_request = await get_commits(session, pat, org_then_slash_then_repo, page=1)
            # Repo doesn't exist
            if initial_request["error"] or (type(initial_request["data"]) == dict and initial_request["data"].message == 'Not Found'):
                return []  
            if isinstance(initial_request["data"], list) and len(initial_request["data"]) == 0:
                return []
            commits.extend(initial_request["data"])

            rate_limit_remaining = initial_request["rate_limit_remaining"]
            remaining_requests_to_be_made = 0
            if initial_request["total_pages"]:
                remaining_requests_to_be_made = initial_request["total_pages"] - 1

            # starting page
            batch_start = 2
            while remaining_requests_to_be_made > 0:
                if remaining_requests_to_be_made > min(rate_limit_remaining, 200):
                    batch_end = batch_start + min(rate_limit_remaining, 200)
                else:
                    batch_end = batch_start + remaining_requests_to_be_made

                print("Start", batch_start, "End", batch_end)

                # get data for page from batch_start to batch_end
                tasks = []
                for page in range(batch_start, batch_end + 1):
                    task = ensure_future(
                        get_commits(session, pat, org_then_slash_then_repo, page)
                    )
                    tasks.append(task)

                responses = await asyncio.gather(*tasks)
                if len(responses) == 0:
                    sys.exit(1)

                successful_responses_count = 0
                rate_limit_exceeded = False
                for response in responses:
                    if response["error"]:
                        # Sometimes we also get a 502
                        if response["error_code"] == 403 or response["error_code"] // 100 == 5:
                            print("Rate limit trigger detected")
                            rate_limit_exceeded = True
                            break
                        # Printing unhandled error and exiting
                        print(response)
                        sys.exit(1)

                    if not isinstance(response["data"], list):
                        print(response["error"])
                        sys.exit(1)
                    successful_responses_count += 1
                    commits.extend(response["data"])

                if rate_limit_exceeded:
                    print("Hourly rate limit exceeded for current token")
                    pat = await self._get_access_token()

                print("Successful reqs: ", successful_responses_count)
                remaining_requests_to_be_made -= successful_responses_count
                rate_limit_remaining -= successful_responses_count
                batch_start += successful_responses_count

        days_count = 365 * n_years # TODO: Adjust for leap years
        # Remove older commits
        year_ago_date = dt.datetime.now() - dt.timedelta(days=days_count)  
        contributors = []
        for item in commits:
            try:
                date_string = item['commit']['author']['date']
                date = dt.datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%SZ')
                if date > year_ago_date:
                    if item['author']:  # Can be null (user not logged in)
                        contributors.append(item['author']['login']) # GitHub username
            except Exception as e:
                print(e)
                sys.exit(1)
        # De-duplicate committers
        deduplicated_contributors = list(set(contributors))
        return deduplicated_contributors

    async def get_monthly_contributors_of_repo_in_last_n_years(self, org_then_slash_then_repo: str, n_years: int = 1):
        # Commits are not chronological, so need to pull all and filter
        commits = []

        # get personal access token
        pat = await self._get_access_token()

        month_count_plus_one = 12 * n_years + 1
        # Build a 2D list of (12 * n_years) empty lists, one per month.
        # A comprehension is used instead of [[]] * 12, which would reuse the same
        # list object, so appending to one month would append to all of them.
        contributors = [[] for _ in range(12 * n_years)]

        async with ClientSession() as session:
            initial_request = await get_commits(session, pat, org_then_slash_then_repo, page=1)
            # Repo doesn't exist
            if initial_request["error"] or (type(initial_request["data"]) == dict and initial_request["data"].message == 'Not Found'):
                return contributors
            if isinstance(initial_request["data"], list) and len(initial_request["data"]) == 0:
                return contributors
            commits.extend(initial_request["data"])

            rate_limit_remaining = initial_request["rate_limit_remaining"]
            remaining_requests_to_be_made = 0
            if initial_request["total_pages"]:
                remaining_requests_to_be_made = initial_request["total_pages"] - 1

            # starting page
            batch_start = 2
            while remaining_requests_to_be_made > 0:
                if remaining_requests_to_be_made > min(rate_limit_remaining, 200):
                    batch_end = batch_start + min(rate_limit_remaining, 200)
                else:
                    batch_end = batch_start + remaining_requests_to_be_made

                print("Start", batch_start, "End", batch_end)

                # get data for page from batch_start to batch_end
                tasks = []
                for page in range(batch_start, batch_end + 1):
                    task = ensure_future(
                        get_commits(session, pat, org_then_slash_then_repo, page)
                    )
                    tasks.append(task)

                responses = await asyncio.gather(*tasks)
                if len(responses) == 0:
                    sys.exit(1)

                successful_responses_count = 0
                rate_limit_exceeded = False
                for response in responses:
                    if response["error"]:
                        # Sometimes we also get a 502
                        if response["error_code"] == 403 or response["error_code"] // 100 == 5:
                            print("Rate limit trigger detected")
                            rate_limit_exceeded = True
                            break
                        # Printing unhandled error and exiting
                        print(response)
                        sys.exit(1)

                    if not isinstance(response["data"], list):
                        print(response["error"])
                        sys.exit(1)
                    successful_responses_count += 1
                    commits.extend(response["data"])

                if rate_limit_exceeded:
                    print("Hourly rate limit exceeded for current token")
                    pat = await self._get_access_token()

                print("Successful reqs: ", successful_responses_count)
                remaining_requests_to_be_made -= successful_responses_count
                rate_limit_remaining -= successful_responses_count
                batch_start += successful_responses_count

        # If wanting to create a record of every repo's commits, uncomment this
        # with open(org_then_slash_then_repo + '_commits.json', 'w+') as outfile:
        #    json.dump(commits, outfile)
        # Remove older commits
        month_start_dates = [dt.datetime.now()]  # Include final end date for later use
        for month in range(1, month_count_plus_one):  # Generate (12 * n_years) months of start dates
            month_start_dates.append(month_start_dates[-1] - dt.timedelta(days=30))  # 12 'months' is 360 days
        month_start_dates.reverse()
        for item in commits:
            try:
                date_string = item['commit']['author']['date']
                date = dt.datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%SZ')
                # FIXME find a more efficient way to do this
                for index, (start, end) in enumerate(zip(month_start_dates, month_start_dates[1:])):
                    if date >= start and date < end and item['author']:  # Can be null (user not logged in)
                        contributors[index].append(item['author']['login'])
            except Exception as e:
                print('Failed to get monthly contributors for ' + org_then_slash_then_repo)
                print(e)
                sys.exit(1)
        # De-duplicate committers
        for index, month_of_contributors in enumerate(contributors):
            deduplicated_contributors = list(set(month_of_contributors))
            contributors[index] = deduplicated_contributors
        return contributors

    async def get_contr_from_toml(self, toml_file: str, monthly: bool = True, years_count: int = 1):
        toml_file_without_protocols = toml_file.split('protocols/')[1]
        protocol_name = toml_file_without_protocols.split('.toml')[0]
        out_file_name = toml_file_without_protocols.replace('.toml', '_contributors.json')
        out_file_name_with_path = self.save_path + '/' + out_file_name
        # Useful if left running e.g. over weekend - if failed, re-run INCLUDING last repo listed
        progress_file_name = toml_file.replace('.toml', '_repos_seen.txt')

        # Build a 2D list of (12 * years_count) empty lists, one per month
        # (a comprehension avoids the shared-reference pitfall of [[]] * 12).
        list_2d = [[] for _ in range(12 * years_count)]

        stats = None
        seen_repos = []
        if path.exists(out_file_name_with_path):
            with open(out_file_name_with_path, 'r') as stats_json:
                stats = json.load(stats_json)
            if stats != list_2d:
                if path.exists(progress_file_name):
                    with open(progress_file_name, 'r') as progress_file:
                        progress_repos_list = progress_file.readlines()
                    for repo_name_with_line_term in progress_repos_list:
                        repo_name = repo_name_with_line_term.rstrip('\n')
                        seen_repos.append(repo_name)
            elif path.exists(progress_file_name):
                remove(progress_file_name)

        if stats:
            core_array = stats
        elif monthly:
            # Start from the empty per-month structure built above
            # TODO: make this length configurable
            core_array = list_2d
        else:
            # yearly
            core_array = []
        
        with open(out_file_name_with_path, 'w') as outfile:
            json.dump(core_array, outfile)
        
        repos = await self.get_repos_for_protocol_from_toml(protocol_name)
        unseen_repo = []
        for repo in repos:
            if repo in seen_repos:
                print("Ignoring seen repo: ", repo)
                continue
            unseen_repo.append(repo)

        # Don't thread this - API limit
        for repo in unseen_repo:
            print("Analysing repo: ", repo)
            if monthly:
                contributors = await self.get_monthly_contributors_of_repo_in_last_n_years(repo, n_years=years_count)
            else:
                contributors = await self.get_contributors_of_repo_in_last_n_years(repo, n_years=years_count)
            # Save progress in case of failure
            try:
                with open(out_file_name_with_path) as json_file:
                    data = json.load(json_file)
                if monthly:
                    # FIXME efficiency, note np.concatenate on axis 1 doesn't play well with our core array
                    for index, item in enumerate(data):
                        item.extend(contributors[index])
                else:
                    data.extend(contributors)
                with open(progress_file_name, 'a') as progress_file:
                    progress_file.write(repo + '\n')
                with open(out_file_name_with_path, 'w') as outfile:
                    json.dump(data, outfile)
            except Exception as e:
                print('Failed to collate monthly contributors for all repos in toml file')
                print(e)
                sys.exit(1)
        try:
            with open(out_file_name_with_path) as json_file:
                data = json.load(json_file)
        except Exception as e:
            print(e)
            sys.exit(1)
        if monthly:
            print('Monthly active developers in the past year:')
            for index, month_of_contributors in enumerate(data):
                deduplicated_monthly_contributors = list(set(month_of_contributors))
                data[index] = deduplicated_monthly_contributors
                print('Month ' + str(index + 1) + ': ' + str(len(deduplicated_monthly_contributors)))
            deduplicated_contributors = data
        else:
            deduplicated_contributors = list(set(data))
            print('Total active developers in the past year: ' + str(len(deduplicated_contributors)))
        with open(out_file_name_with_path, 'w') as outfile:
            json.dump(deduplicated_contributors, outfile)
        return deduplicated_contributors
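The `Contributors` class depends on a `get_commits` coroutine that is not included in these examples. Below is a minimal sketch of the shape it appears to have, inferred from how its return value is consumed above; the URL, header parsing, and field names are assumptions, not the real helper. A hypothetical driver follows.

import asyncio
import re
from aiohttp import ClientSession

async def get_commits(session: ClientSession, pat: str, org_then_slash_then_repo: str, page: int = 1):
    # Fetch one page of commits for the repo and mirror the fields the
    # Contributors class reads: error, error_code, data, rate_limit_remaining, total_pages.
    url = (f"https://api.github.com/repos/{org_then_slash_then_repo}/commits"
           f"?page={page}&per_page=100")
    async with session.get(url, headers={"Authorization": "Token " + pat}) as resp:
        data = await resp.json()
        # Infer the total page count from the Link header, if GitHub returned one
        total_pages = None
        for part in resp.headers.get("Link", "").split(","):
            if 'rel="last"' in part:
                match = re.search(r"[?&]page=(\d+)", part)
                if match:
                    total_pages = int(match.group(1))
        return {
            "error": resp.status >= 400,
            "error_code": resp.status if resp.status >= 400 else None,
            "data": data,
            "rate_limit_remaining": int(resp.headers.get("X-RateLimit-Remaining", "0")),
            "total_pages": total_pages,
        }

# Hypothetical driver (the save path and toml path are placeholders):
# contributors = Contributors('./output')
# asyncio.run(contributors.get_contr_from_toml('protocols/some_protocol.toml', monthly=True))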