Пример #1
0
def get_dataframe_from_repo(repo, num=100):
    """Build a per-country contributor tally for a single GitHub repo.

    Args:
        repo - a full GitHub repo URL
        num - number of contributors to analyze; forwarded to
            get_contributors()

    Returns:
        df - a pandas dataframe with "country" and "contributor_count"
            columns, most common country first
        num_contributors - length of the contributor list that was retrieved
    """
    # resolve the URL to the identifier get_contributors() expects
    owner_and_repo = extract_github_owner_and_repo(repo)
    contributors = get_contributors(owner_and_repo, num)
    num_contributors = len(contributors)

    # look up each contributor's location, map it to a country, and tally
    tally = Counter(
        get_country_from_location(get_contributor_location(person))
        for person in contributors
    )

    # most_common() yields (country, count) pairs, one record per row
    df = pd.DataFrame.from_records(
        tally.most_common(),
        columns=["country", "contributor_count"],
    )

    return df, num_contributors
Пример #2
0
def print_by_contributor(software_name,
                         contributors,
                         output_csv=False,
                         pypi_data=None):
    """Print one line per contributor with their location and country.

    Each contributor is written to the terminal. When output_csv is true,
    a timestamped csv is created up front and every contributor is also
    appended to it.

    Args:
        software_name - name of package or repo
        contributors - a list of contributors
        output_csv - whether to output a csv.
        pypi_data - a pypi data object; when given, contributors found in
            its "pypi_maintainers" entry are marked with "*".

    Returns:
        None
    """
    if output_csv:
        # the timestamp makes the csv filename unique to this run
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        create_csv("contributor", timestamp)

    # a non-None pypi_data indicates a PyPI package scan
    scanning_pypi = pypi_data is not None

    print("CONTRIBUTOR, LOCATION")
    if scanning_pypi:
        print("* indicates PyPI maintainer")
    print("---------------------")

    for contributor in contributors:
        location = get_contributor_location(contributor)
        country = get_country_from_location(location)

        if output_csv:
            add_committer_to_csv("contributor", software_name, timestamp,
                                 contributor, location, country)

        try:
            if scanning_pypi and contributor in pypi_data["pypi_maintainers"]:
                row = (contributor, "*", "|", location, "|", country)
            else:
                row = (contributor, "|", location, "|", country)
            print(*row)
        except UnicodeEncodeError:
            # the output stream could not encode the row; print a reduced one
            print(contributor, "| error")
Пример #3
0
def print_by_country(contributors):
    """Print the number of contributors per country.

    Writes a header to the terminal window, then one "country count" line
    per country, most common first.

    Args:
        contributors: a list of contributors

    Returns:
        None
    """
    print("COUNTRY | # OF CONTRIBUTORS")
    print("---------------------------")

    # look up each contributor's location, map it to a country, and tally
    tally = Counter(
        get_country_from_location(get_contributor_location(person))
        for person in contributors
    )

    for country, count in tally.most_common():
        print(country, count)
Пример #4
0
def scan_multiple_repos(input_file="repos.txt", num=100):
    """Create csv of data for multiple repos.

    Scan through the repos listed in input_file and create a single csv that
    stores all contributor-related data for each contributor in each repo.

    Args:
        input_file - file containing repo list, one repo per line; blank or
            whitespace-only lines are ignored
        num - max number of contributors to analyze per repo

    Returns:
        None
    """
    # create csv to store multi-repo scan results; the timestamp keeps the
    # filename unique to this run
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    create_csv("multirepo", timestamp)

    # open file that contains repos to scan and append contributors for each
    # repo to csv
    with open(input_file, "r") as input_repos:
        for line in input_repos:
            # Lines read from a file keep their trailing newline, so a blank
            # line is "\n" rather than "". Strip before testing so blank
            # lines really are skipped.
            repo = line.strip()
            if not repo:
                continue

            repo_ending_string = extract_github_owner_and_repo(repo)
            contributors = get_contributors(repo_ending_string, num)
            for contributor in contributors:
                location = get_contributor_location(contributor)
                country = get_country_from_location(location)
                add_committer_to_csv(
                    "multirepo",
                    repo_ending_string,
                    timestamp,
                    contributor,
                    location,
                    country,
                )
Пример #5
0
 def test_get_contributor_location(self):
     """Check the location returned for the "anarkiwi" account."""
     expected = "Wellington, New Zealand"
     actual = get_contributor_location("anarkiwi")
     assert actual == expected