示例#1
0
from datamanager.filemanager import FileManager
from properties import dataFolderPath
import pandas as pd
import numpy as np
import os
import re

# dataFolderPath = '/Users/georgia/Desktop/Thesis/DataUser/datasets'
fm = FileManager()


def remove_outliers(data, percentile):
    """Return the values of *data* at or below the given percentile.

    The cutoff is computed with ``np.nanpercentile`` so NaNs do not skew
    it; NaN entries themselves are dropped too (``NaN <= cutoff`` is
    false). The relative order of the surviving values is preserved.
    """
    cutoff = np.nanpercentile(data, percentile)
    return [value for value in data if value <= cutoff]


def normalise_data(data):
    """Min-max scale each value of *data* to the range [0, 100].

    :param data: iterable of numeric values. NaNs are skipped by the
        pandas min/max, so they do not distort the scale.
    :returns: list of scaled values, one per input item, in input order.

    Note: if every value is equal the denominator is zero and the output
    follows numpy/pandas division semantics (NaN/inf), as in the
    original implementation.
    """
    data_normalised = []
    new_data_df = pd.DataFrame(data)
    # Hoist the loop-invariant min/max/range out of the loop: the
    # original recomputed both aggregates for every item, turning an
    # O(n) pass into O(n^2).
    col_min = new_data_df.min()
    col_range = new_data_df.max() - col_min

    for item in data:
        x = ((item - col_min) / col_range) * 100
        data_normalised.append(x[0])
    return data_normalised
import os
import sys
import traceback
import requests
import json
from logger.downloadlogger import Logger
from datamanager.dbmanager import DBManager
from datamanager.filemanager import FileManager
from downloader.gitdownloader import GitDownloader
from downloader.githubdownloader import GithubDownloader
#from list_of_repos_urls import List_of_repos_urls
from properties import GitHubAuthToken, dataFolderPath, gitExecutablePath, verbose

# Shared service objects used by the download script below: file I/O,
# on-disk DB access, progress logging, and the two download back-ends
# (GitHub REST API and local git executable).
fm = FileManager()
db = DBManager()
lg = Logger(verbose)
ghd = GithubDownloader(GitHubAuthToken)
gd = GitDownloader(gitExecutablePath, lg)

'''
Checking whether comments already exist has not been implemented yet, nor has step_action.
This e
'''

#user_api_address = "https://api.github.com/users/" + '/'.join(user_address.split('/')[-1:])
#user_name = '_'.join(user_address.split('/')[-1:])
# Hard-coded target user for this run; the commented lines above show
# how the name would be derived from a full profile URL instead.
user_name = "nbriz"

# Prepare the on-disk store for this user and load any existing project data.
db.initialize_write_to_disk(user_name)
project = db.read_project_from_disk(user_name)
- project popularity stats - watchers (mean)

- project scale stats - commits (mean)
- project scale stats - contributors (mean)
- project scale stats - releases (mean)



- issue_assigned_to_user_and_closed_by_user_time_diff - check seconds (mean)
- issue_created_by_user_closed_by_user_time_diff - check seconds (mean)
- time_diff_between_consecutive_commits_committed_by_user - check seconds (mean)
- pull merge time diff
'''

# dataFolderPath = '/Users/georgia/Desktop/Thesis/DataUser/datasets'
fm = FileManager()
stats = fm.read_stats_jsons_from_folder(
    os.path.join(dataFolderPath, "datasets"))


def remove_outliers(data, percentile):
    """Drop every value of *data* above the requested percentile.

    The threshold comes from ``np.nanpercentile``; NaN entries are also
    discarded because ``NaN <= threshold`` evaluates false. Kept values
    retain their input order.
    """
    threshold = np.nanpercentile(data, percentile)
    return list(filter(lambda v: v <= threshold, data))

import json
from properties import GitHubAuthToken, dataFolderPath, gitExecutablePath, verbose, packageFolderPath
from downloader.githubdownloader import GithubDownloader
from datamanager.filemanager import FileManager

'''
Here I download the profile links of the users that have more than ten public repos and more than 20 followers. 
These users will be used to do the benchmarking
'''

ghd = GithubDownloader(GitHubAuthToken)
fm = FileManager()
# GitHub user-search query.
# NOTE(review): `repos:10 followers:20` matches *exactly* 10 repos and
# 20 followers in GitHub search syntax, while the docstring above says
# "more than" — the query may need `>` qualifiers; confirm intent.
users = "https://api.github.com/search/users?q=repos:10+followers:20"
logins = []       # profile URL of every user returned by the search
final_list = []   # subset of `logins` that passes the activity filter below

# Walk all pages of the search result, collecting each profile URL.
for user in ghd.download_paginated_object(users):
    logins.append(user["html_url"])



# Keep only the users with more than 100 authored commits on record.
# Users whose stats file is missing, unreadable, or lacks the
# "commit_authored" key are skipped (best-effort filter, matching the
# original behaviour).
for item in logins:
    name = '_'.join(item.split('/')[-1:])
    try:
        stats = fm.read_json_from_file(dataFolderPath + "/" + name + "/user_stats.json")
        if stats["commit_authored"] > 100:
            final_list.append(item)
    except (OSError, KeyError, ValueError):
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate. ValueError covers json.JSONDecodeError.
        continue
from datasetcreator.project_preferences import Project_preferences
#from datasetcreator.response_time import response_time_to_comments_mentioned
from datasetcreator.testing import add_test_case, test_comments, closed_issues
from datasetcreator.commits import files_in_commits, commit_changes, empty_commit_message, bug_fixing_contribution
from datasetcreator.operational import documentation_comments, documentation_commit

#user_address = "https://github.com/nbriz"
#user_api_address = "https://api.github.com/users/" + '/'.join(user_address.split('/')[-1:])
user_name = 'nbriz'
dataFolderPath = '/Users/georgia/Desktop'
'''
This is a file to run to gather a first example of all possible raw data that can be downloaded
'''

# Collector objects for the raw-data gathering run.
# NOTE(review): Productivity, Communication, Languages and
# Project_management are not imported in this fragment — their imports
# appear to have been lost in the cut; confirm against the full module.
productivity = Productivity(dataFolderPath, user_name)
fm = FileManager()
cm = Communication()
lan = Languages()
pm = Project_management(dataFolderPath, user_name)
pp = Project_preferences()
lg = Logger(verbose)
gd = GitDownloader(gitExecutablePath, lg)

# 29 presumably is the total number of retrieval steps the logger will
# track — TODO confirm against Logger.start_action.
lg.start_action("Retrieving user data ...", 29)

# Step 1: per-day contribution counts, persisted under all_data/.
contribution_days = productivity.contribution_days(dataFolderPath, user_name)
fm.write_json_to_file(
    dataFolderPath + "/" + user_name + "/all_data/contribution_days.json",
    contribution_days)
print("contribution_days done")
lg.step_action()
from datamanager.filemanager import FileManager
from list_of_repos_urls import List_of_repos_urls
from helpers import get_number_of, print_usage, read_file_in_lines, get_total_count
from properties import GitHubAuthToken, dataFolderPath, gitExecutablePath, verbose, \
download_commits_authored, download_commits_committed, download_issues_assigned, \
download_issues_authored, download_issues_mentions, download_issues_commented, \
download_issues_owened, download_repositories_owned, download_user_repos, download_issues_owened_full, \
download_issues_commented_full, download_issues_mentions_full, download_issues_authored_full, \
download_issues_assigned_full, download_commits_committed_full, download_commits_authored_full, download_issue_comments, \
download_commit_comments

db = DBManager()
lg = Logger(verbose)
ghd = GithubDownloader(GitHubAuthToken)
gd = GitDownloader(gitExecutablePath, lg)
fm = FileManager()


def download_information(user_address):
    '''
	Downloads all the data of a user given its GitHub URL.

	:param user_address: the URL of the user of which the data are downloaded.
	'''

    user_api_address = "https://api.github.com/users/" + '/'.join(
        user_address.split('/')[-1:])
    user_name = '_'.join(user_address.split('/')[-1:])

    db.initialize_write_to_disk(user_name)
示例#7
0
from datamanager.filemanager import FileManager
from properties import dataFolderPath
import pandas as pd
import numpy as np
import json
import os

# dataFolderPath = '/Users/georgia/Desktop/Thesis/DataUser/datasets'
fm = FileManager()
# Load every per-user stats JSON from the datasets folder.
stats = fm.read_stats_jsons_from_folder(
    os.path.join(dataFolderPath, "datasets"))

# Metric profile shapes: how a metric's value maps to "better"/"worse"
# (see the explanatory text below the maps).
profile_choices = {
    "A": "Ascending",
    "D": "Descending",
    "M": "Middle",
    "U": "Unclassified",
    "E": "Not Included"
}
# Dashboard category each metric is assigned to.
category_choices = {
    "A": "Main_Page",
    "B": "Productivity",
    "C": "Dev_Productivity",
    "D": "Ops_Productivity",
    "E": "Project_Management",
    "F": "Quality&Testing"
}
'''
Ascending profile: The higher the better
Descending profile: The lower the better
Middle profile: Either extreme is bad
from properties import dataFolderPath
from datamanager.filemanager import FileManager
import json
import os
from analysis import to_day_hour_min_sec
import time
import calendar
import operator
from collections import OrderedDict
'''
Create the view files for the UI
'''

fm = FileManager()
datasets = fm.read_stats_jsons_from_folder(
    os.path.join(dataFolderPath, "datasets", "final_datasets"))
model = fm.read_json_from_file(
    os.path.join(dataFolderPath, "datasets", "model", "model.json"))
# datasets = fm.read_stats_jsons_from_folder(os.path.join(dataFolderPath,"datasets", "final_datasets"))

for user in datasets:
    view = {}

    #initialising
    view["scores"] = []
    view["stats"] = {}
    view["timeactive"] = {}
    view["projects"] = {}
    view["projects"]["labels"] = ["Owner", "Contributor"]

    view["languages"] = []