Example #1
import save_data as sd  # assumed import: the project's save_data module providing the save_* helpers


def import_to_database(json_dict, credentials_file):
    # Walk the scraped JSON (user -> repo -> file -> vulnerability) and persist
    # each level through the save_data helpers.
    # number_of_files = 0
    db_conn = sd.get_connection(credentials_file)
    # start = json_dict["start"]
    # end = json_dict["end"]
    # json_dict = json_dict["result"]
    for username, repo in json_dict.iteritems():
        sd.save_user_data(db_conn, username)
        # print username
        for reponame, repo_dict in repo.iteritems():
            date_created = repo_dict["created_at"]
            repo_size = repo_dict["size"]
            last_pushed = repo_dict["pushed_at"]
            contributors_url = repo_dict["contributors_url"]
            description = repo_dict["description"]
            repo_id = sd.save_repo_data(db_conn, reponame, date_created, username,
                                        repo_size, last_pushed, repo_dict["url"],
                                        repo_dict["forks_url"], contributors_url,
                                        description, repo_dict["stargazers"],
                                        repo_dict["forks"])

            file_list = repo_dict["files"]
            for file_entry in file_list:
                # print file_info
                for filename, datapoints in file_entry.iteritems():
                    file_id = sd.save_file_data(db_conn, filename, repo_id, "")
                    # Each datapoint marks one flagged line in the file.
                    for thing in datapoints:
                        line = thing["line"]
                        code_sample = thing["code_sample"]
                        sd.save_vulnerability_data(db_conn, file_id, line, code_sample)
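A minimal driver for the importer above might look like the following sketch; the input file name, the credentials file name, and the "result" wrapper key are assumptions taken from the commented-out lines in the snippet, not part of the original code.

import json

# Hypothetical driver: load the scraped results and hand them to the importer.
with open("scan_results.json") as fh:  # input file name is an assumption
    payload = json.load(fh)

# Unwrap the optional "result" key hinted at by the commented-out line above.
import_to_database(payload.get("result", payload), "mysqlcreds.csv")  # credentials path assumed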
Example #2
import datetime

import save_data as sd  # assumed import: the project's save_data module providing the save_* helpers


def test(credentials_file='mysqlcreds-throwaway.csv'):
    # Smoke test that exercises each save_data helper against a throwaway database.
    con = sd.get_connection(credentials_file)
    username = "******"
    sd.save_user_data(con, username, "*****@*****.**")

    # repo_id = save_repo_data("test_repo", date_created, user_id, repo_size, date_collected);
    # fill in with appropriate data types
    repo_id = sd.save_repo_data(con, "test_repo", datetime.date.today(),
                                username, 2400, datetime.date.today())

    sd.save_repo_contributor_data(con, username, repo_id)

    file_id = sd.save_file_data(con, "test_file.c", repo_id, "")

    # optional date and author parameters
    vuln_id = sd.save_vulnerability_data(con, file_id, 24, "code sample;",
                                         "vulnerability description/regex")

    sd.close_connection(con)
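The snippet above defines the test but never invokes it; a minimal entry point for running it directly (the __main__ guard is added for illustration and is not part of the original) would be:

if __name__ == '__main__':
    test()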
Example #3
import load_data as L
import save_data as S
import get_info as G
import tr_te_split as T
import extract_feature as E

mv_lens_100k = L.load_data('100k')
user_info = mv_lens_100k.load_user_info()
movie_info = mv_lens_100k.load_movie_info()
rating_info = mv_lens_100k.load_ratings()

# year_info = G.get_year_info(movie_info)
# age_info = G.get_age_info(user_info)

data = E.extract_feature(user_info, movie_info, rating_info)

tr_data, te_data = T.tr_te_split(data)
S.save_user_data(tr_data, te_data)
Example #4
else:  # get the stragglers: repos with no contributor rows saved yet
	rows = sd.select_many_query(db_conn, "select repo_id, contributors_url, owner_name from gh_repo where repo_id not in (select repo_id from gh_repo_contributors) order by repo_id")

header = {'Authorization': 'token ' + token}

for row in rows:
	repo_id = row[0]
	if (repo_id % 10 == 0):
		print "repo_id ", repo_id
	query_url = row[1]
	owner_name = row[2]
	try:
		# Fetch the repo's contributor list from the GitHub API.
		r = requests.get(query_url, headers=header)
		item = json.loads(r.text or r.content)
		for thing in item:
			contributions = thing['contributions']
			username = thing['login']
			sd.save_user_data(db_conn, username)
			sd.save_repo_contributor_data(db_conn, username, repo_id, contributions)
		# Honor the GitHub rate limit reported in the response headers.
		headers = r.headers
		ratelimit_remaining = int(headers['x-ratelimit-remaining'])
		reset_time = int(headers['x-ratelimit-reset'])
		if (ratelimit_remaining % 10 == 0):
			print "ratelimit_remaining ", ratelimit_remaining
		if ratelimit_remaining == 0:
			print "napping for ", reset_time
			util.nap(reset_time)
	except:  # ValueError, requests.exceptions.ConnectionError:
		print "error: ", sys.exc_info()[0]
		print "skipping repo: ", repo_id
Example #5
else:  # get the stragglers: repos with no contributor rows saved yet
    rows = sd.select_many_query(
        db_conn,
        "select repo_id, contributors_url, owner_name from gh_repo"
        " where repo_id not in (select repo_id from gh_repo_contributors)"
        " order by repo_id"
    )

header = {'Authorization': 'token ' + token}

for row in rows:
    repo_id = row[0]
    if (repo_id % 10 == 0):
        print "repo_id ", repo_id
    query_url = row[1]
    owner_name = row[2]
    try:
        r = requests.get(query_url, headers=header)
        item = json.loads(r.text or r.content)
        for thing in item:
            contributions = thing['contributions']
            username = thing['login']
            sd.save_user_data(db_conn, username)
            sd.save_repo_contributor_data(db_conn, username, repo_id,
                                          contributions)
        headers = r.headers
        ratelimit_remaining = int(headers['x-ratelimit-remaining'])
        reset_time = int(headers['x-ratelimit-reset'])
        if (ratelimit_remaining % 10 == 0):
            print "ratelimit_remaining ", ratelimit_remaining
        if ratelimit_remaining == 0:
            print "napping for ", reset_time
            util.nap(reset_time)
    except:  # ValueError, requests.exceptions.ConnectionError:
        print "error: ", sys.exc_info()[0]
        print "skipping repo: ", repo_id