def test_fetch_since_date(self):
    """Test whether commits are fetched from a Git repository since the given date.

    The same three commits are expected for both start dates that are
    exercised: one datetime without tzinfo and one that carries a
    fixed UTC offset.
    """
    new_path = os.path.join(self.tmp_path, 'newgit')

    # (commit hash, expected 'updated_on' timestamp)
    expected = [
        ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
        ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
        ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0),
    ]

    from_dates = [
        datetime.datetime(2014, 2, 11, 22, 7, 49),
        # Test it using a datetime that includes the timezone
        datetime.datetime(2012, 8, 14, 14, 30, 0,
                          tzinfo=dateutil.tz.tzoffset(None, -36000)),
    ]

    try:
        for from_date in from_dates:
            git = Git(self.git_path, new_path)
            commits = list(git.fetch(from_date=from_date))

            self.assertEqual(len(commits), len(expected))

            for commit, (sha, updated_on) in zip(commits, expected):
                self.assertEqual(commit['data']['commit'], sha)
                self.assertEqual(commit['origin'], self.git_path)
                self.assertEqual(commit['uuid'], uuid(self.git_path, sha))
                self.assertEqual(commit['updated_on'], updated_on)
                self.assertEqual(commit['category'], 'commit')
                self.assertEqual(commit['tag'], self.git_path)
    finally:
        # Remove the clone even when an assertion fails; ignore_errors keeps
        # a missing directory from masking the original failure.
        shutil.rmtree(new_path, ignore_errors=True)
def test_fetch(self):
    """Test whether commits are fetched from a Git repository"""
    new_path = os.path.join(self.tmp_path, 'newgit')

    # (commit hash, expected 'updated_on' timestamp), in fetch order
    expected = [
        ('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
        ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
        ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
        ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
        ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
        ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
        ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
        ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
        ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0),
    ]

    try:
        git = Git(self.git_path, new_path)
        commits = list(git.fetch())

        self.assertEqual(len(commits), len(expected))

        for commit, (sha, updated_on) in zip(commits, expected):
            self.assertEqual(commit['data']['commit'], sha)
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], uuid(self.git_path, sha))
            self.assertEqual(commit['updated_on'], updated_on)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)
    finally:
        # Remove the clone even when an assertion fails; ignore_errors keeps
        # a missing directory from masking the original failure.
        shutil.rmtree(new_path, ignore_errors=True)
def commit_counter(own, repo_url, d1, df, k):
    """Count the commits of a GitHub repository that ``isrecent`` accepts.

    Args:
        own: GitHub owner name, used to build the clone URL.
        repo_url: Bare repository name (despite the parameter name); it is
            combined with ``own`` into ``https://github.com/<own>/<name>.git``.
        d1: Reference value forwarded unchanged to ``isrecent``.
        df: Container whose ``'Number of commits'`` entry at ``k`` receives
            the resulting count.
        k: Key/row under which the count is stored in ``df``.

    Returns:
        The number of commits for which ``isrecent`` returned a truthy value.

    Raises:
        ValueError: if a commit date carries a month abbreviation that is
            not one of the twelve English ones below.
    """
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    print("Owner\t\tRepository")
    print(own, "\t", repo_url)

    # Build the clone directory from the bare names *before* repo_url is
    # rewritten; the previous code appended the full https URL to '/tmp/'.
    repo_dir = '/tmp/' + own + '-' + repo_url + '.git'
    repo_url = 'https://github.com/' + own + '/' + repo_url + '.git'

    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)

    count = 0
    for commit in repo.fetch():
        # Fields are read positionally: [1] month name, [2] day, [4] year.
        # split() without an argument tolerates repeated spaces.
        parts = commit['data']['CommitDate'].split()
        day = int(parts[2])
        # index() raises ValueError on an unknown month instead of silently
        # reusing the month number of the previous commit.
        month_num = months.index(parts[1]) + 1
        year = int(parts[4])
        if isrecent(d1, day, month_num, year):
            count += 1

    print("Number of commmits: ", count)
    # NOTE(review): if df is a pandas DataFrame this is chained assignment;
    # df.loc[k, 'Number of commits'] would be the safer form — confirm type.
    df['Number of commits'][k] = count
    return count
def list_committers(self, url, directory):
    """Return the author of every fetched commit, one entry per commit.

    Each ``Author`` value is passed through ``self.removeMail``. Entries are
    kept in fetch order and repeated authors are not collapsed, so callers
    can count contributions per author.
    """
    repo = Git(uri=url, gitpath=directory)
    return [self.removeMail(entry['data']['Author']) for entry in repo.fetch()]
def test_fetch_empty_log(self):
    """Test whether it parses an empty log"""
    clone_dir = os.path.join(self.tmp_path, 'newgit')
    backend = Git(self.git_path, clone_dir)

    # The fetch is expected to yield nothing for this start date.
    start = datetime.datetime(2020, 1, 1, 1, 1, 1)
    fetched = list(backend.fetch(from_date=start))

    self.assertListEqual(fetched, [])
def test_fetch_from_empty_repository(self):
    """Test whether it parses from empty repository"""
    clone_dir = os.path.join(self.tmp_path, 'newgit')
    backend = Git(self.git_empty_path, clone_dir)

    fetched = list(backend.fetch())
    self.assertListEqual(fetched, [])

    # Drop the clone created for this test.
    shutil.rmtree(clone_dir)
def run(args):
    """Estimate the development effort of a Git repository and print a report.

    Every commit is fetched, its ``AuthorDate`` is grouped by ``Author``,
    the result is passed to ``simplemerge`` and then to the period/effort
    helpers, and a plain-text summary is printed to stdout.

    Args:
        args: Mapping with the keys ``'git_repository'`` (repository URL),
            ``'period'`` (period length, printed as months) and
            ``'threshold'`` (printed as commits in a period).
    """
    import os
    import tempfile

    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True

    # Directory for letting Perceval clone the git repo. The platform temp
    # directory replaces the hard-coded '/tmp/', and a trailing '/' is
    # stripped so the last path component is never empty (which used to map
    # every such URL to the same '/tmp/.git').
    repo_name = repo_url.rstrip('/').split('/')[-1]
    repo_dir = os.path.join(tempfile.gettempdir(), repo_name + '.git')

    # Start from "now" and move backwards to the earliest author date seen.
    first_commit = datetime.now(timezone.utc)
    authorDict = defaultdict(list)

    repo = Git(uri=repo_url, gitpath=repo_dir)
    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'],
                                       '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
        authorDict[commit['data']['Author']].append(commitdate)

    logging.info("Authors found: %d", len(authorDict))
    # presumably merges author aliases in place (return value is unused) — verify
    simplemerge(authorDict)
    logging.info("Authors after merge: %d", len(authorDict))

    author_count = author_counting(authorDict, period_length, active_days)
    (effort_periods, full_time_periods, non_full_time_periods) = project_period_effort(
        author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)

    # Printing results
    print()
    print("CONFIGURATIONS:")
    print(" Length of period (in months):", period_length)
    print(" Threshold t (in commits in a period):", threshold)
    print()
    print("RESULTS:")
    print(" First commit date:", first_commit, "--",
          round((datetime.now(timezone.utc) - first_commit).days / 30, 2), "months ago")
    print(" Maximum possible development effort (in person-months):",
          sum(maxeffort_periods.values()))
    print()
    print(pretty_print_period(period_length, first_commit, ["FT", "Non-FT", "Effort"],
                              full_time_periods, non_full_time_periods, effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print()
    print(" ---> Estimated development effort (in person-months):",
          round(sum(effort_periods.values()), 2))
    print()
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
    print()
def get_commits(username, reponame, commits, config):
    """Fetch every commit of a GitHub repository and append its data to ``commits``.

    The repository ``https://github.com/<username>/<reponame>`` is fetched
    into ``<config['data_dir']>/grimoire_dumps/<username>-<reponame>``.
    If the fetch fails with ``RepositoryError`` a warning is logged and
    ``commits`` is left untouched.

    TODO: Implement recursion argument, default to False.

    Parameters
    ==========
    `username` : str, required
        GitHub owner of the repository.
    `reponame` : str, required
        Name of the repository.
    `commits` : list, required
        Output list; the ``data`` entry of every fetched commit is appended.
    `config` : mapping, required
        Must provide ``"data_dir"``, the base directory for the local dumps.
    """
    repo_URL = 'https://github.com/' + username + '/' + reponame

    # Make sure the export dir exists (no error if it already does).
    # TODO: this is a code snippet we use three times, we should make a function out of it
    local_dir = os.path.join(config["data_dir"], 'grimoire_dumps')
    os.makedirs(local_dir, exist_ok=True)

    data_dump_path = os.path.join(local_dir, username + '-' + reponame)
    git = Git(repo_URL, data_dump_path)

    # Each fetched item is a dictionary whose `data` key holds the commit
    # itself; the other keys are metadata about the fetch operation.
    try:
        # Materialize first so a failure part-way through adds nothing.
        repo_fetched = list(git.fetch())
    except RepositoryError:
        # logging.warning() takes no `file` argument; passing one raised a
        # TypeError from inside this handler.
        logging.warning("Error with this repository: %s/%s", username, reponame)
        return

    # issue 33 (very ugly) band aid: delete *.pack files once downloaded by perceval
    shutil.rmtree(os.path.join(data_dump_path, 'objects', 'pack'), ignore_errors=True)

    # Keep just commit `data`
    commits.extend(commit_data["data"] for commit_data in repo_fetched)
def getAllGitCommits(repo_url,repo_dir ):
    """Return a list with every item fetched from ``repo_url``.

    ``repo_dir`` is handed to the Git backend as its ``gitpath``. No date
    filter is applied.
    """
    backend = Git(uri=repo_url, gitpath=repo_dir)
    return list(backend.fetch())
def _index_commit_batch(es_write, commits, index, project, first_id):
    """Eventize, enrich and bulk-index one batch of commits; return the next free document id."""
    git_events = events.Git(commits)
    events_df = git_events.eventize(1)

    # Add flags if found
    message_log = MessageLogFlag(events_df)
    events_df = message_log.enrich('message')

    splitemail = SplitEmail(events_df)
    events_df = splitemail.enrich("owner")

    # Code for webkit
    # If there's a bot committing code, then we need to use the values flag
    if project == 'webkit':
        ## Fix values in the owner column
        events_df.loc[events_df["email"] == '*****@*****.**', "owner"] = events_df["values"]
        # Re-do this analysis to calculate the right email and user
        splitemail = SplitEmail(events_df)
        events_df = splitemail.enrich("owner")

    # Code for Blink
    # If there's a flag, then we need to update the owner
    if project == 'blink':
        events_df.loc[events_df["values"] != '', "owner"] = events_df["values"]
        splitemail = SplitEmail(events_df)
        events_df = splitemail.enrich("owner")

    splitdomain = SplitEmailDomain(events_df)
    events_df = splitdomain.enrich("email")

    # Add project information
    events_df["project"] = project

    docs = []
    uniq_id = first_id
    for source in events_df.to_dict("index").values():
        docs.append({
            "_index": index,
            "_type": "item",
            "_id": int(uniq_id),
            "_source": source,
        })
        uniq_id = uniq_id + 1

    helpers.bulk(es_write, docs)
    return uniq_id


def analyze_git(es_write):
    """Index the commits of the configured project into Elasticsearch.

    The index is dropped and re-created, then commits are fetched and
    processed in batches of 15000. Document ids are consecutive integers
    starting at 1.

    Args:
        es_write: Elasticsearch client used for index management and for
            the bulk writes.
    """
    # Alternative configurations that were used with this function:
    #   INDEX = 'git_gecko', PROJECT = 'gecko',
    #     Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #   INDEX = 'git_webkit', PROJECT = 'webkit',
    #     Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")
    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")

    batch_size = 15000

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    commits = []
    uniq_id = 1
    for item in git.fetch():
        commits.append(item)
        if len(commits) == batch_size:
            uniq_id = _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
            commits = []

    # Index whatever is left after the last full batch. Previously these
    # commits were dropped and the preceding batch was sent a second time.
    if commits:
        _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
# Load the JSON document stored in out_file (out_file is defined outside this excerpt).
with open(out_file) as str_data:
    # NOTE(review): this prints the file object itself, not its contents — confirm intended.
    print(str_data)
    json_data = json.load(str_data)

#
# getting data via perceval in Python
#
url = 'https://github.com/chaoss/grimoirelab-toolkit'
local_path = './chaoss-grimoirelab-toolkit'
output_file = './grimoirelab-git.json'

# Fetch every commit into memory and serialize the whole list as indented JSON.
git = Git(url, local_path)
commits = [commit for commit in git.fetch()]
dumped = json.dumps(commits, sort_keys=True, indent=4)

# save the Perceval docs to a file
with open(output_file, 'w') as f:
    f.write(dumped)

# load the Perceval docs from a file (replaces the in-memory list with the round-tripped one)
with open(output_file, 'r') as f:
    content = f.read()
    commits = json.loads(content)

for c in commits:
    print(c)
#! /usr/bin/env python3 from flake8.api import legacy as flake8 from graal.graal import GraalRepository from perceval.backends.core.git import Git import random repo_url = input("Enter url: ") repo_dir = input("Enter dir: ") worktree_path = input("Enter worktree path: ") # Git object, pointing to repo_url and repo_dir for cloning ggit = Git(uri=repo_url , gitpath=repo_dir) # clone the repository (if it doesn't exist locally) ggit.fetch_items(category='commit') commits = list(ggit.fetch()) # hash of random commit commit = random.choice(commits) _hash = commit['data']['commit'] print(_hash) # or input the hash of certain commit # _hash = input("Enter hash: ") gral_repo = GraalRepository(uri=repo_url, dirpath=repo_dir) gral_repo.worktree(worktree_path) # checkout the commit gral_repo.checkout(_hash) style_guide = flake8.get_style_guide() files = worktree_path # generate report by flake8
def numCommits(self, url, directory):
    """Return how many items the Git backend yields for ``url``.

    ``directory`` is handed to the backend as its ``gitpath``.
    """
    backend = Git(uri=url, gitpath=directory)
    return sum(1 for _ in backend.fetch())
def main(args):
    """Export the commit history of every GitHub repository listed in a URL file.

    For each distinct ``owner/name`` found in the file the repository
    metadata is requested from the GitHub API; repositories reported as
    not found or private are skipped, the rest are fetched with the Git
    backend and dumped to ``<output_path>/<owner>_<name>.json``.
    Repositories whose JSON file already exists are skipped.

    Args:
        args: Namespace providing ``github_token``, ``output_path``,
            ``urls_file``, ``perceval_path`` and ``cache_mode_on``.
    """
    github_key = args.github_token

    # Resolve the output directory once, before the working directory
    # changes below; a relative output_path used to be resolved twice.
    output_dir = os.path.abspath(args.output_path)
    list_jsons = set(os.listdir(output_dir))

    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        os.chdir(output_dir)
        for line in url_file:
            # Strip the line terminator so it does not end up in the repo name.
            line = line.strip()
            if not line:
                continue
            url = line.split('/')
            try:
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
                continue
            repo_set.add(repo)

    # Resolved after the chdir above, exactly as before.
    perceval_dir = os.path.abspath(args.perceval_path)

    for repo in sorted(repo_set):
        owner, name = repo.split('/')
        outfile_name = "%s_%s.json" % (owner, name)
        outfile_path = os.path.join(output_dir, outfile_name)

        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
            continue
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")
            continue

        # The token travels in a header, so it is neither logged nor part of the URL.
        api_url = "https://api.github.com/repos/" + str(repo)
        logger.info("Checking metadata for repo %s" % api_url)
        request = urllib.request.Request(
            api_url, headers={"Authorization": "token " + github_key})
        try:
            with urllib.request.urlopen(request) as response:
                raw_data = response.read()
        except urllib.error.HTTPError as http_error:
            logger.error("HTTP %s: %s: %s" % (http_error.code, http_error.reason, repo))
            continue

        try:
            dicc_out = json.loads(raw_data.decode('utf-8'))
        except ValueError:
            logger.warning("Error in response (ValueError)")
            continue

        if dicc_out.get('message') == 'Not Found':
            logger.error("Not found: %s" % repo)
            continue
        # 'private' is decoded as a JSON boolean, not as the string 'True'.
        if 'message' not in dicc_out and dicc_out.get('private') is True:
            logger.error("Private: %s" % repo)
            continue

        repo_url = "https://github.com/%s" % repo + ".git"
        logger.info('Executing Perceval with repo: %s' % repo)
        # .get(): error/empty payloads that reach this point carry no "size".
        logger.debug('Repo stats. Size: %s KB' % dicc_out.get("size"))

        gitpath = '%s/%s' % (perceval_dir, repo)
        git = Git(uri=repo_url, gitpath=gitpath)
        try:
            commits = list(git.fetch())
        except Exception as e:
            # Best effort: one failing repository must not stop the whole run.
            logger.warning("Failure while fetching commits. Repo: %s" % repo)
            logger.error(e)
            continue

        logger.info('Exporting results to JSON...')
        with open(outfile_path, "w", encoding='utf-8') as jfile:
            json.dump(commits, jfile, indent=4, sort_keys=True)
        logger.info('Exported to %s' % outfile_path)

        if not args.cache_mode_on:
            remove_dir(gitpath)
# For every repository in resp, count the commits, issues and pull requests
# whose date lies within the last 90 days.
# (resp, parent and github_token are defined outside this excerpt.)
repo_list = []
for repo in resp:
    repo_name = repo['name']
    repo_url = repo['html_url']  #github repository url
    repo_dir = '/tmp/' + repo_name
    print("Started " + repo_name)
    commit_count = 0
    issue_count = 0
    pull_count = 0
    git_commit = Git(uri=repo_url, gitpath=repo_dir)
    #count no of commits
    for commit in git_commit.fetch():
        # The last six characters are cut off before parsing.
        # NOTE(review): presumably the ' +HHMM' UTC offset; the parsed value is then
        # compared with the local naive now(), so the offset is ignored — confirm acceptable.
        date_diff = datetime.now() - datetime.strptime(
            commit['data']['CommitDate'][:-6], "%a %b %d %H:%M:%S %Y")
        if date_diff.days <= 90:
            commit_count += 1
    items = GitHub(owner=parent, repository=repo_name, api_token=github_token)
    #count no of pull_requests,issues
    for item in items.fetch():
        # NOTE(review): the trailing 'Z' suggests created_at is UTC while now() is local — confirm.
        date_diff = datetime.now() - datetime.strptime(
            item['data']['created_at'], "%Y-%m-%dT%H:%M:%SZ")
        if date_diff.days <= 90:
            # Items carrying a 'pull_request' key count as pull requests, the rest as issues.
            if 'pull_request' in item['data']:
                pull_count += 1
            else:
                issue_count += 1
def git_repos():
    """Count commits per author across the listed amfoss GitHub repositories.

    Returns:
        A list with one string per distinct author, in order of first
        appearance, formatted as ``"Commits:<n>\\t User: <author>\\n"``.
    """
    repo_names = [
        'vidyaratna', 'cms', 'TempleApp', 'website', 'WebApp', 'cms-mobile',
        'Praveshan', 'bot', 'tasks', 'star-me', 'amdec-website', 'Wiki',
        'GitLit', 'Qujini', 'attendance-tracker', 'events', 'Hack4Amrita',
        'master-syllabus', 'test-repo', 'webspace', 'internal-hackathon',
        'foss-meetups', 'automated-scripts', 'fosswebsite', 'fosster',
        'Foss-talks', 'cybergurukulam', 'kdeconf',
        'android-workshop-summer-2018', 'App', 'Workshops',
        'Wikimedia_Hackathon_Amrita_University', 'website_old',
    ]

    commit_counts = Counter()
    for name in repo_names:
        repo_url = 'https://github.com/amfoss/' + name + '.git'
        # One clone directory per repository. Every repository used to share
        # '/perceval.tests'; NOTE(review): a gitpath that already holds a
        # different repository is presumably reused rather than re-cloned,
        # which would skew the counts — verify against the Git backend.
        repo_dir = '/perceval.tests/' + name
        repo = Git(uri=repo_url, gitpath=repo_dir)
        # repo.fetch() yields one item per commit; tally its author.
        commit_counts.update(commit['data']['Author'] for commit in repo.fetch())

    return [
        "Commits:" + str(total) + "\t User: " + user + "\n"
        for user, total in commit_counts.items()
    ]
parser.add_argument(
    "-fr", "--fromdate",
    help="Date that you want to fetch information from in format YYYYMMDD")
parser.add_argument(
    "-to", "--todate",
    help="Date that you want to fetch information till in format YYYYMMDD")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')
repo_git_uri = "http://github.com/{}/{}.git".format(owner, repo)
repo_dir = 'tmp/perceval'

# Convert from and to date to datetime objects. Both options are optional,
# so only the ones that were actually given are parsed and forwarded; slicing
# a missing value used to fail with a TypeError.
fr_dt = None
to_dt = None
date_filters = {}
if args.fromdate:
    fr_dt = datetime.datetime.strptime(args.fromdate, "%Y%m%d")
    date_filters['from_date'] = fr_dt
if args.todate:
    to_dt = datetime.datetime.strptime(args.todate, "%Y%m%d")
    date_filters['to_date'] = to_dt

git_obj = Git(uri=repo_git_uri, gitpath=repo_dir)
github_obj = GitHub(owner=owner, repository=repo, api_token=args.token)

# Big dicts printed, can be pretty printed for convenience
for commit in git_obj.fetch():
    print(commit, '\n')

# The date range is applied to the GitHub items only, as before.
for item in github_obj.fetch(**date_filters):
    print(item, '\n')
import json
import datetime
# Import the submodule explicitly: a bare `import dateutil` does not
# guarantee that `dateutil.tz` is loaded.
import dateutil.tz

parser = argparse.ArgumentParser(description="Simple parser for Git commits.")
parser.add_argument("-r", "--repo", help = "Git repository URI")
parser.add_argument("-p", "--gitpath", help = "Gitpath of the repository")
parser.add_argument("-d", "--create_dump", help = "y for for creating json dump of data, empty for printing to terminal")
args = parser.parse_args()

# Fixed fetch window, both bounds in UTC.
from_date = datetime.datetime(2019, 6, 1, 0, 0, 0, tzinfo=dateutil.tz.tzutc())
to_date = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=dateutil.tz.tzutc())

# make sure a cloned repo folder is deleted in gitpath if already present
repo = Git(uri=args.repo, gitpath=args.gitpath)
commits_gen = repo.fetch(from_date=from_date, to_date=to_date)
commits = list(commits_gen)

if args.create_dump == 'y':
    # Dump every fetched item into one JSON file in the working directory.
    with open("git_commits.json", "w") as dump_file:
        json.dump(commits, dump_file)
else:
    # Print each item as "field:value" lines followed by a separator.
    for commit in commits:
        for field, value in commit.items():
            print(str(field) + ':' + str(value))
        print("-----------")
# `Git` is used below, so it is imported together with `GitCommand`.
from perceval.backends.core.git import Git, GitCommand
from datetime import datetime

#setting up Git Argument parser
parser = GitCommand.setup_cmd_parser()

# making arguments list
arg = [
    'https://github.com/sumitskj/Prajawalan2019.git',
    '--git-path', '/tmp/clone'
]
args = parser.parse(*arg)

# making Git object
repo = Git(uri=args.uri, gitpath=args.git_path)

# finding the no. of commits in the date range and listing them all
from_date = datetime(2018, 10, 12)
to_date = datetime(2019, 12, 9)
item = list(repo.fetch(category='commit', from_date=from_date, to_date=to_date))

print("Number of commmits: %d." % len(item))
# Number the commits starting at 1, in fetch order.
for j, i in enumerate(item, start=1):
    print("Commit no " + str(j) + ": " + i['data']['commit'])
def _index_commit_batch(es_write, commits, index, project, first_id):
    """Eventize, enrich and bulk-index one batch of commits; return the next free document id."""
    git_events = events.Git(commits)
    events_df = git_events.eventize(1)

    # Add flags if found
    message_log = MessageLogFlag(events_df)
    events_df = message_log.enrich('message')

    splitemail = SplitEmail(events_df)
    events_df = splitemail.enrich("owner")

    # Code for webkit
    # If there's a bot committing code, then we need to use the values flag
    if project == 'webkit':
        ## Fix values in the owner column
        events_df.loc[events_df["email"] == '*****@*****.**', "owner"] = events_df["values"]
        # Re-do this analysis to calculate the right email and user
        splitemail = SplitEmail(events_df)
        events_df = splitemail.enrich("owner")

    # Code for Blink
    # If there's a flag, then we need to update the owner
    if project == 'blink':
        events_df.loc[events_df["values"] != '', "owner"] = events_df["values"]
        splitemail = SplitEmail(events_df)
        events_df = splitemail.enrich("owner")

    splitdomain = SplitEmailDomain(events_df)
    events_df = splitdomain.enrich("email")

    # Add project information
    events_df["project"] = project

    docs = []
    uniq_id = first_id
    for source in events_df.to_dict("index").values():
        docs.append({
            "_index": index,
            "_type": "item",
            "_id": int(uniq_id),
            "_source": source,
        })
        uniq_id = uniq_id + 1

    helpers.bulk(es_write, docs)
    return uniq_id


def analyze_git(es_write):
    """Index the commits of the configured project into Elasticsearch.

    The index is dropped and re-created, then commits are fetched and
    processed in batches of 15000. Document ids are consecutive integers
    starting at 1.

    Args:
        es_write: Elasticsearch client used for index management and for
            the bulk writes.
    """
    # Alternative configurations that were used with this function:
    #   INDEX = 'git_gecko', PROJECT = 'gecko',
    #     Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #   INDEX = 'git_webkit', PROJECT = 'webkit',
    #     Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")
    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")

    batch_size = 15000

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    commits = []
    uniq_id = 1
    for item in git.fetch():
        commits.append(item)
        if len(commits) == batch_size:
            uniq_id = _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
            commits = []

    # Index whatever is left after the last full batch. Previously these
    # commits were dropped and the preceding batch was sent a second time.
    if commits:
        _index_commit_batch(es_write, commits, INDEX, PROJECT, uniq_id)
classifier = nltk.NaiveBayesClassifier.train(train_set) # Read command line arguments parser = argparse.ArgumentParser(description="Count commits in a git repo") parser.add_argument("repo", help="Repository url") parser.add_argument("dir", help="Directory for cloning the repository") parser.add_argument("--print", action='store_true', help="Print hashes") args = parser.parse_args() # create a Git object, and count commmits repo = Git(uri=args.repo, gitpath=args.dir) countcommit = 0 countuser = 0 pprint(repo) for commit in repo.fetch(): if args.print: # print("PPRINT COMMIT['DATA']") # pprint(commit['data']) print(commit['data']['commit']) countcommit += 1 males = 0 females = 0 for user in repo.fetch(): # print("PPRINT USER['DATA']") # pprint(classifier.classify(gender_features(user['data']['Author']))) if (classifier.classify(gender_features( user['data']['Author'])) == 'male'): males += 1 elif (classifier.classify(gender_features(
git_repo_dir = '/tmp/perceval.git' # Create a Git object, pointing to repo_url, using repo_dir for cloning repo = Git(uri=git_repo_url, gitpath=git_repo_dir) print("Starting 1") ''' Uses the git object to print information about the repository, this will then create the directory /tmp/perceval.git other parameters you can use are: commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703 Author: John Smith <*****@*****.**> AuthorDate: Tue Aug 14 14:30:13 2012 -0300 Commit: John Smith <*****@*****.**> CommitDate: Tue Aug 14 14:30:13 2012 -0300 ''' for commit in repo.fetch(): #print("ugh") print(commit['data']['Author']) print("Starting 2") # Url for the mailing list to analyze mail_repo_url = 'https://mail-archives.apache.org/mod_mbox/httpd-dev/' # Directory for letting Perceval clone the mailing list mail_repo_dir = '/tmp/perceval/' repo = PipermailList(url=mail_repo_url, dirpath=mail_repo_dir) #Does not seem to affect what repositories are printed k = str_to_datetime("1996-04") k = datetime_to_utc(k) print(k) for message in repo.fetch(from_date=k): print(message[0])
def test_fetch_branch(self):
    """Test whether commits are fetched from a Git repository for a given branch"""

    # Hashes expected when 'master' is part of the selection (also used
    # for the "all branches" case below).
    master_commits = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                      '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                      '7debcf8a2f57f86663809c58b5c07a398be7674c',
                      'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                      'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                      '589bb080f059834829a2a5955bebfd7c2baa110a',
                      'ce8e0b86a1e9877f42fe9453ede418519115f367',
                      '51a3b654f252210572297f47597b31527c475fb8',
                      '456a68ee1407a77f3e804a30dff245bb6c6b872f']
    # Hashes expected when only 'lzp' is selected.
    lzp_commits = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                   '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                   '7debcf8a2f57f86663809c58b5c07a398be7674c',
                   'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                   'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                   '589bb080f059834829a2a5955bebfd7c2baa110a',
                   '51a3b654f252210572297f47597b31527c475fb8']

    new_path = os.path.join(self.tmp_path, 'newgit')
    # Registered up front so the clone is removed even when an assertion
    # fails part-way through; ignore_errors keeps a missing directory from
    # masking the real failure.
    self.addCleanup(shutil.rmtree, new_path, ignore_errors=True)

    git = Git(self.git_path, new_path)

    def check_branches(branches, expected):
        """Fetch `branches` and compare every commit against `expected`."""
        commits = list(git.fetch(branches=branches))

        self.assertEqual(len(commits), len(expected))

        for commit, expected_hash in zip(commits, expected):
            expected_uuid = uuid(self.git_path, expected_hash)
            self.assertEqual(commit['data']['commit'], expected_hash)
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

    # Let's fetch master
    check_branches(['master'], master_commits)

    # Now let's fetch lzp
    check_branches(['lzp'], lzp_commits)

    # Now, let's fetch master and lzp
    check_branches(['master', 'lzp'], master_commits)

    # Now, let's fetch None, which means "all commits"
    check_branches(None, master_commits)

    # Now, let's fetch [], which means "no commits"
    check_branches([], [])
def getCommits(user_owner, repo_name):
    """Return the result of ``Git.fetch()`` for a GitHub repository.

    The repository address is built as
    ``https://github.com/<user_owner>/<repo_name>.git``.

    :param user_owner: owner segment of the GitHub URL
    :param repo_name: repository segment of the GitHub URL

    :returns: whatever ``Git.fetch()`` returns for that repository
    """
    remote = f"https://github.com/{user_owner}/{repo_name}.git"
    # NOTE(review): the same URL string is passed as both positional
    # arguments. If the second one is the local clone path, the clone
    # location is the URL text itself -- confirm this is intended.
    return Git(remote, remote).fetch()
count = 0
t_data = []
data = []

# Load the time-zone table. The context manager closes the reader before the
# same path is reopened for writing below; 'utf-8-sig' drops a leading BOM.
with codecs.open(filepath2, 'r', 'utf-8-sig') as infile:
    time_data = json.load(infile)

# Give every entry a compact 'TimeZone' key and index the entries by it, so
# each commit needs one dictionary lookup instead of a scan of the table.
entries_by_zone = {}
for p in time_data:
    st = p['WindowsTimeZones'][0]['Name']
    # Characters 4-6 and 8-9 of the name, skipping index 7.
    # NOTE(review): presumably names look like "(UTC+05:30) ..." so this
    # yields "+0530" -- confirm against the data file.
    p['TimeZone'] = st[4:7] + st[8:10]
    t_data.append(p)
    entries_by_zone.setdefault(p['TimeZone'], []).append(p)

# Write the enriched table back over the input file.
with open(filepath2, 'w') as outfile:
    json.dump(t_data, outfile)

for x in repo.fetch():
    # The last five characters of AuthorDate are compared with 'TimeZone'.
    t_val = x['data']['AuthorDate'][-5:]
    # A fresh list per commit, in table order, of every matching country.
    list_of_countries = [p['CountryName']
                         for p in entries_by_zone.get(t_val, ())]
    x['data']['CountryName'] = list_of_countries
    x['data']['TimeZone'] = t_val
    print(x)
    data.append(x)

with open(filepath, 'w') as outfile:
    json.dump(data, outfile)
#! /usr/bin/env python3
# Count commits

import argparse

from perceval.backends.core.git import Git

# Command line: two positionals (repository URL and clone directory) plus an
# optional flag that prints each commit hash while counting.
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# Git backend for the requested repository.
repo = Git(uri=args.repo, gitpath=args.dir)

# enumerate() keeps the running total; count stays 0 if nothing is fetched.
count = 0
for count, commit in enumerate(repo.fetch(), start=1):
    if args.print:
        print(commit['data']['commit'])

print("Number of commmits: %d." % count)
# Initializing the Git backend
git_backend = Git(uri=REPOSITORY_URL, gitpath=REPO_DIR)

# Range of dates in which commits are to be fetched
from_date = datetime(2018, 10, 1)
to_date = datetime(2019, 2, 5)

# Repo branches from which commits are to be fetched
repo_branches = ["master", "develop"]

# Calling fetch method
# The method retrieves from a Git repository or a log file a list of
# commits. Commits are returned in the same order they were obtained.
range_commits = git_backend.fetch(branches=repo_branches,
                                  from_date=from_date,
                                  to_date=to_date)

# Materialize the result so it can be counted, indexed and iterated again.
range_commits_list = list(range_commits)
n_commits = len(range_commits_list)
print("NUMBER OF COMMITS: ", n_commits)

# The filter can legitimately match nothing; only inspect the last commit
# when there is one, instead of raising IndexError on an empty list.
if range_commits_list:
    last_commit = range_commits_list[-1]
    pprint(last_commit)
    pprint(last_commit.keys())
else:
    last_commit = None

for commit in range_commits_list:
    print(
        "COMMIT DATE: {commit_date}\nAUTHOR: {author_name}\nCOMMIT MESSAGE: {commit_message}"
        .format(commit_date=commit["data"]["CommitDate"],
                author_name=commit["data"]["Author"],
                commit_message=commit["data"]["message"]))