def test_fetch_since_date(self):
    """Test whether commits are fetched from a Git repository since the given date"""
    new_path = os.path.join(self.tmp_path, 'newgit')

    expected = [('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

    def check(commits):
        # The same commits must come back however the date was expressed
        self.assertEqual(len(commits), len(expected))
        for commit, (sha, ts) in zip(commits, expected):
            self.assertEqual(commit['data']['commit'], sha)
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], uuid(self.git_path, sha))
            self.assertEqual(commit['updated_on'], ts)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

    # Naive datetime
    from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
    git = Git(self.git_path, new_path)
    check(list(git.fetch(from_date=from_date)))

    # Test it using a datetime that includes the timezone
    from_date = datetime.datetime(2012, 8, 14, 14, 30, 00,
                                  tzinfo=dateutil.tz.tzoffset(None, -36000))
    git = Git(self.git_path, new_path)
    check(list(git.fetch(from_date=from_date)))

    shutil.rmtree(new_path)
def test_fetch(self):
    """Test whether commits are fetched from a Git repository"""
    new_path = os.path.join(self.tmp_path, 'newgit')

    repo = Git(self.git_path, new_path)
    fetched = list(repo.fetch())

    expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]
    self.assertEqual(len(fetched), len(expected))

    # Every commit must carry the expected hash, timestamp and metadata
    for commit, (sha, ts) in zip(fetched, expected):
        self.assertEqual(commit['data']['commit'], sha)
        self.assertEqual(commit['origin'], self.git_path)
        self.assertEqual(commit['uuid'], uuid(self.git_path, sha))
        self.assertEqual(commit['updated_on'], ts)
        self.assertEqual(commit['category'], 'commit')
        self.assertEqual(commit['tag'], self.git_path)

    shutil.rmtree(new_path)
def test_get_elastic_items_filter(self):
    """Test whether the elastic method works properly with filter"""
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = elastic
    ocean.feed_items(items)

    # Renamed from `filter`, which shadowed the builtin of the same name
    uuid_filter = {
        "name": "uuid",
        "value": [
            "43f217b2f678a5691fdbc5c6c5302243e79e5a90",
            "00ee6902e34b309cd05706c26e3e195a62492f60"
        ]
    }
    eitems = ElasticItems(perceval_backend)
    eitems.elastic = elastic
    r_json = eitems.get_elastic_items(_filter=uuid_filter)
    hits = r_json['hits']['hits']

    self.assertEqual(len(hits), 2)
    self.assertEqual(hits[0]['_source']['uuid'], "43f217b2f678a5691fdbc5c6c5302243e79e5a90")
    self.assertEqual(hits[1]['_source']['uuid'], "00ee6902e34b309cd05706c26e3e195a62492f60")
def commit_counter(own, repo_url, d1, df, k):
    """Count commits in the GitHub repository ``own/repo_url`` accepted by
    ``isrecent`` relative to ``d1``, record the count in
    ``df['Number of commits'][k]`` and return it.

    :param own: repository owner (GitHub user or organization)
    :param repo_url: repository name (no owner prefix, no ``.git`` suffix)
    :param d1: reference date passed through to ``isrecent``
    :param df: dataframe-like mapping, mutated in place
    :param k: row index in ``df`` where the count is stored
    :returns: number of matching commits
    """
    print("Owner\t\tRepository")
    print(own, "\t", repo_url)
    repo_name = repo_url
    repo_url = 'https://github.com/' + own + '/' + repo_name + '.git'
    # BUG FIX: the clone directory used to be built from the full URL,
    # yielding the bogus nested path '/tmp/https://github.com/...'.
    # Derive it from the owner and repository name instead.
    repo_dir = '/tmp/' + own + '-' + repo_name + '.git'
    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)

    mon = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    count = 0
    # fetch all commits as an iterator, counting the recent ones
    for commit in repo.fetch():
        parts = commit['data']['CommitDate'].split(" ")
        day = int(parts[2])
        # list.index replaces the original manual month-name scan
        month_num = mon.index(parts[1]) + 1
        year = int(parts[4])
        if isrecent(d1, day, month_num, year):
            count += 1
    print("Number of commmits: ", count)
    df['Number of commits'][k] = count
    return count
def list_committers(self, url, directory):
    """Return the committer name strings from a git repository, ordered by
    commit and including repeated users, so gender contributions can be
    counted per commit.

    :param url: URI of the git repository to fetch
    :param directory: local path used by Perceval for the clone
    :returns: list of author names (mail stripped via ``self.removeMail``)
    """
    repo = Git(uri=url, gitpath=directory)
    # NOTE: the original bound its result to a local variable named after
    # the method itself; a comprehension avoids that confusing shadowing.
    return [self.removeMail(item['data']['Author']) for item in repo.fetch()]
def test_fetch_empty_log(self):
    """Test whether it parsers an empty log"""
    new_path = os.path.join(self.tmp_path, 'newgit')
    # A date far past the last commit yields an empty log
    from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)

    repo = Git(self.git_path, new_path)
    fetched = list(repo.fetch(from_date=from_date))
    self.assertListEqual(fetched, [])
def test_initialization(self):
    """Test whether attributes are initializated"""
    git = Git('http://example.com', self.git_path, tag='test')
    self.assertEqual(git.uri, 'http://example.com')
    self.assertEqual(git.gitpath, self.git_path)
    self.assertEqual(git.origin, 'http://example.com')
    self.assertEqual(git.tag, 'test')

    # When tag is empty or None it will be set to
    # the value in uri
    for kwargs in ({}, {'tag': ''}):
        git = Git('http://example.com', self.git_path, **kwargs)
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'http://example.com')
def test_fetch_from_empty_repository(self):
    """Test whether it parses from empty repository"""
    new_path = os.path.join(self.tmp_path, 'newgit')

    repo = Git(self.git_empty_path, new_path)
    fetched = list(repo.fetch())
    self.assertListEqual(fetched, [])

    shutil.rmtree(new_path)
def run(args):
    """Estimate development effort for a git repository and print a report.

    Reads 'git_repository' (URL), 'period' (months per period) and
    'threshold' (commits per period that mark a full-time developer)
    from the ``args`` mapping, fetches all commits with Perceval, groups
    commit dates per author, and prints the effort estimation computed by
    the sibling helpers (simplemerge, author_counting,
    project_period_effort, project_period_maxeffort).
    """
    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True
    # directory for letting Perceval clone the git repo
    # TODO: this is Linux-operating system specific. Should change
    repo_dir = '/tmp/' + repo_url.split('/')[-1] + '.git'
    # Start from "now"; every earlier commit date pushes first_commit back
    first_commit = datetime.now(timezone.utc)
    authorDict = defaultdict(list)
    repo = Git(uri=repo_url, gitpath=repo_dir)
    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'],
                                       '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
        # One entry per commit, keyed by the raw author string
        authorDict[commit['data']['Author']].append(commitdate)
    logging.info("Authors found: " + str(len(authorDict)))
    # simplemerge mutates authorDict, merging author aliases in place
    simplemerge(authorDict)
    logging.info("Authors after merge: " + str(len(authorDict)))
    author_count = author_counting(authorDict, period_length, active_days)
    # print(author_count)
    (effort_periods, full_time_periods, non_full_time_periods) = project_period_effort(author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)
    # Printing results
    print()
    print("CONFIGURATIONS:")
    print(" Length of period (in months):", period_length)
    print(" Threshold t (in commits in a period):", threshold)
    print()
    print("RESULTS:")
    # Rough month count: days since first commit divided by 30
    print(" First commit date:", first_commit, "--", round((datetime.now(timezone.utc)-first_commit).days/30, 2) , "months ago")
    print(" Maximum possible development effort (in person-months):", sum(maxeffort_periods.values()))
    print()
    print(pretty_print_period(period_length, first_commit, ["FT", "Non-FT", "Effort"], full_time_periods, non_full_time_periods, effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print()
    print(" ---> Estimated development effort (in person-months):", round(sum(effort_periods.values()), 2))
    print()
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
    print()
def test_get_elastic_items_error(self):
    """Test whether a message is logged if an error occurs when getting items from an index"""
    items = json.loads(read_file('data/git.json'))
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)
    es.bulk_upload(items, field_id="uuid")

    # Load items
    eitems = ElasticItems(perceval_backend)
    eitems.elastic = es
    with self.assertLogs(logger, level='DEBUG') as cm:
        result = eitems.get_elastic_items()

    self.assertIsNone(result)
    self.assertRegex(cm.output[-1],
                     'DEBUG:grimoire_elk.elastic_items:No results found from*')
def get_commits(username, reponame, commits, config):
    """Fetch all commits of github.com/<username>/<reponame> with Perceval
    and append each commit's ``data`` payload to ``commits`` (in place).

    TODO: Implement recursion argument, default to False.

    Parameters
    ==========
    `username` : str, required
    `reponame` : str, required
    `commits` : list, required
        Output list; one dict of commit metadata is appended per commit.
    `config` : dict, required
        Must contain "data_dir"; clones are stored below it.
    """
    repo_URL = 'https://github.com/' + username + '/' + reponame

    # checks whether the export dir exists and if not creates it
    # TODO: this is a code snippet we use three times, we should make a function out of it
    local_dir = os.path.join(config["data_dir"], 'grimoire_dumps')
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
    data_dump_path = os.path.join(local_dir, username + '-' + reponame)

    git = Git(repo_URL, data_dump_path)
    # `fetch()` gets commits from all branches by default.
    # It returns a list of dictionaries, where the `data` key in each
    # dictionary contains the actual metadata for each commit.
    # Other stuff are metadata about the perceval `fetch()` operation.
    try:
        repo_fetched = [commit for commit in git.fetch()]
        # issue 33 (very ugly) band aid: delete *.pack files once downloaded by perceval
        shutil.rmtree(os.path.join(data_dump_path, 'objects', 'pack'),
                      ignore_errors=True)
        # Keep just commit `data`
        for commit_data in repo_fetched:
            commits.append(commit_data["data"])
    except RepositoryError:
        # BUG FIX: logging.warning() accepts no `file` keyword; the original
        # `file=stderr` argument raised TypeError whenever a repository
        # error actually occurred, masking the real failure.
        logging.warning("Error with this repository: " + username + "/" + reponame)
def test_fetch_filter_raw(self):
    """Test whether the fetch with filter raw properly works"""
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    raw_items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = es
    ocean.feed_items(raw_items)

    # Restricting to one commit hash must yield exactly one item
    eitems = ElasticItems(perceval_backend)
    eitems.set_filter_raw("data.commit:87783129c3f00d2c81a3a8e585eb86a47e39891a")
    eitems.elastic = es
    fetched = list(eitems.fetch())
    self.assertEqual(len(fetched), 1)
def test_fetch(self):
    """Test whether the fetch method properly works"""
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    raw_items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = es
    ocean.feed_items(raw_items)

    # A small scroll size forces fetch() to page through the index
    eitems = ElasticItems(perceval_backend)
    eitems.scroll_size = 2
    eitems.elastic = es
    fetched = list(eitems.fetch())
    self.assertEqual(len(fetched), 9)
def test_get_elastic_items(self):
    """Test whether the elastic method works properly"""
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    raw_items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = es
    ocean.feed_items(raw_items)

    eitems = ElasticItems(perceval_backend)
    eitems.elastic = es
    r_json = eitems.get_elastic_items()

    # ES7 reports total as {'value': n}; earlier versions as a bare int
    total = r_json['hits']['total']
    if isinstance(total, dict):
        total = total['value']
    self.assertEqual(total, 9)
def test_fetch_no_results(self):
    """Test whether a message is logged when no results are found"""
    # Backend origin deliberately does not match the fed items
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest-not_found', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    raw_items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = es
    ocean.feed_items(raw_items)

    eitems = ElasticItems(perceval_backend)
    eitems.elastic = es
    with self.assertLogs(logger, level='DEBUG') as cm:
        fetched = list(eitems.fetch())

    self.assertEqual(len(fetched), 0)
    self.assertRegex(cm.output[-2], 'DEBUG:grimoire_elk.elastic_items:No results found.*')
    self.assertRegex(cm.output[-1], 'DEBUG:grimoire_elk.elastic_items:Releasing scroll_id=*')
def test_fetch_from_date(self):
    """Test whether the fetch method with from_date properly works"""
    perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
    es = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

    # Load items
    raw_items = json.loads(read_file('data/git.json'))
    ocean = GitOcean(perceval_backend)
    ocean.elastic = es
    ocean.feed_items(raw_items)

    # Fetch total items
    eitems = ElasticItems(perceval_backend)
    eitems.elastic = es
    self.assertEqual(len(list(eitems.fetch())), 9)

    # Fetch with from date
    from_date = str_to_datetime("2018-02-09T08:33:22.699+00:00")
    eitems = ElasticItems(perceval_backend, from_date=from_date)
    eitems.elastic = es
    self.assertEqual(len(list(eitems.fetch())), 2)
def getCommits(user_owner, repo_name):
    """Return an iterator of commits for github.com/<user_owner>/<repo_name>.

    :param user_owner: repository owner
    :param repo_name: repository name
    :returns: generator of Perceval commit items
    """
    repo_url = f"https://github.com/{user_owner}/{repo_name}.git"
    # BUG FIX: `gitpath` must be a local directory where Perceval can clone
    # the repository; the original passed the remote URL twice, so the
    # clone path was never a usable filesystem location.
    repo = Git(repo_url, f"/tmp/{user_owner}-{repo_name}.git")
    commits = repo.fetch()
    return commits
## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ## ## Authors: ## Jesus M. Gonzalez-Barahona <*****@*****.**> ## from perceval.backends.core.git import Git import elasticsearch # Url for the git repo to analyze repo_url = 'http://github.com/grimoirelab/perceval.git' # Directory for letting Perceval clone the git repo repo_dir = '/tmp/perceval.git' # ElasticSearch instance (url) es = elasticsearch.Elasticsearch(['http://localhost:9200/']) # Create the 'commits' index in ElasticSearch es.indices.create('commits') # Create a Git object, pointing to repo_url, using repo_dir for cloning repo = Git(uri=repo_url, gitpath=repo_dir) # Fetch all commits as an iteratoir, and iterate it uploading to ElasticSearch for commit in repo.fetch(): # Create the object (dictionary) to upload to ElasticSearch summary = {'hash': commit['data']['commit']} print(summary) # Upload the object to ElasticSearch es.index(index='commits', doc_type='summary', body=summary)
def main(args):
    """Export the commit history of every GitHub repository listed in
    ``args.urls_file`` to a JSON file under ``args.output_path``.

    Already-exported repositories (matching JSON files on disk) and names
    containing "framework" are skipped. Repository metadata is checked via
    the GitHub API with ``args.github_token`` before cloning with Perceval;
    clones are removed afterwards unless ``args.cache_mode_on`` is set.
    """
    github_key = args.github_token
    list_jsons = os.listdir(os.path.abspath(args.output_path))
    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        os.chdir(os.path.abspath(args.output_path))
        for line in url_file:
            if line in ['\n', '\r\n']:
                continue
            try:
                url = line.split('/')
                if not url:
                    continue
                # NOTE(review): assumes lines shaped like
                # https://github.com/<owner>/<repo>; the trailing newline
                # may remain in url[4] — confirm input lines are stripped.
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
                continue
            repo_set.add(repo)
    for repo in sorted(repo_set):
        repo_split = repo.split('/')
        outfile_name = "%s_%s.json" % (repo_split[0], repo_split[1])
        outfile_path = "%s/%s" % (args.output_path, outfile_name)
        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
            continue
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")
            continue
        # NOTE(review): GitHub deprecated `access_token` query-parameter
        # authentication; verify this still works or move to a header.
        api_url = "https://api.github.com/repos/" + str(repo) + "?access_token=" + github_key
        logger.info("Checking metadata for repo %s" % api_url)
        try:
            response = urllib.request.urlopen(api_url)
        except urllib.error.HTTPError:
            logger.error("HTTP 404: Not found: %s" % repo)
            continue
        try:
            json_data = response.read().decode('utf-8')
            dicc_out = json.loads(json_data)
        except ValueError:
            logger.warning("Error in response (ValueError)")
            continue
        if 'message' in dicc_out:
            result = dicc_out['message']
        elif dicc_out == {}:
            result = 'False'
        else:
            result = dicc_out['private']
        # NOTE(review): `dicc_out['private']` is a JSON bool, so the string
        # comparison 'True' below never matches — private repos would fall
        # through to the download branch; confirm intent.
        if result == 'Not Found':
            logger.error("Not found: %s" % repo)
        elif result == 'True':
            logger.error("Private: %s" % repo)
        else:
            repo_url = "https://github.com/%s" % repo + ".git"
            logger.info('Executing Perceval with repo: %s' % repo)
            logger.debug('Repo stats. Size: %s KB' % dicc_out["size"])
            gitpath = '%s/%s' % (os.path.abspath(args.perceval_path), repo)
            git = Git(uri=repo_url, gitpath=gitpath)
            try:
                commits = [commit for commit in git.fetch()]
            except Exception as e:
                logger.warning("Failure while fetching commits. \nRepo: %s" % repo)
                logger.error(e)
                continue
            logger.info('Exporting results to JSON...')
            with open(outfile_path, "w", encoding='utf-8') as jfile:
                json.dump(commits, jfile, indent=4, sort_keys=True)
            logger.info('Exported to %s' % outfile_path)
            # Drop the clone unless caching was requested
            if not args.cache_mode_on:
                remove_dir(gitpath)
# This is also assuming you have installed perceval onto your computer import datetime import pytz from perceval.backends.core.git import Git from perceval.backends.core.pipermail import PipermailList from grimoirelab.toolkit.datetime import datetime_utcnow from grimoirelab.toolkit.datetime import str_to_datetime from grimoirelab.toolkit.datetime import datetime_to_utc # Url for the git repo to analyze git_repo_url = 'https://github.com/mozilla/labs-vcap-tests.git' # Directory for letting Perceval clone the git repo git_repo_dir = '/tmp/perceval.git' # Create a Git object, pointing to repo_url, using repo_dir for cloning repo = Git(uri=git_repo_url, gitpath=git_repo_dir) print("Starting 1") ''' Uses the git object to print information about the repository, this will then create the directory /tmp/perceval.git other parameters you can use are: commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703 Author: John Smith <*****@*****.**> AuthorDate: Tue Aug 14 14:30:13 2012 -0300 Commit: John Smith <*****@*****.**> CommitDate: Tue Aug 14 14:30:13 2012 -0300 ''' for commit in repo.fetch(): #print("ugh") print(commit['data']['Author'])
def test_fetch_branch(self):
    """Test whether commits are fetched from a Git repository for a given branch"""
    new_path = os.path.join(self.tmp_path, 'newgit')
    from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)  # kept from original; unused here
    git = Git(self.git_path, new_path)

    all_commits = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                   '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                   '7debcf8a2f57f86663809c58b5c07a398be7674c',
                   'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                   'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                   '589bb080f059834829a2a5955bebfd7c2baa110a',
                   'ce8e0b86a1e9877f42fe9453ede418519115f367',
                   '51a3b654f252210572297f47597b31527c475fb8',
                   '456a68ee1407a77f3e804a30dff245bb6c6b872f']
    lzp_commits = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                   '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                   '7debcf8a2f57f86663809c58b5c07a398be7674c',
                   'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                   'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                   '589bb080f059834829a2a5955bebfd7c2baa110a',
                   '51a3b654f252210572297f47597b31527c475fb8']

    def check(branches, expected):
        # Fetch the given branches and verify order and metadata of each commit
        commits = list(git.fetch(branches=branches))
        self.assertEqual(len(commits), len(expected))
        for commit, sha in zip(commits, expected):
            self.assertEqual(commit['data']['commit'], sha)
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], uuid(self.git_path, sha))
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

    check(['master'], all_commits)         # master only
    check(['lzp'], lzp_commits)            # lzp only
    check(['master', 'lzp'], all_commits)  # both branches
    check(None, all_commits)               # None means "all commits"
    check([], [])                          # [] means "no commits"

    shutil.rmtree(new_path)
from perceval.backends.core.git import Git

# Repository to analyze and local directory for the Perceval clone.
url = 'http://github.com/abhiandthetruth/JournalJar'
# Renamed from `dir`, which shadowed the `dir` builtin.
repo_dir = './temp/Saarthi'

repo = Git(uri=url, gitpath=repo_dir)
# Print every commit item fetched from the repository
for commit in repo.fetch():
    print(commit)
# Dump the previously collected commits to out_file, then read them back.
with open(out_file, 'w') as f:
    f.write(json.dumps(commits, sort_keys=True, indent=4))

with open(out_file) as str_data:
    # NOTE(review): this prints the file object itself, not its contents —
    # probably meant str_data.read(); confirm intent.
    print(str_data)
    json_data = json.load(str_data)

#
# getting data via perceval in Python
#
url = 'https://github.com/chaoss/grimoirelab-toolkit'
local_path = './chaoss-grimoirelab-toolkit'
output_file = './grimoirelab-git.json'

git = Git(url, local_path)
commits = [commit for commit in git.fetch()]
dumped = json.dumps(commits, sort_keys=True, indent=4)

# save the Perceval docs to a file
with open(output_file, 'w') as f:
    f.write(dumped)

# load the Perceval docs from a file
with open(output_file, 'r') as f:
    content = f.read()
commits = json.loads(content)

# Loop body continues beyond this chunk of the file
for c in commits:
def test_get_field_date(self):
    """Test whether the field date is correctly returned"""
    backend = Git('http://example.com', '/tmp/foo')
    ocean = ElasticOcean(backend)
    self.assertEqual(ocean.get_field_date(), 'metadata__updated_on')
def numCommits(self, url, directory):
    # Total number of commits reachable in the given repository
    repo = Git(uri=url, gitpath=directory)
    return sum(1 for _ in repo.fetch())
from perceval.backends.core.git import Git
from perceval.backends.core.git import GitCommand
from datetime import datetime

# setting up Git Argument parser
parser = GitCommand.setup_cmd_parser()
# making arguments list
arg = [
    'https://github.com/sumitskj/Prajawalan2019.git',
    '--git-path', '/tmp/clone'
]
args = parser.parse(*arg)
# making Git object
repo = Git(uri=args.uri, gitpath=args.git_path)
# finding the no. of commits and listing them all
count = 0
from_date = datetime(2018, 10, 12)
to_date = datetime(2019, 12, 9)
# NOTE(review): `branches` is assigned but never passed to fetch(), so all
# branches are fetched; `count` above is likewise never used.
branches = 'master'
item = list(repo.fetch(category='commit', from_date=from_date, to_date=to_date))
print("Number of commmits: %d." % len(item))
# j ends up equal to len(item)
j = 0
for i in item:
    j = j + 1
def git_repos():
    """Collect commit-author statistics across all amfoss repositories.

    Fetches every commit of each repository listed below (in order) with
    Perceval, counts commits per author, and returns a list of strings of
    the form ``"Commits:<n>\t User: <author>\n"``, one per distinct author.

    :returns: list of formatted author/commit-count strings
    """
    # The original repeated an identical clone-and-fetch stanza ~30 times;
    # the repository list plus one loop preserves the same fetch order.
    repo_urls = [
        'https://github.com/amfoss/vidyaratna.git',
        'https://github.com/amfoss/cms.git',
        'https://github.com/amfoss/TempleApp.git',
        'https://github.com/amfoss/website.git',
        'https://github.com/amfoss/WebApp.git',
        'https://github.com/amfoss/cms-mobile.git',
        'https://github.com/amfoss/Praveshan.git',
        'https://github.com/amfoss/bot.git',
        'https://github.com/amfoss/tasks.git',
        'https://github.com/amfoss/star-me.git',
        'https://github.com/amfoss/amdec-website.git',
        'https://github.com/amfoss/Wiki.git',
        'https://github.com/amfoss/GitLit.git',
        'https://github.com/amfoss/Qujini.git',
        'https://github.com/amfoss/attendance-tracker.git',
        'https://github.com/amfoss/events.git',
        'https://github.com/amfoss/Hack4Amrita.git',
        'https://github.com/amfoss/master-syllabus.git',
        'https://github.com/amfoss/test-repo.git',
        'https://github.com/amfoss/webspace.git',
        'https://github.com/amfoss/internal-hackathon.git',
        'https://github.com/amfoss/foss-meetups.git',
        'https://github.com/amfoss/automated-scripts.git',
        'https://github.com/amfoss/fosswebsite.git',
        'https://github.com/amfoss/fosster.git',
        'https://github.com/amfoss/Foss-talks.git',
        'https://github.com/amfoss/cybergurukulam.git',
        'https://github.com/amfoss/kdeconf.git',
        'https://github.com/amfoss/android-workshop-summer-2018.git',
        'https://github.com/amfoss/App.git',
        'https://github.com/amfoss/Workshops.git',
        'https://github.com/amfoss/Wikimedia_Hackathon_Amrita_University.git',
        'https://github.com/amfoss/website_old.git',
    ]
    repo_dir = '/perceval.tests'  # every repo is cloned/updated into the same path

    users = []  # one entry per commit, so duplicates are kept for counting
    for repo_url in repo_urls:
        repo = Git(uri=repo_url, gitpath=repo_dir)
        for commit in repo.fetch():
            users.append(commit['data']['Author'])

    # Hoisted: the original rebuilt Counter(users) on every key lookup
    counts = Counter(users)
    test = []
    for u in counts.keys():
        # NOTE(review): the original output line was partially redacted
        # ('"\t User: "******"\n"'); reconstructed as inserting the author
        # name between the two literals — confirm against the upstream code.
        a = "Commits:" + str(counts.get(u)) + "\t User: " + u + "\n"
        test.append(a)
    return test
def analyze_git(es_write):
    """Fetch commits for the selected project, eventize them and bulk-index
    the resulting events into Elasticsearch.

    Commits are accumulated and flushed to the index in batches of 15000;
    a final bulk call after the loop writes whatever is left in ``docs``.

    :param es_write: Elasticsearch client used to (re)create the index and
        to bulk-write the event documents.
    """
    # Alternative project configurations kept for reference (commented out):
    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")
    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")
    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium", "../blink_final_log_no_cm_options.log")
    commits = []        # current batch of raw commit items
    cont = 1            # total items seen; drives the 15000-item batch flush
    uniq_id = 1         # monotonically increasing ES document id across batches
    first = True
    docs = []
    all_files = pandas.DataFrame()
    # Recreate the index from scratch; ignore "not found" / "already exists"
    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)
    for item in git.fetch():
        commits.append(item)
        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)
            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')
            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")
            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                # NOTE(review): the email literal below was redacted in this copy
                events_df.loc[events_df["email"] == '*****@*****.**', "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")
            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                # (values == '') ^ True selects rows with a NON-empty flag value
                events_df.loc[(events_df["values"] == '') ^ True, "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")
            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)
            # Add project information
            events_df["project"] = PROJECT
            test = events_df.to_dict("index")
            docs = []
            # Wrap each event row in an ES bulk action header
            for i in test.keys():
                header = {
                    "_index": INDEX,
                    "_type": "item",
                    "_id": int(uniq_id),
                    "_source": test[i]
                }
                docs.append(header)
                uniq_id = uniq_id + 1
            helpers.bulk(es_write, docs)
            # Start a fresh batch after flushing
            commits = []
        cont = cont + 1
    # Flush the remaining (partial) batch
    helpers.bulk(es_write, docs)
#! /usr/bin/env python3 # Count commits import argparse from perceval.backends.core.git import Git # Read command line arguments parser = argparse.ArgumentParser(description="Count commits in a git repo") parser.add_argument("repo", help="Repository url") parser.add_argument("dir", help="Directory for cloning the repository") parser.add_argument("--print", action='store_true', help="Print hashes") args = parser.parse_args() # create a Git object, and count commmits repo = Git(uri=args.repo, gitpath=args.dir) count = 0 for commit in repo.fetch(): if args.print: print(commit['data']['commit']) count += 1 print("Number of commmits: %d." % count)
parser.add_argument( "-fr", "--fromdate", help="Date that you want to fetch information from in format YYYYMMDD") parser.add_argument( "-to", "--todate", help="Date that you want to fetch information till in format YYYYMMDD") args = parser.parse_args() # Owner and repository names (owner, repo) = args.repo.split('/') repo_git_uri = "http://github.com/{}/{}.git".format(owner, repo) repo_dir = 'tmp/perceval' # Convert from and to date to datetime object fr_dt_tuple = map(int, (args.fromdate[:4], args.fromdate[4:6], args.fromdate[6:])) fr_dt = datetime.datetime(*fr_dt_tuple) to_dt_tuple = map(int, (args.todate[:4], args.todate[4:6], args.todate[6:])) to_dt = datetime.datetime(*to_dt_tuple) git_obj = Git(uri=repo_git_uri, gitpath=repo_dir) github_obj = GitHub(owner=owner, repository=repo, api_token=args.token) # Big dicts printed, can be pretty printed for convenience for commit in git_obj.fetch(): print(commit, '\n') for item in github_obj.fetch(from_date=fr_dt, to_date=to_dt): print(item, '\n')