def get_diff_commits_origin_raw(self, ocean_backend):
    """Return the commit hashes stored in the raw index but absent from the repo.

    The hashes listed by `rev_list()` on the local git repository are
    compared with the `data.commit` values of the raw items fetched for the
    anonymized origin. If the repository cannot be read, a message is logged
    and the repository hash set stays empty, so every raw hash is returned.

    :param ocean_backend: Ocean backend used to fetch the raw items

    :returns: list with the hashes found only in the raw index
    """
    repo_origin = anonymize_url(self.perceval_backend.origin)
    origin_filter = {'name': 'origin', 'value': [repo_origin]}

    repo_hashes = set()
    try:
        repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)
        repo_hashes = set(repo.rev_list())
    except EmptyRepositoryError:
        logger.warning("No commits retrieved from {}, repo is empty".format(repo_origin))
    except RepositoryError:
        logger.warning("No commits retrieved from {}, repo doesn't exist locally".format(repo_origin))
    except Exception as e:
        logger.error("[git] No commits retrieved from {}, "
                     "git rev-list command failed: {}".format(repo_origin, e))

    raw_items = ocean_backend.fetch(ignore_incremental=True, _filter=origin_filter)
    indexed_hashes = {raw_item['data']['commit'] for raw_item in raw_items}

    return list(indexed_hashes - repo_hashes)
def update_items(self, ocean_backend, enrich_backend):
    """Retrieve the commits not present in the original repository and delete
    the corresponding documents from the raw and enriched indexes.

    The hashes stored in the raw index for this origin are compared with the
    ones listed by the local git repository. Hashes only found in the index
    are removed from both indexes in batches of `MAX_BULK_UPDATE_SIZE`, and
    the branch information of the enriched index is refreshed afterwards.

    If the local repository cannot be opened or listed, the error is logged
    and nothing is deleted or updated.

    :param ocean_backend: the ocean backend (raw index)
    :param enrich_backend: the enrich backend (enriched index)
    """
    origin = self.perceval_backend.origin
    fltr = {
        'name': 'origin',
        'value': [origin]
    }

    logger.debug("[update-items] Checking commits for %s.", origin)

    # The repository is created inside the `try` block: opening it can fail
    # (e.g. when it is not available locally) and that failure must be
    # logged like any other one instead of aborting the caller.
    try:
        git_repo = GitRepository(self.perceval_backend.uri, self.perceval_backend.gitpath)
        current_hashes = set(git_repo.rev_list())
    except Exception as e:
        # Deliberately broad: this method is best-effort and must never
        # delete documents when the repository state is unknown.
        logger.error("Something went wrong with %s, %s",
                     self.perceval_backend.uri, e, exc_info=True)
        return

    raw_hashes = {item['data']['commit']
                  for item in ocean_backend.fetch(ignore_incremental=True, _filter=fltr)}

    hashes_to_delete = list(raw_hashes.difference(current_hashes))

    def remove_from_indexes(hashes):
        """Delete the documents of the given hashes from both indexes."""
        # delete documents from the raw index
        self.remove_commits(hashes, ocean_backend.elastic.index_url,
                            'data.commit', origin)
        # delete documents from the enriched index
        self.remove_commits(hashes, enrich_backend.elastic.index_url,
                            'hash', origin)

    to_process = []
    for _hash in hashes_to_delete:
        to_process.append(_hash)

        # flush only when a full batch has been collected
        if len(to_process) == MAX_BULK_UPDATE_SIZE:
            remove_from_indexes(to_process)
            to_process = []

    # remaining hashes which did not fill a whole batch
    if to_process:
        remove_from_indexes(to_process)

    for backend in (ocean_backend, enrich_backend):
        logger.debug("[update-items] %s commits deleted from %s with origin %s.",
                     len(hashes_to_delete),
                     backend.elastic.anonymize_url(backend.elastic.index_url),
                     origin)

    # update branch info
    self.delete_commit_branches(enrich_backend)
    self.add_commit_branches(git_repo, enrich_backend)
def test_count_objects_invalid_output(self):
    """Test if an exception is raised when count_objects output is invalid"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when an assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        # Check missing value
        expected = "unable to parse 'count-objects' output;" + \
            " reason: 'in-pack' entry not found"

        with unittest.mock.patch('perceval.backends.core.git.GitRepository._exec') as mock_exec:
            mock_exec.return_value = b'count: 69\n:sze: 900\n'

            with self.assertRaises(RepositoryError) as e:
                _ = repo.count_objects()

            self.assertEqual(str(e.exception), expected)

        # Check invalid output
        with unittest.mock.patch('perceval.backends.core.git.GitRepository._exec') as mock_exec:
            mock_exec.return_value = b'invalid value'

            with self.assertRaises(RepositoryError):
                _ = repo.count_objects()
    finally:
        shutil.rmtree(new_path)
def enrich_git_branches(self, ocean_backend, enrich_backend):
    """Refresh the branch information of the commits in the enriched index.

    For every repository listed under the `git` entry of the projects map,
    the branch data is first deleted from the enriched index and then added
    again from the repository.

    :param ocean_backend: the ocean backend
    :param enrich_backend: the enrich backend
    """
    def anonymized_index():
        """Return the enriched index URL in its anonymized form."""
        return self.elastic.anonymize_url(enrich_backend.elastic.index_url)

    logger.debug("[git] study git-branches start")

    for data_source in self.prjs_map:
        # only the git repositories are relevant for this study
        if data_source == "git":
            for repo_url in self.prjs_map[data_source]:
                parsed = GitCommand(repo_url).parsed_args
                repo = GitRepository(parsed.uri, parsed.gitpath)

                logger.debug("[git] study git-branches delete branch info for repo {} in index {}".format(
                             repo.uri, anonymized_index()))
                self.delete_commit_branches(repo, enrich_backend)

                logger.debug("[git] study git-branches add branch info for repo {} in index {}".format(
                             repo.uri, anonymized_index()))
                self.add_commit_branches(repo, enrich_backend)

                logger.debug("[git] study git-branches repo {} in index {} processed".format(
                             repo.uri, anonymized_index()))

    logger.debug("[git] study git-branches end")
def test_not_existing_repo_on_init(self):
    """Check that the initialization fails when the repository does not exist"""

    message = "git repository '%s' does not exist" % (self.tmp_path)

    # callable form: the constructor is invoked by the assertion itself
    self.assertRaisesRegex(RepositoryError, message,
                           GitRepository, 'http://example.org', self.tmp_path)
def test_log_from_date(self):
    """Test if commits are returned from the given date"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when an assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        gitlog = repo.log(from_date=datetime.datetime(2014, 2, 11, 22, 7, 49))
        gitlog = list(gitlog)

        self.assertEqual(len(gitlog), 36)
        self.assertEqual(gitlog[0][:14], "commit ce8e0b8")

        # With this timezone offset the log is expected to be empty
        from_date = datetime.datetime(2014, 2, 11, 22, 7, 49,
                                      tzinfo=dateutil.tz.tzoffset(None, -36000))
        gitlog = repo.log(from_date=from_date)
        gitlog = list(gitlog)

        self.assertEqual(gitlog, [])
    finally:
        shutil.rmtree(new_path)
def test_clone_existing_directory(self):
    """Check that cloning into an already existing directory raises an error"""

    message = "git command - fatal: destination path '%s' already exists" \
        % (self.tmp_path)

    # callable form: the clone is invoked by the assertion itself
    self.assertRaisesRegex(RepositoryError, message,
                           GitRepository.clone, self.git_path, self.tmp_path)
def test_init(self):
    """Check the attributes set when a repository object is created"""

    uri = 'http://example.git'
    repo = GitRepository(uri, self.git_path)

    self.assertIsInstance(repo, GitRepository)
    self.assertEqual((repo.uri, repo.dirpath), (uri, self.git_path))
def enrich_git_branches(self, ocean_backend, enrich_backend,
                        run_month_days=(7, 14, 21, 28)):
    """Update the information about branches within the documents representing
    commits in the enriched index.

    The example below shows how to activate the study by modifying the setup.cfg.
    The study `enrich_git_branches` will be run on days depending on the parameter
    `run_month_days`, by default the days are 7, 14, 21, and 28 of each month.

    ```
    [git]
    raw_index = git_raw
    enriched_index = git_enriched
    ...
    studies = [enrich_git_branches]

    [enrich_git_branches]
    run_month_days = [5, 22]
    ```

    :param ocean_backend: the ocean backend
    :param enrich_backend: the enrich backend
    :param run_month_days: days of the month to run this study; any iterable
        of values convertible to `int` is accepted
    """
    logger.debug("[git] study git-branches start")
    day = datetime_utcnow().day
    # The default is an immutable tuple (no object shared between calls);
    # the values are copied into a fresh list of integers before being used.
    run_month_days = list(map(int, run_month_days))
    if day not in run_month_days:
        logger.debug("[git] study git-branches will execute only the days {} of each month".format(run_month_days))
        logger.debug("[git] study git-branches end")
        return

    for ds in self.prjs_map:
        # only the git repositories are relevant for this study
        if ds != "git":
            continue

        urls = self.prjs_map[ds]

        for url in urls:
            cmd = GitCommand(*[url])

            git_repo = GitRepository(cmd.parsed_args.uri, cmd.parsed_args.gitpath)

            logger.debug("[git] study git-branches delete branch info for repo {} in index {}".format(
                         git_repo.uri, anonymize_url(enrich_backend.elastic.index_url)))
            self.delete_commit_branches(git_repo, enrich_backend)

            logger.debug("[git] study git-branches add branch info for repo {} in index {}".format(
                         git_repo.uri, anonymize_url(enrich_backend.elastic.index_url)))
            try:
                self.add_commit_branches(git_repo, enrich_backend)
            except Exception as e:
                # a failure on one repository must not stop the remaining ones
                logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e))
                continue

            logger.debug("[git] study git-branches repo {} in index {} processed".format(
                         git_repo.uri, anonymize_url(enrich_backend.elastic.index_url)))

    logger.debug("[git] study git-branches end")
def test_git_parser_from_iter(self):
    """Check that the static parser handles a log produced by a repository"""

    expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                '7debcf8a2f57f86663809c58b5c07a398be7674c',
                'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                '589bb080f059834829a2a5955bebfd7c2baa110a',
                'ce8e0b86a1e9877f42fe9453ede418519115f367',
                '51a3b654f252210572297f47597b31527c475fb8',
                '456a68ee1407a77f3e804a30dff245bb6c6b872f']

    repo = GitRepository(self.git_path, self.git_path)

    # collect the hash of every parsed commit, in the order they are yielded
    hashes = []
    for parsed_commit in Git.parse_git_log_from_iter(repo.log()):
        hashes.append(parsed_commit['commit'])

    self.assertListEqual(hashes, expected)
def test_is_detached(self):
    """Test if a repository is in detached state or not"""

    new_path = os.path.join(self.tmp_path, 'newgit')

    # (repository to clone, expected result of is_detached())
    cases = (
        (self.git_path, False),
        (self.git_detached_path, True),
    )

    for origin_path, expected in cases:
        repo = GitRepository.clone(origin_path, new_path)

        # The clone is removed even when the assertion fails, so the
        # shared 'newgit' path is always free for the next clone.
        try:
            is_detached = repo.is_detached()
            self.assertEqual(is_detached, expected)
        finally:
            shutil.rmtree(new_path)
def test_is_empty(self):
    """Test if a repository is empty or not"""

    new_path = os.path.join(self.tmp_path, 'newgit')

    # (repository to clone, expected result of is_empty())
    cases = (
        (self.git_path, False),
        (self.git_empty_path, True),
    )

    for origin_path, expected in cases:
        repo = GitRepository.clone(origin_path, new_path)

        # The clone is removed even when the assertion fails, so the
        # shared 'newgit' path is always free for the next clone.
        try:
            is_empty = repo.is_empty()
            self.assertEqual(is_empty, expected)
        finally:
            shutil.rmtree(new_path)
def test_pull(self):
    """Test if the repository is updated to 'origin' status"""

    def count_commits():
        """Get the number of commits counting the entries on the log"""

        cmd = ['git', 'log', '--oneline']
        gitlog = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         cwd=new_path,
                                         env={'LANG': 'C', 'PAGER': ''})
        commits = gitlog.strip(b'\n').split(b'\n')
        return len(commits)

    new_path = os.path.join(self.tmp_path, 'newgit')
    new_file = os.path.join(new_path, 'newfile')

    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when an assertion or a git command fails,
    # so the shared 'newgit' path is always free for the following tests.
    try:
        # Count the number of commits before adding a new one
        ncommits = count_commits()
        self.assertEqual(ncommits, 9)

        # Create a new file and commit it to the repository
        with open(new_file, 'w') as f:
            f.write("Testing pull method")

        cmd = ['git', 'add', new_file]
        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                cwd=new_path, env={'LANG': 'C'})

        cmd = ['git', '-c', 'user.name="mock"',
               '-c', 'user.email="*****@*****.**"',
               'commit', '-m', 'Testing pull']
        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                cwd=new_path, env={'LANG': 'C'})

        # Count the number of commits after adding a new one
        ncommits = count_commits()
        self.assertEqual(ncommits, 10)

        # Update the repository to its original status
        repo.pull()

        # The number of commits should be updated to its original value
        ncommits = count_commits()
        self.assertEqual(ncommits, 9)
    finally:
        shutil.rmtree(new_path)
def test_count_objects(self):
    """Test if it gets the number of objects in a repository"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when the assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        nobjs = repo.count_objects()
        self.assertEqual(nobjs, 42)
    finally:
        shutil.rmtree(new_path)
def test_pull_empty_repository(self):
    """Test if an exception is raised when the repository is empty"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_empty_path, new_path)

    # The clone is removed even when the assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        with self.assertRaises(EmptyRepositoryError):
            repo.pull()
    finally:
        shutil.rmtree(new_path)
def test_clone_error(self):
    """Check that an error while cloning a repository raises an exception"""

    # The source of the clone is a directory which is not a git repository
    target_path = os.path.join(self.tmp_path, 'newgit')

    message = "git command - fatal: repository '%s' does not exist" \
        % self.tmp_path

    # callable form: the clone is invoked by the assertion itself
    self.assertRaisesRegex(RepositoryError, message,
                           GitRepository.clone, self.tmp_path, target_path)
def test_log_from_empty_repository(self):
    """Test if an exception is raised when the repository is empty"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_empty_path, new_path)

    # The clone is removed even when the assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        gitlog = repo.log()

        # the exception is expected while the log is being consumed
        with self.assertRaises(EmptyRepositoryError):
            _ = list(gitlog)
    finally:
        shutil.rmtree(new_path)
def test_log_empty(self):
    """Test if no line is returned when the log is empty"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when the assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        gitlog = repo.log(from_date=datetime.datetime(2020, 1, 1, 1, 1, 1))
        gitlog = list(gitlog)

        self.assertListEqual(gitlog, [])
    finally:
        shutil.rmtree(new_path)
def test_log(self):
    """Test log command"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when an assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        gitlog = repo.log()
        gitlog = list(gitlog)

        self.assertEqual(len(gitlog), 108)
        self.assertEqual(gitlog[0][:14], "commit bc57a92")
    finally:
        shutil.rmtree(new_path)
def test_not_git(self):
    """Test if a supposed git repo is not a git repo"""

    new_path = os.path.join(self.tmp_path, 'falsegit')
    os.makedirs(new_path, exist_ok=True)

    expected = "git repository '%s' does not exist" % new_path

    # The directory is removed even when the assertion fails, so no
    # leftovers remain under the temporary path.
    try:
        with self.assertRaisesRegex(RepositoryError, expected):
            _ = GitRepository(uri="", dirpath=new_path)
    finally:
        shutil.rmtree(new_path)
def test_clone(self):
    """Test if a git repository is cloned"""

    new_path = os.path.join(self.tmp_path, 'newgit')
    repo = GitRepository.clone(self.git_path, new_path)

    # The clone is removed even when an assertion fails, so the shared
    # 'newgit' path is always free for the tests that run afterwards.
    try:
        self.assertIsInstance(repo, GitRepository)
        self.assertEqual(repo.uri, self.git_path)
        self.assertEqual(repo.dirpath, new_path)
        self.assertTrue(os.path.exists(new_path))
        self.assertTrue(os.path.exists(os.path.join(new_path, '.git')))
    finally:
        shutil.rmtree(new_path)
def enrich_git_branches(self, ocean_backend, enrich_backend):
    """Refresh the branch information of the commits in the enriched index.

    For every repository listed under the `git` entry of the projects map,
    the branch data is first deleted from the enriched index and then added
    again from the repository.

    :param ocean_backend: the ocean backend
    :param enrich_backend: the enrich backend
    """
    for data_source in self.prjs_map:
        # only the git repositories are relevant for this study
        if data_source == "git":
            for repo_url in self.prjs_map[data_source]:
                parsed = GitCommand(repo_url).parsed_args
                repo = GitRepository(parsed.uri, parsed.gitpath)

                self.delete_commit_branches(repo, enrich_backend)
                self.add_commit_branches(repo, enrich_backend)
def update_items(self, ocean_backend, enrich_backend):
    """Delete from the raw and enriched indexes the commits which are no
    longer present in the original repository.

    The hashes stored in the raw index for this origin are compared with the
    ones listed by the local git repository; the hashes only found in the
    index are removed from both indexes in batches of `MAX_BULK_UPDATE_SIZE`.
    When the repository is empty, not available locally or cannot be listed,
    a message is logged and nothing is deleted.

    :param ocean_backend: the ocean backend (raw index)
    :param enrich_backend: the enrich backend (enriched index)
    """
    origin = self.perceval_backend.origin
    origin_filter = {'name': 'origin', 'value': [origin]}

    logger.debug("[git] update-items Checking commits for {}.".format(origin))

    try:
        repo = GitRepository(self.perceval_backend.uri,
                             self.perceval_backend.gitpath)
        repo_hashes = set(repo.rev_list())
    except EmptyRepositoryError:
        logger.warning("[git] Skip updating branch info for repo {}, "
                       "repo is empty".format(origin))
        return
    except RepositoryError:
        logger.warning("[git] Skip updating branch info for repo {}, "
                       "repo doesn't exist locally".format(origin))
        return
    except Exception as e:
        logger.error("[git] Skip updating branch info for repo {}, "
                     "git rev-list command failed: {}".format(origin, e))
        return

    raw_items = ocean_backend.fetch(ignore_incremental=True, _filter=origin_filter)
    indexed_hashes = {raw_item['data']['commit'] for raw_item in raw_items}

    stale_hashes = list(indexed_hashes - repo_hashes)

    def purge(batch):
        """Delete the documents of the given hashes from both indexes."""
        # delete documents from the raw index
        self.remove_commits(batch, ocean_backend.elastic.index_url,
                            'data.commit', origin)
        # delete documents from the enriched index
        self.remove_commits(batch, enrich_backend.elastic.index_url,
                            'hash', origin)

    batch = []
    for commit_hash in stale_hashes:
        batch.append(commit_hash)

        # flush only when a full batch has been collected
        if len(batch) == MAX_BULK_UPDATE_SIZE:
            purge(batch)
            batch = []

    # remaining hashes which did not fill a whole batch
    if batch:
        purge(batch)

    # one summary message per index, raw first and enriched second
    for backend in (ocean_backend, enrich_backend):
        logger.debug(
            "[git] update-items {} commits deleted from {} with origin {}.".format(
                len(stale_hashes),
                backend.elastic.anonymize_url(backend.elastic.index_url),
                origin))