Example #1
    def test_fetch_since_date(self):
        """Test whether commits are fetched from a Git repository since the given date"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        expected = [('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Test it using a datetime that includes the timezone
        from_date = datetime.datetime(2012,
                                      8,
                                      14,
                                      14,
                                      30,
                                      00,
                                      tzinfo=dateutil.tz.tzoffset(
                                          None, -36000))
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        shutil.rmtree(new_path)
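A minimal standalone sketch of the from_date behaviour exercised above (the repository URI and clone path are hypothetical placeholders):

import datetime

import dateutil.tz

from perceval.backends.core.git import Git

# Only commits newer than this timestamp are yielded by fetch()
since = datetime.datetime(2014, 2, 11, 22, 7, 49, tzinfo=dateutil.tz.tzutc())
repo = Git('https://github.com/example/repo.git', '/tmp/example.git')
for commit in repo.fetch(from_date=since):
    print(commit['data']['commit'], commit['updated_on'])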
Example #2
    def test_fetch(self):
        """Test whether commits are fetched from a Git repository"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch()]

        expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                    ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                    ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                    ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                    ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                    ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                    ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x][0])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x][0])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['updated_on'], expected[x][1])
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        shutil.rmtree(new_path)
    def test_get_elastic_items_filter(self):
        """Test whether the elastic method works properly with filter"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        filter = {
            "name": "uuid",
            "value": [
                "43f217b2f678a5691fdbc5c6c5302243e79e5a90",
                "00ee6902e34b309cd05706c26e3e195a62492f60"
            ]
        }

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        r_json = eitems.get_elastic_items(_filter=filter)
        hits = r_json['hits']['hits']

        self.assertEqual(len(hits), 2)
        self.assertEqual(hits[0]['_source']['uuid'], "43f217b2f678a5691fdbc5c6c5302243e79e5a90")
        self.assertEqual(hits[1]['_source']['uuid'], "00ee6902e34b309cd05706c26e3e195a62492f60")
def commit_counter(own, repo_url, d1, df, k):
    # url for the git repo to analyze
    #repo_url = 'https://github.com/kmn5409/INFO1601.git'
    print("Owner\t\tRepository")
    print(own, "\t", repo_url)
    repo_url = 'https://github.com/' + own + '/' + repo_url + '.git'
    # directory for letting Perceval clone the git repo
    repo_dir = '/tmp/' + repo_url.split('/')[-1]

    # create a Git object, pointing to repo_url, using repo_dir for cloning
    repo = Git(uri=repo_url, gitpath=repo_dir)
    count = 0
    # fetch all commits as an iterator and count those more recent than d1
    mon = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]
    for commit in repo.fetch():
        temp = commit['data']['CommitDate'].split(" ")
        day = int(temp[2])
        month = temp[1]
        for i in range(len(mon)):
            if (month == mon[i]):
                month_num = i + 1
        year = int(temp[4])
        if (isrecent(d1, day, month_num, year)):
            count += 1
    print("Number of commmits: ", count)
    df['Number of commits'][k] = count
    return count
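The month-name lookup above could also be replaced by parsing the whole CommitDate string, as other snippets in this collection do for AuthorDate (a hedged sketch; commit_date is a hypothetical helper):

from datetime import datetime

def commit_date(commit):
    # 'CommitDate' looks like 'Tue Aug 14 14:30:13 2012 -0300'
    return datetime.strptime(commit['data']['CommitDate'],
                             '%a %b %d %H:%M:%S %Y %z')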
Example #5
 def list_committers(self, url, directory):
     # Return the list of committer names from a git repository, ordered by
     # commit and including repeated users, so contributions per user can be
     # counted (e.g. to analyze contributions by gender).
     repo = Git(uri=url, gitpath=directory)
     list_committers = []
     for user in repo.fetch():
         committer = self.removeMail(user['data']['Author'])
         list_committers.append(committer)
     return list_committers
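Because the list keeps one entry per commit, contributions per committer can be tallied with collections.Counter (a usage sketch; analyzer stands for a hypothetical instance of the class that defines list_committers, and the URL/path are placeholders):

from collections import Counter

committers = analyzer.list_committers('https://github.com/example/repo.git',
                                      '/tmp/example.git')
for name, n_commits in Counter(committers).most_common():
    print(name, n_commits)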
Example #6
    def test_fetch_empty_log(self):
        """Test whether it parsers an empty log"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        from_date = datetime.datetime(2020, 1, 1, 1, 1, 1)
        git = Git(self.git_path, new_path)
        commits = [commit for commit in git.fetch(from_date=from_date)]

        self.assertListEqual(commits, [])
Example #7
    def test_initialization(self):
        """Test whether attributes are initializated"""

        git = Git('http://example.com', self.git_path, tag='test')

        self.assertEqual(git.uri, 'http://example.com')
        self.assertEqual(git.gitpath, self.git_path)
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'test')

        # When tag is empty or None it will be set to
        # the value in uri
        git = Git('http://example.com', self.git_path)
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'http://example.com')

        git = Git('http://example.com', self.git_path, tag='')
        self.assertEqual(git.origin, 'http://example.com')
        self.assertEqual(git.tag, 'http://example.com')
Example #8
    def test_fetch_from_empty_repository(self):
        """Test whether it parses from empty repository"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        git = Git(self.git_empty_path, new_path)
        commits = [commit for commit in git.fetch()]

        self.assertListEqual(commits, [])

        shutil.rmtree(new_path)
Example #9
def run(args):
    """
    """  
    repo_url = args['git_repository']
    period_length = args['period']
    threshold = args['threshold']
    active_days = True

    # directory for letting Perceval clone the git repo
    # TODO: this is Linux-specific; it should be changed
    repo_dir = '/tmp/' + repo_url.split('/')[-1] + '.git'
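    # A platform-independent alternative for the TODO above would be a
    # temporary directory (hypothetical sketch, not part of the original):
    #   import tempfile
    #   repo_dir = tempfile.mkdtemp(suffix='.git')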

    first_commit = datetime.now(timezone.utc)
    authorDict = defaultdict(list)

    repo = Git(uri=repo_url, gitpath=repo_dir)

    for commit in repo.fetch():
        commitdate = datetime.strptime(commit['data']['AuthorDate'], '%a %b %d %H:%M:%S %Y %z')
        if commitdate < first_commit:
            first_commit = commitdate
        authorDict[commit['data']['Author']].append(commitdate)
    logging.info("Authors found: " + str(len(authorDict)))

    simplemerge(authorDict)
    logging.info("Authors after merge: " + str(len(authorDict)))
    
    author_count = author_counting(authorDict, period_length, active_days)
#    print(author_count)
    (effort_periods, full_time_periods, non_full_time_periods) = project_period_effort(author_count, threshold, period_length)
    maxeffort_periods = project_period_maxeffort(author_count, period_length)

    # Printing results
    print()
    print("CONFIGURATIONS:")
    print("  Length of period (in months):", period_length)
    print("  Threshold t (in commits in a period):", threshold)
    print()
    print("RESULTS:")
    print("  First commit date:", first_commit, "--", round((datetime.now(timezone.utc)-first_commit).days/30, 2) , "months ago")
    print("  Maximum possible development effort (in person-months):", sum(maxeffort_periods.values()))
    print()
    print(pretty_print_period(period_length, first_commit, ["FT", "Non-FT", "Effort"], full_time_periods, non_full_time_periods, effort_periods))
    print(" " * 8, "FT: Full-time developers")
    print()
    print("  ---> Estimated development effort (in person-months):", round(sum(effort_periods.values()), 2))
    print()
    print("For more information, visit http://github.com/gregoriorobles/git2effort")
    print()
    def test_get_elastic_items_error(self):
        """Test whether a message is logged if an error occurs when getting items from an index"""

        items = json.loads(read_file('data/git.json'))
        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)
        elastic.bulk_upload(items, field_id="uuid")

        # Load items
        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic

        with self.assertLogs(logger, level='DEBUG') as cm:
            r_json = eitems.get_elastic_items()
            self.assertIsNone(r_json)
            self.assertRegex(cm.output[-1], 'DEBUG:grimoire_elk.elastic_items:No results found from*')
Example #11
def get_commits(username, reponame, commits, config):
    """
    TODO: Add docstring. See: https://realpython.com/documenting-python-code/
    TODO: Implement recursion argument, default to False.

    Parameters
    ==========

    `username` : str, required
    `reponame` : str, required
    `commits` : list, required
 
    Raises
    ======

    NotImplementedError
        If no sound is set for the animal or passed in as a
        parameter.
    """
    
    repo_URL = 'https://github.com/' + username + '/' + reponame

     # checks whether the export dir exists and if not creates it # TODO: this is a code snippet we use three times, we should make a function out of it
    local_dir = os.path.join(config["data_dir"],'grimoire_dumps')
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
    data_dump_path = os.path.join(local_dir, username + '-' + reponame)

    git = Git(repo_URL, data_dump_path)
    
    # `fetch()` gets commits from all branches by default.
    # It returns a list of dictionaries, where the `data` key in each
    # dictionary contains the actual metadata for each commit.
    # Other stuff are metadata about the perceval `fetch()` operation.
    try:
        repo_fetched = [commit for commit in git.fetch()]

        # issue 33 (very ugly) band aid: delete *.pack files once downloaded by perceval
        shutil.rmtree(os.path.join(data_dump_path, 'objects','pack'), ignore_errors=True)
        
        # Keep just commit `data`
        for commit_data in repo_fetched:
            commits.append(commit_data["data"])
    except RepositoryError as repo_error:
        logging.warning("Error with this repository: %s/%s (%s)",
                        username, reponame, repo_error)
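For reference, a sketch of the item layout that the snippets in this collection rely on (repository URI and clone path are placeholders; the metadata keys listed are the ones asserted in the tests above):

from perceval.backends.core.git import Git

repo = Git('https://github.com/example/repo.git', '/tmp/example.git')
for item in repo.fetch():
    # Perceval metadata wrapped around each commit
    print(item['uuid'], item['origin'], item['category'],
          item['updated_on'], item['tag'])
    # The raw commit itself lives under 'data'
    print(item['data']['commit'], item['data']['Author'],
          item['data']['CommitDate'])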
    def test_fetch_filter_raw(self):
        """Test whether the fetch with filter raw properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.set_filter_raw("data.commit:87783129c3f00d2c81a3a8e585eb86a47e39891a")
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 1)
    def test_fetch(self):
        """Test whether the fetch method properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.scroll_size = 2
        eitems.elastic = elastic

        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 9)
    def test_get_elastic_items(self):
        """Test whether the elastic method works properly"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        r_json = eitems.get_elastic_items()

        total = r_json['hits']['total']
        total = total['value'] if isinstance(total, dict) else total
        self.assertEqual(total, 9)
    def test_fetch_no_results(self):
        """Test whether a message is logged when no results are found"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest-not_found', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic

        with self.assertLogs(logger, level='DEBUG') as cm:
            items = [ei for ei in eitems.fetch()]
            self.assertEqual(len(items), 0)
            self.assertRegex(cm.output[-2], 'DEBUG:grimoire_elk.elastic_items:No results found.*')
            self.assertRegex(cm.output[-1], 'DEBUG:grimoire_elk.elastic_items:Releasing scroll_id=*')
    def test_fetch_from_date(self):
        """Test whether the fetch method with from_date properly works"""

        perceval_backend = Git('/tmp/perceval_mc84igfc/gittest', '/tmp/foo')
        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)

        # Load items
        items = json.loads(read_file('data/git.json'))
        ocean = GitOcean(perceval_backend)
        ocean.elastic = elastic
        ocean.feed_items(items)

        # Fetch total items
        eitems = ElasticItems(perceval_backend)
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 9)

        # Fetch with from date
        from_date = str_to_datetime("2018-02-09T08:33:22.699+00:00")
        eitems = ElasticItems(perceval_backend, from_date=from_date)
        eitems.elastic = elastic
        items = [ei for ei in eitems.fetch()]
        self.assertEqual(len(items), 2)
Example #17
def getCommits(user_owner, repo_name):
    # The second argument to Git() must be a local path for the clone,
    # not the repository URL; any scratch directory works.
    repo = Git(f"https://github.com/{user_owner}/{repo_name}.git",
               f"/tmp/{repo_name}.git")
    commits = repo.fetch()
    return commits
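Usage sketch for the helper above (owner and repository names are placeholders):

for commit in getCommits('chaoss', 'grimoirelab-perceval'):
    print(commit['data']['commit'])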
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
## Authors:
##   Jesus M. Gonzalez-Barahona <*****@*****.**>
##

from perceval.backends.core.git import Git
import elasticsearch

# Url for the git repo to analyze
repo_url = 'http://github.com/grimoirelab/perceval.git'
# Directory for letting Perceval clone the git repo
repo_dir = '/tmp/perceval.git'
# ElasticSearch instance (url)
es = elasticsearch.Elasticsearch(['http://localhost:9200/'])

# Create the 'commits' index in ElasticSearch
es.indices.create('commits')
# Create a Git object, pointing to repo_url, using repo_dir for cloning
repo = Git(uri=repo_url, gitpath=repo_dir)
# Fetch all commits as an iterator, and iterate it uploading to ElasticSearch
for commit in repo.fetch():
    # Create the object (dictionary) to upload to ElasticSearch
    summary = {'hash': commit['data']['commit']}
    print(summary)
    # Upload the object to ElasticSearch
    es.index(index='commits', doc_type='summary', body=summary)
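A small follow-up sketch: once the loop above has finished, the uploaded summaries can be checked with a search against the same index (the result layout depends on the Elasticsearch version):

es.indices.refresh(index='commits')
result = es.search(index='commits')
print(result['hits']['total'])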
Example #19
def main(args):
    github_key = args.github_token
    list_jsons = os.listdir(os.path.abspath(args.output_path))
    repo_set = set()
    with open(args.urls_file, 'r') as url_file:
        os.chdir(os.path.abspath(args.output_path))
        for line in url_file:
            if line in ['\n', '\r\n']:
                continue
            try:
                url = line.split('/')
                if not url:
                    continue
                repo = "%s/%s" % (url[3], url[4])
            except IndexError:
                print("url:" + line)
                logger.error("Error in repo (line) " + line + "\r\n")
                continue

            repo_set.add(repo)

    for repo in sorted(repo_set):

        repo_split = repo.split('/')
        outfile_name = "%s_%s.json" % (repo_split[0], repo_split[1])
        outfile_path = "%s/%s" % (args.output_path, outfile_name)

        if outfile_name in list_jsons:
            logger.info("Already downloaded: %s " % outfile_name)
            continue
        if "framework" in outfile_name:
            logger.info("Skipping <framework> repository")
            continue

        api_url = "https://api.github.com/repos/" + str(repo) + "?access_token=" + github_key
        logger.info("Checking metadata for repo %s" % api_url)
        try:
            response = urllib.request.urlopen(api_url)
        except urllib.error.HTTPError:
            logger.error("HTTP 404: Not found: %s" % repo)
            continue

        try:
            json_data = response.read().decode('utf-8')
            dicc_out = json.loads(json_data)
        except ValueError:
            logger.warning("Error in response (ValueError)")
            continue


        if 'message' in dicc_out:
            result = dicc_out['message']
        elif dicc_out == {}:
            result = 'False'
        else:
            result = dicc_out['private']

        if result == 'Not Found':
            logger.error("Not found: %s" % repo)
        elif result is True:
            logger.error("Private: %s" % repo)
        else:
            repo_url = "https://github.com/%s" % repo + ".git"

            logger.info('Executing Perceval with repo: %s' % repo)
            logger.debug('Repo stats. Size: %s KB' % dicc_out["size"])
            gitpath = '%s/%s' % (os.path.abspath(args.perceval_path), repo)
            git = Git(uri=repo_url, gitpath=gitpath)
            try:
                commits = [commit for commit in git.fetch()]
            except Exception as e:
                logger.warning("Failure while fetching commits. Repo: %s" % repo)
                logger.error(e)
                continue
            logger.info('Exporting results to JSON...')
            with open(outfile_path, "w", encoding='utf-8') as jfile:
                json.dump(commits, jfile, indent=4, sort_keys=True)
            logger.info('Exported to %s' % outfile_path)
            if not args.cache_mode_on:
                remove_dir(gitpath)
Example #20
# This is also assuming you have installed perceval onto your computer

import datetime
import pytz
from perceval.backends.core.git import Git
from perceval.backends.core.pipermail import PipermailList
from grimoirelab.toolkit.datetime import datetime_utcnow
from grimoirelab.toolkit.datetime import str_to_datetime
from grimoirelab.toolkit.datetime import datetime_to_utc

# Url for the git repo to analyze
git_repo_url = 'https://github.com/mozilla/labs-vcap-tests.git'
# Directory for letting Perceval clone the git repo
git_repo_dir = '/tmp/perceval.git'
# Create a Git object, pointing to repo_url, using repo_dir for cloning
repo = Git(uri=git_repo_url, gitpath=git_repo_dir)
print("Starting 1")
'''
Uses the git object to print information about the repository,
this will then create the directory /tmp/perceval.git
other parameters you can use are:
	commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703
	Author:     John Smith <*****@*****.**>
	AuthorDate: Tue Aug 14 14:30:13 2012 -0300
	Commit:     John Smith <*****@*****.**>
	CommitDate: Tue Aug 14 14:30:13 2012 -0300
'''

for commit in repo.fetch():
    #print("ugh")
    print(commit['data']['Author'])
Example #21
    def test_fetch_branch(self):
        """Test whether commits are fetched from a Git repository for a given branch"""

        new_path = os.path.join(self.tmp_path, 'newgit')

        from_date = datetime.datetime(2014, 2, 11, 22, 7, 49)
        git = Git(self.git_path, new_path)
        # Let's fetch master
        commits = [commit for commit in git.fetch(branches=['master'])]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                    '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                    '7debcf8a2f57f86663809c58b5c07a398be7674c',
                    'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                    'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                    '589bb080f059834829a2a5955bebfd7c2baa110a',
                    'ce8e0b86a1e9877f42fe9453ede418519115f367',
                    '51a3b654f252210572297f47597b31527c475fb8',
                    '456a68ee1407a77f3e804a30dff245bb6c6b872f']

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now let's fetch lzp
        commits = [commit for commit in git.fetch(branches=['lzp'])]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                    '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                    '7debcf8a2f57f86663809c58b5c07a398be7674c',
                    'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                    'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                    '589bb080f059834829a2a5955bebfd7c2baa110a',
                    '51a3b654f252210572297f47597b31527c475fb8']

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch master and lzp
        commits = [commit for commit in git.fetch(branches=['master', 'lzp'])]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                    '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                    '7debcf8a2f57f86663809c58b5c07a398be7674c',
                    'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                    'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                    '589bb080f059834829a2a5955bebfd7c2baa110a',
                    'ce8e0b86a1e9877f42fe9453ede418519115f367',
                    '51a3b654f252210572297f47597b31527c475fb8',
                    '456a68ee1407a77f3e804a30dff245bb6c6b872f']

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch None, which means "all commits"
        commits = [commit for commit in git.fetch(branches=None)]

        expected = ['bc57a9209f096a130dcc5ba7089a8663f758a703',
                    '87783129c3f00d2c81a3a8e585eb86a47e39891a',
                    '7debcf8a2f57f86663809c58b5c07a398be7674c',
                    'c0d66f92a95e31c77be08dc9d0f11a16715d1885',
                    'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb',
                    '589bb080f059834829a2a5955bebfd7c2baa110a',
                    'ce8e0b86a1e9877f42fe9453ede418519115f367',
                    '51a3b654f252210572297f47597b31527c475fb8',
                    '456a68ee1407a77f3e804a30dff245bb6c6b872f']

        self.assertEqual(len(commits), len(expected))

        for x in range(len(commits)):
            expected_uuid = uuid(self.git_path, expected[x])
            commit = commits[x]
            self.assertEqual(commit['data']['commit'], expected[x])
            self.assertEqual(commit['origin'], self.git_path)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['category'], 'commit')
            self.assertEqual(commit['tag'], self.git_path)

        # Now, let's fetch [], which means "no commits"
        commits = [commit for commit in git.fetch(branches=[])]

        expected = []

        self.assertEqual(len(commits), len(expected))

        shutil.rmtree(new_path)
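A condensed sketch of the branches parameter behaviour exercised by the test above (repository URI and clone path are placeholders):

from perceval.backends.core.git import Git

repo = Git('https://github.com/example/repo.git', '/tmp/example.git')
master_only = list(repo.fetch(branches=['master']))  # a single branch
all_commits = list(repo.fetch(branches=None))        # None means all commits
no_commits = list(repo.fetch(branches=[]))           # [] means no commits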
from perceval.backends.core.git import Git

url = 'http://github.com/abhiandthetruth/JournalJar'

dir = './temp/Saarthi'

repo = Git(uri=url, gitpath=dir)

for commit in repo.fetch():
    print(commit)
Example #23
with open(out_file, 'w') as f:
    f.write(json.dumps(commits, sort_keys=True, indent=4))

with open(out_file) as str_data:
    print(str_data)
    json_data = json.load(str_data)

#
# getting data via perceval in Python
#

url = 'https://github.com/chaoss/grimoirelab-toolkit'
local_path = './chaoss-grimoirelab-toolkit'
output_file = './grimoirelab-git.json'

git = Git(url, local_path)

commits = [commit for commit in git.fetch()]

dumped = json.dumps(commits, sort_keys=True, indent=4)

# save the Perceval docs to a file
with open(output_file, 'w') as f:
    f.write(dumped)

# load the Perceval docs from a file
with open(output_file, 'r') as f:
    content = f.read()
    commits = json.loads(content)

for c in commits:
Example #24
    def test_get_field_date(self):
        """Test whether the field date is correctly returned"""

        perceval_backend = Git('http://example.com', '/tmp/foo')
        eitems = ElasticOcean(perceval_backend)
        self.assertEqual(eitems.get_field_date(), 'metadata__updated_on')
Example #25
 def numCommits(self, url, directory):
     repo = Git(uri=url, gitpath=directory)
     count = 0
     for commit in repo.fetch():
         count += 1
     return count
Example #26
from perceval.backends.core.git import Git
from perceval.backends.core.git import GitCommand
from datetime import datetime

#setting up Git Argument parser
parser = GitCommand.setup_cmd_parser()

# making arguments list
arg = [
    'https://github.com/sumitskj/Prajawalan2019.git', '--git-path',
    '/tmp/clone'
]
args = parser.parse(*arg)

# making Git object
repo = Git(uri=args.uri, gitpath=args.git_path)

# finding the no. of commits and listing them all
count = 0

from_date = datetime(2018, 10, 12)
to_date = datetime(2019, 12, 9)
branches = 'master'

item = list(repo.fetch(category='commit', from_date=from_date,
                       to_date=to_date))

print("Number of commmits: %d." % len(item))
j = 0
for i in item:
    j = j + 1
Example #27
def git_repos():
    users = []  # one entry per commit: the commit's Author line
    test = []

    # amfoss repositories to analyze
    repo_urls = [
        'https://github.com/amfoss/vidyaratna.git',
        'https://github.com/amfoss/cms.git',
        'https://github.com/amfoss/TempleApp.git',
        'https://github.com/amfoss/website.git',
        'https://github.com/amfoss/WebApp.git',
        'https://github.com/amfoss/cms-mobile.git',
        'https://github.com/amfoss/Praveshan.git',
        'https://github.com/amfoss/bot.git',
        'https://github.com/amfoss/tasks.git',
        'https://github.com/amfoss/star-me.git',
        'https://github.com/amfoss/amdec-website.git',
        'https://github.com/amfoss/Wiki.git',
        'https://github.com/amfoss/GitLit.git',
        'https://github.com/amfoss/Qujini.git',
        'https://github.com/amfoss/attendance-tracker.git',
        'https://github.com/amfoss/events.git',
        'https://github.com/amfoss/Hack4Amrita.git',
        'https://github.com/amfoss/master-syllabus.git',
        'https://github.com/amfoss/test-repo.git',
        'https://github.com/amfoss/webspace.git',
        'https://github.com/amfoss/internal-hackathon.git',
        'https://github.com/amfoss/foss-meetups.git',
        'https://github.com/amfoss/automated-scripts.git',
        'https://github.com/amfoss/fosswebsite.git',
        'https://github.com/amfoss/fosster.git',
        'https://github.com/amfoss/Foss-talks.git',
        'https://github.com/amfoss/cybergurukulam.git',
        'https://github.com/amfoss/kdeconf.git',
        'https://github.com/amfoss/android-workshop-summer-2018.git',
        'https://github.com/amfoss/App.git',
        'https://github.com/amfoss/Workshops.git',
        'https://github.com/amfoss/Wikimedia_Hackathon_Amrita_University.git',
        'https://github.com/amfoss/website_old.git',
    ]

    repo_dir = '/perceval.tests'  # target directory used for every clone

    for repo_url in repo_urls:
        repo = Git(uri=repo_url, gitpath=repo_dir)  # it will search in git
        # repo.fetch() yields every commit in the repository
        for commit in repo.fetch():
            users.append(commit['data']['Author'])  # collect the author details

    # One summary line per author: number of commits and the author string
    counts = Counter(users)
    for u in counts.keys():
        a = "Commits:" + str(counts.get(u)) + "\t User: " + u + "\n"
        test.append(a)  # appending all the per-author summaries into 'test'

    return test
Example #28
def analyze_git(es_write):

    #INDEX = 'git_gecko'
    #PROJECT = 'gecko'
    #git = Git("https://github.com/mozilla/gecko-dev.git", "../gecko_all_commits_final_version_no_cm_options_nobrowser_nochrome_notoolkit.log")

    #INDEX = 'git_webkit'
    #PROJECT = 'webkit'
    #git = Git("https://github.com/WebKit/webkit.git", "../webkit_final_log_no_mc_options.log")

    INDEX = "git_blink"
    PROJECT = "blink"
    git = Git("https://chromium.googlesource.com/chromium",
              "../blink_final_log_no_cm_options.log")

    commits = []
    cont = 1
    uniq_id = 1
    first = True
    docs = []

    all_files = pandas.DataFrame()

    es_write.indices.delete(INDEX, ignore=[400, 404])
    es_write.indices.create(INDEX, body=MAPPING_GIT)

    for item in git.fetch():
        commits.append(item)

        if cont % 15000 == 0:
            git_events = events.Git(commits)
            events_df = git_events.eventize(1)

            # Add flags if found
            message_log = MessageLogFlag(events_df)
            events_df = message_log.enrich('message')

            splitemail = SplitEmail(events_df)
            events_df = splitemail.enrich("owner")

            # Code for webkit
            # If there's a bot committing code, then we need to use the values flag
            if PROJECT == 'webkit':
                ## Fix values in the owner column
                events_df.loc[events_df["email"] == '*****@*****.**',
                              "owner"] = events_df["values"]
                # Re-do this analysis to calculate the right email and user
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            # Code for Blink
            # If there's a flag, then we need to update the owner
            if PROJECT == 'blink':
                events_df.loc[(events_df["values"] == '') ^ True,
                              "owner"] = events_df["values"]
                splitemail = SplitEmail(events_df)
                events_df = splitemail.enrich("owner")

            splitdomain = SplitEmailDomain(events_df)
            events_df = splitdomain.enrich("email")
            #events_df.drop("message", axis=1, inplace=True)

            # Add project information
            events_df["project"] = PROJECT

            test = events_df.to_dict("index")

            docs = []
            for i in test.keys():
                header = {
                    "_index": INDEX,
                    "_type": "item",
                    "_id": int(uniq_id),
                    "_source": test[i]
                }
                docs.append(header)
                uniq_id = uniq_id + 1

            helpers.bulk(es_write, docs)

            commits = []
        cont = cont + 1

    helpers.bulk(es_write, docs)
#! /usr/bin/env python3
# Count commits

import argparse

from perceval.backends.core.git import Git

# Read command line arguments
parser = argparse.ArgumentParser(description="Count commits in a git repo")
parser.add_argument("repo", help="Repository url")
parser.add_argument("dir", help="Directory for cloning the repository")
parser.add_argument("--print", action='store_true', help="Print hashes")
args = parser.parse_args()

# create a Git object, and count commits
repo = Git(uri=args.repo, gitpath=args.dir)
count = 0
for commit in repo.fetch():
    if args.print:
        print(commit['data']['commit'])
    count += 1
print("Number of commmits: %d." % count)
Example #30
parser.add_argument(
    "-fr",
    "--fromdate",
    help="Date that you want to fetch information from in format YYYYMMDD")
parser.add_argument(
    "-to",
    "--todate",
    help="Date that you want to fetch information till in format YYYYMMDD")
args = parser.parse_args()

# Owner and repository names
(owner, repo) = args.repo.split('/')
repo_git_uri = "http://github.com/{}/{}.git".format(owner, repo)
repo_dir = 'tmp/perceval'

# Convert from and to date to datetime object
fr_dt_tuple = map(int,
                  (args.fromdate[:4], args.fromdate[4:6], args.fromdate[6:]))
fr_dt = datetime.datetime(*fr_dt_tuple)
to_dt_tuple = map(int, (args.todate[:4], args.todate[4:6], args.todate[6:]))
to_dt = datetime.datetime(*to_dt_tuple)

git_obj = Git(uri=repo_git_uri, gitpath=repo_dir)
github_obj = GitHub(owner=owner, repository=repo, api_token=args.token)

# Big dicts printed, can be pretty printed for convenience
for commit in git_obj.fetch():
    print(commit, '\n')

for item in github_obj.fetch(from_date=fr_dt, to_date=to_dt):
    print(item, '\n')