def run(name, **kwargs):
    """Run a map/reduce job over the ``repositories`` collection.

    The output is written to the collection named ``summary-<name>``.

    Keyword arguments:
        map: passed to ``load_js`` to obtain the map function (required).
        reduce: passed to ``load_js`` to obtain the reduce function
            (required).
        finalize: optional; when present it is passed to ``load_js`` and
            forwarded to ``map_reduce`` as the ``finalize`` option.

    Raises KeyError when ``map`` or ``reduce`` is missing. The result of
    ``map_reduce`` is discarded.
    """
    # Only forward a finalize step when the caller actually supplied one.
    extra_options = {}
    if "finalize" in kwargs:
        extra_options["finalize"] = load_js(kwargs["finalize"])

    map_reduce = mongo.db().repositories.map_reduce
    map_source = load_js(kwargs["map"])
    reduce_source = load_js(kwargs["reduce"])
    output_name = "summary-%s" % name

    map_reduce(map_source, reduce_source, output_name, **extra_options)
def remove_undefined(collection_name):
    """Drop the ``'undefined'`` entry from every summary document's value.

    Works on the collection named ``summary-<collection_name>``. Each
    document whose ``value`` contains the key ``'undefined'`` has that key
    deleted and is then saved back; other documents are left untouched.
    """
    summary = mongo.db()['summary-%s' % collection_name]

    for document in summary.find():
        value = document['value']
        if 'undefined' not in value:
            continue

        # presumably left behind by the JavaScript map/reduce step - TODO confirm
        del value['undefined']
        summary.save(document)
import json

from pycheckup import mongo

db = mongo.db()


def json_encoder(obj):
    """Fallback serializer handed to ``json.dumps`` through ``default=``.

    Returns ``obj.isoformat()`` for any object that exposes an
    ``isoformat`` attribute.

    Raises:
        TypeError: if ``obj`` has no ``isoformat`` attribute.
    """
    if hasattr(obj, "isoformat"):
        return obj.isoformat()

    # Call-form raise: valid on both Python 2 and Python 3, unlike the
    # Python-2-only ``raise TypeError, "..."`` spelling.
    raise TypeError("Could not encode %s" % type(obj))


def stats(user, repo):
    """Fetch the repository document for ``user/repo`` with JSON-encoded stats.

    The document is looked up by ``_id == "<user>/<repo>"`` in the
    ``repositories`` collection. Its ``commits``, ``line_count``, ``pep8``,
    ``pyflakes`` and ``swearing`` fields are each replaced by their JSON
    string form (using ``json_encoder`` as the fallback serializer), and the
    modified document is returned.

    NOTE(review): no handling exists for a missing document; if the lookup
    yields nothing the first field access below fails - confirm callers
    only pass known repositories.
    """
    doc = db.repositories.find_one({"_id": "%s/%s" % (user, repo)})

    # Same fields, same order as the original one-assignment-per-field code.
    for field in ("commits", "line_count", "pep8", "pyflakes", "swearing"):
        doc[field] = json.dumps(doc[field], default=json_encoder)

    return doc
import math

from django.core.cache import cache

from pycheckup import mongo


collection = mongo.db().repositories

# Metrics for which a distribution can be requested, keyed by metric name.
# Each value is a 3-tuple; distribution() passes the first two items to
# get_latest(). The third item is not used in the code visible here.
TYPES = {
    # document field, data attribute, max
    'line_count': ('line_count', 'total', 6000000),
    'open_issues': ('popularity', 'open_issues', 400),
    'forks': ('popularity', 'forks', 800),
    'watchers': ('popularity', 'watchers', 4500),
    'num_collaborators': ('popularity', 'num_collaborators', 300),
    'swearing': ('swearing', 'total', 6000),
    'pep8': ('pep8', 'total', 600000),
    'pyflakes': ('pyflakes', 'total', 15000),
}


def distribution(name):
    """Look up distribution data for one of the metrics listed in TYPES.

    Raises ValueError when ``name`` is not a key of TYPES. When the Django
    cache holds a non-None entry under ``distribution-<name>``, that entry
    is returned directly.

    NOTE(review): the reviewed excerpt ends right after the get_latest()
    call, so whatever is done with ``data`` is not documented here.
    """
    if name not in TYPES:
        raise ValueError

    cache_key = 'distribution-%s' % name
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    # Cache miss: fetch the data using the metric's document field and
    # data attribute.
    data = get_latest(TYPES[name][0], TYPES[name][1])
def bootstrap_repo(user, repo_name, weeks=31):
    """Clone a repository, run all probes over its history and save the result.

    A fresh document is created with ``document.empty`` and filled in by the
    one-time probes (license, readme, setup_py, tabs_or_spaces, popularity).
    Then, stepping back one week at a time from today's UTC midnight, the
    first commit in ``repo.commits`` dated on or before each step is checked
    out and the weekly probes (commits, line_count, pep8, pyflakes, swearing)
    are run against it. The document is finally saved to the
    ``repositories`` collection.

    Args:
        user: owner part of the ``user/repo_name`` identifier.
        repo_name: repository part of the identifier.
        weeks: number of weekly steps to walk back through history.
            Defaults to 31, the value that used to be hard-coded.

    ``repo.cleanup()`` is called once the clone has succeeded, even when a
    later step raises, so a failed run does not skip the cleanup.
    """
    logger.info('Bootstraping %s/%s' % (user, repo_name))

    repo = GitRepo(user, repo_name)
    repo.clone()

    try:
        repo.load_commits()

        # Always start from a blank document instead of loading a stored one.
        doc = document.empty(user, repo_name)

        # Today at 00:00 UTC; moved back one week per outer-loop iteration.
        working_date = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
        one_week = timedelta(days=7)

        # one-time probes
        license.run(repo, doc)
        readme.run(repo, doc)
        setup_py.run(repo, doc)
        tabs_or_spaces.run(repo, doc)
        popularity.run(repo, doc, working_date)

        current_rev = None

        if not repo.commits:
            doc['empty'] = True
            print('No commits in time period, skipping...')
        else:
            for _ in range(weeks):
                # Use the first commit dated on or before the working date
                # (presumably repo.commits is ordered newest first - TODO confirm).
                for c in repo.commits:
                    if c['date'] <= working_date:
                        # Only check it out if we need to
                        if c['rev'] != current_rev:
                            logger.info('Checking out %s (%s)' % (c['rev'], c['date']))
                            repo.checkout(c['rev'])
                            current_rev = c['rev']

                        # weekly probes
                        commits.run(repo, doc, working_date)
                        line_count.run(repo, doc, working_date)
                        pep8.run(repo, doc, working_date)
                        pyflakes.run(repo, doc, working_date)
                        swearing.run(repo, doc, working_date)
                        break

                working_date -= one_week

            # History was gathered walking backwards in time, so flip each
            # weekly series (presumably into chronological order - TODO confirm).
            for field in ('commits', 'swearing', 'line_count', 'pep8', 'pyflakes'):
                doc[field].reverse()

        repo_collection = mongo.db().repositories
        repo_collection.save(doc)
    finally:
        repo.cleanup()

    print('Finished %s/%s' % (user, repo_name))