def get_repositories_created(self):
    '''
    Summarise the repository-creation events recorded for this user.

    Looks in the ``events`` collection for ``CreateEvent`` documents whose
    ``payload.ref_type`` is ``repository`` and whose ``actor.login`` is
    ``self.username``. No date filter is applied, so repositories created
    before the course started and after it ended are reported together.

    NOTE(review): the filter does not itself exclude forks; the original
    author's note says forks are covered by ``get_forks_created`` --
    presumably forks never show up as ``CreateEvent``; confirm.

    Returns the dict that is also stored on ``self.repositories_created``:
        count -- number of matching events
        repos -- distinct ``repo.name`` values among those events
    '''
    database = connect()
    creation_filter = {
        'actor.login': self.username,
        'type': 'CreateEvent',
        'payload.ref_type': 'repository',
    }
    creation_events = database.events.find(creation_filter)

    # Reset before running the queries: if count()/distinct() raises, the
    # attribute is left as an empty dict rather than an earlier result.
    self.repositories_created = {}

    event_total = creation_events.count()
    repo_names = creation_events.distinct('repo.name')
    self.repositories_created = {
        'count': event_total,
        'repos': repo_names,
    }
    return self.repositories_created
def get_repositories_created(self):
    '''
    Summarise the repository-creation events recorded for this user.

    Looks in the ``events`` collection for ``CreateEvent`` documents whose
    ``payload.ref_type`` is ``repository`` and whose ``actor.login`` is
    ``self.username``. No date filter is applied, so repositories created
    before the course started and after it ended are reported together.

    NOTE(review): the filter does not itself exclude forks; the original
    author's note says forks are covered by ``get_forks_created`` --
    presumably forks never show up as ``CreateEvent``; confirm.

    Returns the dict that is also stored on ``self.repositories_created``:
        count -- number of matching events
        repos -- distinct ``repo.name`` values among those events
    '''
    database = connect()
    creation_filter = {
        'actor.login': self.username,
        'type': 'CreateEvent',
        'payload.ref_type': 'repository',
    }
    creation_events = database.events.find(creation_filter)

    # Reset before running the queries: if count()/distinct() raises, the
    # attribute is left as an empty dict rather than an earlier result.
    self.repositories_created = {}

    event_total = creation_events.count()
    repo_names = creation_events.distinct('repo.name')
    self.repositories_created = {
        'count': event_total,
        'repos': repo_names,
    }
    return self.repositories_created
def get_events_aggregates_for_user(u):
    '''
    Count a user's events, grouped by event type.

    u -- one of:
           * a ``github.NamedUser.NamedUser`` (or subclass); its ``login``
             is used as the username
           * a string (or string subclass) holding the username
           * anything else (e.g. ``None``), meaning "no user"

    Returns a dict mapping each event ``type`` to the number of events the
    user has of that type. When no user is given, returns instead the
    result of ``distinct('type')`` on the events collection, i.e. every
    event type that is available.
    '''
    # ``unicode`` only exists on Python 2; fall back to ``str`` alone so the
    # check below does not raise NameError on Python 3.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    # isinstance (rather than an exact ``type(u) is ...`` comparison) so that
    # subclasses of NamedUser or of the string types are accepted too.
    if isinstance(u, github.NamedUser.NamedUser):
        username = u.login
    elif isinstance(u, string_types):
        username = u
    else:
        username = None

    events_collection = connect()['events']

    if username is None:
        return events_collection.distinct('type')

    cursor = events_collection.aggregate([
        {"$match": {"actor.login": username}},
        # one output document per event type, carrying how many were seen
        {"$group": {"_id": "$type", "count": {"$sum": 1}}},
    ])
    return {d['_id']: d['count'] for d in cursor}
def get_commits_made(self):
    '''
    Summarise the push events recorded for this user.

    ``PushEvent`` documents with ``actor.login == self.username`` are split
    into three groups by ``repo.name``:
        on_course_repo      -- pushes to ``COURSE_REPO``
        on_course_repo_fork -- pushes to ``<username>/<COURSE_REPO_NAME>``
        on_other_repos      -- pushes to any other repo

    Each group is reported as a dict with:
        pushes  -- number of push events
        commits -- sum of ``payload.size`` over those events (described by
                   the original author as the commits in each push)
        repos   -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.commits_made``.
    '''
    #TODO: Further filter the commits on other repos based on date.

    def pushes_matching(repo_filter):
        # Push events by this user whose repo.name satisfies repo_filter.
        return database.events.find({
            'actor.login': self.username,
            'repo.name': repo_filter,
            'type': 'PushEvent',
        })

    def summarise(push_events):
        # Call order (count, iterate, distinct) mirrors the original code.
        push_total = push_events.count()
        commit_total = sum(
            (event['payload']['size'] for event in push_events), 0)
        return {
            'pushes': push_total,
            'commits': commit_total,
            'repos': push_events.distinct('repo.name'),
        }

    database = connect()
    fork_name = '%s/%s' % (self.username, COURSE_REPO_NAME)

    course_pushes = pushes_matching(COURSE_REPO)
    fork_pushes = pushes_matching(fork_name)
    other_pushes = pushes_matching({'$nin': [COURSE_REPO, fork_name]})

    self.commits_made = {}
    self.commits_made['on_course_repo'] = summarise(course_pushes)
    self.commits_made['on_course_repo_fork'] = summarise(fork_pushes)
    self.commits_made['on_other_repos'] = summarise(other_pushes)
    return self.commits_made
def get_issues_resolved(self):
    '''
    Summarise the issues CLOSED by this user.

    ``IssuesEvent`` documents with ``payload.action == 'closed'`` and
    ``actor.login == self.username`` are split into three groups by
    ``repo.name``:
        on_course_repo      -- issues closed on ``COURSE_REPO``
        on_course_repo_fork -- issues closed on ``<username>/<COURSE_REPO_NAME>``
        on_other_repos      -- issues closed on any other repo

    Each group is reported as a dict with:
        count -- number of matching events
        repos -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.issues_resolved``.
    '''
    #TODO: Further filter the issues on other repos based on date
    #      (before the course started / after the course ended).

    def closed_issues_matching(repo_filter):
        # "closed" issue events by this user whose repo.name satisfies
        # repo_filter.
        return database.events.find({
            'actor.login': self.username,
            'repo.name': repo_filter,
            'type': 'IssuesEvent',
            'payload.action': 'closed',
        })

    def summarise(issue_events):
        closed_total = issue_events.count()
        return {
            'count': closed_total,
            'repos': issue_events.distinct('repo.name'),
        }

    database = connect()
    fork_name = '%s/%s' % (self.username, COURSE_REPO_NAME)

    course_issues = closed_issues_matching(COURSE_REPO)
    fork_issues = closed_issues_matching(fork_name)
    other_issues = closed_issues_matching({'$nin': [COURSE_REPO, fork_name]})

    self.issues_resolved = {}
    self.issues_resolved['on_course_repo'] = summarise(course_issues)
    self.issues_resolved['on_course_repo_fork'] = summarise(fork_issues)
    self.issues_resolved['on_other_repos'] = summarise(other_issues)
    return self.issues_resolved
def get_pull_requests_made(self):
    '''
    Summarise the pull-request events recorded for this user.

    ``PullRequestEvent`` documents with ``actor.login == self.username``
    are reported in two groups:
        on_course_repo -- events on ``COURSE_REPO``
        on_other_repos -- events on any repo other than ``COURSE_REPO`` and
                          the user's fork ``<username>/<COURSE_REPO_NAME>``

    Events on the user's fork itself are not reported. The query does not
    filter on ``payload.action`` or on date, so every pull-request event in
    a group is counted.

    Each group is a dict with:
        count -- number of matching events
        repos -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.pull_requests_made``.
    '''
    def pull_requests_matching(repo_filter):
        # Pull-request events by this user whose repo.name satisfies
        # repo_filter.
        return database.events.find({
            'actor.login': self.username,
            #TODO: Figure out why repo.full_name doesn't work here!
            'repo.name': repo_filter,
            'type': 'PullRequestEvent',
        })

    def summarise(pr_events):
        pr_total = pr_events.count()
        return {
            'count': pr_total,
            'repos': pr_events.distinct('repo.name'),
        }

    database = connect()

    course_prs = pull_requests_matching(COURSE_REPO)
    excluded_repos = [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]
    other_prs = pull_requests_matching({'$nin': excluded_repos})

    self.pull_requests_made = {}
    self.pull_requests_made['on_course_repo'] = summarise(course_prs)
    self.pull_requests_made['on_other_repos'] = summarise(other_prs)
    return self.pull_requests_made
def get_events_aggregates_for_user(u):
    '''
    Count a user's events, grouped by event type.

    u -- one of:
           * a ``github.NamedUser.NamedUser`` (or subclass); its ``login``
             is used as the username
           * a string (or string subclass) holding the username
           * anything else (e.g. ``None``), meaning "no user"

    Returns a dict mapping each event ``type`` to the number of events the
    user has of that type. When no user is given, returns instead the
    result of ``distinct('type')`` on the events collection, i.e. every
    event type that is available.
    '''
    # ``unicode`` only exists on Python 2; fall back to ``str`` alone so the
    # check below does not raise NameError on Python 3.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    # isinstance (rather than an exact ``type(u) is ...`` comparison) so that
    # subclasses of NamedUser or of the string types are accepted too.
    if isinstance(u, github.NamedUser.NamedUser):
        username = u.login
    elif isinstance(u, string_types):
        username = u
    else:
        username = None

    events_collection = connect()['events']

    if username is None:
        return events_collection.distinct('type')

    cursor = events_collection.aggregate([
        {"$match": {"actor.login": username}},
        # one output document per event type, carrying how many were seen
        {"$group": {"_id": "$type", "count": {"$sum": 1}}},
    ])
    return {d['_id']: d['count'] for d in cursor}
def get_forks_created(self):
    '''
    Summarise the fork events recorded for this user.

    ``ForkEvent`` documents with ``actor.login == self.username`` are
    reported in two groups:
        of_course_repo -- forks where ``repo.name`` is ``COURSE_REPO``
        of_other_repos -- forks where ``repo.name`` is anything else

    No date filter is applied, so forks made before the course started and
    after it ended are reported together.

    Each group is a dict with:
        count   -- number of fork events
        fork_of -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.forks_created``.
    '''
    database = connect()

    course_forks = database.events.find({
        'actor.login': self.username,
        'repo.name': COURSE_REPO,
        'type': 'ForkEvent',
    })
    other_forks = database.events.find({
        'actor.login': self.username,
        'repo.name': {'$ne': COURSE_REPO},
        'type': 'ForkEvent',
    })

    self.forks_created = {}
    labelled_groups = (
        # The course-repo query pins repo.name, so its 'fork_of' list can
        # only ever be empty or contain COURSE_REPO; it is kept so both
        # groups have the same shape.
        ('of_course_repo', course_forks),
        ('of_other_repos', other_forks),
    )
    for label, fork_events in labelled_groups:
        fork_total = fork_events.count()
        self.forks_created[label] = {
            'count': fork_total,
            'fork_of': fork_events.distinct('repo.name'),
        }
    return self.forks_created
def get_pull_requests_made(self):
    '''
    Summarise the pull-request events recorded for this user.

    ``PullRequestEvent`` documents with ``actor.login == self.username``
    are reported in two groups:
        on_course_repo -- events on ``COURSE_REPO``
        on_other_repos -- events on any repo other than ``COURSE_REPO`` and
                          the user's fork ``<username>/<COURSE_REPO_NAME>``

    Events on the user's fork itself are not reported. The query does not
    filter on ``payload.action`` or on date, so every pull-request event in
    a group is counted.

    Each group is a dict with:
        count -- number of matching events
        repos -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.pull_requests_made``.
    '''
    def pull_requests_matching(repo_filter):
        # Pull-request events by this user whose repo.name satisfies
        # repo_filter.
        return database.events.find({
            'actor.login': self.username,
            #TODO: Figure out why repo.full_name doesn't work here!
            'repo.name': repo_filter,
            'type': 'PullRequestEvent',
        })

    def summarise(pr_events):
        pr_total = pr_events.count()
        return {
            'count': pr_total,
            'repos': pr_events.distinct('repo.name'),
        }

    database = connect()

    course_prs = pull_requests_matching(COURSE_REPO)
    excluded_repos = [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]
    other_prs = pull_requests_matching({'$nin': excluded_repos})

    self.pull_requests_made = {}
    self.pull_requests_made['on_course_repo'] = summarise(course_prs)
    self.pull_requests_made['on_other_repos'] = summarise(other_prs)
    return self.pull_requests_made
def get_forks_created(self):
    '''
    Summarise the fork events recorded for this user.

    ``ForkEvent`` documents with ``actor.login == self.username`` are
    reported in two groups:
        of_course_repo -- forks where ``repo.name`` is ``COURSE_REPO``
        of_other_repos -- forks where ``repo.name`` is anything else

    No date filter is applied, so forks made before the course started and
    after it ended are reported together.

    Each group is a dict with:
        count   -- number of fork events
        fork_of -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.forks_created``.
    '''
    database = connect()

    course_forks = database.events.find({
        'actor.login': self.username,
        'repo.name': COURSE_REPO,
        'type': 'ForkEvent',
    })
    other_forks = database.events.find({
        'actor.login': self.username,
        'repo.name': {'$ne': COURSE_REPO},
        'type': 'ForkEvent',
    })

    self.forks_created = {}
    labelled_groups = (
        # The course-repo query pins repo.name, so its 'fork_of' list can
        # only ever be empty or contain COURSE_REPO; it is kept so both
        # groups have the same shape.
        ('of_course_repo', course_forks),
        ('of_other_repos', other_forks),
    )
    for label, fork_events in labelled_groups:
        fork_total = fork_events.count()
        self.forks_created[label] = {
            'count': fork_total,
            'fork_of': fork_events.distinct('repo.name'),
        }
    return self.forks_created
def get_commits_made(self):
    '''
    Summarise the push events recorded for this user.

    ``PushEvent`` documents with ``actor.login == self.username`` are split
    into three groups by ``repo.name``:
        on_course_repo      -- pushes to ``COURSE_REPO``
        on_course_repo_fork -- pushes to ``<username>/<COURSE_REPO_NAME>``
        on_other_repos      -- pushes to any other repo

    Each group is reported as a dict with:
        pushes  -- number of push events
        commits -- sum of ``payload.size`` over those events (described by
                   the original author as the commits in each push)
        repos   -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.commits_made``.
    '''
    #TODO: Further filter the commits on other repos based on date.

    def pushes_matching(repo_filter):
        # Push events by this user whose repo.name satisfies repo_filter.
        return database.events.find({
            'actor.login': self.username,
            'repo.name': repo_filter,
            'type': 'PushEvent',
        })

    def summarise(push_events):
        # Call order (count, iterate, distinct) mirrors the original code.
        push_total = push_events.count()
        commit_total = sum(
            (event['payload']['size'] for event in push_events), 0)
        return {
            'pushes': push_total,
            'commits': commit_total,
            'repos': push_events.distinct('repo.name'),
        }

    database = connect()
    fork_name = '%s/%s' % (self.username, COURSE_REPO_NAME)

    course_pushes = pushes_matching(COURSE_REPO)
    fork_pushes = pushes_matching(fork_name)
    other_pushes = pushes_matching({'$nin': [COURSE_REPO, fork_name]})

    self.commits_made = {}
    self.commits_made['on_course_repo'] = summarise(course_pushes)
    self.commits_made['on_course_repo_fork'] = summarise(fork_pushes)
    self.commits_made['on_other_repos'] = summarise(other_pushes)
    return self.commits_made
def get_issues_resolved(self):
    '''
    Summarise the issues CLOSED by this user.

    ``IssuesEvent`` documents with ``payload.action == 'closed'`` and
    ``actor.login == self.username`` are split into three groups by
    ``repo.name``:
        on_course_repo      -- issues closed on ``COURSE_REPO``
        on_course_repo_fork -- issues closed on ``<username>/<COURSE_REPO_NAME>``
        on_other_repos      -- issues closed on any other repo

    Each group is reported as a dict with:
        count -- number of matching events
        repos -- distinct ``repo.name`` values in the group

    Returns the dict that is also stored on ``self.issues_resolved``.
    '''
    #TODO: Further filter the issues on other repos based on date
    #      (before the course started / after the course ended).

    def closed_issues_matching(repo_filter):
        # "closed" issue events by this user whose repo.name satisfies
        # repo_filter.
        return database.events.find({
            'actor.login': self.username,
            'repo.name': repo_filter,
            'type': 'IssuesEvent',
            'payload.action': 'closed',
        })

    def summarise(issue_events):
        closed_total = issue_events.count()
        return {
            'count': closed_total,
            'repos': issue_events.distinct('repo.name'),
        }

    database = connect()
    fork_name = '%s/%s' % (self.username, COURSE_REPO_NAME)

    course_issues = closed_issues_matching(COURSE_REPO)
    fork_issues = closed_issues_matching(fork_name)
    other_issues = closed_issues_matching({'$nin': [COURSE_REPO, fork_name]})

    self.issues_resolved = {}
    self.issues_resolved['on_course_repo'] = summarise(course_issues)
    self.issues_resolved['on_course_repo_fork'] = summarise(fork_issues)
    self.issues_resolved['on_other_repos'] = summarise(other_issues)
    return self.issues_resolved
tagsList.append(tagStr) videoCount = videoCount + 1 count = len(playCountList) if save == True: for i in range(count): addCount = addCount + store.storeVideo(titleList[i], hrefList[i], log_vidList[i], uploaderList[i], playCountList[i], pubdateList[i], "4", gameidx, tagsList[i], thumbImgSrc) print log_vidList[i], uploaderList[i], playCountList[i], pubdateList[i], thumbImgSrc time.sleep(5) return videoCount, addCount startdatestr = datetime.strftime(datetime.now(),'%a %b %d %H:%M:%S %z %Y') ftest, atest = fetchaipai("手机", 1, 2, -1, False) print ftest, atest if ftest <= 0: sendMail("aipai Spider error", "aipai spider is error, check it.") store.connect() gameList = store.getGameList() todayFetch = 0 todayAdd = 0 for aGame in gameList: print "###", aGame[1].decode("utf-8") fet, add = fetchaipai(aGame[1].strip(), 1, 10, aGame[0]) todayFetch = todayFetch + fet todayAdd = todayAdd + add time.sleep(1) store.close() sendMail("aipai Spider Success", "aipai spider success, start at %s fetch %d videos, add %d videos" % (startdatestr, todayFetch, todayAdd))