def get_repositories_created(self):
        '''
        Summarise the repositories this user created from scratch.

        Looks up every ``CreateEvent`` whose ``payload.ref_type`` is
        ``repository`` and whose actor is ``self.username``. Forks are not
        counted here; they are reported by get_forks_created.

        The summary is stored on ``self.repositories_created`` and returned
        as a dict with:
            count -- number of matching events
            repos -- distinct ``repo.name`` values among those events

        No filtering by course start/end date is applied.
        '''
        creation_filter = {
            'actor.login': self.username,
            'type': 'CreateEvent',
            'payload.ref_type': 'repository',
        }
        created = connect().events.find(creation_filter)

        # the attribute is reset before the lookups below are executed
        self.repositories_created = {}
        total = created.count()
        names = created.distinct('repo.name')
        self.repositories_created = {'count': total, 'repos': names}

        return self.repositories_created
    def get_repositories_created(self):
        '''
        Report the repositories that the user created (forks excluded;
        those are handled by get_forks_created).

        Matches events of type ``CreateEvent`` with ``payload.ref_type``
        equal to ``repository`` where the actor is ``self.username``.

        Stores the result on ``self.repositories_created`` and returns it:
        ``{'count': <number of events>, 'repos': <distinct repo names>}``.
        The result is not split by course dates.
        '''
        # query
        events = connect().events
        cursor = events.find({
            'actor.login': self.username,
            'type': 'CreateEvent',
            'payload.ref_type': 'repository',
        })

        # summarise, then publish on the instance
        self.repositories_created = {}
        summary = {}
        summary['count'] = cursor.count()
        summary['repos'] = cursor.distinct('repo.name')
        self.repositories_created = summary

        return summary
def get_events_aggregates_for_user(u):
    '''
    Get per-type event counts for a user.

    u may be a github.NamedUser.NamedUser (its ``login`` is used) or a
    username string (str or unicode). Subclasses of these types are
    accepted as well.

    Returns a dict mapping event type -> number of events by that user.
    If a user is not specified (u is anything else, e.g. None), returns
    the distinct event types that are available in the events collection.
    '''
    # isinstance instead of an exact type comparison, so that subclasses
    # of NamedUser / str / unicode are recognised too
    if isinstance(u, github.NamedUser.NamedUser):
        username = u.login
    elif isinstance(u, (unicode, str)):
        username = u
    else:
        username = None

    events_collection = connect()['events']

    if username is None:
        return events_collection.distinct('type')

    cursor = events_collection.aggregate([
        {"$match": {"actor.login": username}},
        # one output document per event type, carrying how many were seen
        {"$group": {"_id": "$type", "count": {"$sum": 1}}},
    ])

    return dict((d['_id'], d['count']) for d in cursor)
    def get_commits_made(self):
        '''
        Summarise the user's PushEvents, split by where they were pushed.

        Three buckets are computed and stored on ``self.commits_made``:

        on_course_repo      -- pushes to COURSE_REPO
        on_course_repo_fork -- pushes to "<username>/<COURSE_REPO_NAME>"
        on_other_repos      -- pushes to any repository other than those two

        Each bucket is a dict with:
            pushes  -- number of matching PushEvents
            commits -- sum of ``payload.size`` over those events
            repos   -- distinct ``repo.name`` values

        Returns the dict stored on ``self.commits_made``.
        '''
        #TODO: Further filter the commits on other repos based on date.

        events = connect().events

        def pushes_where(repo_condition):
            # cursor over this user's PushEvents restricted by repo name
            return events.find({
                'actor.login': self.username,
                'repo.name': repo_condition,
                'type': 'PushEvent',
            })

        buckets = [
            ('on_course_repo', pushes_where(COURSE_REPO)),
            ('on_course_repo_fork',
             pushes_where('%s/%s' % (self.username, COURSE_REPO_NAME))),
            ('on_other_repos',
             pushes_where({'$nin': [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]})),
        ]

        self.commits_made = {}
        for label, cursor in buckets:
            push_total = cursor.count()
            # every push event records how many commits it carried
            commit_total = sum(push['payload']['size'] for push in cursor)
            repo_names = cursor.distinct('repo.name')
            self.commits_made[label] = {
                'pushes': push_total,
                'commits': commit_total,
                'repos': repo_names,
            }

        return self.commits_made
    def get_issues_resolved(self):
        '''
        Summarise the issues CLOSED by the user, split by repository.

        Looks at ``IssuesEvent`` documents whose ``payload.action`` is
        ``closed`` and whose actor is ``self.username``, in three buckets:

        on_course_repo      -- events on COURSE_REPO
        on_course_repo_fork -- events on "<username>/<COURSE_REPO_NAME>"
        on_other_repos      -- events on any other repository

        Each bucket holds ``count`` (number of events) and ``repos``
        (distinct ``repo.name`` values). The result is stored on
        ``self.issues_resolved`` and returned.
        '''
        #TODO: Further filter the issues based on date (before/after the course)
        events = connect().events

        def closed_issues_where(repo_condition):
            # cursor over this user's "closed" IssuesEvents for the given repos
            return events.find({
                'actor.login': self.username,
                'repo.name': repo_condition,
                'type': 'IssuesEvent',
                'payload.action': 'closed',
            })

        buckets = [
            ('on_course_repo', closed_issues_where(COURSE_REPO)),
            ('on_course_repo_fork',
             closed_issues_where('%s/%s' % (self.username, COURSE_REPO_NAME))),
            ('on_other_repos',
             closed_issues_where({'$nin': [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]})),
        ]

        self.issues_resolved = {}
        for label, cursor in buckets:
            event_total = cursor.count()
            repo_names = cursor.distinct('repo.name')
            self.issues_resolved[label] = {
                'count': event_total,
                'repos': repo_names,
            }

        return self.issues_resolved
    def get_pull_requests_made(self):
        '''
        Summarise the user's PullRequestEvents, split by repository.

        Two buckets are computed and stored on ``self.pull_requests_made``:

        on_course_repo -- events on COURSE_REPO
        on_other_repos -- events on repositories other than COURSE_REPO
                          and "<username>/<COURSE_REPO_NAME>"

        Each bucket holds ``count`` (number of events) and ``repos``
        (distinct ``repo.name`` values). Events on the user's fork of the
        course repo fall into neither bucket, and no date filtering is
        applied.

        Returns the dict stored on ``self.pull_requests_made``.
        '''
        events = connect().events

        def pull_requests_where(repo_condition):
            # cursor over this user's PullRequestEvents for the given repos
            #TODO: Figure out why repo.full_name doesn't work here!
            return events.find({
                'actor.login': self.username,
                'repo.name': repo_condition,
                'type': 'PullRequestEvent',
            })

        buckets = [
            ('on_course_repo', pull_requests_where(COURSE_REPO)),
            ('on_other_repos',
             pull_requests_where({'$nin': [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]})),
        ]

        self.pull_requests_made = {}
        for label, cursor in buckets:
            event_total = cursor.count()
            repo_names = cursor.distinct('repo.name')
            self.pull_requests_made[label] = {
                'count': event_total,
                'repos': repo_names,
            }

        return self.pull_requests_made
def get_events_aggregates_for_user(u):
    '''
    Get aggregates of all types of events for a user.

    u is either a github.NamedUser.NamedUser (its ``login`` is used) or a
    username given as str/unicode; subclasses of these are accepted too.

    Returns a dict of ``{event type: number of events}`` for that user.
    If a user is not specified (any other argument, e.g. None), returns
    all the event types that are available in the events collection.
    '''
    # isinstance (not "type(u) is ...") so subclasses are handled as well
    if isinstance(u, github.NamedUser.NamedUser):
        username = u.login
    elif isinstance(u, (unicode, str)):
        username = u
    else:
        username = None

    events_collection = connect()['events']

    if username is not None:
        pipeline = [
            # keep only this user's events ...
            {"$match": {"actor.login": username}},
            # ... and count them per event type
            {"$group": {"_id": "$type", "count": {"$sum": 1}}},
        ]

        event_aggregates = {}
        for d in events_collection.aggregate(pipeline):
            event_aggregates[d['_id']] = d['count']

        return event_aggregates
    else:
        return events_collection.distinct('type')
    def get_forks_created(self):
        '''
        Summarise the forks created by the user.

        Looks at ``ForkEvent`` documents whose actor is ``self.username``,
        in two buckets:

        of_course_repo -- events where ``repo.name`` is COURSE_REPO
        of_other_repos -- events where ``repo.name`` is anything else

        Each bucket holds ``count`` (number of events) and ``fork_of``
        (distinct ``repo.name`` values). No date filtering is applied.

        The result is stored on ``self.forks_created`` and returned.
        '''
        events = connect().events

        def forks_where(repo_condition):
            # cursor over this user's ForkEvents for the given repos
            return events.find({
                'actor.login': self.username,
                'repo.name': repo_condition,
                'type': 'ForkEvent',
            })

        buckets = [
            ('of_course_repo', forks_where(COURSE_REPO)),
            ('of_other_repos', forks_where({'$ne': COURSE_REPO})),
        ]

        self.forks_created = {}
        for label, cursor in buckets:
            fork_total = cursor.count()
            forked_names = cursor.distinct('repo.name')
            self.forks_created[label] = {
                'count': fork_total,
                'fork_of': forked_names,
            }

        return self.forks_created
    def get_pull_requests_made(self):
        '''
        Count the pull request events raised by the user.

        Stores on ``self.pull_requests_made`` and returns a dict with:

        on_course_repo -- PullRequestEvents on COURSE_REPO
        on_other_repos -- PullRequestEvents on repositories that are
                          neither COURSE_REPO nor
                          "<username>/<COURSE_REPO_NAME>"

        Each entry is ``{'count': <events>, 'repos': <distinct repo names>}``.
        The user's fork of the course repo is not reported separately and
        results are not split by course dates.
        '''
        def summarise(cursor):
            # number of events first, then the distinct repository names
            event_total = cursor.count()
            repo_names = cursor.distinct('repo.name')
            return {'count': event_total, 'repos': repo_names}

        # build the (lazy) cursors
        events = connect().events
        #TODO: Figure out why repo.full_name doesn't work here!
        course_cursor = events.find({
            'actor.login': self.username,
            'repo.name': COURSE_REPO,
            'type': 'PullRequestEvent',
        })
        excluded = [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]
        other_cursor = events.find({
            'actor.login': self.username,
            'repo.name': {'$nin': excluded},
            'type': 'PullRequestEvent',
        })

        # store the summaries
        self.pull_requests_made = {}
        self.pull_requests_made['on_course_repo'] = summarise(course_cursor)
        self.pull_requests_made['on_other_repos'] = summarise(other_cursor)

        return self.pull_requests_made
    def get_forks_created(self):
        '''
        Count the fork events made by the user.

        Stores on ``self.forks_created`` and returns a dict with:

        of_course_repo -- ForkEvents whose ``repo.name`` is COURSE_REPO
        of_other_repos -- ForkEvents whose ``repo.name`` is not COURSE_REPO

        Each entry is ``{'count': <events>, 'fork_of': <distinct repo names>}``.
        Results are not split by course dates.
        '''
        def summarise(cursor):
            # number of events first, then the distinct repository names
            fork_total = cursor.count()
            forked_names = cursor.distinct('repo.name')
            return {'count': fork_total, 'fork_of': forked_names}

        # build the (lazy) cursors
        events = connect().events
        course_cursor = events.find({
            'actor.login': self.username,
            'repo.name': COURSE_REPO,
            'type': 'ForkEvent',
        })
        other_cursor = events.find({
            'actor.login': self.username,
            'repo.name': {'$ne': COURSE_REPO},
            'type': 'ForkEvent',
        })

        # store the summaries
        self.forks_created = {}
        self.forks_created['of_course_repo'] = summarise(course_cursor)
        self.forks_created['of_other_repos'] = summarise(other_cursor)

        return self.forks_created
    def get_commits_made(self):
        '''
        Count the pushes and commits made by the user.

        Stores on ``self.commits_made`` and returns a dict with:

        on_course_repo      -- PushEvents on COURSE_REPO
        on_course_repo_fork -- PushEvents on "<username>/<COURSE_REPO_NAME>"
        on_other_repos      -- PushEvents on any other repository

        Each entry is a dict with ``pushes`` (number of PushEvents),
        ``commits`` (sum of ``payload.size`` across them) and ``repos``
        (distinct ``repo.name`` values).
        '''
        #TODO: Further filter the commits on other repos based on date.

        def summarise(cursor):
            # count, then walk the cursor to add up commits, then repo names
            push_total = cursor.count()
            commit_total = 0
            for push in cursor:
                commit_total = commit_total + push['payload']['size']
            repo_names = cursor.distinct('repo.name')
            return {
                'pushes': push_total,
                'commits': commit_total,
                'repos': repo_names,
            }

        # build the (lazy) cursors
        events = connect().events
        course_cursor = events.find({
            'actor.login': self.username,
            'repo.name': COURSE_REPO,
            'type': 'PushEvent',
        })
        fork_cursor = events.find({
            'actor.login': self.username,
            'repo.name': '%s/%s' % (self.username, COURSE_REPO_NAME),
            'type': 'PushEvent',
        })
        excluded = [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]
        other_cursor = events.find({
            'actor.login': self.username,
            'repo.name': {'$nin': excluded},
            'type': 'PushEvent',
        })

        # store the summaries
        self.commits_made = {}
        self.commits_made['on_course_repo'] = summarise(course_cursor)
        self.commits_made['on_course_repo_fork'] = summarise(fork_cursor)
        self.commits_made['on_other_repos'] = summarise(other_cursor)

        return self.commits_made
    def get_issues_resolved(self):
        '''
        Count the issues CLOSED by the user.

        Stores on ``self.issues_resolved`` and returns a dict with:

        on_course_repo      -- closed IssuesEvents on COURSE_REPO
        on_course_repo_fork -- closed IssuesEvents on
                               "<username>/<COURSE_REPO_NAME>"
        on_other_repos      -- closed IssuesEvents on any other repository

        Each entry is ``{'count': <events>, 'repos': <distinct repo names>}``.
        '''
        #TODO: Further filter the issues based on date (before/after the course)

        def summarise(cursor):
            # number of events first, then the distinct repository names
            event_total = cursor.count()
            repo_names = cursor.distinct('repo.name')
            return {'count': event_total, 'repos': repo_names}

        # build the (lazy) cursors
        events = connect().events
        course_cursor = events.find({
            'actor.login': self.username,
            'repo.name': COURSE_REPO,
            'type': 'IssuesEvent',
            'payload.action': 'closed',
        })
        fork_cursor = events.find({
            'actor.login': self.username,
            'repo.name': '%s/%s' % (self.username, COURSE_REPO_NAME),
            'type': 'IssuesEvent',
            'payload.action': 'closed',
        })
        excluded = [COURSE_REPO, '%s/%s' % (self.username, COURSE_REPO_NAME)]
        other_cursor = events.find({
            'actor.login': self.username,
            'repo.name': {'$nin': excluded},
            'type': 'IssuesEvent',
            'payload.action': 'closed',
        })

        # store the summaries
        self.issues_resolved = {}
        self.issues_resolved['on_course_repo'] = summarise(course_cursor)
        self.issues_resolved['on_course_repo_fork'] = summarise(fork_cursor)
        self.issues_resolved['on_other_repos'] = summarise(other_cursor)

        return self.issues_resolved
示例#13
0
            tagsList.append(tagStr)
            videoCount = videoCount + 1

        count = len(playCountList)
        if save == True:
            for i in range(count):
                addCount = addCount + store.storeVideo(titleList[i], hrefList[i], log_vidList[i], uploaderList[i], playCountList[i], pubdateList[i], "4", gameidx, tagsList[i], thumbImgSrc)
                print log_vidList[i], uploaderList[i], playCountList[i], pubdateList[i], thumbImgSrc

        time.sleep(5)
    return videoCount, addCount

# ---- Spider driver: runs at module level (Python 2 syntax) ----

# Start time of this run as text; it is quoted in the success mail below.
# NOTE(review): datetime.now() is called without a tzinfo, so '%z' presumably
# renders as an empty string here -- confirm that is acceptable.
startdatestr = datetime.strftime(datetime.now(),'%a %b %d %H:%M:%S %z %Y')

# Probe fetch before the real run. The last positional argument is False,
# presumably the "save" flag so nothing is stored -- verify against
# fetchaipai's signature, which is not visible here.
ftest, atest = fetchaipai("手机", 1, 2, -1, False)
print ftest, atest
# A non-positive fetched count triggers an error mail; the script still
# continues with the full run afterwards.
if ftest <= 0:
    sendMail("aipai Spider error", "aipai spider is error, check it.")

# Full run: one fetch per game returned by the store.
store.connect()
gameList = store.getGameList()
todayFetch = 0  # running total of the first value returned by fetchaipai
todayAdd = 0    # running total of the second value returned by fetchaipai
for aGame in gameList:
    # aGame[1] is decoded as UTF-8 for display and passed (stripped) as the
    # first argument; aGame[0] is passed as the fourth argument.
    print "###", aGame[1].decode("utf-8")
    fet, add = fetchaipai(aGame[1].strip(), 1, 10, aGame[0])
    todayFetch = todayFetch + fet
    todayAdd = todayAdd + add
    time.sleep(1)  # pause one second between games
# NOTE(review): store.close() is skipped if anything above raises.
store.close()
sendMail("aipai Spider Success", "aipai spider success, start at %s fetch %d videos, add %d videos" % (startdatestr, todayFetch, todayAdd))