Exemplo n.º 1
0
def getDeveloper_Contribute(data):
    """Fetch weekly contribution statistics for a repository and store them per developer.

    Requests ``data['url'] + "/stats/contributors"`` and, for every contributor
    in the response, serialises the weekly additions, deletions and commit
    counts (keyed by the week value ``w``) to JSON strings. These are written
    onto the ``Repo_Developer_info`` rows matching the repository and the user.
    Only ``update`` is used, so no rows are created here.

    Args:
        data: Repository mapping; the ``'id'`` and ``'url'`` keys are read.

    Returns:
        None. Any exception is handed to ``log.set_log`` instead of propagating.
    """
    try:
        # Top 99 commit contributors according to /stats/contributors.
        contributor_repo = github.models.Repo_Base_Info.objects.filter(repo_id=data['id'])
        statistic_contributors_url = data['url'] + "/stats/contributors"
        statistic_contributors_html = requests.get(statistic_contributors_url, headers=headers).text
        statistic_contributors_info = json.loads(statistic_contributors_html)
        if not statistic_contributors_info:
            return

        for contributor in statistic_contributors_info:
            author = contributor['author']
            if not author:
                # NOTE(review): the GitHub schema appears to allow a null author;
                # skip such entries instead of aborting the remaining contributors.
                continue
            contributor_user = github.models.User_Org_Info.objects.filter(user_id=author['id'])

            add_perweek = {}
            del_perweek = {}
            commit_perweek = {}
            for week in contributor['weeks']:
                # Entries that are not dictionaries are ignored.
                if not isinstance(week, dict):
                    continue
                key = week['w']
                add_perweek[key] = week['a']
                del_perweek[key] = week['d']
                commit_perweek[key] = week['c']

            # No ``encoding`` keyword: it only exists on Python 2 (where UTF-8 is
            # already the default) and raises TypeError on Python 3.
            add_perweek = json.dumps(add_perweek, ensure_ascii=False)
            del_perweek = json.dumps(del_perweek, ensure_ascii=False)
            commit_perweek = json.dumps(commit_perweek, ensure_ascii=False)

            # Rows that do not exist yet are left alone; update() touches matches only.
            github.models.Repo_Developer_info.objects.filter(Q(repo=contributor_repo) & Q(user=contributor_user)) \
                .update(user_commit_count_perweek=commit_perweek, user_add_count_perweek=add_perweek,
                        user_del_count_perweek=del_perweek)
    except BaseException as e:
        log.set_log(e)
Exemplo n.º 2
0
    def parse(self, html):
        """Create or refresh a ``Repo_Issue_info`` row for every issue in ``html``.

        Entries carrying a ``'pull_request'`` key are skipped, because GitHub lists
        pull requests among the issues. The textual state is mapped to
        0 (open), 1 (closed) or 2 (anything else). When the issue author is not
        yet stored, ``downloadBaseInfo.getUserInfo`` is called with the author
        object before the user is looked up again.

        Args:
            html: Iterable of decoded issue objects (dictionaries).

        Returns:
            None. Failures for one entry are logged through ``log.set_log`` and
            do not stop the processing of the remaining entries.
        """
        state_codes = {'open': 0, 'closed': 1}
        for v in html:
            try:
                # GitHub treats a pull request as an issue; those are excluded here.
                if 'pull_request' in v:
                    continue
                issue_id = v['id']
                issue_number = v['number']
                issue_state = state_codes.get(v['state'], 2)

                issue_create_time = v['created_at']
                issue_update_time = v['updated_at']
                issue_close_time = v['closed_at']
                issue_comment_count = v['comments']
                issue_user_type = v['author_association']
                issue_user_id = v['user']['id']

                user = github.models.User_Org_Info.objects.filter(
                    user_id=issue_user_id)
                if len(user) < 1:
                    # Unknown author: have it fetched, then look it up again.
                    downloadBaseInfo.getUserInfo(v['user'])
                    user = github.models.User_Org_Info.objects.filter(
                        user_id=issue_user_id)
                repo = github.models.Repo_Base_Info.objects.filter(
                    repo_id=self.data['id'])
                try:
                    issues = github.models.Repo_Issue_info.objects.filter(
                        Q(issue_id=issue_id)
                        & Q(issue_number=issue_number))
                    if issues:
                        # Already stored: refresh only the fields that can change.
                        issue = issues[0]
                        issue.issue_state = issue_state
                        issue.issue_update_time = issue_update_time
                        issue.issue_close_time = issue_close_time
                        issue.issue_comment_count = issue_comment_count
                        issue.save()
                    else:
                        # create() already persists the row; no extra save() is needed.
                        github.models.Repo_Issue_info.objects.create(
                            repo=repo[0],
                            user=user[0],
                            issue_id=issue_id,
                            issue_number=issue_number,
                            issue_create_time=issue_create_time,
                            issue_update_time=issue_update_time,
                            issue_close_time=issue_close_time,
                            issue_comment_count=issue_comment_count,
                            issue_user_type=issue_user_type,
                            issue_state=issue_state)
                except BaseException as ex:
                    # Storage problems used to be dropped silently; record them
                    # and carry on with the next issue.
                    log.set_log(ex)

            except BaseException as e:
                log.set_log(e)
Exemplo n.º 3
0
 def run(self):
     """Download pull-request listing pages and hand them to the data queue.

     Page numbers are read from ``self.pageQueue`` while ``CRAWL_EXIT`` is
     false. Each page is requested with ``state=all`` and 100 entries per
     page; a non-empty decoded response is put on ``self.dataQueue``. Any
     exception ends the loop and is passed to ``log.set_log``.
     """
     try:
         while not CRAWL_EXIT:
             page_no = self.pageQueue.get(False)
             # The last nine characters of pulls_url are cut off before the query
             # is appended (presumably the "{/number}" URL template — confirm).
             listing_url = self.data["pulls_url"][0:-9]
             listing_url = listing_url + "?state=all" + "&per_page=100&page=" + str(page_no)
             response = requests.get(listing_url, headers=headers)
             pulls = json.loads(response.text)
             if not pulls:
                 continue
             # Queue the downloaded data for later processing.
             self.dataQueue.put(pulls)
     except BaseException as err:
         log.set_log(err)
Exemplo n.º 4
0
    def run(self):
        """Crawl contributor pages of one repository and store per-developer commit data.

        Page numbers are taken from ``self.pageQueue`` while ``CRAWL_EXIT`` is
        false. For every contributor on a page the method:

        1. makes sure a ``User_Org_Info`` row exists (anonymous contributors are
           matched by e-mail and get a generated id when unknown);
        2. reads the newest commit date from the first page of the contributor's
           commit listing and the oldest commit date from the last page;
        3. adds the number of commits seen to the global ``commit_total_count``;
        4. creates or updates the matching ``Repo_Developer_info`` row.

        Contributors whose first or last commit date cannot be determined are
        skipped rather than stored with dates belonging to another contributor.

        Returns:
            None. Any exception is handed to ``log.set_log`` and ends this worker.
        """
        try:
            repo_id = self.data['id']
            global commit_total_count
            while not CRAWL_EXIT:
                page = self.pageQueue.get(False)
                contributor_url = self.data['contributors_url'] + "?anon=1&per_page=100&page=" + str(page)
                contributor_html = requests.get(contributor_url, headers=headers).text
                contributor_info = json.loads(contributor_html)
                if not contributor_info:
                    continue

                for v in contributor_info:
                    contributor_contributions = v['contributions']
                    if 'id' in v:
                        # Registered account.
                        contributor_id = v['id']
                        contributor_name = v['login']
                        contributor_avatar_url = v['avatar_url']
                        contributor_type = v['type']
                        try:
                            github.models.User_Org_Info.objects.get(user_id=contributor_id)
                        except github.models.User_Org_Info.DoesNotExist:
                            # create() already persists the row.
                            github.models.User_Org_Info.objects.create(user_id=contributor_id,
                                                                       user_name=contributor_name,
                                                                       avatar_url=contributor_avatar_url,
                                                                       user_type=contributor_type)
                        contributor_commit_user = contributor_name
                    else:
                        # Anonymous contributor: identified by e-mail only.
                        contributor_email = v['email']
                        contributor_fullname = v['name']
                        contributor_type = v['type']
                        try:
                            contributor_user = github.models.User_Org_Info.objects.get(email_url=contributor_email)
                            contributor_id = contributor_user.user_id
                        except github.models.User_Org_Info.DoesNotExist:
                            # Generated id: '9' + five random digits + the last nine
                            # digits of the current millisecond timestamp.
                            random_id = ''.join(str(random.choice(range(10))) for _ in range(5))
                            time_stamp = str(int(round(time.time() * 1000)))[-9:]
                            contributor_id = '9' + random_id + time_stamp
                            github.models.User_Org_Info.objects.create(user_id=contributor_id,
                                                                       email_url=contributor_email,
                                                                       user_fullname=contributor_fullname,
                                                                       user_type=contributor_type)
                        contributor_commit_user = contributor_email

                    contributor_user = github.models.User_Org_Info.objects.filter(user_id=contributor_id)
                    contributor_repo = github.models.Repo_Base_Info.objects.filter(repo_id=repo_id)

                    # Reset per contributor so that dates found for the previous
                    # contributor can never leak into this one.
                    user_creat_time = None
                    user_last_update_time = None

                    # The first listing page holds the newest commit, the last page the oldest.
                    commit_user_url_first = self.data["commits_url"][0:-6] + "?author=" + contributor_commit_user
                    commit_user_info = self._fetch_commit_page(commit_user_url_first)
                    if commit_user_info:
                        if 'commit' in commit_user_info[0]:
                            user_last_update_time = commit_user_info[0]['commit']['author']['date']

                        # Number of listing pages at 30 commits per page, rounded up.
                        commit_page, remainder = divmod(contributor_contributions, 30)
                        if remainder:
                            commit_page += 1

                        if commit_page <= 1:
                            # Everything fits on the page already downloaded.
                            oldest_page = commit_user_info
                        else:
                            oldest_page = self._fetch_commit_page(
                                commit_user_url_first + "&page=" + str(commit_page))

                        if oldest_page and 'commit' in oldest_page[-1]:
                            user_creat_time = oldest_page[-1]['commit']['author']['date']
                            full_pages = max(commit_page - 1, 0)
                            with lock:
                                commit_total_count += full_pages * 30 + len(oldest_page)

                    if user_creat_time is None or user_last_update_time is None:
                        # No usable commit data for this contributor: nothing reliable to store.
                        continue

                    # Insert into, or refresh, the developer table.
                    try:
                        developer = github.models.Repo_Developer_info.objects.get(
                            Q(repo=contributor_repo) & Q(user=contributor_user))
                        developer.user_creat_time = user_creat_time
                        developer.user_last_update_time = user_last_update_time
                        developer.user_commit_count = contributor_contributions
                        developer.save()
                    except github.models.Repo_Developer_info.DoesNotExist:
                        github.models.Repo_Developer_info.objects.create(repo=contributor_repo[0],
                                                                         user=contributor_user[0],
                                                                         user_creat_time=user_creat_time,
                                                                         user_last_update_time=user_last_update_time,
                                                                         user_commit_count=contributor_contributions)
        except BaseException as e:
            log.set_log(e)

    def _fetch_commit_page(self, url):
        """Request ``url`` with the shared headers and return the decoded JSON body."""
        response = urllib2.urlopen(urllib2.Request(url, headers=headers))
        try:
            return json.loads(response.read())
        finally:
            # Release the connection even when decoding fails.
            response.close()
Exemplo n.º 5
0
# Markup placed before and after every page body.
pre = (
    '<html><body>\n'
    '<h1>Yorg</h1>\n'
)
post = (
    '\n</body>\n'
    '</html>'
)


def bld_page(page):
    """Return ``page`` wrapped between the shared header ``pre`` and footer ``post``."""
    wrapped = pre + page
    wrapped = wrapped + post
    return wrapped


# Rebind each page fragment to its fully wrapped HTML document.
# NOTE(review): the unwrapped fragments are not defined in the visible part of
# this file; presumably they are imported or defined further up — confirm.
emptypage = bld_page(emptypage)
activationpage = bld_page(activationpage)
resetpage = bld_page(resetpage)
resetpage_ok = bld_page(resetpage_ok)
resetpage_ko = bld_page(resetpage_ko)

# NOTE(review): presumably sets up logging under this name — confirm against set_log.
set_log('yorg_server_web')


class RequestHandler(BaseHTTPRequestHandler):
    """HTTP request handler that answers GET requests with generated HTML pages."""

    def __init__(self, request, client_address, server):
        # The database frontend is attached before delegating to the base
        # initialiser: the standard-library base class processes the request
        # from inside its own __init__, so self.db must already exist then.
        self.db = DBFrontend('yorg')
        BaseHTTPRequestHandler.__init__(self, request, client_address, server)

    def do_GET(self):
        """Answer a GET request.

        The page is built from the path and query string of the requested URL.
        When no page is produced a 404 error is sent; otherwise a 200 response
        with a ``text/html`` content type is started.
        """
        parsed_path = urlparse(self.path)
        page = self.bld_page(parsed_path.path, parsed_path.query)
        if not page:
            self.send_error(404)
            return
        self.send_response(200)
        self.send_header('Content-Type', 'text/html')
Exemplo n.º 6
0
from log import set_log, Logger

# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    # Hand a new Logger instance to set_log; the remaining start-up steps are
    # still open, see the TODO list below.
    set_log(Logger())
    # TODO main.py
    # TODO run project
    # TODO MQTT Subscriber