示例#1
0
文件: api.py 项目: shennian/rank
 def _get(cls, query):
     """Return the cached ``API`` row whose ``query`` column equals *query*.

     Raises ``NotExist`` when the lookup yields ``None``.
     """
     # Newlines are stripped only for the log message; the database lookup
     # below compares against the query text unchanged.
     flattened = query.replace('\n', '')
     lookup = Database.session.query(API).filter(API.query == query)
     cached = lookup.scalar()
     if cached is not None:
         return cached
     log('cache not exist', flattened)
     raise NotExist
示例#2
0
 def all(cls, login, repositories):
     """Yield the valid ``Contribution`` objects of *login* over *repositories*.

     Every contribution is constructed, validated and logged; only those
     whose ``valid`` attribute is truthy are yielded.
     """
     template = 'contribution all <{}> <{}> <{}> <{}> <{}> <{}>'
     for repo in repositories:
         contribution = Contribution(login, repo)
         contribution.validate()
         log(template.format(
             login,
             repo.name_with_owner,
             contribution.valid,
             contribution.star,
             contribution.commit_parts,
             contribution.star_pats,
         ))
         if not contribution.valid:
             continue
         yield contribution
示例#3
0
文件: api.py 项目: shennian/rank
 def _set(cls, query, response):
     """Merge a cache row for *query* holding *response* and commit it.

     The row is stamped with the current Unix time truncated to an integer.
     """
     log('set result for query', query)
     entry = API(query=query, response=response, unixtime=int(time.time()))
     Database.session.merge(entry)
     Database.session.commit()
示例#4
0
def main():
    """Run ``init_db``, ``all_data``, ``log_data`` and ``generate_html`` in order.

    The wall-clock duration of the whole run is logged at the end.
    """
    started_at = time.time()

    init_db()
    users = all_data()
    log_data(users)
    generate_html(users)

    elapsed = time.time() - started_at
    log('total time cost {} seconds'.format(elapsed))
示例#5
0
def generate_html(users):
    """Render at most the first 1000 entries of *users* into ``index.html``.

    The page is rendered from ``template_rank.html`` together with an
    ``updated`` timestamp taken in a fixed UTC+8 timezone, and is written
    as UTF-8 into the directory named by ``config.static``.
    """
    utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
    rendered = Template.render(
        'template_rank.html',
        updated=str(datetime.datetime.now(utc_plus_8)),
        users=users[:1000],
    )
    target = os.path.join(config.static, 'index.html')
    with open(target, 'w', encoding='utf-8') as out:
        out.write(rendered)
    log('finish generate html, length {}'.format(len(rendered)))
示例#6
0
文件: api.py 项目: shennian/rank
 def get_crawler(cls, query):
     """Return the response for *query*, using the cache when it is valid.

     A missing cache row (``NotExist``) or a row rejected by
     ``_valid_cache`` both fall back to ``_get_crawler``.
     """
     log('get_crawler', query)
     try:
         cached = cls._get(query)
     except NotExist:
         return cls._get_crawler(query)

     if cls._valid_cache(cached):
         return cached.response
     return cls._get_crawler(query)
示例#7
0
 def validate(self):
     """Set ``self.valid`` and record the repository when it is rejected.

     The checks run in order: language / star count, then name and
     description, then code files. The first failing check appends a
     tuple describing the repository to ``self.all_invalid``. The outcome
     is logged in every case.
     """
     # The language can be None for some repositories, e.g. when they
     # contain no files.
     unusable_language = (
         self.language is None
         or self.language in config.invalid_language
     )
     if unusable_language or self.total_star == 0:
         rejected = (self.name_with_owner, self.total_star, self.language)
     elif not self.valid_name_and_description():
         rejected = (self.name_with_owner, self.total_star,
                     self.name_with_owner, self.description)
     elif not self.valid_code_files():
         rejected = (self.name_with_owner, self.total_star, self.files)
     else:
         rejected = None

     self.valid = rejected is None
     if rejected is not None:
         self.all_invalid.append(rejected)

     summary = 'repository.validate <{}> <{}> <{}> <{}>'.format(
         self.name_with_owner, self.total_star, self.valid, self.files)
     log(summary)
示例#8
0
文件: api.py 项目: shennian/rank
 def get_v3(cls, query):
     """Return the decoded v3 API result for *query*, preferring the cache.

     Without a cache row the API is called; an ``ErrorCode202`` from that
     call triggers a 5 second sleep followed by a fresh ``get_v3`` attempt.
     With a cache row, a valid one is decoded and returned directly; a
     stale one is refreshed from the API, falling back to the stale
     content when the API answers with ``ErrorCode202``.
     """
     log('get_v3', query)
     try:
         cached = cls._get(query)
     except NotExist:
         # Nothing cached yet: the only option is to wait for the API.
         try:
             return cls._get_v3(query)
         except ErrorCode202:
             time.sleep(5)
             return cls.get_v3(query)

     if cls._valid_cache(cached):
         return json.loads(cached.response)

     try:
         return cls._get_v3(query)
     except ErrorCode202:
         # The API has no fresh answer yet; serve the stale cache instead.
         return json.loads(cached.response)
示例#9
0
文件: api.py 项目: shennian/rank
    def get_v4_connection(cls, query, keyword, parameter, format_mapping):
        """Generator yielding the ``edges`` list of successive connection pages.

        The first page is fetched and yielded unconditionally. For every
        later page the ``endCursor`` of the previous page is written into
        ``parameter['after']`` (the caller's dict is mutated) before the
        query is rebuilt and fetched through ``_get_v4_cache``.

        From the second fetched page onwards, the value the consumer passes
        in via ``send()`` decides whether paging continues: a falsy value
        ends the generator. Plain iteration (``next()`` / a ``for`` loop)
        sends ``None``, so it stops after that page.
        """
        log('get_v4_connection', query, parameter)
        q = cls._query_for_connection(query, parameter, format_mapping)
        r = cls._get_v4_cache(q)
        c = cls._connection_for_keyword(r['data'], keyword)
        edges = c['edges']
        # The value sent in response to this first yield is discarded.
        yield edges
        should_continue = True

        while should_continue:
            end_cursor = c['pageInfo']['endCursor']
            has_next_page = c['pageInfo']['hasNextPage']
            # NOTE(review): with `or`, a page that reports hasNextPage False
            # but still has an endCursor triggers one more fetch — confirm
            # this is intended.
            if end_cursor is not None or has_next_page:
                parameter['after'] = end_cursor
                q = cls._query_for_connection(query, parameter, format_mapping)
                r = cls._get_v4_cache(q)
                c = cls._connection_for_keyword(r['data'], keyword)
                edges = c['edges']
                # The consumer's send() value controls the next iteration.
                should_continue = yield edges
            else:
                return
示例#10
0
 def all(cls):
     """Yield each distinct, non-blocked user that has a positive star total.

     Users from ``users_for_extra`` come before those from
     ``users_for_query``; a login is processed only the first time it is
     seen. For each processed user this sets ``contribution`` (sorted by
     star, descending), ``star`` (their sum) and, when the sum is positive,
     ``language`` (per-language star totals, descending).
     """
     extra_users = cls.users_for_extra()
     query_users = cls.users_for_query()
     candidates = list(extra_users) + list(query_users)
     handled = set()
     for index, user in enumerate(candidates):
         if user.login in handled or user.login in config.block_user:
             continue
         handled.add(user.login)
         log('start user no.{} {} {}'.format(
             index, user.login, len(user.repositories)))

         contributions = Contribution.all(user.login, user.repositories)
         user.contribution = sorted(
             contributions, key=lambda item: item.star, reverse=True)
         user.star = sum(item.star for item in user.contribution)

         if user.star > 0:
             totals = {}
             for item in user.contribution:
                 lang = item.repository.language
                 totals[lang] = totals.get(lang, 0) + item.star
             user.language = sorted(
                 totals.items(), key=lambda pair: pair[1], reverse=True)
             yield user
         # NOTE(review): when the star total is 0 this block never assigns
         # ``language``; the log below relies on the attribute already
         # existing on the user object — confirm.
         log('end user no.{} {} {}'.format(index, user.login, user.language))
示例#11
0
文件: api.py 项目: shennian/rank
    def _get_v4(cls, query, cache=True):
        """POST *query* to the GitHub GraphQL endpoint and return the JSON.

        The query is wrapped together with a ``rateLimit`` selection so
        every response carries the current rate information.

        Args:
            query: GraphQL selection text inserted into the request body.
            cache: when true, the raw response text of a successful call
                is stored through ``_set``.

        Returns:
            The decoded JSON response.

        Raises:
            GraphQLError: the response has ``errors`` and none of them is
                of type ``RATE_LIMITED``.
            ErrorCode: the HTTP status is not 200.
        """
        full_query = f"""
        {{
            rateLimit {{
                limit
                cost
                remaining
                resetAt
            }}
            {query}
        }}
        """
        url = 'https://api.github.com/graphql'
        json_query = {'query': full_query}
        headers = {'Authorization': 'bearer {}'.format(secret.token)}

        r = requests.post(url=url, json=json_query, headers=headers)

        if r.status_code != 200:
            raise ErrorCode(r.status_code, query)

        j = r.json()
        cls.ensure_not_none(j, f'query <{query}> result is <{j}>')

        if 'errors' in j:
            for e in j['errors']:
                # Not every GraphQL error carries a ``type`` field; use
                # .get() so such errors reach the GraphQLError below
                # instead of failing with KeyError.
                if e.get('type') == 'RATE_LIMITED':
                    # Ask for the rate information alone to learn how long
                    # to wait; this probe is never cached.
                    j_rate = cls._get_v4('', cache=False)
                    limit, remaining, cost, reset_at, reset_in = cls._rate_v4(
                        j_rate)
                    log('v4 query <{}> rate limit <{}> remaing <{}> cost <{}> resetAt <{}> reset_in <{}>'
                        .format(query, limit, remaining, cost, reset_at,
                                reset_in))
                    log('v4 sleep <{}> and try again <{}>'.format(
                        reset_in, query))
                    # +3 as a safety margin past the reported reset.
                    time.sleep(reset_in + 3)
                    log('v4 finish sleep <{}>'.format(query))
                    # Retry with the caller's cache preference preserved.
                    return cls._get_v4(query, cache=cache)
            raise GraphQLError(full_query, j['errors'])

        limit, remaining, cost, reset_at, reset_in = cls._rate_v4(j)
        log('v4 query <{}> rate limit <{}> remaing <{}> cost <{}> resetAt <{}> reset_in <{}>'
            .format(query, limit, remaining, cost, reset_at, reset_in))
        if cache:
            cls._set(query, r.text)
        return j
示例#12
0
文件: api.py 项目: shennian/rank
    def _get_v3(cls, query, cache=True):
        """GET ``https://api.github.com`` + *query* and return the decoded JSON.

        Args:
            query: path (and query string) appended to the API base URL.
            cache: when true, the raw response text of a successful call
                is stored through ``_set``.

        Returns:
            The decoded JSON response. When the request is rejected with
            403 because the rate quota is exhausted, the call sleeps until
            the quota resets and then returns the result of a retry.

        Raises:
            ErrorCode202: the API answered 202.
            ErrorCode: any other non-200 status, including a 403 that is
                not caused by an exhausted rate quota.
        """
        base = 'https://api.github.com'
        url = '{}{}'.format(base, query)
        headers = {'Authorization': 'bearer {}'.format(secret.token)}
        r = requests.get(url=url, headers=headers)

        if r.status_code == 200:
            rate_limit, rate_remaing, rate_reset, reset_in = cls._rate_v3(r)
            log('v3 rate limit <{}> rate remaing <{}> rate reset <{}>  reset in <{}>'
                .format(
                    rate_limit,
                    rate_remaing,
                    rate_reset,
                    reset_in,
                ))

            j = r.json()
            cls.ensure_not_none(j, f'query <{query}> result is <{j}>')
            if cache:
                cls._set(query, r.text)
            return j
        elif r.status_code == 202:
            raise ErrorCode202(202, query)
        elif r.status_code == 403:
            # A 403 is treated as a rate-limit rejection only when the
            # reported remaining quota is exactly 0.
            rate_limit, rate_remaing, rate_reset, reset_in = cls._rate_v3(r)
            log('v3 rate limit <{}> rate remaing <{}> rate reset <{}>  reset in <{}>'
                .format(
                    rate_limit,
                    rate_remaing,
                    rate_reset,
                    reset_in,
                ))
            if rate_remaing == 0:
                log('v3 sleep <{}> and try again <{}>'.format(reset_in, query))
                # +3 as a safety margin past the reported reset.
                time.sleep(reset_in + 3)
                log('v3 finish sleep <{}>'.format(query))
                # Actually retry; without this the call fell through and
                # returned None after sleeping.
                return cls._get_v3(query, cache=cache)
            else:
                raise ErrorCode(r.status_code, query)
        else:
            raise ErrorCode(r.status_code, query)
示例#13
0
文件: api.py 项目: shennian/rank
 def get_v4_object(cls, query):
     """Log the call, then return what ``_get_v4_cache`` gives for *query*."""
     log('get_v4_object', query)
     result = cls._get_v4_cache(query)
     return result
示例#14
0
def log_data(users):
    """Log rejected items, one summary line per user, and per-language lists.

    The output covers, in order: every entry of ``Repository.all_invalid``
    and ``Contribution.all_invalid``; for each user a line with the star
    total and up to three leading contributions that have stars; and for
    each language the ``(login, stars)`` pairs sorted by stars, descending.
    """
    for repo in Repository.all_invalid:
        log('invalid repository', repo)
    for contribution in Contribution.all_invalid:
        log('wrong contribution', contribution)

    for index, user in enumerate(users):
        parts = ['user star:', f'{index:3} {user.login:15} {user.star:5} ']
        for item in user.contribution[:3]:
            if item.star > 0:
                repo = item.repository
                parts.append(
                    f'{repo.name_with_owner:40} {repo.language:12} {item.star:5} ')
        log(''.join(parts))

    # Group (login, stars) pairs under their language, keeping the order in
    # which languages are first encountered.
    by_language = {}
    for user in users:
        for entry in user.language:
            by_language.setdefault(entry[0], []).append((user.login, entry[1]))

    for name, entries in by_language.items():
        log(name)
        log(sorted(entries, key=lambda pair: pair[1], reverse=True))

    log('finish log data to stdout')