forked from bao-qian/rank
-
Notifications
You must be signed in to change notification settings - Fork 1
/
api.py
122 lines (109 loc) · 4.04 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import json
import requests
import time
from requests import HTTPError
import secret
from model import Model
from sqlalchemy import (
Column,
String,
exists,
)
from utility import log
class API(Model.base):
    """Database-backed cache in front of GitHub HTTP requests.

    Each row of the ``api`` table maps a request key (``graph_query``) to the
    raw response body (``response``).  The three public getters first look
    the key up in the table and only hit the network on a miss or when
    ``force`` is true; every successful network response is written back.

    All three getters share this single table and key it only by ``query``,
    so an identical ``query`` string passed to two different getters resolves
    to the same cached row.
    """

    __tablename__ = 'api'

    # Request key: the ``query`` argument of the getters below.
    graph_query = Column(String, primary_key=True)
    # Raw response body as text (``Response.text`` of the HTTP call).
    response = Column(String)

    # Extra seconds to wait past the advertised rate-limit reset, to be safe.
    _RATE_RESET_MARGIN = 5

    @classmethod
    def _exist(cls, query):
        """Return whether a cached row exists for ``query``."""
        statement = exists().where(API.graph_query == query)
        r = Model.session.query(statement).scalar()
        log('cache exist', r, query)
        return r

    @classmethod
    def _get(cls, query):
        """Return the cached ``API`` row for ``query``, or ``None``."""
        result = Model.session.query(API).filter(API.graph_query == query).scalar()
        log('get result for query', query)
        log('get result for query', result)
        return result

    @classmethod
    def _set(cls, query, response):
        """Insert or overwrite the cached ``response`` text for ``query``."""
        log('add result for query', query)
        c = API(
            graph_query=query,
            response=response,
        )
        # merge() updates the row when the primary key already exists.
        Model.session.merge(c)
        Model.session.commit()

    @classmethod
    def _cached_response(cls, query, force):
        """Return the cached response text for ``query``, or ``None``.

        ``None`` is returned when ``force`` is true (the cache is not even
        consulted), when no row exists, or when the row disappears between
        the existence check and the read.
        """
        if force or not cls._exist(query):
            return None
        row = cls._get(query)
        if row is None:
            return None
        return row.response

    @staticmethod
    def _auth_headers():
        """Build the token ``Authorization`` header used for API calls."""
        return {'Authorization': 'bearer {}'.format(secret.token)}

    @staticmethod
    def _http_error(url, r):
        """Build the ``HTTPError`` reported for a failed response ``r``."""
        message = 'url {} get error code {}'.format(url, r.status_code)
        return HTTPError(message, response=r)

    @staticmethod
    def _header_int(r, name):
        """Return header ``name`` of ``r`` as an int, or ``None`` if unusable."""
        try:
            return int(r.headers.get(name))
        except (TypeError, ValueError):
            return None

    @classmethod
    def get_v4(cls, query, force=False):
        """POST a GraphQL ``query`` to the v4 endpoint, with caching.

        :param query: GraphQL query text; also used as the cache key.
        :param force: when true, skip the cache and always hit the network.
        :return: the decoded JSON response.
        :raises HTTPError: when the response status is not 200.
        """
        cached = cls._cached_response(query, force)
        if cached is not None:
            return json.loads(cached)

        url = 'https://api.github.com/graphql'
        json_query = {
            'query': query
        }
        r = requests.post(url=url, json=json_query, headers=cls._auth_headers())
        if r.status_code != 200:
            raise cls._http_error(url, r)
        # NOTE(review): GraphQL servers commonly report query errors inside a
        # 200 body; such a body would be cached here -- confirm this is intended.
        j = r.json()
        cls._set(query, r.text)
        return j

    @classmethod
    def get_v3(cls, query, force=False):
        """GET ``https://api.github.com`` + ``query``, with caching.

        When the request fails and the response reports that no rate-limit
        quota remains, this waits until the advertised reset time (plus a
        small margin) and retries once.

        :param query: URL path appended to the API base; also the cache key.
        :param force: when true, skip the cache and always hit the network.
        :return: the decoded JSON response.
        :raises HTTPError: when the response status is not 200 and the
            failure is not (or is no longer) recoverable by waiting.
        """
        cached = cls._cached_response(query, force)
        if cached is not None:
            return json.loads(cached)

        base = 'https://api.github.com'
        url = '{}{}'.format(base, query)
        log('get v3 url', url)
        headers = cls._auth_headers()

        # One initial attempt plus at most one retry after a rate-limit wait.
        for attempt in range(2):
            r = requests.get(url=url, headers=headers)
            # Rate-limit headers may be absent; treat them as unknown (None)
            # rather than failing before the status code is even inspected.
            rate_limit = cls._header_int(r, 'X-RateLimit-Limit')
            rate_reset = cls._header_int(r, 'X-RateLimit-Reset')
            rate_remaining = cls._header_int(r, 'X-RateLimit-Remaining')
            log('rate limit <{}> rate remaing <{}>'.format(rate_limit, rate_remaining))
            now = int(time.time())
            if rate_reset is not None:
                # Seconds from now until the reset timestamp.
                log('rate will reset in <{}>'.format(rate_reset - now))

            if r.status_code == 200:
                log('get v3 r', r)
                j = r.json()
                cls._set(query, r.text)
                return j

            exhausted = rate_remaining == 0 and rate_reset is not None
            if exhausted and attempt == 0:
                log('no rate remaing')
                # Sleep until the reset time, plus a few seconds to be safe.
                time.sleep(max(rate_reset - now, 0) + cls._RATE_RESET_MARGIN)
                continue

            raise cls._http_error(url, r)

    @classmethod
    def get_crawler(cls, query, force=False):
        """GET ``https://github.com`` + ``query`` as HTML, with caching.

        :param query: URL path appended to the site base; also the cache key.
        :param force: when true, skip the cache and always hit the network.
        :return: the page body as text.
        :raises HTTPError: when the response status is not 200.
        """
        cached = cls._cached_response(query, force)
        if cached is not None:
            return cached

        base = 'https://github.com'
        url = '{}{}'.format(base, query)
        log('get crawler url', url)
        # Browser-like User-Agent sent with the page request.
        agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
                "AppleWebKit/537.36 (KHTML, like Gecko) " \
                "Chrome/62.0.3202.94 Safari/537.36"
        headers = {'User-Agent': agent}
        r = requests.get(url=url, headers=headers)
        if r.status_code != 200:
            raise cls._http_error(url, r)
        html = r.text
        cls._set(query, html)
        return html