def get_user_info(self, contest_cid, course_oj_username):
    """Look up one user's rank and solved problems in a Nowcoder contest.

    Queries the real-time rank endpoint with the user's name as search term
    and keeps only rows whose ``userName`` equals that name exactly.

    :param contest_cid: contest id inserted into the rank-data URL.
    :param course_oj_username: object exposing an ``oj_username`` attribute.
    :return: ``{'success': True, 'data': {'pass_list': [...], 'rank': ...}}``,
        or ``{'success': False, 'data': None}`` when the API message is not
        ``OK`` or no matching row produced a non-zero ranking.
    """
    target_name = course_oj_username.oj_username
    client = SpiderHttp()
    endpoint = ('https://ac.nowcoder.com/acm-heavy/acm/contest/'
                'real-time-rank-data?id={}&'
                'searchUserName={}').format(contest_cid, quote(target_name))
    payload = client.get(url=endpoint).json()
    if payload['msg'] != 'OK':
        return {'success': False, 'data': None}

    # Score rows reference problems by id; the result reports their names.
    name_by_id = {
        entry['problemId']: entry['name']
        for entry in payload['data']['problemData']
    }

    solved = []
    ranking = 0
    for row in payload['data']['rankData']:
        if row['userName'] == target_name:
            ranking = row['ranking']
            solved.extend(
                name_by_id[score['problemId']]
                for score in row['scoreList']
                if score['accepted']
            )

    # A ranking of 0 means no row matched the requested user.
    if ranking == 0:
        return {'success': False, 'data': None}
    return {'success': True, 'data': {'pass_list': solved, 'rank': ranking}}
def get_user_info(self, oj_username, accept_problems):
    """Collect a user's accepted submissions from POJ.

    Checks that the account exists, then walks the status listing
    (``result=0``) page by page, using the run id of the last row seen as
    the ``top`` cursor for the next request.

    :param oj_username: object exposing an ``oj_username`` attribute.
    :param accept_problems: mapping of ``'poj-<problem id>'`` to an accept
        time that is already recorded.
    :return: ``{'success': False, 'data': []}`` if the user does not exist,
        otherwise ``{'success': True, 'data': [...]}`` with one entry per
        row that is not already recorded.
    """
    user = oj_username.oj_username
    profile = SpiderHttp().get(
        url='http://poj.org/userstatus?user_id={}'.format(user))
    if "Sorry,{} doesn't exist".format(user) in profile.text:
        return {'success': False, 'data': []}

    collected = []
    cursor = -1
    reached_known = False
    while not reached_known:
        page = SpiderHttp().get(
            url='http://poj.org/status?user_id={}&result=0&top={}'.format(
                user, cursor))
        tables = BeautifulSoup(page.text, 'lxml').find_all('table')
        # The last table holds the listing; its first row is skipped.
        rows = tables[-1].find_all('tr')[1:]
        if not rows:
            break
        for row in rows:
            cells = row.find_all('td')
            problem_id = cells[2].text
            accept_time = cells[8].text
            cursor = cells[0].text
            if accept_problems.get('poj-' + problem_id) == accept_time:
                # Already recorded: finish this page, then stop paging.
                reached_known = True
            else:
                collected.append({
                    'oj': 'poj',
                    'problem_pid': problem_id,
                    'accept_time': accept_time
                })
    return {'success': True, 'data': collected}
def check_cookie(self):
    """Load the stored zucc cookies into a fresh client and verify them.

    Replaces ``self.zucc_http`` with a new ``SpiderHttp``, copies every
    cookie from the JSON stored in the ``zucc-cookie`` mapping into its
    session, forces ``lang=cn`` and checks the resulting login.

    :raises AssertionError: if the session is not logged in as ``ZUCC_ID``.
    """
    self.zucc_http = SpiderHttp()
    stored = json.loads(Mapping.get_by_id('zucc-cookie').value)
    for name, value in stored.items():
        self.zucc_http.sess.cookies.set(name, value)
    self.zucc_http.sess.cookies.set("lang", "cn")
    # Raised explicitly rather than via ``assert``: assert statements are
    # removed under ``python -O``, which would let a stale cookie pass. The
    # exception type is kept so existing ``except AssertionError`` handlers
    # still work.
    if self.check_login_status() != ZUCC_ID:
        raise AssertionError('stored zucc cookie is not logged in as ZUCC_ID')
def get_new_cookie(self):
    """Log in with the configured account and persist the session cookies.

    Replaces ``self.zucc_http`` with a new ``SpiderHttp``, logs in with
    ``ZUCC_ID`` / ``ZUCC_PASSWORD``, verifies the login and writes all
    session cookies as key-sorted JSON into the ``zucc-cookie`` mapping.

    :raises AssertionError: if the login check does not report ``ZUCC_ID``.
    """
    self.zucc_http = SpiderHttp()
    self.login(ZUCC_ID, ZUCC_PASSWORD)
    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; the exception type is unchanged for callers.
    if self.check_login_status() != ZUCC_ID:
        raise AssertionError('zucc login did not yield a ZUCC_ID session')
    cookie = {item.name: item.value for item in self.zucc_http.sess.cookies}
    Mapping.get_by_id('zucc-cookie').modify(
        value=json.dumps(cookie, sort_keys=True))
def _find_user_id(username):
    """Tell whether LibreOJ's user search lists exactly ``username``.

    :param username: name sent as the ``query`` parameter and compared
        against each returned ``username`` field.
    :return: ``True`` if some entry of ``userMetas`` has that exact name,
        ``False`` otherwise, including when the response cannot be parsed.
    """
    url = 'https://api.loj.ac.cn/api/user/searchUser?query={}'.format(
        username)
    res = SpiderHttp().get(url=url)
    try:
        return any(
            meta['username'] == username for meta in res.json()['userMetas'])
    except Exception:
        # Best effort: a malformed or unexpected payload counts as "not
        # found". Unlike a bare ``except`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
def get_user_info(self, oj_username, accept_problems):
    """Collect a user's timeline entries from Jisuanke.

    Requests the ``nanti`` timeline page by page until a page is not valid
    JSON or carries no ``data``.

    :param oj_username: object exposing an ``oj_username`` attribute.
    :param accept_problems: mapping of ``'jisuanke-<problem id>'`` to an
        accept time that is already recorded; matching entries are skipped.
    :return: ``{'success': bool, 'data': [...]}``; ``success`` is ``True``
        once at least one page decoded as JSON.
    """
    username = oj_username.oj_username
    page = 1
    accept_problem_list = []
    success = False
    while True:
        url = 'https://i.jisuanke.com/timeline/nanti/{}?page={}'.format(
            username, page)
        res = SpiderHttp().get(url=url)
        try:
            payload = json.loads(res.text)
        except (TypeError, ValueError):
            # Not JSON: stop paging. Only decode failures are caught, so
            # interrupts and unrelated errors are no longer swallowed as a
            # bare ``except`` did.
            break
        success = True
        entries = payload.get('data', dict())
        if not entries:
            break
        for entry in entries:
            problem_id = re.findall('//nanti.jisuanke.com/t/(.*)',
                                    entry['url'])[0]
            problem_id = JisuankeSpider._change_problem_id(problem_id)
            accept_time = entry['updated_at']
            if accept_problems.get('jisuanke-' + problem_id) == accept_time:
                continue
            accept_problem_list.append({
                'oj': 'jisuanke',
                'problem_pid': problem_id,
                'accept_time': accept_time
            })
        page += 1
    return {'success': success, 'data': accept_problem_list}
def _get_id_by_username(username):
    """Find the first ``/profile/<digits>`` id on Nowcoder's search page.

    :param username: text placed in the ``query`` parameter of the search.
    :return: the digits of the first profile link as a string, or ``None``
        when the page contains no such link.
    """
    page = SpiderHttp().get(
        url='https://www.nowcoder.com/search?type=all&query={}'.format(
            username))
    first_hit = re.search(r'/profile/(\d+)', page.text)
    if first_hit is None:
        return None
    return first_hit.group(1)
def _change_problem_id(problem_id):
    """Translate a Jisuanke problem id through the stored mapping.

    Uses the value of the ``jisuanke-<problem_id>`` mapping when it is set.
    Otherwise requests the problem page, takes the id from the URL the
    response ends up at, saves it in the mapping and returns it.

    :param problem_id: id as it appears after ``/t/`` in a problem link.
    :return: the mapped problem id.
    """
    record = Mapping.get_by_id('jisuanke-{}'.format(problem_id))
    resolved = record.value
    if not resolved:
        page = SpiderHttp().get(
            url='http://nanti.jisuanke.com/t/{}'.format(problem_id))
        resolved = re.findall(r'//nanti.jisuanke.com/t/(.*)', page.url)[0]
        record.modify(value=resolved)
    return resolved
def get_problem_info(self, problem_id):
    """Derive a rating for a Nowcoder problem from its star count.

    Searches the problem list for ``problem_id`` and maps the first star
    count found on the page onto a rating table indexed by stars.

    :param problem_id: keyword used for the problem-list search.
    :return: ``{'rating': ...}``; ``DEFAULT_PROBLEM_RATING`` when the request
        fails, no star count is found, or the count is outside the table.
    """
    star_rating = [DEFAULT_PROBLEM_RATING, 800, 1200, 1600, 2000, 2400]
    try:
        url = 'https://ac.nowcoder.com/acm/problem/list?keyword={}'.format(
            problem_id)
        res = SpiderHttp().get(url=url)
        stars = re.findall(r'<td>\n(\d+)星\n</td>', res.text)
        # findall with one group yields the captured strings themselves, so
        # convert the whole match instead of only its first character.
        rating = star_rating[int(stars[0])]
    except Exception:
        # Best effort: any fetch or parse problem falls back to the default,
        # while KeyboardInterrupt/SystemExit are no longer swallowed.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_problem_info(self, problem_id):
    """Return the rating for a Jisuanke problem.

    Fetches the problem page and parses its accepted/submitted counts, but
    the rating returned is always ``DEFAULT_PROBLEM_RATING``.

    :param problem_id: id inserted into the problem URL.
    :return: ``{'rating': DEFAULT_PROBLEM_RATING}``.
    """
    try:
        url = 'https://nanti.jisuanke.com/t/{}'.format(problem_id)
        res = SpiderHttp().get(url=url)
        data = re.findall(r'通过 (\d+) 人次 / 提交 (\d+) 人次', res.text)
        # NOTE: the counts are parsed but do not influence the rating yet.
        accept = int(data[0][0])
        total = int(data[0][1])
        rating = DEFAULT_PROBLEM_RATING
    except Exception:
        # Best effort: fetch/parse failures still yield the default rating.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_problem_info(self, problem_id):
    """Return the rating for an HDU problem.

    Fetches the problem page and parses its total/accepted submission
    counts, but the rating returned is always ``DEFAULT_PROBLEM_RATING``.

    :param problem_id: value of the ``pid`` query parameter.
    :return: ``{'rating': DEFAULT_PROBLEM_RATING}``.
    """
    url = 'http://acm.hdu.edu.cn/showproblem.php?pid={}'.format(problem_id)
    res = SpiderHttp().get(url=url)
    try:
        re_res = re.search(
            r'<br>Total Submission\(s\): (\d+)( ){4}Accepted Submission\(s\): (\d+)<br>',
            res.text)
        # NOTE: the counts are parsed but do not influence the rating yet.
        total = int(re_res.group(1))
        accept = int(re_res.group(3))
        rating = DEFAULT_PROBLEM_RATING
    except Exception:
        # A page without the expected counters still yields the default.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def get_problem_info(self, problem_id):
    """Return the rating for a POJ problem.

    Fetches the problem page and parses its total/accepted submission
    counts, but the rating returned is always ``DEFAULT_PROBLEM_RATING``.

    :param problem_id: value of the ``id`` query parameter.
    :return: ``{'rating': DEFAULT_PROBLEM_RATING}``.
    """
    url = 'http://poj.org/problem?id={}'.format(problem_id)
    res = SpiderHttp().get(url=url)
    try:
        # NOTE: the counts are parsed but do not influence the rating yet.
        total = int(
            re.search(r'<td><b>Total Submissions:</b> (\d+)</td>',
                      res.text).group(1))
        accept = int(
            re.search(r'<td><b>Accepted:</b> (\d+)</td>',
                      res.text).group(1))
        rating = DEFAULT_PROBLEM_RATING
    except Exception:
        # A page without the expected counters still yields the default.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        rating = DEFAULT_PROBLEM_RATING
    return {'rating': rating}
def _get_gym_contest_rating(contest_id):
    """Map a Codeforces gym contest to a rating through its star count.

    The star count is read from the ``gym-<contest_id>`` mapping when set.
    Otherwise the gym list is searched for the contest, the ``img`` elements
    in its table row are counted, and that count is stored in the mapping.

    :param contest_id: gym contest id used for the search and the row lookup.
    :return: the rating table entry at the star count.
    """
    rating_by_stars = [DEFAULT_PROBLEM_RATING, 1200, 1600, 2000, 2400, 2800]
    record = Mapping.get_by_id('gym-{}'.format(contest_id))
    cached = record.value
    if cached:
        return rating_by_stars[int(cached)]

    gyms_url = 'https://codeforces.com/gyms'
    client = SpiderHttp()
    # The search form needs the csrf token embedded in the listing page.
    listing = BeautifulSoup(client.get(url=gyms_url).text, 'lxml')
    csrf = listing.find('input', {'name': 'csrf_token'})['value']
    form = {
        'csrf_token': csrf,
        'searchByNameOrIdQuery': contest_id,
        'searchByProblem': False,
    }
    found = BeautifulSoup(client.post(url=gyms_url, data=form).text, 'lxml')
    contest_row = found.find('tr', {'data-contestid': contest_id})
    star_count = len(contest_row.findAll('img'))
    record.modify(value=str(star_count))
    return rating_by_stars[star_count]
def get_contest_meta(self, contest_cid):
    """Fetch summary data for a Nowcoder contest from its rank endpoint.

    :param contest_cid: contest id inserted into the rank-data URL.
    :return: ``{'success': False, 'data': None}`` when the API message is
        not ``OK``; otherwise ``success`` is ``True`` and ``data`` holds
        ``max_pass`` (accepted count of the first rank row),
        ``participants`` (``rankCount``) and ``problems`` (problem names).
    """
    endpoint = ('https://ac.nowcoder.com/acm-heavy/acm/contest/'
                'real-time-rank-data?id={}').format(contest_cid)
    payload = SpiderHttp().get(url=endpoint).json()
    if payload['msg'] != 'OK':
        return {'success': False, 'data': None}

    body = payload['data']
    top_solved = body['rankData'][0]['acceptedCount']
    total_ranked = body['basicInfo']['rankCount']
    problem_names = [problem['name'] for problem in body['problemData']]
    meta = {
        'max_pass': top_solved,
        'participants': total_ranked,
        'problems': problem_names
    }
    return {'success': True, 'data': meta}
# Collect a user's rows from the Nowcoder practice-coding listing.
#
# The username is replaced by the id from
# NowcoderSpider._get_id_by_username when that lookup returns one. Pages are
# requested from page 1 upward (pageSize=200, statusTypeFilter=5,
# orderType=DESC). The loop breaks on a non-200 status or when the HTML
# contains any of three marker strings; `success` is set to True for every
# page that passes those checks.
#
# Per table row (first row skipped) the accept time is read from the ninth
# cell and the problem id from the link in the second cell. A row whose time
# equals accept_problems['nowcoder-<id>'] sets `ok`, which ends the
# `while not ok` loop.
#
# Side effect: accept_problems is written in place when the key is missing
# or its stored time compares >= the row's time.
#
# Returns {'success': success, 'data': accept_problem_list}, each entry
# carrying 'oj', 'problem_pid' and 'accept_time'.
#
# NOTE(review): this definition is flattened onto a single line, so the
# nesting of the final append relative to the `if time is None or ...` guard
# cannot be read from the text; confirm against the upstream file before
# reformatting.
# NOTE(review): the local `time` would shadow a module of that name if the
# file imports it; verify.
def get_user_info(self, oj_username, accept_problems): username = oj_username.oj_username success = False uid = NowcoderSpider._get_id_by_username(username) if uid: username = uid index = 1 accept_problem_list = [] ok = False while not ok: url = 'https://ac.nowcoder.com/acm/contest/profile/{}/' \ 'practice-coding?pageSize=200&statusTypeFilter=5&orderType=DESC&page={}'.format( username, index) res = SpiderHttp().get(url=url) if res.status_code != 200: break if '<title>页面找不到了</title>' in res.text: break if '用户不存在' in res.text: break if '没有找到你想要的内容呢' in res.text: break success = True soup = BeautifulSoup(res.text, 'lxml') trs = soup.find_all('tr')[1:] for tr in trs: tds = tr.find_all('td') accept_time = tds[8].text problem_id = re.findall(r'/acm/problem/(\d+)', tds[1].find('a')['href'])[0] if accept_problems.get('nowcoder-' + problem_id) == accept_time: ok = True continue time = accept_problems.get('nowcoder-' + problem_id) if time is None or time >= accept_time: accept_problems['nowcoder-' + problem_id] = accept_time accept_problem_list.append({ 'oj': 'nowcoder', 'problem_pid': problem_id, 'accept_time': accept_time }) index += 1 return {'success': success, 'data': accept_problem_list}
def get_user_info(self, oj_username, accept_problems):
    """Collect a user's accepted submissions from LibreOJ.

    Confirms the user exists, then pages backwards through the submission
    query API (10 per request) by passing the id below the last submission
    seen as ``maxId``.

    :param oj_username: object exposing an ``oj_username`` attribute.
    :param accept_problems: mapping of ``'loj-<problem id>'`` to an accept
        time that is already recorded.
    :return: ``{'success': True, 'data': [...]}`` on success, or
        ``{'success': False, 'data': []}`` when the user is unknown or any
        request/parsing step fails.
    """
    username = oj_username.oj_username
    if not self._find_user_id(username):
        return {'success': False, 'data': []}

    accept_problem_list = []
    url = 'https://api.loj.ac.cn/api/submission/querySubmission'
    request_data = {
        'locale': 'zh_CN',
        'status': 'Accepted',
        'submitter': username,
        'takeCount': 10
    }
    has_smaller = True
    now = -1
    try:
        while has_smaller:
            if now != -1:
                request_data['maxId'] = now - 1
            res = SpiderHttp().post(url=url,
                                    data=json.dumps(request_data),
                                    headers={
                                        'Content-Type': 'application/json'
                                    }).json()
            has_smaller = res['hasSmallerId']
            submissions = res['submissions']
            if not submissions:
                # Without rows the cursor cannot advance; stop instead of
                # repeating the identical request forever.
                break
            for data in submissions:
                problem_id = str(data['problem']['id'])
                accept_time = self._make_time_strict(data['submitTime'])
                now = data['id']
                if accept_problems.get('loj-' + problem_id) == accept_time:
                    # Skips only the rest of this page; paging continues
                    # while the API reports smaller ids.
                    break
                accept_problem_list.append({
                    'oj': 'loj',
                    'problem_pid': problem_id,
                    'accept_time': accept_time
                })
    except Exception:
        # Best effort: report failure with the same empty-list payload as
        # the unknown-user path (the original returned ``{}`` here), and let
        # KeyboardInterrupt/SystemExit propagate.
        return {'success': False, 'data': []}
    return {'success': True, 'data': accept_problem_list}
def get_user_info(self, oj_username, accept_problems):
    """Collect a user's accepted submissions from HDU's status pages.

    Follows the link whose ``href`` contains ``first=<digit>`` from page to
    page. Accept times are stored per problem id, so a later row for the
    same problem replaces an earlier one.

    :param oj_username: object exposing an ``oj_username`` attribute.
    :param accept_problems: mapping of ``'hdu-<problem id>'`` to an accept
        time that is already recorded.
    :return: ``{'success': bool, 'data': [...]}``; ``success`` is ``True``
        once at least one status row was seen.
    """
    user = oj_username.oj_username
    page_url = 'http://acm.hdu.edu.cn/status.php?user={}'.format(user)
    accept_time_by_pid = {}
    reached_known = False
    saw_row = False
    while True:
        page = SpiderHttp().get(url=page_url)
        soup = BeautifulSoup(page.text, 'lxml')
        status_table = soup.find_all('table', {'class': 'table_text'})[0]
        # The first row of the table is skipped.
        for row in status_table.find_all('tr')[1:]:
            cells = row.find_all('td')
            saw_row = True
            if cells[2].text != 'Accepted':
                continue
            accept_time = cells[1].text
            problem_pid = cells[3].text
            if accept_problems.get('hdu-' + problem_pid) == accept_time:
                # Already recorded: finish this page, then stop paging.
                reached_known = True
                continue
            accept_time_by_pid[problem_pid] = accept_time
        if reached_known:
            break
        next_link = soup.find('a', {'href': re.compile(r'.*first=[0-9].*')})
        if not next_link:
            break
        page_url = 'http://acm.hdu.edu.cn' + next_link['href']

    entries = []
    for problem_pid, accept_time in accept_time_by_pid.items():
        entries.append({
            'oj': 'hdu',
            'problem_pid': problem_pid,
            'accept_time': accept_time
        })
    return {'success': saw_row, 'data': entries}
def __init__(self):
    """Attach a new ``SpiderHttp`` client as ``self.vjudge_http``."""
    client = SpiderHttp()
    self.vjudge_http = client
# VjudgeSpider: BaseSpider subclass that reads a user's submissions from
# vjudge.net through one SpiderHttp client stored on self.vjudge_http.
#
# get_user_info(oj_username, accept_problems)
#     Loads the cookies stored in oj_username.oj_cookies into the client's
#     headers and compares check_login_status() with the username. On any
#     failure it logs in again through _get_cookies(); if that raises it
#     returns {'success': False, 'data': []}. The cookies are written back
#     with oj_username.modify(oj_cookies=...). It then requests
#     /status/data/ with `start` advancing by 20, keeps rows whose
#     statusCanonical is 'AC', converts `time` (divided by 1000) with
#     timestamp_to_str and maps the judge name through _change_oj_name. A
#     row whose time equals accept_problems['<oj>-<probNum>'] sets `ok`,
#     ending the `while not ok` loop. Returns {'success': bool, 'data':
#     [...]}, with success True once a page contained data.
# check_login_status()
#     GETs /user/update and returns the 'username' field of the JSON reply,
#     or None when the field is absent.
# _get_cookies(username, password)
#     POSTs the credentials to /user/login; returns the session cookies as a
#     dict when the response body is 'success', otherwise raises
#     Exception(<response body>).
# get_problem_info(problem_id)
#     Body is `pass`; returns None.
# _change_oj_name(name)  [staticmethod]
#     Lower-cases the name and rewrites gym -> codeforces, zju -> zoj,
#     pku -> poj, the Chinese name of Jisuanke -> jisuanke, uvalive -> uva,
#     libreoj -> loj; other names are returned lower-cased.
#
# NOTE(review): the login checks use `assert`, which is removed under
# `python -O`; with optimisation on, a stale cookie would not trigger the
# re-login path.
# NOTE(review): both `except:` clauses are bare and also catch
# KeyboardInterrupt/SystemExit.
# NOTE(review): the class is flattened onto two lines, so in get_user_info
# the nesting of the statements after the inner `except` (in particular
# whether oj_username.modify(...) runs on every call or only after a
# re-login) cannot be read from the text; confirm against the upstream file
# before reformatting.
class VjudgeSpider(BaseSpider): def __init__(self): self.vjudge_http = SpiderHttp() def get_user_info(self, oj_username, accept_problems): username = oj_username.oj_username password = oj_username.oj_password try: cookie = json.loads(oj_username.oj_cookies) headers = {'Cookie': Cookie.dict_to_str(cookie)} self.vjudge_http.headers.update(headers) assert self.check_login_status() == username except: try: cookie = self._get_cookies(username, password) except: return {'success': False, 'data': []} headers = {'Cookie': Cookie.dict_to_str(cookie)} self.vjudge_http.headers.update(headers) assert self.check_login_status() == username oj_username.modify(oj_cookies=json.dumps(cookie, sort_keys=True)) accept_problem_list = [] success = False start = 0 length = 20 ok = False while not ok: url = "https://vjudge.net/status/data/?&length=20&res=1&start={}&un={}".format( start, username) res = self.vjudge_http.get(url=url).json() if not res['data']: break success = True for submission in res['data']: if submission['statusCanonical'] != 'AC': continue time_stamp = submission['time'] / 1000 accept_time = timestamp_to_str(time_stamp) oj = self._change_oj_name(submission['oj']) problem_id = submission['probNum'] if accept_problems.get('{}-{}'.format( oj, problem_id)) == accept_time: ok = True continue accept_problem_list.append({ 'oj': oj, 'problem_pid': problem_id, 'accept_time': accept_time }) start += length return {'success': success, 'data': accept_problem_list} def check_login_status(self): url = 'https://vjudge.net/user/update' res = self.vjudge_http.get(url=url).json() return res.get('username', None) def _get_cookies(self, username, password): url = 'https://vjudge.net/user/login' data = {'username': username, 'password': password} res = self.vjudge_http.post(url=url, data=data).text if res == 'success': return Cookie.str_to_dict( Cookie.dict_to_str(self.vjudge_http.sess.cookies)) raise Exception(res) def get_problem_info(self, problem_id): pass @staticmethod def 
_change_oj_name(name): name = name.lower() if name == 'gym': name = 'codeforces' elif name == 'zju': name = 'zoj' elif name == 'pku': name = 'poj' elif name == '计蒜客': name = 'jisuanke' elif name == 'uvalive': name = 'uva' elif name == 'libreoj': name = 'loj' return name
class ZuccSpider(BaseSpider):
    """Spider for acm.zucc.edu.cn that works through a logged-in session.

    Session cookies are cached as JSON in the ``zucc-cookie`` mapping. When
    the cached cookies no longer log in as ``ZUCC_ID``, a new login with
    ``ZUCC_ID`` / ``ZUCC_PASSWORD`` is performed and the cache is refreshed.
    """

    # Class-level default client; check_cookie() and get_new_cookie() replace
    # it on the instance.
    zucc_http = SpiderHttp()

    def __init__(self):
        """Restore the cached session, logging in again if it is stale."""
        try:
            self.check_cookie()
        except AssertionError:
            self.get_new_cookie()
            self.check_cookie()

    def check_cookie(self):
        """Load the cached cookies into a fresh client and verify them.

        :raises AssertionError: if the session is not logged in as
            ``ZUCC_ID``.
        """
        self.zucc_http = SpiderHttp()
        mapping = Mapping.get_by_id('zucc-cookie')
        cookie = json.loads(mapping.value)
        for k, v in cookie.items():
            self.zucc_http.sess.cookies.set(k, v)
        self.zucc_http.sess.cookies.set("lang", "cn")
        # Raised explicitly rather than via ``assert``: assert statements are
        # removed under ``python -O``, which would let a stale cookie pass
        # and skip the re-login in __init__.
        if self.check_login_status() != ZUCC_ID:
            raise AssertionError(
                'stored zucc cookie is not logged in as ZUCC_ID')

    def get_new_cookie(self):
        """Log in with the configured account and cache the new cookies.

        :raises AssertionError: if the login check does not report
            ``ZUCC_ID``.
        """
        self.zucc_http = SpiderHttp()
        self.login(ZUCC_ID, ZUCC_PASSWORD)
        if self.check_login_status() != ZUCC_ID:
            raise AssertionError('zucc login did not yield a ZUCC_ID session')
        cookie = {i.name: i.value for i in self.zucc_http.sess.cookies}
        Mapping.get_by_id('zucc-cookie').modify(
            value=json.dumps(cookie, sort_keys=True))

    def get_user_info(self, oj_username, accept_problems):
        """Collect a user's accepted submissions from the status pages.

        Pages through ``status.php`` by passing the id below the last row
        seen as ``top``. Rows count as accepted when the status link carries
        the ``label-success`` class.

        :param oj_username: object exposing an ``oj_username`` attribute.
        :param accept_problems: mapping of ``'zucc-<problem id>'`` to an
            accept time that is already recorded.
        :return: ``{'success': False, 'data': []}`` if the user does not
            exist, otherwise ``{'success': True, 'data': [...]}``.
        """
        username = oj_username.oj_username
        if not self._judge_user(username):
            return {'success': False, 'data': []}
        accept_problem_list = []
        url = 'http://acm.zucc.edu.cn/status.php?user_id={}'.format(username)
        ok = False
        while not ok:
            res = self.zucc_http.get(url=url)
            soup = BeautifulSoup(res.text, 'lxml')
            trs = soup.find('tbody').find_all('tr')
            if not trs:
                break
            # Id of the last row on this page; named to avoid shadowing the
            # builtin ``next``.
            last_run_id = -1
            for tr in trs:
                tds = tr.find_all('td')
                last_run_id = int(tds[0].text)
                status = tds[3].find_all('a')[0]['class']
                if 'label-success' not in status:
                    continue
                problem_id = tds[2].text
                accept_time = re.findall(
                    r'\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}',
                    tds[8].text)[0]
                if accept_problems.get('zucc-' + problem_id) == accept_time:
                    # Already recorded: finish this page, then stop paging.
                    ok = True
                    continue
                accept_problem_list.append({
                    'oj': 'zucc',
                    'problem_pid': problem_id,
                    'accept_time': accept_time
                })
            new_url = 'http://acm.zucc.edu.cn/status.php?user_id={}&top={}'.format(
                username, last_run_id - 1)
            # An unchanged URL would request the same page again.
            if new_url == url:
                break
            url = new_url
        return {'success': True, 'data': accept_problem_list}

    def get_problem_info(self, problem_id):
        """Return the rating for a problem.

        Fetches the problem page and parses its submit/solved counts, but
        the rating returned is always ``DEFAULT_PROBLEM_RATING``.

        :param problem_id: value of the ``id`` query parameter.
        :return: ``{'rating': DEFAULT_PROBLEM_RATING}``.
        """
        url = 'http://acm.zucc.edu.cn/problem.php?id={}'.format(problem_id)
        res = self.zucc_http.get(url=url)
        try:
            # NOTE: the counts are parsed but do not influence the rating yet.
            total = int(
                re.search(r'Submit: </span>(\d+)( )*<span',
                          res.text).group(1))
            accept = int(
                re.search(r'Solved: </span>(\d+)( )*<br>',
                          res.text).group(1))
            rating = DEFAULT_PROBLEM_RATING
        except Exception:
            # A page without the expected counters still yields the default.
            # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt
            # and SystemExit propagating.
            rating = DEFAULT_PROBLEM_RATING
        return {'rating': rating}

    def _judge_user(self, username):
        """Return True unless the user page contains "No such User!"."""
        url = 'http://acm.zucc.edu.cn/userinfo.php?user={}'.format(username)
        res = self.zucc_http.get(url=url)
        return not re.findall(r"No such User!", res.text)

    def check_login_status(self):
        """Return the text the profile script writes, or None if absent.

        Callers compare the result with ``ZUCC_ID`` to decide whether the
        session is logged in.
        """
        url = 'http://acm.zucc.edu.cn/template/bs3/profile.php'
        res = self.zucc_http.get(url=url)
        try:
            return re.search(
                r'document\.getElementById\("profile"\)\.innerHTML="(.*)";',
                res.text).group(1)
        except Exception:
            # No match (or an unreadable body) means "not logged in".
            return None

    def _get_csrf_value(self):
        """Return the first ``value="..."`` attribute found in csrf.php."""
        url = 'http://acm.zucc.edu.cn/csrf.php'
        res = self.zucc_http.get(url=url)
        return re.search(r'value="(.*?)"', res.text).group(1)

    def login(self, username, password):
        """POST the credentials and a fresh csrf value to login.php.

        The response is not inspected; callers verify the outcome with
        ``check_login_status()``.
        """
        url = 'http://acm.zucc.edu.cn/login.php'
        data = {
            'user_id': username,
            'password': password,
            'csrf': self._get_csrf_value()
        }
        self.zucc_http.post(url=url, data=data)