def __init__(self, **kwargs):
    """Initialise the module and extract the contest year from ``self.key``.

    Raises:
        InitModuleException: if ``self.name`` or ``self.url`` is empty, or if
            ``self.key`` contains no standalone four-digit number.
    """
    super(Statistic, self).__init__(**kwargs)

    # Both the name and the url are mandatory for this module.
    if not (self.name and self.url):
        raise InitModuleException()

    year_match = re.search(r'\b[0-9]{4}\b', self.key)
    if year_match is None:
        raise InitModuleException('Not found year')

    self.year = int(year_match.group())
def __init__(self, **kwargs):
    """Split ``self.key`` into a numeric contest id and an API key.

    The key is either ``<cid>`` (the default API key is used) or
    ``<cid>:<api>``.  When ``<api>`` itself contains a colon it is split on
    ``:`` and stored as a list; otherwise it is looked up in ``API_KEYS``.

    Raises:
        InitModuleException: if the contest id part is not made of digits.
    """
    super().__init__(**kwargs)

    contest_id, separator, api = self.key.partition(':')
    if not separator:
        self.api_key = DEFAULT_API_KEY
    elif ':' in api:
        self.api_key = api.split(':')
    else:
        self.api_key = API_KEYS[api]

    if re.match('^[0-9]+$', contest_id) is None:
        raise InitModuleException(f'Contest id {contest_id} should be number')
    self.cid = contest_id
def get_standings(self, users=None, statistics=None):
    """Dispatch to the matching standings parser and attach medals for finals.

    Args:
        users: forwarded unchanged to the selected parser.
        statistics: forwarded to the hash code and API parsers.

    Returns:
        The dict produced by the selected parser; for names ending in
        "final..." (optionally followed by "round") an ``options`` entry with
        one gold, one silver and one bronze medal is set on it.

    Raises:
        InitModuleException: if no parser matches ``self.url``.
    """
    # NOTE(review): the alternation in this pattern splits the whole
    # expression in two ("hash...code...(round" OR "final)" at the end of the
    # name) - confirm that this precedence is intended.
    is_hashcode = (
        'hashcode_scoreboard' in self.info
        or re.search(r'\bhash.*code\b.*\(round|final\)$', self.name, re.I)
    )

    if is_hashcode:
        standings = self._hashcode(users, statistics)
    elif '/codingcompetitions.withgoogle.com/' in self.url:
        standings = self._api_get_standings(users, statistics)
    elif any(host in self.url for host in ('/code.google.com/', '/codejam.withgoogle.com/')):
        standings = self._old_get_standings(users)
    else:
        raise InitModuleException(f'url = {self.url}')

    is_final = re.search(r'\bfinal\S*(?:\s+round)?$', self.name, re.I)
    if is_final:
        medals = []
        for medal in ('gold', 'silver', 'bronze'):
            medals.append({'name': medal, 'count': 1})
        standings['options'] = {'medals': medals}

    return standings
def __init__(self, **kwargs):
    """Detect spectator ranklists, otherwise parse ``self.key`` into id and API key.

    When ``self.standings_url`` contains ``spectator/ranklist`` nothing else
    is initialised.  Otherwise the key is either ``<cid>`` (default API key)
    or ``<cid>:<api>``; an ``<api>`` containing a colon is split on ``:`` and
    stored as a list, any other value is looked up in ``API_KEYS``.

    Raises:
        InitModuleException: if the contest id part is not made of digits.
    """
    super().__init__(**kwargs)

    standings_url = self.standings_url
    self.is_spectator_ranklist = standings_url and 'spectator/ranklist' in standings_url
    if self.is_spectator_ranklist:
        return

    contest_id, separator, api = self.key.partition(':')
    if not separator:
        self.api_key = DEFAULT_API_KEY
    elif ':' in api:
        self.api_key = api.split(':')
    else:
        self.api_key = API_KEYS[api]

    if re.match('^[0-9]+$', contest_id) is None:
        raise InitModuleException(f'Contest id {contest_id} should be number')
    self.cid = contest_id
def __init__(self, **kwargs):
    """Initialise the module and discover the standings url when it is missing.

    If ``self.standings_url`` is empty, the usaco.org contests page is
    scanned for a "... Results" link whose text mentions the contest month
    (or the contest name) and the contest year.  When only last year's link
    for the same month/name is listed, its url is "predicted" for this year
    by incrementing every number in it, and the prediction is accepted only
    if the usaco.org front page links to exactly that url.

    Raises:
        InitModuleException: if no standings url could be determined.
    """
    super(Statistic, self).__init__(**kwargs)

    if self.standings_url:
        return

    url = 'http://usaco.org/index.php?page=contests'
    page = REQ.get(url)
    matches = re.finditer(
        '<a[^>]*href="(?P<url>[^"]*)"[^>]*>(?P<name>[^<]*[0-9]{4}[^<]*Results)</a>',
        page,
    )

    month = self.start_time.strftime('%B').lower()
    contest_name = self.name.lower()
    year = self.start_time.year

    prev_standings_url = None
    for match in matches:
        name = match.group('name').lower()
        if month not in name and contest_name not in name:
            continue
        if str(year) in name:
            self.standings_url = urllib.parse.urljoin(url, match.group('url'))
            break
        if str(year - 1) in name:
            prev_standings_url = urllib.parse.urljoin(url, match.group('url'))
    else:
        # No link for this year: try last year's url with every number
        # bumped by one, confirmed against a "here" link on the front page.
        if prev_standings_url is not None:
            pred_standings_url = re.sub(
                '[0-9]+',
                lambda m: str(int(m.group(0)) + 1),
                prev_standings_url,
            )
            url = 'http://usaco.org/'
            page = REQ.get(url)
            matches = re.finditer('<a[^>]*href="?(?P<url>[^"]*)"?[^>]*>here</a>', page)
            for match in matches:
                standings_url = urllib.parse.urljoin(url, match.group('url'))
                if standings_url == pred_standings_url:
                    self.standings_url = standings_url
                    break

    if not self.standings_url:
        # The adjacent literals are concatenated as-is, so the separating
        # space after "with" has to be part of the first literal.
        raise InitModuleException(
            f'Not found standings url with '
            f'month = {month}, '
            f'year = {self.start_time.year}, '
            f'name = {self.name}'
        )
def get_standings(self, users=None, statistics=None):
    """Parse TopCoder round standings from the HTML statistics pages.

    When ``self.standings_url`` is empty and the contest started less than 30
    days ago, two match-list pages are scanned for a round dated within two
    days of the start time whose title best overlaps ``self.name``
    (word-set intersection over union).  The standings page is then parsed
    as a marathon match (page has no "Results" links) or as a single round
    match (one "Results" link per problem set / division).

    Args:
        users: optional collection of handles; when truthy, rows of other
            handles are skipped.
        statistics: optional mapping ``handle -> row`` from a previous run;
            a stored ``problems[short]['solution']`` is reused instead of
            being downloaded again.

    Returns:
        dict with the keys ``result``, ``url``, ``problems`` and ``options``.

    Raises:
        InitModuleException: if no standings url is set or could be found.
    """
    result = {}

    start_time = self.start_time.replace(tzinfo=None)

    if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
        re_round_overview = re.compile(
            r'''
            (?:<td[^>]*>
                (?:
                    [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
                    (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
                )</td>[^<]*
            ){2}
            ''',
            re.VERBOSE,
        )
        name_words = set(self.name.split())
        for url in [
            'https://www.topcoder.com/tc?module=MatchList&nr=100500',
            'https://community.topcoder.com/longcontest/stats/?module=MatchList&nr=100500',
        ]:
            page = REQ.get(url)
            matches = re_round_overview.finditer(str(page))
            # Minimal intersection-over-union a title must exceed to be accepted.
            opt = 0.61803398875
            for match in matches:
                date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                if abs(date - start_time) < timedelta(days=2):
                    title_words = set(match.group('title').split())
                    intersection = len(title_words & name_words)
                    union = len(title_words | name_words)
                    iou = intersection / union
                    if iou > opt:
                        opt = iou
                        self.standings_url = urljoin(url, match.group('url'))

    if not self.standings_url:
        raise InitModuleException('Not set standings url for %s' % self.name)

    url = self.standings_url + '&nr=100000042'
    page = REQ.get(url)
    result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)

    if not result_urls:  # marathon match
        match = re.search(
            '<[^>]*>Problem:[^<]*<a[^>]*href="(?P<href>[^"]*)"[^>]*>(?P<name>[^<]*)<',
            page,
        )
        problem_name = match.group('name').strip()
        problems_info = [{
            'short': problem_name,
            # hrefs come from raw HTML, so the escaped ampersands are decoded.
            'url': urljoin(url, match.group('href').replace('&amp;', '&')),
        }]
        rows = etree.HTML(page).xpath("//table[contains(@class, 'stat')]//tr")
        header = None
        for row in rows:
            r = parsed_table.ParsedTableRow(row)
            if len(r.columns) < 8:
                continue
            values = [c.value.strip().replace(u'\xa0', '') for c in r.columns]
            if header is None:
                # The first sufficiently wide row is the table header.
                header = values
                continue

            d = OrderedDict(list(zip(header, values)))
            handle = d.pop('Handle').strip()
            d = self._dict_as_number(d)
            if 'rank' not in d or users and handle not in users:
                continue
            row = result.setdefault(handle, OrderedDict())
            row.update(d)

            score = row.pop('final_score' if 'final_score' in row else 'provisional_score')
            row['member'] = handle
            row['place'] = row.pop('rank')
            row['solving'] = score
            row['solved'] = {'solving': 1 if score > 0 else 0}

            problems = row.setdefault('problems', {})
            problem = problems.setdefault(problem_name, {})
            problem['result'] = score

            history_index = values.index('submission history')
            if history_index:
                column = r.columns[history_index]
                href = column.node.xpath('a/@href')
                if href:
                    problem['url'] = urljoin(url, href[0])
    else:  # single round match
        matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
        problems_sets = []
        for match in matches:
            problems = re.findall(
                '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                match.group(),
                re.IGNORECASE,
            )
            if problems:
                problems_sets.append([{'short': n, 'url': urljoin(url, u)} for u, n in problems])

        # Several problem sets mean several divisions: problems are then
        # grouped per division instead of being stored in a flat list.
        problems_info = dict() if len(problems_sets) > 1 else list()
        for problems_set, result_url in zip(problems_sets, result_urls):
            url = urljoin(self.standings_url, result_url + '&em=1000000042')
            # Decode escaped ampersands so that parse_qs sees plain keys.
            url = url.replace('&amp;', '&')
            division = int(parse_qs(url)['dn'][0])

            for p in problems_set:
                d = problems_info
                if len(problems_sets) > 1:
                    d = d.setdefault('division', OrderedDict())
                    d = d.setdefault('I' * division, [])
                d.append(p)

            page = REQ.get(url)
            rows = etree.HTML(page).xpath("//tr[@valign='middle']")
            header = None
            url_infos = []
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 10:
                    continue
                values = [c.value for c in r.columns]
                if header is None:
                    header = values
                    continue

                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Coders').strip()
                d = self._dict_as_number(d)
                if 'division_placed' not in d or users and handle not in users:
                    continue

                row = result.setdefault(handle, OrderedDict())
                row.update(d)

                # Drop the rating columns when all of them are empty.
                if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                    row.pop('new_rating', None)
                    row.pop('old_rating', None)
                    row.pop('rating_change', None)

                row['member'] = handle
                row['place'] = row.pop('division_placed')
                row['solving'] = row['point_total']
                row['solved'] = {'solving': 0}
                row['division'] = 'I' * division

                if 'adv.' in row:
                    row['advanced'] = row.pop('adv.').lower().startswith('y')

                url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                url_infos.append(url_info)

            def fetch_solution(url):
                """Download the source of one solution; return None when unavailable."""
                for i in range(2):
                    try:
                        page = REQ.get(url, time_out=60)
                    except FailOnGetResponse:
                        sleep(i * 10 + 3)
                        continue
                    match = re.search(
                        '<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                        page,
                        re.DOTALL | re.IGNORECASE,
                    )
                    if not match:
                        # A page without the solution cell is not an error.
                        return None
                    ret = html.unescape(match.group('solution'))
                    ret = ret.strip()
                    ret = ret.replace('<BR>', '\n')
                    ret = ret.replace('\xa0', ' ')
                    return ret
                return None

            def fetch_info(url):
                """Parse one coder page.

                Always returns the 6-tuple
                ``(url, handle, room, problems, challenges, n_fetch_solution)``;
                ``handle`` is None when the page could not be loaded or parsed.
                """
                failed = (url, None, None, {}, [], 0)

                delay = 3
                for _ in range(5):
                    try:
                        page = REQ.get(url)
                        break
                    except Exception:
                        sleep(delay)
                        delay *= 2
                else:
                    return failed

                match = re.search(
                    'class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                    page,
                    re.IGNORECASE,
                )
                if not match:
                    return failed
                handle = html.unescape(match.group('handle').strip())

                match = re.search(r' Room\s*(?P<room>[0-9]+)', page)
                room = match.group('room') if match else None

                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                problems = {}
                n_fetch_solution = 0
                for match in matches:
                    d = match.groupdict()
                    short = d.pop('short')
                    solution_url = urljoin(url, d['url'])
                    d['url'] = solution_url
                    d = self._dict_as_number(d)
                    # Failed solutions are stored with a negative score.
                    if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                        d['result'] = -d['result']
                    if abs(d['result']) < 1e-9:
                        d.pop('result')
                    # A time made only of zeros and separators carries no information.
                    if re.match('^[0.:]+$', d['time']):
                        d.pop('time')

                    solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                    if not solution:
                        n_fetch_solution += 1
                        solution = fetch_solution(solution_url)
                    d['solution'] = solution

                    problems[short] = d

                challenges = []
                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                for match in matches:
                    d = match.groupdict()
                    d = {k: v.strip() for k, v in d.items()}
                    d['result'] = float(d['result'].replace(',', '.'))
                    d['url'] = urljoin(url, d['url'])

                    p = problems.setdefault(d['problem'], {})
                    p.setdefault('extra_score', 0)
                    p['extra_score'] += d['result']
                    p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                    challenges.append(d)

                return url, handle, room, problems, challenges, n_fetch_solution

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                n_fetch_solution = 0
                for url, handle, room, problems, challenges, n_sol in executor.map(fetch_info, url_infos):
                    n_fetch_solution += n_sol
                    pbar.set_description(f'div{division} {url}')
                    pbar.set_postfix(n_solution=n_fetch_solution)
                    pbar.update()
                    if handle is None:
                        continue
                    if handle not in result:
                        # The coder page and the results table disagree on the
                        # handle; there is no row to attach the details to.
                        LOG.error(f'{handle} not in result, url = {url}')
                        continue

                    row = result[handle]
                    row['url'] = url
                    if room:
                        row['room'] = room
                    row['problems'] = problems
                    row['challenges'] = challenges
                    for p in problems.values():
                        if p.get('result', 0) > 1e-9:
                            row['solved']['solving'] += 1
                    if challenges:
                        h = row.setdefault('hack', {
                            'title': 'challenges',
                            'successful': 0,
                            'unsuccessful': 0,
                        })
                        for c in challenges:
                            h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
        'options': {
            'fixed_fields': [('hack', 'Challenges')],
        },
    }

    # Final rounds award exactly one medal of each kind.
    if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
        standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]

    return standings
def __init__(self, **kwargs):
    """Initialise the module; name, start time and url are all mandatory.

    Raises:
        InitModuleException: if ``self.name``, ``self.start_time`` or
            ``self.url`` is empty.
    """
    super(Statistic, self).__init__(**kwargs)

    has_required_fields = self.name and self.start_time and self.url
    if not has_required_fields:
        raise InitModuleException()
def __init__(self, **kwargs):
    """Initialise the module and require an IOI statistics olympiad url.

    Raises:
        InitModuleException: if ``self.url`` does not start with
            ``http://stats.ioinformatics.org/olympiads/``.
    """
    super(Statistic, self).__init__(**kwargs)

    expected_prefix = 'http://stats.ioinformatics.org/olympiads/'
    if self.url.startswith(expected_prefix):
        return

    raise InitModuleException(f'Url = {self.url} should be from stats.ioinformatics.org')
def __init__(self, **kwargs):
    """Initialise the module; a standings url must already be set.

    Raises:
        InitModuleException: if ``self.standings_url`` is empty.
    """
    super(Statistic, self).__init__(**kwargs)

    if self.standings_url:
        return

    raise InitModuleException('Not set standings url for %s' % self.name)
def get_standings(self, users=None, statistics=None):
    """Parse TopCoder standings for a marathon match or a single round match.

    When ``self.standings_url`` is empty and the contest started less than 30
    days ago, the match list page and the ``dd_round_list`` data feed are
    scanned for a round dated within two days of the start time whose
    canonised title best overlaps the canonised ``self.name``.

    A ``/challenges/<id>`` fragment in ``self.url`` or ``self.standings_url``
    selects the marathon-match branch (JSON API); otherwise the HTML round
    statistics are parsed, optionally enriched with the ``dd_round_results``
    data feed.

    Args:
        users: optional collection of handles; when truthy, rows of other
            handles are skipped.  An empty (but not None) collection skips
            the per-coder parsing of single round matches entirely.
        statistics: optional mapping ``handle -> row`` from a previous run;
            a stored ``problems[short]['solution']`` is reused instead of
            being downloaded again.

    Returns:
        dict with the keys ``result``, ``url``, ``problems``,
        ``hidden_fields``, ``fields_types`` and ``options``, plus ``writers``
        when problem writers were found.

    Raises:
        InitModuleException: single round match without a standings url.
        ExceptionParseStandings: empty marathon standings, or a round page
            without "Results" links.
    """
    result = {}
    hidden_fields = []
    fields_types = {}
    order = None
    writers = defaultdict(int)

    start_time = self.start_time.replace(tzinfo=None)

    if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
        # Minimal similarity a candidate title must exceed to be accepted.
        opt = 0.61803398875

        def canonize_title(value):
            """Normalise a round title into a set of comparable words."""
            value = value.lower()
            value = re.sub(r'\s+-[^-]+$', '', value)
            value = re.sub(r'\bsingle\s+round\s+match\b', 'srm', value)
            value = re.sub(r'\bmarathon\s+match\b', 'mm', value)
            value = re.sub(r'[0-9]*([0-9]{2})\s*tco(\s+)', r'tco\1\2', value)
            value = re.sub(r'tco\s*[0-9]*([0-9]{2})(\s+)', r'tco\1\2', value)
            value = re.sub(r'^[0-9]{2}([0-9]{2})(\s+)', r'tco\1\2', value)
            return set(re.split('[^A-Za-z0-9]+', value))

        def process_match(date, title, url):
            """Adopt ``url`` as standings url if ``title`` is the best match so far."""
            nonlocal opt

            if abs(date - start_time) > timedelta(days=2):
                return

            a1 = canonize_title(title)
            a2 = canonize_title(self.name)
            intersection = 0
            for w1 in a1:
                for w2 in a2:
                    # Numbers must match exactly, words may match by prefix.
                    if w1.isdigit() or w2.isdigit():
                        if w1 == w2:
                            intersection += 1
                            break
                    elif w1.startswith(w2) or w2.startswith(w1):
                        intersection += 1
                        break
            union = len(a1) + len(a2) - intersection
            iou = intersection / union
            if iou > opt:
                opt = iou
                self.standings_url = url

        url = 'https://www.topcoder.com/tc?module=MatchList&nr=100500'
        page = REQ.get(url)
        re_round_overview = re.compile(
            r'''
            (?:<td[^>]*>(?:
                [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
                (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
            )</td>[^<]*){2}
            ''',
            re.VERBOSE,
        )
        matches = re_round_overview.finditer(str(page))
        for match in matches:
            date = datetime.strptime(match.group('date'), '%m.%d.%Y')
            process_match(date, match.group('title'), urljoin(url, match.group('url')))

        url = 'https://www.topcoder.com/tc?module=BasicData&c=dd_round_list'
        page = REQ.get(url)
        root = ET.fromstring(page)
        for child in root:
            data = {}
            for field in child:
                data[field.tag] = field.text
            date = dateutil.parser.parse(data['date'])
            url = 'https://www.topcoder.com/stat?c=round_overview&er=5&rd=' + data['round_id']
            process_match(date, data['full_name'], url)

    # A "/challenges/<id>" url identifies a marathon match.
    challenge_id = None
    for url in self.url, self.standings_url:
        if url:
            match = re.search('/challenges/(?P<cid>[0-9]+)', url)
            if match:
                challenge_id = match.group('cid')
                break

    if challenge_id:  # marathon match
        url = conf.TOPCODER_API_MM_URL_FORMAT.format(challenge_id)
        page = REQ.get(url)
        data = json.loads(page)
        problems_info = []
        hidden_fields.extend(['time', 'submits', 'style'])
        fields_types = {'delta_rank': ['delta'], 'delta_score': ['delta']}
        order = ['place_as_int', '-solving', 'addition__provisional_rank', '-addition__provisional_score']
        for row in data:
            handle = row.pop('member')
            r = result.setdefault(handle, OrderedDict())
            r['member'] = handle
            r['place'] = row.pop('finalRank', None)
            r['provisional_rank'] = row.pop('provisionalRank', None)
            r['style'] = row.pop('style')
            if r['place'] and r['provisional_rank']:
                r['delta_rank'] = r['provisional_rank'] - r['place']
            submissions = row.pop('submissions')
            has_solution = False
            for s in submissions:
                score = s.get('finalScore')
                if not score or score == '-':
                    # No final score yet: remember the first provisional one.
                    if 'provisional_score' not in r:
                        p_score = s.pop('provisionalScore', None)
                        if isinstance(p_score, str):
                            p_score = asfloat(p_score)
                        if p_score is not None:
                            r['provisional_score'] = round(p_score, 2) if p_score >= 0 else False
                            r['time'] = s['created']
                            has_solution = True
                    continue
                r['solving'] = score
                r['solved'] = {'solving': int(score > 0)}
                p_score = s.pop('provisionalScore')
                if isinstance(p_score, str):
                    p_score = asfloat(p_score)
                if p_score is not None and p_score > 0:
                    r['provisional_score'] = round(p_score, 2)
                    r['delta_score'] = round(score - p_score, 2)
                r['time'] = s['created']
                has_solution = True
                break
            if not has_solution:
                continue
            r['submits'] = len(submissions)
        if not result:
            raise ExceptionParseStandings('empty standings')
    else:  # single round match
        if not self.standings_url:
            raise InitModuleException('Not set standings url for %s' % self.name)
        url = self.standings_url + '&nr=100000042'
        page = REQ.get(url, time_out=100)
        result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)
        if not result_urls:
            raise ExceptionParseStandings('not found result urls')

        # Optional extra per-coder data from the data feed, keyed by handle.
        dd_round_results = {}
        match = re.search('rd=(?P<rd>[0-9]+)', url)
        if match:
            rd = match.group('rd')
            url = f'https://www.topcoder.com/tc?module=BasicData&c=dd_round_results&rd={rd}'
            try:
                dd_round_results_page = REQ.get(url)
                root = ET.fromstring(dd_round_results_page)
                for child in root:
                    data = {}
                    for field in child:
                        data[field.tag] = field.text
                    handle = data.pop('handle')
                    dd_round_results[handle] = self._dict_as_number(data)
            except FailOnGetResponse:
                # The data feed is optional; standings are parsed without it.
                pass

        hidden_fields.extend(['coding_phase', 'challenge_phase', 'system_test', 'point_total', 'room'])

        matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
        problems_sets = []
        for match in matches:
            problems = re.findall(
                '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                match.group(),
                re.IGNORECASE,
            )
            if problems:
                problems_sets.append([
                    {'short': n, 'url': urljoin(url, u)}
                    for u, n in problems
                ])

        # Several problem sets mean several divisions: problems are then
        # grouped per division instead of being stored in a flat list.
        problems_info = dict() if len(problems_sets) > 1 else list()
        for problems_set, result_url in zip(problems_sets, result_urls):
            url = urljoin(self.standings_url, result_url + '&em=1000000042')
            # Decode escaped ampersands so that parse_qs sees plain keys.
            url = url.replace('&amp;', '&')
            division = int(parse_qs(url)['dn'][0])
            division_str = 'I' * division

            with PoolExecutor(max_workers=3) as executor:
                def fetch_problem(p):
                    """Enrich problem ``p`` in place with tags, writers and per-division info."""
                    errors = set()
                    for attempt in range(3):
                        try:
                            page = REQ.get(p['url'], time_out=30)
                            match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
                            page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
                            matches = re.findall(r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>', page, re.DOTALL)  # noqa
                            for key, value in matches:
                                key = key.strip().rstrip(':').lower()
                                if key == 'categories':
                                    tags = [t.strip().lower() for t in value.split(',')]
                                    tags = [t for t in tags if t]
                                    if tags:
                                        p['tags'] = tags
                                elif key.startswith('writer') or key.startswith('tester'):
                                    key = key.rstrip('s') + 's'
                                    p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
                            for w in p.get('writers', []):
                                writers[w] += 1

                            info = p.setdefault('info', {})
                            matches = re.finditer('<table[^>]*paddingTable2[^>]*>.*?</table>', page, re.DOTALL)
                            for match in matches:
                                html_table = match.group(0)
                                rows = parsed_table.ParsedTable(html_table)
                                for row in rows:
                                    key, value = None, None
                                    for k, v in row.items():
                                        if k == "":
                                            key = v.value
                                        elif k and division_str in k.split():
                                            value = v.value
                                    if key and value:
                                        key = re.sub(' +', '_', key.lower())
                                        info[key] = value
                                        if key == 'point_value':
                                            value = toint(value) or asfloat(value)
                                            if value is not None:
                                                p['full_score'] = value
                        except Exception as e:
                            errors.add(f'error parse problem info {p}: {e}')
                            sleep(5 + attempt)
                        else:
                            # Parsed successfully: forget earlier failures and
                            # stop retrying, so nothing is fetched or counted twice.
                            errors = None
                            break
                    if errors:
                        LOG.error(errors)

                    return p

                for p in tqdm.tqdm(executor.map(fetch_problem, problems_set), total=len(problems_set)):
                    d = problems_info
                    if len(problems_sets) > 1:
                        d = d.setdefault('division', OrderedDict())
                        d = d.setdefault(division_str, [])
                    d.append(p)

            # An explicitly empty users filter means only problems are wanted.
            if not users and users is not None:
                continue

            page = REQ.get(url)
            rows = etree.HTML(page).xpath("//tr[@valign='middle']")
            header = None
            url_infos = []
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 10:
                    continue
                values = [c.value for c in r.columns]
                if header is None:
                    # The first sufficiently wide row is the table header.
                    header = values
                    continue

                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Coders').strip()
                d = self._dict_as_number(d)
                if users and handle not in users:
                    continue

                row = result.setdefault(handle, OrderedDict())
                row.update(d)

                # Drop the rating columns when all of them are empty.
                if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                    row.pop('new_rating', None)
                    row.pop('old_rating', None)
                    row.pop('rating_change', None)

                row['member'] = handle
                row['place'] = row.pop('division_placed', None)
                row['solving'] = row['point_total']
                row['solved'] = {'solving': 0}
                row['division'] = 'I' * division

                if 'adv.' in row:
                    row['advanced'] = row.pop('adv.').lower().startswith('y')

                url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                url_infos.append(url_info)

            def fetch_solution(url):
                """Download the source of one solution; return None when unavailable."""
                for i in range(2):
                    try:
                        page = REQ.get(url, time_out=60)
                        match = re.search('<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                                          page,
                                          re.DOTALL | re.IGNORECASE)
                        if not match:
                            break
                        ret = html.unescape(match.group('solution'))
                        ret = ret.strip()
                        ret = ret.replace('<BR>', '\n')
                        ret = ret.replace('\xa0', ' ')
                        return ret
                    except FailOnGetResponse:
                        sleep(i * 10 + 3)
                return None

            n_failed_fetch_info = 0

            def fetch_info(url):
                """Parse one coder page; return None when it cannot be fetched.

                On success returns the 6-tuple
                ``(url, handle, room, problems, challenges, n_fetch_solution)``.
                """
                nonlocal n_failed_fetch_info
                # Give up on coder pages once too many of them have failed.
                if n_failed_fetch_info > 10:
                    return
                delay = 10
                for _ in range(5):
                    try:
                        page = REQ.get(url, time_out=delay)
                        match = re.search('class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                                          page,
                                          re.IGNORECASE)
                        if match:
                            break
                    except Exception:
                        sleep(delay + _)
                else:
                    n_failed_fetch_info += 1
                    return

                handle = html.unescape(match.group('handle').strip())

                match = re.search(r' Room\s*(?P<room>[0-9]+)', page)
                room = match.group('room') if match else None

                matches = re.finditer(r'''
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                ''', page, re.VERBOSE | re.IGNORECASE)
                problems = {}
                n_fetch_solution = 0
                for match in matches:
                    d = match.groupdict()
                    short = d.pop('short')
                    solution_url = urljoin(url, d['url'])
                    d['url'] = solution_url
                    d = self._dict_as_number(d)
                    # Failed solutions are stored with a negative score.
                    if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                        d['result'] = -d['result']
                    if abs(d['result']) < 1e-9:
                        d.pop('result')
                    # A time made only of zeros and separators carries no information.
                    if re.match('^[0.:]+$', d['time']):
                        d.pop('time')
                    else:
                        time_in_seconds = 0
                        for t in d['time'].split(':'):
                            time_in_seconds = time_in_seconds * 60 + asfloat(t)
                        d['time_in_seconds'] = time_in_seconds

                    solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                    if not solution:
                        n_fetch_solution += 1
                        solution = fetch_solution(solution_url)
                    d['solution'] = solution

                    problems[short] = d

                challenges = []
                matches = re.finditer(r'''
                    <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                ''', page, re.VERBOSE | re.IGNORECASE)
                for match in matches:
                    d = match.groupdict()
                    d = {k: v.strip() for k, v in d.items()}
                    d['result'] = float(d['result'].replace(',', '.'))
                    d['url'] = urljoin(url, d['url'])

                    p = problems.setdefault(d['problem'], {})
                    p.setdefault('extra_score', 0)
                    p['extra_score'] += d['result']
                    p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                    challenges.append(d)

                return url, handle, room, problems, challenges, n_fetch_solution

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                n_fetch_solution = 0
                for info in executor.map(fetch_info, url_infos):
                    if info is None:
                        # Keep the progress bar in step for failed pages too.
                        pbar.update()
                        continue
                    url, handle, room, problems, challenges, n_sol = info
                    n_fetch_solution += n_sol
                    pbar.set_description(f'div{division} {url}')
                    pbar.set_postfix(n_solution=n_fetch_solution, n_failed_fetch_info=n_failed_fetch_info)
                    pbar.update()
                    if handle is None:
                        continue
                    if handle not in result:
                        # The coder page and the results table disagree on the
                        # handle; there is no row to attach the details to.
                        LOG.error(f'{handle} not in result, url = {url}')
                        continue

                    row = result[handle]
                    row['url'] = url
                    if room:
                        row['room'] = room
                    row['problems'] = problems
                    row['challenges'] = challenges
                    for p in problems.values():
                        if p.get('result', 0) > 1e-9:
                            row['solved']['solving'] += 1
                    if challenges:
                        h = row.setdefault('hack', {
                            'title': 'challenges',
                            'successful': 0,
                            'unsuccessful': 0,
                        })
                        for c in challenges:
                            h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

        if dd_round_results:
            # Fields already present in parsed rows are never overwritten.
            fields = set()
            hidden_fields_set = set(hidden_fields)
            for data in result.values():
                for field in data.keys():
                    fields.add(field)

            k_mapping = {'new_vol': 'new_volatility', 'advanced': None}
            for handle, data in dd_round_results.items():
                if handle not in result:
                    continue
                row = result[handle]

                for k, v in data.items():
                    k = k_mapping.get(k, k)
                    if k and k not in fields:
                        if k in {'new_rating', 'old_rating'} and not v:
                            continue
                        row[k] = v
                        if k not in hidden_fields_set:
                            hidden_fields_set.add(k)
                            hidden_fields.append(k)
                        # "level_<one|two|three>_language" names the language
                        # used for the corresponding problem of the division.
                        ks = k.split('_')
                        if ks[0] == 'level' and ks[-1] == 'language' and v and v.lower() != 'unspecified':
                            idx = {'one': 0, 'two': 1, 'three': 2}.get(ks[1], None)
                            d = problems_info
                            if len(problems_sets) > 1:
                                d = d['division'][row['division']]
                            # Rows whose coder page failed to load have no problems.
                            row_problems = row.get('problems', {})
                            if idx is not None and 0 <= idx < len(d) and d[idx]['short'] in row_problems:
                                row_problems[d[idx]['short']]['language'] = v

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
        'hidden_fields': hidden_fields,
        'fields_types': fields_types,
        'options': {
            'fixed_fields': [('hack', 'Challenges')],
        },
    }

    if writers:
        # Most frequent writers first.
        writers = [w[0] for w in sorted(writers.items(), key=lambda w: w[1], reverse=True)]
        standings['writers'] = writers

    # Final rounds award exactly one medal of each kind.
    if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
        standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]

    if order:
        standings['options']['order'] = order

    return standings
def __init__(self, **kwargs):
    """Initialise the module and require an IOI statistics olympiad url.

    Raises:
        InitModuleException: if ``self.url`` does not contain
            ``//stats.ioinformatics.org/olympiads/``.
    """
    super(Statistic, self).__init__(**kwargs)

    required_fragment = '//stats.ioinformatics.org/olympiads/'
    if required_fragment in self.url:
        return

    raise InitModuleException(f'Url {self.url} should be contains stats.ioinformatics.org/olympiads')
def get_standings(self, users=None, statistics=None):
    """Dispatch to the standings parser that matches ``self.url``.

    Args:
        users: forwarded unchanged to the selected parser.
        statistics: forwarded to the API parser only.

    Returns:
        Whatever the selected parser returns.

    Raises:
        InitModuleException: if the url belongs to none of the known hosts.
    """
    url = self.url

    if '/codingcompetitions.withgoogle.com/' in url:
        return self._api_get_standings(users, statistics)

    legacy_hosts = ('/code.google.com/', '/codejam.withgoogle.com/')
    if any(host in url for host in legacy_hosts):
        return self._old_get_standings(users)

    raise InitModuleException(f'url = {self.url}')