Exemplo n.º 1
0
    def get_source_code(contest, problem):
        """Fetch a submission page and extract the submitted source code.

        Raises ExceptionParseStandings when the problem carries no URL or
        the page lacks the expected <pre id="submission-code"> element.
        """
        if 'url' not in problem:
            raise ExceptionParseStandings('Not found url')

        content = REQ.get(problem['url'])
        found = re.search('<pre[^>]*id="submission-code"[^>]*>(?P<source>[^<]*)</pre>', content)
        if found is None:
            raise ExceptionParseStandings('Not found source code')
        return {'solution': html.unescape(found.group('source'))}
Exemplo n.º 2
0
    def get_source_code(contest, problem):
        """Fetch a submission page; return its source code and language class.

        The <pre> element's CSS classes are scanned for a 'lang-*' marker,
        which, when present, is reported as 'lang_class'.
        """
        if 'url' not in problem:
            raise ExceptionParseStandings('Not found url')

        content = _get(problem['url'])
        found = re.search('<pre[^>]*id="program-source-text"[^>]*class="(?P<class>[^"]*)"[^>]*>(?P<source>[^<]*)</pre>', content)  # noqa
        if found is None:
            raise ExceptionParseStandings('Not found source code')
        ret = {'solution': html.unescape(found.group('source'))}
        for css_class in found.group('class').split():
            if css_class.startswith('lang-'):
                ret['lang_class'] = css_class
        return ret
Exemplo n.º 3
0
 def get_league_name(league):
     nonlocal leagues_names
     if leagues_names is None:
         raise ExceptionParseStandings('not found leagues_names')
     index = league['divisionCount'] - league['divisionIndex'] - 1 + league.get('divisionOffset', 0)
     number = index - len(leagues_names) + 2
     return f'{leagues_names[-1]} {number}' if number >= 1 else leagues_names[index]
Exemplo n.º 4
0
 def _get(url, lock=Lock()):
     """GET `url` through REQ, logging in to HackerEarth on demand and
     retrying HTTP 500 responses.

     NOTE: the mutable default `lock=Lock()` is deliberate -- the lock is
     created once at definition time, so every call (every thread) shares
     the same lock and the login sequence is serialized.
     """
     attempt = 0
     while True:
         attempt += 1
         try:
             page = REQ.get(url)
             # A login form in the response body means the session is not
             # authenticated yet.
             if 'id="id_login"' in page and 'id="id_password"' in page:
                 with lock:
                     # Double-checked flag: only the first thread performs
                     # the login; later threads see LOGGED_IN and skip it.
                     if not Statistic.LOGGED_IN:
                         page = REQ.get(Statistic.LOGIN_URL_)
                         page = REQ.submit_form(
                             {
                                 'login': conf.HACKEREARTH_USERNAME,
                                 'password': conf.HACKEREARTH_PASSWORD,
                                 'signin': 'Log In',
                             },
                             limit=0,
                         )
                         Statistic.LOGGED_IN = True
             # AJAX endpoints require the XHR marker and, when available,
             # the CSRF token from the session cookie.
             if 'AJAX' in url:
                 headers = {'x-requested-with': 'XMLHttpRequest'}
                 csrftoken = REQ.get_cookie('csrftoken')
                 if csrftoken:
                     headers['x-csrftoken'] = csrftoken
             else:
                 headers = {}
             return REQ.get(url, headers=headers)
         except FailOnGetResponse as e:
             # Retry only on HTTP 500, up to 15 attempts, linear backoff.
             if attempt == 15 or getattr(e.args[0], 'code', None) != 500:
                 raise ExceptionParseStandings(e.args[0])
             sleep(2 * attempt)
Exemplo n.º 5
0
    def get_standings(self, users=None, statistics=None):
        """Parse CTF event standings from `self.url`.

        Returns a standings dict keyed by member id; returns
        {'action': 'delete'} when the event page is gone (HTTP 404).
        """
        try:
            page = REQ.get(self.url)
        except FailOnGetResponse as e:
            # 404 means the event no longer exists -> ask caller to delete.
            return {'action': 'delete'} if e.code == 404 else {}

        match = re.search('<table[^>]*past_event_rating[^>]*>.*?</table>',
                          page, re.DOTALL)
        if not match:
            raise ExceptionParseStandings('not found table')

        header_mapping = {
            'Team': 'name',
            'Place': 'place',
            'CTF points': 'solving',
        }
        table = parsed_table.ParsedTable(html=match.group(0),
                                         header_mapping=header_mapping)

        results = {}
        max_score = 0
        for r in table:
            row = OrderedDict()
            for k, v in r.items():
                # Header cells may carry '*' markers and surrounding spaces.
                k = k.strip('*')
                k = k.strip(' ')
                value = ' '.join([c.value for c in v]).strip() if isinstance(
                    v, list) else v.value
                if k == 'name':
                    # Member id is the numeric suffix of the team profile link.
                    href = v.column.node.xpath('.//a/@href')[0]
                    match = re.search('/([0-9]+)/?$', href)
                    row['member'] = match.group(1)
                    row['name'] = value
                else:
                    # as_number presumably coerces numeric-looking strings;
                    # TODO(review) confirm it returns ints/floats here.
                    value = as_number(value)
                row[k] = value
            max_score = max(max_score, row.get('solving', 0))
            results[row['member']] = row

        if max_score > 0:
            # Express each score as a percentage of the best score.
            for row in results.values():
                if 'solving' in row:
                    row['percent'] = f'{row["solving"] * 100 / max_score:.2f}'

        # Medal only for finals that are not qualification rounds; the
        # expression yields a match object or None, used as a boolean.
        has_medals = not re.search(r'\bqual', self.name,
                                   flags=re.I) and re.search(
                                       r'\bfinal', self.name, flags=re.I)
        medals = [{'name': 'gold', 'count': 1}] if has_medals else []

        return dict(
            standings_url=self.url,
            result=results,
            options={'medals': medals},
        )
Exemplo n.º 6
0
    def get_standings(self, users=None, statistics=None):
        """Build standings from a local CSV export.

        Reads the CSV referenced by `standings_csv_filepath_` in
        `self.info`. Columns: 'User' -> member/name, 'Last valid
        submission' -> penalty, 'Global' -> solving; any other column is
        treated as a problem scored out of 100.

        Raises ExceptionParseStandings when no CSV filepath is configured.
        """
        filepath = self.info.get('standings_csv_filepath_')
        if not filepath:
            raise ExceptionParseStandings('not found csv filepath')

        season = self.get_season()

        # FIX: `result` was initialized twice; one initialization removed.
        result = {}
        problems_info = collections.OrderedDict()

        # newline='' is the mode the csv module documentation recommends
        # for files handed to csv readers.
        with open(filepath, 'r', newline='') as fo:
            data = csv.DictReader(fo)
            last, place = None, None
            for idx, r in enumerate(data, start=1):
                row = collections.OrderedDict()
                problems = row.setdefault('problems', {})
                for k, v in r.items():
                    if k == 'User':
                        row['member'] = v + ' ' + season
                        row['name'] = v
                    elif k == 'Last valid submission':
                        delta = arrow.get(v, ['YYYY-MM-DD H:mm:ss']) - self.start_time
                        row['penalty'] = self.to_time(delta, 3)
                    elif k in ['Global']:
                        row['solving'] = v
                    else:
                        if k not in problems_info:
                            problems_info[k] = {'short': k, 'full_score': 100}
                        if float(v) > 1e-9:
                            p = problems.setdefault(k, {})
                            p['result'] = v
                            p['partial'] = float(v) + 1e-9 < problems_info[k]['full_score']
                # Standard competition ranking: equal (solving, penalty)
                # pairs share the same place.
                score = (row['solving'], row['penalty'])
                if last != score:
                    last = score
                    place = idx
                row['place'] = place
                result[row['member']] = row

        standings = {
            'result': result,
            'problems': list(problems_info.values()),
            'hidden_fields': ['medal'],
        }
        return standings
Exemplo n.º 7
0
 def _get(url):
     """Fetch `url` via REQ, attaching XHR/CSRF headers for AJAX endpoints.

     Retries with exponential backoff on HTTP 500, up to 7 attempts; any
     other failure is re-raised as ExceptionParseStandings immediately.
     """
     attempt = 0
     while True:
         attempt += 1
         try:
             if 'AJAX' not in url:
                 headers = {}
             else:
                 headers = {'x-requested-with': 'XMLHttpRequest'}
                 csrftoken = REQ.get_cookie('csrftoken')
                 if csrftoken:
                     headers['x-csrftoken'] = csrftoken
             return REQ.get(url, headers=headers)
         except FailOnGetResponse as e:
             code = getattr(e.args[0], 'code', None)
             if code != 500 or attempt == 7:
                 raise ExceptionParseStandings(e.args[0])
             sleep(2**attempt)
Exemplo n.º 8
0
    def get_standings(self, users=None, statistics=None):
        """Fetch a GeeksforGeeks contest leaderboard through the paged API.

        Page 0 is fetched first to learn the total row count and page size;
        the remaining pages are fetched concurrently, rate-limited to 10
        calls per 2 seconds. Rows are keyed by the handle taken from the
        profile link.
        """
        result = {}

        @RateLimiter(max_calls=10, period=2)
        def fetch_and_process_page(page):
            url = f'https://practiceapi.geeksforgeeks.org/api/v1/contest/{self.key}/leaderboard/?page={page + 1}&type=current'  # noqa
            page = REQ.get(url)
            data = json.loads(page)

            for row in data['results']['ranks_list']:
                # The canonical handle comes from the profile URL; the
                # displayed name may differ and is kept separately.
                handle = row.pop('profile_link').rstrip('/').rsplit('/', 1)[-1]
                r = result.setdefault(handle, OrderedDict())
                name = row.pop('handle')
                if name != handle:
                    r['name'] = name
                r['member'] = handle
                r['place'] = row.pop('rank')
                r['solving'] = row.pop('score')
                last_correct_submission = row.get('last_correct_submission')
                if last_correct_submission:
                    # API timestamps carry no offset; append IST (+05:30).
                    time = dateutil.parser.parse(last_correct_submission +
                                                 '+05:30')
                    delta = time - self.start_time
                    r['time'] = self.to_time(delta)
                for k, v in list(row.items()):
                    if k.endswith('_score'):
                        r[k] = row.pop(k)

            return data

        data = fetch_and_process_page(0)
        total = data['results']['rows_count']
        per_page = len(data['results']['ranks_list'])
        if not total or not per_page:
            raise ExceptionParseStandings('empty standings')
        n_pages = (total + per_page - 1) // per_page

        with PoolExecutor(max_workers=8) as executor:
            # FIX: drain the iterator -- executor.map only raises worker
            # exceptions on iteration, so without this a failed page fetch
            # would be silently ignored.
            for _ in executor.map(fetch_and_process_page, range(1, n_pages)):
                pass

        ret = {
            'url': os.path.join(self.url, 'leaderboard'),
            'result': result,
        }
        return ret
Exemplo n.º 9
0
 def query(name, variables):
     """POST the named GraphQL query with `variables`; return parsed JSON.

     Raises ExceptionParseStandings when the response is not valid JSON.
     """
     payload = {
         'fb_dtsg': tokens.get('dtsginitialdata', ''),
         'lsd': tokens['lsd'],
         'fb_api_caller_class': 'RelayModern',
         'fb_api_req_friendly_name': name,
         'variables': json.dumps(variables),
         'doc_id': self.info['_scoreboard_ids'][name],
     }
     raw = REQ.get(
         self.API_GRAPH_URL_,
         post=payload,
         headers={'accept-language': 'en-US,en;q=1.0'},
     )
     try:
         return json.loads(raw)
     except Exception as e:
         raise ExceptionParseStandings(f'Error on query {name} = {e}')
Exemplo n.º 10
0
        def parse_problems_infos():
            """Scrape the contest's problems table.

            Returns an OrderedDict mapping problem short name -> info dict
            (short, name, optional _letter and url). Rows belonging to
            rounds other than `self.name` are skipped.
            """
            problem_url = self.standings_url.replace('/ranking', '/p')
            page = REQ.get(problem_url)

            match = re.search(
                r'<h1[^>]*>[^<]*</h1>(\s*<[^/][^>]*>)*\s*(?P<table><table[^>]*>.*?</table>)',
                page, re.DOTALL)
            if not match:
                raise ExceptionParseStandings('Not found problems table')
            table = parsed_table.ParsedTable(html=match.group('table'),
                                             ignore_wrong_header_number=False)
            skip = False
            problems_infos = collections.OrderedDict()
            for r in table:
                if isinstance(r, parsed_table.ParsedTableRow):
                    # Section header row carrying the round name; strip any
                    # trailing parenthesized remark before matching.
                    runda = re.sub(r'\s*\(.*\)\s*$', '',
                                   r.columns[0].value).strip()
                    skip = runda.lower() not in self.name.lower()
                    continue

                if skip:
                    continue

                problem_info = {}
                for k, vs in list(r.items()):
                    if isinstance(vs, list):
                        v = ' '.join([v.value for v in vs]).strip()
                    else:
                        v = vs.value
                    if not k:
                        problem_info['short'] = v
                    elif k in ('Nazwa', 'Name'):
                        match = re.search(r'\[(?P<letter>[^\]]+)\]$', v)
                        if match:
                            problem_info['_letter'] = match.group('letter')
                        problem_info['name'] = v
                        # FIX: use a relative XPath. '//a/@href' searches the
                        # whole document from the root in lxml, not this cell.
                        href = vs.column.node.xpath('.//a/@href')
                        if href:
                            problem_info['url'] = urljoin(problem_url, href[0])
                if problem_info:
                    problems_infos[problem_info['short']] = problem_info
            return problems_infos
Exemplo n.º 11
0
        def rec_fix_type(messages, types, path=None):
            """Recursively walk decoded messages against their type map.

            Raises ExceptionParseStandings when every field type at the
            current level is a 'fixed*' type -- the value at `path` should
            then have been a plain string. Otherwise recurses into each
            sub-message present in the message.
            """
            # FIX: `path=[]` was a shared mutable default argument; use None
            # as the sentinel and build a fresh list per top-level call.
            if path is None:
                path = []
            if not types:
                return
            for message in to_list(messages):
                # Replaces a manual flag loop (with an unused accumulator)
                # by the equivalent all(...) test.
                if all(v['type'].startswith('fixed') for v in types.values()):
                    raise ExceptionParseStandings(
                        f'Excepted str value for path = {path}')
                for k, v in types.items():
                    if k in message:
                        rec_fix_type(message[k], v.get('message_typedef'),
                                     path + [k])
Exemplo n.º 12
0
    def get_standings(self, users=None, statistics=None):
        """Fetch standings through a rotating-proxy request and parse the
        first HTML table on the page.

        Rows without a 'member' field get a synthetic one built from the
        name and the current season.
        """
        season = self.get_season()

        def standings_page(req):
            # Connectivity probe the proxy selector uses to pick a proxy.
            return req.get(self.standings_url)

        # FIX: removed leftover debug `print(self.standings_url)`.
        with REQ(
            with_proxy=True,
            args_proxy=dict(
                time_limit=3,
                n_limit=30,
                connect=standings_page,
            ),
        ) as req:
            page = req.proxer.get_connect_ret()

        html_table = re.search('<table[^>]*>.*?</table>', page, re.MULTILINE | re.DOTALL)
        if not html_table:
            raise ExceptionParseStandings('Not found html table')
        mapping = {
            'Rank': 'place',
            'Name': 'name',
            'Language': 'language',
        }
        table = parsed_table.ParsedTable(html_table.group(0), header_mapping=mapping)

        result = {}
        for r in table:
            row = dict()
            for k, v in r.items():
                if v.value:
                    row[k] = v.value
            if 'member' not in row:
                row['member'] = f'{row["name"]} {season}'
            result[row['member']] = row

        return {'result': result}
Exemplo n.º 13
0
    def get_standings(self, users=None, statistics=None):
        """Scrape a Facebook Hacker Cup scoreboard.

        When the standings URL is unknown, try to discover it via a Google
        search on the round name and year. Then page through the
        scoreboard, decoding the JSON blobs embedded in the HTML
        (problemData, scoreboardData, pagerData) into per-member results.
        """
        if not self.standings_url:
            year = self.start_time.year
            name = re.sub(r'(online|onsite)\s+', '', self.name,
                          flags=re.I).strip()
            query = f'site:https://www.facebook.com/hackercup/round/* Facebook Hacker Cup {year} {name}'
            urls = list(googlesearch.search(query, stop=2))
            if len(urls) == 1:
                self.standings_url = urls[0].replace('/round/', '/scoreboard/')
        if not self.standings_url:
            raise ExceptionParseStandings('not found standing url')

        offset = 0
        limit = 100

        result = OrderedDict()

        pbar = None
        total = None
        title = None
        problems_info = None
        while limit:
            url = f'{self.standings_url}?offset={offset}&length={limit}'
            page = REQ.get(url)

            match = re.search(r'"problemData":(?P<data>\[[^\]]*\])', page,
                              re.I)
            if not match:
                # No embedded data at this page size: halve and retry.
                limit //= 2
                continue

            problem_data = json.loads(match.group('data'))
            if problems_info is None:
                # Full scores are shown as "<score>: <title>" link captions.
                matches = re.finditer(
                    r'<div[^>]*class="linkWrap noCount"[^>]*>(?P<score>[0-9]+):\s*(?P<title>[^<]*)',
                    page)
                problems_scores = {}
                for match in matches:
                    score = int(match.group('score'))
                    name = html.unescape(match.group('title')).strip()
                    problems_scores[name] = score

                problems_info = []
                for problem in problem_data:
                    name = str(problem['name']).strip()
                    problems_info.append({
                        'code': str(problem['id']),
                        'name': name,
                        'full_score': problems_scores[name],
                    })

            if title is None:
                match = re.search(
                    '<h2[^>]*class="accessible_elem"[^>]*>(?P<title>[^<]*)</h2>',
                    page)
                title = match.group('title')

            match = re.search(r'"scoreboardData":(?P<data>\[[^\]]*\])', page,
                              re.I)
            data = json.loads(match.group('data'))

            if pbar is None:
                match = re.search(r'"pagerData":(?P<data>{[^}]*})', page, re.I)
                pager = json.loads(match.group('data'))
                total = pager['total']
                pbar = tqdm(total=total, desc='paging')

            for row in data:
                handle = str(row.pop('userID'))
                r = result.setdefault(handle, OrderedDict())

                r['member'] = handle
                r['solving'] = row.pop('score')
                r['place'] = row.pop('rank')
                r['name'] = row.pop('profile')['name']

                penalty = row.pop('penalty')
                if penalty:
                    r['penalty'] = self.to_time(penalty)

                problems = r.setdefault('problems', {})
                solved = 0
                for k, v in row.pop('problemData').items():
                    verdict = v.get('result')
                    if not verdict or verdict == 'none':
                        continue
                    p = problems.setdefault(k, {})
                    if verdict == 'accepted':
                        p['result'] = '+'
                        p['binary'] = True
                        solved += 1
                    else:
                        p['result'] = '0'
                        p['verdict'] = verdict
                        p['binary'] = False
                    u = v.get('sourceURI')
                    # FIX: was `if v:` (v is the problem dict, always truthy
                    # here), which set a bogus solution URL whenever
                    # sourceURI was missing; test the URI itself instead.
                    if u:
                        p['url'] = urljoin(url, u)
                r['solved'] = {'solving': solved}

                pbar.update()
                total -= 1

            if len(data) < limit:
                break

            offset += limit

        # pbar stays None when no page ever yielded data (limit reached 0).
        if pbar is not None:
            pbar.close()

        # Sanity-check that the scraped page matches this contest.
        words = self.name.split()
        words.append(str(self.start_time.year))
        for w in words:
            if w.lower() not in title.lower():
                warnings.warn(f'"{w}" not in title "{title}"')

        if total:
            warnings.warn(f'{total} member(s) did not get')

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
        }

        if re.search(r'\bfinals?\b', self.name, re.I):
            standings['options'] = {
                'medals': [{
                    'name': name,
                    'count': 1
                } for name in ('gold', 'silver', 'bronze')]
            }

        return standings
Exemplo n.º 14
0
    def get_standings(self, users=None, statistics=None):
        result = {}
        writers = defaultdict(int)

        start_time = self.start_time.replace(tzinfo=None)

        if not self.standings_url and datetime.now() - start_time < timedelta(
                days=30):
            re_round_overview = re.compile(
                r'''
(?:<td[^>]*>
    (?:
        [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
        (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
    )</td>[^<]*
){2}
                ''',
                re.VERBOSE,
            )
            for url in [
                    'https://www.topcoder.com/tc?module=MatchList&nr=100500',
                    'https://community.topcoder.com/longcontest/stats/?module=MatchList&nr=100500',
            ]:
                page = REQ.get(url)
                matches = re_round_overview.finditer(str(page))
                opt = 0.61803398875
                for match in matches:
                    date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                    if abs(date - start_time) < timedelta(days=2):
                        title = match.group('title')
                        intersection = len(
                            set(title.split()) & set(self.name.split()))
                        union = len(
                            set(title.split()) | set(self.name.split()))
                        iou = intersection / union
                        if iou > opt:
                            opt = iou
                            self.standings_url = urljoin(
                                url, match.group('url'))

        if not self.standings_url:
            raise InitModuleException('Not set standings url for %s' %
                                      self.name)

        url = self.standings_url + '&nr=100000042'
        page = REQ.get(url)
        result_urls = re.findall(
            r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)

        if not result_urls:  # marathon match
            match = re.search(
                '<[^>]*>Problem:[^<]*<a[^>]*href="(?P<href>[^"]*)"[^>]*>(?P<name>[^<]*)<',
                page)
            if not match:
                raise ExceptionParseStandings('not found problem')
            problem_name = match.group('name').strip()
            problems_info = [{
                'short':
                problem_name,
                'url':
                urljoin(url,
                        match.group('href').replace('&amp;', '&'))
            }]
            rows = etree.HTML(page).xpath(
                "//table[contains(@class, 'stat')]//tr")
            header = None
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 8:
                    continue
                values = [
                    c.value.strip().replace(u'\xa0', '') for c in r.columns
                ]
                if header is None:
                    header = values
                    continue

                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Handle').strip()
                d = self._dict_as_number(d)
                if 'rank' not in d or users and handle not in users:
                    continue
                row = result.setdefault(handle, OrderedDict())
                row.update(d)

                score = row.pop('final_score' if 'final_score' in
                                row else 'provisional_score')
                row['member'] = handle
                row['place'] = row.pop('rank')
                row['solving'] = score
                row['solved'] = {'solving': 1 if score > 0 else 0}

                problems = row.setdefault('problems', {})
                problem = problems.setdefault(problem_name, {})
                problem['result'] = score

                history_index = values.index('submission history')
                if history_index:
                    column = r.columns[history_index]
                    href = column.node.xpath('a/@href')
                    if href:
                        problem['url'] = urljoin(url, href[0])
        else:  # single round match
            matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
            problems_sets = []
            for match in matches:
                problems = re.findall(
                    '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                    match.group(),
                    re.IGNORECASE,
                )
                if problems:
                    problems_sets.append([{
                        'short': n,
                        'url': urljoin(url, u)
                    } for u, n in problems])

            problems_info = dict() if len(problems_sets) > 1 else list()
            for problems_set, result_url in zip(problems_sets, result_urls):
                url = urljoin(self.standings_url,
                              result_url + '&em=1000000042')
                url = url.replace('&amp;', '&')
                division = int(parse_qs(url)['dn'][0])

                with PoolExecutor(max_workers=3) as executor:

                    def fetch_problem(p):
                        errors = set()
                        for attempt in range(3):
                            try:
                                page = REQ.get(p['url'], time_out=30)
                                match = re.search(
                                    '<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>',
                                    page)
                                page = REQ.get(urljoin(p['url'],
                                                       match.group('href')),
                                               time_out=30)
                                matches = re.findall(
                                    r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>',
                                    page, re.DOTALL)  # noqa
                                for key, value in matches:
                                    key = key.strip().rstrip(':').lower()
                                    if key == 'categories':
                                        tags = [
                                            t.strip().lower()
                                            for t in value.split(',')
                                        ]
                                        tags = [t for t in tags if t]
                                        if tags:
                                            p['tags'] = tags
                                    elif key.startswith(
                                            'writer') or key.startswith(
                                                'tester'):
                                        key = key.rstrip('s') + 's'
                                        p[key] = re.findall(
                                            '(?<=>)[^<>,]+(?=<)', value)
                                for w in p.get('writers', []):
                                    writers[w] += 1
                            except Exception as e:
                                errors.add(
                                    f'error parse problem info {p}: {e}')
                                sleep(5**attempt)
                        else:
                            errors = None
                        if errors:
                            LOG.error(errors)

                        return p

                    for p in tqdm.tqdm(executor.map(fetch_problem,
                                                    problems_set),
                                       total=len(problems_set)):
                        d = problems_info
                        if len(problems_sets) > 1:
                            d = d.setdefault('division', OrderedDict())
                            d = d.setdefault('I' * division, [])
                        d.append(p)

                if not users and users is not None:
                    continue

                page = REQ.get(url)
                rows = etree.HTML(page).xpath("//tr[@valign='middle']")
                header = None
                url_infos = []
                for row in rows:
                    r = parsed_table.ParsedTableRow(row)
                    if len(r.columns) < 10:
                        continue
                    values = [c.value for c in r.columns]
                    if header is None:
                        header = values
                        continue

                    d = OrderedDict(list(zip(header, values)))
                    handle = d.pop('Coders').strip()
                    d = self._dict_as_number(d)
                    if users and handle not in users:
                        continue

                    row = result.setdefault(handle, OrderedDict())
                    row.update(d)

                    if not row.get('new_rating') and not row.get(
                            'old_rating') and not row.get('rating_change'):
                        row.pop('new_rating', None)
                        row.pop('old_rating', None)
                        row.pop('rating_change', None)

                    row['member'] = handle
                    row['place'] = row.pop('division_placed', None)
                    row['solving'] = row['point_total']
                    row['solved'] = {'solving': 0}
                    row['division'] = 'I' * division

                    if 'adv.' in row:
                        row['advanced'] = row.pop('adv.').lower().startswith(
                            'y')

                    url_info = urljoin(url,
                                       r.columns[0].node.xpath('a/@href')[0])
                    url_infos.append(url_info)

                def fetch_solution(url):
                    for i in range(2):
                        try:
                            page = REQ.get(url, time_out=60)
                            match = re.search(
                                '<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                                page, re.DOTALL | re.IGNORECASE)
                            ret = html.unescape(match.group('solution'))
                            ret = ret.strip()
                            ret = ret.replace('<BR>', '\n')
                            ret = ret.replace('\xa0', ' ')
                            return ret
                        except FailOnGetResponse:
                            sleep(i * 10 + 3)
                    return None

                def fetch_info(url):
                    delay = 3
                    for _ in range(5):
                        try:
                            page = REQ.get(url)
                            break
                        except Exception:
                            sleep(delay)
                            delay *= 2
                    else:
                        return None, None, None

                    match = re.search(
                        'class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                        page, re.IGNORECASE)
                    handle = html.unescape(match.group('handle').strip())

                    match = re.search(r'&nbsp;Room\s*(?P<room>[0-9]+)', page)
                    room = match.group('room') if match else None

                    matches = re.finditer(
                        r'''
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    problems = {}
                    n_fetch_solution = 0
                    for match in matches:
                        d = match.groupdict()
                        short = d.pop('short')
                        solution_url = urljoin(url, d['url'])
                        d['url'] = solution_url
                        d = self._dict_as_number(d)
                        if d['status'] in [
                                'Challenge Succeeded', 'Failed System Test'
                        ]:
                            d['result'] = -d['result']
                        if abs(d['result']) < 1e-9:
                            d.pop('result')
                        if re.match('^[0.:]+$', d['time']):
                            d.pop('time')

                        solution = (statistics or {}).get(handle, {}).get(
                            'problems', {}).get(short, {}).get('solution')
                        if not solution:
                            n_fetch_solution += 1
                            solution = fetch_solution(solution_url)
                        d['solution'] = solution

                        problems[short] = d

                    challenges = []
                    matches = re.finditer(
                        r'''
                        <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    for match in matches:
                        d = match.groupdict()
                        d = {k: v.strip() for k, v in d.items()}
                        d['result'] = float(d['result'].replace(',', '.'))
                        d['url'] = urljoin(url, d['url'])

                        p = problems.setdefault(d['problem'], {})
                        p.setdefault('extra_score', 0)
                        p['extra_score'] += d['result']
                        p.setdefault(
                            'extra_info',
                            []).append(f'{d["target"]}: {d["result"]}')
                        challenges.append(d)

                    return url, handle, room, problems, challenges, n_fetch_solution

                with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(
                        total=len(url_infos)) as pbar:
                    n_fetch_solution = 0
                    for url, handle, room, problems, challenges, n_sol in executor.map(
                            fetch_info, url_infos):
                        n_fetch_solution += n_sol
                        pbar.set_description(f'div{division} {url}')
                        pbar.set_postfix(n_solution=n_fetch_solution)
                        pbar.update()
                        if handle is not None:
                            if handle not in result:
                                LOG.error(
                                    f'{handle} not in result, url = {url}')
                            result[handle]['url'] = url
                            if room:
                                result[handle]['room'] = room
                            result[handle]['problems'] = problems
                            result[handle]['challenges'] = challenges
                            for p in problems.values():
                                if p.get('result', 0) > 1e-9:
                                    result[handle]['solved']['solving'] += 1
                            if challenges:
                                h = result[handle].setdefault(
                                    'hack', {
                                        'title': 'challenges',
                                        'successful': 0,
                                        'unsuccessful': 0,
                                    })
                                for c in challenges:
                                    h['successful' if c['status'].lower() ==
                                      'yes' else 'unsuccessful'] += 1

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
            'options': {
                'fixed_fields': [('hack', 'Challenges')],
            },
        }

        if writers:
            writers = [
                w[0] for w in sorted(
                    writers.items(), key=lambda w: w[1], reverse=True)
            ]
            standings['writers'] = writers

        if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
            standings['options']['medals'] = [{
                'name': name,
                'count': 1
            } for name in ('gold', 'silver', 'bronze')]

        return standings
Exemplo n.º 15
0
    def get_standings(self, users=None, statistics=None):
        """Scrape the Project Euler "fastest solvers" table for this problem.

        When no authenticated session is detected, signs in first by OCR'ing
        the numeric captcha with pytesseract (up to 20 attempts).

        Returns a standings dict with 'result', 'url' and 'problems' keys.
        """
        if not self.standings_url:
            self.standings_url = f'https://projecteuler.net/fastest={self.key}'

        # Browser-like User-Agent; presumably the site rejects default
        # client identifiers — TODO confirm.
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'  # noqa
        page = REQ.get(self.standings_url, headers={'User-Agent': user_agent})

        # A sign-out form on the page means we already have a session.
        sign_out = re.search('<form[^>]*action="sign_out"[^>]*>', page)
        if not sign_out:
            for attempt in range(20):
                # Fetch captcha images until OCR yields a 5-digit string.
                while True:
                    value = f'{random.random():.16f}'  # cache-busting query value
                    image_bytes = REQ.get(f'https://projecteuler.net/captcha/show_captcha.php?{value}')
                    image_stream = io.BytesIO(image_bytes)
                    image_rgb = Image.open(image_stream)
                    text = pytesseract.image_to_string(image_rgb, config='--oem 0 --psm 13 digits')
                    text = text.strip()
                    if re.match('^[0-9]{5}$', text):
                        break

                REQ.get('https://projecteuler.net/sign_in')
                page = REQ.submit_form(
                    name='sign_in_form',
                    action=None,
                    data={
                        'username': conf.PROJECTEULER_USERNAME,
                        'password': conf.PROJECTEULER_PASSWORD,
                        'captcha': text,
                        'remember_me': '1',
                    },
                )
                # A warning paragraph indicates the sign-in failed
                # (e.g. wrong captcha); retry in that case.
                match = re.search('<p[^>]*class="warning"[^>]*>(?P<message>[^<]*)</p>', page)
                if match:
                    REQ.print(match.group('message'))
                else:
                    break
            else:
                raise ExceptionParseStandings('Did not recognize captcha for sign in')
            page = REQ.get(self.standings_url)

        result = {}

        # Contest name looks like "<number>. <problem name>".
        problem_name = self.name.split('.', 1)[1].strip()
        problems_info = [{'name': problem_name, 'url': self.url}]

        # The first table on the page holds the fastest-solvers standings.
        regex = '<table[^>]*>.*?</table>'
        html_table = re.search(regex, page, re.DOTALL)

        if html_table:
            table = parsed_table.ParsedTable(html_table.group(0))
            for r in table:
                row = OrderedDict()
                row['solving'] = 1  # every listed user solved exactly this problem
                for k, v in r.items():
                    if isinstance(v, list):
                        # Combined place + country-flag cell.
                        place, country = v
                        row['place'] = re.match('[0-9]+', place.value).group(0)
                        country = first(country.column.node.xpath('.//@title'))
                        if country:
                            row['country'] = country
                    elif k == 'Time To Solve':
                        # Convert "N days, M hours, ..." into minutes of penalty.
                        params = {}
                        for x in v.value.split(', '):
                            value, field = x.split()
                            if field[-1] != 's':
                                field += 's'  # relativedelta expects plural keywords
                            params[field] = int(value)
                        rel_delta = relativedelta(**params)
                        now = timezone.now()
                        # relativedelta has no total_seconds; anchor it to "now"
                        # to obtain a plain timedelta.
                        delta = now - (now - rel_delta)
                        row['penalty'] = f'{delta.total_seconds() / 60:.2f}'
                    elif k == 'User':
                        member = first(v.column.node.xpath('.//@title')) or v.value
                        row['member'] = member
                    else:
                        row[k.lower()] = v.value
                problems = row.setdefault('problems', {})
                problem = problems.setdefault(problem_name, {})
                problem['result'] = '+'
                problem['binary'] = True
                row['_skip_for_problem_stat'] = True
                if 'member' not in row:
                    continue
                result[row['member']] = row

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
        }

        # Re-parse frequently while the board is still filling up (<100 rows).
        if len(result) < 100:
            delta = timezone.now() - self.start_time
            if delta < timedelta(days=1):
                standings['timing_statistic_delta'] = timedelta(minutes=60)
            elif delta < timedelta(days=30):
                standings['timing_statistic_delta'] = timedelta(days=1)

        return standings
Exemplo n.º 16
0
Arquivo: yandex.py Projeto: kmyk/clist
    def get_standings(self, users=None, statistics=None):
        """Parse paginated Yandex contest standings into a result mapping.

        Member keys for names containing a space are suffixed with the
        season string ('YYYY-YYYY') to disambiguate across years.
        """
        if not hasattr(self, 'season'):
            # Season rolls over in September (month > 8).
            year = self.start_time.year - (0
                                           if self.start_time.month > 8 else 1)
            season = f'{year}-{year + 1}'
        else:
            season = self.season

        result = {}
        problems_info = OrderedDict()

        # Standings url must contain a numeric contest-id path segment.
        if not re.search('/[0-9]+/', self.standings_url):
            return {}

        url = self.standings_url
        n_page = 1
        while True:
            page = REQ.get(url)

            match = re.search(
                '<table[^>]*class="[^"]*standings[^>]*>.*?</table>', page,
                re.MULTILINE | re.DOTALL)
            if not match:
                raise ExceptionParseStandings('Not found table standings')

            html_table = match.group(0)
            table = parsed_table.ParsedTable(html_table)

            for r in table:
                row = {}
                problems = row.setdefault('problems', {})
                solved = 0
                has_solved = False
                # Cells are dispatched on their CSS class.
                for k, v in list(r.items()):
                    if 'table__cell_role_result' in v.attrs['class']:
                        # Problem-result cell; the header starts with the letter.
                        letter = k.split(' ', 1)[0]
                        if letter == 'X':
                            continue

                        p = problems_info.setdefault(letter, {'short': letter})
                        names = v.header.node.xpath('.//span/@title')
                        if len(names) == 1:
                            p['name'] = names[0]

                        p = problems.setdefault(letter, {})
                        n = v.column.node
                        # Success/fail icons mark binary (accepted/rejected) verdicts.
                        if n.xpath(
                                'img[contains(@class,"image_type_success")]'):
                            res = '+'
                            p['binary'] = True
                        elif n.xpath(
                                'img[contains(@class,"image_type_fail")]'):
                            res = '-'
                            p['binary'] = False
                        else:
                            # Text cell like "<result> <time>"; drop cells
                            # without that two-part shape (no attempt).
                            if ' ' not in v.value:
                                problems.pop(letter)
                                continue
                            res = v.value.split(' ', 1)[0]
                        p['result'] = res
                        p['time'] = v.value.split(' ', 1)[-1]
                        if 'table__cell_firstSolved_true' in v.attrs['class']:
                            p['first_ac'] = True

                        # '+' (ICPC-style) or a 100-point score counts as solved.
                        if '+' in res or res.startswith('100'):
                            solved += 1

                        try:
                            has_solved = has_solved or '+' not in res and float(
                                res) > 0
                        except ValueError:
                            pass
                    elif 'table__cell_role_participant' in v.attrs['class']:
                        title = v.column.node.xpath('.//@title')
                        if title:
                            name = title[0]
                        else:
                            name = v.value.replace(' ', '', 1)
                        row['name'] = name
                        # Full names (with a space) get the season suffix.
                        row['member'] = name if ' ' not in name else f'{name} {season}'
                    elif 'table__cell_role_place' in v.attrs['class']:
                        row['place'] = v.value
                    elif 'table__header_type_penalty' in v.attrs['class']:
                        row['penalty'] = int(
                            v.value) if v.value.isdigit() else v.value
                    elif 'table__header_type_score' in v.attrs['class']:
                        row['solving'] = int(round(float(v.value)))
                if has_solved:
                    row['solved'] = {'solving': solved}
                result[row['member']] = row

            # Follow the "next page" link while one exists.
            n_page += 1
            match = re.search(
                f'<a[^>]*href="(?P<href>[^"]*standings[^"]*p[^"]*={n_page})"[^>]*>',
                page)
            if not match:
                break
            url = urljoin(url, match.group('href'))

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
Exemplo n.º 17
0
    def get_standings(self, users=None, statistics=None):
        """Build standings by matching this contest against the rounds API,
        then fetching per-team results for each competition type/division.

        Also rescales challenge-style problems whose score depends on the
        best ('target') raw result in the division.
        """
        page = REQ.get(self.COMPETITION_INFO_API_URL_)
        data = json.loads(page)
        # Locate our round by start time and name parsed from its HTML description.
        for round_data in data['rounds']:
            match = re.search(
                r'start\s*date\s*(?:<b[^>]*>)?(?P<start_time>[^<]*)(?:</b>)?.*end\s*date',
                round_data['description'], re.IGNORECASE)
            start_time = parser.parse(match.group('start_time'),
                                      tzinfos={'CET': 'UTC+1'})
            title = re.sub(r'\s+', ' ', round_data['name'])
            if start_time == self.start_time and title == self.name:
                break
        else:
            raise ExceptionParseStandings('not found round')

        # Challenge problems advertise their maximum score in the description.
        m = re.search('maxPointsForProblem=(?P<score>[0-9]+)',
                      round_data['description'])
        max_points_challenge_problem = int(m.group('score')) if m else None

        page = REQ.get(self.ROUND_INFO_API_URL_)
        round_infos = json.loads(page)
        for round_info in round_infos['roundDisplayInfo']:
            title = re.sub(r'\s+', ' ', round_info['displayName'])
            if title == self.name:
                break
        else:
            raise ExceptionParseStandings('not found round')

        # Problems are given sequential short names 'A', 'B', ...
        default_problems_info = OrderedDict([(p['code'], {
            'code':
            p['code'],
            'short':
            chr(i + ord('A')),
            'name':
            p['name'],
            'url':
            self.PROBLEM_URL_.format(**p),
        }) for i, p in enumerate(round_data['problems'])])
        if self.name.startswith('Round'):
            level = int(self.name.split()[-1])
            if level in [1, 2]:
                # NOTE(review): full_score equals the round level for rounds
                # 1/2 — looks intentional, but confirm against contest rules.
                for p in default_problems_info.values():
                    p['full_score'] = level
        d_problems_info = OrderedDict()

        result = dict()

        # Cache: problem API url -> statement contains a "Scoring:" section.
        has_scoring = {}
        divisions_order = []
        # Three scoreboards: two team leagues plus individual results.
        for cid, ctype in (
            (round_infos['teamCompetitionPremierLeagueId'], 'Team'),
            (round_infos['teamCompetitionRisingStarsId'], 'Team'),
            (round_infos['teamCompetitionPremierLeagueId'], 'individual'),
        ):
            url = self.RESULTS_API_URL_.format(cid=cid,
                                               url=round_info['url'],
                                               ctype=ctype)
            page = REQ.get(url)
            data = json.loads(page)

            # Division name is the displayed name minus the contest title.
            division = data['displayedName'].replace(self.name,
                                                     '').strip().lower()
            if division not in divisions_order:
                divisions_order.append(division)
            problems_info = d_problems_info.setdefault(
                division, deepcopy(default_problems_info))

            participaty_type = {
                'Team': 'Team',
                'individual': 'Competitor',
            }[ctype]

            sorted_data = sorted(data['standings'],
                                 key=lambda r: r['score'],
                                 reverse=True)
            division_result = dict()

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(
                    total=len(sorted_data)) as pbar:

                def fetch_team_results(d):
                    # Fetch one team/competitor page: per-problem scores,
                    # member accounts, and profile key/value rows.
                    member = str(d['id'])
                    url = self.TEAM_RESULTS_URL_.format(cid=cid,
                                                        uid=member,
                                                        name=participaty_type)
                    page = REQ.get(url)

                    # Per-problem scores from links to /Problem/<code>.
                    matches = re.finditer(
                        r'<a[^>]*href="[^"]*/Problem/(?P<code>[^"/]*)">[^<]*(?:\s*<[^>]*>)*(?P<score>[.0-9]+)',
                        page,
                    )
                    problems = {}
                    for m in matches:
                        k = m['code']
                        if k not in problems_info:
                            continue
                        p = problems.setdefault(problems_info[k]['short'], {})
                        p['result'] = m['score']

                    # Team member accounts (links to competitor results).
                    matches = re.finditer(
                        '<a[^>]*href="[^"]*/CompetitorResults/[^"]*/(?P<account>[0-9]+)/?">(?P<name>[^<]*)</a>',
                        page,
                    )
                    users = [m.groupdict() for m in matches]

                    info = {
                        'problems': problems,
                        'url': url,
                        'member': member,
                    }

                    # Profile table rows: <b>key</b> / value pairs.
                    matches = re.finditer(
                        r'<tr[^>]*>\s*<td[^>]*><b>(?P<key>[^<]*)</b></td>\s*<td[^>]*>(?P<value>[^<]*)</td>\s*</tr>',
                        page,
                    )

                    more_info = {}
                    for m in matches:
                        k = m.group('key').lower().replace(' ', '_')
                        v = m.group('value')
                        if not v:
                            continue
                        more_info[k] = v
                    if more_info.get('name') and more_info.get('surname'):
                        info['full_name'] = '{name} {surname}'.format(
                            **more_info)
                    if more_info.get('birth_year') == '0':
                        more_info.pop('birth_year')  # '0' means unset
                    for k in 'school', 'city', 'birth_year':
                        if more_info.get(k):
                            info[k] = more_info[k]

                    return d, info, users

                # Standard-competition ranking: scores equal within 1e-7
                # share a place.
                place = None
                last = None
                for index, (r, row, users) in enumerate(executor.map(
                        fetch_team_results, sorted_data),
                                                        start=1):
                    if last is None or abs(r['score'] - last) > 1e-7:
                        place = index
                        last = r['score']

                    row['name'] = r['name']
                    if users:
                        row['_members'] = users
                    row['place'] = place
                    row['solving'] = r['score']

                    country = unquote(r['country'])
                    country = re.sub(r'\s*\(.*$', '', country)
                    row['country'] = country

                    row['division'] = division
                    if ctype == 'individual':
                        # Individuals mirror the team results; exclude them
                        # from per-problem statistics.
                        row['_skip_for_problem_stat'] = True

                    division_result[row['member']] = row
                    pbar.update()

            # Rescale challenge-problem scores relative to the division optimum.
            if max_points_challenge_problem is not None:
                for code, problem_info in problems_info.items():
                    key = problem_info['short']
                    target = self.info.get('parse',
                                           {}).get('problems',
                                                   {}).get(key,
                                                           {}).get('target')

                    if target is None:
                        # No configured target: only demote raw results to
                        # 'status' when the statement has a Scoring section.
                        url = self.PROBLEM_API_URL_.format(**problem_info)
                        if url not in has_scoring:
                            page = REQ.get(url)
                            data = json.loads(page)
                            has_scoring[url] = bool(
                                re.search(r'####\s*Scoring:\s+',
                                          data['statement']))
                        if has_scoring[url]:
                            for r in division_result.values():
                                p = r['problems'].get(key, {})
                                if 'result' not in p:
                                    continue
                                p['status'] = p.pop('result')
                        continue

                    problem_info['full_score'] = max_points_challenge_problem

                    if target == 'minimize':
                        func = min
                    elif target == 'maximize':
                        func = max
                    else:
                        raise ExceptionParseStandings(
                            f'unknown target = {target}')

                    # Best (optimal) raw result across the division.
                    opt = None
                    for r in division_result.values():
                        res = r['problems'].get(key, {}).get('result')
                        if res is None:
                            continue
                        res = float(res)
                        if opt is None:
                            opt = res
                        else:
                            opt = func(opt, res)

                    for r in division_result.values():
                        p = r['problems'].get(key, {})
                        if 'result' not in p:
                            continue
                        p['status'] = p['result']
                        if opt is None or abs(opt) < 1e-9:
                            p.pop('result')
                            continue
                        # Score scales as 1 - sqrt(1 - ratio-to-optimum).
                        if target == 'minimize':
                            coefficient = 1 - (1 -
                                               opt / float(p['result']))**.5
                        elif target == 'maximize':
                            coefficient = 1 - (1 -
                                               float(p['result']) / opt)**.5
                        if coefficient < 1:
                            p['partial'] = True
                        p['result'] = round(
                            max_points_challenge_problem * coefficient, 2)

            # A problem counts as solved only with a full (non-partial) score.
            for r in division_result.values():
                solved = 0
                for p in r['problems'].values():
                    if not p.get('partial') and 'result' in p and float(
                            p['result']) > 0:
                        solved += 1
                r['solved'] = {'solving': solved}

            result.update(division_result)

        standings_url = self.STANDING_URL_.format(
            cid=round_infos['teamCompetitionPremierLeagueId'])

        problem_info = {
            'division':
            OrderedDict(
                ((d, list(ps.values())) for d, ps in d_problems_info.items()))
        }

        # NOTE(review): this assignment to problems_info has no effect on the
        # returned standings (which use problem_info) — verify intent.
        if len(problem_info['division']) == 1:
            problems_info = next(iter(problem_info['division'].values()))

        standings = {
            'result': result,
            'url': standings_url,
            'problems': problem_info,
            'divisions_order': divisions_order,
            'hidden_fields': ['full_name', 'school', 'city', 'birth_year'],
        }
        return standings
Exemplo n.º 18
0
    def _api_get_standings(self, users=None, statistics=None):
        """Fetch standings via the base64-encoded JSON scoreboard API.

        Pages through the ranking 200 users at a time, then — when results
        are final — pulls each handle's attempts to attach per-test
        subscores and source-code solutions.
        """
        match = re.search('/([0-9a-f]{16})$', self.url)
        if not match:
            raise ExceptionParseStandings(f'Not found id in url = {self.url}')
        self.id = match.group(1)
        standings_url = self.url

        api_ranking_url_format = self.API_RANKING_URL_FORMAT_.format(
            **self.__dict__)
        api_attempts_url_format = self.API_ATTEMPTS_URL_FORMAT_.format(
            **self.__dict__)

        def encode(value):
            # URL-safe base64 variant the API expects ('+'->'-', '/'->'_').
            ret = base64.b64encode(value.encode()).decode()
            ret = ret.replace('+', '-')
            ret = ret.replace('/', '_')
            return ret

        def decode(code):
            # Inverse of encode: restore the standard alphabet, strip any
            # stray characters, re-pad to a multiple of 4, then parse JSON.
            code = code.replace('-', '+')
            code = code.replace('_', '/')
            code = re.sub(r'[^A-Za-z0-9\+\/]', '', code)
            code += '=' * ((4 - len(code) % 4) % 4)
            data = json.loads(base64.b64decode(code).decode())
            return data

        def get(offset, num):
            # offset is the 1-based rank of the first returned row.
            query = f'{{"min_rank":{offset},"num_consecutive_users":{num}}}'
            url = api_ranking_url_format + encode(query)
            content = REQ.get(url)
            return decode(content)

        data = get(1, 1)
        problems_info = [{
            'url':
            os.path.join(self.url, task['id']),
            'code':
            task['id'],
            'name':
            task['title'],
            'full_score':
            sum([test['value'] for test in task['tests']])
        } for task in data['challenge']['tasks']]
        # Stable problem order: by total score, then title.
        problems_info.sort(key=lambda t: (t['full_score'], t['name']))
        problems_info = OrderedDict([(t['code'], t) for t in problems_info])

        are_results_final = data['challenge']['are_results_final']

        num_consecutive_users = 200
        # Ceiling division over the page size.
        n_page = (data['full_scoreboard_size'] -
                  1) // num_consecutive_users + 1

        def fetch_page(page):
            return get(page * num_consecutive_users + 1, num_consecutive_users)

        def fetch_attempts(handle):
            # Best-effort: attempts fetch failures yield data=None.
            query = f'{{"nickname":{json.dumps(handle)},"include_non_final_results":true}}'
            url = api_attempts_url_format + encode(query)
            try:
                content = REQ.get(url)
                data = decode(content)
            except FailOnGetResponse:
                data = None
            return handle, data

        result = {}
        with PoolExecutor(max_workers=8) as executor:
            handles_for_getting_attempts = []
            for data in tqdm.tqdm(executor.map(fetch_page, range(n_page)),
                                  total=n_page,
                                  desc='paging'):
                for row in data['user_scores']:
                    if not row['task_info']:
                        continue
                    handle = row.pop('displayname')
                    if users and handle not in users:
                        continue

                    r = result.setdefault(handle, {})
                    r['member'] = handle
                    r['place'] = row.pop('rank')
                    r['solving'] = row.pop('score_1')
                    # score_2 appears to be negated elapsed microseconds,
                    # given the negation and 1e6 divisor — TODO confirm.
                    r['penalty'] = self.to_time(-row.pop('score_2') / 10**6)
                    if '/round/' in self.url:
                        query = encode(handle)
                        url = self.url.replace(
                            '/round/',
                            '/submissions/').rstrip('/') + f'/{query}'
                        r['url'] = url.rstrip('=')

                    country = row.pop('country', None)
                    if country:
                        r['country'] = country

                    solved = 0
                    problems = r.setdefault('problems', {})
                    for task_info in row['task_info']:
                        tid = task_info['task_id']
                        p = problems.setdefault(tid, {})
                        if task_info['penalty_micros'] > 0:
                            p['time'] = self.to_time(
                                task_info['penalty_micros'] / 10**6)
                        p['result'] = task_info['score']
                        # Nonzero but below full score => partial solve.
                        if p['result'] and p['result'] != problems_info[tid][
                                'full_score']:
                            p['partial'] = True
                        if task_info['penalty_attempts']:
                            p['penalty'] = task_info['penalty_attempts']
                        solved += task_info['tests_definitely_solved']
                    r['solved'] = {'solving': solved}

                    # Reuse cached subscores from a previous parse if present;
                    # otherwise schedule an attempts fetch for this handle.
                    if statistics and handle in statistics and statistics[
                            handle].get('_with_subscores'):
                        result[handle] = self.merge_dict(
                            r, statistics.pop(handle))
                    else:
                        handles_for_getting_attempts.append(handle)

            if are_results_final:
                for handle, data in tqdm.tqdm(
                        executor.map(fetch_attempts,
                                     handles_for_getting_attempts),
                        total=len(handles_for_getting_attempts),
                        desc='attempting'):
                    if data is None:
                        continue
                    challenge = data['challenge']
                    if not challenge.get('are_results_final'):
                        break
                    tasks = {t['id']: t for t in challenge['tasks']}

                    row = result[handle]
                    problems = row['problems']

                    # Walk attempts chronologically; only keep data from the
                    # attempt whose score matches the scoreboard result.
                    for attempt in sorted(data['attempts'],
                                          key=lambda a: a['timestamp_ms']):
                        task_id = attempt['task_id']
                        problem = problems.setdefault(task_id, {})

                        subscores = []
                        score = 0
                        for res, test in zip(
                                attempt['judgement'].pop('results'),
                                tasks[task_id]['tests']):
                            if not test.get('value'):
                                continue
                            subscore = {'status': test['value']}
                            if 'verdict' in res:
                                subscore['result'] = res['verdict'] == 1
                                subscore['verdict'] = res['verdict__str']
                            else:
                                subscore['verdict'] = res['status__str']
                            subscores.append(subscore)
                            if res.get('verdict') == 1:
                                score += test['value']
                        if score != problem.get('result'):
                            continue

                        problem['subscores'] = subscores
                        # Strip NUL bytes from source code before storing.
                        problem['solution'] = attempt.pop(
                            'src_content').replace('\u0000', '')
                        language = attempt.get('src_language__str')
                        if language:
                            problem['language'] = language
                        if 'time' not in problem:
                            delta_ms = attempt['timestamp_ms'] - challenge[
                                'start_ms']
                            problem['time'] = self.to_time(delta_ms / 10**3)
                    row['_with_subscores'] = True

        standings = {
            'result': result,
            'url': standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
Exemplo n.º 19
0
    def _hashcode(self, users=None, statistics=None):
        """Parse Google Hash Code standings.

        Tries the official archive JSON for the contest year first; if the
        matching round is not found there, falls back to the scoreboard URL
        stored in ``self.info['hashcode_scoreboard']``.

        :param users: optional collection of team names to restrict parsing to
        :param statistics: unused, kept for interface compatibility
        :return: dict with 'result', 'hidden_fields', 'problems' and,
                 when the archive was used, 'url'
        """
        standings_url = None
        is_final_round = self.name.endswith('Final Round')

        data = None
        try:
            archive_url = self.ARCHIVE_DATA_URL_FORMAT_.format(
                year=self.start_time.year)
            data = json.loads(REQ.get(archive_url))
            seen_names = set()
            for round_entry in data['rounds']:
                round_name = round_entry['name']
                # a duplicated round name in the archive denotes the
                # qualification round
                if round_name in seen_names:
                    round_name = 'Qualification Round'
                is_main_final = (round_name in ('Full ranking', 'Main round')
                                 and is_final_round)
                if self.name.endswith(round_name) or is_main_final:
                    data = round_entry['data']
                    standings_url = self.ARCHIVE_URL_FORMAT_.format(
                        year=self.start_time.year)
                    break
                seen_names.add(round_name)
            else:
                # no round matched this contest
                data = None
        except FailOnGetResponse as e:
            if e.code != 404:
                raise e

        if not data:
            if 'hashcode_scoreboard' not in self.info:
                raise ExceptionParseStandings('Not found data')
            data = json.loads(REQ.get(self.info['hashcode_scoreboard']))

        # some payloads are {'columns': [...], 'rows': [...]}, others are
        # already a list of row dicts
        if 'columns' in data:
            columns = data['columns']
            data = data['rows']
        else:
            columns = None

        result = {}
        season = self.get_season()
        for rank, raw_row in enumerate(data, start=1):
            if columns is not None:
                raw_row = dict(zip(columns, raw_row))
            # normalize keys: lowercase, spaces stripped
            entry = {k.lower().replace(' ', ''): v for k, v in raw_row.items()}

            team = unescape(entry.pop('teamname'))
            member = f'{team}, {season}'

            if users is not None and team not in users:
                continue

            row = result.setdefault(member, {})
            row['name'] = team
            row['member'] = member

            # strip whitespace and thousands separators; non-numeric -> '0'
            score = re.sub(r'[\s,]', '', str(entry.pop('score', '0')))
            try:
                float(score)
            except Exception:
                score = '0'
            row['solving'] = score

            row['place'] = entry.pop('rank') if 'rank' in entry else rank

            if 'country' in entry:
                countries = re.sub(r',\s+', ',', entry.pop('country'))
                row['_countries'] = countries.split(',')
            elif 'countries' in entry:
                row['_countries'] = entry.pop('countries')

            if 'finalround' in entry:
                row['advanced'] = entry['finalround']

            millis = entry.get('submissiontime', {}).get('iMillis')
            if millis:
                row['time'] = self.to_time(
                    millis / 1000 - self.start_time.timestamp(), 3)

            if 'hubid' in entry:
                row['hub_id'] = entry.pop('hubid')

        standings = {
            'result': result,
            'hidden_fields': ['hub_id'],
            'problems': [],
        }

        if standings_url:
            standings['url'] = standings_url

        return standings
Exemplo n.º 20
0
Arquivo: dmoj.py Projeto: aropan/clist
    def get_standings(self, users=None, statistics=None):
        """Build standings for a DMOJ-style contest from its ranking API.

        Fetches the JSON ranking page, normalizes each participant row
        (place, score, penalty, per-problem results), and, for rated
        contests where the API exposed no new ratings, falls back to
        fetching each user's profile to recover rating history.

        :param users: optional collection of handles to restrict parsing to
        :param statistics: previously stored rows, used to avoid re-fetching
                           ratings that are already known
        :return: dict with 'result', 'url', 'problems', 'fields_types'
                 and 'hidden_fields'
        """
        api_ranking_url_version = self.resource.info.get('statistics', {}).get(
            'api_ranking_url_version', 'v2')
        resource = '{uri.scheme}://{uri.netloc}'.format(uri=urlparse(self.url))
        # format kwargs come from the instance dict itself plus the resource
        infos = self.__dict__
        infos['resource'] = resource
        url = self.API_RANKING_URL_FORMATS_[api_ranking_url_version].format(
            **infos)
        try:
            # small fixed delay to be polite to the API
            time.sleep(1)
            page = REQ.get(url)
        except FailOnGetResponse as e:
            if e.code == 404:
                # contest no longer exists upstream
                return {'action': 'delete'}
            raise ExceptionParseStandings('not found api ranking url')

        data = json.loads(page)
        # unwrap the {'data': {'object': ...}} envelope used by some versions
        if 'data' in data and 'object' in data['data']:
            data = data['data']['object']

        # collect problem metadata; 'label' falls back to the 1-based index
        problems_info = []
        for idx, p in enumerate(data.pop('problems'), start=1):
            info = {
                'short': p.get('label', str(idx)),
                'name': p['name'],
                'code': p['code'],
            }
            info['url'] = self.PROBLEM_URL_.format(resource=resource, **info)
            if p.get('points'):
                info['full_score'] = p['points']
            problems_info.append(info)

        result = {}
        prev = None
        skip = 0
        handles_to_get_new_rating = []
        has_rated = data.get('is_rated', True) and data.get('has_rating', True)
        has_rating = False

        # rename API fields to the internal names used below
        rankings = data.pop('rankings')
        for r in rankings:
            for src, dst in (
                ('points', 'score'),
                ('cumtime', 'cumulative_time'),
            ):
                if src in r:
                    r[dst] = r.pop(src)
        # order by score desc, then cumulative time asc
        rankings = sorted(rankings,
                          key=lambda x: (-x['score'], x['cumulative_time']))

        fields_types = {}
        hidden_fields = set()
        for index, r in enumerate(rankings, start=1):
            solutions = r.pop('solutions')
            # rows with no submissions and no rating change are dropped;
            # 'skip' keeps the assigned places dense
            if not any(solutions) and not r.get('new_rating'):
                skip += 1
                continue
            handle = r.pop('user')
            row = result.setdefault(handle, collections.OrderedDict())

            row['member'] = handle
            row['solving'] = r.pop('score')
            cumulative_time = r.pop('cumulative_time')
            if cumulative_time:
                row['penalty'] = self.to_time(cumulative_time)

            # standard competition ranking: ties on (score, time) share place
            curr = (row['solving'], cumulative_time)
            if curr != prev:
                prev = curr
                rank = index - skip
            row['place'] = rank

            solved = 0
            problems = row.setdefault('problems', {})
            for prob, sol in zip(problems_info, solutions):
                if not sol:
                    continue
                p = problems.setdefault(prob['short'], {})
                if sol['points'] > 0 and prob.get('full_score'):
                    # partial credit: positive score below the full score
                    p['partial'] = prob['full_score'] > sol['points']
                p['result'] = sol.pop('points')
                t = sol.pop('time')
                if t:
                    p['time'] = self.to_time(t)
                if p['result'] > 0 and not p.get('partial', False):
                    solved += 1

            r.pop('is_disqualified', None)
            r.pop('tiebreaker', None)

            row['old_rating'] = r.pop('old_rating', None)
            new_rating = r.pop('new_rating', None)
            if has_rated:
                row['rating_change'] = None
                row['new_rating'] = new_rating

            # everything left over is kept but hidden; *_time fields are
            # converted to timestamps
            for k, v in r.items():
                hidden_fields.add(k)
                if k.endswith('_time'):
                    # NOTE(review): arrow.get(v).timestamp is a property in
                    # arrow < 1.0 but a method in >= 1.0 — confirm the pinned
                    # arrow version matches this usage
                    r[k] = arrow.get(v).timestamp
                    fields_types.setdefault(k, ['time'])

            row.update({k: v for k, v in r.items() if k not in row})

            row['solved'] = {'solving': solved}

            if has_rated:
                if row.get('new_rating') is not None:
                    has_rating = True
                elif statistics is None or 'new_rating' not in statistics.get(
                        handle, {}):
                    handles_to_get_new_rating.append(handle)
                else:
                    # reuse previously stored ratings instead of re-fetching
                    row['old_rating'] = statistics[handle].get('old_rating')
                    row['new_rating'] = statistics[handle]['new_rating']

        # rated contest but the ranking API exposed no ratings at all:
        # recover them from each user's profile (rate-limited fan-out)
        if has_rated and not has_rating and handles_to_get_new_rating:
            with ExitStack() as stack:
                executor = stack.enter_context(PoolExecutor(max_workers=8))
                pbar = stack.enter_context(
                    tqdm.tqdm(total=len(handles_to_get_new_rating),
                              desc='getting new rankings'))

                @RateLimiter(max_calls=1, period=2)
                def fetch_data(handle):
                    # one profile request per handle, at most one call per 2s
                    url = self.FETCH_USER_INFO_URL_.format(
                        resource=resource, user=quote_plus(handle))
                    page = REQ.get(url)
                    data = json.loads(page)
                    return handle, data

                for handle, data in executor.map(fetch_data,
                                                 handles_to_get_new_rating):
                    rating = data.get('contests', {}).get('current_rating')
                    if rating:
                        result[handle].setdefault('info',
                                                  {})['rating'] = rating

                    # ratings belonging to other contests are queued as
                    # updates to those contests rather than applied here
                    contest_addition_update = {}
                    for key, contest in data['contests']['history'].items():
                        rating = contest.get('rating')
                        if not rating:
                            continue
                        if key == self.key:
                            result[handle]['new_rating'] = rating
                        else:
                            contest_addition_update[
                                key] = collections.OrderedDict(
                                    (('new_rating', rating), ))
                    result[handle][
                        'contest_addition_update'] = contest_addition_update
                    pbar.update()

        standings_url = self.url.rstrip(
            '/') + '/ranking/' if result else self.standings_url

        standings = {
            'result': result,
            'url': standings_url,
            'problems': problems_info,
            'fields_types': fields_types,
            'hidden_fields': list(hidden_fields),
        }
        return standings
Exemplo n.º 21
0
    def get_standings(self, users=None, statistics=None):
        """Build standings for a Topcoder round.

        Handles two formats: marathon matches (parsed from the challenge
        API when a challenge id can be extracted from the URL) and single
        round matches (scraped from the legacy round-overview HTML pages,
        including per-coder rooms, solutions and challenges).

        :param users: optional collection of handles to restrict parsing to
        :param statistics: previously stored rows, used to avoid re-fetching
                           solution sources
        :return: dict with 'result', 'url', 'problems', 'hidden_fields',
                 'fields_types' and 'options'
        """
        result = {}
        hidden_fields = []
        fields_types = {}
        order = None
        writers = defaultdict(int)

        start_time = self.start_time.replace(tzinfo=None)

        # no known standings url for a recent contest: try to discover one
        # by fuzzy-matching round titles from the archive listings
        if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
            # minimum title-similarity (IoU over title words) required
            # before adopting a candidate url; 0.618... = 1/phi
            opt = 0.61803398875

            def canonize_title(value):
                # normalize a round title into a set of comparable tokens
                value = value.lower()
                value = re.sub(r'\s+-[^-]+$', '', value)
                value = re.sub(r'\bsingle\s+round\s+match\b', 'srm', value)
                value = re.sub(r'\bmarathon\s+match\b', 'mm', value)
                value = re.sub(r'[0-9]*([0-9]{2})\s*tco(\s+)', r'tco\1\2', value)
                value = re.sub(r'tco\s*[0-9]*([0-9]{2})(\s+)', r'tco\1\2', value)
                value = re.sub(r'^[0-9]{2}([0-9]{2})(\s+)', r'tco\1\2', value)
                return set(re.split('[^A-Za-z0-9]+', value))

            def process_match(date, title, url):
                # adopt `url` as standings_url if the candidate's date is
                # close and its title similarity beats the current best
                nonlocal opt

                if abs(date - start_time) > timedelta(days=2):
                    return

                a1 = canonize_title(title)
                a2 = canonize_title(self.name)
                intersection = 0
                for w1 in a1:
                    for w2 in a2:
                        # digits must match exactly; words may prefix-match
                        if w1.isdigit() or w2.isdigit():
                            if w1 == w2:
                                intersection += 1
                                break
                        elif w1.startswith(w2) or w2.startswith(w1):
                            intersection += 1
                            break
                union = len(a1) + len(a2) - intersection
                iou = intersection / union
                if iou > opt:
                    opt = iou
                    self.standings_url = url

            # candidate source 1: HTML match list
            url = 'https://www.topcoder.com/tc?module=MatchList&nr=100500'
            page = REQ.get(url)
            re_round_overview = re.compile(
                r'''
(?:<td[^>]*>(?:
[^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
(?P<date>[0-9]+\.[0-9]+\.[0-9]+)
)</td>[^<]*){2}
                ''',
                re.VERBOSE,
            )
            matches = re_round_overview.finditer(str(page))
            for match in matches:
                date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                process_match(date, match.group('title'), urljoin(url, match.group('url')))

            # candidate source 2: XML round list feed
            url = 'https://www.topcoder.com/tc?module=BasicData&c=dd_round_list'
            page = REQ.get(url)
            root = ET.fromstring(page)
            for child in root:
                data = {}
                for field in child:
                    data[field.tag] = field.text
                date = dateutil.parser.parse(data['date'])
                url = 'https://www.topcoder.com/stat?c=round_overview&er=5&rd=' + data['round_id']
                process_match(date, data['full_name'], url)

        # a /challenges/<id> url identifies a marathon match
        for url in self.url, self.standings_url:
            if url:
                match = re.search('/challenges/(?P<cid>[0-9]+)', url)
                if match:
                    challenge_id = match.group('cid')
                    break
        else:
            challenge_id = None

        if challenge_id:  # marathon match
            url = conf.TOPCODER_API_MM_URL_FORMAT.format(challenge_id)
            page = REQ.get(url)
            data = json.loads(page)
            problems_info = []
            hidden_fields.extend(['time', 'submits', 'style'])
            fields_types = {'delta_rank': ['delta'], 'delta_score': ['delta']}
            order = ['place_as_int', '-solving', 'addition__provisional_rank', '-addition__provisional_score']
            for row in data:
                handle = row.pop('member')
                r = result.setdefault(handle, OrderedDict())
                r['member'] = handle
                r['place'] = row.pop('finalRank', None)
                r['provisional_rank'] = row.pop('provisionalRank', None)
                r['style'] = row.pop('style')
                if r['place'] and r['provisional_rank']:
                    r['delta_rank'] = r['provisional_rank'] - r['place']
                submissions = row.pop('submissions')
                has_solution = False
                for s in submissions:
                    score = s.get('finalScore')
                    if not score or score == '-':
                        # no final score yet: fall back to the provisional one
                        if 'provisional_score' not in r:
                            p_score = s.pop('provisionalScore', None)
                            if isinstance(p_score, str):
                                p_score = asfloat(p_score)
                            if p_score is not None:
                                r['provisional_score'] = round(p_score, 2) if p_score >= 0 else False
                                r['time'] = s['created']
                                has_solution = True
                        continue
                    r['solving'] = score
                    r['solved'] = {'solving': int(score > 0)}
                    p_score = s.pop('provisionalScore')
                    if isinstance(p_score, str):
                        p_score = asfloat(p_score)
                    if p_score is not None and p_score > 0:
                        r['provisional_score'] = round(p_score, 2)
                        r['delta_score'] = round(score - p_score, 2)
                    r['time'] = s['created']
                    has_solution = True
                    break
                if not has_solution:
                    continue
                r['submits'] = len(submissions)
            if not result:
                raise ExceptionParseStandings('empty standings')
        else:  # single round match
            if not self.standings_url:
                raise InitModuleException('Not set standings url for %s' % self.name)
            url = self.standings_url + '&nr=100000042'
            page = REQ.get(url, time_out=100)
            result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)
            if not result_urls:
                raise ExceptionParseStandings('not found result urls')

            # extra per-coder data from the dd_round_results XML feed
            dd_round_results = {}
            match = re.search('rd=(?P<rd>[0-9]+)', url)
            if match:
                rd = match.group('rd')
                url = f'https://www.topcoder.com/tc?module=BasicData&c=dd_round_results&rd={rd}'
                try:
                    dd_round_results_page = REQ.get(url)
                    root = ET.fromstring(dd_round_results_page)
                    for child in root:
                        data = {}
                        for field in child:
                            data[field.tag] = field.text
                        handle = data.pop('handle')
                        dd_round_results[handle] = self._dict_as_number(data)
                except FailOnGetResponse:
                    pass

            hidden_fields.extend(['coding_phase', 'challenge_phase', 'system_test', 'point_total', 'room'])

            # one problem set per division table on the overview page
            matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
            problems_sets = []
            for match in matches:
                problems = re.findall(
                    '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                    match.group(),
                    re.IGNORECASE,
                )
                if problems:
                    problems_sets.append([
                        {'short': n, 'url': urljoin(url, u)}
                        for u, n in problems
                    ])

            # dict keyed by division when there is more than one set
            problems_info = dict() if len(problems_sets) > 1 else list()
            for problems_set, result_url in zip(problems_sets, result_urls):
                url = urljoin(self.standings_url, result_url + '&em=1000000042')
                url = url.replace('&amp;', '&')
                division = int(parse_qs(url)['dn'][0])
                division_str = 'I' * division

                with PoolExecutor(max_workers=3) as executor:
                    def fetch_problem(p):
                        # enrich problem dict with tags, writers/testers and
                        # per-division info from the problem-detail page
                        errors = set()
                        for attempt in range(3):
                            try:
                                page = REQ.get(p['url'], time_out=30)
                                match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
                                page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
                                matches = re.findall(r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>', page, re.DOTALL)  # noqa
                                for key, value in matches:
                                    key = key.strip().rstrip(':').lower()
                                    if key == 'categories':
                                        tags = [t.strip().lower() for t in value.split(',')]
                                        tags = [t for t in tags if t]
                                        if tags:
                                            p['tags'] = tags
                                    elif key.startswith('writer') or key.startswith('tester'):
                                        key = key.rstrip('s') + 's'
                                        p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
                                for w in p.get('writers', []):
                                    writers[w] += 1

                                info = p.setdefault('info', {})
                                matches = re.finditer('<table[^>]*paddingTable2[^>]*>.*?</table>', page, re.DOTALL)
                                for match in matches:
                                    html_table = match.group(0)
                                    rows = parsed_table.ParsedTable(html_table)
                                    for row in rows:
                                        key, value = None, None
                                        for k, v in row.items():
                                            if k == "":
                                                key = v.value
                                            elif k and division_str in k.split():
                                                value = v.value
                                        if key and value:
                                            key = re.sub(' +', '_', key.lower())
                                            info[key] = value
                                            if key == 'point_value':
                                                value = toint(value) or asfloat(value)
                                                if value is not None:
                                                    p['full_score'] = value
                            except Exception as e:
                                errors.add(f'error parse problem info {p}: {e}')
                                sleep(5 + attempt)
                        # NOTE(review): the loop body never `break`s on
                        # success, so this for/else always runs, `errors` is
                        # always reset to None (never logged) and a successful
                        # fetch is repeated 3 times — looks like a missing
                        # `break` at the end of the try block; confirm intent
                        else:
                            errors = None
                        if errors:
                            LOG.error(errors)

                        return p

                    for p in tqdm.tqdm(executor.map(fetch_problem, problems_set), total=len(problems_set)):
                        d = problems_info
                        if len(problems_sets) > 1:
                            d = d.setdefault('division', OrderedDict())
                            d = d.setdefault(division_str, [])
                        d.append(p)

                # empty-but-not-None users means: parse problems only
                if not users and users is not None:
                    continue

                # parse the per-division results table
                page = REQ.get(url)
                rows = etree.HTML(page).xpath("//tr[@valign='middle']")
                header = None
                url_infos = []
                for row in rows:
                    r = parsed_table.ParsedTableRow(row)
                    if len(r.columns) < 10:
                        continue
                    values = [c.value for c in r.columns]
                    if header is None:
                        header = values
                        continue

                    d = OrderedDict(list(zip(header, values)))
                    handle = d.pop('Coders').strip()
                    d = self._dict_as_number(d)
                    if users and handle not in users:
                        continue

                    row = result.setdefault(handle, OrderedDict())
                    row.update(d)

                    # drop empty rating triple entirely
                    if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                        row.pop('new_rating', None)
                        row.pop('old_rating', None)
                        row.pop('rating_change', None)

                    row['member'] = handle
                    row['place'] = row.pop('division_placed', None)
                    row['solving'] = row['point_total']
                    row['solved'] = {'solving': 0}
                    row['division'] = 'I' * division

                    if 'adv.' in row:
                        row['advanced'] = row.pop('adv.').lower().startswith('y')

                    url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                    url_infos.append(url_info)

                def fetch_solution(url):
                    # download one solution source; best-effort with retries
                    for i in range(2):
                        try:
                            page = REQ.get(url, time_out=60)
                            match = re.search('<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                                              page,
                                              re.DOTALL | re.IGNORECASE)
                            if not match:
                                break
                            ret = html.unescape(match.group('solution'))
                            ret = ret.strip()
                            ret = ret.replace('<BR>', '\n')
                            ret = ret.replace('\xa0', ' ')
                            return ret
                        except FailOnGetResponse:
                            sleep(i * 10 + 3)
                    return None

                n_failed_fetch_info = 0

                def fetch_info(url):
                    # scrape one coder's room page: problems, times,
                    # solutions and challenge records; gives up for everyone
                    # after 10 failed pages
                    nonlocal n_failed_fetch_info
                    if n_failed_fetch_info > 10:
                        return
                    delay = 10
                    for _ in range(5):
                        try:
                            page = REQ.get(url, time_out=delay)
                            match = re.search('class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                                              page,
                                              re.IGNORECASE)
                            if match:
                                break
                        except Exception:
                            sleep(delay + _)
                    else:
                        n_failed_fetch_info += 1
                        return

                    handle = html.unescape(match.group('handle').strip())

                    match = re.search(r'&nbsp;Room\s*(?P<room>[0-9]+)', page)
                    room = match.group('room') if match else None

                    matches = re.finditer(r'''
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    problems = {}
                    n_fetch_solution = 0
                    for match in matches:
                        d = match.groupdict()
                        short = d.pop('short')
                        solution_url = urljoin(url, d['url'])
                        d['url'] = solution_url
                        d = self._dict_as_number(d)
                        # failed/challenged submissions count negatively
                        if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                            d['result'] = -d['result']
                        if abs(d['result']) < 1e-9:
                            d.pop('result')
                        if re.match('^[0.:]+$', d['time']):
                            d.pop('time')
                        else:
                            time_in_seconds = 0
                            for t in d['time'].split(':'):
                                time_in_seconds = time_in_seconds * 60 + asfloat(t)
                            d['time_in_seconds'] = time_in_seconds

                        # reuse a previously stored solution when available
                        solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                        if not solution:
                            n_fetch_solution += 1
                            solution = fetch_solution(solution_url)
                        d['solution'] = solution

                        problems[short] = d

                    challenges = []
                    matches = re.finditer(r'''
                        <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    for match in matches:
                        d = match.groupdict()
                        d = {k: v.strip() for k, v in d.items()}
                        d['result'] = float(d['result'].replace(',', '.'))
                        d['url'] = urljoin(url, d['url'])

                        # challenge points are folded into the target problem
                        p = problems.setdefault(d['problem'], {})
                        p.setdefault('extra_score', 0)
                        p['extra_score'] += d['result']
                        p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                        challenges.append(d)

                    return url, handle, room, problems, challenges, n_fetch_solution

                # fan out room-page scraping across threads
                with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                    n_fetch_solution = 0
                    for info in executor.map(fetch_info, url_infos):
                        if info is None:
                            continue
                        url, handle, room, problems, challenges, n_sol = info
                        n_fetch_solution += n_sol
                        pbar.set_description(f'div{division} {url}')
                        pbar.set_postfix(n_solution=n_fetch_solution, n_failed_fetch_info=n_failed_fetch_info)
                        pbar.update()
                        if handle is not None:
                            if handle not in result:
                                LOG.error(f'{handle} not in result, url = {url}')
                            row = result[handle]
                            row['url'] = url
                            if room:
                                row['room'] = room
                            row['problems'] = problems
                            row['challenges'] = challenges
                            for p in problems.values():
                                if p.get('result', 0) > 1e-9:
                                    row['solved']['solving'] += 1
                            if challenges:
                                h = row.setdefault('hack', {
                                    'title': 'challenges',
                                    'successful': 0,
                                    'unsuccessful': 0,
                                })
                                for c in challenges:
                                    h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

            # merge dd_round_results extras into rows, only for fields the
            # HTML scrape did not already provide
            if dd_round_results:
                fields = set()
                hidden_fields_set = set(hidden_fields)
                for data in result.values():
                    for field in data.keys():
                        fields.add(field)

                k_mapping = {'new_vol': 'new_volatility', 'advanced': None}
                for handle, data in dd_round_results.items():
                    if handle not in result:
                        continue
                    row = result[handle]

                    for k, v in data.items():
                        k = k_mapping.get(k, k)
                        if k and k not in fields:
                            if k in {'new_rating', 'old_rating'} and not v:
                                continue
                            row[k] = v
                            if k not in hidden_fields_set:
                                hidden_fields_set.add(k)
                                hidden_fields.append(k)
                            # level_<one|two|three>_language maps the used
                            # language onto the corresponding problem
                            ks = k.split('_')
                            if ks[0] == 'level' and ks[-1] == 'language' and v and v.lower() != 'unspecified':
                                idx = {'one': 0, 'two': 1, 'three': 2}.get(ks[1], None)
                                d = problems_info
                                if len(problems_sets) > 1:
                                    d = d['division'][row['division']]
                                if idx is not None and 0 <= idx < len(d) and d[idx]['short'] in row['problems']:
                                    row['problems'][d[idx]['short']]['language'] = v
        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
            'hidden_fields': hidden_fields,
            'fields_types': fields_types,
            'options': {
                'fixed_fields': [('hack', 'Challenges')],
            },
        }

        # writers ordered by how many problems they authored
        if writers:
            writers = [w[0] for w in sorted(writers.items(), key=lambda w: w[1], reverse=True)]
            standings['writers'] = writers

        # finals get one medal of each kind
        if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
            standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]

        if order:
            standings['options']['order'] = order

        return standings
Exemplo n.º 22
0
    def get_standings(self, users=None, statistics=None):
        """Parse ICPC World Finals standings for the year encoded in ``self.key``.

        Tries several known standings mirrors (kattis, zibada.guru,
        web.archive.org snapshots, cphof.org, the icpc.global API) in order,
        returning the first one that yields a non-empty result.

        :param users: unused here; kept for interface compatibility with
            sibling ``get_standings`` implementations.
        :param statistics: previously collected per-team statistics; values
            are merged into rows for fields not already present.
        :return: standings dict with ``result``, ``url``, ``problems``,
            ``options`` and ``hidden_fields``.
        :raises ExceptionParseStandings: if no standings URL can be found or
            none of the candidate URLs produces a parseable result.
        """
        # Contest year is embedded in the key as a 4-digit number; the season
        # string (e.g. "2018-2019") is appended to member handles to keep
        # accounts unique per season.
        year = int(re.search(r'\b[0-9]{4}\b', self.key).group(0))
        season = '%d-%d' % (year - 1, year)

        icpc_standings_url = f'https://icpc.global/community/results-{year}'
        icpc_api_standings_url = f'https://icpc.global/api/help/cms/virtpublic/community/results-{year}'

        # Build the list of candidate standings URLs, either by probing the
        # known mirrors or by using the explicitly configured standings_url.
        standings_urls = []
        if not self.standings_url:
            for url in (
                    f'http://static.kattis.com/icpc/wf{year}/',
                    f'https://zibada.guru/finals/{year}/',
                    f'http://web.archive.org/web/{year}/https://icpc.baylor.edu/scoreboard/',
                    f'http://web.archive.org/web/{year}/https://icpc.global/scoreboard/',
                    f'https://cphof.org/standings/icpc/{year}',
                    icpc_api_standings_url,
            ):
                try:
                    page = REQ.get(url)
                except FailOnGetResponse:
                    continue

                # Archive redirects may land on a snapshot from a different
                # year; reject those.
                if 'web.archive.org' in REQ.last_url and f'/{year}' not in REQ.last_url:
                    continue

                # Keep only pages that actually mention this year's finals.
                if not re.search(
                        rf'\b(world\s*finals\s*{year}|{year}\s*world\s*finals)\b',
                        page, re.IGNORECASE):
                    continue

                standings_urls.append(url)
        else:
            # The human-facing icpc.global page is backed by a JSON API;
            # substitute the API endpoint when the page URL was configured.
            if self.standings_url == icpc_standings_url:
                standings_urls.append(icpc_api_standings_url)
            else:
                standings_urls.append(self.standings_url)

        if not standings_urls:
            raise ExceptionParseStandings(
                f'Not found standings url year = {year}')

        for standings_url in standings_urls:
            is_icpc_api_standings_url = standings_url == icpc_api_standings_url
            page = REQ.get(standings_url)

            result = {}
            # 'region' is always treated as a hidden (non-column) field.
            hidden_fields = set(self.info.get('hidden_fields',
                                              [])) | {'region'}
            problems_info = OrderedDict()

            if 'zibada' in standings_url:
                # zibada.guru embeds the scoreboard as a JS assignment
                # (`... = {...};`); first grab the inline data, then prefer
                # the richer standings.js payload when it is available.
                match = re.search(r' = (?P<data>[\{\[].*?);?\s*$', page,
                                  re.MULTILINE)
                if match:
                    names = self._json_load(match.group('data'))
                else:
                    names = None

                try:
                    page = REQ.get('standings.js')
                    match = re.search(r' = (?P<data>\{.*?);?\s*$', page,
                                      re.MULTILINE)
                    data = self._json_load(match.group('data'))
                except Exception:
                    # Fall back to the inline payload; it must exist then.
                    assert names
                    data = names

                for p_name in data['problems']:
                    problems_info[p_name] = {'short': p_name}

                events = data.pop('events', None)
                if events:
                    # Event-log format: each event is a whitespace-separated
                    # string "team_id problem status attempt time"; replay
                    # them in chronological order to reconstruct the board.
                    teams = {}
                    # NOTE(review): times in the event log appear to be in
                    # seconds (divided by 60 into minutes below) — confirm.
                    time_divider = 60
                    events.sort(key=lambda e: int(e.split()[-1]))
                    for e in events:
                        tid, p_name, status, attempt, time = e.split()
                        time = int(time)

                        team = teams.setdefault(tid, {})
                        problems = team.setdefault('problems', {})
                        # NOTE(review): this rebinds the outer `result` dict
                        # to a string; harmless only because `result` is
                        # rebuilt below at the end of the zibada branch.
                        result = problems.get(p_name, {}).get('result', '')
                        # A pending ('?') event never downgrades a final one.
                        if not result.startswith('?') and status.startswith(
                                '?'):
                            continue
                        if status == '+':
                            attempt = int(attempt) - 1
                            # NOTE(review): p_info is assigned but never used
                            # in this loop.
                            p_info = problems_info[p_name]
                        problems[p_name] = {
                            'time':
                            time,
                            'result':
                            '+' if status == '+' and attempt == 0 else
                            f'{status}{attempt}',
                        }
                    for tid, team in teams.items():
                        name = names[int(tid)][0]
                        name = html.unescape(name)
                        team['member'] = f'{name} {season}'
                        team['name'] = name
                        # Recompute ICPC score: solved count plus time-based
                        # penalty with 20 minutes per failed attempt.
                        penalty = 0
                        solving = 0
                        for p_name, problem in team.get('problems',
                                                        {}).items():
                            if problem['result'].startswith('+'):
                                solving += 1
                                attempt_penalty = (int(
                                    problem['result'].lstrip('+')
                                    or 0)) * 20 * time_divider
                                penalty += problem['time'] + attempt_penalty
                        team['penalty'] = int(round(penalty / time_divider))
                        team['solving'] = solving
                else:
                    # Precomputed-teams format: each team entry is either a
                    # dict (keyed fields) or a positional list.
                    teams = {}
                    time_divider = 1
                    data_teams = data['teams']
                    if isinstance(data_teams, dict):
                        data_teams = data_teams.values()
                    for team in data_teams:
                        row = {}

                        def get(key, index):
                            # Uniform accessor over dict/list team records.
                            return team[key] if isinstance(
                                team, dict) else team[index]

                        name = get('name', 0)
                        name = html.unescape(name)
                        row['member'] = f'{name} {season}'
                        row['name'] = name
                        row['solving'] = int(get('score', 2))
                        row['penalty'] = int(get('time', 3))

                        if isinstance(team, dict):
                            # Per-problem cells are stored under stringified
                            # indices; collect them into a list.
                            team['problems'] = [
                                team[str(index)]
                                for index in range(len(data['problems']))
                            ]

                        problems = row.setdefault('problems', {})
                        for p_name, verdict in zip(data['problems'],
                                                   get('problems', 4)):
                            if not verdict:
                                continue
                            if isinstance(verdict, dict):
                                # Dict verdicts use one-letter keys:
                                # a=attempts, p=pending, s=solved, t=time.
                                verdict = {k[0]: v for k, v in verdict.items()}
                                verdict['a'] = int(verdict['a'])
                                if isinstance(verdict.get('p'), int):
                                    verdict['a'] += verdict['p']
                                if isinstance(verdict['s'], str):
                                    verdict['s'] = int(verdict['s'])
                                status = '+' if verdict['s'] else (
                                    '?' if verdict.get('p', False) else '-')
                                time = verdict['t']
                                result = verdict['a']
                                # Times here are milliseconds; normalize to
                                # minutes later via time_divider.
                                time_divider = 1000 * 60
                                if not result:
                                    continue
                            else:
                                # String verdicts: "status attempts [time]".
                                status, result = verdict.split(' ', 1)
                                if ' ' in result:
                                    result, time = result.split()
                                    time = int(time)
                                else:
                                    time = None
                                result = int(result)
                            problem = problems.setdefault(p_name, {})
                            if status == '+':
                                problem['time'] = time
                                problem[
                                    'result'] = '+' if result == 1 else f'+{result - 1}'
                            else:
                                problem['result'] = f'{status}{result}'
                        teams[row['member']] = row

                # Rank teams by (solved desc, penalty asc); equal scores
                # share the same place.
                teams = list(teams.values())
                teams.sort(key=lambda t: (t['solving'], -t['penalty']),
                           reverse=True)
                rank = 0
                prev = None
                for i, t in enumerate(teams):
                    curr = (t['solving'], t['penalty'])
                    if prev != curr:
                        rank = i + 1
                        prev = curr
                    t['place'] = rank
                result = {t['member']: t for t in teams}

                problems_info = OrderedDict(sorted(problems_info.items()))
            else:
                # Generic HTML-table scoreboards (kattis/archive/cphof/API).
                if is_icpc_api_standings_url:
                    # The API response splits the board into several tables;
                    # stitch consecutive tables back into one.
                    page = re.sub(
                        r'</table>\s*<table>\s*(<tr[^>]*>\s*<t[^>]*>)',
                        r'\1',
                        page,
                        flags=re.I)

                regex = '''(?:<table[^>]*(?:id=["']standings|class=["']scoreboard)[^>]*>|"content":"[^"]*<table[^>]*>|<table[^>]*class="[^"]*(?:table[^"]*){3}"[^>]*>).*?</table>'''  # noqa
                match = re.search(regex, page, re.DOTALL)
                if match:
                    html_table = match.group(0)
                    table = parsed_table.ParsedTable(
                        html_table,
                        with_not_full_row=is_icpc_api_standings_url)
                else:
                    table = []
                time_divider = 1
                last_place = None
                # Non-numeric rank cells start an "honorable mentions"
                # section; subsequent cell values are collected into it.
                honorables = []
                for r in table:
                    row = {}
                    problems = row.setdefault('problems', {})
                    for k, vs in r.items():
                        if isinstance(vs, list):
                            v = ' '.join(i.value for i in vs if i.value)
                        else:
                            v = vs.value
                        k = k.lower().strip('.')
                        v = v.strip()
                        if honorables:
                            if v:
                                honorables.append(v)
                            continue
                        if k in ('rank', 'rk', 'place'):
                            if not isinstance(vs, list):
                                # Medal is encoded as an <img alt="... medal">.
                                medal = vs.column.node.xpath('.//img/@alt')
                                if medal and medal[0].endswith('medal'):
                                    row['medal'] = medal[0].split()[0]
                            if v and not v[0].isdigit():
                                honorables.append(v)
                            row['place'] = v
                        elif k in ('team', 'name', 'university'):
                            if isinstance(vs, list):
                                # First cell image is treated as the team logo.
                                for el in vs:
                                    logo = el.column.node.xpath('.//img/@src')
                                    if logo:
                                        logo = urllib.parse.urljoin(
                                            standings_url, logo[0])
                                        row.setdefault('info',
                                                       {})['logo'] = logo
                                        break
                                # Region badge, when present.
                                for el in vs:
                                    region = el.column.node.xpath(
                                        './/*[@class="badge badge-warning"]')
                                    if region:
                                        region = ''.join([
                                            s.strip()
                                            for s in region[0].xpath('text()')
                                        ])
                                        if region:
                                            row['region'] = region
                            if 'cphof' in standings_url:
                                # cphof links the canonical team name.
                                member = vs.column.node.xpath(
                                    './/a/text()')[0].strip()
                                row['member'] = f'{member} {season}'
                            else:
                                row['member'] = f'{v} {season}'
                            row['name'] = v
                        elif k in ('time', 'penalty', 'total time (min)',
                                   'minutes'):
                            if v:
                                row['penalty'] = int(v)
                        elif k in ('slv', 'solved', '# solved'):
                            row['solving'] = int(v)
                        elif k == 'score':
                            # Either "solved penalty" or just "solved".
                            if ' ' in v:
                                row['solving'], row['penalty'] = map(
                                    int, v.split())
                            else:
                                row['solving'] = int(v)
                        elif len(k) == 1:
                            # Single-letter column => problem cell.
                            k = k.title()
                            if k not in problems_info:
                                problems_info[k] = {'short': k}
                                if 'title' in vs.header.attrs:
                                    problems_info[k]['name'] = vs.header.attrs[
                                        'title']

                            # Normalize "<time> <tries> tries" to
                            # "<tries> <time>", then strip decorations.
                            v = re.sub(r'([0-9]+)\s+([0-9]+)\s+tr.*', r'\2 \1',
                                       v)
                            v = re.sub('tr[a-z]*', '', v)
                            v = re.sub('-*', '', v)
                            v = v.strip()
                            if not v:
                                continue

                            p = problems.setdefault(k, {})
                            if '+' in v:
                                # Pending submission.
                                v = v.replace(' ', '')
                                p['result'] = f'?{v}'
                            elif ' ' in v:
                                # Accepted: "attempts time".
                                pnt, time = map(int, v.split())
                                p['result'] = '+' if pnt == 1 else f'+{pnt - 1}'
                                p['time'] = time

                                if ('solvedfirst' in vs.column.attrs.get(
                                        'class', ''
                                ) or vs.column.node.xpath(
                                        './/*[contains(@class, "score_first")]'
                                )):
                                    p['first_ac'] = True
                            else:
                                # Rejected attempts only.
                                p['result'] = f'-{v}'
                    # Rows with an empty rank cell inherit the previous place
                    # (tied teams).
                    if row.get('place'):
                        last_place = row['place']
                    elif last_place:
                        row['place'] = last_place
                    if 'member' not in row or row['member'].startswith(' '):
                        continue
                    result[row['member']] = row

                # icpc.global pages list per-region honorable teams in
                # card elements outside the main table.
                elements = etree.HTML(page).xpath(
                    '//div[@class="card-header"]/following-sibling::div[@class="card-body"]//li'
                )  # noqa
                for el in elements:
                    name = ''.join([s.strip() for s in el.xpath('text()')])
                    member = f'{name} {season}'
                    row = result.setdefault(member, {
                        'member': member,
                        'name': name
                    })

                    logo = el.xpath('./img/@src')
                    if logo:
                        row.setdefault('info',
                                       {})['logo'] = urllib.parse.urljoin(
                                           standings_url, logo[0])

                    # Walk up to the enclosing card to read its header,
                    # which carries the region name.
                    while el is not None:
                        prv = el.getprevious()
                        if prv is not None and prv.tag == 'div' and prv.get(
                                'class') == 'card-header':
                            break
                        el = el.getparent()
                    if el is not None:
                        region = ''.join(
                            [s.strip() for s in prv.xpath('text()')])
                        row['region'] = region

                if result and honorables:
                    # Add honorable-mention teams collected from the table
                    # as placeless rows.
                    for name in honorables:
                        if 'honorable' in name.lower():
                            continue
                        row = dict(name=name, member=f'{name} {season}')
                        result[row['member']] = row

            if not result:
                # This mirror produced nothing; try the next candidate URL.
                continue

            # Merge fields from previously stored statistics without
            # overwriting freshly parsed values.
            if statistics:
                for team, row in result.items():
                    stat = statistics.get(team)
                    if not stat:
                        continue
                    for k, v in stat.items():
                        if k not in row:
                            hidden_fields.add(k)
                            row[k] = v

            # Backfill missing regions (and extra team fields) from the
            # icpc.global published-teams API, matching by canonicalized
            # university/team name.
            if any(['region' not in r for r in result.values()]):
                try:
                    url = f'https://icpc.global/api/team/wf/{year}/published'
                    page = REQ.get(url, time_out=60)
                    data = self._json_load(page)
                except Exception:
                    # Best effort: region enrichment is optional.
                    traceback.print_exc()
                    data = None

                if data:

                    def canonize_name(name):
                        # Lowercase, normalize '&', collapse spaces, split on
                        # common separators, and sort the parts so different
                        # orderings of the same name compare equal.
                        name = name.lower()
                        name = name.replace('&', ' and ')
                        name = re.sub(r'\s{2,}', ' ', name)
                        name = re.split(r'(?:\s-\s|\s-|-\s|,\s)', name)
                        name = tuple(sorted([n.strip() for n in name]))
                        return name

                    matching = {}
                    for key, row in result.items():
                        name = row['name']
                        matching.setdefault(name, key)
                        name = canonize_name(name)
                        matching.setdefault(name, key)

                    for site in data:
                        region = site['siteName']
                        for team in site['teams']:
                            name = team['university']
                            if name not in matching:
                                name = canonize_name(name)
                            if name not in matching:
                                # Last resort: combine university + team name.
                                name = tuple(
                                    sorted(name + canonize_name(team['name'])))
                            if name not in matching:
                                logger.warning(f'Not found team = {name}')
                            else:
                                row = result[matching[name]]
                                row['region'] = region
                                for k, v in team.items():
                                    k = k.lower()
                                    if k not in row:
                                        hidden_fields.add(k)
                                        row[k] = v

            # First pass: record the earliest accepted time per problem and
            # overall, and whether any cell was explicitly marked first_ac.
            first_ac_of_all = None
            for team in result.values():
                for p_name, problem in team.get('problems', {}).items():
                    p_info = problems_info[p_name]
                    if not problem['result'].startswith('+'):
                        continue
                    time = problem['time']
                    if 'first_ac' not in p_info or time < p_info['first_ac']:
                        p_info['first_ac'] = time
                    if first_ac_of_all is None or time < first_ac_of_all:
                        first_ac_of_all = time
                    if problem.get('first_ac'):
                        p_info['has_first_ac'] = True

            # Second pass: mark first-to-solve cells (preferring explicit
            # markers when present) and normalize times to minutes.
            for team in result.values():
                for p_name, problem in team.get('problems', {}).items():
                    p_info = problems_info[p_name]
                    if problem['result'].startswith('+'):
                        if p_info.get('has_first_ac'
                                      ) and not problem.get('first_ac'):
                            continue
                        if problem['time'] == p_info['first_ac']:
                            problem['first_ac'] = True
                        if problem['time'] == first_ac_of_all:
                            problem['first_ac_of_all'] = True
                    if 'time' in problem:
                        problem['time'] = int(
                            round(problem['time'] / time_divider))

            # Skip medal assignment while any submission is still pending.
            without_medals = any(p['result'].startswith('?')
                                 for row in result.values()
                                 for p in row.get('problems', {}).values())

            options = {'per_page': None}
            if not without_medals:
                medals = self._get_medals(year)
                if medals:
                    medals = [{
                        'name': k,
                        'count': v
                    } for k, v in medals.items()]
                    options['medals'] = medals

            standings = {
                'result': result,
                # Report the human-facing page rather than the API endpoint.
                'url': icpc_standings_url
                if is_icpc_api_standings_url else standings_url,
                'problems': list(problems_info.values()),
                'options': options,
                'hidden_fields': list(hidden_fields),
            }
            return standings

        raise ExceptionParseStandings(
            f'Not found standings url from {standings_urls}')
Exemplo n.º 23
0
    def get_standings(self, users=None, statistics=None):
        """Parse standings from a TLX/Judgels-style contest JSON API.

        Resolves the contest by its URL slug, fetches the scoreboard and
        problem metadata, and builds per-user rows. Scoring is interpreted
        according to the contest ``style`` (ICPC, GCJ/TROC, or IOI). New
        ratings are fetched concurrently from each user's rating history
        unless already present in ``statistics``.

        :param users: when an empty collection, result parsing is skipped;
            ``None`` means parse everyone.
        :param statistics: previously stored per-handle statistics, used to
            avoid re-fetching known ``new_rating`` values.
        :return: standings dict with ``result``, ``url`` and ``problems``.
        :raises ExceptionParseStandings: on an unknown contest style.
        """
        # The contest slug is the last path segment of the contest URL.
        slug = self.url.rstrip('/').rsplit('/', 1)[-1]
        config_url = self.CONFIG_URL_FORMAT_.format(slug=slug)
        page = REQ.get(config_url)
        config_data = json.loads(page)
        style = config_data['contest']['style'].upper()

        # jid is the internal contest identifier used by the API endpoints.
        jid = config_data['contest']['jid']
        url = self.API_STANDINGS_URL_FORMAT_.format(jid=jid)
        page = REQ.get(url)
        data = json.loads(page)
        users_profiles_map = data['profilesMap']

        problems_url = self.API_PROBLEMS_URL_FORMAT_.format(jid=jid)
        problems_data = json.loads(REQ.get(problems_url))

        # Assemble problem metadata; aliases give the short display names
        # and the scoreboard state carries optional per-problem points.
        problems_info = []
        state = data['data']['scoreboard']['state']
        for idx, (code, short, problem_data) in enumerate(
                zip(state['problemJids'], state['problemAliases'],
                    problems_data['data'])):
            problem_data.update(
                problems_data['problemsMap'][problem_data['problemJid']])
            title = problem_data['titlesByLanguage'][
                problem_data['defaultLanguage']]
            info = {
                'name': title,
                'code': problem_data['slug'],
                'short': short,
            }
            if state['problemPoints']:
                info['full_score'] = state['problemPoints'][idx]
            elif problem_data['points']:
                info['full_score'] = problem_data['points']
            info['url'] = self.PROBLEM_URL_FORMAT_.format(url=self.url,
                                                          short=info['short'])
            problems_info.append(info)

        result = {}
        if users is None or users:
            rows = data['data']['scoreboard']['content']['entries']
            handles_to_get_new_rating = []
            has_old_rating = False
            for row in rows:
                cjid = row['contestantJid']
                # Skip contestants with no resolvable profile.
                if cjid not in users_profiles_map:
                    continue
                user = users_profiles_map[cjid]
                handle = user['username']

                r = result.setdefault(handle, collections.OrderedDict())
                r['member'] = handle
                r['place'] = row.pop('rank')
                if user.get('country'):
                    r['country'] = user['country']

                # Interpret totals according to the contest style.
                if style == 'ICPC':
                    r['penalty'] = row.pop('totalPenalties')
                    r['solving'] = row.pop('totalAccepted')
                elif style == 'GCJ' or style == 'TROC':
                    # GCJ-style penalty is minutes; render as HH:MM.
                    penalty = row.pop('totalPenalties')
                    r['penalty'] = f'{penalty // 60:02d}:{penalty % 60:02d}'
                    r['solving'] = row.pop('totalPoints')
                elif style == 'IOI':
                    r['solving'] = row.pop('totalScores')
                else:
                    raise ExceptionParseStandings(f'style = {style}')

                problems = r.setdefault('problems', {})
                solving = 0
                if style == 'IOI':
                    # IOI: per-problem numeric scores; a problem counts as
                    # solved when the score reaches its full_score
                    # (100 assumed when unspecified).
                    for idx, score in enumerate(row['scores']):
                        if score is None:
                            continue
                        k = problems_info[idx]['short']
                        p = problems.setdefault(k, {})
                        p['result'] = score
                        p['partial'] = problems_info[idx].get(
                            'full_score', 100) > score
                        if not p['partial']:
                            solving += 1
                else:
                    # ICPC/GCJ: attempts + penalty + per-problem state
                    # (truthy = accepted; 2 appears to mean first-to-solve).
                    for idx, (attempt, penalty, pstate) in enumerate(
                            zip(row['attemptsList'], row['penaltyList'],
                                row['problemStateList'])):
                        if not attempt:
                            continue
                        k = problems_info[idx]['short']
                        p = problems.setdefault(k, {})

                        if pstate:
                            solving += 1
                            p['result'] = f"+{'' if attempt == 1 else attempt - 1}"
                            p['time'] = f'{penalty // 60:02d}:{penalty % 60:02d}'
                        else:
                            p['result'] = f"-{attempt}"
                        if pstate == 2:
                            p['first_ac'] = True
                # Drop rows with no attempted problems at all.
                if not problems:
                    result.pop(handle)
                    continue

                if state['problemPoints'] or style == 'IOI':
                    r['solved'] = {'solving': solving}

                r['old_rating'] = (user.get('rating')
                                   or {}).get('publicRating')
                if r['old_rating'] is not None:
                    has_old_rating = True

                # Only fetch the rating history when new_rating isn't
                # already known from stored statistics.
                if statistics is None or 'new_rating' not in statistics.get(
                        handle, {}):
                    handles_to_get_new_rating.append(handle)
                else:
                    r['new_rating'] = statistics[handle]['new_rating']

            # If nobody has an old rating, treat the contest as unrated
            # and drop the column entirely.
            if not has_old_rating:
                for r in result.values():
                    r.pop('old_rating')

            # Fetch rating histories concurrently; extract the rating after
            # this contest and the user's latest rating overall.
            with ExitStack() as stack:
                executor = stack.enter_context(PoolExecutor(max_workers=8))
                pbar = stack.enter_context(
                    tqdm.tqdm(total=len(handles_to_get_new_rating),
                              desc='getting new rankings'))

                def fetch_data(handle):
                    url = self.API_HISTORY_URL_FORMAT_.format(handle=handle)
                    data = json.loads(REQ.get(url))
                    return handle, data

                for handle, data in executor.map(fetch_data,
                                                 handles_to_get_new_rating):
                    max_begin_time = -1
                    for contest in data['data']:
                        if contest['rating']:
                            rating = contest['rating']['publicRating']

                            # Rating change produced by this very contest.
                            if contest['contestJid'] == jid:
                                result[handle]['new_rating'] = rating

                            # Track the most recent contest to expose the
                            # user's current rating.
                            info = data['contestsMap'][contest['contestJid']]
                            if info['beginTime'] > max_begin_time:
                                result[handle]['info'] = {'rating': rating}
                                max_begin_time = info['beginTime']
                    pbar.update()

        standings = {
            'result': result,
            'url': self.STANDING_URL_FORMAT_.format(self),
            'problems': problems_info,
        }
        return standings
Exemplo n.º 24
0
    def get_standings(self, users=None, statistics=None):
        if not self.standings_url:
            page = REQ.get(urljoin(self.url, '/'))

            for name in (
                    'Соревнования',
                    'Тренировочные олимпиады',
            ):
                match = re.search(
                    '<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(name), page)
                url = match.group('url')
                page = REQ.get(url)

            match = re.search(
                '{}.*?<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(
                    re.escape(self.name), 'Результаты прошедших тренировок'),
                page,
                re.DOTALL,
            )
            if not match:
                raise ExceptionParseStandings('Not found standing url')

            url = match.group('url')
            page = REQ.get(url)

            date = self.start_time.strftime('%Y-%m-%d')
            matches = re.findall(
                r'''
                <tr[^>]*>[^<]*<td[^>]*>{}</td>[^<]*
                <td[^>]*>(?P<title>[^<]*)</td>[^<]*
                <td[^>]*>[^<]*<a[^>]*href\s*=["\s]*(?P<url>[^">]*)["\s]*[^>]*>
            '''.format(date), page, re.MULTILINE | re.VERBOSE)

            urls = [(title, urljoin(url, u)) for title, u in matches]
            if len(urls) > 1:
                urls = [(
                    title, urljoin(url, u)
                ) for title, u in matches if not re.search(
                    r'[0-9]\s*-\s*[0-9].*(?:[0-9]\s*-\s*[0-9].*\bкл\b|школа)',
                    title, re.I)]

            if not urls:
                raise ExceptionParseStandings('Not found standing url')

            if len(urls) > 1:
                ok = True
                urls_set = set()
                for _, u in urls:
                    page = REQ.get(u)
                    path = re.findall(
                        '<td[^>]*nowrap><a[^>]*href="(?P<href>[^"]*)"', page)
                    if len(path) < 2:
                        ok = False
                    parent = urljoin(u, path[-2])
                    urls_set.add(parent)
                if len(urls_set) > 1:
                    _, url = urls[0]
                elif not ok:
                    raise ExceptionParseStandings('Too much standing url')
                else:
                    url = urls_set.pop()
            else:
                _, url = urls[0]

            page = REQ.get(url)
            self.standings_url = REQ.last_url
        else:
            page = REQ.get(self.standings_url)

        def get_table(page):
            html_table = re.search(
                '<table[^>]*bgcolor="silver"[^>]*>.*?</table>', page,
                re.MULTILINE | re.DOTALL).group(0)
            table = parsed_table.ParsedTable(html_table)
            return table

        table = get_table(page)

        problems_info = OrderedDict()
        max_score = defaultdict(float)

        scoring = False

        result = {}
        for r in table:
            row = OrderedDict()
            problems = row.setdefault('problems', {})
            for k, v in list(r.items()):
                if k == 'Имя':
                    href = v.column.node.xpath('a/@href')
                    if not href:
                        continue
                    uid = re.search('[0-9]+$', href[0]).group(0)
                    row['member'] = uid
                    row['name'] = v.value
                elif k == 'Место':
                    row['place'] = v.value
                elif k == 'Время':
                    row['penalty'] = int(v.value)
                elif k in ['Сумма', 'Задачи']:
                    row['solving'] = float(v.value)
                elif re.match('^[a-zA-Z0-9]+$', k):
                    problems_info[k] = {'short': k}
                    if v.value:
                        p = problems.setdefault(k, {})
                        p['result'] = v.value

                        if v.value and v.value[0] not in ['-', '+']:
                            scoring = True

                        try:
                            max_score[k] = max(max_score[k], float(v.value))
                        except ValueError:
                            pass
                elif k:
                    row[k.strip()] = v.value.strip()
                elif v.value.strip().lower() == 'log':
                    href = v.column.node.xpath('.//a/@href')
                    if href:
                        row['url'] = urljoin(self.standings_url, href[0])
            result[row['member']] = row

        if scoring:
            match = re.search(
                r'<b[^>]*>\s*<a[^>]*href="(?P<url>[^"]*)"[^>]*>ACM</a>\s*</b>',
                page)
            if match:
                page = REQ.get(match.group('url'))
                table = get_table(page)
                for r in table:
                    uid = None
                    for k, v in list(r.items()):
                        if k == 'Имя':
                            href = v.column.node.xpath('a/@href')
                            if not href:
                                continue
                            uid = re.search('[0-9]+$', href[0]).group(0)
                        elif re.match('^[a-zA-Z0-9]+$', k) and uid and v.value:
                            if v.value[0] == '-':
                                result[uid]['problems'][k]['partial'] = True
                            elif v.value[0] == '+':
                                result[uid]['problems'][k]['partial'] = False
                                problems_info[k]['full_score'] = result[uid][
                                    'problems'][k]['result']

        for r in result.values():
            solved = 0
            for k, p in r['problems'].items():
                if p.get('partial'):
                    continue
                score = p['result']
                if score.startswith(
                        '+') or 'partial' in p and not p['partial']:
                    solved += 1
                else:
                    try:
                        score = float(score)
                    except ValueError:
                        continue
                    if abs(max_score[k] - score) < 1e-9 and score > 0:
                        solved += 1
            r['solved'] = {'solving': solved}

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }

        return standings
Exemplo n.º 25
0
    def get_standings(self, users=None, statistics=None):
        """Collect contest standings via the Codeforces-style API.

        Queries ``contest.standings`` twice (``showUnofficial`` false, then
        true) so official results and upsolving can be merged into one row
        per handle, then tries ``contest.ratingChanges`` to attach old/new
        ratings (best effort; failures are ignored).

        :param users: optional iterable of handles to restrict the query to
        :param statistics: unused here; kept for a uniform interface
        :return: standings dict with ``result``, ``url``, ``problems`` and
                 ``options`` keys; adds ``timing_statistic_delta`` while the
                 contest phase is not FINISHED, or ``{'action': 'delete'}``
                 when the API answers HTTP 400
        """

        standings_url = (self.url + '/standings').replace(
            'contests', 'contest')

        is_gym = '/gym/' in self.url
        result = {}

        for unofficial in [False, True]:
            params = {
                'contestId': self.cid,
                'showUnofficial': str(unofficial).lower(),
            }
            if users:
                params['handles'] = ';'.join(users)

            try:
                data = _query(
                    method='contest.standings',
                    params=params,
                    api_key=self.api_key,
                )
            except FailOnGetResponse as e:
                # HTTP 400 from the API is treated as "contest is gone":
                # signal the caller to delete it.
                if getattr(e.args[0], 'code', None) == 400:
                    return {'action': 'delete'}
                raise ExceptionParseStandings(e.args[0])

            if data['status'] != 'OK':
                raise ExceptionParseStandings(data['status'])

            phase = data['result']['contest'].get('phase', 'FINISHED').upper()
            contest_type = data['result']['contest']['type'].upper()
            duration_seconds = data['result']['contest'].get('durationSeconds')

            result_problems = data['result']['problems']
            problems_info = OrderedDict()
            for p in result_problems:
                d = {'short': p['index'], 'name': p['name']}
                if 'points' in p:
                    d['full_score'] = p['points']
                elif contest_type == 'IOI':
                    # IOI-type problems without explicit points default to 100.
                    d['full_score'] = 100
                d['url'] = urljoin(standings_url.rstrip('/'),
                                   f"problem/{d['short']}")
                problems_info[d['short']] = d

            # Treat the contest as a team contest if any party has a teamId.
            grouped = any('teamId' in row['party']
                          for row in data['result']['rows'])
            for row in data['result']['rows']:
                party = row['party']

                # Gym ghost teams come with no member handles; synthesize a
                # handle from the team name plus the season suffix.
                if is_gym and not party['members']:
                    is_ghost_team = True
                    name = party['teamName']
                    party['members'] = [{
                        'handle': f'{name} {self.get_season()}',
                        'name': name,
                    }]
                else:
                    is_ghost_team = False

                for member in party['members']:
                    if is_gym:
                        upsolve = False
                    else:
                        upsolve = party['participantType'] != 'CONTESTANT'
                        # The official pass processes contestants only; the
                        # unofficial pass processes everyone else.
                        if unofficial != upsolve:
                            continue

                    handle = member['handle']

                    r = result.setdefault(handle, OrderedDict())
                    r['member'] = handle
                    if 'room' in party:
                        r['room'] = str(party['room'])

                    r.setdefault('participant_type',
                                 []).append(party['participantType'])

                    if is_ghost_team:
                        r['name'] = member['name']
                        r['_no_update_name'] = True
                    elif grouped and (not upsolve and not is_gym
                                      or 'name' not in r):
                        r['name'] = ', '.join(m['handle']
                                              for m in party['members'])
                        if 'teamId' in party:
                            r['team_id'] = party['teamId']
                            r['name'] = f"{party['teamName']}: {r['name']}"
                        r['_no_update_name'] = True

                    hack = row['successfulHackCount']
                    unhack = row['unsuccessfulHackCount']

                    problems = r.setdefault('problems', {})
                    for i, s in enumerate(row['problemResults']):
                        k = result_problems[i]['index']
                        points = float(s['points'])

                        n = s.get('rejectedAttemptCount')
                        # ICPC display form: '+'/'+n' when solved, '-n' when
                        # only failed attempts exist.
                        if n is not None and contest_type == 'ICPC' and points + n > 0:
                            points = f'+{"" if n == 0 else n}' if points > 0 else f'-{n}'

                        u = upsolve
                        if s['type'] == 'FINAL' and (points or n):
                            if not points:
                                points = f'-{n}'
                            p = {'result': points}
                            if contest_type == 'IOI':
                                full_score = problems_info[k].get('full_score')
                                if full_score:
                                    p['partial'] = points < full_score
                            if 'bestSubmissionTimeSeconds' in s:
                                time = s['bestSubmissionTimeSeconds']
                                # A submission after the contest window is
                                # reclassified as upsolving.
                                if time > duration_seconds:
                                    u = True
                                else:
                                    time /= 60
                                    p['time'] = '%02d:%02d' % (time / 60,
                                                               time % 60)
                            a = problems.setdefault(k, {})
                            if u:
                                a['upsolving'] = p
                            else:
                                a.update(p)

                    if row['rank'] and not upsolve:
                        r['place'] = row['rank']
                        r['solving'] = row['points']
                        if contest_type == 'ICPC':
                            r['penalty'] = row['penalty']
                            r['solving'] = int(round(r['solving']))

                    if hack or unhack:
                        r['hack'] = {
                            'title': 'hacks',
                            'successful': hack,
                            'unsuccessful': unhack,
                        }

        try:
            # Rating changes are optional enrichment; any fetch failure is
            # silently ignored. Reuses `params` left over from the loop.
            params.pop('showUnofficial')
            data = _query(
                method='contest.ratingChanges',
                params=params,
                api_key=self.api_key,
            )
            if data and data['status'] == 'OK':
                for row in data['result']:
                    if str(row.pop('contestId')) != self.key:
                        continue
                    handle = row.pop('handle')
                    if handle not in result:
                        continue
                    r = result[handle]
                    old_rating = row.pop('oldRating')
                    new_rating = row.pop('newRating')
                    r['old_rating'] = old_rating
                    r['new_rating'] = new_rating
        except FailOnGetResponse:
            pass

        def to_score(x):
            # String results like '+', '+2' or positive numeric strings
            # count as one solve; numeric results pass through unchanged.
            return (1 if x.startswith('+') or float(x) > 0 else
                    0) if isinstance(x, str) else x

        def to_solve(x):
            # A problem counts as solved when not partial and score > 0.
            return not x.get('partial',
                             False) and to_score(x.get('result', 0)) > 0

        for r in result.values():
            upsolving = 0
            solving = 0
            upsolving_score = 0

            for a in r['problems'].values():
                # Count the upsolving attempt only when it is strictly
                # better than the in-contest attempt.
                if 'upsolving' in a and to_solve(a['upsolving']) > to_solve(a):
                    upsolving_score += to_score(a['upsolving']['result'])
                    upsolving += to_solve(a['upsolving'])
                else:
                    solving += to_solve(a)
            r.setdefault('solving', 0)
            r['upsolving'] = upsolving_score
            if abs(solving -
                   r['solving']) > 1e-9 or abs(upsolving -
                                               r['upsolving']) > 1e-9:
                r['solved'] = {
                    'solving': solving,
                    'upsolving': upsolving,
                }

        standings = {
            'result': result,
            'url': standings_url,
            'problems': list(problems_info.values()),
            'options': {
                'fixed_fields': [('hack', 'Hacks')],
            },
        }
        if phase != 'FINISHED':
            standings['timing_statistic_delta'] = timedelta(minutes=10)
        return standings
Exemplo n.º 26
0
    def get_standings(self, users=None, statistics=None):
        """Build standings by matching this contest to a round of the
        remote competition API and scraping per-team result pages.

        Locates the round by start date/name in the competition info feed,
        then fetches Team (premier league / rising stars) and individual
        leaderboards, downloading each participant's result page in a
        thread pool. Challenge-problem scores are rescaled when the round
        description declares ``maxPointsForProblem``.

        :param users: unused here; kept for a uniform interface
        :param statistics: unused here; kept for a uniform interface
        :return: dict with ``result``, ``url``, ``problems`` and
                 ``divisions_order``
        :raises ExceptionParseStandings: when the round cannot be matched
        """
        page = REQ.get(self.COMPETITION_INFO_API_URL_)
        data = json.loads(page)
        for round_data in data['rounds']:
            # Round descriptions embed "start date ... end date" as HTML.
            match = re.search(
                r'start\s*date\s*(?:<b[^>]*>)?(?P<start_time>[^<]*)(?:</b>)?.*end\s*date',
                round_data['description'], re.IGNORECASE)
            start_time = parser.parse(match.group('start_time'),
                                      tzinfos={'CET': 'UTC+1'})
            if start_time == self.start_time and round_data[
                    'name'] == self.name:
                break
        else:
            raise ExceptionParseStandings('not found round')

        m = re.search('maxPointsForProblem=(?P<score>[0-9]+)',
                      round_data['description'])
        max_points_challenge_problem = int(m.group('score')) if m else None

        page = REQ.get(self.ROUND_INFO_API_URL_)
        round_infos = json.loads(page)
        for round_info in round_infos['roundDisplayInfo']:
            if round_info['displayName'] == self.name:
                break
        else:
            raise ExceptionParseStandings('not found round')

        problems_info = collections.OrderedDict([(p['code'], {
            'code':
            p['code'],
            'name':
            p['name'],
            'url':
            self.PROBLEM_URL_.format(**p),
        }) for p in round_data['problems']])
        if self.name.startswith('Round'):
            # "Round 1"/"Round 2": every problem is worth the round level.
            level = int(self.name.split()[-1])
            if level in [1, 2]:
                for p in problems_info.values():
                    p['full_score'] = level

        result = dict()

        divisions_order = []
        # Three leaderboards: two team divisions and one individual one
        # (the individual board reuses the premier-league competition id).
        for cid, ctype in (
            (round_infos['teamCompetitionPremierLeagueId'], 'Team'),
            (round_infos['teamCompetitionRisingStarsId'], 'Team'),
            (round_infos['teamCompetitionPremierLeagueId'], 'individual'),
        ):
            url = self.RESULTS_API_URL_.format(cid=cid,
                                               url=round_info['url'],
                                               ctype=ctype)
            page = REQ.get(url)
            data = json.loads(page)

            # Division label = displayed name minus the contest name.
            division = data['displayedName'].replace(self.name,
                                                     '').strip().lower()
            if division not in divisions_order:
                divisions_order.append(division)

            participaty_type = {
                'Team': 'Team',
                'individual': 'Competitor',
            }[ctype]

            sorted_data = sorted(data['standings'],
                                 key=lambda r: r['score'],
                                 reverse=True)
            max_points = collections.defaultdict(int)
            division_result = dict()

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(
                    total=len(sorted_data)) as pbar:

                def fetch_team_results(d):
                    # Scrape one participant's result page; returns the raw
                    # standings entry, the parsed row and team member names.
                    member = str(d['id'])
                    url = self.TEAM_RESULTS_URL_.format(cid=cid,
                                                        uid=member,
                                                        name=participaty_type)
                    page = REQ.get(url)

                    matches = re.finditer(
                        r'<a[^>]*href="[^"]*/Problem/(?P<code>[^"/]*)">[^<]*(?:\s*<[^>]*>)*(?P<score>[.0-9]+)',
                        page)
                    problems = {}
                    for m in matches:
                        k = m['code']
                        if k not in problems_info:
                            continue
                        p = problems.setdefault(k, {})
                        p['result'] = m['score']

                    users = re.findall(
                        '<a[^>]*href="[^"]*/CompetitorResults/[^"]*">([^<]*)</a>',
                        page)

                    info = {
                        'problems': problems,
                        'url': url,
                        'member': member,
                    }

                    return d, info, users

                place = None
                last = None
                # executor.map preserves sorted order, so dense ranking with
                # ties (scores within 1e-7) works here.
                for index, (r, row, users) in enumerate(executor.map(
                        fetch_team_results, sorted_data),
                                                        start=1):
                    if last is None or abs(r['score'] - last) > 1e-7:
                        place = index
                        last = r['score']

                    row['name'] = r['name']
                    if users:
                        row['name'] += f': {", ".join(users)}'
                    row['place'] = place
                    row['solving'] = r['score']
                    row['country'] = unquote(r['country']).split()[0]
                    row['division'] = division
                    if ctype == 'individual':
                        row['_skip_for_problem_stat'] = True

                    division_result[row['member']] = row

                    for k, p in row['problems'].items():
                        max_points[k] = max(max_points[k], float(p['result']))

                    pbar.update()

            if max_points_challenge_problem is not None:
                for code, value in max_points.items():
                    # NOTE(review): skips codes that are neither the last
                    # (challenge) problem nor scored above 2 — presumably to
                    # target only the challenge problem; verify intent.
                    if code != round_data['problems'][-1][
                            'code'] and value <= 2:
                        continue

                    problems_info[code][
                        'full_score'] = max_points_challenge_problem

                    for r in division_result.values():
                        if code in r['problems']:
                            p = r['problems'][code]
                            p['status'] = p['result']
                            # Rescale raw score to the declared maximum with
                            # a square-root curve relative to the best score.
                            k = 1 - (1 - float(p['result']) / value)**.5
                            if k < 1:
                                p['partial'] = True
                            p['result'] = round(
                                max_points_challenge_problem * k, 2)

            for r in division_result.values():
                solved = 0
                for p in r['problems'].values():
                    if not p.get('partial') and float(p['result']) > 0:
                        solved += 1
                r['solved'] = {'solving': solved}

            result.update(division_result)

        standings_url = self.STANDING_URL_.format(
            cid=round_infos['teamCompetitionPremierLeagueId'])

        standings = {
            'result': result,
            'url': standings_url,
            'problems': list(problems_info.values()),
            'divisions_order': divisions_order,
        }
        return standings
Exemplo n.º 27
0
    def get_standings(self, users=None, statistics=None):
        """Parse multi-division standings configured in ``self.info``.

        Each division (from ``self.info['standings']['divisions']``) is
        fetched either as JSON or as an HTML table, medals are assigned by
        place, and place/score deltas are cross-linked between consecutive
        divisions.

        :param users: unused here; kept for a uniform interface
        :param statistics: unused here; kept for a uniform interface
        :return: dict with ``result``, ``fields_types``,
                 ``divisions_addition`` and ``divisions_order``
        :raises ExceptionParseStandings: when no standings url is set
        """
        if not self.standings_url:
            raise ExceptionParseStandings('not standings url')

        def get_results(standings_url, division_data):
            # Fetch one division and return {member: row}.
            page = REQ.get(standings_url)

            page_format = division_data.get('format')
            if page_format == 'json':
                data = json.loads(page)
                scores_field = None
                if 'problems' in data:
                    scores_field = 'problem'
                elif 'tournaments' in data:
                    scores_field = 'tournament'

                if scores_field:
                    scores_fields_mapping = {'submission': 'T', 'request': 'R'}
                    scores_mapping = OrderedDict()
                    for score in data[f'{scores_field}s']:
                        name = str(score[f'{scores_field}Id'])
                        scores_mapping[name] = scores_fields_mapping.get(name, name.split(':')[-1])

                table = []
                for team in data['teams']:
                    row = OrderedDict()
                    row['name'] = team['team']['teamName']
                    row['solving'] = team['score']
                    row['country'] = team['team']['customData']['country']
                    if scores_field:
                        problems = row.setdefault('_scores', OrderedDict())
                        scores = team[f'{scores_field}s']
                        for field, out in scores_mapping.items():
                            if field in scores:
                                problems[out] = as_number(scores.get(field, {}).get('score'))
                    table.append(row)
            else:
                # HTML table: normalize varied header spellings.
                mapping = {
                    'Rank': 'place',
                    '': 'place',
                    'Score': 'solving',
                    'score': 'solving',
                    'Total Score': 'solving',
                    'Team': 'name',
                    'name': 'name',
                    'score + unspent LAM': 'unspent_lam',
                }
                xpath = division_data.get('xpath', '//table//tr')
                table = parsed_table.ParsedTable(html=page, header_mapping=mapping, xpath=xpath)

            season = self.get_season()
            ret = {}
            was_place = False
            for r in table:
                row = OrderedDict()
                for k, v in r.items():
                    was_place = was_place or k == 'place'
                    if isinstance(v, parsed_table.ParsedTableValue):
                        v = v.value
                    if k == 'name':
                        row['name'] = v
                        row['member'] = f'{v} {season}'
                    else:
                        row[k] = as_number(v) if k in {'place', 'solving'} else v
                ret[row['member']] = row
            if not was_place:
                # No explicit rank column: derive dense ranking by score.
                place = None
                last = None
                for idx, row in enumerate(sorted(ret.values(), key=lambda r: r['solving'], reverse=True), start=1):
                    if row['solving'] != last:
                        last = row['solving']
                        place = idx
                    row['place'] = place
            return ret

        fields_types = {}
        results = {}

        divisions = self.info.get('standings', {}).get('divisions', [])
        divisions_order = []
        divisions_fields_types = defaultdict(OrderedDict)
        for division_data in divisions:
            division = division_data['name']
            division_results = get_results(division_data['standings_url'], division_data)

            medals = []
            for medal in division_data.get('medals', []):
                medals += [medal['name']] * medal['count']

            for handle, result in division_results.items():
                default = OrderedDict(member=result.pop('member'), name=result['name'])
                row = results.setdefault(handle, default)

                place_as_int = toint(result.get('place'))
                if place_as_int is not None and place_as_int <= len(medals):
                    medal = medals[place_as_int - 1]
                    result['medal'] = medal
                    result['_medal_title_field'] = '_'
                    result['_'] = f'{division.title()} {medal.title()}'

                scores = result.pop('_scores', {})

                if divisions_order:
                    prev_division = divisions_order[-1]
                    reverse_mapping = {'place': 'rank', 'solving': 'score'}
                    for k, v in list(result.items()):
                        # Copy medal fields to the merged row only once.
                        # BUGFIX: was `k in 'medal'` (substring test on the
                        # string 'medal'), intended as an equality check.
                        if k == 'medal' and k not in row:
                            for f in 'medal', '_medal_title_field', '_':
                                row[f] = result[f]
                        if k in {'name', 'medal'} or k.startswith('_'):
                            continue
                        if k in {'place', 'solving'}:
                            new_k = f'{division}_{reverse_mapping.get(k, k)}'
                            row[new_k] = v
                            try:
                                prev_val = row['_division_addition'][prev_division][k]

                                ref_k = f'{prev_division}_{reverse_mapping.get(k, k)}'
                                result[ref_k] = prev_val
                                divisions_fields_types[division].setdefault(ref_k, [])

                                # Delta vs the previous division; sign is
                                # flipped for place (lower place is better).
                                val = float(prev_val) - float(v)
                                val = int(val) if int(val) == val else val
                                if k == 'place':
                                    val = -val
                                field = f'{new_k}_delta'
                                row[field] = val
                                field_types = fields_types.setdefault(field, [])
                                if 'delta' not in field_types:
                                    field_types.append('delta')

                                field = f'{ref_k}_delta'
                                result[field] = val
                                field_types = divisions_fields_types[division].setdefault(field, [])
                                if 'delta' not in field_types:
                                    field_types.append('delta')
                            except Exception:
                                pass
                else:
                    row.update(scores)
                    row.update(result)

                division_addition = row.setdefault('_division_addition', {}).setdefault(division, OrderedDict())
                division_addition.update(scores)
                division_addition.update(result)

            divisions_order.append(division)

        # Record the Python type name of every per-division field.
        for value in results.values():
            for division, row in value.get('_division_addition', {}).items():
                for k, v in row.items():
                    field_types = divisions_fields_types[division].setdefault(k, [])
                    field_type = type(v).__name__
                    if field_type not in field_types:
                        field_types.append(field_type)

        for idx, division_data in enumerate(divisions):
            division = division_data['name']
            disable_fields = division_data.get('disable_fields', [])
            for field in disable_fields:
                divisions_fields_types[division].pop(field, None)
            if idx == 0:
                # The first division's fields were merged into the top-level
                # rows, so disabled fields must be stripped there too.
                for row in results.values():
                    for field in disable_fields:
                        row.pop(field, None)

        return dict(
            result=results,
            fields_types=fields_types,
            divisions_addition={k: dict(fields=list(fields_types.keys()), fields_types=fields_types)
                                for k, fields_types in divisions_fields_types.items()},
            divisions_order=divisions_order,
        )
Exemplo n.º 28
0
    def get_standings(self, users=None, statistics=None):
        """Scrape standings from an HTML results table.

        Parses the first table on ``self.standings_url``, extracting place,
        name, total and per-problem scores (problem metadata comes from the
        table header). Rows below a "qualification threshold" separator are
        marked not advanced; final rounds get gold/silver/bronze medals.

        :param users: unused here; kept for a uniform interface
        :param statistics: unused here; kept for a uniform interface
        :return: dict with ``result`` and ``problems`` (plus medal
                 ``options`` for final rounds)
        :raises ExceptionParseStandings: when no standings url is set, or
                 for non-final rounds more than 3 days after the end
        """
        if not self.standings_url:
            # BUGFIX: error message typo ('stnadings' -> 'standings').
            raise ExceptionParseStandings('Not set standings url')
        is_final = self.name.lower().startswith('final round')
        now = datetime.utcnow().replace(tzinfo=pytz.utc)
        if not is_final and self.end_time + timedelta(days=3) < now:
            raise ExceptionParseStandings('Too late')

        page = REQ.get(self.standings_url)

        html_table = re.search('<table[^>]*>.*?</table>', page,
                               re.MULTILINE | re.DOTALL).group(0)
        table = parsed_table.ParsedTable(html_table,
                                         as_list=True,
                                         ignore_wrong_header_number=False,
                                         ignore_display_none=True)

        problems_info = OrderedDict()

        result = {}
        season = self.get_season()
        advanced = False
        for r in table:
            if isinstance(r, parsed_table.ParsedTableRow):
                # A separator row marks the qualification threshold: all
                # rows already parsed (above it) advanced.
                if re.search(r'qualification\s*threshold', r.columns[0].value,
                             re.I):
                    advanced = True
                    for row in result.values():
                        row['advanced'] = True
                continue
            row = OrderedDict()
            problems = row.setdefault('problems', {})
            if advanced:
                row['advanced'] = False
            pid = 0
            for k, v in r:
                if k == '#':
                    row['place'] = v.value
                elif k == 'Name':
                    row['name'] = v.value
                elif k.startswith('Total'):
                    row['solving'] = v.value
                elif '_top_column' in v.header.attrs:
                    # Grouped header column => a (sub)problem cell.
                    problem_key = str(pid)
                    if problem_key not in problems_info:
                        name = v.header.attrs['_top_column'].value
                        p_info = {'code': problem_key}
                        # Matches "Some name (100)" / "Some name 100".
                        p_info_regex = r'^(?P<name>.*)\s+\(?(?P<score>[0-9]{2,})\)?$'
                        match = re.search(p_info_regex, name)
                        if match:
                            name = match.group('name').strip()
                        match = re.search(p_info_regex, k)
                        if match:
                            p_info['subname'] = match.group('name').strip()
                            p_info['full_score'] = int(match.group('score'))
                        p_info['name'] = name
                        href = v.header.node.xpath('a/@href')
                        if href:
                            p_info['suburl'] = href[0]
                            p_info['url'] = href[0]
                        problems_info[problem_key] = p_info

                    if v.value:
                        try:
                            val = float(v.value)
                            if val:
                                p = problems.setdefault(problem_key, {})
                                p['result'] = v.value

                                full_score = problems_info[problem_key].get(
                                    'full_score')
                                if full_score is not None:
                                    p['partial'] = val < full_score
                                else:
                                    # No declared full score: fall back to
                                    # cell background color conventions.
                                    style = v.attrs.get('style')
                                    if style:
                                        if 'yellow' in style:
                                            p['partial'] = True
                                        elif 'lightgreen' in style:
                                            p['partial'] = False
                                            if full_score is None:
                                                problems_info[problem_key][
                                                    'full_score'] = int(
                                                        round(val, 0))
                        except ValueError:
                            pass
                    pid += 1
                else:
                    row.setdefault('_info', {})[k] = v.value

            if not problems:
                continue

            handle = row['name'] + ' ' + season
            row['member'] = handle
            if handle in result:
                continue
            result[handle] = row

        standings = {
            'result': result,
            'problems': list(problems_info.values()),
        }

        if is_final:
            standings['options'] = {
                'medals': [{
                    'name': k,
                    'count': 1
                } for k in ('gold', 'silver', 'bronze')]
            }

        return standings
Exemplo n.º 29
0
    def get_standings(self, users=None, statistics=None):
        """Parse standings of a bot-battle contest, optionally via a web-archive snapshot.

        :param users: optional collection of handles; paging stops once all are found
        :param statistics: previously stored rows keyed by member, used to reuse
            cached ``_user_id`` values and skip extra profile-page fetches
        :return: dict with ``result``, ``url``, ``fields_types``, ``hidden_fields``
            and, for finals, medal ``options``
        :raises ExceptionParseStandings: when the contest ended long ago and no
            archive snapshot is configured, or the standings table is missing
        """
        standings_url = self.standings_url
        # The stored url may already carry a web-archive prefix; keep only the
        # bare contest url.
        standings_url = re.sub('.*/(http.*)', r'\1', standings_url)

        web_archive_url = self.info.get('parse', {}).get('web_archive_url')
        if web_archive_url:
            web_archive_url = re.sub('/http.*', '/', web_archive_url)
            standings_url = web_archive_url + standings_url

        # Live standings go stale; past ~30 days only an archived snapshot is usable.
        passed = datetime.utcnow().replace(tzinfo=pytz.utc) - self.end_time > timedelta(days=30)

        if not web_archive_url and passed:
            raise ExceptionParseStandings('Long time passed')

        total_num_pages = None

        codename = self.name.split('.')[0]

        @RateLimiter(max_calls=10, period=1)
        def fetch_table(page_number):
            # Fetch one page of the standings table; return None when the
            # fetched page belongs to a different contest (end of data).
            # NOTE: previous version tested the enclosing loop variable
            # `n_page` here instead of its own parameter (equal at the only
            # call site, but a latent bug) and declared unused nonlocals.
            nonlocal total_num_pages
            url = standings_url
            if page_number > 1:
                url += f'/page/{page_number}'
            if not web_archive_url:
                url += '?locale=en'

            page = Statistic.get(url)

            match = re.search('<title>[^<]*-(?P<name>[^<]*)</title>', page)
            if not match or codename not in match.group('name'):
                return

            if total_num_pages is None:
                # The last page-index element carries the total page count.
                matches = re.findall(
                    '<span[^>]*class="[^"]*page-index[^"]*"[^>]*pageindex="([0-9]+)"[^>]*>',
                    page,
                    re.I,
                )
                if matches:
                    total_num_pages = int(matches[-1])

            regex = '''<table[^>]*class="[^>]*table[^>]*"[^>]*>.*?</table>'''
            match = re.search(regex, page, re.DOTALL)
            if not match:
                # Fail loudly instead of the former AttributeError on None.
                raise ExceptionParseStandings('Not found standings table')
            table = parsed_table.ParsedTable(
                match.group(0),
                header_mapping={
                    '№': '#',
                    'Участник': 'Participant',
                    'Бои': 'Games',
                    'Игры': 'Games',
                    'Побед': 'Won',
                    'Рейтинг': 'Rating',
                    'Язык': 'Language',
                },
            )
            return table

        result = {}
        n_page = 1
        ok = True
        last_rating = None
        # Walk pages until a page yields no rows, all requested users are
        # found, or the last page is reached.
        while ok and (not users or len(users) != len(result)):
            ok = False
            table = fetch_table(n_page)
            if table is None:
                break

            for row in table:
                ok = True
                r = OrderedDict()

                participant = row.pop('Participant')
                member = participant.value
                if member in result or users and member not in users:
                    continue
                r['member'] = member
                if not web_archive_url:
                    r['info'] = {'avatar': participant.column.node.xpath('.//img/@src')[0]}
                url = participant.column.node.xpath('.//a/@href')[0]
                r['url'] = urllib.parse.urljoin(standings_url, url)

                r['place'] = int(row.pop('#').value)
                score = int(row.pop('Rating').value)
                r['solving'] = score
                # Rating gap to the previous (higher-placed) participant.
                r['delta'] = last_rating - score if last_rating is not None else ''
                last_rating = score

                if 'Language' in row:
                    # Language is encoded as a CSS class 'LangIc-<name>'.
                    classes = row.pop('Language').column.node.xpath('.//*[contains(@class, "lc")]/@class')
                    if classes:
                        prefix = 'LangIc-'
                        language = None
                        for cls in classes[0].split():
                            if cls.startswith(prefix):
                                language = cls[len(prefix):]
                        if language:
                            r['language'] = Statistic.LANGUAGES_MAPPING.get(language, language)

                if 'Games' in row:
                    n_games = row.pop('Games').value.split()[-1]
                    if n_games != '0':
                        r['games'] = n_games

                if 'Won' in row:
                    p_won = row.pop('Won').value
                    if p_won != '-':
                        r['won'] = p_won

                row.pop('Δ', None)
                # Keep any remaining columns as lower-cased extra fields.
                for k, v in list(row.items()):
                    r[k.strip().lower()] = v.value

                result[member] = r
            n_page += 1

            if total_num_pages is None or n_page > total_num_pages:
                break

        def fetch_rating(row):
            # Enrich one result row with rating history and submission
            # metadata fetched from the site's AJAX endpoint. Only invoked
            # in the live (non-archive) branch below, where csrf_token exists.
            member = row['member']
            if not statistics or member not in statistics:
                return
            user_id = statistics[member].get('_user_id')
            if not user_id:
                page = Statistic.get(row['url'])
                match = re.search(r'userId\s*:\s*(?P<user_id>[0-9]+)', page)
                user_id = match.group('user_id')

            row['_user_id'] = user_id
            post = {
                'action': 'getRatingChanges',
                'userId': user_id,
                'mode': 'ALL',
                'csrf_token': csrf_token,
            }
            page = Statistic.get('/data/ratingChangeDataPage', post=post)
            rating_changes = json.loads(page)
            rating_data = {}

            ratings = rating_changes.get('ratingChanges')
            if ratings:
                ratings = json.loads(ratings)
                rating_data['ratings'] = ratings
                if ratings and len(ratings) > 1:
                    # Exponential moving average of per-round rating deltas.
                    ema = 0
                    prev = None
                    alpha = 0.1
                    for rating in ratings:
                        if prev is not None:
                            ema += ((rating['rating'] - prev) - ema) * alpha
                        prev = rating['rating']
                    row[f'delta_ema={alpha}'] = f'{ema:.2f}'
                if not passed:
                    row['new_rating'] = ratings[-1]['rating']

            submissions = rating_changes.get('submissions')
            if submissions:
                submissions = json.loads(submissions)
                rating_data['submissions'] = submissions
                row['created'] = Statistic.norm_timestamp(submissions[0]['time'])
                row['updated'] = Statistic.norm_timestamp(submissions[-1]['time'])
                row['version'] = len(submissions)

            # Store the raw history compressed and base64-encoded.
            rating_data_str = json.dumps(rating_data)
            rating_data_zip = zlib.compress(rating_data_str.encode('utf-8'))
            rating_data_b64 = b64encode(rating_data_zip).decode('ascii')
            row['_rating_data'] = rating_data_b64

        if not web_archive_url and '/1/' in self.standings_url:
            # The CSRF token of the last fetched page authorizes the AJAX calls.
            match = re.search('<meta[^>]*name="x-csrf-token"[^>]*content="(?P<token>[^"]*)"[^>]*>', REQ.last_page, re.I)
            csrf_token = match.group('token')

            with PoolExecutor(max_workers=8) as executor:
                for _ in tqdm.tqdm(executor.map(fetch_rating, result.values()), desc='ratings'):
                    pass

        ret = {
            'result': result,
            'url': standings_url,
            'fields_types': {'updated': ['timestamp'], 'created': ['timestamp']},
            'hidden_fields': ['new_rating', 'version', 'created'],
        }

        if self.name.endswith('Finals'):
            ret['options'] = {
                'medals': [
                    {'name': 'gold', 'count': 1},
                    {'name': 'silver', 'count': 1},
                    {'name': 'bronze', 'count': 1},
                    {'name': 'honorable', 'count': 3},
                ]
            }

        return ret
Exemplo n.º 30
0
    def get_standings(self, users=None, statistics=None):
        """Parse CodeChef standings through the contest ranking JSON API.

        :param users: optional list of handles; when given, every ranking page
            is queried once per user via the ``search`` query parameter
        :param statistics: accepted for interface compatibility; not used here
        :return: dict with ``result`` (rows keyed by handle), ``url`` and
            ``problems``
        :raises ExceptionParseStandings: on a non-success API status or when a
            ranking page cannot be fetched after repeated retries
        """
        # REQ.get('https://www.codechef.com/')

        # try:
        #     form = REQ.form()
        #     form['post'].update({
        #         'name': self._username,
        #         'pass': self._password,
        #     })
        #     page = REQ.get(form['url'], post=form['post'])

        #     form = REQ.form()
        #     if form['url'] == '/session/limit':
        #         for field in form['unchecked'][:-1]:
        #             form['post'][field['name']] = field['value'].encode('utf8')
        #         page = REQ.get(form['url'], post=form['post'])
        # except Exception:
        #     pass

        # Contest metadata; a parent contest may expose per-division child
        # contests, each parsed as a separate standings source.
        url = self.API_CONTEST_URL_FORMAT_.format(**self.__dict__)
        page = REQ.get(url)
        data = json.loads(page)
        if data['status'] != 'success':
            raise ExceptionParseStandings(json.dumps(data))
        if 'child_contests' in data:
            contest_infos = {
                d['contest_code']: {
                    'division': k
                }
                for k, d in data['child_contests'].items()
            }
        else:
            contest_infos = {self.key: {}}

        result = {}

        # With divisions, problems are grouped per division (dict); otherwise
        # they form a flat list.
        problems_info = dict() if len(contest_infos) > 1 else list()

        for key, contest_info in contest_infos.items():
            # The CSRF token embedded in the standings page is required by the
            # ranking API below.
            url = self.STANDINGS_URL_FORMAT_.format(key=key)
            page = REQ.get(url)
            match = re.search(
                '<input[^>]*name="csrfToken"[^>]*id="edit-csrfToken"[^>]*value="([^"]*)"',
                page)
            csrf_token = match.group(1)

            n_page = 0
            per_page = 150
            n_total_page = None  # learned from the first API response
            pbar = None
            contest_type = None
            while n_total_page is None or n_page < n_total_page:
                n_page += 1
                # Throttle to avoid the API's rate limiting.
                time.sleep(2)
                url = self.API_RANKING_URL_FORMAT_.format(key=key,
                                                          page=n_page,
                                                          per_page=per_page)

                if users:
                    urls = [f'{url}&search={user}' for user in users]
                else:
                    urls = [url]

                for url in urls:
                    # Retry with exponential backoff capped at 5 minutes; the
                    # assert converts a rate-limit response into a retry.
                    delay = 10
                    for _ in range(10):
                        try:
                            headers = {
                                'x-csrf-token': csrf_token,
                                'x-requested-with': 'XMLHttpRequest',
                            }
                            page = REQ.get(url, headers=headers)
                            data = json.loads(page)
                            assert data.get('status') != 'rate_limit_exceeded'
                            break
                        except Exception:
                            traceback.print_exc()
                            delay = min(300, delay * 2)
                            sys.stdout.write(f'url = {url}\n')
                            sys.stdout.write(f'Sleep {delay}... ')
                            sys.stdout.flush()
                            time.sleep(delay)
                            sys.stdout.write('Done\n')
                    else:
                        # All retries exhausted.
                        raise ExceptionParseStandings(
                            f'Failed getting {n_page} by url {url}')

                    if 'status' in data and data['status'] != 'success':
                        raise ExceptionParseStandings(json.dumps(data))

                    # Problems outside the official scoring are tracked as
                    # upsolving below.
                    unscored_problems = data['contest_info'][
                        'unscored_problems']

                    if n_total_page is None:
                        # First response: collect scored problems, total page
                        # count and contest type.
                        for p in data['problems']:
                            if p['code'] in unscored_problems:
                                continue
                            d = problems_info
                            if 'division' in contest_info:
                                d = d.setdefault('division', OrderedDict())
                                d = d.setdefault(contest_info['division'], [])
                            d.append({
                                'short':
                                p['code'],
                                'name':
                                p['name'],
                                'url':
                                f"https://www.codechef.com/problems/{p['code']}",
                            })
                        n_total_page = data['availablePages']
                        pbar = tqdm.tqdm(total=n_total_page * len(urls))
                        contest_type = data['contest_info'].get('type')

                    for d in data['list']:
                        handle = d.pop('user_handle')
                        d.pop('html_handle', None)
                        problems_status = d.pop('problems_status')
                        # Skip rows with neither score nor any attempts.
                        if d['score'] < 1e-9 and not problems_status:
                            LOG.warning(f'Skip handle = {handle}: {d}')
                            continue
                        row = result.setdefault(handle, {})

                        row['member'] = handle
                        row['place'] = d.pop('rank')
                        row['solving'] = d.pop('score')

                        problems = row.setdefault('problems', {})
                        solved, upsolved = 0, 0
                        if problems_status:
                            for k, v in problems_status.items():
                                # Unscored problems count as upsolving.
                                t = 'upsolving' if k in unscored_problems else 'result'
                                v[t] = v.pop('score')
                                solved += 1 if v.get('result', 0) > 0 else 0
                                upsolved += 1 if v.get('upsolving',
                                                       0) > 0 else 0

                                # NOTE(review): type '1' appears to be an
                                # ICPC-style contest — scores are rendered as
                                # '+tries'/'-tries' — confirm against the API.
                                if contest_type == '1' and 'penalty' in v:
                                    penalty = v.pop('penalty')
                                    if v[t] > 0:
                                        v[t] = f'+{"" if penalty == 0 else penalty}'
                                    else:
                                        v[t] = f'-{penalty}'

                                problems[k] = v
                            row['solved'] = {
                                'solving': solved,
                                'upsolving': upsolved
                            }
                        country = d.pop('country_code')
                        if country:
                            d['country'] = country
                        row.update(d)
                        row.update(contest_info)
                    pbar.set_description(f'key={key} url={url}')
                    pbar.update()

            # Drop the 'penalty' field entirely when no row has a non-zero one.
            has_penalty = False
            for row in result.values():
                p = row.get('penalty')
                has_penalty = has_penalty or p and str(p) != "0"
            if not has_penalty:
                for row in result.values():
                    row.pop('penalty', None)

            if pbar is not None:
                pbar.close()

        standings = {
            'result': result,
            'url': self.url,
            'problems': problems_info,
        }
        return standings