Exemplo n.º 1
0
    def get_questions(self) -> List[Question]:
        """Scrape every problem of the contest from its combined problems page.

        Downloads ``/contest/<problemset>/problems`` through
        ``self.download_response`` and builds one ``Question`` per
        ``div.problem-statement`` element, numbered from 1, with the sample
        tests found in its ``sample-test`` section attached.

        Returns:
            The scraped questions, in page order.

        Raises:
            IndexError: If the page has no caption, or a problem is missing
                its title, time limit or ``sample-test`` section.
        """
        contest = self.uri.problemset
        logger.info(
            f'Downloading page {self.base_url}/contest/{contest}/problems')

        body = self.download_response(f"/contest/{contest}/problems")
        questions: List[Question] = []

        doc = document_fromstring(body)
        caption = doc.xpath('//div[@class="caption"]/text()')[0]

        logger.info(f'Found: {caption} ✅')
        logger.info('Scraping problems:')

        problems = doc.xpath('//div[@class="problem-statement"]')
        for idx, problem in enumerate(problems, start=1):
            title = problem.find_class("title")[0].text_content()
            time_limit = problem.find_class("time-limit")[0].text_content()

            # Drop the leading label and keep only the number that follows it.
            time_limit = time_limit[len('time limit per test'):].split(' ')[0]
            question = Question(idx, title, self.base_dir, time_limit)

            sample_tests = problem.find_class("sample-test")[0]
            inputs = sample_tests.find_class('input')
            outputs = sample_tests.find_class('output')

            for inp, out in zip(inputs, outputs):
                # A <pre> can hold more than one text node (lines separated
                # by <br> or wrapped in child elements). Taking only the first
                # node would truncate the sample, or fail when the text is
                # nested, so collect every descendant text node as one line.
                sample_input = '\n'.join(
                    inp.xpath('descendant-or-self::pre//text()'))
                sample_output = '\n'.join(
                    out.xpath('descendant-or-self::pre//text()'))
                question.add_test(sample_input, sample_output)

            questions.append(question)
            logger.info(question)

        return questions
Exemplo n.º 2
0
    def get_questions(self) -> List[Question]:
        """Scrape every task of the contest from its printable tasks page.

        Downloads ``/contests/<problemset>/tasks_print`` through
        ``self.download_response`` and builds one ``Question`` per
        ``div.col-sm-12`` element, numbered from 1. The time limit is read
        from the first paragraph of the task and falls back to ``5`` when no
        ``Time Limit: <n> sec`` text is found there.

        Returns:
            The scraped questions, in page order.
        """
        contest = self.uri.problemset
        logger.info(f'Downloading page {self.base_url}/contests/{contest}/tasks_print')

        page = self.download_response(f"/contests/{contest}/tasks_print")
        tree = document_fromstring(page)
        caption = tree.xpath('/html/head/title')[0].text_content()

        logger.info(f'Found: {caption} ✅')
        logger.info('Scraping problems:')

        questions: List[Question] = []
        for idx, problem in enumerate(tree.xpath('//div[@class="col-sm-12"]'), start=1):
            title = problem.find_class("h2")[0].text_content()
            limits_text = problem.xpath('descendant-or-self::p')[0].text_content()

            limit_match = re.search(r'Time Limit: (\d+) sec.*', limits_text)
            if limit_match is None:
                time_limit = 5
            else:
                time_limit = limit_match.group(1)

            question = Question(idx, title, self.base_dir, time_limit)

            # The first four parts are the `Problem Statement`, `Constraints`,
            # `Input` and `Output` (format) sections; what follows alternates
            # between sample input and sample output.
            sample_parts = problem.find_class("lang-en")[0].find_class("part")[4:]
            input_parts = sample_parts[::2]
            output_parts = sample_parts[1::2]
            assert len(input_parts) == len(output_parts)

            for input_part, output_part in zip(input_parts, output_parts):
                question.add_test(
                    input_part.xpath('descendant-or-self::pre/text()')[0].strip(),
                    output_part.xpath('descendant-or-self::pre/text()')[0].strip(),
                    custom_testcase=False,
                )

            questions.append(question)
            logger.info(question)

        return questions
Exemplo n.º 3
0
    def download_question(self, idx: int, problem: CSESProblem) -> Question:
        """Download one problem page and build its ``Question``.

        Args:
            idx: Index passed through to ``Question``.
            problem: Problem descriptor; its ``url`` is downloaded and its
                ``name`` becomes the question title.

        Returns:
            The question with one test per complete ``Input:`` / ``Output:``
            sample pair found in the page. A page without samples yields a
            question with no tests.

        Raises:
            IndexError: If the page has no ``ul.task-constraints`` item to
                read the time limit from.
        """
        problem_html = self.download_response(problem.url)
        doc = document_fromstring(problem_html)
        time_limit = doc.xpath(
            '//ul[@class="task-constraints"]/li[1]/text()')[0]
        # time = ' 1.00 s' -> 1.00
        time_limit = time_limit.strip()[:-1].strip()
        question = Question(idx, problem.name, self.base_dir, time_limit)

        # Fetch the samples: each one is an 'Input:' marker followed by a
        # <code> element, then an 'Output:' marker followed by a <code>
        # element. str.find returns -1 when a marker is missing; stop there
        # instead of slicing with -1 and parsing an empty or bogus fragment.
        closing_tag = '</code>'
        cursor = problem_html.find('Input:')
        while cursor != -1:
            input_end = problem_html.find(closing_tag, cursor)
            if input_end == -1:
                break
            input_end += len(closing_tag)

            output_start = problem_html.find('Output:', input_end + 1)
            if output_start == -1:
                break
            output_end = problem_html.find(closing_tag, output_start)
            if output_end == -1:
                break
            output_end += len(closing_tag)

            input_html = problem_html[cursor:input_end]
            output_html = problem_html[output_start:output_end]

            # Each fragment is parsed on its own; the text nodes of its
            # <code> element are joined one per line.
            sample_input = '\n'.join(
                document_fromstring(input_html).xpath('//code/text()'))
            sample_output = '\n'.join(
                document_fromstring(output_html).xpath('//code/text()'))

            question.add_test(sample_input, sample_output)
            cursor = problem_html.find('Input:', output_end + 1)

        logger.info(question)

        return question
Exemplo n.º 4
0
    def load_questions(self, force_download=False) -> None:
        """Populate ``self.questions`` from the metadata file or by download.

        When ``force_download`` is false and ``self.metadata_path`` exists,
        the questions are rebuilt from the ``'questions'`` list stored in that
        JSON file. Otherwise they are fetched with the downloader matching
        ``self.platform`` (``'cf'`` or ``'cc'``) and ``save_questions`` is
        called.

        Args:
            force_download: Ignore an existing metadata file and download.
        """
        if not force_download and os.path.exists(self.metadata_path):
            self.questions = []
            with open(self.metadata_path, 'r') as file:
                metadata = json.load(file)

            self.questions.extend(
                Question.from_dict(entry) for entry in metadata['questions'])
            return

        if self.platform == 'cf':
            self.questions = self.get_questions_codeforces()
        elif self.platform == 'cc':
            self.questions = self.get_questions_codechef()
        self.save_questions()
Exemplo n.º 5
0
    def load_questions(self, force_download=False) -> None:
        """Populate ``self.questions`` from the metadata file or by download.

        When ``force_download`` is true or ``self.platform.metadata_path``
        does not exist, the questions are fetched with
        ``self.platform.get_questions()`` and then saved with
        ``save_questions``. Otherwise they are rebuilt from the
        ``'questions'`` list stored in the JSON metadata file.

        If the download fails with ``InvalidProblemSetURI`` the error is
        logged and the method returns without saving, leaving
        ``self.questions`` untouched.

        Args:
            force_download: Ignore an existing metadata file and download.
        """
        if force_download or not os.path.exists(self.platform.metadata_path):
            try:
                questions = self.platform.get_questions()
            except InvalidProblemSetURI as err:
                logger.error(err)
                # Nothing was downloaded, so do not call save_questions():
                # self.questions is either stale or was never assigned.
                return

            self.questions = questions
            self.save_questions()
            return

        self.questions = []
        with open(self.platform.metadata_path, 'r') as file:
            metadata = json.load(file)

        for question in metadata['questions']:
            self.questions.append(Question.from_dict(question))
Exemplo n.º 6
0
    def get_questions_codeforces(self) -> List[Question]:
        """Download and scrape every problem of ``self.contest``.

        Requests ``/contest/<contest>/problems`` from codeforces.com and
        builds one ``Question`` per ``div.problem-statement`` element,
        numbered from 0, with the sample tests found in its ``sample-test``
        section attached. A time limit that cannot be parsed as a float
        falls back to ``5.0``.

        Returns:
            The scraped questions, in page order.

        Raises:
            Exception: If the server does not answer with status 200.
            IndexError: If the page has no caption, or a problem is missing
                its title, time limit or ``sample-test`` section.
        """
        print(
            f'Downloading page https://codeforces.com/contest/{self.contest}/problems'
        )

        url = 'codeforces.com'
        conn = HTTPSConnection(url)
        # try/finally closes the connection on every path, including when the
        # request itself or reading the body raises.
        try:
            conn.request("GET", f"/contest/{self.contest}/problems")
            response = conn.getresponse()

            if response.getcode() != 200:
                raise Exception(
                    f'No contest found for codeforces/{self.contest} ❌❌')

            html = response.read().decode()
        finally:
            conn.close()

        questions: List[Question] = []

        doc = document_fromstring(html)
        caption = doc.xpath('//div[@class="caption"]/text()')[0]

        print(f'Found: {caption} ✅')
        print('Scraping problems:')

        problems = doc.xpath('//div[@class="problem-statement"]')
        for idx, problem in enumerate(problems):
            title = problem.find_class("title")[0].text_content()
            time_limit = problem.find_class("time-limit")[0].text_content()

            # Drop the leading label and keep only the number that follows it.
            time_limit = time_limit[len('time limit per test'):].split(' ')[0]
            # Only the float conversion is guarded, so an unparsable limit
            # falls back to 5.0 without masking errors from Question itself.
            try:
                limit_seconds = float(time_limit)
            except ValueError:
                limit_seconds = 5.0
            question = Question(idx, title, self.base_dir, limit_seconds)

            sample_tests = problem.find_class("sample-test")[0]
            inputs = sample_tests.find_class('input')
            outputs = sample_tests.find_class('output')

            for inp, out in zip(inputs, outputs):
                # A <pre> can hold more than one text node (lines separated
                # by <br> or wrapped in child elements). Taking only the first
                # node would truncate the sample, or fail when the text is
                # nested, so collect every descendant text node as one line.
                sample_input = '\n'.join(
                    inp.xpath('descendant-or-self::pre//text()'))
                sample_output = '\n'.join(
                    out.xpath('descendant-or-self::pre//text()'))
                question.add_test(sample_input, sample_output)

            questions.append(question)
            print(question)

        return questions
Exemplo n.º 7
0
    def parse_question(self, idx: int, problem: Dict) -> Question:
        """Build a ``Question`` from one problem dictionary.

        The title is the problem code and the problem name separated by a
        space. Test cases are collected from the problem body twice through
        ``self._scape_test_cases``: first with the 'example input' /
        'example output' markers, then with 'sample input' / 'sample output'.

        Args:
            idx: Index passed through to ``Question``.
            problem: Mapping with the keys ``problem_code``,
                ``problem_name``, ``max_timelimit`` and ``body``.

        Returns:
            The question with every scraped test case added.
        """
        code = problem['problem_code']
        name = problem['problem_name']
        question = Question(idx, code + ' ' + name, self.base_dir,
                            problem['max_timelimit'])

        body = problem['body']
        marker_pairs = (
            ('example input', 'example output'),
            ('sample input', 'sample output'),
        )
        for input_marker, output_marker in marker_pairs:
            cases = self._scape_test_cases(input_marker, output_marker, body)
            for case_input, case_output in cases:
                question.add_test(case_input, case_output)

        return question