def download(self, session=None):
    """Download the sample cases of an AtCoder problem page.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases, or ``[]`` when the page is not
        visible (e.g. the user is not logged in).
    """
    session = session or utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    msgs = AtCoderService._get_messages_from_cookie(resp.cookies)
    if AtCoderService._report_messages(msgs, unexpected=True):
        # example message: "message: You cannot see this page."
        log.warning('are you logged in?')
        return []
    # parse
    # bugfix: pass raw bytes instead of resp.content.decode(resp.encoding);
    # resp.encoding can be None (no charset header) and BeautifulSoup detects
    # the encoding itself — see https://github.com/kmyk/online-judge-tools/pull/186
    soup = bs4.BeautifulSoup(resp.content, utils.html_parser)
    samples = utils.SampleZipper()
    lang = None
    for pre, h3 in self._find_sample_tags(soup):
        s = utils.textfile(utils.dos2unix(pre.string.lstrip()))
        name = h3.string
        tag_lang = self._get_tag_lang(pre)  # renamed from `l` (ambiguous name, E741)
        if lang is None:
            # the first sample fixes the language; later samples in another
            # language (e.g. the English translation) are skipped
            lang = tag_lang
        elif lang != tag_lang:
            log.info(
                'skipped due to language: current one is %s, not %s: %s ',
                lang, tag_lang, name)
            continue
        samples.add(s, name)
    return samples.get()
def download_samples(self, session=None):
    """Download sample cases by scanning ``<pre>`` tags whose nearest
    preceding heading looks like a sample heading.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases.
    """
    session = session or utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for pre in soup.find_all('pre'):
        log.debug('pre: %s', str(pre))
        hn = utils.previous_sibling_tag(pre)
        if hn is None:
            # no heading directly before the <pre>; retry from its parent <div>
            div = pre.parent
            if div is not None:
                log.debug('div: %s', str(div))  # bugfix: was logging hn, which is always None here
                hn = utils.previous_sibling_tag(div)
        log.debug('hN: %s', str(hn))
        log.debug(hn)
        keywords = ['sample', 'example', '入力例', '出力例']
        if hn and hn.name in ['h2', 'h3'] and hn.string and any(
                filter(lambda keyword: keyword in hn.string.lower(),
                       keywords)):
            s = utils.textfile(pre.string.lstrip())
            name = hn.string
            samples.add(s, name)
    return samples.get()
def download(self, session=None):
    """Download the sample cases of an AtCoder problem page.

    :param session: a ``requests.Session`` to reuse; a fresh one is created
        when omitted.
    :return: the list of sample test cases, or ``[]`` when the page is not
        visible to the current (possibly anonymous) user.
    """
    session = session or requests.Session()
    url = self.get_url()
    # get
    log.status('GET: %s', url)
    resp = session.get(url)
    log.status(utils.describe_status_code(resp.status_code))
    resp.raise_for_status()
    msgs = AtCoderService._get_messages_from_cookie(resp.cookies)
    if AtCoderService._report_messages(msgs, unexpected=True):
        return []
    # parse
    # bugfix: pass raw bytes instead of resp.content.decode(resp.encoding);
    # resp.encoding can be None (no charset header) and BeautifulSoup detects
    # the encoding itself — see https://github.com/kmyk/online-judge-tools/pull/186
    soup = bs4.BeautifulSoup(resp.content, utils.html_parser)
    samples = utils.SampleZipper()
    lang = None
    for pre, h3 in self._find_sample_tags(soup):
        s = utils.textfile(utils.dos2unix(pre.string.lstrip()))
        name = h3.string
        tag_lang = self._get_tag_lang(pre)  # renamed from `l` (ambiguous name, E741)
        if lang is None:
            # the first sample fixes the language; samples in another language
            # (e.g. the translated statement) are skipped
            lang = tag_lang
        elif lang != tag_lang:
            log.info(
                'skipped due to language: current one is %s, not %s: %s ',
                lang, tag_lang, name)
            continue
        samples.add(s, name)
    return samples.get()
def download_samples(
        self,
        session: Optional[requests.Session] = None) -> List[TestCase]:
    """Download sample cases by scanning ``<pre>`` tags whose nearest
    preceding heading looks like a sample heading.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases.
    """
    session = session or utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(
        resp.content, utils.html_parser
    )  # NOTE: resp.content is not decoded for workaround, see https://github.com/kmyk/online-judge-tools/pull/186
    samples = utils.SampleZipper()
    for pre in soup.find_all('pre'):
        log.debug('pre: %s', str(pre))
        hn = utils.previous_sibling_tag(pre)
        if hn is None:
            # no heading directly before the <pre>; retry from its parent <div>
            div = pre.parent
            if div is not None:
                log.debug('div: %s', str(div))  # bugfix: was logging hn, which is always None here
                hn = utils.previous_sibling_tag(div)
        log.debug('hN: %s', str(hn))
        log.debug(hn)
        keywords = ['sample', 'example', '入力例', '出力例']
        if hn and hn.name in ['h2', 'h3'] and hn.string and any(
                filter(lambda keyword: keyword in hn.string.lower(),
                       keywords)):
            s = utils.textfile(pre.string.lstrip())
            name = hn.string
            samples.add(s, name)
    return samples.get()
def download(
    self, session: Optional[requests.Session] = None
) -> List[onlinejudge.problem.TestCase]:
    """Download sample cases from a Codeforces problem page.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases.
    """
    session = session or utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for tag in soup.find_all('div', class_=re.compile(
            '^(in|out)put$')):  # Codeforces writes very nice HTML :)
        log.debug('tag: %s', str(tag))
        # bugfix: the original `assert len(list(tag.children))` only checked
        # non-emptiness; the unpacking below requires exactly two children
        assert len(list(tag.children)) == 2
        title, pre = list(tag.children)
        assert 'title' in title.attrs['class']
        assert pre.name == 'pre'
        # rebuild the text: Codeforces separates sample lines with <br>
        s = ''
        for it in pre.children:
            if it.name == 'br':
                s += '\n'
            else:
                s += it.string
        s = s.lstrip()
        samples.add(s, title.string)
    return samples.get()
def download(self, session=None):
    """Fetch the problem page and collect its sample cases.

    A ``<pre>`` element counts as a sample when the tag right before it is an
    ``<h2>``/``<h3>`` whose text contains 'ample' ('sample' / 'example').
    """
    if session is None:
        session = requests.Session()
    url = self.get_url()
    # get
    log.status('GET: %s', url)
    resp = session.get(url)
    log.status(utils.describe_status_code(resp.status_code))
    resp.raise_for_status()
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for pre in soup.find_all('pre'):
        log.debug('pre: %s', str(pre))
        heading = utils.previous_sibling_tag(pre)
        log.debug('hN: %s', str(heading))
        log.debug(heading)
        # 'ample' is the suffix of 'sample', 'example'
        looks_like_sample = bool(
            heading and heading.name in ['h2', 'h3'] and heading.string
            and 'ample' in heading.string.lower())
        if not looks_like_sample:
            continue
        samples.add(utils.textfile(pre.string.lstrip()), heading.string)
    return samples.get()
def download_sample_cases(
    self, session: Optional[requests.Session] = None
) -> List[onlinejudge.type.TestCase]:
    """Download sample cases from the problem's "samples" table.

    Each table row holds one input/output pair in two ``<pre>`` cells.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases.
    """
    session = session or utils.new_default_session()
    resp = utils.request('GET', self.get_url(), session=session)
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for case in soup.find('table',
                          class_="samples").find('tbody').find_all('tr'):
        log.debug('case: %s', str(case))
        # bugfix: the original `assert len(list(case.children))` only checked
        # non-emptiness; the unpacking below requires exactly two cells
        assert len(list(case.children)) == 2
        input_pre, output_pre = list(
            map(lambda td: td.find('pre'), list(case.children)))
        assert input_pre.name == 'pre'
        assert output_pre.name == 'pre'
        assert re.search("^preSample.*Input$", input_pre.attrs['id'])
        assert re.search("^preSample.*Output$", output_pre.attrs['id'])
        s = input_pre.get_text()
        s = s.lstrip()
        samples.add(s, "Input")
        s = output_pre.get_text()
        s = s.lstrip()
        samples.add(s, "Output")
    return samples.get()
def download(self, session=None):
    """Download sample cases from a Codeforces problem page.

    :param session: a ``requests.Session`` to reuse; a fresh one is created
        when omitted.
    :return: the list of sample test cases.
    """
    session = session or requests.Session()
    url = self.get_url()
    # get
    log.status('GET: %s', url)
    resp = session.get(url)
    log.status(utils.describe_status_code(resp.status_code))
    resp.raise_for_status()
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for tag in soup.find_all('div', class_=re.compile(
            '^(in|out)put$')):  # Codeforces writes very nice HTML :)
        log.debug('tag: %s', str(tag))
        # bugfix: the original `assert len(list(tag.children))` only checked
        # non-emptiness; the unpacking below requires exactly two children
        assert len(list(tag.children)) == 2
        title, pre = list(tag.children)
        assert 'title' in title.attrs['class']
        assert pre.name == 'pre'
        # rebuild the text: Codeforces separates sample lines with <br>
        s = ''
        for it in pre.children:
            if it.name == 'br':
                s += '\n'
            else:
                s += it.string
        samples.add(s, title.string)
    return samples.get()
def download(self, session=None):
    """Fetch the problem page and gather sample cases from its <h2> sections."""
    if session is None:
        session = utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for heading in soup.find_all('h2'):
        parsed = self._parse_sample_tag(heading)
        if parsed is None:
            continue
        content, title = parsed
        samples.add(content, title)
    return samples.get()
def download_samples(self, session=None):
    """Fetch the problem page and collect the sample cases found in <pre> tags."""
    if session is None:
        session = utils.new_default_session()
    # get
    resp = utils.request("GET", self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for pre_tag in soup.find_all("pre"):
        log.debug("pre: %s", str(pre_tag))
        parsed = self._parse_sample_tag(pre_tag)
        if parsed is None:
            continue
        body, title = parsed
        samples.add(body, title)
    return samples.get()
def download_samples(self,
                     session: Optional[requests.Session] = None
                     ) -> List[TestCase]:
    """Fetch the problem page and collect the sample cases found in <pre> tags."""
    if session is None:
        session = utils.new_default_session()
    # get
    resp = utils.request('GET', self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for pre_tag in soup.find_all('pre'):
        log.debug('pre: %s', str(pre_tag))
        parsed = self._parse_sample_tag(pre_tag)
        if parsed is None:
            continue
        body, title = parsed
        samples.add(body, title)
    return samples.get()
def download(self, session=None):
    """Fetch the problem page and gather sample cases from its <h2> sections."""
    if session is None:
        session = requests.Session()
    url = self.get_url()
    # get
    log.status('GET: %s', url)
    resp = session.get(url)
    log.status(utils.describe_status_code(resp.status_code))
    resp.raise_for_status()
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for heading in soup.find_all('h2'):
        parsed = self._parse_sample_tag(heading)
        if parsed is None:
            continue
        content, title = parsed
        samples.add(content, title)
    return samples.get()
def download(self, session=None):
    """Download sample cases from a Codeforces problem page.

    :param session: a ``requests.Session`` to reuse; a fresh default session
        is created when omitted.
    :return: the list of sample test cases.
    """
    session = session or utils.new_default_session()
    # get
    resp = utils.request("GET", self.get_url(), session=session)
    # parse
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                             utils.html_parser)
    samples = utils.SampleZipper()
    for tag in soup.find_all("div", class_=re.compile(
            "^(in|out)put$")):  # Codeforces writes very nice HTML :)
        log.debug("tag: %s", str(tag))
        # bugfix: the original `assert len(list(tag.children))` only checked
        # non-emptiness; the unpacking below requires exactly two children
        assert len(list(tag.children)) == 2
        title, pre = list(tag.children)
        assert "title" in title.attrs["class"]
        assert pre.name == "pre"
        # rebuild the text: Codeforces separates sample lines with <br>
        s = ""
        for it in pre.children:
            if it.name == "br":
                s += "\n"
            else:
                s += it.string
        samples.add(s, title.string)
    return samples.get()
def download_sample_cases( self, session: Optional[requests.Session] = None ) -> List[onlinejudge.type.TestCase]: session = session or utils.new_default_session() # get # 自分で書き換えた箇所(初期 AtCoder の問題 URl は末尾が数字になっているため) url = self.get_url() for _ in range(2): resp = _request('GET', url, session=session) msgs = AtCoderService._get_messages_from_cookie(resp.cookies) if AtCoderService._report_messages(msgs, unexpected=True): # example message: "message: You cannot see this page." log.warning('are you logged in?') return [] # parse soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser) samples = utils.SampleZipper() if len(list(self._find_sample_tags(soup))) > 0: break else: url = url[:-1] + chr(ord(url[-1]) - ord('a') + ord('1')) lang = None for pre, h3 in self._find_sample_tags(soup): s = utils.textfile(utils.dos2unix(pre.string.lstrip())) name = h3.string l = self._get_tag_lang(pre) if lang is None: lang = l elif lang != l: log.info( 'skipped due to language: current one is %s, not %s: %s ', lang, l, name) continue samples.add(s, name) return samples.get()