def parse_thread_page(el: bs4.element.Tag) -> AttrDict:
    """Pull author, body and date text out of one post element.

    Args:
        el: Tag that contains a ``.postprofile dt`` element, a ``.content``
            element and a ``.postbody .author`` element.

    Returns:
        An ``AttrDict`` with the attributes, set in this order:
        ``user`` (stripped text of the first ``.postprofile dt``),
        ``body_html`` (stripped markup of the first ``.content`` element),
        ``body_text`` (stripped text of that same element) and
        ``date`` (stripped text of the first ``.postbody .author`` element).

    Raises:
        IndexError: If any of the selectors matches nothing, because the
            first match is taken by index.
    """
    # Lookups run in the same order as the fields are assigned below.
    profile_name = el.select('.postprofile dt')[0]
    # A single lookup serves both the HTML and the plain-text body.
    content_node = el.select('.content')[0]
    author_line = el.select('.postbody .author')[0]

    post = AttrDict()
    post.user = profile_name.text.strip()
    post.body_html = str(content_node).strip()
    post.body_text = content_node.text.strip()
    post.date = author_line.text.strip()
    return post
def parse_link(link: bs4.element.Tag, domain: str) -> AttrDict:
    """Collect title, counters, date and absolute URL for one topic entry.

    Args:
        link: Tag holding the entry; its first ``a`` supplies title and
            href, its third ``a`` supplies the date text, and ``.views`` /
            ``.posts`` supply the counters.
        domain: Prefix put in front of the href to build ``url``.

    Returns:
        An ``AttrDict`` with the attributes, set in this order: ``title``,
        ``views``, ``answers``, ``date`` and ``url``. ``title`` and ``date``
        are the raw element text (not stripped); ``views`` and ``answers``
        have the German labels ``Zugriffe`` / ``Antworten`` removed and are
        then stripped (they remain strings).

    Raises:
        IndexError: If a selector matches nothing.
        KeyError: If the first anchor has no ``href`` attribute.
    """
    entry = AttrDict()

    # The first anchor is reused at the end for the href.
    first_anchor = link.select('a:nth-of-type(1)')[0]
    entry.title = first_anchor.text

    views_text = link.select('.views')[0].text
    entry.views = views_text.replace('Zugriffe', '').strip()

    posts_text = link.select('.posts')[0].text
    entry.answers = posts_text.replace('Antworten', '').strip()

    entry.date = link.select('a:nth-of-type(3)')[0].text

    # Every './' occurrence in the href becomes '/', not only a leading one.
    href = first_anchor.attrs['href']
    entry.url = domain + href.replace('./', '/')
    return entry