def startParsing(self):
    """Parse ``self.htmlStr`` and collect pagination links and histories.

    The HTML is parsed with BeautifulSoup's ``html.parser`` and every
    ``div`` with class ``row`` is scanned for three kinds of content:

    * ``span.pagina`` elements: a span that contains no link sets
      ``self.currentPage`` to its text; for a span that does contain
      links, the first ``href`` is appended to ``self.nextLinks`` as long
      as ``self.currentPage`` is not ``None``.
    * links inside ``div.col-xs-6``: a link whose stripped text starts
      with ``#`` starts a new ``History`` (identifier is the text without
      the ``#``) and sets its URL to ``self.baseURL + href``; a link whose
      text matches ``self.datePattern`` sets the history time; a link
      whose ``href`` starts with ``/bytag`` is added as a tag.
    * ``div.col-xs-12`` elements, selected by their inline ``style``:
      one holds the history text, the other holds the votes inside a
      ``<b>`` element. After the votes are set the history is appended to
      ``self.list_of_histories``.

    Date, tag, text and votes elements that appear before any ``#`` link
    has been seen are skipped, because there is no history to attach
    them to.

    Returns:
        None. Results are stored on ``self.currentPage``,
        ``self.nextLinks`` and ``self.list_of_histories``.
    """
    soup = BeautifulSoup(self.htmlStr, "html.parser")

    # Bound up front so that rows without a preceding '#' link cannot
    # trigger UnboundLocalError. It is intentionally NOT reset per row:
    # the history in progress stays available to later rows.
    currentHistory = None

    for row in soup.find_all('div', class_='row'):
        # Pagination: the span without a link is the current page.
        for page_span in row.find_all('span', class_="pagina"):
            anchors = page_span.find_all('a', href=True)
            if not anchors:
                self.currentPage = page_span.getText()
            elif self.currentPage is not None:
                self.nextLinks.append(anchors[0]['href'])

        # Identifier, date and tags
        for column in row.find_all('div', class_='col-xs-6'):
            for link in column.find_all('a', href=True):
                label = link.text.strip()
                if label.startswith('#'):
                    currentHistory = History(label[1:])
                    currentHistory.setURL(self.baseURL + link['href'])
                elif currentHistory is None:
                    # No history started yet: nothing to attach a date
                    # or a tag to.
                    continue
                elif self.datePattern.match(label):
                    currentHistory.setHistoryTime(label)

                # Checked independently of the branches above; the tag is
                # stored with the link's raw (unstripped) text.
                if link['href'].startswith("/bytag"):
                    currentHistory.addTag(link.text)

        # Text of the post
        for text_div in row.find_all(
                'div', class_="col-xs-12",
                style="margin:0.5em 0;line-height:1.785em"):
            if currentHistory is None:
                continue
            currentHistory.setHistory(text_div.get_text().strip())

        # Votes; the history is stored once its votes are known.
        # NOTE(review): assumes every votes div contains a <b> element,
        # as the original code did - confirm against the page markup.
        for votes_div in row.find_all(
                'div', class_="col-xs-12", style='text-align:center'):
            if currentHistory is None:
                continue
            currentHistory.setVotes(votes_div.b.get_text())
            self.list_of_histories.append(currentHistory)