def function_load(self, url):
    """Fetch one post page, extract its title and body, and hand them on.

    The page HTML is obtained with ``get_html(url)`` and parsed with the
    ``lxml`` parser.  Both the title (first ``<h1>``) and the body
    (``div.b-singlepost-bodywrapper``) are looked up inside the
    ``div.b-singlepost-wrapper`` element.

    Args:
        url: Address of the post page.

    Returns:
        The text of the post's ``<h1>`` element.

    Raises:
        AttributeError: If the wrapper ``div`` or the ``<h1>`` is missing
            (``find`` returns ``None`` in that case).
    """
    page = BeautifulSoup(get_html(url), 'lxml')
    wrapper = page.find('div', class_='b-singlepost-wrapper')

    heading = wrapper.find('h1')
    title = heading.text
    body = wrapper.find("div", class_="b-singlepost-bodywrapper")

    self.load_post(url, title, body)
    return title
def function_load(self, url):
    """Fetch one post page, extract its title and body, and hand them on.

    The HTML returned by ``get_html(url)`` is parsed once with the ``lxml``
    parser; the resulting tree is used for both the title and the body
    lookup.

    Args:
        url: Address of the post page.

    Returns:
        The post title: the text of the last ``h4.entry-title-text``
        element, truncated at the first occurrence of the substring
        ``'entry'``.

    Raises:
        IndexError: If the page contains no ``h4.entry-title-text`` element.
    """
    soup = BeautifulSoup(get_html(url), 'lxml')

    # The last matching heading is the one used as the post title.
    title = soup.find_all('h4', {'class': "entry-title-text"})[-1].text
    # Keep only the part of the heading text that precedes 'entry'.
    title = title.split('entry')[0]

    post = soup.find('div', class_='entry-content')
    self.load_post(url, title, post)
    return title
def get_list_link_post(self, find_all, find, begining=1, period=1):
    """Collect the links on ``self.url`` that have not been saved yet.

    The page at ``self.url`` is fetched with ``get_html`` and parsed with
    the ``lxml`` parser.  The result is stored in ``self.list_link_post``.

    Args:
        find_all: Positional arguments forwarded to ``find_all`` on the
            parsed page; selects the candidate elements.
        find: Positional arguments forwarded to ``find`` on every candidate
            to locate the element carrying the ``href``.  When falsy, the
            candidates themselves are used.
        begining: Number of leading candidates to skip.
        period: Accepted but not used by this method.

    Returns:
        The number of links stored in ``self.list_link_post``.
    """
    candidates = BeautifulSoup(
        get_html(self.url), 'lxml').find_all(*find_all)[begining:]

    if find:
        link_elements = [item.find(*find) for item in candidates]
    else:
        link_elements = candidates

    # ``find`` returns None when a candidate holds no matching element;
    # skip those instead of failing on ``None.get``.
    hrefs = [
        element.get('href')
        for element in link_elements
        if element is not None
    ]

    already_saved = PostBlog.lp.list_saves
    # Elements without a usable href yield None/''; they are not links.
    self.list_link_post = [
        url for url in hrefs
        if url and url not in already_saved
    ]
    return len(self.list_link_post)
def get_list_link_post(self, number=None, period=1):
    """Build the list of post links, filtered by host and by title.

    Delegates the initial collection to ``PostBlog.get_list_link_post``
    using the ``div.entry-wrap.js-emojis`` / ``a.subj-link`` selectors and
    a start offset of 0.  Each collected link is then kept only if it
    contains ``'https://p-balaev.livejournal'`` and the text of the last
    ``h4.entry-title-text`` on the linked page does not contain
    ``'Мои твиты'`` (Russian for "My tweets").  Every candidate page is
    fetched with ``get_html`` to read that title.

    Args:
        number: Upper bound of the final slice ``[1:number]`` applied to
            the kept links; ``None`` means no upper bound.
        period: Forwarded unchanged to ``PostBlog.get_list_link_post``.

    Returns:
        The number of links stored in ``self.list_link_post``.
    """
    container_query = ('div', {'class': "entry-wrap js-emojis"})
    anchor_query = ('a', {'class': "subj-link"})
    PostBlog.get_list_link_post(
        self, container_query, anchor_query, 0, period)

    kept = []
    for link in self.list_link_post:
        if 'https://p-balaev.livejournal' not in link:
            continue
        headings = BeautifulSoup(get_html(link), 'lxml').find_all(
            'h4', {'class': "entry-title-text"})
        if 'Мои твиты' in headings[-1].text:
            continue
        kept.append(link)

    # The first kept link is always dropped by the slice.
    self.list_link_post = kept[1:number]
    return len(self.list_link_post)
def function_load(self, url):
    """Fetch one page, record its creator, and hand title and body on.

    The page is fetched with ``get_html`` and parsed with the ``lxml``
    parser; all lookups happen inside ``div.main-block``.

    * If ``'publications'`` occurs in ``url``, the body is
      ``article.article-item-body`` and ``self.creator`` is the text of
      the first ``<a>`` in the main block.
    * Otherwise the body is ``div.article-item-body`` and the creator is
      read from the ``<em>`` inside ``div.article-item-text``, falling
      back to the ``<li>`` inside ``div.classics-item-extra-block``.  The
      creator text is stripped and the characters ``„ “ " /`` are removed.

    Args:
        url: Address of the page.

    Returns:
        The text of the ``<h1>`` inside the main block.

    Raises:
        AttributeError: If ``div.main-block``, the ``<h1>``, or (in the
            non-publication branch) every creator source is missing.
    """
    post = BeautifulSoup(get_html(url), 'lxml').find(
        'div', class_="main-block")
    title = post.find('h1').text

    if 'publications' in url:
        tag = 'article'
        self.creator = post.find('a').text
    else:
        tag = 'div'
        # Guard the first lookup so that a missing ``article-item-text``
        # block leads to the fallback instead of an AttributeError.
        text_block = post.find('div', class_="article-item-text")
        creator = text_block.find('em') if text_block is not None else None
        if creator is None:
            creator = post.find(
                'div', class_="classics-item-extra-block").find('li')
        # Delete quotation marks and slashes in a single pass.
        unwanted = str.maketrans('', '', '„“"/')
        self.creator = creator.text.strip().translate(unwanted)

    self.load_post(url, title,
                   post.find(tag, class_="article-item-body"), self.url)
    return title
def function_load(self, url):
    """Fetch one post page, extract its title and body, and hand them on.

    The HTML from ``get_html(url)`` is parsed with the ``lxml`` parser.
    The title is the text of the last ``dt.entry-title`` element and the
    body is the first ``div.entry-content`` element.

    Args:
        url: Address of the post page.

    Returns:
        The text of the last ``dt.entry-title`` element.

    Raises:
        IndexError: If the page contains no ``dt.entry-title`` element.
    """
    document = BeautifulSoup(get_html(url), 'lxml')

    title_nodes = document.find_all('dt', class_="entry-title")
    title = title_nodes[-1].text
    content = document.find('div', class_='entry-content')

    self.load_post(url, title, content)
    return title