def get_post_content():
    """Scrape the current post and append it to ``self.posts_content``.

    Takes no arguments: ``self``, ``url``, ``text_xpath``, ``figure_xpath``
    and the helpers ``check_limited_access``, ``get_text`` and ``get_figure``
    are all read from the surrounding scope.

    When ``check_limited_access()`` returns ``True`` a warning is logged.
    If ``self.ignore_limited_access`` is ``False`` the user is additionally
    asked whether to continue; any answer other than ``y`` stops the
    function before anything is scraped (answers other than ``n`` also log
    ``'Abort'``).

    Returns:
        None. The result is stored as one new entry in each of the
        ``'url'``, ``'text'``, ``'img_src'`` and ``'caption'`` lists of
        ``self.posts_content``.
    """
    limited = check_limited_access()
    if limited is True:
        if self.ignore_limited_access is False:
            Logger.warning('You have a limited access :' + url)
            answer = input('\tDo you want to continue - [y/n]: ').lower()
            if answer != 'y':
                # 'n' is a silent stop; anything else is reported as abort.
                if answer != 'n':
                    Logger.fail('Abort')
                return
        elif self.ignore_limited_access:
            # Limited access is tolerated: warn, but keep going unprompted.
            Logger.warning('You have a limited access :' + url)

    # Look up both element groups first, then extract their content.
    text_nodes = self.find_elements_by_xpath(xpath=text_xpath,
                                             raise_error=False)
    figure_nodes = self.find_elements_by_xpath(xpath=figure_xpath,
                                               raise_error=False)
    text = get_text(text_nodes)
    img_src, img_caption = get_figure(figure_nodes)

    # Lazily create the column layout the first time a post is stored.
    if not list(self.posts_content.keys()):
        self.posts_content = {
            column: [] for column in ('url', 'text', 'img_src', 'caption')
        }

    row = (
        ('url', url),
        ('text', text),
        ('img_src', img_src),
        ('caption', img_caption),
    )
    for column, value in row:
        self.posts_content[column].append(value)
def get(self, url):
    """Load *url* in the driver, retrying when the page load times out.

    The page-load timeout is set from ``self.time_to_wait`` and the script
    timeout from ``self.kwargs['script_timeout']`` (``0.001`` when that key
    is absent). The page is then requested up to
    ``self.reload_page_count`` times. When the last attempt also times out
    the user is asked whether to try again:

    * ``y`` - new values for ``self.time_to_wait`` and
      ``self.reload_page_count`` are read from standard input and the
      whole procedure is restarted.
    * ``n`` - ``self.___timeout_export__()`` is called and the timeout
      error is passed to ``Logger.error``.
    * anything else - same as ``n``, with an extra ``'Abort'`` message.

    On completion ``self.scroll_height`` holds the value of
    ``document.body.scrollHeight`` for the loaded page.

    Args:
        url: Address handed to ``self.driver.get``.

    Returns:
        None.
    """
    self.driver.set_page_load_timeout(time_to_wait=self.time_to_wait)
    # A near-zero script timeout is the default when none was configured.
    self.driver.set_script_timeout(
        time_to_wait=self.kwargs.get('script_timeout', 0.001))

    attempts = self.reload_page_count
    for attempt in range(1, attempts + 1):
        try:
            self.driver.get(url=url)
            break
        except TimeoutException as error:
            if attempt < attempts:
                Logger.fail(
                    str(attempt) + ': timeout::page has been reloaded')
                Logger.set_line(length=60)
                continue

            # Retry budget exhausted: let the user decide what happens next.
            Logger.fail(
                str(attempt) + ': timeout::page reload Limit has been exceed\n'
                '\tdo you want to try again - [y/n]: ', end='')
            answer = input().lower()
            Logger.set_line(length=60)
            if answer == 'y':
                self.time_to_wait = float(input('time to wait :'))
                self.reload_page_count = int(input('reload count :'))
                self.get(url)
                # The nested call already refreshed self.scroll_height;
                # returning here avoids running the same script twice.
                return

            self.___timeout_export__()
            if answer != 'n':
                Logger.fail('Abort')
            # NOTE(review): whether Logger.error raises is not visible
            # here; if it returns, execution continues to the
            # scroll-height query below, as it did before.
            Logger.error(error)

    self.scroll_height = self.driver.execute_script(
        "return document.body.scrollHeight")