def to_format(self, *, format_data: FormatData):
    """Render the whole publication as one string in ``format_data``.

    The parts are emitted in this order: title, description, the URL
    (through ``_kv_to_print``) and then every custom field (through
    ``_field_to_print``).  Falsy parts are skipped.

    :param format_data: target format every part is converted to.
    :return: the rendered parts joined by the target format's line break.
    """
    rendered = []

    # Rich parts already know how to convert themselves.
    for rich_part in (self.title, self.description):
        if rich_part:
            rendered.append(rich_part.to_format(format_data=format_data))

    # Plain values are rendered as labelled lines.
    plain_parts = {'URL': self.url}
    for label, content in plain_parts.items():
        if content:
            rendered.append(self._kv_to_print(label, content, format_data))

    if self.custom_fields:
        for custom_field in self.custom_fields:
            if custom_field:
                rendered.append(
                    self._field_to_print(custom_field, format_data))

    # Derive the separator: convert an HTML snippet that contains a <br/>
    # between two placeholder words, then strip the placeholders so only
    # what the target format emitted around them remains.
    placeholder = RichText('<p>error<br/>error</p>',
                           format_data=FormatData.HTML)
    separator = placeholder.to_format(format_data=format_data).replace(
        'error', '')
    return separator.join(rendered)
async def _get_product(self, product_id: int) -> BlackfirePublication:
    """Build the ``BlackfirePublication`` for a single product page.

    The page at ``_PRODUCT_URL`` formatted with ``product_id`` is fetched
    through ``_get_site_content`` and parsed with ``html.parser``.  The
    first ``<h1>`` supplies the title, ``#tab-description`` the description
    and ``#image`` the picture; the release date and deadline are extracted
    from the lines of the ``.description`` element.

    :param product_id: identifier substituted into ``_PRODUCT_URL``; also
        used as the publication id.
    :return: the populated publication value object.
    """
    page_url = self._PRODUCT_URL.format(product_id)
    page_html = await self._get_site_content(url=page_url)
    soup = BeautifulSoup(page_html, 'html.parser')

    heading = soup.find('h1')
    plain_title = heading.text
    title_markup = str(heading)
    # NOTE: str() of a missing element yields the literal text 'None'.
    description_markup = str(soup.find(id='tab-description'))

    image_url = self._BLACKFIRE_BASE_URL.format(
        soup.find(id='image').attrs['src'])
    image_file = await self._get_file_value_object(
        url=image_url,
        public_url=self._PUBLIC_URL,
        pretty_name=plain_title)

    # The plain-text description is handed over line by line.
    description_lines = soup.find(class_='description').text.split('\n')
    custom_fields = CustomFields(
        release_date=self._get_release_date(
            beautiful_soup_description=description_lines),
        dead_line=self._get_dead_line(
            beautiful_soup_description=description_lines),
    )

    return BlackfirePublication(
        publication_id=product_id,
        title=RichText(data=title_markup, format_data=self._FORMAT_DATA),
        description=RichText(data=description_markup,
                             format_data=self._FORMAT_DATA),
        url=page_url,
        timestamp=datetime.utcnow(),
        color=self._colour,
        images=[image_file],
        author=self._AUTHOR,
        custom_fields=custom_fields)
async def _get_new_cards(self, card: Tag) -> Optional[Publication]:
    """Turn one card image tag into a ``Publication`` unless already cached.

    The publication id is normally the basename of the tag's ``src`` with
    any query string removed.  When that name contains ``ws_today_`` the
    file is obtained first and the ``filename`` of the resulting file
    object is used as the id instead.

    :param card: tag whose ``src`` attribute points at the card image.
    :return: the new publication, or ``None`` when its id is in the cache.
    """
    src = card.attrs['src']
    file_name: str = os.path.basename(src).split('?')[0]

    file = None
    if 'ws_today_' in file_name:
        # The id for these comes from the file object, so it has to be
        # obtained before the cache can be consulted.
        file = await self._get_file_value_object(
            url=src,
            pretty_name=self._title,
            filename_unique=False,
            public_url=False)
        file_name = file.filename

    if file_name in self._cache:
        return None

    if file is None:
        absolute_src = urllib.parse.urljoin(self._domain, src)
        file = await self._get_file_value_object(
            url=absolute_src,
            pretty_name=self._title,
            public_url=True)

    tagged_title = self._add_html_tag(self._title, self._TITLE_HTML_TAG)
    return Publication(
        publication_id=file_name,
        title=RichText(data=tagged_title, format_data=FormatData.HTML),
        url=self._url,
        timestamp=datetime.utcnow(),
        images=[file],
    )
async def _get_new_new(self, new: element.Tag) -> Optional[Publication]:
    """Build a ``Publication`` from one news entry unless already cached.

    The entry's first link is the publication URL (made absolute against
    ``_DOMAIN`` when it has no host).  What gets attached depends on the
    target:

    * a link to another host: the entry's own ``<img>`` becomes the image;
    * a same-host link served as ``text/html``: the page's
      ``entry-content`` element supplies the description and up to five
      images;
    * any other same-host link: the linked resource is attached as a file.

    :param new: tag wrapping one news entry.
    :return: the new publication, or ``None`` when its URL is in the cache.
    """
    link: str = new.find('a').attrs['href']
    link_parts = urllib.parse.urlparse(link)
    if not link_parts.netloc:
        # Relative link: resolve it against the site's domain.
        link = urllib.parse.urljoin(self._DOMAIN, link)
        link_parts = urllib.parse.urlparse(link)

    if link in self._cache:
        return None

    title_str = new.find(class_='title').text.strip()
    title_rich = RichText(
        data=self._add_html_tag(string=str(title_str),
                                tag=self._TITLE_HTML_TAG),
        format_data=FormatData.HTML)

    description = None
    images = []
    files = []

    if link_parts.netloc != self._NETLOC:
        # External target: fall back to the thumbnail shown in the listing,
        # with its query string removed.
        thumbnail = await self._get_file_value_object(
            url=new.find('img').attrs['src'].split('?')[0],
            pretty_name=title_str,
            filename_unique=self._FILENAME_UNIQUE,
            public_url=self._PUBLIC_URL)
        images.append(thumbnail)
    elif (await self._get_site_head(url=link)).content_type == 'text/html':
        soup = BeautifulSoup(
            await self._get_site_content(url=link), 'html5lib')
        entry_content = soup.find(class_='entry-content')
        description = await self._get_description(data=entry_content)
        images = await self._get_images(
            data=entry_content, title=title_str, max_images=5)
    else:
        # Same host but not an HTML page: attach the resource itself.
        attachment = await self._get_file_value_object(
            url=link,
            pretty_name=title_str,
            filename_unique=self._FILENAME_UNIQUE,
            public_url=self._PUBLIC_URL)
        files.append(attachment)

    return Publication(
        publication_id=link,
        title=title_rich,
        description=description,
        url=link,
        files=files,
        timestamp=datetime.utcnow(),
        images=images,
    )
def __init__(self, receiver_full_config: ReceiverFullConfig):
    """Set the language-specific title and URL, then initialise the base.

    :param receiver_full_config: configuration whose
        ``receiver_config.language`` selects the title text and the URL.
    :raises NotImplementedError: for any language other than English or
        Japanese.
    """
    language = receiver_full_config.receiver_config.language

    tagged_title = self._add_html_tag(
        string=self._TITLE.format(language.value),
        tag=self._TITLE_HTML_TAG)
    self._title = RichText(data=tagged_title, format_data=FormatData.HTML)

    if language == Language.ENGLISH:
        self._url = self._EN_URL
    elif language == Language.JAPANESE:
        self._url = self._JP_URL
    else:
        raise NotImplementedError

    # The base initialiser runs last, after _title and _url are in place.
    super().__init__(receiver_full_config=receiver_full_config)
def __init__(self, *, files_directory: str, instance_name: str,
             queue_manager: QueueManager, download_files: bool,
             wait_time: int, logging_level: str,
             state_change_queue: Queue, colour: int):
    """Configure a per-instance logger and title, then initialise the base.

    :param files_directory: forwarded to the base initialiser.
    :param instance_name: stored on the instance and used as logger name.
    :param queue_manager: forwarded to the base initialiser.
    :param download_files: forwarded to the base initialiser.
    :param wait_time: forwarded to the base initialiser.
    :param logging_level: level applied to the instance logger.
    :param state_change_queue: forwarded to the base initialiser.
    :param colour: forwarded to the base initialiser.
    """
    self._instance_name = instance_name

    instance_logger = logging.getLogger(self._instance_name)
    instance_logger.setLevel(logging_level)

    tagged_title = self._add_html_tag(self._TITLE,
                                      tag=self._TITLE_HTML_TAG)
    self._title = RichText(data=tagged_title, format_data=FormatData.HTML)

    super().__init__(
        download_files=download_files,
        files_directory=files_directory,
        colour=colour,
        author=self._AUTHOR,
        logger=instance_logger,
        wait_time=wait_time,
        state_change_queue=state_change_queue,
        queue_manager=queue_manager)
async def _load_publications(self):
    """Queue one transaction per publication found on the English page.

    The page at ``_EN_URL`` is parsed with ``html5lib``.  Every
    ``div.monthWrap`` is treated as one month: its ``<h4>`` text (wrapped
    in ``_TITLE_HTML_TAG``) becomes the title shared by all images of that
    month, and each ``<img>`` inside is passed to
    ``_create_publication_from_img``.  Truthy results are put in the queue
    as single-publication transactions keyed by the publication id.
    """
    html = await self._get_site_content(url=self._EN_URL)
    soup = BeautifulSoup(html, 'html5lib')

    # find_all is the supported spelling; the camelCase findAll alias is
    # deprecated in Beautiful Soup 4.
    for month in soup.find_all('div', class_='monthWrap'):
        cards = month.find_all('img')
        title_str = self._add_html_tag(month.find('h4').text.strip(),
                                       tag=self._TITLE_HTML_TAG)
        title = RichText(data=title_str, format_data=FormatData.HTML)

        for card in cards:
            publication = await self._create_publication_from_img(
                img=card, rich_title=title)
            if not publication:
                # Nothing was produced for this image.
                continue
            transaction_data = TransactionData(
                transaction_id=publication.publication_id,
                publications=[publication])
            await self._put_in_queue(transaction_data=transaction_data)
def __init__(self, receiver_full_config: ReceiverFullConfig):
    """Prepare the HTML title, then delegate to the base initialiser.

    :param receiver_full_config: configuration forwarded unchanged to the
        base class.
    """
    tagged_title = self._add_html_tag(self._TITLE,
                                      tag=self._TITLE_HTML_TAG)
    self._title = RichText(data=tagged_title, format_data=FormatData.HTML)
    super().__init__(receiver_full_config=receiver_full_config)
async def _get_description(self, data: element) -> RichText:
    """Return ``data`` as an HTML ``RichText`` description.

    ``data`` is first passed through ``_remove_non_text_tags``; the string
    form of whatever that returns becomes the rich text payload.

    :param data: parsed markup to convert.
        NOTE(review): the annotation names ``element`` itself; presumably a
        ``bs4.element.Tag`` is expected - confirm against callers.
    :return: the description marked as HTML.
    """
    text_only = await self._remove_non_text_tags(data=data)
    return RichText(data=str(text_only), format_data=FormatData.HTML)
def _kv_to_print(key: str, value, format_data: FormatData):
    """Render a labelled value as one line in ``format_data``.

    The line is written as Markdown (``***key:*** value``) and then
    converted to the requested format.

    NOTE(review): there is no ``self`` parameter although ``to_format``
    calls this as ``self._kv_to_print(...)``; presumably it is decorated
    as a static method outside this view - confirm.

    :param key: label placed inside the emphasis markers.
    :param value: value interpolated after the label.
    :param format_data: target format of the returned text.
    :return: the converted line.
    """
    markdown_line = RichText(f'***{key}:*** {value}',
                             format_data=FormatData.MARKDOWN)
    return markdown_line.to_format(format_data=format_data)
def _field_to_print(c_field, format_data: FormatData):
    """Render a custom field as one line in ``format_data``.

    The field's ``name`` and ``value`` are written as Markdown
    (``***name:*** value``) and then converted to the requested format.

    NOTE(review): there is no ``self`` parameter although ``to_format``
    calls this as ``self._field_to_print(...)``; presumably it is decorated
    as a static method outside this view - confirm.

    :param c_field: object exposing ``name`` and ``value`` attributes.
    :param format_data: target format of the returned text.
    :return: the converted line.
    """
    markdown_line = RichText(f'***{c_field.name}:*** {c_field.value}',
                             format_data=FormatData.MARKDOWN)
    return markdown_line.to_format(format_data=format_data)