Пример #1
0
    def to_format(self, *, format_data: FormatData):
        """Render this publication as one string in ``format_data``.

        Pieces are emitted in this order: title, description, the ``URL``
        key/value line, then each custom field. Falsy pieces are skipped.
        The pieces are joined with a separator obtained by converting a small
        HTML probe to the target format (see the comment below).

        Args:
            format_data: Target format passed through to every ``to_format``
                call and to the key/value and field helpers.

        Returns:
            The joined string of all rendered pieces.
        """
        rich_parts = (self.title, self.description)
        plain_values = {'URL': self.url}

        rendered = []

        # Rich-text parts convert themselves.
        for part in rich_parts:
            if part:
                rendered.append(part.to_format(format_data=format_data))

        # Plain values go through the key/value helper.
        for label, plain in plain_values.items():
            if plain:
                rendered.append(self._kv_to_print(label, plain, format_data))

        if self.custom_fields:
            for custom in self.custom_fields:
                if custom:
                    rendered.append(self._field_to_print(custom, format_data))

        # Convert an HTML snippet made of placeholder words around a <br/>,
        # then strip the placeholders; what remains is used as the separator.
        # NOTE(review): presumably this yields the target format's line-break
        # markup -- confirm against RichText.
        probe = RichText('<p>error<br/>error</p>', format_data=FormatData.HTML)
        converted_probe = probe.to_format(format_data=format_data)
        separator = converted_probe.replace('error', '')

        return separator.join(rendered)
Пример #2
0
 async def _get_product(self, product_id: int) -> BlackfirePublication:
     """Scrape one product page and build a ``BlackfirePublication`` from it.

     The page is fetched from ``_PRODUCT_URL`` formatted with ``product_id``
     and parsed with ``html.parser``. The first ``<h1>`` supplies the title,
     the element with id ``tab-description`` the description, and the element
     with id ``image`` the picture. Release date and deadline are extracted
     from the lines of the ``description``-class element's text.

     Args:
         product_id: Identifier substituted into the product URL and used as
             the publication id.

     Returns:
         The populated ``BlackfirePublication``.
     """
     page_url = self._PRODUCT_URL.format(product_id)
     page_html = await self._get_site_content(url=page_url)
     soup = BeautifulSoup(page_html, 'html.parser')

     heading = soup.find('h1')
     heading_text = heading.text
     # str() keeps the surrounding markup so it can be fed to RichText.
     heading_markup = str(heading)
     description_markup = str(soup.find(id='tab-description'))

     image_src = soup.find(id='image').attrs['src']
     image_url = self._BLACKFIRE_BASE_URL.format(image_src)
     image_file = await self._get_file_value_object(
         url=image_url,
         public_url=self._PUBLIC_URL,
         pretty_name=heading_text,
     )

     description_lines = soup.find(class_="description").text.split('\n')
     extra_fields = CustomFields(
         release_date=self._get_release_date(
             beautiful_soup_description=description_lines),
         dead_line=self._get_dead_line(
             beautiful_soup_description=description_lines),
     )

     rich_title = RichText(data=heading_markup,
                           format_data=self._FORMAT_DATA)
     rich_description = RichText(data=description_markup,
                                 format_data=self._FORMAT_DATA)

     return BlackfirePublication(
         publication_id=product_id,
         title=rich_title,
         description=rich_description,
         url=page_url,
         timestamp=datetime.utcnow(),
         color=self._colour,
         images=[image_file],
         author=self._AUTHOR,
         custom_fields=extra_fields,
     )
Пример #3
0
    async def _get_new_cards(self, card: Tag) -> Optional[Publication]:
        """Build a ``Publication`` for an image tag unless it is cached.

        The publication id is the basename of the tag's ``src`` with any
        query string removed. When that name contains ``ws_today_`` the file
        is fetched first (non-unique filename, no public URL) and the id
        becomes the fetched file's ``filename``. Otherwise the file is only
        fetched after the cache check, from ``src`` joined onto
        ``self._domain``.

        Args:
            card: Tag whose ``src`` attribute points at the image.

        Returns:
            The new ``Publication``, or ``None`` when the id is already in
            ``self._cache``.
        """
        source = card.attrs['src']
        publication_key: str = os.path.basename(source).split('?')[0]
        image = None

        if 'ws_today_' in publication_key:
            image = await self._get_file_value_object(
                url=source,
                pretty_name=self._title,
                filename_unique=False,
                public_url=False,
            )
            # The id comes from the fetched file, not the URL.
            publication_key = image.filename

        if publication_key in self._cache:
            return None

        if image is None:
            absolute_source = urllib.parse.urljoin(self._domain, source)
            image = await self._get_file_value_object(
                url=absolute_source,
                pretty_name=self._title,
                public_url=True,
            )

        title_markup = self._add_html_tag(self._title, self._TITLE_HTML_TAG)
        return Publication(
            publication_id=publication_key,
            title=RichText(data=title_markup, format_data=FormatData.HTML),
            url=self._url,
            timestamp=datetime.utcnow(),
            images=[image],
        )
Пример #4
0
    async def _get_new_new(self, new: element.Tag) -> Optional[Publication]:
        """Turn one news entry tag into a ``Publication`` unless it is cached.

        The entry's first ``<a>`` supplies the link; a link without a network
        location is resolved against ``self._DOMAIN``. Three cases follow:

        * Link on a different netloc than ``self._NETLOC``: the entry's
          ``<img>`` (query string stripped) is stored as the only image.
        * Link on ``self._NETLOC`` with content type ``text/html``: the page
          is parsed and its ``entry-content`` element provides the
          description and up to five images.
        * Link on ``self._NETLOC`` with any other content type: the link
          itself is stored as an attached file.

        Args:
            new: Tag wrapping a single news entry.

        Returns:
            The new ``Publication``, or ``None`` when the resolved link is
            already in ``self._cache``.
        """
        link: str = new.find('a').attrs['href']
        link_parts = urllib.parse.urlparse(link)
        if not link_parts.netloc:
            # Relative link: make it absolute and re-parse it.
            link = urllib.parse.urljoin(self._DOMAIN, link)
            link_parts = urllib.parse.urlparse(link)

        if link in self._cache:
            return None

        headline = new.find(class_='title').text.strip()
        title_markup = self._add_html_tag(string=str(headline),
                                          tag=self._TITLE_HTML_TAG)
        rich_title = RichText(data=title_markup, format_data=FormatData.HTML)

        body = None
        pictures = []
        attachments = []

        if self._NETLOC != link_parts.netloc:
            thumbnail_url = new.find('img').attrs['src'].split('?')[0]
            thumbnail = await self._get_file_value_object(
                url=thumbnail_url,
                pretty_name=headline,
                filename_unique=self._FILENAME_UNIQUE,
                public_url=self._PUBLIC_URL,
            )
            pictures.append(thumbnail)
        else:
            head = await self._get_site_head(url=link)
            if head.content_type != 'text/html':
                attachment = await self._get_file_value_object(
                    url=link,
                    pretty_name=headline,
                    filename_unique=self._FILENAME_UNIQUE,
                    public_url=self._PUBLIC_URL,
                )
                attachments.append(attachment)
            else:
                page_html = await self._get_site_content(url=link)
                soup = BeautifulSoup(page_html, 'html5lib')
                content = soup.find(class_='entry-content')
                body = await self._get_description(data=content)
                pictures = await self._get_images(data=content,
                                                  title=headline,
                                                  max_images=5)

        return Publication(
            publication_id=link,
            title=rich_title,
            description=body,
            url=link,
            files=attachments,
            timestamp=datetime.utcnow(),
            images=pictures,
        )
Пример #5
0
    def __init__(self, receiver_full_config: ReceiverFullConfig):
        """Set the title and source URL for the configured language.

        The title is ``_TITLE`` formatted with the language's ``value``,
        wrapped in ``_TITLE_HTML_TAG`` and stored as HTML rich text. The URL
        is ``_EN_URL`` for English and ``_JP_URL`` for Japanese.

        Args:
            receiver_full_config: Configuration whose
                ``receiver_config.language`` drives the choice; it is also
                forwarded to the parent initializer.

        Raises:
            NotImplementedError: For any other language.
        """
        language = receiver_full_config.receiver_config.language

        title_markup = self._add_html_tag(
            string=self._TITLE.format(language.value),
            tag=self._TITLE_HTML_TAG,
        )
        self._title = RichText(data=title_markup, format_data=FormatData.HTML)

        if language == Language.ENGLISH:
            source_url = self._EN_URL
        elif language == Language.JAPANESE:
            source_url = self._JP_URL
        else:
            raise NotImplementedError
        self._url = source_url

        super().__init__(receiver_full_config=receiver_full_config)
Пример #6
0
 def __init__(self, *, files_directory: str, instance_name: str,
              queue_manager: QueueManager, download_files: bool,
              wait_time: int, logging_level: str, state_change_queue: Queue,
              colour: int):
     """Configure the logger and title, then initialize the parent class.

     A logger named after ``instance_name`` is fetched and set to
     ``logging_level``. The title is ``_TITLE`` wrapped in
     ``_TITLE_HTML_TAG`` and stored as HTML rich text. The remaining
     arguments are forwarded unchanged to the parent initializer, together
     with ``_AUTHOR`` and the logger.
     """
     self._instance_name = instance_name

     instance_logger = logging.getLogger(self._instance_name)
     instance_logger.setLevel(logging_level)

     title_markup = self._add_html_tag(self._TITLE,
                                       tag=self._TITLE_HTML_TAG)
     self._title = RichText(data=title_markup, format_data=FormatData.HTML)

     super().__init__(
         download_files=download_files,
         files_directory=files_directory,
         colour=colour,
         author=self._AUTHOR,
         logger=instance_logger,
         wait_time=wait_time,
         state_change_queue=state_change_queue,
         queue_manager=queue_manager,
     )
Пример #7
0
    async def _load_publications(self):
        """Fetch the English page and queue one transaction per publication.

        Every ``div`` with class ``monthWrap`` is treated as one month. The
        month's ``<h4>`` text, wrapped in ``_TITLE_HTML_TAG``, becomes the
        shared rich-text title for each ``<img>`` inside it. Each image is
        passed to ``_create_publication_from_img``; a truthy result is put
        on the queue as its own ``TransactionData``, keyed by the
        publication id.
        """
        html = await self._get_site_content(url=self._EN_URL)
        soup = BeautifulSoup(html, 'html5lib')

        # find_all is the documented Beautiful Soup 4 name; findAll is the
        # legacy camelCase alias kept only for backward compatibility.
        for month in soup.find_all('div', class_='monthWrap'):
            cards = month.find_all('img')
            title_markup = self._add_html_tag(month.find('h4').text.strip(),
                                              tag=self._TITLE_HTML_TAG)
            title = RichText(data=title_markup, format_data=FormatData.HTML)

            for card in cards:
                publication = await self._create_publication_from_img(
                    img=card, rich_title=title)
                if not publication:
                    continue
                transaction_data = TransactionData(
                    transaction_id=publication.publication_id,
                    publications=[publication])
                await self._put_in_queue(transaction_data=transaction_data)
Пример #8
0
    def __init__(self, receiver_full_config: ReceiverFullConfig):
        """Build the HTML rich-text title, then initialize the parent class.

        Args:
            receiver_full_config: Configuration forwarded unchanged to the
                parent initializer.
        """
        title_markup = self._add_html_tag(self._TITLE,
                                          tag=self._TITLE_HTML_TAG)
        self._title = RichText(data=title_markup, format_data=FormatData.HTML)
        super().__init__(receiver_full_config=receiver_full_config)
Пример #9
0
 async def _get_description(self, data: element) -> RichText:
     """Return ``data`` as HTML rich text after removing non-text tags.

     ``data`` is passed through ``_remove_non_text_tags``; the result is
     stringified and wrapped in a ``RichText`` marked as HTML.
     """
     # NOTE(review): the ``element`` annotation looks like a module rather
     # than a type (e.g. ``element.Tag``) -- confirm against the imports.
     text_only = await self._remove_non_text_tags(data=data)
     return RichText(data=str(text_only), format_data=FormatData.HTML)
Пример #10
0
 def _kv_to_print(key: str, value, format_data: FormatData):
     """Render a ``key: value`` line in the requested format.

     The line is written as Markdown, with the key and its colon wrapped in
     ``***`` markers, and then converted to ``format_data``.
     """
     # NOTE(review): no ``self`` parameter -- presumably decorated as a
     # staticmethod outside the visible lines; confirm.
     markdown_line = f'***{key}:*** {value}'
     rich_line = RichText(markdown_line, format_data=FormatData.MARKDOWN)
     return rich_line.to_format(format_data=format_data)
Пример #11
0
 def _field_to_print(c_field, format_data: FormatData):
     """Render a custom field as a ``name: value`` line in the given format.

     The field's ``name`` and ``value`` are written as Markdown, with the
     name and its colon wrapped in ``***`` markers, and then converted to
     ``format_data``.
     """
     # NOTE(review): no ``self`` parameter -- presumably decorated as a
     # staticmethod outside the visible lines; confirm.
     markdown_line = f'***{c_field.name}:*** {c_field.value}'
     rich_line = RichText(markdown_line, format_data=FormatData.MARKDOWN)
     return rich_line.to_format(format_data=format_data)