Exemplo n.º 1
0
    def format(self, article, subscriber):
        """Build one ANPA wire message per ANPA category of ``article``.

        Every message is assembled as a list of ASCII byte fragments
        (optional selector codes, message header, headline, slugline and
        take key, byline, dateline, body, source/sign-off, timestamp and
        trailer) which are joined into a single ``bytes`` value.

        Optional text fields (service level, take key, byline, dateline,
        source, sign-off, word count) may be absent *or* explicitly ``None``;
        both are treated as "not provided" instead of aborting the item.

        :param article: dict-like item to format; ``anpa_category``,
            ``_updated`` and the ``ITEM_TYPE`` key are required.
        :param subscriber: subscriber the output is generated for; used to
            obtain the published sequence number and the selector codes.
        :return: list of ``(published_sequence_number, message_bytes)``
            tuples, one per category.
        :raises FormatterError.AnpaFormatterError: wraps any exception raised
            while formatting (the original exception is chained as cause).
        """
        try:
            docs = []
            for category in article.get('anpa_category'):
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                qcode = category['qcode']
                anpa = []

                # selector codes are only injected for those subscribers that
                # are defined in the mapper
                selectors = {}
                SelectorcodeMapper().map(article,
                                         qcode.upper(),
                                         subscriber=subscriber,
                                         formatted_item=selectors)
                selector_codes = selectors.get('selector_codes')
                if selector_codes:
                    anpa.append(b'\x05')
                    anpa.append(selector_codes.encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                # a missing or empty service level falls back to 'a'
                service_level = article.get('service_level') or 'a'
                anpa.append(service_level.lower().encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(
                    map_priority(article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(qcode.encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword: slugline prefixed with 'bc-', spaces replaced by
                # dashes, limited to 24 characters
                keyword = 'bc-{}'.format(
                    self.append_legal(article=article,
                                      truncate=True)).replace(' ', '-')[:24]
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date (month-day of the last update)
                anpa.append(
                    article['_updated'].strftime('%m-%d').encode('ascii'))
                anpa.append(b'\x20')

                # add the word count; None/missing is written as 0000
                anpa.append(
                    str(article.get('word_count')
                        or 0).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, article, qcode.encode('ascii'))

                # slugline followed by the optional take key
                anpa.append(
                    self.append_legal(article=article, truncate=True).encode(
                        'ascii', 'ignore'))
                take_key = (article.get('anpa_take_key')
                            or '').encode('ascii', 'ignore')
                if take_key:
                    anpa.append(b'\x20' + take_key)
                anpa.append(b'\x0D\x0A')

                if BYLINE in article:
                    # the byline line is emitted whenever the key exists,
                    # even if its value is empty or None
                    anpa.append(
                        (article.get(BYLINE) or '').encode('ascii', 'ignore'))
                    anpa.append(b'\x0D\x0A')

                dateline_text = (article.get('dateline') or {}).get('text')
                if dateline_text:
                    anpa.append(dateline_text.encode('ascii', 'ignore'))

                body = self.append_body_footer(article)
                if article.get(EMBARGO):
                    embargo = '{}{}'.format(
                        'Embargo Content. Timestamp: ',
                        get_utc_schedule(article, EMBARGO).isoformat())
                    body = embargo + body

                if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                    anpa.append(body.encode('ascii', 'replace'))
                else:
                    # non-preformatted bodies are reduced to their text
                    anpa.append(
                        BeautifulSoup(body, "html.parser").text.encode(
                            'ascii', 'replace'))

                anpa.append(b'\x0D\x0A')
                if article.get('more_coming', False):
                    anpa.append('MORE'.encode('ascii'))
                else:
                    anpa.append(
                        (article.get('source') or '').encode('ascii'))
                sign_off = (article.get('sign_off') or '').encode('ascii')
                if sign_off:
                    anpa.append(b'\x20' + sign_off)
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                anpa.append(datetime.datetime.now().strftime(
                    '%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                anpa.append(
                    b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
                )

                docs.append((pub_seq_num, b''.join(anpa)))

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber) from ex
Exemplo n.º 2
0
    def format(self, article, subscriber, codes=None):
        """Build one ANPA wire message per ANPA category of ``article``.

        The article is deep-copied first, so the caller's ``article`` is not
        modified. Every message is assembled as a list of ASCII byte
        fragments (optional selector codes, message header, headline,
        slugline and take key, editor's note, byline, body, source/sign-off,
        timestamp and trailer) which are joined into a single ``bytes`` value.

        Optional fields (take key, ednote, byline, dateline, sign-off, word
        count) may be absent *or* explicitly ``None``; both are treated as
        "not provided".

        :param article: dict-like item to format; ``_updated`` is required.
        :param subscriber: subscriber the output is generated for; used to
            obtain the published sequence number.
        :param codes: optional iterable of selector code strings; when
            truthy they are written space-separated ahead of the header.
        :return: list of dicts, one per category, with the keys
            ``published_seq_num``, ``encoded_item`` (``bytes``) and
            ``formatted_item`` (the same message decoded as ASCII text).
        :raises FormatterError.AnpaFormatterError: wraps any exception raised
            while formatting (the original exception is chained as cause).
        """
        try:
            docs = []
            formatted_article = deepcopy(article)
            for category in self._get_category_list(
                    formatted_article.get('anpa_category')):
                mapped_source = self._get_mapped_source(formatted_article)
                formatted_article[config.ID_FIELD] = formatted_article.get(
                    'item_id', formatted_article.get(config.ID_FIELD))
                pub_seq_num = superdesk.get_resource_service(
                    'subscribers').generate_sequence_number(subscriber)
                qcode = category['qcode']
                anpa = []

                if codes:
                    anpa.append(b'\x05')
                    anpa.append(' '.join(codes).encode('ascii'))
                    anpa.append(b'\x0D\x0A')

                # start of message header (syn syn soh)
                anpa.append(b'\x16\x16\x01')
                anpa.append(
                    get_service_level(category,
                                      formatted_article).encode('ascii'))

                # story number
                anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

                # field separator
                anpa.append(b'\x0A')  # -LF
                anpa.append(
                    map_priority(
                        formatted_article.get('priority')).encode('ascii'))
                anpa.append(b'\x20')

                anpa.append(qcode.lower().encode('ascii'))

                anpa.append(b'\x13')
                # format identifier
                if formatted_article.get(FORMAT,
                                         FORMATS.HTML) == FORMATS.PRESERVED:
                    anpa.append(b'\x12')
                else:
                    anpa.append(b'\x11')
                anpa.append(b'\x20')

                # keyword: slugline prefixed with 'bc-', spaces replaced by
                # dashes, limited to 24 characters
                keyword = 'bc-{}'.format(
                    self.append_legal(article=formatted_article,
                                      truncate=True)).replace(' ', '-')[:24]
                anpa.append(keyword.encode('ascii'))
                anpa.append(b'\x20')

                # version field
                anpa.append(b'\x20')

                # reference field
                anpa.append(b'\x20')

                # filing date (month-day of the last update)
                anpa.append(formatted_article['_updated'].strftime(
                    '%m-%d').encode('ascii'))
                anpa.append(b'\x20')

                # add the word count; None/missing is written as 0000
                anpa.append(
                    str(formatted_article.get('word_count')
                        or 0).zfill(4).encode('ascii'))
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x02')  # STX

                self._process_headline(anpa, formatted_article,
                                       qcode.encode('ascii'))

                # slugline followed by the optional take key
                anpa.append(SluglineMapper().map(
                    article=formatted_article,
                    category=qcode.upper(),
                    truncate=True).encode('ascii', 'ignore'))
                take_key = (formatted_article.get('anpa_take_key')
                            or '').encode('ascii', 'ignore')
                if take_key:
                    anpa.append(b'\x20' + take_key)
                anpa.append(b'\x0D\x0A')

                # truthiness check: a None ednote must not reach to_ascii()
                ednote = formatted_article.get('ednote')
                if ednote:
                    anpa.append('{}\r\n'.format(to_ascii(ednote)).encode(
                        'ascii', 'replace'))

                byline = formatted_article.get(BYLINE)
                if byline:
                    anpa.append(get_text(byline).encode('ascii', 'replace'))
                    anpa.append(b'\x0D\x0A')

                if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                    anpa.append(
                        get_text(self.append_body_footer(formatted_article),
                                 content='html').encode('ascii', 'replace'))
                else:
                    body = to_ascii(formatted_article.get('body_html', ''))
                    # we need to inject the dateline (skipped for
                    # auto-published items)
                    dateline_text = (formatted_article.get('dateline')
                                     or {}).get('text')
                    if dateline_text and not article.get(
                            'auto_publish', False):
                        # NOTE(review): the element is parsed from the raw
                        # body_html, so the to_ascii() result above is
                        # discarded when a <p> is found - confirm intended.
                        body_html_elem = parse_html(
                            formatted_article.get('body_html'))
                        ptag = body_html_elem.find('.//p')
                        if ptag is not None:
                            ptag.text = dateline_text + ' ' + (ptag.text
                                                               or '')
                            body = to_string(body_html_elem)
                    anpa.append(self.get_text_content(body))
                    body_footer = formatted_article.get('body_footer')
                    if body_footer:
                        anpa.append(
                            self.get_text_content(to_ascii(body_footer)))

                anpa.append(b'\x0D\x0A')
                anpa.append(mapped_source.encode('ascii'))
                sign_off = (formatted_article.get('sign_off')
                            or '').encode('ascii')
                if sign_off:
                    anpa.append(b'\x20' + sign_off)
                anpa.append(b'\x0D\x0A')

                anpa.append(b'\x03')  # ETX

                # time and date
                anpa.append(datetime.datetime.now().strftime(
                    '%d-%m-%y %H-%M-%S').encode('ascii'))

                anpa.append(b'\x04')  # EOT
                anpa.append(
                    b'\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A\x0D\x0A'
                )

                # join once and reuse for both representations
                encoded_item = b''.join(anpa)
                docs.append({
                    'published_seq_num': pub_seq_num,
                    'encoded_item': encoded_item,
                    'formatted_item': encoded_item.decode('ascii')
                })

            return docs
        except Exception as ex:
            raise FormatterError.AnpaFormatterError(ex, subscriber) from ex