def format(self, article, subscriber):
    """Render ``article`` as one ANPA wire message per ANPA category.

    For every entry in ``article['anpa_category']`` a new sequence number is
    requested from the ``subscribers`` resource service and a byte message
    is assembled: optional selector codes, the message header, the headline
    block, byline, dateline, body text, source/sign-off and the trailer.

    :param article: dict-like item to format; ``_updated`` and the item
        type field are read with ``[]`` and therefore must be present.
    :param subscriber: subscriber the message is produced for; used for the
        sequence number, the selector code mapping and error reporting.
    :return: list of ``(pub_seq_num, message_bytes)`` tuples, one per
        category.
    :raises: whatever ``FormatterError.AnpaFormatterError(ex, subscriber)``
        produces, for any exception raised while formatting (chained to the
        original exception).
    """
    try:
        docs = []
        for category in article.get('anpa_category'):
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            anpa = []

            # selector codes are only injected for those subscribers that
            # are defined in the mapper
            selectors = {}
            SelectorcodeMapper().map(article,
                                     category.get('qcode').upper(),
                                     subscriber=subscriber,
                                     formatted_item=selectors)
            selector_codes = selectors.get('selector_codes')
            if selector_codes:
                anpa.append(b'\x05')
                anpa.append(selector_codes.encode('ascii'))
                anpa.append(b'\x0D\x0A')

            # start of message header (syn syn soh)
            anpa.append(b'\x16\x16\x01')
            anpa.append(
                article.get('service_level', 'a').lower().encode('ascii'))

            # story number
            anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

            # field separator
            anpa.append(b'\x0A')  # -LF
            anpa.append(
                map_priority(article.get('priority')).encode('ascii'))
            anpa.append(b'\x20')

            anpa.append(category['qcode'].encode('ascii'))

            anpa.append(b'\x13')
            # format identifier
            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                anpa.append(b'\x12')
            else:
                anpa.append(b'\x11')
            anpa.append(b'\x20')

            # keyword: 'bc-' prefix, spaces turned into dashes, capped at 24
            # characters (slicing is a no-op for shorter strings)
            keyword = 'bc-{}'.format(
                self.append_legal(article=article,
                                  truncate=True)).replace(' ', '-')[:24]
            anpa.append(keyword.encode('ascii'))
            anpa.append(b'\x20')

            # version field
            anpa.append(b'\x20')

            # reference field
            anpa.append(b'\x20')

            # filing date (month-day of the last update)
            anpa.append(article['_updated'].strftime('%m-%d').encode('ascii'))
            anpa.append(b'\x20')

            # add the word count
            anpa.append(
                str(article.get('word_count', '0000')).zfill(4).encode('ascii'))
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x02')  # STX

            self._process_headline(anpa, article,
                                   category['qcode'].encode('ascii'))

            keyword = self.append_legal(article=article, truncate=True).encode(
                'ascii', 'ignore')
            anpa.append(keyword)
            # ``or ''`` guards against the key being present with a None value
            take_key = (article.get('anpa_take_key') or '').encode(
                'ascii', 'ignore')
            if take_key:
                anpa.append(b'\x20' + take_key)
            anpa.append(b'\x0D\x0A')

            if BYLINE in article:
                # a None byline still yields the (empty) byline line
                anpa.append(
                    (article.get(BYLINE) or '').encode('ascii', 'ignore'))
                anpa.append(b'\x0D\x0A')

            dateline_text = (article.get('dateline') or {}).get('text')
            if dateline_text:
                anpa.append(dateline_text.encode('ascii', 'ignore'))

            body = self.append_body_footer(article)
            if article.get(EMBARGO):
                embargo = '{}{}'.format(
                    'Embargo Content. Timestamp: ',
                    get_utc_schedule(article, EMBARGO).isoformat())
                body = embargo + body

            if article[ITEM_TYPE] == CONTENT_TYPE.PREFORMATTED:
                anpa.append(body.encode('ascii', 'replace'))
            else:
                # strip the markup and keep only the text content
                anpa.append(
                    BeautifulSoup(body, "html.parser").text.encode(
                        'ascii', 'replace'))

            anpa.append(b'\x0D\x0A')
            if article.get('more_coming', False):
                anpa.append('MORE'.encode('ascii'))
            else:
                anpa.append((article.get('source') or '').encode('ascii'))
            sign_off = (article.get('sign_off') or '').encode('ascii')
            if sign_off:
                anpa.append(b'\x20' + sign_off)
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x03')  # ETX

            # time and date
            anpa.append(datetime.datetime.now().strftime(
                '%d-%m-%y %H-%M-%S').encode('ascii'))

            anpa.append(b'\x04')  # EOT
            anpa.append(b'\x0D\x0A' * 8)
            docs.append((pub_seq_num, b''.join(anpa)))

        return docs
    except Exception as ex:
        raise FormatterError.AnpaFormatterError(ex, subscriber) from ex
def format(self, article, subscriber, codes=None):
    """Render ``article`` as one ANPA wire message per ANPA category.

    The article is deep-copied first, so the caller's object is not
    modified. For every category returned by ``self._get_category_list`` a
    new sequence number is requested from the ``subscribers`` resource
    service and a byte message is assembled: optional selector codes, the
    message header, the headline block, editorial note, byline, body text
    (with the dateline injected into the first paragraph), body footer,
    source/sign-off and the trailer.

    :param article: dict-like item to format; ``_updated`` is read with
        ``[]`` and therefore must be present.
    :param subscriber: subscriber the message is produced for; used for the
        sequence number and error reporting.
    :param codes: optional iterable of selector code strings; when truthy
        they are emitted space-separated ahead of the header.
    :return: list of dicts, one per category, with the keys
        ``published_seq_num``, ``encoded_item`` (bytes) and
        ``formatted_item`` (the same message decoded as ASCII).
    :raises: whatever ``FormatterError.AnpaFormatterError(ex, subscriber)``
        produces, for any exception raised while formatting (chained to the
        original exception).
    """
    try:
        docs = []
        formatted_article = deepcopy(article)
        for category in self._get_category_list(
                formatted_article.get('anpa_category')):
            mapped_source = self._get_mapped_source(formatted_article)
            formatted_article[config.ID_FIELD] = formatted_article.get(
                'item_id', formatted_article.get(config.ID_FIELD))
            pub_seq_num = superdesk.get_resource_service(
                'subscribers').generate_sequence_number(subscriber)
            anpa = []

            if codes:
                anpa.append(b'\x05')
                anpa.append(' '.join(codes).encode('ascii'))
                anpa.append(b'\x0D\x0A')

            # start of message header (syn syn soh)
            anpa.append(b'\x16\x16\x01')
            anpa.append(
                get_service_level(category, formatted_article).encode('ascii'))

            # story number
            anpa.append(str(pub_seq_num).zfill(4).encode('ascii'))

            # field separator
            anpa.append(b'\x0A')  # -LF
            anpa.append(
                map_priority(
                    formatted_article.get('priority')).encode('ascii'))
            anpa.append(b'\x20')

            anpa.append(category['qcode'].lower().encode('ascii'))

            anpa.append(b'\x13')
            # format identifier
            if formatted_article.get(FORMAT, FORMATS.HTML) == FORMATS.PRESERVED:
                anpa.append(b'\x12')
            else:
                anpa.append(b'\x11')
            anpa.append(b'\x20')

            # keyword: 'bc-' prefix, spaces turned into dashes, capped at 24
            # characters (slicing is a no-op for shorter strings)
            keyword = 'bc-{}'.format(
                self.append_legal(article=formatted_article,
                                  truncate=True)).replace(' ', '-')[:24]
            anpa.append(keyword.encode('ascii'))
            anpa.append(b'\x20')

            # version field
            anpa.append(b'\x20')

            # reference field
            anpa.append(b'\x20')

            # filing date (month-day of the last update)
            anpa.append(
                formatted_article['_updated'].strftime('%m-%d').encode(
                    'ascii'))
            anpa.append(b'\x20')

            # add the word count
            anpa.append(
                str(formatted_article.get(
                    'word_count', '0000')).zfill(4).encode('ascii'))
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x02')  # STX

            self._process_headline(anpa, formatted_article,
                                   category['qcode'].encode('ascii'))

            keyword = SluglineMapper().map(
                article=formatted_article,
                category=category['qcode'].upper(),
                truncate=True).encode('ascii', 'ignore')
            anpa.append(keyword)
            take_key = (formatted_article.get('anpa_take_key', '')
                        or '').encode('ascii', 'ignore')
            if take_key:
                anpa.append(b'\x20' + take_key)
            anpa.append(b'\x0D\x0A')

            # truthiness check so that a None ednote is skipped instead of
            # being handed to to_ascii()
            ednote_text = formatted_article.get('ednote')
            if ednote_text:
                ednote = '{}\r\n'.format(to_ascii(ednote_text))
                anpa.append(ednote.encode('ascii', 'replace'))

            if formatted_article.get(BYLINE):
                anpa.append(
                    get_text(formatted_article.get(BYLINE)).encode(
                        'ascii', 'replace'))
                anpa.append(b'\x0D\x0A')

            if formatted_article.get(FORMAT) == FORMATS.PRESERVED:
                anpa.append(
                    get_text(self.append_body_footer(formatted_article),
                             content='html').encode('ascii', 'replace'))
            else:
                body = to_ascii(formatted_article.get('body_html', ''))
                # we need to inject the dateline (skipped for auto-published
                # items); ``or {}`` tolerates a dateline stored as None
                dateline_text = (formatted_article.get('dateline')
                                 or {}).get('text')
                if dateline_text and not article.get('auto_publish', False):
                    # NOTE(review): this re-parses the raw body_html, so the
                    # to_ascii() result above is discarded on this path -
                    # confirm that is intended.
                    body_html_elem = parse_html(
                        formatted_article.get('body_html'))
                    ptag = body_html_elem.find('.//p')
                    if ptag is not None:
                        ptag.text = dateline_text + ' ' + (ptag.text or '')
                        body = to_string(body_html_elem)
                anpa.append(self.get_text_content(body))
                if formatted_article.get('body_footer'):
                    anpa.append(
                        self.get_text_content(
                            to_ascii(
                                formatted_article.get('body_footer', ''))))

            anpa.append(b'\x0D\x0A')
            anpa.append(mapped_source.encode('ascii'))
            sign_off = (formatted_article.get('sign_off', '')
                        or '').encode('ascii')
            if sign_off:
                anpa.append(b'\x20' + sign_off)
            anpa.append(b'\x0D\x0A')

            anpa.append(b'\x03')  # ETX

            # time and date
            anpa.append(datetime.datetime.now().strftime(
                '%d-%m-%y %H-%M-%S').encode('ascii'))

            anpa.append(b'\x04')  # EOT
            anpa.append(b'\x0D\x0A' * 8)

            # join once and reuse for both representations
            encoded_item = b''.join(anpa)
            docs.append({
                'published_seq_num': pub_seq_num,
                'encoded_item': encoded_item,
                'formatted_item': encoded_item.decode('ascii')
            })

        return docs
    except Exception as ex:
        raise FormatterError.AnpaFormatterError(ex, subscriber) from ex