def test_truncate_length_and_char():
    """Check truncation combining a length limit with a break character.

    With a limit of 8, '-' as the break character and no overflow string,
    the expected result stops before the first '-' instead of at 8 chars.
    """
    source = '12345-67890-12345'
    expected = '12345'

    result = truncate_string(
        source,
        8,
        truncate_at_chars='-',
        overflow_str=None,
    )

    assert result == expected
def _generate_text_metadata(topic: Topic) -> Dict[str, Any]:
    """Build the metadata dict (word count and excerpt) for a text topic.

    The text is extracted from ``topic.rendered_html``; the excerpt is that
    text passed through ``truncate_string`` with a length of 200 and a space
    as the truncation character.

    Returns:
        A dict with the keys ``"word_count"`` and ``"excerpt"``.
    """
    plain_text = extract_text_from_html(topic.rendered_html)

    # the excerpt is a shortened form of the extracted text
    short_excerpt = truncate_string(
        plain_text,
        length=200,
        truncate_at_chars=" ",
    )
    total_words = word_count(plain_text)

    return {"word_count": total_words, "excerpt": short_excerpt}
def test_truncate_length_and_nonexistent_char():
    """Check length+char truncation when the char is not within the limit.

    The only '-' in the input sits after the 8-character limit, so the
    expected result is simply the first 8 characters.
    """
    source = '1234567890-12345'
    expected = '12345678'

    result = truncate_string(
        source,
        8,
        truncate_at_chars='-',
        overflow_str=None,
    )

    assert result == expected
def upgrade():
    """Add the ``excerpt`` column to ``comments`` and backfill it.

    The column is created as non-nullable text with an empty-string server
    default. Afterwards every comment that is not flagged as deleted gets an
    excerpt generated from its rendered HTML, and the session is committed.
    """
    excerpt_column = sa.Column(
        "excerpt",
        sa.Text(),
        server_default="",
        nullable=False,
    )
    op.add_column("comments", excerpt_column)

    # backfill excerpts for all existing (non-deleted) comments
    db_session = sa.orm.Session(bind=op.get_bind())
    live_comments = (
        db_session.query(Comment)
        # "== False" is intentional: it is passed to filter() as a query
        # expression, so it must not be rewritten as "not ..." / "is False"
        .filter(Comment.is_deleted == False)  # noqa: E712
        .all()
    )

    for row in live_comments:
        plain_text = extract_text_from_html(row.rendered_html)
        row.excerpt = truncate_string(
            plain_text,
            length=200,
            truncate_at_chars=" ",
        )

    db_session.commit()
def markdown(self, new_markdown: str) -> None:
    """Store new markdown for the comment and refresh everything derived.

    Nothing happens when ``new_markdown`` equals the current markdown.
    Otherwise the markdown is stored, ``rendered_html`` is regenerated from
    it, and ``excerpt`` is rebuilt from the rendered HTML with the text of
    ``blockquote`` and ``del`` tags skipped. If the comment's age exceeds
    ``EDIT_GRACE_PERIOD``, ``last_edited_time`` is set to the current time.
    """
    # unchanged text: skip re-rendering and leave the edit time alone
    if new_markdown == self.markdown:
        return

    self._markdown = new_markdown
    self.rendered_html = convert_markdown_to_safe_html(new_markdown)

    # text inside these tags is left out of the excerpt
    tags_to_skip = ["blockquote", "del"]
    text_for_excerpt = extract_text_from_html(
        self.rendered_html,
        skip_tags=tags_to_skip,
    )
    self.excerpt = truncate_string(
        text_for_excerpt,
        length=200,
        truncate_at_chars=" ",
    )

    # only record an edit time once the comment is past the grace period
    past_grace_period = self.age > EDIT_GRACE_PERIOD
    if past_grace_period:
        self.last_edited_time = utc_now()
def _generate_text_metadata(topic: Topic) -> None:
    """Compute text metadata for a topic and store it on the topic.

    Parses ``topic.rendered_html`` as an HTML fragment, joins the text of all
    its elements, simplifies that string, and assigns a dict with the keys
    ``'word_count'`` and ``'excerpt'`` to ``topic.content_metadata``.
    """
    fragment = HTMLParser().parseFragment(topic.rendered_html)

    # concatenate the text of every element in the parsed fragment
    raw_text = ''.join(fragment.itertext())

    # sanitize unicode, remove leading/trailing whitespace, etc.
    clean_text = simplify_string(raw_text)

    # the excerpt is a shortened form of the simplified text
    short_excerpt = truncate_string(
        clean_text,
        length=200,
        truncate_at_chars=' ',
    )
    total_words = word_count(clean_text)

    topic.content_metadata = {
        'word_count': total_words,
        'excerpt': short_excerpt,
    }
def test_simple_truncate():
    """Check plain truncation by length with no overflow string.

    A 9-character input cut to length 5 should yield its first 5 characters.
    """
    expected = '12345'

    result = truncate_string('123456789', 5, overflow_str=None)

    assert result == expected
def test_truncate_same_length():
    """Check that a string exactly at the length limit is left untouched."""
    text = '123456789'
    limit = len(text)

    result = truncate_string(text, limit)

    assert result == text
def test_simple_truncate_with_overflow():
    """Check length-only truncation with the overflow string left unset.

    The call does not pass ``overflow_str``; the expected result '12...' is
    5 characters long in total, including the trailing '...'.
    """
    expected = '12...'

    result = truncate_string('123456789', 5)

    assert result == expected