def test_doc_from_text():
    """Check ``Document.from_text`` with and without a ``separator``."""
    sentence = 'It is going to be a great day'

    # No separator: the root node carries the whole text and has no children.
    whole = Document.from_text(sentence)
    assert whole.get_root().content == sentence
    assert len(whole.get_root().get_children()) == 0

    # Space separator: the root has no content and the eight words are its
    # children, in order (index 4 is 'be').
    split = Document.from_text(sentence, separator=' ')
    assert split.get_root().content is None
    children = split.get_root().get_children()
    assert len(children) == 8
    assert children[4].content == 'be'
def test_kbbd():
    """Round-trip a tagged document through ``to_kddb`` / ``from_kddb``.

    After the round trip the content node must still yield the full text
    and report exactly two features.
    """
    source = Document.from_text('It is going to be a great day')

    # Apply the two tags at their fixed positions, in the original order.
    for tag_name, position in (('cheese', [1, 2]), ('foo', [3, 4])):
        source.content_node.tag(tag_name, fixed_position=position)

    serialized = source.to_kddb()
    restored = source.from_kddb(serialized)

    assert restored.content_node.get_all_content() == 'It is going to be a great day'
    assert len(restored.content_node.get_features()) == 2
def from_text(text: str, *args, **kwargs) -> Pipeline:
    """Create a pipeline whose input is a document built from plain text.

    Args:
        text: Text passed to ``Document.from_text`` to build the document.
        *args: Extra positional arguments forwarded to ``Pipeline`` after
            the document.
        **kwargs: Keyword arguments forwarded to ``Pipeline``.

    Returns:
        Pipeline: A new pipeline constructed with the document.
    """
    document = Document.from_text(text)
    return Pipeline(document, *args, **kwargs)
def test_persistance_cache():
    """Check that a tag added after an earlier ``to_kddb`` call survives a round trip."""
    document = Document.from_text('The sun is very bright today.')

    # Serialize once before tagging; the result is deliberately discarded.
    # NOTE(review): presumably this primes a cached KDDB form so the test can
    # detect a stale cache -- confirm against the Document implementation.
    document.to_kddb()
    document.get_root().tag('cheese')

    serialized = document.to_kddb()
    reloaded = document.from_kddb(serialized)
    assert reloaded.get_root().has_tags() is True
def test_basic_tagging2():
    """Check that ``get_tag`` returns a list for a tag applied with ``fixed_position``."""
    doc = Document.from_text('It is going to be a great day')
    node = doc.content_node

    node.tag('cheese', fixed_position=[1, 2])

    found = node.get_tag('cheese')
    assert isinstance(found, list)
def test_document_uuid():
    """Check that two separately created documents have different ``uuid`` values."""
    texts = (
        'The sun is very bright today.',
        'Fluffy clouds float through the sky.',
    )
    # Build both documents first (same order as the texts) and keep them
    # alive while their identifiers are compared.
    documents = [Document.from_text(text) for text in texts]
    assert documents[0].uuid != documents[1].uuid