def test_search_ids():
    """Searching with an explicit id filter returns only the matching document."""
    kiri = Kiri(local=True)
    documents = get_docs()
    documents[0].id = "123"
    kiri.upload(documents)
    hits = kiri.search("another", ids=["123"])
    assert len(hits.results) == 1, "Invalid number of search results"
def test_doc_emotion():
    """An uploaded ``Document`` exposes emotion detection returning a string.

    Renamed from ``test_doc_summarise``: that name was duplicated on the
    classify test, so pytest collected only one of the two, and it did not
    describe the behaviour under test (``doc.emotion()``).
    """
    kiri = Kiri(local=True)
    c = "I am a document."
    doc = Document(c)
    kiri.upload([doc])
    out = doc.emotion()
    # isinstance is the idiomatic type check (``type(x) == str`` rejects
    # str subclasses and is flagged by linters).
    assert isinstance(out, str)
def test_doc_classify():
    """An uploaded ``Document`` can be zero-shot classified into a score dict.

    Renamed from ``test_doc_summarise``: the original name collided with the
    emotion test's name (so only one was collected by pytest) and this test
    actually exercises ``doc.classify``.
    """
    kiri = Kiri(local=True)
    c = "I am a document."
    doc = Document(c)
    kiri.upload([doc])
    out = doc.classify(["random"])
    assert isinstance(out, dict)
def test_doc_qa():
    """An uploaded ``Document`` answers a question with a plain string."""
    kiri = Kiri(local=True)
    c = "I am a document."
    doc = Document(c)
    kiri.upload([doc])
    out = doc.qa("What are you?")
    # isinstance is the idiomatic type check over ``type(out) == str``.
    assert isinstance(out, str)
def test_upload_dup_id():
    """Uploading documents that share the same id must raise ``ValueError``."""
    kiri = Kiri(local=True)
    documents = get_docs()
    for document in documents:
        document.id = "123"
    with pytest.raises(ValueError):
        kiri.upload(documents)
def test_upload():
    """Upload vectorises each document and stores them all in the memory store."""
    kiri = Kiri(local=True)
    documents = get_docs()
    kiri.upload(documents)
    for document in documents[:2]:
        assert document.vector is not None, "Document not vectorised"
    assert len(kiri._store.documents) == 2, "Incorrect number of documents in mem"
def test_upload_chunked():
    """Chunked upload vectorises both whole documents and their chunks."""
    kiri = Kiri(local=True)
    documents = get_chunked_docs(chunking_level=1)
    kiri.upload(documents)
    assert len(kiri._store.documents) == 2, "Incorrect number of documents in mem"
    for document in documents:
        assert document.vector is not None, "Document not vectorised"
        assert len(document.chunk_vectors) == 2, "Invalid number of chunk vectors"
def test_qa_chunk():
    """``qa`` over chunked documents yields (answer, SearchResult) pairs."""
    kiri = Kiri(local=True)
    docs = get_chunked_docs()
    kiri.upload(docs)
    results = kiri.qa("another?")
    assert isinstance(results, list)
    for result in results:
        # Each result pairs the answer text with the search hit it came from.
        # isinstance is the idiomatic check over ``type(x) == str``.
        assert isinstance(result[0], str)
        assert isinstance(result[1], SearchResult)
def init_elastic_kiri(doc_class=ElasticDocument):
    """Build a Kiri instance backed by a throwaway Elasticsearch index.

    Deletes any pre-existing ``temp_test`` index first so every test run
    starts from an empty store.
    """
    index_name = "temp_test"
    elastic_url = "http://localhost:9200"
    # Best-effort cleanup: a 404 here simply means there was no stale index.
    requests.delete(f"{elastic_url}/{index_name}")
    store = ElasticDocStore(elastic_url, index=index_name, doc_class=doc_class)
    return Kiri(store=store, local=True)
def test_search():
    """A plain search over two uploaded documents returns both of them."""
    kiri = Kiri(local=True)
    documents = get_docs()
    kiri.upload(documents)
    hits = kiri.search("another")
    assert len(hits.results) == 2, "Invalid number of search results"
def test_upload_mixed_type():
    """Mixing ``Document`` and ``ChunkedDocument`` in one upload raises."""
    kiri = Kiri(local=True)
    mixed = [Document("a"), ChunkedDocument("b")]
    with pytest.raises(ValueError):
        kiri.upload(mixed)
def test_summarise():
    """``summarise`` returns a plain-string summary of the module-level text."""
    kiri = Kiri(local=True)
    out = kiri.summarise(text)
    # isinstance is the idiomatic type check over ``type(out) == str``.
    assert isinstance(out, str)
Here's example functionality for a customer-service email system. This shows
two capabilities of Kiri: zero-shot classification, and sentiment detection.
Zero-shot classification is categorizing into a group of labels that were
never seen during training. Sentiment detection... detects text sentiment.
A full list of sentiments is available in the README -- only a few are noted
in this example.
"""

# Common labels for e.g. an e-commerce store's emails
labels = ["Returns", "Promotional", "Technical Issues",
          "Product Inquiries", "Shipping Questions", "Other"]

# Negative sentiment, give special attention to these.
negative_sentiments = ["annoyance", "disapproval",
                       "disappointment", "anger", "disgust"]

kiri = Kiri()

# NOTE(review): presumably a warm-up call so model initialisation output does
# not interleave with the printed inbox below — confirm that is the intent.
kiri.classify("This is just to get rid of the example message before printing",
              ["test"])

# Print example, just to display local results
print("Inbox")
print("==================")
for email in emails:
    classification_results = kiri.classify(email, labels)
    # Pick the label with the highest classification score.
    label = max(classification_results, key=classification_results.get)
    emote = kiri.emotion(email)
    # Flag the email when any detected emotion is in the negative list.
    high_priority = any([e in emote for e in negative_sentiments])
    print(f"Category: {label}")
    if high_priority:
        print("\033[91mPRIORITY\033[0m")
def test_classify():
    """``classify`` returns a label-to-score mapping for the module-level text."""
    kiri = Kiri(local=True)
    scores = kiri.classify(text, ["interests", "alcoholism"])
    assert isinstance(scores, dict)
def test_emotion():
    """``emotion`` returns a plain string for the module-level text."""
    kiri = Kiri(local=True)
    out = kiri.emotion(text)
    # isinstance is the idiomatic type check over ``type(out) == str``.
    assert isinstance(out, str)
def test_search_max_results_chunk():
    """``max_results`` caps the number of hits returned for chunked docs."""
    kiri = Kiri(local=True)
    kiri.upload(get_chunked_docs())
    hits = kiri.search("another", max_results=1)
    assert len(hits.results) == 1, "Invalid number of search results"
For this example, just the answer in the most relevant document is returned
to the user, and used for additional context.
"""

elastic = False

if elastic:
    doc_store = ElasticDocStore("http://localhost:9000",
                                doc_class=ElasticChunkedDocument,
                                index="kiri_default")
    documents = big_n_docs["elastic"]
else:
    doc_store = InMemoryDocStore(doc_class=ChunkedDocument)
    documents = big_n_docs["memory"]

kiri = Kiri(doc_store, local=True)
kiri.upload(documents)

print("Hello! I'm a Kiri chatbot.")

# Hold previous Q/A pairs for additional context
session_qa = []

while True:
    try:
        question = input()
        answers = kiri.qa(question, prev_qa=session_qa)
        # Only showing the top-rated answer
        print(answers[0][0])
        prev_qa = (question, answers[0][0])
        # NOTE(review): ``+=`` with a tuple extends the list with the two
        # strings individually rather than appending the (question, answer)
        # pair; ``session_qa.append(prev_qa)`` looks intended — confirm
        # against the format ``kiri.qa(prev_qa=...)`` expects.
        session_qa += prev_qa
        print()
        # (excerpt ends here; the ``except`` handler for this ``try`` is
        # outside the visible chunk)
def test_init():
    """A local Kiri instance can be constructed without raising."""
    instance = Kiri(local=True)
- Deployment strategy
- Node dependencies
"""

elastic = False

if elastic:
    doc_store = ElasticDocStore("http://localhost:9000",
                                doc_class=ElasticChunkedDocument,
                                index="kiri_default")
    docs = [d["elastic"] for d in tech_docs]
else:
    doc_store = InMemoryDocStore(doc_class=ChunkedDocument)
    docs = [d["memory"] for d in tech_docs]

kiri = Kiri(doc_store, local=True)
kiri.upload(docs)

query = ""
if len(argv) == 1:
    # No query supplied on the command line: show usage and exit.
    print("Supply a query when running this script")
    print("Usage: python doc_search.py \"<your query here>\"")
    exit(0)
elif len(argv) == 2:
    query = argv[1]
else:
    # Join multi-word queries passed as separate shell arguments.
    query = " ".join(argv[1:])

print(f"Query: {query}")

results = kiri.search(query, max_results=3, min_score=0.01)
def test_qa():
    """``qa`` answers a question against raw text and returns a string."""
    kiri = Kiri(local=True)
    answer = kiri.qa("What does Jon like?", text)
    # isinstance is the idiomatic type check over ``type(x) == str``.
    assert isinstance(answer, str)