示例#1
0
def test_search_ids():
    """Restricting a search to a single document id yields exactly one result."""
    target_id = "123"
    kiri = Kiri(local=True)
    docs = get_docs()
    # Give the first document a known id so the search can be limited to it.
    docs[0].id = target_id
    kiri.upload(docs)

    response = kiri.search("another", ids=[target_id])
    hit_count = len(response.results)
    assert hit_count == 1, "Invalid number of search results"
示例#2
0
def test_doc_summarise():
    """Check that ``emotion()`` on an uploaded document returns a string.

    Note: despite the function name, the call under test is ``emotion()``.
    """
    kiri = Kiri(local=True)
    c = "I am a document."
    doc = Document(c)
    kiri.upload([doc])
    out = doc.emotion()
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
示例#3
0
def test_doc_summarise():
    """Classifying an uploaded document against one label yields a dict.

    Note: despite the function name, the call under test is ``classify()``.
    """
    kiri = Kiri(local=True)
    document = Document("I am a document.")
    kiri.upload([document])

    candidate_labels = ["random"]
    classification = document.classify(candidate_labels)
    assert isinstance(classification, dict)
示例#4
0
def test_doc_qa():
    """Check that ``qa()`` on an uploaded document returns a string answer."""
    kiri = Kiri(local=True)
    c = "I am a document."
    doc = Document(c)
    kiri.upload([doc])
    out = doc.qa("What are you?")
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
示例#5
0
def test_upload_dup_id():
    """Uploading documents that all share one id must raise ``ValueError``."""
    shared_id = "123"
    kiri = Kiri(local=True)
    duplicated = get_docs()
    # Force an id collision across every document in the batch.
    for entry in duplicated:
        entry.id = shared_id

    with pytest.raises(ValueError):
        kiri.upload(duplicated)
示例#6
0
def test_upload():
    """Uploading sets a vector on the first two documents and stores two in total."""
    kiri = Kiri(local=True)
    docs = get_docs()
    kiri.upload(docs)

    # Check the first two documents by index, in order.
    for position in (0, 1):
        assert docs[position].vector is not None, "Document not vectorised"

    stored_count = len(kiri._store.documents)
    assert stored_count == 2, "Incorrect number of documents in mem"
示例#7
0
def test_upload_chunked():
    """Uploading chunked documents stores two docs, each with two chunk vectors."""
    kiri = Kiri(local=True)
    chunked = get_chunked_docs(chunking_level=1)
    kiri.upload(chunked)

    stored_count = len(kiri._store.documents)
    assert stored_count == 2, "Incorrect number of documents in mem"

    for item in chunked:
        assert item.vector is not None, "Document not vectorised"
        chunk_vector_count = len(item.chunk_vectors)
        assert chunk_vector_count == 2, "Invalid number of chunk vectors"
示例#8
0
def test_qa_chunk():
    """Check the shape of ``Kiri.qa`` results over uploaded chunked documents.

    The call must return a list whose entries hold a string at index 0 and a
    ``SearchResult`` at index 1.
    """
    kiri = Kiri(local=True)
    docs = get_chunked_docs()
    kiri.upload(docs)
    results = kiri.qa("another?")
    assert isinstance(results, list)
    for result in results:
        # Use isinstance for both checks so the two assertions are consistent.
        assert isinstance(result[0], str)
        assert isinstance(result[1], SearchResult)
示例#9
0
def init_elastic_kiri(doc_class=ElasticDocument):
    """Build a local ``Kiri`` backed by an Elasticsearch store for tests.

    An HTTP DELETE is sent for the ``temp_test`` index on
    ``http://localhost:9200`` before the store is constructed; the response
    is not inspected.

    Args:
        doc_class: Document class handed to ``ElasticDocStore``.

    Returns:
        The ``Kiri`` instance wired to the new store.
    """
    elastic_url = "http://localhost:9200"
    index_name = "temp_test"
    # Remove whatever a previous run may have left in the index.
    requests.delete(f"{elastic_url}/{index_name}")
    return Kiri(
        store=ElasticDocStore(elastic_url, index=index_name, doc_class=doc_class),
        local=True,
    )
示例#10
0
def test_search():
    """An unrestricted search over the uploaded docs returns two results."""
    kiri = Kiri(local=True)
    corpus = get_docs()
    kiri.upload(corpus)

    response = kiri.search("another")
    hit_count = len(response.results)
    assert hit_count == 2, "Invalid number of search results"
示例#11
0
def test_upload_mixed_type():
    """Uploading a batch mixing ``Document`` and ``ChunkedDocument`` raises ``ValueError``."""
    kiri = Kiri(local=True)
    plain = Document("a")
    chunked = ChunkedDocument("b")
    mixed_batch = [plain, chunked]

    with pytest.raises(ValueError):
        kiri.upload(mixed_batch)
示例#12
0
def test_summarise():
    """Check that ``Kiri.summarise`` returns a string for the shared ``text`` fixture."""
    kiri = Kiri(local=True)
    out = kiri.summarise(text)
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
示例#13
0
Here's example functionality for a customer-service email system. 
This shows two capabilities of Kiri: zero-shot classification, and sentiment detection.

Zero-shot classification is categorizing into a group of labels that were never seen during training.

Sentiment detection... detects text sentiment.
A full list of sentiments is available in the README -- only a few are noted in this example.
"""

# Categories that e.g. an e-commerce store might sort its emails into
labels = [
    "Returns",
    "Promotional",
    "Technical Issues",
    "Product Inquiries",
    "Shipping Questions",
    "Other",
]

# Emotions treated as negative; emails showing them get special attention.
negative_sentiments = [
    "annoyance",
    "disapproval",
    "disappointment",
    "anger",
    "disgust",
]

kiri = Kiri()
# Throwaway call so the example message appears before the inbox is printed.
kiri.classify("This is just to get rid of the example message before printing", ["test"])

# Print the inbox, just to display local results
print("Inbox")
print("==================")
for email in emails:
    classification_results = kiri.classify(email, labels)
    # Keep the label whose associated value is the largest.
    label = max(classification_results, key=classification_results.get)

    emote = kiri.emotion(email)
    # Flag the email as soon as any negative sentiment shows up in the result.
    high_priority = any(e in emote for e in negative_sentiments)

    print(f"Category: {label}")
    if high_priority:
        # ANSI escape codes: bright red text, then reset.
        print("\033[91mPRIORITY\033[0m")
示例#14
0
def test_classify():
    """``Kiri.classify`` on the shared ``text`` fixture returns a dict."""
    kiri = Kiri(local=True)
    candidate_labels = ["interests", "alcoholism"]
    scores = kiri.classify(text, candidate_labels)
    assert isinstance(scores, dict)
示例#15
0
def test_emotion():
    """Check that ``Kiri.emotion`` returns a string for the shared ``text`` fixture."""
    kiri = Kiri(local=True)
    out = kiri.emotion(text)
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)
示例#16
0
def test_search_max_results_chunk():
    """``max_results=1`` caps a search over chunked documents at one result."""
    kiri = Kiri(local=True)
    chunked = get_chunked_docs()
    kiri.upload(chunked)

    response = kiri.search("another", max_results=1)
    hit_count = len(response.results)
    assert hit_count == 1, "Invalid number of search results"
示例#17
0
For this example, just the answer in the most relevant document is returned to the user,
and used for additional context.
"""

# Flip to True to use the Elasticsearch-backed store instead of the in-memory one.
elastic = False

if not elastic:
    doc_store = InMemoryDocStore(doc_class=ChunkedDocument)
    documents = big_n_docs["memory"]
else:
    doc_store = ElasticDocStore(
        "http://localhost:9000",
        index="kiri_default",
        doc_class=ElasticChunkedDocument,
    )
    documents = big_n_docs["elastic"]

kiri = Kiri(doc_store, local=True)

# Load the documents matching the chosen store.
kiri.upload(documents)

print("Hello! I'm a Kiri chatbot.")
# Hold previous Q/A pairs for additional context
session_qa = []
while True:
    try:
        question = input()
        answers = kiri.qa(question, prev_qa=session_qa)
        # Only showing the top-rated answer
        print(answers[0][0])
        prev_qa = (question, answers[0][0])
        session_qa += prev_qa
        print()
示例#18
0
def test_init():
    """Constructing a local ``Kiri`` must not raise."""
    # The instance itself is not needed; only construction is exercised.
    Kiri(local=True)
示例#19
0
- Deployment strategy
- Node dependencies
"""

# Flip to True to use the Elasticsearch-backed store instead of the in-memory one.
elastic = False

if not elastic:
    doc_store = InMemoryDocStore(doc_class=ChunkedDocument)
    docs = [entry["memory"] for entry in tech_docs]
else:
    doc_store = ElasticDocStore(
        "http://localhost:9000",
        index="kiri_default",
        doc_class=ElasticChunkedDocument,
    )
    docs = [entry["elastic"] for entry in tech_docs]

kiri = Kiri(doc_store, local=True)
# Load the documents matching the chosen store.
kiri.upload(docs)

query = ""
if len(argv) == 1:
    # No query was given on the command line: show usage and stop.
    print("Supply a query when running this script")
    print("Usage: python doc_search.py \"<your query here>\"")
    exit(0)

# Joining covers both one and several arguments; with exactly one argument
# the result is argv[1] unchanged.
query = " ".join(argv[1:])

print(f"Query: {query}")

results = kiri.search(query, max_results=3, min_score=0.01)
示例#20
0
def test_qa():
    """Check that ``Kiri.qa`` with an explicit context returns a string answer."""
    kiri = Kiri(local=True)
    out = kiri.qa("What does Jon like?", text)
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(out, str)