Пример #1
0
def test_add_and_delete_documents():
    """Add two documents to "Trial Collection" and delete them in one call.

    Verifies that the bulk-delete response is a dict reporting success, that
    both new document IDs are listed as deleted, and that no annotations are
    reported as deleted.
    """
    client = common.login_with_test_user(common.client())

    collection_id = common.get_collection_id(client, "Trial Collection")
    assert collection_id is not None

    # Both documents intentionally share the same text.
    added_doc_ids = []
    for _ in range(2):
        doc_id = client.add_document(collection_id=collection_id,
                                     text="This is a test document to be deleted.")
        assert doc_id is not None
        added_doc_ids.append(doc_id)

    delete_resp = client.delete_documents(added_doc_ids)
    # isinstance() also rejects None, so no separate None check is needed.
    assert isinstance(delete_resp, dict)

    assert delete_resp["success"]

    changed_objs = delete_resp["changed_objs"]
    assert len(changed_objs["annotations"]["deleted"]) == 0
    for doc_id in added_doc_ids:
        assert doc_id in changed_objs["documents"]["deleted"]
    
Пример #2
0
def test_get_classifier_status():
    """Validate the status reported for the classifier of every listed collection."""
    session = common.login_with_test_user(common.client())

    # Lazy generator: collection IDs are read one at a time as the loop runs.
    collection_ids = (entry["_id"] for entry in session.list_collections())
    for collection_id in collection_ids:
        classifier_id = session.get_collection_classifier(collection_id)["_id"]
        pipeline_status = session.get_classifier_status(classifier_id)
        _check_pipeline_status(pipeline_status, classifier_id)
Пример #3
0
def test_list_collections():
    """Check that the two collection listings are consistent with each other.

    ``list_collections(False)`` is treated here as the unarchived-only listing
    and ``list_collections(True)`` as the listing that also includes archived
    collections. The test asserts that:

    * both results are non-degenerate lists,
    * every collection carries a set of expected properties,
    * the full listing is a superset of the unarchived one, and
    * a collection's ``archived`` flag is set exactly when it is missing from
      the unarchived listing.
    """
    client = common.login_with_test_user(common.client())
    unarchived_collections = client.list_collections(False)
    assert isinstance(unarchived_collections, list)
    assert len(unarchived_collections) > 0
    all_collections = client.list_collections(True)
    assert isinstance(all_collections, list)
    assert len(all_collections) >= len(unarchived_collections)

    # make sure every collection has some expected properties
    expected_keys = ("_id", "annotators", "viewers", "configuration", "metadata")
    for col in (all_collections + unarchived_collections):
        for key in expected_keys:
            assert key in col

    # Index the IDs once so the membership checks below are O(1) each.
    unarchived_ids = {col["_id"] for col in unarchived_collections}
    all_ids = {col["_id"] for col in all_collections}

    # make sure all_collections is superset of unarchived_collections
    assert unarchived_ids <= all_ids

    # make sure anything in all that's not in unarchived is archived
    for col in all_collections:
        assert col["archived"] == (col["_id"] not in unarchived_ids)
Пример #4
0
def test_login_and_logout():
    """Exercise a failed login, login by ID, login by email, and logout.

    Each successful login is paired with a logout in a ``finally`` clause so
    that a failing assertion does not leave the client logged in.
    """
    client = common.client()
    assert not client.is_logged_in()

    # invalid login
    with pytest.raises(pine.client.exceptions.PineClientAuthException):
        client.login_eve("asdf", "asdf")
    assert not client.is_logged_in()

    # valid login with ID
    user = common.test_user_data()[0]
    try:
        client.login_eve(user["_id"], user["password"])
        assert client.is_logged_in()
        assert client.get_my_user_id() == user["_id"]
        client_user = client.get_logged_in_user()
        assert client_user is not None
        for key in ("display_name", "is_admin", "username", "id"):
            assert key in client_user
    finally:
        client.logout()
        assert not client.is_logged_in()

    # valid login with email
    try:
        client.login_eve(user["email"], user["password"])
        assert client.is_logged_in()
        assert client.get_my_user_id() == user["_id"]
    finally:
        client.logout()
        assert not client.is_logged_in()
Пример #5
0
def test_collection_user_permissions():
    """Check the test user's permissions on two known collections.

    For each collection title the permissions object returned by the client
    is converted with ``to_dict()`` and compared against the full expected
    mapping, so any added, missing, or flipped permission fails the test.
    """
    client = common.login_with_test_user(common.client())

    # (collection title, expected permissions) pairs, checked in this order.
    expected_by_title = [
        ("NER Test Collection", {
            "view": True,
            "annotate": True,
            "add_documents": True,
            "add_images": True,
            "modify_users": False,
            "modify_labels": False,
            "modify_document_metadata": True,
            "download_data": False,
            "archive": False,
        }),
        ("Trial Collection", {
            "view": True,
            "annotate": True,
            "add_documents": True,
            "add_images": True,
            "modify_users": True,
            "modify_labels": True,
            "modify_document_metadata": True,
            "download_data": True,
            "archive": True,
        }),
    ]

    for title, expected in expected_by_title:
        collection_id = _get_collection_id(title, client)
        assert collection_id is not None
        permissions = client.get_collection_permissions(collection_id)
        assert permissions.to_dict() == expected
Пример #6
0
def test_delete_document_not_allowed():
    """Deleting a document ID that does not exist must raise an HTTP exception."""
    client = common.login_with_test_user(common.client())

    # Sanity check that the expected fixture collection is present.
    collection_id = common.get_collection_id(client, "Trial Collection")
    assert collection_id is not None

    with pytest.raises(pine.client.exceptions.PineClientHttpException):
        client.delete_document("nonexistent")
Пример #7
0
def test_get_collection_classifier():
    """Each listed collection must yield a dict classifier that points back to it."""
    session = common.login_with_test_user(common.client())

    # The classifier has to reference its own collection and name a pipeline.
    for entry in session.list_collections():
        found = session.get_collection_classifier(entry["_id"])
        assert type(found) is dict
        assert found["collection_id"] == entry["_id"]
        assert found["pipeline_id"] is not None
Пример #8
0
def test_download_collection_data_errors():
    """Check the error paths of ``download_collection_data``.

    A ``None`` collection ID must raise ``PineClientValueException``, and
    downloading "NER Test Collection" as this user must raise
    ``PineClientHttpException`` with status code 401.
    """
    # find a collection that has annotations that the test user does NOT have access to
    user = "******"
    collection_title = "NER Test Collection"
    client = common.login_with_user(user, common.client())

    with pytest.raises(pine.client.exceptions.PineClientValueException):
        client.download_collection_data(None)

    collection_id = _get_collection_id(collection_title, client)
    assert collection_id is not None
    with pytest.raises(pine.client.exceptions.PineClientHttpException) as excinfo:
        client.download_collection_data(collection_id)
    assert excinfo.value.status_code == 401
Пример #9
0
def test_get_pipelines():
    """The pipelines returned by the client must match the imported test pipelines.

    The ID sets have to be equal, and every key of an imported pipeline must
    carry the same value in the corresponding returned pipeline.
    """
    session = common.login_with_test_user(common.client())

    fixtures = common.test_pipeline_data()
    wanted_ids = {fixture["_id"] for fixture in fixtures}

    served = session.get_pipelines()
    served_ids = {entry["_id"] for entry in served}
    assert wanted_ids == served_ids

    for entry in served:
        # First fixture with a matching ID; the set equality above guarantees one.
        fixture = next(f for f in fixtures if f["_id"] == entry["_id"])
        for key, value in fixture.items():
            assert value == entry[key]
Пример #10
0
def test_collection_creation_and_get_and_archive():
    """Create a collection, read it back, and toggle its archived flag.

    The collection is built with the current user as sole viewer and
    annotator, one label, and a classifier using the pipeline named "spacy"
    (case-insensitive match). The ``finally`` clause archives the collection
    whether or not the assertions pass.
    """
    client = common.login_with_test_user(common.client())
    pipeline_id = [p for p in client.get_pipelines() if p["name"].lower() == "spacy"][0]["_id"]
    assert pipeline_id is not None
    my_id = client.get_my_user_id()
    assert my_id is not None

    collection_builder = client.collection_builder() \
        .viewer(my_id) \
        .annotator(my_id) \
        .label("label") \
        .title("Collection to Test Creation") \
        .description("This is a collection for pytest to test creation.") \
        .classifier(pipeline_id, train_every=100)
    collection_id = client.create_collection(collection_builder)
    assert isinstance(collection_id, str)

    try:
        collection = client.get_collection(collection_id)
        assert isinstance(collection, dict)

        assert collection["_id"] == collection_id
        assert collection["creator_id"] == my_id
        assert collection["annotators"] == [my_id]
        assert collection["viewers"] == [my_id]
        # Identity checks keep the test strict: the flag must be a real bool,
        # not merely a truthy/falsy value.
        assert collection["archived"] is False
        assert collection["labels"] == ["label"]
        assert collection["metadata"] == {
            "title": "Collection to Test Creation",
            "description": "This is a collection for pytest to test creation."
        }

        updated_collection = client.archive_collection(collection_id, True)
        assert isinstance(updated_collection, dict)
        assert updated_collection["_id"] == collection_id
        assert updated_collection["archived"] is True

        updated_collection = client.archive_collection(collection_id, False)
        assert isinstance(updated_collection, dict)
        assert updated_collection["_id"] == collection_id
        assert updated_collection["archived"] is False
    finally:
        # Leave the test collection archived regardless of the outcome.
        client.archive_collection(collection_id, True)
Пример #11
0
def test_sync_train():
    """Train the "Small Collection OpenNLP" classifier synchronously and check the result shape."""
    session = common.login_with_test_user(common.client())

    target = common.get_collection(session, "Small Collection OpenNLP")
    assert target is not None
    target_id = target["_id"]
    assert target_id is not None
    classifier_id = session.get_collection_classifier(target_id)["_id"]
    assert classifier_id is not None

    job = session.classifier_train(classifier_id, do_async=False)
    _assert_job_response(job, True)
    response = job["job_response"]
    assert response is not None
    assert isinstance(response, dict)

    # Each expected key must be present and hold a value of the expected type.
    assert "average_metrics" in response
    assert isinstance(response["average_metrics"], dict)
    assert "updated_objects" in response
    assert isinstance(response["updated_objects"], dict)
    assert "fit" in response
    assert isinstance(response["fit"], dict)
    assert "model_filename" in response
    assert isinstance(response["model_filename"], str)
Пример #12
0
def test_download_collection_data():
    """Download "NER Test Collection" while enabling one include flag at a time.

    Starting with every ``include_*`` option off, each step turns on one more
    option and checks the exact key set of the returned collection, documents,
    and annotations, as well as an unchanged document count.
    """
    # find a collection that has annotations that the test user has access to
    user = "******"
    collection_title = "NER Test Collection"
    client = common.login_with_user(user, common.client())

    col_data = common.test_collection_data(collection_title)
    assert col_data is not None
    collection_id = _get_collection_id(collection_title, client)
    assert collection_id is not None

    # start with nothing but IDs
    kwargs = {
        "collection_id": collection_id,
        "include_collection_metadata": False,
        "include_document_metadata": False,
        "include_document_text": False,
        "include_annotations": False,
        "include_annotation_latest_version_only": True
    }
    # Expected per-document key set; grown as more options are enabled.
    doc_keys = {"_id"}
    data = client.download_collection_data(**kwargs)
    assert set(data.keys()) == {"_id", "documents"}
    assert len(data["documents"]) == col_data["documents"]["num_docs"]
    for doc in data["documents"]:
        assert set(doc.keys()) == doc_keys
    # Every later download must return this same number of documents.
    num_docs = len(data["documents"])

    # turn on collection metadata
    kwargs["include_collection_metadata"] = True
    data = client.download_collection_data(**kwargs)
    assert set(data.keys()) == {"_id", "documents", "annotators", "viewers", "metadata",
                                "configuration", "labels", "archived", "creator_id"}
    assert len(data["documents"]) == num_docs
    for doc in data["documents"]:
        assert set(doc.keys()) == doc_keys

    # turn on document metadata
    kwargs["include_document_metadata"] = True
    doc_keys = doc_keys | {"metadata", "has_annotated", "creator_id", "overlap"}
    data = client.download_collection_data(**kwargs)
    assert len(data["documents"]) == num_docs
    for doc in data["documents"]:
        assert set(doc.keys()) == doc_keys

    # turn on document text
    kwargs["include_document_text"] = True
    doc_keys = doc_keys | {"text"}
    data = client.download_collection_data(**kwargs)
    assert len(data["documents"]) == num_docs
    for doc in data["documents"]:
        assert set(doc.keys()) == doc_keys

    # turn on annotations
    kwargs["include_annotations"] = True
    doc_keys = doc_keys | {"annotations"}
    annotation_keys = {"_id", "creator_id", "annotation"}
    data = client.download_collection_data(**kwargs)
    assert len(data["documents"]) == num_docs
    for doc in data["documents"]:
        assert set(doc.keys()) == doc_keys
        annotations = doc["annotations"]
        assert isinstance(annotations, list)
        assert len(annotations) > 0
        for annotation in annotations:
            assert set(annotation.keys()) == annotation_keys

    # turn on all annotation versions
    kwargs["include_annotation_latest_version_only"] = False
    annotation_keys = annotation_keys | {"_version", "_latest_version"}
    data = client.download_collection_data(**kwargs)
    assert len(data["documents"]) == num_docs
    for doc in data["documents"]:
        annotations = doc["annotations"]
        assert isinstance(annotations, list)
        assert len(annotations) > 0
        for annotation in annotations:
            assert set(annotation.keys()) == annotation_keys
Пример #13
0
# -*- coding: utf-8 -*-

#
#   Hello World client in Python
#   Connects a REQ socket to ADDRESS_SPEC (tcp://localhost:6666 unless one of
#   the alternatives below is enabled) and hands it to common.client.
#   Per the original header, the client sends "Hello" and expects "World"
#   back; that exchange lives in common.client, which is not shown here.
#

from common import client
from zeromq_compat import ZeroMQ

ADDRESS_SPEC = "tcp://localhost:6666"
# Alternative endpoints (disabled):
#ADDRESS_SPEC = "tcp://192.168.86.181:5555"
#ADDRESS_SPEC = "tcp://127.0.0.1:5678"

socket = ZeroMQ(ADDRESS_SPEC, ZeroMQ.REQ)
client(socket)
Пример #14
0
def _test_train_and_predict(collection_title):
    """Train a collection's classifier, then predict by document ID and by text.

    Training runs asynchronously and is awaited. The first document of the
    collection is then predicted twice: synchronously by ID and
    asynchronously by its text. Both predictions must be equal and
    well-formed.

    Args:
        collection_title: Title of the collection whose classifier is used.

    Returns:
        The prediction dict obtained from the by-ID request; it contains the
        keys ``"doc"`` and ``"ner"``.
    """
    client = common.login_with_test_user(common.client())

    collection = common.get_collection(client, collection_title)
    assert collection is not None
    collection_id = collection["_id"]
    assert collection_id is not None
    labels = collection["labels"]
    assert labels is not None and len(labels) > 0
    classifier_id = client.get_collection_classifier(collection_id)["_id"]
    assert classifier_id is not None
    first_document = client.get_collection_documents(collection_id,
                                                     truncate=False)[0]
    document_id = first_document["_id"]
    document_text = first_document["text"]
    assert document_text.startswith("Thousands of demonstrators have ")

    # train async
    train_job_data = client.classifier_train(classifier_id, do_async=True)
    train_job_id = _assert_job_response(train_job_data, False)
    common.wait_for_job_to_finish(client,
                                  classifier_id,
                                  train_job_id,
                                  max_wait_seconds=120)
    status = client.get_classifier_status(classifier_id)
    _check_pipeline_status(status, classifier_id)
    assert status["job_response"]["has_trained"]
    train_job_results = client.get_classifier_job_results(
        classifier_id, train_job_id)
    # isinstance() also rejects None.
    assert isinstance(train_job_results, dict)

    # predict from ID sync
    prediction_job_data = client.classifier_predict(classifier_id,
                                                    [document_id], [],
                                                    do_async=False)
    # The returned job ID is not needed for a synchronous request.
    _assert_job_response(prediction_job_data, True)
    docs_by_id = prediction_job_data["job_response"]["documents_by_id"]
    texts = prediction_job_data["job_response"]["texts"]
    assert docs_by_id.keys() == {document_id}
    prediction_from_id = docs_by_id[document_id]
    assert len(texts) == 0

    # predict from text async
    prediction_job_data = client.classifier_predict(classifier_id, [],
                                                    [document_text],
                                                    do_async=True)
    prediction_job_id = _assert_job_response(prediction_job_data, False)
    common.wait_for_job_to_finish(client,
                                  classifier_id,
                                  prediction_job_id,
                                  max_wait_seconds=120)
    prediction_job_data = client.get_classifier_job_results(
        classifier_id, prediction_job_id)
    assert isinstance(prediction_job_data, dict)
    docs_by_id = prediction_job_data["documents_by_id"]
    texts = prediction_job_data["texts"]
    assert len(docs_by_id) == 0
    assert len(texts) == 1
    prediction_from_text = texts[0]

    # should be the same
    assert prediction_from_id == prediction_from_text

    # make sure they're in the right format
    assert isinstance(prediction_from_id, dict)
    assert "doc" in prediction_from_id
    assert "ner" in prediction_from_id
    assert isinstance(prediction_from_id["doc"], list)
    for pred in prediction_from_id["doc"]:
        assert isinstance(pred, str)
        assert pred in labels
    assert isinstance(prediction_from_id["ner"], list)
    for pred in prediction_from_id["ner"]:
        # Each entry is a list starting with two ints and a label string;
        # the first int must be >= 0 and the second strictly greater.
        assert isinstance(pred, list)
        assert isinstance(pred[0], int)
        assert isinstance(pred[1], int)
        assert isinstance(pred[2], str)
        assert pred[0] >= 0 and pred[1] > pred[0]
        assert pred[2] in labels

    return prediction_from_id
Пример #15
0
def test_is_valid():
    """A freshly created client must report itself as valid."""
    client = common.client()
    assert client.is_valid()
Пример #16
0
def test_get_pipeline_status():
    """Validate the status reported for every pipeline in the test data."""
    session = common.login_with_test_user(common.client())

    for entry in common.test_pipeline_data():
        pipeline_status = session.get_pipeline_status(entry["_id"])
        # No classifier ID applies at the pipeline level, so None is passed.
        _check_pipeline_status(pipeline_status, None)
Пример #17
0
def test_get_and_advance_next_documents(tmp_path):
    """Walk the next-document queue of a freshly created collection.

    Five documents are loaded from a CSV file at collection creation and five
    more are added afterwards. The test then calls ``get_next_document`` and
    ``advance_next_document`` until no next document is returned, checking
    that every document is offered exactly once.

    Args:
        tmp_path: pytest-provided temporary directory for the documents CSV.
    """
    client = common.login_with_test_user(common.client())
    pipeline_id = [
        p for p in client.get_pipelines() if p["name"].lower() == "spacy"
    ][0]["_id"]
    assert pipeline_id is not None
    my_id = client.get_my_user_id()
    assert my_id is not None

    # write documents CSV
    documents_file = tmp_path / "documents.csv"
    with open(documents_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for i in range(5):
            writer.writerow(["This is document number {}.".format(i)])

    collection = client.collection_builder() \
        .viewer(my_id) \
        .annotator(my_id) \
        .label("label") \
        .title("Collection to Test Next Document") \
        .description("This is a collection for pytest to test the next/advance documents feature.") \
        .classifier(pipeline_id, train_every=100, overlap=1) \
        .document_csv_file(documents_file, has_header=False, text_column=0)
    collection_id = client.create_collection(collection)
    assert collection_id is not None

    # Everything after creation runs under try/finally so the collection is
    # archived even when one of the early assertions fails.
    try:
        document_ids = [
            d["_id"]
            for d in client.get_collection_documents(collection_id, True, 1)
        ]
        assert len(document_ids) == 5

        classifier = client.get_collection_classifier(collection_id)
        assert classifier is not None
        classifier_id = classifier["_id"]
        assert classifier_id is not None

        # add more documents
        document_ids += [
            client.add_document(collection_id=collection_id,
                                overlap=1,
                                text="This is document number {}.".format(i))
            for i in range(5, 10)
        ]
        assert len(document_ids) == 10
        for document_id in document_ids:
            assert isinstance(document_id, str)

        next_ids = []
        next_id = client.get_next_document(classifier_id)
        while next_id is not None:
            assert isinstance(next_id, str)
            assert next_id not in next_ids  # no duplicates
            next_ids.append(next_id)
            assert len(next_ids) <= len(
                document_ids)  # sanity check to prevent an infinite loop
            updated_document = client.advance_next_document(
                classifier_id, next_id)
            assert isinstance(updated_document, dict)
            next_id = client.get_next_document(classifier_id)
        assert set(document_ids) == set(next_ids)
    finally:
        client.archive_collection(collection_id)