示例#1
0
def test_file_embed_lookup():
    """Index two one-sentence files and verify search and item listing.

    Each file is uploaded, blockified with ``markdown-blockifier-default-1.0``
    and tagged with a freshly created ``test-tagger`` instance.  Both files
    are then inserted into an index obtained from ``random_index``.  The test
    checks that a question about each person returns exactly that person's
    sentence, and that ``list_items`` reports one embedded item per file with
    embeddings of equal length.

    The uploaded files are deleted in a ``finally`` clause so they are removed
    whether the test passes or fails.
    """
    steamship = get_steamship_client()

    content_a = "Ted likes to run."
    content_b = "Grace likes to bike."

    uploaded = []
    try:
        for content in (content_a, content_b):
            uploaded_file = steamship.upload(content=content,
                                             mime_type=MimeTypes.MKD).data
            # Track the file right away so it is cleaned up even if a later
            # step of this loop fails.
            uploaded.append(uploaded_file)

            blockify_res = uploaded_file.blockify(
                plugin_instance="markdown-blockifier-default-1.0")
            assert blockify_res.error is None
            blockify_res.wait()

            parser = PluginInstance.create(steamship,
                                           plugin_handle="test-tagger").data
            parse_res = uploaded_file.tag(plugin_instance=parser.handle)
            assert parse_res.error is None
            parse_res.wait()

        file_a, file_b = uploaded

        embedder = PluginInstance.create(steamship,
                                         plugin_handle="test-embedder").data
        # Now we add the files to the index
        with random_index(steamship, embedder.handle) as index:
            index.insert_file(file_a.id, block_type="sentence", reindex=True)
            index.insert_file(file_b.id, block_type="sentence", reindex=True)

            res = index.search("What does Ted like to do?").data
            assert len(res.items) == 1
            assert res.items[0].value.value == content_a

            res = index.search("What does Grace like to do?").data
            assert len(res.items) == 1
            assert res.items[0].value.value == content_b

            # Now we list the items
            itemsa = index.list_items(file_id=file_a.id).data
            assert len(itemsa.items) == 1
            assert len(itemsa.items[0].embedding) > 0
            assert itemsa.items[0].value == content_a

            itemsb = index.list_items(file_id=file_b.id).data
            assert len(itemsb.items) == 1
            assert len(itemsb.items[0].embedding) > 0
            assert len(itemsb.items[0].embedding) == len(
                itemsa.items[0].embedding)
            assert itemsb.items[0].value == content_b
    finally:
        for uploaded_file in uploaded:
            uploaded_file.delete()
示例#2
0
def test_file_index():
    """Index a markdown file through ``file.index`` and search it.

    A two-section markdown document is uploaded, blockified and tagged, after
    which the file is expected to contain six blocks.  The file is then
    indexed with a ``test-embedder`` instance, and two questions are expected
    to return the full text of the matching paragraph block.

    The index and the uploaded file are deleted in ``finally`` clauses (index
    first, then file) so they are removed even when an assertion fails.
    """
    steamship = get_steamship_client()
    t = "A nice poem"
    p1_1 = "Roses are red."
    p1_2 = "Violets are blue."
    p2_1 = "Sugar is sweet."
    p2_2 = "I love you."
    t2 = "A flavorful story"
    p3_1 = "Cake is made of flour."
    p3_2 = "Cake tastes good with milk."
    p4_1 = "Cake comes in chocolate and vanilla flavors."
    p4_2 = "Cake can be cut into mAny pieces and shared."

    content1 = f"# {t}\n\n{p1_1} {p1_2}\n\n{p2_1} {p2_2}"
    content2 = f"# {t2}\n\n{p3_1} {p3_2}\n\n{p4_1} {p4_2}"
    content = f"{content1}\n\n{content2}"

    file = steamship.upload(content=content, mime_type=MimeTypes.MKD).data
    try:
        assert file.id is not None
        assert file.mime_type == MimeTypes.MKD

        blockify_resp = file.blockify(
            plugin_instance="markdown-blockifier-default-1.0")
        assert blockify_resp.error is None
        blockify_resp.wait()

        # Now we parse
        parser = PluginInstance.create(steamship,
                                       plugin_handle="test-tagger").data
        parse_resp = file.tag(plugin_instance=parser.handle)
        assert parse_resp.error is None
        parse_resp.wait()

        # Now the sentences should be parsed!
        q2 = file.refresh().data
        assert len(q2.blocks) == 6

        # Now we add the file to the index via the shortcut.
        embedder = PluginInstance.create(steamship,
                                         plugin_handle="test-embedder").data
        # noinspection PyUnresolvedReferences
        index = file.index(plugin_instance=embedder.handle)
        try:
            res = index.search("What color are roses?").data
            assert len(res.items) == 1
            # Because the simdex now indexes entire blocks and not sentences,
            # the result of this is the whole block text
            assert res.items[0].value.value == " ".join([p1_1, p1_2])

            res = index.search("What flavors does cake come in?").data
            assert len(res.items) == 1
            # Because the simdex now indexes entire blocks and not sentences,
            # the result of this is the whole block text
            assert res.items[0].value.value == " ".join([p4_1, p4_2])
        finally:
            index.delete()
    finally:
        file.delete()
示例#3
0
def test_plugin_instance_get():
    """A plugin instance created under an explicit handle can be fetched by that handle."""
    client = get_steamship_client()
    # A UUID suffix gives every run its own handle.
    unique_handle = f"test_tagger_test_handle{uuid.uuid4()}"

    created = PluginInstance.create(
        client, plugin_handle="test-tagger", handle=unique_handle
    ).data
    assert created.id is not None

    fetched = PluginInstance.get(client, handle=unique_handle).data
    assert created.id == fetched.id
示例#4
0
def test_deploy_in_space():
    """A plugin instance created with an explicit ``space_id`` reports that space."""
    steamship = get_steamship_client()
    # NOTE(review): the space uses a fixed handle and is never removed here;
    # confirm whether cleanup is needed between runs.
    target_space = Space.create(steamship, handle="test-non-default-space").data
    tagger = PluginInstance.create(
        steamship, plugin_handle="test-tagger", space_id=target_space.id
    ).data
    assert tagger.space_id == target_space.id
示例#5
0
def basic_embeddings(plugin_instance: PluginInstance):
    """Tag a few short texts with ``plugin_instance`` and check the embeddings.

    Every tagging response must contain exactly one embedding according to
    ``count_embeddings``.  The first tag on the first block must carry an
    ``"embedding"`` value longer than one element, and tagging the same text
    again must produce an embedding of the same length.
    """

    def _first_embedding(response):
        # The embedding is read from the first tag of the first block.
        return response.data.file.blocks[0].tags[0].value["embedding"]

    # Both requests are issued before either one is awaited.
    sentence_task = plugin_instance.tag("This is a test")
    word_task = plugin_instance.tag("Banana")
    sentence_task.wait()
    word_task.wait()
    assert count_embeddings(sentence_task.data.file) == 1
    assert count_embeddings(word_task.data.file) == 1
    assert len(_first_embedding(sentence_task)) > 1

    repeat_task = plugin_instance.tag("This is a test")
    repeat_task.wait()
    assert count_embeddings(repeat_task.data.file) == 1
    assert len(_first_embedding(repeat_task)) == len(
        _first_embedding(sentence_task))

    final_task = plugin_instance.tag("This is a test")
    final_task.wait()
    assert count_embeddings(final_task.data.file) == 1
示例#6
0
def test_parsing():
    """Tagging a short string yields one block with that text and five tags."""
    client = get_steamship_client()
    tagger = PluginInstance.create(client, plugin_handle="test-tagger").data

    task = tagger.tag("This is a test")
    task.wait()
    tagged = task.data

    assert len(tagged.file.blocks) == 1
    only_block = tagged.file.blocks[0]

    assert only_block.text == "This is a test"
    assert len(only_block.tags) == 5
示例#7
0
def test_file_parse():
    """Blockify, tag, index and search a two-section markdown file.

    The document is assembled from the module-level title and paragraph
    constants, uploaded, blockified and tagged, after which six blocks are
    expected.  The file is inserted into an index from ``random_index``
    without reindexing and embedded explicitly via ``index.embed()``.  A
    search is then expected to return the whole text of the matching
    paragraph block.

    The uploaded file is deleted in a ``finally`` clause so it is removed
    even when an assertion fails.
    """
    steamship = get_steamship_client()
    content1 = f"# {T}\n\n{P1_1} {P1_2}\n\n{P2_1} {P2_2}"
    content2 = f"# {T2}\n\n{P3_1} {P3_2}\n\n{P4_1} {P4_2}"
    content = f"{content1}\n\n{content2}"

    file = steamship.upload(content=content, mime_type=MimeTypes.MKD).data
    try:
        assert file.id is not None
        assert file.mime_type == MimeTypes.MKD

        blockify_resp = file.blockify(
            plugin_instance="markdown-blockifier-default-1.0")
        assert blockify_resp.error is None
        blockify_resp.wait()

        # Now we parse
        parser = PluginInstance.create(steamship,
                                       plugin_handle="test-tagger").data
        parse_resp = file.tag(plugin_instance=parser.handle)
        assert parse_resp.error is None
        parse_resp.wait()

        # Now the sentences should be parsed!
        q2 = file.refresh().data
        assert len(q2.blocks) == 6

        # Now we add the file to the index
        plugin_instance = PluginInstance.create(
            steamship, plugin_handle=_TEST_EMBEDDER).data
        with random_index(steamship,
                          plugin_instance=plugin_instance.handle) as index:
            # reindex=False: embedding is triggered explicitly just below.
            index.insert_file(file.id, reindex=False)
            embed_resp = index.embed()
            assert embed_resp.error is None
            embed_resp.wait()

            res = index.search("What color are roses?").data
            assert len(res.items) == 1
            # Because the simdex now indexes entire blocks and not sentences,
            # the result of this is the whole block text
            assert res.items[0].value.value == " ".join([P1_1, P1_2])
    finally:
        file.delete()
示例#8
0
def test_task_comment_feedback_reporting():
    """
    We want to be able to generate reports like this:

    Select Across Group    -- externalGroup
    Inputs Seen: XXX       -- Distinct externalId
    Inputs Suggested: YYY  -- Add to metadata
    Inputs Liked / Disliked / Used -- Add to metadata

    So really we just need to test the group aggregation
    """
    client = get_steamship_client()
    embedder = PluginInstance.create(client, plugin_handle="test-embedder").data
    with random_index(client, plugin_instance=embedder.handle) as index:
        pizza = EmbeddedItem(
            value="Pizza", external_id="pizza", external_type="food", metadata=[1, 2, 3]
        )

        group_name_1 = random_name()
        group_name_2 = random_name()

        index.insert(
            pizza.value,
            external_id=pizza.external_id,
            external_type=pizza.external_type,
            metadata=pizza.metadata,
        )
        embed_task = index.embed()
        embed_task.wait()

        res = index.search(pizza.value, include_metadata=True, k=1)

        # Two comments in the first group, one in the second.
        planned_comments = (
            ("Foo1", group_name_1),
            ("Foo2", group_name_1),
            ("Foo2", group_name_2),
        )
        for comment_id, group in planned_comments:
            res.task.add_comment(
                external_id=comment_id,
                external_type="Bar1",
                external_group=group,
                metadata=[1, 2, 3],
            )

        comments = res.task.list_comments()
        assert len(comments.data.comments) == 3

        # Each entry: filters passed to client.list_comments, expected count.
        expectations = (
            ({"external_group": group_name_1}, 2),
            ({"external_group": group_name_2}, 1),
            ({"task_id": res.task.task_id, "external_group": group_name_1}, 2),
            ({"task_id": res.task.task_id, "external_group": group_name_2}, 1),
            (
                {
                    "task_id": res.task.task_id,
                    "external_id": "Foo1",
                    "external_group": group_name_1,
                },
                1,
            ),
            (
                {
                    "task_id": res.task.task_id,
                    "external_id": "Foo1",
                    "external_group": group_name_2,
                },
                0,
            ),
        )
        for filters, expected_count in expectations:
            listing = client.list_comments(**filters)
            assert len(listing.data.comments) == expected_count

        # Remove all three comments that were listed above.
        for stored_comment in comments.data.comments:
            stored_comment.delete()

        for group in (group_name_1, group_name_2):
            listing = client.list_comments(external_group=group)
            assert len(listing.data.comments) == 0
示例#9
0
def test_basic_task_comment():
    """Add, list and delete comments on search tasks.

    Two searches are run against the same index.  A comment is attached to
    the first search's task (``res2``) and left alone while comments on the
    second search's task (``res``) are added, listed and deleted.  ``res2`` is
    only checked at the end, to make sure comments attached to one task do
    not show up on another.
    """
    steamship = get_steamship_client()
    embedder = PluginInstance.create(steamship, plugin_handle="test-embedder").data
    with random_index(steamship, embedder.handle) as index:
        item1 = EmbeddedItem(
            value="Pizza", external_id="pizza", external_type="food", metadata=[1, 2, 3]
        )

        index.insert(
            item1.value,
            external_id=item1.external_id,
            external_type=item1.external_type,
            metadata=item1.metadata,
        )
        task = index.embed()
        task.wait()

        res2 = index.search(item1.value, include_metadata=True, k=1)
        res2.task.add_comment(external_id="Foo", external_type="Bar", metadata=[1, 2])
        # We don't return to Res2 until the end to make sure we aren't co-mingling comments!

        res = index.search(item1.value, include_metadata=True, k=1)

        assert res.data.items is not None
        assert len(res.data.items) == 1
        assert res.data.items[0].value.value == item1.value
        assert res.data.items[0].value.external_id == item1.external_id
        assert res.data.items[0].value.external_type == item1.external_type
        # NOTE(review): the result of _list_equal is not asserted here;
        # presumably the helper asserts internally -- confirm.
        _list_equal(res.data.items[0].value.metadata, item1.metadata)

        res.task.add_comment(external_id="Foo", external_type="Bar", metadata=[1, 2])

        comments = res.task.list_comments()
        assert len(comments.data.comments) == 1

        comment = comments.data.comments[0]
        assert comment.external_id == "Foo"
        assert comment.external_type == "Bar"
        _list_equal(comment.metadata, [1, 2])

        comment.delete()

        comments = res.task.list_comments()
        assert len(comments.data.comments) == 0

        # Now let's add two more
        res.task.add_comment(external_id="Foo1", external_type="Bar1", metadata=[1, 2, 3])
        res.task.add_comment(external_id="Foo2", external_type="Bar2", metadata=[1, 2, 3, 4])

        comments = res.task.list_comments()
        assert len(comments.data.comments) == 2

        comment = comments.data.comments[0]
        assert comment.external_id == "Foo1"
        assert comment.external_type == "Bar1"
        _list_equal(comment.metadata, [1, 2, 3])

        comment = comments.data.comments[1]
        assert comment.external_id == "Foo2"
        assert comment.external_type == "Bar2"
        _list_equal(comment.metadata, [1, 2, 3, 4])

        comments.data.comments[0].delete()
        comments.data.comments[1].delete()

        comments = res.task.list_comments()
        assert len(comments.data.comments) == 0

        # Now we handle res2
        comments = res2.task.list_comments()
        assert len(comments.data.comments) == 1
        comment = comments.data.comments[0]
        assert comment.external_id == "Foo"
        assert comment.external_type == "Bar"
        _list_equal(comment.metadata, [1, 2])
        comments.data.comments[0].delete()

        # Re-list res2's own comments to confirm the deletion took effect on
        # that task; checking only res here would not exercise it.
        comments = res2.task.list_comments()
        assert len(comments.data.comments) == 0

        # res must still be empty as well.
        comments = res.task.list_comments()
        assert len(comments.data.comments) == 0
示例#10
0
def test_snapshot_create():
    """Search results come from the live index before a snapshot and from the snapshot after.

    For each inserted sentence the test searches once right after the insert
    (expecting ``index_source == "index"``) and once after ``_snapshot``
    (expecting ``index_source == "snapshot"``).  A second index repeats the
    check with fifteen similar sentences and ``window_size=2``.
    """

    def _assert_single_hit(target_index, query, expected_source, expected_value):
        # Exactly one hit is expected, carrying the given source and value
        # and the external id, external type and three-element metadata that
        # every search result in this test is expected to have.
        response = target_index.search(query, include_metadata=True)
        assert len(response.data.items) == 1
        hit = response.data.items[0].value
        assert hit.index_source == expected_source
        assert hit.value == expected_value
        assert hit.external_id == "TestId"
        assert hit.external_type == "TestType"
        assert len(hit.metadata) == 3

    steamship = get_steamship_client()

    embedder = PluginInstance.create(steamship, plugin_handle=_TEST_EMBEDDER).data
    index = steamship.create_index(plugin_instance=embedder.handle).data

    rounds = (
        ("Oranges are orange.", "What color are oranges?"),
        ("Apples are red.", "What color are apples?"),
    )
    for sentence, question in rounds:
        _insert(index, [sentence])
        _assert_single_hit(index, question, "index", sentence)

        _snapshot(index)
        _assert_single_hit(index, question, "snapshot", sentence)

    index.delete()

    # Second scenario: a fresh client and index, many near-identical sentences.
    steamship = get_steamship_client()
    index = steamship.create_index(plugin_instance=embedder.handle).data

    sentences = [
        "Orange number {} is as good as the last".format(number)
        for number in range(15)
    ]
    question = "Is orange number 13 Any good?"
    expected = "Orange number 13 is as good as the last"
    _insert(index, sentences)

    _assert_single_hit(index, question, "index", expected)

    _snapshot(index, window_size=2)
    _assert_single_hit(index, question, "snapshot", expected)

    index.delete()
示例#11
0
def test_basic_embedding_search():
    """Run ``basic_embedding_search`` with a new instance of the ``_TEST_EMBEDDER`` plugin."""
    steamship = get_steamship_client()
    embedder = PluginInstance.create(
        steamship, plugin_handle=_TEST_EMBEDDER
    ).data
    basic_embedding_search(steamship, embedder.handle)
示例#12
0
def test_parse_file():
    """Run ``tag_file`` with the handle of a new ``test-tagger`` instance."""
    client = get_steamship_client()
    tagger = PluginInstance.create(client, plugin_handle="test-tagger").data
    tag_file(client, tagger.handle)
示例#13
0
def test_e2e_parser():
    """Deploy the configurable tagger and check each instance tags according to its own config.

    The plugin is deployed with one instance config; a second instance of the
    same plugin version is then created with a different config.  For both,
    tagging a short document must return one block with the original text and
    one file-level tag whose name, kind and value fields match that
    instance's config.
    """

    def _assert_tags_match_config(tagger, doc, config):
        # Tag the document and compare the single file tag with the config.
        response = tagger.tag(doc=doc)
        response.wait()
        assert response.error is None
        assert response.data is not None
        assert len(response.data.file.blocks) == 1
        assert response.data.file.blocks[0].text == doc

        # Validate configured content
        assert len(response.data.file.tags) == 1
        produced = response.data.file.tags[0]
        assert produced.name == config["tagName"]
        assert produced.kind == config["tagKind"]
        produced_value = produced.value
        assert produced_value["numberValue"] == config["numberValue"]
        assert produced_value["booleanValue"] == config["booleanValue"]

    client = get_steamship_client()
    tagger_plugin_path = PLUGINS_PATH / "taggers" / "plugin_configurable_tagger.py"
    config_template = {
        "tagKind": {"type": "string"},
        "tagName": {"type": "string"},
        "numberValue": {"type": "number"},
        "booleanValue": {"type": "boolean"},
    }
    instance_config1 = {
        "tagKind": "testTagKind",
        "tagName": "testTagName",
        "numberValue": 3,
        "booleanValue": True,
    }
    instance_config2 = {
        "tagKind": "testTagKind2",
        "tagName": "testTagName2",
        "numberValue": 4,
        "booleanValue": False,
    }

    with deploy_plugin(
        client,
        tagger_plugin_path,
        "tagger",
        version_config_template=config_template,
        instance_config=instance_config1,
    ) as (plugin, version, first_instance):
        test_doc = "Hi there"
        _assert_tags_match_config(first_instance, test_doc, instance_config1)

        # A second instance of the same plugin version, configured differently.
        second_response = PluginInstance.create(
            client,
            plugin_id=plugin.id,
            plugin_version_id=version.id,
            config=instance_config2,
        )
        second_response.wait()
        assert second_response.error is None
        assert second_response.data is not None
        second_instance = second_response.data

        _assert_tags_match_config(second_instance, test_doc, instance_config2)