Exemplo n.º 1
0
def test_dataset_del() -> None:
    """Deleting an entry by its id removes exactly that entry from a Dataset."""

    def build(payload, note):
        # Local factory to avoid repeating the StorableObject boilerplate.
        return StorableObject(
            id=UID(), data=payload, description=note, tags=["dummy", "test"]
        )

    obj1 = build(UID(), "This is a dummy id")
    obj2 = build(th.Tensor([1, 2, 3, 4]), "This is a dummy tensor n1")
    obj3 = build(th.Tensor([10, 20, 30, 40]), "This is a dummy tensor n2")

    dataset_obj = Dataset(
        id=UID(),
        data=[obj1, obj2, obj3],
        description="This is a dataset",
        tags=["dummy", "dataset"],
    )

    # Remove the middle element through the mapping protocol.
    del dataset_obj[obj2.id]

    assert dataset_obj.data == [obj1, obj3]
Exemplo n.º 2
0
def test_dataset_get_element() -> None:
    """Indexing a Dataset by an element's id returns that single element."""

    def build(payload, note):
        # Local factory to avoid repeating the StorableObject boilerplate.
        return StorableObject(
            id=UID(), data=payload, description=note, tags=["dummy", "test"]
        )

    obj1 = build(UID(), "This is a dummy id")
    obj2 = build(th.Tensor([1, 2, 3, 4]), "This is a dummy tensor n1")
    obj3 = build(th.Tensor([10, 20, 30, 40]), "This is a dummy tensor n2")

    dataset_obj = Dataset(
        id=UID(),
        data=[obj1, obj2, obj3],
        description="This is a dataset",
        tags=["dummy", "dataset"],
    )

    result = dataset_obj[obj1.id]

    assert len(result) == 1
    assert result[0] == obj1
Exemplo n.º 3
0
def test_dataset_search_id_fail() -> None:
    """Membership test with an id that was never stored must be False."""

    def build(payload, note):
        # Local factory to avoid repeating the StorableObject boilerplate.
        return StorableObject(
            id=UID(), data=payload, description=note, tags=["dummy", "test"]
        )

    obj1 = build(UID(), "This is a dummy id")
    obj2 = build(th.Tensor([1, 2, 3, 4]), "This is a dummy tensor n1")
    obj3 = build(th.Tensor([10, 20, 30, 40]), "This is a dummy tensor n2")

    dataset_obj = Dataset(
        id=UID(),
        data=[obj1, obj2, obj3],
        description="This is a dataset",
        tags=["dummy", "dataset"],
    )

    # A brand-new UID cannot match any stored element.
    assert UID() not in dataset_obj
Exemplo n.º 4
0
def test_serde_storable_obj() -> None:
    """A Dataset of StorableObjects serializes and deserializes cleanly."""

    def build(payload, note):
        # Local factory to avoid repeating the StorableObject boilerplate.
        return StorableObject(
            id=UID(), data=payload, description=note, tags=["dummy", "test"]
        )

    obj1 = build(th.Tensor([-1, -2, -3, -4]), "This is a dummy id")
    obj2 = build(th.Tensor([1, 2, 3, 4]), "This is a dummy tensor n1")
    obj3 = build(th.Tensor([10, 20, 30, 40]), "This is a dummy tensor n2")

    dataset_obj = Dataset(
        id=UID(),
        data=[obj1, obj2, obj3],
        description="This is a dataset",
        tags=["dummy", "dataset"],
    )

    # Round-trip: only checks that neither direction raises.
    sy.deserialize(blob=sy.serialize(obj=dataset_obj))
Exemplo n.º 5
0
def test_keys(client, database, cleanup):
    """DiskObjectStore.keys() lists exactly the ids of the stored objects.

    Uses the ``client``/``database``/``cleanup`` pytest fixtures and the
    module-level ``tensor1``/``tensor2`` test tensors.
    """
    disk_store = DiskObjectStore(database)

    id1 = UID()
    id2 = UID()
    # Store two objects under distinct ids (subscript form of __setitem__).
    disk_store[id1] = StorableObject(id=id1, data=tensor1)
    disk_store[id2] = StorableObject(id=id2, data=tensor2)

    keys = disk_store.keys()
    assert any(id1 == k for k in keys)
    assert any(id2 == k for k in keys)
    assert len(keys) == 2
Exemplo n.º 6
0
def test_delete(client, database, cleanup):
    """Deleting an id removes both the binary object and its metadata row."""
    disk_store = DiskObjectStore(database)
    _id = UID()
    storable = StorableObject(id=_id, data=tensor1)
    disk_store[_id] = storable

    # Verify the object and its metadata landed in the database first.
    bin_obj = database.session.query(BinObject).get(str(_id.value))
    metadata = (database.session.query(ObjectMetadata).filter_by(
        obj=str(_id.value)).one())

    assert bin_obj is not None
    assert th.all(th.eq(bin_obj.object, tensor1))

    assert metadata is not None
    assert metadata.tags == []
    assert metadata.description == ""
    assert metadata.read_permissions == {}
    assert metadata.search_permissions == {}

    disk_store.delete(_id)

    assert database.session.query(BinObject).get(str(_id.value)) is None

    # The metadata row must be gone too: .one() now raises NoResultFound.
    with pytest.raises(NoResultFound):
        database.session.query(ObjectMetadata).filter_by(
            obj=str(_id.value)).one()
Exemplo n.º 7
0
def test_clear(client, database, cleanup):
    """clear() empties both the binary-object and metadata tables."""
    disk_store = DiskObjectStore(database)
    obj_id = UID()
    disk_store.__setitem__(obj_id, StorableObject(id=obj_id, data=tensor1))

    key_str = str(obj_id.value)
    stored_bin = database.session.query(BinObject).get(key_str)
    stored_meta = (database.session.query(ObjectMetadata)
                   .filter_by(obj=key_str).one())

    # The object and its (default) metadata must be present before clearing.
    assert stored_bin is not None
    assert th.all(th.eq(stored_bin.object, tensor1))

    assert stored_meta is not None
    assert stored_meta.tags == []
    assert stored_meta.description == ""
    assert stored_meta.read_permissions == {}
    assert stored_meta.search_permissions == {}

    fetched = disk_store.get_object(obj_id)
    assert th.all(th.eq(fetched.data, tensor1))
    assert fetched.id == obj_id

    disk_store.clear()

    # Both tables are fully emptied.
    assert database.session.query(BinObject).count() == 0
    assert database.session.query(ObjectMetadata).count() == 0
Exemplo n.º 8
0
def create_dataset(df_json: dict) -> dict:
    """Create a dataset from a JSON description whose tensors carry CSV content.

    For each entry in ``df_json["tensors"]`` the CSV ``content`` is parsed
    into a float32 torch tensor, stored in the disk object store under a
    fresh UID, and the entry's id/shape/dtype are written back into the
    returned (deep-copied) JSON. The input dict is not mutated.
    """
    _json = deepcopy(df_json)
    storage = DiskObjectStore(db)
    mapping = []

    # Separate CSV content from metadata, assigning a fresh id per tensor.
    for name in _json["tensors"].copy():
        _id = UID()
        _json["tensors"][name]["id"] = str(_id.value)
        mapping.append((name, _id, _json["tensors"][name].pop("content", None)))

    # Give the dataset one id shared by its metadata and its group rows.
    df_id = UID()
    _json["id"] = str(df_id.value)

    # Build storables from the CSV payloads and record shape/dtype metadata.
    # (The original kept an unused ``storables`` list and an unused
    # enumerate index; both removed.)
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))

        _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
        _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
        storage.__setitem__(_id, StorableObject(id=_id, data=_tensor))
        # Link the stored binary object to this dataset.
        db.session.add(
            DatasetGroup(bin_object=str(_id.value), dataset=str(df_id.value)))

    json_obj = JsonObject(id=_json["id"], binary=_json)
    metadata = get_metadata(db)
    metadata.length += 1

    db.session.add(json_obj)
    db.session.commit()
    return _json
Exemplo n.º 9
0
def generate_id_obj(
    data: th.Tensor, description: str, tags: List[str]
) -> Tuple[UID, StorableObject]:
    """Create a fresh UID and a StorableObject wrapping *data* under it.

    Returns the (uid, object) pair so callers can index the object later.
    """
    # Named ``uid`` instead of ``id`` to avoid shadowing the builtin.
    uid = UID()
    obj = StorableObject(id=uid, data=data, description=description, tags=tags)

    return uid, obj
Exemplo n.º 10
0
def test_get_objects_of_type(client, database, cleanup):
    """get_objects_of_type(th.Tensor) returns every stored tensor object."""
    disk_store = DiskObjectStore(database)

    id1 = UID()
    id2 = UID()
    # Store two tensor-backed objects (subscript form of __setitem__).
    disk_store[id1] = StorableObject(id=id1, data=tensor1)
    disk_store[id2] = StorableObject(id=id2, data=tensor2)

    selected = disk_store.get_objects_of_type(th.Tensor)
    selected_data = [x.data for x in selected]

    assert any(th.all(th.eq(tensor1, d_)) for d_ in selected_data)
    assert any(th.all(th.eq(tensor2, d_)) for d_ in selected_data)
    assert len(selected_data) == 2
Exemplo n.º 11
0
def test_values(client, database, cleanup):
    """DiskObjectStore.values() yields every stored object exactly once."""
    disk_store = DiskObjectStore(database)

    id1 = UID()
    id2 = UID()
    # Store two objects under distinct ids (subscript form of __setitem__).
    disk_store[id1] = StorableObject(id=id1, data=tensor1)
    disk_store[id2] = StorableObject(id=id2, data=tensor2)

    values = disk_store.values()
    values_data = [v.data for v in values]

    assert any(th.all(th.eq(tensor1, v)) for v in values_data)
    assert any(th.all(th.eq(tensor2, v)) for v in values_data)
    assert len(values_data) == 2
Exemplo n.º 12
0
def test_create_dataset_with_store_obj() -> None:
    """A Dataset can be constructed from existing StorableObjects."""
    obj1 = StorableObject(
        id=UID(),
        data=UID(),
        description="This is a dummy id",
        tags=["dummy", "test"],
    )

    obj2 = StorableObject(
        id=UID(),
        data=th.Tensor([1, 2, 3, 4]),
        description="This is a dummy tensor",
        tags=["dummy", "test"],
    )

    # Construction must not raise; the instance itself is not inspected.
    Dataset(
        id=UID(),
        data=[obj1, obj2],
        description="This is a dummy tensor",
        tags=["dummy", "dataset"],
    )
Exemplo n.º 13
0
def test_serde_storable_obj() -> None:
    """A single StorableObject serializes and deserializes without error."""
    obj = StorableObject(
        id=UID(),
        data=th.Tensor([1, 2, 3, 4]),
        description="This is a dummy test",
        tags=["dummy", "test"],
    )

    # Round-trip: only checks that neither direction raises.
    sy.deserialize(blob=sy.serialize(obj=obj))
Exemplo n.º 14
0
def test_serde_storable_obj_2() -> None:
    """Serde round-trip preserves id, data, description and tags."""
    original = StorableObject(
        id=UID(),
        data=th.Tensor([1, 2, 3, 4]),
        description="This is a dummy test",
        tags=["dummy", "test"],
    )

    restored = sy.deserialize(blob=serialize(original))

    assert original.id == restored.id
    assert (original.data == restored.data).all()
    assert original.description == restored.description
    assert original.tags == restored.tags
Exemplo n.º 15
0
def test_serde_storable_obj_2() -> None:
    """Serde round-trip of a Dataset preserves it and all nested elements."""

    def build(payload, note):
        # Local factory to avoid repeating the StorableObject boilerplate.
        return StorableObject(
            id=UID(), data=payload, description=note, tags=["dummy", "test"]
        )

    elements = [
        build(th.Tensor([-1, -2, -3, -4]), "This is a dummy id"),
        build(th.Tensor([1, 2, 3, 4]), "This is a dummy tensor n1"),
        build(th.Tensor([10, 20, 30, 40]), "This is a dummy tensor n2"),
    ]

    obj = Dataset(
        id=UID(),
        data=elements,
        description="This is a dataset",
        tags=["dummy", "dataset"],
    )

    ds_obj = sy.deserialize(blob=serialize(obj))

    # Top-level fields survive the round-trip.
    assert obj.id == ds_obj.id
    assert obj.description == ds_obj.description
    assert obj.tags == ds_obj.tags
    assert len(obj.data) == len(ds_obj.data)

    # Each nested StorableObject survives, in order.
    for before, after in zip(obj.data, ds_obj.data):
        assert before.id == after.id
        assert th.all(th.eq(before.data, after.data))
        assert before.description == after.description
        assert before.tags == after.tags
Exemplo n.º 16
0
def create_tensor_msg(
    msg: CreateTensorMessage,
    node: AbstractNode,
) -> CreateTensorResponse:
    """Create a tensor on *node* from the message payload and store it.

    Expects ``msg.content`` to carry ``tensor`` (list data), optional
    ``tags``/``description``/``searchable`` flags, and ``internal_key``
    (hex-encoded signing key). Returns a response with the new tensor's id,
    or with an ``error`` entry if anything raised.
    """
    try:
        payload = msg.content

        new_tensor = th.tensor(payload["tensor"])
        new_tensor.tag(*payload.get("tags", []))
        new_tensor.describe(payload.get("description", ""))

        id_at_location = UID()

        # Step 2: create message which contains object to send.
        # "searchable" grants search permission to everyone via VerifyAll.
        storable = StorableObject(
            id=id_at_location,
            data=new_tensor,
            tags=new_tensor.tags,
            description=new_tensor.description,
            search_permissions={VerifyAll(): None}
            if payload.get("searchable", False)
            else {},
        )

        obj_msg = SaveObjectAction(obj=storable, address=node.address)

        signed_message = obj_msg.sign(
            signing_key=SigningKey(
                payload["internal_key"].encode("utf-8"), encoder=HexEncoder
            )
        )

        node.recv_immediate_msg_without_reply(msg=signed_message)

        return CreateTensorResponse(
            address=msg.reply_to,
            status_code=200,
            content={
                # Fixed typo: was "succesfully".
                "msg": "Tensor created successfully!",
                "tensor_id": str(id_at_location.value),
            },
        )
    except Exception as e:
        # NOTE(review): errors also return status_code=200 — presumably the
        # caller inspects the "error" key instead; confirm before changing.
        return CreateTensorResponse(
            address=msg.reply_to,
            status_code=200,
            content={"error": str(e)},
        )
Exemplo n.º 17
0
def test_save_object_action_serde() -> None:
    """SaveObjectAction round-trips through serde with data and address intact."""
    payload = th.tensor([1, 2, 3])
    addr = Address(network=SpecificLocation(), device=SpecificLocation())

    msg = SaveObjectAction(
        obj=StorableObject(id=UID(), data=payload), address=addr
    )

    msg2 = sy.deserialize(blob=serialize(msg))

    # The wrapped tensor survives unchanged.
    assert (msg2.obj.data == msg.obj.data).all()

    # Tensors do not automatically get IDs anymore
    # assert msg2.obj.id == msg.obj.id
    assert msg2.address == msg.address
Exemplo n.º 18
0
def update_dataset(key: str, df_json: dict) -> dict:
    """Replace the dataset stored under *key* with the contents of *df_json*.

    Tensor entries whose id already exists in the stored dataset keep their
    id; new entries get a fresh UID. All previously stored binary objects
    and group rows for this dataset are removed before the new tensors are
    written. Returns the updated (deep-copied) JSON.
    """
    _json = deepcopy(df_json)
    storage = DiskObjectStore(db)

    json_obj = db.session.query(JsonObject).get(key)
    past_json = json_obj.binary
    past_ids = [x["id"] for x in past_json["tensors"].values()]

    mapping = []
    # Separate CSV content from metadata, reusing known ids where possible.
    for el in _json["tensors"].copy():
        existing_id = _json["tensors"][el].get("id", None)
        if existing_id is not None and existing_id in past_ids:
            # BUGFIX: the original left ``_id`` unassigned on this branch,
            # so mapping reused the previous iteration's UID (or raised
            # NameError on the first iteration). Rebuild it from the
            # stored id instead.
            _json["tensors"][el]["id"] = past_json["tensors"][el]["id"]
            _id = UID.from_string(past_json["tensors"][el]["id"])
        else:
            _id = UID()
            _json["tensors"][el]["id"] = str(_id.value)
        mapping.append((el, _id, _json["tensors"][el].pop("content", None)))

    # Ensure we have same ID in metadata and dataset.
    df_id = past_json["id"]
    _json["id"] = df_id

    # Clean existing storables in storage.
    db.session.query(DatasetGroup).filter_by(dataset=df_id).delete(
        synchronize_session=False
    )
    # NOTE(review): past_ids holds string ids while DiskObjectStore.delete
    # is called with UID objects elsewhere — confirm delete accepts both.
    for past_id in past_ids:
        storage.delete(past_id)

    # Create storables from UID/CSV and update shape/dtype metadata.
    # (Removed the unused ``storables`` list and enumerate index.)
    for name, _id, raw_file in mapping:
        _tensor = pd.read_csv(StringIO(raw_file))
        _tensor = th.tensor(_tensor.values.astype(np.float32))

        _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
        _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
        storage.__setitem__(_id, StorableObject(id=_id, data=_tensor))
        # Ensure we have same ID in metadata and dataset.
        db.session.add(DatasetGroup(bin_object=str(_id.value), dataset=df_id))

    setattr(json_obj, "binary", _json)
    db.session.commit()
    return _json
Exemplo n.º 19
0
    def __getitem__(self, key: UID) -> StorableObject:
        """Load the stored object for *key* together with its metadata.

        Raises a plain Exception when either the binary object or its
        metadata row is missing.
        """
        str_key = str(key.value)
        bin_obj = (self.db.session.query(BinObject)
                   .filter_by(id=str_key).first())
        obj_metadata = (self.db.session.query(ObjectMetadata)
                        .filter_by(obj=str_key).first())

        if bin_obj is None or obj_metadata is None:
            raise Exception("Object not found!")

        # Rehydrate verify keys from their stored hex representation.
        read_permissions = {
            VerifyKey(hex_key.encode("utf-8"), encoder=HexEncoder): value
            for hex_key, value in obj_metadata.read_permissions.items()
        }

        return StorableObject(
            id=UID.from_string(bin_obj.id),
            data=bin_obj.object,
            description=obj_metadata.description,
            tags=obj_metadata.tags,
            read_permissions=read_permissions,
            search_permissions=syft.lib.python.Dict({VERIFYALL: None}),
        )
Exemplo n.º 20
0
    def update_dataset(self, key: str, df_json: dict) -> dict:
        """Rebuild the dataset stored under *key* from the JSON in *df_json*.

        Every tensor entry gets a fresh UID, its CSV ``content`` is parsed
        into a float32 torch tensor, and both the binary Dataset and its
        JSON metadata rows are overwritten. Returns the updated JSON copy.
        """
        _json = deepcopy(df_json)
        json_obj = self.db.session.query(JsonObject).get(key)
        bin_obj = self.db.session.query(BinaryObject).get(key)

        mapping = []
        # Separate CSV content from metadata, assigning a fresh id each.
        for el in _json["tensors"].copy():
            _id = UID()
            _json["tensors"][el]["id"] = _id.value.hex
            mapping.append((el, _id, _json["tensors"][el].pop("content",
                                                              None)))

        # Create storables from UID/CSV and update shape/dtype metadata.
        # (Removed the unused enumerate index.)
        storables = []
        for name, _id, raw_file in mapping:
            _tensor = pd.read_csv(StringIO(raw_file))
            _tensor = th.tensor(_tensor.values.astype(np.float32))

            _json["tensors"][name]["shape"] = [int(x) for x in _tensor.size()]
            _json["tensors"][name]["dtype"] = "{}".format(_tensor.dtype)
            storables.append(StorableObject(id=_id, data=_tensor))

        # Ensure we have same ID in metadata and dataset.
        _id = UID.from_string(json_obj.id)
        df = Dataset(id=_id, data=storables)
        _json["id"] = _id.value.hex

        metadata = get_metadata(self.db)
        # NOTE(review): incrementing length on an *update* looks suspicious —
        # presumably length should only grow on create; confirm with callers.
        metadata.length += 1

        setattr(bin_obj, "binary", serialize(df, to_bytes=True))
        setattr(json_obj, "binary", _json)
        self.db.session.commit()
        return _json
Exemplo n.º 21
0
def process_items(node, tar_obj, user_key):
    """Extract CSV members of *tar_obj* into tensors stored on *node*.

    Each non-skipped file becomes a float32 tensor saved through a signed
    SaveObjectAction, plus a BinObjDataset row linking it to the dataset.
    Returns the dataset JSON with a "data" list describing each stored item.
    """
    # Optional fields
    tags, manifest, description, skip_files = extract_metadata_info(tar_obj)

    dataset_db = Dataset(id=str(UID().value),
                         manifest=manifest,
                         description=description,
                         tags=tags)
    db.session.add(dataset_db)
    data = []
    for item in tar_obj.members:
        if not item.isdir() and (item.name not in skip_files):
            reader = csv.reader(
                tar_obj.extractfile(item.name).read().decode().split("\n"),
                delimiter=",",
            )

            # Collect non-empty rows before converting to a tensor.
            rows = [row for row in reader if len(row) != 0]
            # BUGFIX: ``np.float`` was removed in NumPy >= 1.24; the builtin
            # ``float`` is the documented replacement (same dtype: float64).
            arr = np.array(rows, dtype=float)
            df = th.tensor(arr, dtype=th.float32)
            id_at_location = UID()

            # Step 2: create message which contains object to send
            storable = StorableObject(
                id=id_at_location,
                data=df,
                tags=tags + ["#" + item.name.split("/")[-1]],
                search_permissions={VERIFYALL: None},
            )

            obj_msg = SaveObjectAction(obj=storable, address=node.address)

            signed_message = obj_msg.sign(signing_key=SigningKey(
                user_key.encode("utf-8"), encoder=HexEncoder))

            node.recv_immediate_msg_without_reply(msg=signed_message)

            obj_dataset_relation = BinObjDataset(
                name=item.name,
                dataset=dataset_db.id,
                obj=str(id_at_location.value),
                dtype=df.__class__.__name__,
                shape=str(tuple(df.shape)),
            )
            db.session.add(obj_dataset_relation)
            data.append({
                "name": obj_dataset_relation.name,
                "id": str(id_at_location.value),
                "tags": tags + ["#" + item.name.split("/")[-1]],
                "dtype": obj_dataset_relation.dtype,
                "shape": obj_dataset_relation.shape,
            })

    db.session.commit()
    ds = model_to_json(dataset_db)
    ds["data"] = data
    return ds
Exemplo n.º 22
0
def create_storable(_id: UID, data: Tensor, description: str,
                    tags: Iterable[str]) -> StorableObject:
    """Wrap *data* and its metadata in a StorableObject under id *_id*."""
    return StorableObject(
        id=_id, data=data, description=description, tags=tags
    )
Exemplo n.º 23
0
def test_plan_serialization(client: sy.VirtualMachineClient) -> None:
    """A Plan holding one of each action type survives a serde round-trip."""
    # A pointer is required as input for the pointer-based actions; sending
    # a tensor to the client is the simplest way to obtain one (cumbersome,
    # but there is probably no shorter way).
    t = th.tensor([1, 2, 3])
    tensor_pointer = t.send(client)

    # One instance of every action type the Plan must carry.
    actions = [
        GetObjectAction(id_at_location=UID(),
                        address=Address(),
                        reply_to=Address(),
                        msg_id=UID()),
        RunFunctionOrConstructorAction(
            path="torch.Tensor.add",
            args=tuple(),
            kwargs={},
            id_at_location=UID(),
            address=Address(),
            msg_id=UID(),
        ),
        RunClassMethodAction(
            path="torch.Tensor.add",
            _self=tensor_pointer,
            args=[],
            kwargs={},
            id_at_location=UID(),
            address=Address(),
            msg_id=UID(),
        ),
        GarbageCollectObjectAction(id_at_location=UID(), address=Address()),
        EnumAttributeAction(path="", id_at_location=UID(), address=Address()),
        GetOrSetPropertyAction(
            path="",
            _self=tensor_pointer,
            id_at_location=UID(),
            address=Address(),
            args=[],
            kwargs={},
            action=PropertyActions.GET,
        ),
        GetSetStaticAttributeAction(
            path="",
            id_at_location=UID(),
            address=Address(),
            action=StaticAttributeAction.GET,
        ),
        SaveObjectAction(obj=StorableObject(id=UID(), data=t),
                         address=Address()),
    ]

    plan = Plan(actions)

    # serialize / deserialize
    plan_reconstructed = sy.deserialize(blob=serialize(plan))

    # Every action must come back as an Action inside a Plan.
    assert isinstance(plan_reconstructed, Plan)
    assert all(isinstance(a, Action) for a in plan_reconstructed.actions)
Exemplo n.º 24
0
def test_create_storable_obj() -> None:
    """Constructing a StorableObject with full metadata must not raise."""
    StorableObject(
        id=UID(),
        data=UID(),
        description="This is a dummy test",
        tags=["dummy", "test"],
    )