Пример #1
0
    def testDiskLoad(self, *_):
        """Load data into the disk handler from two different source handlers.

        The first half writes serialized bytes through the shared-memory
        handler and calls ``load_from_bytes_io`` on the disk handler; the
        second half puts an array through the process-memory handler and calls
        ``load_from_object_io``. After each load the storage manager must
        report both the source location and the disk location for the key.
        """
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            bytes_source = np.random.random((10, 10))
            object_source = np.random.random((10, 10))
            serialized_source = dataserializer.serialize(bytes_source)

            session_id = str(uuid.uuid4())
            bytes_key = str(uuid.uuid4())
            object_key = str(uuid.uuid4())

            def _report_success(*_):
                # Signal the waiting test that the promise resolved.
                return test_actor.set_result(None)

            def _report_failure(*exc):
                # Forward the failure so that ``get_result`` can surface it.
                return test_actor.set_result(exc, accept=False)

            def _sorted_locations(key):
                # Sorted locations the storage manager reports for one key.
                return sorted(manager_ref.get_data_locations(session_id, [key])[0])

            client = test_actor.storage_client
            disk_handler = client.get_storage_handler((0, DataStorageDevice.DISK))

            # load from bytes io
            shared_handler = client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            with shared_handler.create_bytes_writer(
                    session_id, bytes_key, serialized_source.total_bytes) as writer:
                serialized_source.write_to(writer)

            disk_handler.load_from_bytes_io(session_id, [bytes_key], shared_handler) \
                .then(_report_success, _report_failure)
            self.get_result(5)
            self.assertEqual(
                _sorted_locations(bytes_key),
                [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            shared_handler.delete(session_id, [bytes_key])
            disk_handler.delete(session_id, [bytes_key])

            # load from object io
            # The weak reference lets the test verify later that no strong
            # reference to the array survives deletion from process memory.
            object_ref = weakref.ref(object_source)
            proc_handler = client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [object_key], [object_source])
            del object_source

            disk_handler.load_from_object_io(session_id, [object_key], proc_handler) \
                .then(_report_success, _report_failure)
            self.get_result(5)
            self.assertEqual(
                _sorted_locations(object_key),
                [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.DISK)])

            proc_handler.delete(session_id, [object_key])
            self.assertIsNone(object_ref())
            disk_handler.delete(session_id, [object_key])
Пример #2
0
    def testSharedPutAndGet(self, *_):
        """Put, fetch and delete objects through the shared-memory handler.

        A raw array is stored and read back, then deleted (after which the
        storage manager must report no locations and ``get_objects`` must
        raise ``KeyError``). The same round trip is then repeated with a
        serialized object and with its buffer form, both passed with
        ``serialize=True``.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            raw_array = np.random.random((10, 10))
            packed_array = np.random.random((10, 10))
            packed_ser = dataserializer.serialize(packed_array)
            packed_buffer = packed_ser.to_buffer()

            session_id = str(uuid.uuid4())
            raw_key = str(uuid.uuid4())
            packed_key = str(uuid.uuid4())

            def _locations(key):
                # Locations the storage manager currently reports for one key.
                return manager_ref.get_data_locations(session_id, [key])[0]

            shared_handler = test_actor.storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            # Round trip of a plain array.
            shared_handler.put_objects(session_id, [raw_key], [raw_array])
            self.assertEqual(sorted(_locations(raw_key)),
                             [(0, DataStorageDevice.SHARED_MEMORY)])
            # The fetched object is deliberately not bound to a local name.
            assert_allclose(
                raw_array, shared_handler.get_objects(session_id, [raw_key])[0])

            # After deletion the key is neither located nor retrievable.
            shared_handler.delete(session_id, [raw_key])
            self.assertEqual(list(_locations(raw_key)), [])
            with self.assertRaises(KeyError):
                shared_handler.get_objects(session_id, [raw_key])

            # Round trips of the serialized forms: first the serialized
            # object, then the buffer produced from it.
            for payload in (packed_ser, packed_buffer):
                shared_handler.put_objects(
                    session_id, [packed_key], [payload], serialize=True)
                assert_allclose(
                    packed_array,
                    shared_handler.get_objects(session_id, [packed_key])[0])
                shared_handler.delete(session_id, [packed_key])
Пример #3
0
    def testLoadStoreInOtherProcess(self):
        """Run ``OtherProcessTestActor.run_copy_test`` for three location pairs.

        A three-process pool is populated with the storage-related actors,
        then the test actor is told to run a copy test for each pair of
        locations. After every request the test polls the actor until it
        reports a non-``None`` result, raising ``TimeoutError`` once more
        than 10 seconds have passed.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=3, address=test_addr,
                              distributor=MarsDistributor(3)) as pool:
            # Actors created with their default uid and no extra arguments.
            for actor_cls in (WorkerDaemonActor, StorageManagerActor, DispatchActor):
                pool.create_actor(actor_cls, uid=actor_cls.default_uid())

            pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            for holder_uid in ('w:1:InProcHolderActor1', 'w:2:InProcHolderActor2'):
                pool.create_actor(InProcHolderActor, uid=holder_uid)
            pool.create_actor(IORunnerActor, lock_free=True, dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

            def _wait_for_result():
                # Poll the test actor every 0.5s; give up after 10s.
                began = time.time()
                while True:
                    if test_ref.get_result() is not None:
                        return
                    pool.sleep(0.5)
                    if time.time() - began > 10:
                        raise TimeoutError

            location_pairs = (
                ((0, DataStorageDevice.SHARED_MEMORY),
                 (1, DataStorageDevice.PROC_MEMORY)),
                ((1, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)),
                ((1, DataStorageDevice.PROC_MEMORY),
                 (2, DataStorageDevice.PROC_MEMORY)),
            )
            for first_location, second_location in location_pairs:
                test_ref.run_copy_test(first_location, second_location, _tell=True)
                _wait_for_result()
Пример #4
0
    def testSharedLoadFromObjects(self, *_):
        """Load an object into shared memory from the process-memory handler.

        An array is put through the process-memory handler, then the
        shared-memory handler's ``load_from_object_io`` is called with that
        handler as the source. The storage manager must afterwards report both
        locations, and deleting the process-memory copy must leave no strong
        reference to the original array.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            source_array = np.random.random((10, 10))

            session_id = str(uuid.uuid4())
            source_key = str(uuid.uuid4())

            def _report_success(*_):
                # Signal the waiting test that the promise resolved.
                return test_actor.set_result(None)

            def _report_failure(*exc):
                # Forward the failure so that ``get_result`` can surface it.
                return test_actor.set_result(exc, accept=False)

            client = test_actor.storage_client
            shared_handler = client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            # load from object io
            # Only a weak reference is kept so the final check can detect
            # lingering strong references to the array.
            source_ref = weakref.ref(source_array)

            proc_handler = client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [source_key], [source_array])
            del source_array

            shared_handler.load_from_object_io(session_id, [source_key], proc_handler) \
                .then(_report_success, _report_failure)
            self.get_result(5)

            reported = sorted(
                manager_ref.get_data_locations(session_id, [source_key])[0])
            self.assertEqual(
                reported,
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])

            proc_handler.delete(session_id, [source_key])
            self.assertIsNone(source_ref())
            shared_handler.delete(session_id, [source_key])
Пример #5
0
    def _start_shared_holder_pool(self):
        """Yield ``(pool, test_actor)`` for a pool prepared with a shared holder.

        A single-process pool is created on a fresh local port and populated
        with the cluster-info, status, daemon, storage-manager, key-map and
        shared-holder actors before the pair is yielded. The pool and the
        test actor contexts stay open while the consumer holds the generator.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            with self.run_actor_test(pool) as test_actor:
                pool.create_actor(WorkerClusterInfoActor, [test_addr],
                                  uid=WorkerClusterInfoActor.default_uid())
                pool.create_actor(StatusActor, test_addr,
                                  uid=StatusActor.default_uid())

                # These actors take no arguments besides their default uid.
                for actor_cls in (WorkerDaemonActor, StorageManagerActor,
                                  PlasmaKeyMapActor):
                    pool.create_actor(actor_cls, uid=actor_cls.default_uid())

                pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                                  uid=SharedHolderActor.default_uid())

                yield pool, test_actor
Пример #6
0
    def create_standard_actors(cls,
                               pool,
                               address,
                               quota_size=None,
                               with_daemon=True,
                               with_status=True,
                               with_resource=False):
        """Create the common set of actors in ``pool``.

        :param pool: actor pool in which the actors are created
        :param address: address passed to the cluster-info actors (as a
            one-element list) and to the status actor
        :param quota_size: size passed to the quota actor; any falsy value
            (including ``None`` and ``0``) is replaced by ``1024 * 1024``
        :param with_daemon: when true, create ``WorkerDaemonActor``
        :param with_status: when true, create ``StatusActor``
        :param with_resource: when true, create ``ResourceActor``
        """
        if not quota_size:
            quota_size = 1024 * 1024

        def _spawn(actor_cls, *args, uid_cls=None):
            # Create ``actor_cls`` under the default uid of ``uid_cls``
            # (or of ``actor_cls`` itself when no override is given).
            uid_source = actor_cls if uid_cls is None else uid_cls
            pool.create_actor(actor_cls, *args, uid=uid_source.default_uid())

        _spawn(SchedulerClusterInfoActor, [address])
        _spawn(WorkerClusterInfoActor, [address])

        _spawn(PlasmaKeyMapActor)
        _spawn(StorageManagerActor)

        # Optional actors, created in the same order as the flags are checked.
        if with_resource:
            _spawn(ResourceActor)
        if with_daemon:
            _spawn(WorkerDaemonActor)
        if with_status:
            _spawn(StatusActor, address)

        _spawn(SharedHolderActor, cls.plasma_storage_size)
        _spawn(ChunkMetaActor)
        _spawn(DispatchActor)
        # The quota actor is registered under MemQuotaActor's default uid.
        _spawn(QuotaActor, quota_size, uid_cls=MemQuotaActor)
        _spawn(ExecutionActor)
Пример #7
0
    def testClientReadAndWrite(self):
        """Exercise writer and reader creation through the storage client.

        Covered cases: a non-promised disk writer, a promised disk writer, a
        reader on a device that already holds the data, a non-promised reader
        on a device without the data (expected to raise ``IOError``), and a
        promised reader on a device without the data, after which the data
        must be reported on both devices. Finally the data is deleted and the
        backing file is expected to disappear.
        """
        import time

        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            # NOTE(review): this global option is not restored in this method;
            # confirm that the test fixture resets it between tests.
            options.worker.lock_free_fileio = True
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # Names of the files the writers report, in creation order.
                file_names = []

                def _write_data(ser, writer):
                    # Record the target file, check the announced size, then
                    # write the serialized payload inside the writer context.
                    file_names.append(writer.filename)
                    self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                    with writer:
                        ser.write_to(writer)

                # test creating non-promised writer and write
                with storage_client.create_writer(session_id,
                                                  data_key1,
                                                  ser_data1.total_bytes,
                                                  (DataStorageDevice.DISK, ),
                                                  _promise=False) as writer:
                    _write_data(ser_data1, writer)
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key1])[0]),
                    [(0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key1])

                # test creating promised writer and write
                file_names[:] = []
                self.waitp(
                    storage_client.create_writer(
                        session_id, data_key2, ser_data1.total_bytes,
                        (DataStorageDevice.DISK, )).then(
                            functools.partial(_write_data, ser_data1)))
                self.assertTrue(os.path.exists(file_names[0]))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.DISK)])

                def _read_data(reader):
                    # Read everything from the reader and deserialize it.
                    with reader:
                        return dataserializer.deserialize(reader.read())

                # test creating reader when data exist in location
                result = self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.DISK, )).then(_read_data))[0]
                assert_allclose(result, data1)

                # test creating reader when no data in location (should raise)
                with self.assertRaises(IOError):
                    storage_client.create_reader(
                        session_id,
                        data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                        _promise=False)

                # test creating reader when copy needed
                self.waitp(
                    storage_client.create_reader(
                        session_id, data_key2,
                        (DataStorageDevice.SHARED_MEMORY, )).then(_read_data))
                self.assertEqual(
                    sorted(
                        storage_client.get_data_locations(
                            session_id, [data_key2])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY),
                     (0, DataStorageDevice.DISK)])

                storage_client.delete(session_id, [data_key2])
                # Poll until the file is gone, but bound the wait: without a
                # deadline a file that is never removed would hang the run
                # and the assertion below could never fail.
                deadline = time.monotonic() + 10
                while os.path.exists(file_names[0]) \
                        and time.monotonic() < deadline:
                    test_actor.ctx.sleep(0.05)
                self.assertFalse(os.path.exists(file_names[0]))
Пример #8
0
    def testClientSpill(self, *_):
        """Exercise ``storage_client.copy_to`` in several situations.

        Shared memory is first filled until ``StorageFull`` is raised. The
        test then checks copying a non-existing key (``KeyError`` expected),
        copying a key to a device that already holds it, a copy whose
        ``StorageHandler.load_from`` is patched to fail with ``SystemError``,
        a copy of two keys to a list of two devices, and a final copy into
        shared memory (labelled "copy with spill").
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client
                idx = 0

                shared_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.SHARED_MEMORY))
                proc_handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.PROC_MEMORY))

                def _fill_data():
                    # Put arrays into shared memory starting at offset ``idx``
                    # and stop at the first StorageFull. Returns the absolute
                    # index of the last attempted put. Keys and arrays are
                    # sliced from the same offset so they stay aligned even
                    # when ``idx`` is non-zero.
                    i = 0
                    for i, (key,
                            data) in enumerate(zip(data_keys[idx:],
                                                   data_list[idx:])):
                        try:
                            shared_handler.put_objects(session_id, [key],
                                                       [data])
                        except StorageFull:
                            break
                    return i + idx

                idx = _fill_data()

                # test copying non-existing keys
                storage_client.copy_to(session_id, ['non-exist-key'], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(KeyError):
                    self.get_result(5)

                # test copying into containing locations
                storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[0]])[0]),
                    [(0, DataStorageDevice.SHARED_MEMORY)])

                # test unsuccessful copy when no data at target
                def _mock_load_from(*_, **__):
                    # Stand-in for StorageHandler.load_from that always yields
                    # a rejected promise carrying SystemError.
                    return promise.finished(*build_exc_info(SystemError),
                                            _accept=False)

                with patch_method(StorageHandler.load_from, _mock_load_from), \
                        self.assertRaises(SystemError):
                    storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.DISK]) \
                        .then(lambda *_: test_actor.set_result(None),
                              lambda *exc: test_actor.set_result(exc, accept=False))
                    self.get_result(5)

                # test successful copy for multiple objects
                storage_client.delete(session_id, [data_keys[idx - 1]])
                # Weak references let the test verify below that the arrays
                # are released once they are dropped from ``data_list`` and
                # deleted from process memory.
                ref_data = weakref.ref(data_list[idx])
                ref_data2 = weakref.ref(data_list[idx + 1])
                proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                         data_list[idx:idx + 2])
                data_list[idx:idx + 2] = [None, None]

                storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                       [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                proc_handler.delete(session_id, data_keys[idx:idx + 2])

                # The assertion expects the first key to be reported in shared
                # memory only and the second key on disk only.
                self.assertEqual(
                    storage_manager_ref.get_data_locations(
                        session_id, data_keys[idx:idx + 2]),
                    [{(0, DataStorageDevice.SHARED_MEMORY)},
                     {(0, DataStorageDevice.DISK)}])
                self.assertIsNone(ref_data())
                self.assertIsNone(ref_data2())

                # test copy with spill
                idx += 2
                proc_handler.put_objects(session_id, [data_keys[idx]],
                                         [data_list[idx]])

                storage_client.copy_to(session_id, [data_keys[idx]], [DataStorageDevice.SHARED_MEMORY]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

                self.assertEqual(
                    sorted(
                        storage_manager_ref.get_data_locations(
                            session_id, [data_keys[idx]])[0]),
                    [(0, DataStorageDevice.PROC_MEMORY),
                     (0, DataStorageDevice.SHARED_MEMORY)])
Пример #9
0
    def testClientPutAndGet(self):
        """Exercise ``put_objects`` / ``get_object(s)`` on the storage client.

        Twenty arrays are put in one batch with shared memory and process
        memory as target devices; the test expects every key to end up in
        exactly one location and more than one key on each device. It then
        checks non-promised and promised reads of a shared-memory key, and
        finally that an array put into shared memory is no longer strongly
        referenced once the test's own containers are cleared.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            pool.create_actor(IORunnerActor)

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            # Lookup from key to array; note that it holds strong references
            # to the arrays until it is cleared at the end of the test.
            data_dict = dict(zip(data_keys, data_list))

            with self.run_actor_test(pool) as test_actor:
                storage_client = test_actor.storage_client

                # check batch object put with size exceeds
                storage_client.put_objects(session_id, data_keys, data_list,
                                           [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)
                locations = storage_client.get_data_locations(
                    session_id, data_keys)
                # Group keys by the last element of their single location
                # entry, which is compared against DataStorageDevice below.
                loc_to_keys = defaultdict(list)
                for key, location in zip(data_keys, locations):
                    self.assertEqual(len(location), 1)
                    loc_to_keys[list(location)[0][-1]].append(key)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
                self.assertGreater(
                    len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

                # check get object with all cases
                # A non-promised read from a device that does not hold the
                # key is expected to raise IOError.
                with self.assertRaises(IOError):
                    first_shared_key = loc_to_keys[
                        DataStorageDevice.SHARED_MEMORY][0]
                    storage_client.get_object(session_id,
                                              first_shared_key,
                                              [DataStorageDevice.PROC_MEMORY],
                                              _promise=False)

                # Non-promised read from the device that holds the key.
                shared_objs = storage_client.get_objects(
                    session_id, [first_shared_key],
                    [DataStorageDevice.SHARED_MEMORY],
                    _promise=False)
                self.assertEqual(len(shared_objs), 1)
                assert_allclose(shared_objs[0], data_dict[first_shared_key])

                # Promised read from the device that does not hold the key;
                # the result is delivered through the test actor.
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY], _promise=True) \
                    .then(functools.partial(test_actor.set_result),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                assert_allclose(self.get_result(5),
                                data_dict[first_shared_key])

                storage_client.delete(session_id, data_keys)
                time.sleep(0.5)
                # Keep only a weak reference, put the array into shared
                # memory, then drop the test's own strong references; the
                # weak reference is expected to be dead afterwards.
                ref = weakref.ref(data_dict[data_keys[0]])
                storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                           [DataStorageDevice.SHARED_MEMORY])
                data_list[:] = []
                data_dict.clear()
                self.assertIsNone(ref())
Пример #10
0
    def testPlasmaSharedStore(self):
        """Exercise ``PlasmaSharedStore`` against a freshly started plasma store.

        Covered cases: lookups of unknown keys, lookups of keys whose key-map
        entry points at an object id that was never created in the store
        (the key-map entry is expected to be gone afterwards), failing
        ``put`` / ``create`` calls, a full create / write / seal / read round
        trip, duplicate creation, and filling the store until ``StorageFull``.

        Setup calls are kept outside the ``assertRaises`` bodies so that only
        the store call under test can satisfy each expected exception.
        """
        import pyarrow
        from pyarrow import plasma

        store_size = 10 * 1024**2
        test_addr = f'127.0.0.1:{get_next_port()}'
        with plasma.start_plasma_store(store_size) as (sckt, _), \
                create_actor_pool(n_process=1, address=test_addr) as pool:
            km_ref = pool.create_actor(PlasmaKeyMapActor,
                                       uid=PlasmaKeyMapActor.default_uid())
            try:
                plasma_client = plasma.connect(sckt)
            except TypeError:
                # Fall back to the three-argument call when the one-argument
                # form is rejected with TypeError.
                plasma_client = plasma.connect(sckt, '', 0)
            store = PlasmaSharedStore(plasma_client, km_ref)

            self.assertGreater(store.get_actual_capacity(store_size),
                               store_size / 2)

            session_id = str(uuid.uuid4())
            data_list = [
                np.random.randint(0, 32767, (655360, ), np.int16)
                for _ in range(20)
            ]
            key_list = [str(uuid.uuid4()) for _ in range(20)]

            # Keys that were never registered anywhere.
            self.assertFalse(store.contains(session_id, str(uuid.uuid4())))
            with self.assertRaises(KeyError):
                store.get(session_id, str(uuid.uuid4()))
            with self.assertRaises(KeyError):
                store.get_actual_size(session_id, str(uuid.uuid4()))
            with self.assertRaises(KeyError):
                store.seal(session_id, str(uuid.uuid4()))

            # Key-map entries that point at a random object id which was
            # never created in the store.
            fake_data_key = str(uuid.uuid4())
            km_ref.put(session_id, fake_data_key,
                       plasma.ObjectID.from_random())
            self.assertFalse(store.contains(session_id, fake_data_key))
            self.assertIsNone(km_ref.get(session_id, fake_data_key))

            for stale_access in (store.get, store.seal,
                                 store.get_actual_size, store.get_buffer):
                # Re-register the dangling entry before every attempt; the
                # assertion after the attempt expects it to be removed again.
                km_ref.put(session_id, fake_data_key,
                           plasma.ObjectID.from_random())
                with self.assertRaises(KeyError):
                    stale_access(session_id, fake_data_key)
                self.assertIsNone(km_ref.get(session_id, fake_data_key))
            store.delete(session_id, fake_data_key)

            # Failing writes must not leave a key-map entry behind.
            non_serial = type('non_serial', (object, ), dict(nbytes=10))
            with self.assertRaises(SerializationFailed):
                store.put(session_id, fake_data_key, non_serial())
            self.assertIsNone(km_ref.get(session_id, fake_data_key))
            # NOTE(review): the concrete exception type for a non-integer
            # size is not visible here, hence the broad ``Exception``.
            with self.assertRaises(Exception):
                store.create(session_id, fake_data_key, 'abcd')
            self.assertIsNone(km_ref.get(session_id, fake_data_key))
            with self.assertRaises(StorageFull):
                store.create(session_id, fake_data_key, store_size * 2)
            self.assertIsNone(km_ref.get(session_id, fake_data_key))

            # Full round trip: create a buffer, write into it, seal, read.
            arrow_ser = pyarrow.serialize(data_list[0])
            buf = store.create(session_id, key_list[0], arrow_ser.total_bytes)
            writer = pyarrow.FixedSizeBufferWriter(buf)
            arrow_ser.write_to(writer)
            writer.close()
            store.seal(session_id, key_list[0])

            self.assertTrue(store.contains(session_id, key_list[0]))
            self.assertEqual(store.get_actual_size(session_id, key_list[0]),
                             arrow_ser.total_bytes)
            assert_allclose(store.get(session_id, key_list[0]), data_list[0])
            assert_allclose(
                pyarrow.deserialize(store.get_buffer(session_id, key_list[0])),
                data_list[0])

            # Creating an existing key fails and keeps the existing entry.
            with self.assertRaises(StorageDataExists):
                store.create(session_id, key_list[0], arrow_ser.total_bytes)
            self.assertIsNotNone(km_ref.get(session_id, key_list[0]))
            store.delete(session_id, key_list[0])
            del buf

            # Fill the store until it reports that it is full; the returned
            # values are kept referenced until the loop has finished.
            bufs = []
            for key, data in zip(key_list, data_list):
                try:
                    bufs.append(store.put(session_id, key, data))
                except StorageFull:
                    break
            del bufs
Пример #11
0
    def testSharedReadAndWrite(self, *_):
        """Round-trip a serialized array through shared-memory byte writers/readers.

        The payload is written twice (once through ``write_to`` and once in
        fixed-size slices of its buffer) and then read back twice (in one
        ``read()`` call and in fixed-size chunks). After each write the key
        must be reported at the shared-memory device only, and each read must
        deserialize to the original array.
        """
        address = f'127.0.0.1:{get_next_port()}'
        chunk_size = dataserializer.HEADER_LENGTH * 2
        with self.create_pool(n_process=1, address=address) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            array = np.random.random((100, 100))
            serialized = dataserializer.serialize(array)

            session_id = str(uuid.uuid4())
            key = str(uuid.uuid4())

            handler = test_actor.storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))

            def _signal_done(*_):
                # success callback: wake up get_result() with no payload
                return test_actor.set_result(None)

            def _signal_error(*exc):
                # failure callback: hand the exception info to get_result()
                return test_actor.set_result(exc, accept=False)

            def _assert_only_in_shared():
                found = manager_ref.get_data_locations(session_id, [key])[0]
                self.assertEqual(sorted(found),
                                 [(0, DataStorageDevice.SHARED_MEMORY)])

            # --- write the whole payload through write_to() ---
            def _write_at_once(payload, writer):
                self.assertEqual(writer.nbytes, serialized.total_bytes)
                with writer:
                    payload.write_to(writer)

            step = handler.create_bytes_writer(
                session_id, key, serialized.total_bytes, _promise=True)
            step = step.then(functools.partial(_write_at_once, serialized))
            step.then(_signal_done, _signal_error)
            self.get_result(5)
            _assert_only_in_shared()
            handler.delete(session_id, [key])

            # --- write the payload buffer slice by slice ---
            def _write_sliced(payload, writer):
                with writer:
                    for offset in range(0, len(payload), chunk_size):
                        writer.write(payload[offset:offset + chunk_size])

            step = handler.create_bytes_writer(
                session_id, key, serialized.total_bytes, _promise=True)
            step = step.then(functools.partial(_write_sliced, serialized.to_buffer()))
            step.then(_signal_done, _signal_error)
            self.get_result(5)
            _assert_only_in_shared()

            # --- read everything with a single read() ---
            def _read_whole(reader):
                with reader:
                    raw = reader.read()
                    return dataserializer.deserialize(raw)

            step = handler.create_bytes_reader(session_id, key, _promise=True)
            step = step.then(_read_whole)
            step.then(functools.partial(test_actor.set_result), _signal_error)
            assert_allclose(self.get_result(5), array)

            # --- read in chunk_size pieces until the reader is exhausted ---
            def _read_chunked(reader):
                collected = BytesIO()
                with reader:
                    piece = reader.read(chunk_size)
                    while piece:
                        collected.write(piece)
                        piece = reader.read(chunk_size)
                return dataserializer.deserialize(collected.getvalue())

            step = handler.create_bytes_reader(session_id, key, _promise=True)
            step = step.then(_read_chunked)
            step.then(functools.partial(test_actor.set_result), _signal_error)
            assert_allclose(self.get_result(5), array)
            handler.delete(session_id, [key])
Пример #12
0
    def testSharedSpill(self, *_):
        """Check which keys leave shared memory when the handler spills to disk.

        The shared store is repeatedly filled until ``StorageFull`` and then
        asked to spill twice the size of the first object. Three rounds are
        checked:

        * after ``lift_data_keys`` on the first key, the first key stays in
          shared memory and the second key is found on disk;
        * after lifting the first key with ``last=False`` and pinning it, the
          first key still stays in shared memory;
        * after unpinning it, the first key is found on disk.

        After the first two rounds, the object whose ``put`` previously hit
        ``StorageFull`` must fit into shared memory.
        """
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            holder_ref = pool.create_actor(
                SharedHolderActor, self.plasma_storage_size,
                uid=SharedHolderActor.default_uid())

            session_id = str(uuid.uuid4())
            # 20 equally sized payloads (655360 int16 values each)
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))
            shared_only = [(0, DataStorageDevice.SHARED_MEMORY)]
            disk_only = [(0, DataStorageDevice.DISK)]
            # index of the next key to store; read by _fill_data at call time
            idx = 0

            def _fill_data():
                """Put objects from position ``idx`` on until the store is full.

                Returns the index of the key whose put raised ``StorageFull``,
                or the index of the last key when every put succeeded.
                """
                i = 0
                # slice both sequences so that every key is stored together
                # with the payload at the same index
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list[idx:])):
                    try:
                        handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            def _do_spill():
                """Spill twice the size of the first object and wait for completion."""
                data_size = storage_manager_ref.get_data_sizes(session_id, [data_keys[0]])[0]
                handler.spill_size(2 * data_size) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            def _assert_locations(key, expected):
                locations = storage_manager_ref.get_data_locations(session_id, [key])[0]
                self.assertEqual(list(locations), expected)

            # test lift data key
            idx = _fill_data()
            handler.lift_data_keys(session_id, [data_keys[0]])
            _do_spill()

            _assert_locations(data_keys[0], shared_only)
            _assert_locations(data_keys[1], disk_only)

            # the put that failed with StorageFull must succeed after the spill
            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            _assert_locations(data_keys[idx], shared_only)
            idx += 1

            # test pin data key
            idx = _fill_data()
            holder_ref.lift_data_keys(session_id, [data_keys[0]], last=False)
            pin_token = str(uuid.uuid4())
            pinned_keys = handler.pin_data_keys(session_id, (data_keys[0],), pin_token)
            self.assertIn(data_keys[0], pinned_keys)
            _do_spill()

            _assert_locations(data_keys[0], shared_only)
            _assert_locations(data_keys[1], disk_only)

            handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
            _assert_locations(data_keys[idx], shared_only)
            idx += 1

            # test unpin data key
            idx = _fill_data()
            handler.unpin_data_keys(session_id, (data_keys[0],), pin_token)
            _do_spill()

            _assert_locations(data_keys[0], disk_only)
Пример #13
0
    def testSharedLoadFromBytes(self, *_):
        """Load serialized data from the disk handler into shared memory.

        First a single small object is written to disk and loaded into shared
        memory; it must then be reported at both devices. Then 20 larger
        objects are written to disk and loaded in one call. The load is
        expected to end with ``StorageFull`` naming some but not all of the
        keys. After the disk copies are deleted, the affected keys must have
        no recorded size and the remaining keys must still have one.
        """
        # NOTE: the test must not call logging.basicConfig(); doing so would
        # reconfigure the root logger for every test that runs afterwards.
        test_addr = f'127.0.0.1:{get_next_port()}'
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            ser_data1 = dataserializer.serialize(data1)

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from bytes io
            disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
            with disk_handler.create_bytes_writer(
                    session_id, data_key1, ser_data1.total_bytes) as writer:
                ser_data1.write_to(writer)

            handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            # the source copy on disk is kept, so both devices are listed
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            disk_handler.delete(session_id, [data_key1])
            handler.delete(session_id, [data_key1])

            # load from bytes io till no capacity
            data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                         for _ in range(20)]
            data_keys = [str(uuid.uuid4()) for _ in range(20)]
            for key, data in zip(data_keys, data_list):
                ser_data = dataserializer.serialize(data)
                with disk_handler.create_bytes_writer(
                        session_id, key, ser_data.total_bytes) as writer:
                    ser_data.write_to(writer)

            handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))

            # collect the keys named by StorageFull; if no StorageFull is
            # raised the set stays empty and assertGreater below fails
            affected_keys = set()
            try:
                self.get_result(5)
            except StorageFull as ex:
                affected_keys.update(ex.affected_keys)

            # drop the disk copies so that only data loaded into shared
            # memory still has a recorded size
            storage_client.delete(session_id, data_keys, [DataStorageDevice.DISK])

            self.assertLess(len(affected_keys), len(data_keys))
            self.assertGreater(len(affected_keys), 1)
            for k, size in zip(data_keys, storage_client.get_data_sizes(session_id, data_keys)):
                if k in affected_keys:
                    self.assertIsNone(size)
                else:
                    self.assertIsNotNone(size)