Example #1
    def test_with_iterable_pipeline(self):
        # S3Store, ModelStorage, SerializedByStorage, IterablePipeline, nlp
        # and FakeModel are assumed to come from the surrounding skills-ml
        # test module's imports
        import pickle
        from functools import partial

        import boto3
        client = boto3.client('s3')
        client.create_bucket(Bucket='fake-open-skills',
                             ACL='public-read-write')
        s3 = S3Store('fake-open-skills/models')
        model_storage = ModelStorage(storage=s3)
        fake = FakeModel('fake')

        model_storage.save_model(fake, fake.model_name)
        vectorize_for_pipeline = partial(nlp.vectorize,
                                         embedding_model=SerializedByStorage(
                                             storage=s3,
                                             model_name=fake.model_name,
                                             model=fake))
        pipe = IterablePipeline(vectorize_for_pipeline)

        pipe_unpickled = pickle.loads(pickle.dumps(pipe))
        # make sure the fake model itself wasn't pickled, only a reference
        # to where it lives in storage
        assert pipe_unpickled.functions[-1].keywords[
            'embedding_model']._model is None
        assert pipe_unpickled.functions[-1].keywords[
            'embedding_model'].storage.path == s3.path
        # the model is loaded from storage lazily, when it's first needed
        assert list(pipe_unpickled([1])) == [[1, 2, 3, 4]]
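A note on FakeModel: it is not defined in these snippets and comes from the surrounding test module. Judging from the assertions (a val attribute, a model_name, and the [[1, 2, 3, 4]] vectors the pipeline yields), a minimal hypothetical stand-in could look like the sketch below, assuming nlp.vectorize delegates to the embedding model's infer_vector method; the actual definition in skills-ml may differ.

    class FakeModel(object):
        """Hypothetical stand-in for a trained embedding model."""
        def __init__(self, name):
            self.model_name = name
            self.val = 'some value'

        def infer_vector(self, doc_words):
            # fixed vector, matching the [[1, 2, 3, 4]] assertions in the tests
            return [1, 2, 3, 4]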
Example #2
    def test_pickle_s3(self):
        import pickle

        import boto3
        client = boto3.client('s3')
        client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
        s3 = S3Store('fake-open-skills/models')
        model_storage = ModelStorage(storage=s3)
        fake = FakeModel('fake')
        model_storage.save_model(fake, fake.model_name)

        s_fake = SerializedByStorage(fake, s3, fake.model_name)
        s3.write(pickle.dumps(s_fake), 'fake.pickle')
        fake_unpickled = pickle.loads(s3.load('fake.pickle'))
        # make sure the fake model itself wasn't pickled, only a reference
        # to where it lives in storage
        assert fake_unpickled._model is None
        assert fake_unpickled.storage.path == s3.path
        # delegated attribute access still works: the model is loaded back
        # from storage on demand
        assert fake_unpickled.val == fake.val

        # if the wrapped object has no storage attribute and no storage was
        # passed to SerializedByStorage, it is pickled normally
        s_fake = SerializedByStorage(model=fake, model_name=fake.model_name)
        s3.write(pickle.dumps(s_fake), 'fake.pickle')
        fake_unpickled = pickle.loads(s3.load('fake.pickle'))
        assert fake_unpickled._model is not None
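The two halves of this test pin down the classic serialize-by-reference pattern: when storage coordinates are known, __getstate__ drops the in-memory model and keeps only the reference, and the model is reloaded lazily on first use; without a reference, pickling falls back to normal behaviour. A conceptual sketch of that pattern (illustrative names only, not skills-ml's actual implementation):

    import pickle

    class SerializedByReference(object):
        def __init__(self, model=None, storage=None, model_name=None):
            self._model = model
            self.storage = storage
            self.model_name = model_name

        def __getstate__(self):
            state = self.__dict__.copy()
            if self.storage is not None:
                # keep only the reference; drop the heavy model itself
                state['_model'] = None
            return state

        @property
        def model(self):
            if self._model is None and self.storage is not None:
                # lazily reload the model from storage on first access
                self._model = pickle.loads(self.storage.load(self.model_name))
            return self._model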
Example #3
    def test_with_iterable_pipeline(self):
        import pickle
        from functools import partial

        import boto3
        client = boto3.client('s3')
        client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
        s3 = S3Store('fake-open-skills')
        model_storage = ModelStorage(s3)

        # the proxy carries its own storage and model name, so they don't
        # have to be passed to SerializedByStorage explicitly
        proxy_fake = ProxyObjectWithStorage(model_obj=FakeModel('fake'),
                                            storage=s3,
                                            model_name='fake')
        model_storage.save_model(proxy_fake, proxy_fake.model_name)

        vectorize_for_pipeline = partial(
            nlp.vectorize,
            embedding_model=SerializedByStorage(
                model=proxy_fake,
                model_name=proxy_fake.model_name))
        pipe = IterablePipeline(vectorize_for_pipeline)

        s3.write(pickle.dumps(pipe), 'fake.pipe')
        pipe_unpickled = pickle.loads(s3.load('fake.pipe'))

        assert list(pipe_unpickled([1])) == [[1, 2, 3, 4]]
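All three tests above create a bucket with boto3 but are not meant to touch real AWS; in the original suite this is presumably done with moto's in-memory S3 mock. A minimal sketch of that setup, assuming moto 4.x (where the decorator is mock_s3; newer moto releases replaced it with mock_aws):

    from moto import mock_s3

    @mock_s3
    def test_with_fake_s3():
        import boto3
        client = boto3.client('s3')
        # the bucket exists only inside moto's in-memory S3; no credentials needed
        client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
        assert 'fake-open-skills' in [
            b['Name'] for b in client.list_buckets()['Buckets']
        ]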
Example #4
    def test_delegation(self):
        fake = FakeModel('fake')
        s_fake = SerializedByStorage(model=fake, model_name=fake.model_name)
        # attribute access on the wrapper is delegated to the wrapped model
        assert fake.val == s_fake.val
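The delegation in Example #4 is what __getattr__ gives you: it fires only when normal attribute lookup on the wrapper fails, so the lookup falls through to the wrapped model. A minimal sketch of the pattern (an illustrative wrapper, not skills-ml's actual class):

    class Delegating(object):
        def __init__(self, model):
            self._model = model

        def __getattr__(self, name):
            # called only when normal lookup fails, so attributes set in
            # __init__ (like _model itself) never reach this path
            return getattr(self._model, name)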
Example #5
    def __init__(self, embedding, classifier, **kwargs):
        # wrap the models so that, when they carry storage references,
        # pickling this object stores the references rather than the
        # models themselves
        self.embedding = SerializedByStorage(embedding)
        self.classifier = SerializedByStorage(classifier)
        # delegation keeps the wrapped classifier's attributes reachable
        self.target_variable = self.classifier.target_variable
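A note on Example #5: wrapping the embedding and the classifier at construction time means that, when the wrapped models carry storage references (for example via ProxyObjectWithStorage as in Example #3), pickling the combined object ships only those references instead of the models themselves, while attribute delegation (as in Example #4) keeps self.classifier.target_variable working transparently.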