def test_with_iterable_pipeline(self):
    import boto3
    client = boto3.client('s3')
    client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
    s3 = S3Store('fake-open-skills/models')
    model_storage = ModelStorage(storage=s3)
    fake = FakeModel('fake')
    model_storage.save_model(fake, fake.model_name)
    vectorize_for_pipeline = partial(
        nlp.vectorize,
        embedding_model=SerializedByStorage(storage=s3, model_name=fake.model_name, model=fake)
    )
    pipe = IterablePipeline(vectorize_for_pipeline)
    pipe_unpickled = pickle.loads(pickle.dumps(pipe))
    # make sure the storage reference was pickled, not the fake model itself
    assert pipe_unpickled.functions[-1].keywords['embedding_model']._model is None
    assert pipe_unpickled.functions[-1].keywords['embedding_model'].storage.path == s3.path
    # the model will be loaded lazily when it's needed
    assert list(pipe_unpickled([1])) == [[1, 2, 3, 4]]
def test_pickle_s3(self):
    import boto3
    client = boto3.client('s3')
    client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
    s3 = S3Store('fake-open-skills/models')
    model_storage = ModelStorage(storage=s3)
    fake = FakeModel('fake')
    model_storage.save_model(fake, fake.model_name)

    s_fake = SerializedByStorage(fake, s3, fake.model_name)
    s3.write(pickle.dumps(s_fake), 'fake.pickle')
    fake_unpickled = pickle.loads(s3.load('fake.pickle'))
    # make sure the storage reference was pickled, not the fake model itself
    assert fake_unpickled._model is None
    assert fake_unpickled.storage.path == s3.path
    assert fake_unpickled.val == fake.val

    # if the object to be pickled has no storage attribute and no storage was
    # provided to SerializedByStorage, it is serialized normally
    s_fake = SerializedByStorage(model=fake, model_name=fake.model_name)
    s3.write(pickle.dumps(s_fake), 'fake.pickle')
    fake_unpickled = pickle.loads(s3.load('fake.pickle'))
    assert fake_unpickled._model is not None
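# The tests above rely on SerializedByStorage pickling a storage reference in
# place of the model and reloading the model lazily. A minimal sketch of how
# that could work follows -- an illustration consistent with the tests, not the
# library's actual implementation; the class name and the
# ModelStorage.load_model call are assumptions.
class SerializedByStorageSketch(object):
    def __init__(self, model=None, storage=None, model_name=None):
        self._model = model
        # fall back to the wrapped model's own storage attribute if none given
        self.storage = storage if storage is not None else getattr(model, 'storage', None)
        self.model_name = model_name

    def __getstate__(self):
        state = self.__dict__.copy()
        if state['storage'] is not None:
            # a storage reference exists, so drop the heavy model from the pickle;
            # without one, the model is serialized normally (second case above)
            state['_model'] = None
        return state

    def __getattr__(self, attr):
        # reached only when normal lookup fails; refuse underscore names so
        # pickle's probing for __setstate__ during load can't recurse here
        if attr.startswith('_'):
            raise AttributeError(attr)
        if self.__dict__.get('_model') is None:
            # lazily reload the model from storage on first real use
            self._model = ModelStorage(storage=self.storage).load_model(self.model_name)
        return getattr(self._model, attr)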
def test_with_iterable_pipeline(self):
    import boto3
    client = boto3.client('s3')
    client.create_bucket(Bucket='fake-open-skills', ACL='public-read-write')
    s3 = S3Store('fake-open-skills')
    model_storage = ModelStorage(s3)
    proxy_fake = ProxyObjectWithStorage(model_obj=FakeModel('fake'), storage=s3, model_name='fake')
    model_storage.save_model(proxy_fake, proxy_fake.model_name)
    vectorize_for_pipeline = partial(
        nlp.vectorize,
        embedding_model=SerializedByStorage(model=proxy_fake, model_name=proxy_fake.model_name)
    )
    pipe = IterablePipeline(vectorize_for_pipeline)
    s3.write(pickle.dumps(pipe), 'fake.pipe')
    pipe_unpickled = pickle.loads(s3.load('fake.pipe'))
    assert list(pipe_unpickled([1])) == [[1, 2, 3, 4]]
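# ProxyObjectWithStorage, as used above, can be thought of as a thin wrapper
# that carries storage and model_name metadata alongside the model -- which is
# why the SerializedByStorage above needs no explicit storage argument: it can
# fall back to the proxy's storage attribute. A rough sketch under those
# assumptions, not the library's actual code:
class ProxyObjectWithStorageSketch(object):
    def __init__(self, model_obj, storage=None, model_name=None):
        self.model_obj = model_obj
        self.storage = storage
        self.model_name = model_name

    def __getattr__(self, attr):
        # refuse dunder names so pickling/unpickling the proxy can't recurse;
        # delegate everything else to the wrapped model
        if attr.startswith('__'):
            raise AttributeError(attr)
        return getattr(self.model_obj, attr)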
def test_delegation(self):
    fake = FakeModel('fake')
    s_fake = SerializedByStorage(model=fake, model_name=fake.model_name)
    assert fake.val == s_fake.val
def __init__(self, embedding, classifier, **kwargs):
    # wrap both models so that pickling this object stores storage references
    # (when available) rather than the full serialized models
    self.embedding = SerializedByStorage(embedding)
    self.classifier = SerializedByStorage(classifier)
    # attribute access delegates through SerializedByStorage to the classifier
    self.target_variable = self.classifier.target_variable
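# Hypothetical effect of the constructor above: pickling the composed object
# serializes embedding and classifier by storage reference when the wrapped
# models carry one, so the pickle stays small and the models are reloaded
# from storage on first attribute access after unpickling.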