def get_file_from_s3(self) -> bool:
    """Get hdf5 file from S3 storage

    Returns:
        bool
            True if the file was downloaded successfully
    """
    logger.debug({
        'action': 'get_file_from_s3',
        'status': 'start',
        'file_path': self.file_path,
        'message': 'start to download hdf5 file from S3'
    })
    s3_key = os.path.join('hdf5', self.hdf5_file_name)
    s3_client = S3Object(
        s3_key,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    download_dir = os.path.join(settings.base_dir, 'tmp', 'hdf5')
    try:
        s3_client.download_file(download_dir)
    except botocore.exceptions.ClientError as e:
        logger.error({
            'action': 'get_file_from_s3',
            'status': 'fail',
            'message': e
        })
        raise
    logger.debug({
        'action': 'get_file_from_s3',
        'status': 'end',
        'file_path': self.file_path,
        'message': 'finished downloading hdf5 file from S3'
    })
    return True
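
# For context, a minimal sketch of what the S3Object.download_file wrapper
# used above might look like with boto3. The bucket name is taken from the
# test fixture below; the client attribute and the key-to-filename mapping
# are assumptions, and the real implementation lives elsewhere in this repo.
#
# import os
#
# import boto3
#
#
# class S3ObjectSketch:
#     def __init__(self, file_path, aws_access_key_id=None,
#                  aws_secret_access_key=None):
#         self.file_path = file_path  # key within the bucket
#         self.client = boto3.client(
#             's3',
#             aws_access_key_id=aws_access_key_id,
#             aws_secret_access_key=aws_secret_access_key)
#
#     def download_file(self, download_dir):
#         # Download the object into download_dir, keeping the key's basename.
#         local_path = os.path.join(
#             download_dir, os.path.basename(self.file_path))
#         self.client.download_file(
#             'startlens-media-storage', self.file_path, local_path)
#         return local_path
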
def get_knn_file_from_s3(self) -> bool:
    """Get knn file from S3 resource

    If the S3 object cannot be fetched, create a new empty pickle file to read.

    Returns:
        bool
            True if the file was downloaded or a new file was created
    """
    logger.info({
        'action': 'get_knn_file_from_s3',
        'status': 'start',
        'message': 'start to download knn file from S3'
    })
    s3_key = os.path.join('knn', self.file_name)
    s3_client = S3Object(
        s3_key,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    try:
        s3_client.download_file(self.knn_dir)
    except botocore.exceptions.ClientError as e:
        logger.error({
            'action': 'get_knn_file_from_s3',
            'status': 'fail',
            'message': e,
            'knn_file_path': self.file_path
        })
        # Fall back to an empty local file so callers can still open it
        pathlib.Path(self.file_path).touch()
    logger.info({
        'action': 'get_knn_file_from_s3',
        'status': 'end',
        'message': 'finished downloading knn file from S3'
    })
    return True
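
# Note that the touch() fallback above leaves a zero-byte file, so downstream
# code that unpickles it must be prepared for an empty file. A sketch of such
# a guard (a hypothetical helper, not part of this repo):
#
# import os
# import pickle
#
#
# def load_knn_if_present(file_path):
#     # Return the persisted knn model, or None when only the empty
#     # placeholder file created by touch() exists.
#     if os.path.getsize(file_path) == 0:
#         return None
#     with open(file_path, 'rb') as pkl_file:
#         return pickle.load(pkl_file)
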
@pytest.fixture
def s3_object():
    # Provide an S3Object backed by a pre-populated bucket for the tests.
    s3_object = S3Object(
        'uploads/picture/1/2/sample.jpg',
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    s3_object.client.create_bucket(Bucket='startlens-media-storage')
    s3_object.client.put_object(
        Bucket='startlens-media-storage',
        Key='uploads/picture/1/2/sample.jpg')
    return s3_object
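
# Hypothetical usage of the fixture above. This assumes an S3 mock such as
# moto is active for the test, so create_bucket/put_object in the fixture hit
# a fake S3 rather than AWS, and that download_file writes the key's basename
# into the given directory:
#
# def test_download_file(s3_object, tmp_path):
#     s3_object.download_file(str(tmp_path))
#     assert (tmp_path / 'sample.jpg').exists()
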
def add_new_data(self, classes: list, images: list) -> None:
    """Save class and converted vector data to csv file with each exhibit update

    Parameters
    ----------
    classes: list
        the list of classification classes
    images: list
        the list of 50 dimensional vector data of images
    """
    logger.info({
        'action': 'add_new_data',
        'status': 'start',
        'spot_id': self.spot_id,
        'message': 'start to write csv file'
    })
    with open(self.file_path, 'a') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=self.columns)
        # writer.writeheader() is intentionally skipped: rows are appended
        # to a file whose header already exists.
        for class_, image in zip(classes, images):
            writer.writerow({
                self.columns_class: class_,
                self.columns_vector: image
            })
            logger.info({
                'action': 'add_new_data',
                'status': 'writing',
                'class': class_,
                'message': 'writing class, image vector as csv row'
            })
    logger.info({
        'action': 'add_new_data',
        'status': 'end',
        'spot_id': self.spot_id,
        'message': 'finished writing csv file'
    })

    # Save csv to S3 as backup
    logger.info({
        'action': 'add_new_data',
        'status': 'start',
        'spot_id': self.spot_id,
        'message': 'start to upload csv to S3'
    })
    s3_key = os.path.join('csv', self.file_name)
    s3_client = S3Object(
        s3_key,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    s3_client.upload_file(self.file_path)
    logger.info({
        'action': 'add_new_data',
        'status': 'end',
        'spot_id': self.spot_id,
        'message': 'finished uploading csv to S3'
    })
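
# A self-contained illustration of the row format add_new_data appends, using
# hypothetical column names ('class', 'vector') in place of
# self.columns_class / self.columns_vector. Note that DictWriter serializes
# the vector list as its repr, which readers must parse back:
#
# import csv
# import io
#
# columns = ['class', 'vector']
# buffer = io.StringIO()
# writer = csv.DictWriter(buffer, fieldnames=columns)
# writer.writeheader()
# writer.writerow({'class': 3, 'vector': [0.12, -0.98, 0.45]})  # 50 dims in practice
# print(buffer.getvalue())
# # class,vector
# # 3,"[0.12, -0.98, 0.45]"
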
def read_and_resize(self, file_path: str):
    """Read the file from file path and resize to a square (224x224)

    Parameters
    ----------
    file_path: str
        the file path of the image file

    Returns:
        ndarray
            image data converted to an RGB array (shape: 224x224x3)
    """
    s3_object = S3Object(
        file_path,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    io_image = s3_object.get_bytes_image_on_memory()
    pil_image = Image.open(io_image).convert('RGB')
    pil_image = pil_image.resize((IMAGE_SIZE, IMAGE_SIZE))
    return np.array(pil_image, dtype="float32")
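
# For reference, a minimal sketch of what S3Object.get_bytes_image_on_memory
# could do with boto3: fetch the object body and wrap it in BytesIO so PIL
# can open it without touching disk. The bucket name and client attribute are
# assumptions:
#
# import io
#
#
# def get_bytes_image_on_memory_sketch(client, key):
#     response = client.get_object(Bucket='startlens-media-storage', Key=key)
#     return io.BytesIO(response['Body'].read())
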
def save_trained_model(self, model_obj) -> None:
    """Save model to pickle

    Parameters
    ----------
    model_obj:
        trained model to save, such as knn
    """
    logger.debug({
        'action': 'save_trained_model',
        'status': 'start',
        'knn_file_path': self.file_path
    })
    with open(self.file_path, 'wb') as pkl_file:
        pickle.dump(model_obj, pkl_file)
    logger.debug({
        'action': 'save_trained_model',
        'status': 'end',
        'knn_file_path': self.file_path
    })

    # Save pkl to S3 as backup
    logger.info({
        'action': 'save_trained_model',
        'status': 'start',
        'spot_id': self.spot_id,
        'message': 'start to upload pkl to S3'
    })
    s3_key = os.path.join('knn', self.file_name)
    s3_client = S3Object(
        s3_key,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    s3_client.upload_file(self.file_path)
    logger.info({
        'action': 'save_trained_model',
        'status': 'end',
        'spot_id': self.spot_id,
        'message': 'finished uploading pkl to S3'
    })
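
# Counterpart sketch for reading the model back. A hypothetical helper, not
# part of the class above:
#
# import pickle
#
#
# def load_trained_model(file_path: str):
#     """Load a model previously persisted by save_trained_model."""
#     with open(file_path, 'rb') as pkl_file:
#         return pickle.load(pkl_file)
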
def train(self, file_paths: list) -> None:
    """Train model

    Train the triplet loss model and save the result as hdf5 and tflite.

    Parameters
    ----------
    file_paths: list
        file paths of S3 resources, e.g. ["uploads/picture/1/3/xxx.jpg", ...]
    """
    train_paths, test_paths = train_test_split(
        file_paths, train_size=0.7, random_state=1337)
    file_class_mapping_train = {
        train_path: get_class_label_from_path(train_path)
        for train_path in train_paths
    }
    file_class_mapping_test = {
        test_path: get_class_label_from_path(test_path)
        for test_path in test_paths
    }
    train_samples = GenerateSample(file_class_mapping_train)
    test_samples = GenerateSample(file_class_mapping_test)

    checkpoint = ModelCheckpoint(
        PATH_MODEL_CHECKPINT,
        monitor='loss',
        verbose=1,
        save_best_only=True,
        mode='min')
    early = EarlyStopping(monitor="val_loss", mode="min", patience=2)
    callbacks_list = [checkpoint, early]

    embedding_model, triplet_model = self.get_model()
    # show layers
    for i, layer in enumerate(embedding_model.layers):
        print(i, layer.name, layer.trainable)

    # TODO: adjust which parameters are frozen, gradually
    for layer in embedding_model.layers[72:]:
        layer.trainable = True
    for layer in embedding_model.layers[:72]:
        layer.trainable = False
        if "bn" in layer.name:
            layer.trainable = True

    # Note: summary() prints to stdout and returns None
    logger.info({
        'action': 'train',
        'network summary': embedding_model.summary()
    })
    triplet_model.compile(loss=None, optimizer=Adam(lr=0.0001))

    logger.info({'action': 'train', 'status': 'start training'})
    history = triplet_model.fit_generator(
        train_samples.generate(),
        validation_data=test_samples.generate(),
        epochs=EPOCH,
        verbose=1,
        workers=1,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_steps=VALIDATION_STEPS,
        use_multiprocessing=False,
        callbacks=callbacks_list)

    logger.info({'action': 'train', 'train_loss': history.history['loss']})
    logger.info({
        'action': 'train',
        'val_loss': history.history['val_loss']
    })
    logger.info({
        'action': 'train',
        'epoch': EPOCH,
        'steps_per_epoch': STEPS_PER_EPOCH,
        'validation_steps': VALIDATION_STEPS
    })
    logger.info({'action': 'train', 'history': history.history})

    embedding_model.save(PATH_MODEL_TMP)
    s3_object = S3Object(
        PATH_MODEL_DIST,
        aws_access_key_id=settings.aws_access_key_id,
        aws_secret_access_key=settings.aws_secret_access_key)
    is_saved_model = s3_object.upload_file(PATH_MODEL_TMP)

    is_saved_tflite = False
    if is_saved_model:
        self.convert_to_tflite(embedding_model)
        s3_object.file_path = PATH_TFMODEL_DIST
        is_saved_tflite = s3_object.upload_file(PATH_TFMODEL_TMP)
    if is_saved_model and is_saved_tflite:
        logger.info({
            'action': 'train',
            'status': 'succeeded in training the model and saving it'
        })
    else:
        logger.info({'action': 'train', 'status': 'failed to save model'})
    logger.info({'action': 'train', 'status': 'end training'})
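
# convert_to_tflite is defined elsewhere in the class; a minimal sketch of
# the conversion step, assuming TensorFlow 2.x (the fit_generator / Adam(lr=)
# calls above suggest an older Keras, where the converter API differs) and a
# caller-supplied output path:
#
# import tensorflow as tf
#
#
# def convert_to_tflite_sketch(keras_model, out_path):
#     # Convert the Keras embedding model to a TFLite flatbuffer and write it.
#     converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
#     tflite_model = converter.convert()
#     with open(out_path, 'wb') as f:
#         f.write(tflite_model)
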
def test_initialize_class(self):
    with pytest.raises(S3AccessDeniedError):
        S3Object('uploads/picture/1/2/sample.jpg')
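
# For context, a sketch of the custom exception the test expects; the real
# definition lives elsewhere in the repo. The assumption is that S3Object
# raises it when no credentials are supplied:
#
# class S3AccessDeniedError(Exception):
#     """Raised when S3 credentials are missing or rejected."""
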