Example #1
    def get_file_from_s3(self) -> bool:
        """Get hdf5 file from S3 storage

        Returns: bool
            True if the file was downloaded successfully
        """
        logger.debug({
            'action': 'get_file_from_s3',
            'status': 'start',
            'file_path': self.file_path,
            'message': 'start downloading hdf5 file from S3'
        })
        s3_key = os.path.join('hdf5', self.hdf5_file_name)
        s3_client = S3Object(
            s3_key,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        download_dir = os.path.join(settings.base_dir, 'tmp', 'hdf5')
        try:
            s3_client.download_file(download_dir)
        except botocore.exceptions.ClientError as e:
            logger.error({
                'action': 'get_file_from_s3',
                'status': 'fail',
                'message': e
            })
            raise
        logger.debug({
            'action': 'get_file_from_s3',
            'status': 'end',
            'file_path': self.file_path,
            'message': 'finished downloading hdf5 file from S3'
        })
        return True
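The S3Object helper that all of these examples depend on is project code, not part of boto3 itself. A minimal sketch of what such a wrapper might look like, assuming it wraps a boto3 client and that the bucket is the 'startlens-media-storage' seen in Example #3; the method shapes are inferred from how the examples call them, not from the actual source:

import os
import boto3

class S3Object:
    """Hypothetical sketch of the wrapper used throughout these examples."""

    BUCKET = 'startlens-media-storage'  # bucket name taken from Example #3

    def __init__(self, file_path, aws_access_key_id=None,
                 aws_secret_access_key=None):
        self.file_path = file_path  # S3 key, e.g. 'hdf5/model.hdf5'
        self.client = boto3.client(
            's3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)

    def download_file(self, download_dir: str) -> bool:
        # Store the object locally under the same basename as the S3 key
        local_path = os.path.join(download_dir,
                                  os.path.basename(self.file_path))
        self.client.download_file(self.BUCKET, self.file_path, local_path)
        return True

    def upload_file(self, local_path: str) -> bool:
        self.client.upload_file(local_path, self.BUCKET, self.file_path)
        return True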
Example #2
    def get_knn_file_from_s3(self) -> bool:
        """Get knn file from S3 resource
        If not get s3 object, create new pickle file to read

        Returns: bool
            If success to download csv file or create new file, returns True
        """
        logger.info({
            'action': 'get_knn_file_from_s3',
            'status': 'start',
            'message': 'start downloading knn file from S3'
        })
        s3_key = os.path.join('knn', self.file_name)
        s3_client = S3Object(
            s3_key,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        try:
            s3_client.download_file(self.knn_dir)
        except botocore.exceptions.ClientError as e:
            logger.error({
                'action': 'get_knn_file_from_s3',
                'status': 'fail',
                'message': e,
                'knn_file_path': self.file_path
            })
            pathlib.Path(self.file_path).touch()
        logger.info({
            'action': 'get_knn_file_from_s3',
            'status': 'end',
            'message': 'finished downloading knn file from S3'
        })
        return True
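Because a failed download leaves an empty placeholder behind (the touch() call above), whatever unpickles the file presumably has to guard against that case. A hypothetical helper, not part of the source:

import os
import pickle

def load_knn_model(file_path):
    """Return the unpickled knn model, or None if only the empty
    placeholder created by get_knn_file_from_s3() exists."""
    if os.path.getsize(file_path) == 0:
        return None
    with open(file_path, 'rb') as pkl_file:
        return pickle.load(pkl_file)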
Example #3
def s3_object():
    s3_object = S3Object('uploads/picture/1/2/sample.jpg',
                         aws_access_key_id=settings.aws_access_key_id,
                         aws_secret_access_key=settings.aws_secret_access_key)
    s3_object.client.create_bucket(Bucket='startlens-media-storage')
    s3_object.client.put_object(Bucket='startlens-media-storage',
                                Key='uploads/picture/1/2/sample.jpg')
    return s3_object
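This function reads like a pytest fixture: it prepares a bucket and object for tests such as Example #8. A sketch of how it would typically be wired up, assuming moto provides the S3 mock; the decorator and mock are assumptions, only the bucket and key setup come from the example, and S3Object and settings are imported from the project:

import pytest
from moto import mock_s3  # renamed mock_aws in moto 5.x

@pytest.fixture
def s3_object():
    with mock_s3():  # all boto3 S3 calls hit an in-memory fake
        s3_object = S3Object('uploads/picture/1/2/sample.jpg',
                             aws_access_key_id=settings.aws_access_key_id,
                             aws_secret_access_key=settings.aws_secret_access_key)
        s3_object.client.create_bucket(Bucket='startlens-media-storage')
        s3_object.client.put_object(Bucket='startlens-media-storage',
                                    Key='uploads/picture/1/2/sample.jpg')
        yield s3_object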
Example #4
    def add_new_data(self, classes: list, images: list) -> None:
        """Save class and converted vector data to csv file with each exhibit update

        Parameters
        ----------
        classes: list
            the list of classification classes
        images: list
            the list of 50-dimensional image vectors
        """
        logger.info({
            'action': 'add_new_data',
            'status': 'start',
            'spot_id': self.spot_id,
            'message': 'start writing csv file'
        })
        with open(self.file_path, 'a') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=self.columns)
            # writer.writeheader()  # header is skipped: the file is opened in append mode

            for class_, image in zip(classes, images):
                writer.writerow({
                    self.columns_class: class_,
                    self.columns_vector: image
                })
                logger.info({
                    'action': 'add_new_data',
                    'status': 'writing',
                    'class': class_,
                    'message': 'writing class, image vector as csv row'
                })
        logger.info({
            'action': 'add_new_data',
            'status': 'end',
            'spot_id': self.spot_id,
            'message': 'finished writing csv file'
        })

        # Save csv to S3 as backup
        logger.info({
            'action': 'add_new_data',
            'status': 'start',
            'spot_id': self.spot_id,
            'message': 'start uploading csv to S3'
        })
        s3_key = os.path.join('csv', self.file_name)
        s3_client = S3Object(
            s3_key,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        s3_client.upload_file(self.file_path)
        logger.info({
            'action': 'add_new_data',
            'status': 'end',
            'spot_id': self.spot_id,
            'message': 'finished uploading csv to S3'
        })
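For context, a hypothetical reader for the csv this method appends to. The real column names come from self.columns, which is not shown, so 'class' and 'vector' are assumptions, as is the vectors being serialized as Python list literals:

import ast
import pandas as pd

def load_training_data(file_path):
    # Assumed header names; the real ones come from self.columns
    df = pd.read_csv(file_path, names=['class', 'vector'])
    labels = df['class'].tolist()
    # Each vector was written via str(), so parse the list literal back
    vectors = [ast.literal_eval(v) for v in df['vector']]
    return labels, vectors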
Example #5
    def read_and_resize(self, file_path: str) -> np.ndarray:
        """Read the file from the file path and resize it to a square (224x224)

        Parameters
        ----------
        file_path: str
            the file path of the image file

        Returns: ndarray
            image data converted to an RGB array (shape: 224x224x3)
        """
        s3_object = S3Object(
            file_path,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        io_image = s3_object.get_bytes_image_on_memory()
        pil_image = Image.open(io_image).convert('RGB')
        pil_image = pil_image.resize((IMAGE_SIZE, IMAGE_SIZE))
        return np.array(pil_image, dtype="float32")
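A quick usage sketch, assuming IMAGE_SIZE is 224 (per the docstring) and the usual 0-255 to 0-1 scaling before feeding a batch to the model; the scaling step and the instance name are assumptions, not shown in the source:

import numpy as np

# 'reader' is a hypothetical instance of the class this method belongs to
image = reader.read_and_resize('uploads/picture/1/2/sample.jpg')
batch = np.expand_dims(image / 255.0, axis=0)  # shape: (1, 224, 224, 3)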
Example #6
    def save_trained_model(self, model_obj) -> None:
        """Save model to pickle
        
        Parameters
        ----------
        model_obj:
            trained model to save such as knn
        """
        logger.debug({
            'action': 'save_trained_model',
            'status': 'start',
            'knn_file_path': self.file_path
        })
        with open(self.file_path, 'wb') as pkl_file:
            pickle.dump(model_obj, pkl_file)
        logger.debug({
            'action': 'save_trained_model',
            'status': 'end',
            'knn_file_path': self.file_path
        })

        # Save pkl to S3 as backup
        logger.info({
            'action': 'save_trained_model',
            'status': 'start',
            'spot_id': self.spot_id,
            'message': 'start uploading pkl to S3'
        })
        s3_key = os.path.join('knn', self.file_name)
        s3_client = S3Object(
            s3_key,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        s3_client.upload_file(self.file_path)
        logger.info({
            'action': 'save_trained_model',
            'status': 'end',
            'spot_id': self.spot_id,
            'message': 'finished uploading pkl to S3'
        })
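For context, a sketch of the kind of object that might be passed in as model_obj, assuming scikit-learn's KNeighborsClassifier (the source only says 'such as knn'); labels and vectors are the class/embedding pairs written in Example #4:

from sklearn.neighbors import KNeighborsClassifier

# labels/vectors as produced by the hypothetical load_training_data()
# sketched under Example #4
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(vectors, labels)
storage.save_trained_model(knn)  # 'storage': hypothetical instance of this class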
Example #7
    def train(self, file_paths: list) -> None:
        """Train model
        Train triplet loss model and save in hdf5, tflite as a result

        Parameters
        ----------
        file_paths: list
            file path of S3 resource
            ex. ["uplaod/picture/1/3/xxx.jpg", ...]
        """
        train_paths, test_paths = train_test_split(file_paths,
                                                   train_size=0.7,
                                                   random_state=1337)
        file_class_mapping_train = {
            train_path: get_class_label_from_path(train_path)
            for train_path in train_paths
        }
        file_class_mapping_test = {
            test_path: get_class_label_from_path(test_path)
            for test_path in test_paths
        }
        train_samples = GenerateSample(file_class_mapping_train)
        test_samples = GenerateSample(file_class_mapping_test)

        checkpoint = ModelCheckpoint(PATH_MODEL_CHECKPINT,
                                     monitor='loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", patience=2)
        callbacks_list = [checkpoint, early]

        embedding_model, triplet_model = self.get_model()
        # show layers
        for i, layer in enumerate(embedding_model.layers):
            print(i, layer.name, layer.trainable)

        # TODO: adjust which layers are frozen gradually
        for layer in embedding_model.layers[72:]:
            layer.trainable = True
        for layer in embedding_model.layers[:72]:
            layer.trainable = False
            if "bn" in layer.name:
                layer.trainable = True
        # summary() prints to stdout and returns None, so route each line
        # through the logger instead of logging the return value
        embedding_model.summary(print_fn=lambda line: logger.info(
            {'action': 'train', 'network summary': line}))

        triplet_model.compile(loss=None, optimizer=Adam(lr=0.0001))
        logger.info({'action': 'train', 'status': 'start training'})
        history = triplet_model.fit_generator(
            train_samples.generate(),
            validation_data=test_samples.generate(),
            epochs=EPOCH,
            verbose=1,
            workers=1,
            steps_per_epoch=STEPS_PER_EPOCH,
            validation_steps=VALIDATION_STEPS,
            use_multiprocessing=False,
            callbacks=callbacks_list)
        logger.info({'action': 'train', 'train_loss': history.history['loss']})
        logger.info({
            'action': 'train',
            'val_loss': history.history['val_loss']
        })
        logger.info({
            'action': 'train',
            'epoch': EPOCH,
            'steps_per_epoch': STEPS_PER_EPOCH,
            'validation_steps': VALIDATION_STEPS
        })
        logger.info({'action': 'train', 'history': history.history})

        embedding_model.save(PATH_MODEL_TMP)
        s3_object = S3Object(
            PATH_MODEL_DIST,
            aws_access_key_id=settings.aws_access_key_id,
            aws_secret_access_key=settings.aws_secret_access_key)
        is_saved_model = s3_object.upload_file(PATH_MODEL_TMP)

        # Initialize so the check below is safe when the model upload fails
        is_saved_tflite = False
        if is_saved_model:
            self.convert_to_tflite(embedding_model)
            s3_object.file_path = PATH_TFMODEL_DIST
            is_saved_tflite = s3_object.upload_file(PATH_TFMODEL_TMP)

        if is_saved_model and is_saved_tflite:
            logger.info({
                'action': 'train',
                'status': 'successfully trained and saved model'
            })
        else:
            logger.error({'action': 'train', 'status': 'failed to save model'})
        logger.info({'action': 'train', 'status': 'end training'})
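convert_to_tflite is not shown in the source. A minimal sketch of what it might do, assuming TF 2.x's tf.lite.TFLiteConverter and that the result is written to PATH_TFMODEL_TMP for the upload above; the method body is an assumption:

import tensorflow as tf

def convert_to_tflite(self, model) -> None:
    # Convert the trained Keras embedding model to a TFLite flatbuffer
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()
    with open(PATH_TFMODEL_TMP, 'wb') as f:
        f.write(tflite_model)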
Example #8
    def test_initialize_class(self):
        with pytest.raises(S3AccessDeniedError):
            S3Object('uploads/picture/1/2/sample.jpg')
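The test implies that S3Object raises S3AccessDeniedError when no credentials are supplied. A sketch of how that guard might look; the exception class and the fail-fast check are assumptions inferred from this test:

class S3AccessDeniedError(Exception):
    """Raised when AWS credentials are missing or rejected."""

class S3Object:
    def __init__(self, file_path, aws_access_key_id=None,
                 aws_secret_access_key=None):
        # Fail fast if no credentials were provided, as the test expects
        if not (aws_access_key_id and aws_secret_access_key):
            raise S3AccessDeniedError('AWS credentials are required')
        self.file_path = file_path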