def test_get(self):
  """Checks that `ms.get` maps names and spec objects to the expected spec classes."""
  # Each entry pairs an argument for `ms.get` with the class its result must
  # be an instance of.
  expectations = (
      ('mobilenet_v2', image_spec.ImageModelSpec),
      ('average_word_vec', text_spec.AverageWordVecModelSpec),
      (image_spec.mobilenet_v2_spec, image_spec.ImageModelSpec),
  )
  for requested, expected_cls in expectations:
    self.assertIsInstance(ms.get(requested), expected_cls)
def test_get(self):
  """Checks `ms.get` for known names, a spec object, and an unknown name."""
  # Each entry pairs an argument for `ms.get` with the class its result must
  # be an instance of.
  expectations = (
      ('mobilenet_v2', ms.ImageModelSpec),
      ('average_word_vec', ms.AverageWordVecModelSpec),
      (ms.mobilenet_v2_spec, ms.ImageModelSpec),
  )
  for requested, expected_cls in expectations:
    self.assertIsInstance(ms.get(requested), expected_cls)

  # A name that is not registered must raise KeyError.
  self.assertRaises(KeyError, ms.get, 'not_exist_model_spec')
def run(train_data_path,
        validation_data_path,
        export_dir,
        spec='bert_qa',
        **kwargs):
  """Runs the question-answer demo: load SQuAD data, fine-tune, evaluate, export.

  Args:
    train_data_path: Path of the SQuAD-format file loaded as training data.
    validation_data_path: Path of the SQuAD-format file loaded as validation
      data.
    export_dir: Location passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `question_answer.create`.
  """
  # Resolve the model specification first; both loaders depend on it.
  spec = model_spec.get(spec)

  load_squad = QuestionAnswerDataLoader.from_squad
  train_data = load_squad(train_data_path, spec, is_training=True)
  validation_data = load_squad(validation_data_path, spec, is_training=False)

  # Fine-tune the model on the training split.
  model = question_answer.create(train_data, model_spec=spec, **kwargs)

  # Report the F1 score measured on the validation split.
  metric = model.evaluate(validation_data)
  final_f1 = metric['final_f1']
  tf.compat.v1.logging.info('Eval F1 score:%f' % final_f1)

  # Export the trained model.
  model.export(export_dir)
def testEfficientDetLite0(self):
  """Trains EfficientDet-Lite0 on a tiny dataset and checks evaluation and exports.

  Covers: training for one epoch, Keras-model evaluation, SavedModel export,
  float TFLite export with metadata JSON, TFLite-model evaluation, and the
  default (quantized) TFLite export.
  """
  # Gets model specification.
  spec = model_spec.get('efficientdet_lite0')

  # Prepare data.
  images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
      self.get_temp_dir())
  data = object_detector_dataloader.DataLoader.from_pascal_voc(
      images_dir, annotations_dir, label_map)

  # Train the model.
  task = object_detector.create(data, spec, batch_size=1, epochs=1)
  self.assertEqual(spec.config.num_classes, 2)

  # Evaluate trained model
  metrics = task.evaluate(data)
  self.assertIsInstance(metrics, dict)
  self.assertGreaterEqual(metrics['AP'], 0)

  # Export the model to saved model.
  output_path = os.path.join(self.get_temp_dir(), 'saved_model')
  task.export(self.get_temp_dir(), export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(output_path))
  self.assertNotEqual(len(os.listdir(output_path)), 0)

  # Export the model to the float TFLite model.
  output_path = os.path.join(self.get_temp_dir(), 'float.tflite')
  task.export(
      self.get_temp_dir(),
      tflite_filename='float.tflite',
      quantization_config=None,
      export_format=ExportFormat.TFLITE,
      with_metadata=True,
      export_metadata_json_file=True)
  # Checks the sizes of the float32 TFLite model files in bytes.
  model_size = 13476379
  self.assertNear(os.path.getsize(output_path), model_size, 50000)

  json_output_file = os.path.join(self.get_temp_dir(), 'float.json')
  self.assertTrue(os.path.isfile(json_output_file))
  self.assertGreater(os.path.getsize(json_output_file), 0)
  expected_json_file = test_util.get_test_data_path(
      'efficientdet_lite0_metadata.json')
  self.assertTrue(filecmp.cmp(json_output_file, expected_json_file))

  # Evaluate the TFLite model. The result must be rebound to `metrics`;
  # otherwise the assertions below would only re-check the Keras-model
  # metrics computed earlier and never verify the TFLite evaluation.
  metrics = task.evaluate_tflite(output_path, data)
  self.assertIsInstance(metrics, dict)
  self.assertGreaterEqual(metrics['AP'], 0)

  # Tests the default quantized model.
  filename = 'model_quant.tflite'
  output_path = os.path.join(self.get_temp_dir(), filename)
  task.export(
      self.get_temp_dir(),
      tflite_filename=filename,
      export_format=ExportFormat.TFLITE)
  model_size = 4312187
  # Allow a 5% deviation from the expected file size.
  err = model_size * 0.05
  self.assertTrue(os.path.isfile(output_path))
  self.assertNear(os.path.getsize(output_path), model_size, err)
def create(cls,
           train_data,
           model_spec,
           batch_size=None,
           epochs=2,
           shuffle=False,
           do_train=True):
  """Builds a question-answer model and, unless disabled, trains it.

  Args:
    train_data: Training data.
    model_spec: Specification for the model; resolved through `ms.get`.
    batch_size: Batch size for training.
    epochs: Number of epochs for training.
    shuffle: Whether the data should be shuffled.
    do_train: Whether to run training. When False only `create_model()` is
      called.

  Returns:
    An instance based on QuestionAnswer.

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Refuse to continue when the spec does not support the active TF behavior.
  supported_versions = model_spec.compat_tf_versions
  current_version = compat.get_tf_behavior()
  if current_version not in supported_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        supported_versions, current_version))

  model = cls(model_spec, shuffle=shuffle)
  if not do_train:
    model.create_model()
    return model

  tf.compat.v1.logging.info('Retraining the models...')
  model.train(train_data, epochs, batch_size)
  return model
def run(spec, data_dir, dataset_type, export_dir, **kwargs):
  """Runs the audio-classification demo: load, train, evaluate, export.

  Args:
    spec: Model specification, or a name accepted by `model_spec.get`.
    data_dir: Directory holding the dataset.
    dataset_type: 'esc50' selects the ESC-50 loader with fixed folds; any other
      value loads `data_dir` as a folder dataset and splits it 80/10/10.
    export_dir: Location passed to `model.export`.
    **kwargs: Extra keyword arguments forwarded to `audio_classifier.create`.
  """
  spec = model_spec.get(spec)
  loader = audio_dataloader.DataLoader

  if dataset_type != 'esc50':
    data = loader.from_folder(spec, data_dir)
    train_data, rest_data = data.split(0.8)
    validation_data, test_data = rest_data.split(0.5)
  else:
    # Limit to 2 categories to speed up the demo
    categories = ['dog', 'cat']
    # Folds used for the train / validation / test splits, in that order.
    fold_groups = ([0, 1, 2, 3], [4], [5])
    train_data, validation_data, test_data = (
        loader.from_esc50(
            spec, data_dir, folds=folds, categories=categories)
        for folds in fold_groups)

  print('Training the model')
  model = audio_classifier.create(train_data, spec, validation_data, **kwargs)

  print('Evaluating the model')
  _, accuracy = model.evaluate(test_data)
  print('Test accuracy: %f' % accuracy)

  model.export(export_dir)
def test_export_and_evaluation(self):
  """Exports a briefly-trained recommendation model and checks both artifacts."""
  model_dir = os.path.join(self.test_tempdir, 'recommendation_export')
  spec = ms.get(
      'recommendation',
      input_spec=self.input_spec,
      model_hparams=self.model_hparams)
  model = recommendation.create(
      self.train_loader,
      model_spec=spec,
      model_dir=model_dir,
      steps_per_epoch=1)

  model.export(
      model_dir,
      export_format=[ExportFormat.TFLITE, ExportFormat.SAVED_MODEL])

  # Both the TFLite file and the SavedModel protobuf must exist and be
  # non-empty.
  expected_tflite = os.path.join(model_dir, 'model.tflite')
  expected_saved_model = os.path.join(model_dir, 'saved_model',
                                      'saved_model.pb')
  for artifact in (expected_tflite, expected_saved_model):
    self.assertTrue(os.path.exists(artifact))
    self.assertGreater(os.path.getsize(artifact), 0)

  # Evaluate tflite model.
  self._test_evaluate_tflite(model, expected_tflite)
def create(train_data,
           model_spec,
           validation_data=None,
           epochs=None,
           batch_size=None,
           do_train=True):
  """Builds an object detector and, unless disabled, trains it.

  Args:
    train_data: Training data; its `label_map` is passed to the detector.
    model_spec: Specification for the model; resolved through `ms.get`.
    validation_data: Validation data. If None, skips validation process.
    epochs: Number of epochs for training.
    batch_size: Batch size for training.
    do_train: Whether to run training. When False only `create_model()` is
      called.

  Returns:
    ObjectDetector

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Refuse to continue when the spec does not support the active TF behavior.
  supported_versions = model_spec.compat_tf_versions
  current_version = compat.get_tf_behavior()
  if current_version not in supported_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        supported_versions, current_version))

  detector = ObjectDetector(model_spec, train_data.label_map)
  if not do_train:
    detector.create_model()
    return detector

  tf.compat.v1.logging.info('Retraining the models...')
  detector.train(train_data, validation_data, epochs, batch_size)
  return detector
def run(data_dir, export_dir, spec='bert_classifier', **kwargs):
  """Runs the text-classification demo: load TSV data, fine-tune, evaluate, export.

  Args:
    data_dir: Directory containing `train.tsv` and `dev.tsv`.
    export_dir: Location passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `text_classifier.create`.
  """
  # Resolve the model specification first; both loaders depend on it.
  spec = model_spec.get(spec)

  def load_split(basename, is_training):
    # Both splits share the same column layout and tab delimiter.
    return TextClassifierDataLoader.from_csv(
        filename=os.path.join(data_dir, basename),
        text_column='sentence',
        label_column='label',
        model_spec=spec,
        delimiter='\t',
        is_training=is_training)

  train_data = load_split('train.tsv', True)
  validation_data = load_split('dev.tsv', False)

  # Fine-tune the model.
  model = text_classifier.create(
      train_data, model_spec=spec, validation_data=validation_data, **kwargs)

  # Report accuracy on the validation split.
  _, accuracy = model.evaluate(validation_data)
  print('Eval accuracy: %f' % accuracy)

  # Export the trained model.
  model.export(export_dir)
def create(train_data,
           model_spec,
           model_spec_options=None,
           model_dir=None,
           validation_data=None,
           batch_size=16,
           steps_per_epoch=10000,
           epochs=1,
           learning_rate=0.1,
           gradient_clip_norm=1.0,
           shuffle=True,
           do_train=True,
           max_history_length=10):
  """Loads data and trains the model for recommendation.

  Args:
    train_data: Training data.
    model_spec: Specification for the model. The value returned by
      `ms.get(model_spec)` is called with `model_spec_options` to build the
      spec.
    model_spec_options: dict, additional options to create a model. Treated as
      an empty dict when None.
    model_dir: str, path to export model checkpoints and summaries. When None,
      a fresh temporary directory is created with `tempfile.mkdtemp()`.
    validation_data: Validation data.
    batch_size: Batch size for training.
    steps_per_epoch: int, Number of step per epoch.
    epochs: int, Number of epochs for training.
    learning_rate: float, learning rate.
    gradient_clip_norm: float, clip threshold (<= 0 meaning no clip).
    shuffle: boolean, whether the training data should be shuffled.
    do_train: boolean, whether to run training. When False only
      `create_model(do_train=False)` is called.
    max_history_length: int, max history length as model input (for inference).

  Returns:
    An instance of the Recommendation class.
  """
  # Create model spec.
  if model_spec_options is None:
    model_spec_options = {}
  model_spec = ms.get(model_spec)(**model_spec_options)

  # Use model_dir or a temp folder to store intermediate checkpoints, etc.
  if model_dir is None:
    model_dir = tempfile.mkdtemp()

  recommendation = Recommendation(
      model_spec,
      model_dir=model_dir,
      shuffle=shuffle,
      max_history_length=max_history_length,
      learning_rate=learning_rate,
      gradient_clip_norm=gradient_clip_norm)

  if do_train:
    tf.compat.v1.logging.info('Training recommendation model...')
    recommendation.train(
        train_data,
        validation_data,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs)
  else:
    recommendation.create_model(do_train=False)
  return recommendation
def testEfficientDetLite0(self):
  """Trains EfficientDet-Lite0 briefly and checks evaluation plus all exports."""
  spec = model_spec.get('efficientdet_lite0')

  # Build a small Pascal VOC dataset inside the test's temp directory.
  images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
      self.get_temp_dir())
  data = object_detector_dataloader.DataLoader.from_pascal_voc(
      images_dir, annotations_dir, label_map)

  # One epoch with batch size 1.
  task = object_detector.create(data, spec, batch_size=1, epochs=1)
  self.assertEqual(spec.config.num_classes, 2)

  # The trained model must evaluate to a metrics dict with non-negative AP.
  metrics = task.evaluate(data, batch_size=1)
  self.assertIsInstance(metrics, dict)
  self.assertGreaterEqual(metrics['AP'], 0)

  # SavedModel export: the directory must exist and contain something.
  saved_model_dir = os.path.join(self.get_temp_dir(), 'saved_model')
  task.export(self.get_temp_dir(), export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(saved_model_dir))
  self.assertNotEqual(len(os.listdir(saved_model_dir)), 0)

  # TFLite export together with its metadata JSON file.
  tflite_path = os.path.join(self.get_temp_dir(), 'float.tflite')
  task.export(
      self.get_temp_dir(),
      tflite_filename='float.tflite',
      export_format=ExportFormat.TFLITE,
      with_metadata=True,
      export_metadata_json_file=True)
  self.assertTrue(tf.io.gfile.exists(tflite_path))
  self.assertGreater(os.path.getsize(tflite_path), 0)

  # The metadata JSON must match the checked-in expected file.
  json_output_file = os.path.join(self.get_temp_dir(), 'float.json')
  self.assertTrue(os.path.isfile(json_output_file))
  self.assertGreater(os.path.getsize(json_output_file), 0)
  expected_json_file = test_util.get_test_data_path(
      'efficientdet_lite0_metadata.json')
  self.assertTrue(filecmp.cmp(json_output_file, expected_json_file))

  # Export the model to quantized TFLite model.
  # TODO(b/175173304): Skips the test for stable tensorflow 2.4 for now since
  # it fails. Will revert this change after TF upgrade.
  if tf.__version__.startswith('2.4'):
    return

  quantized_name = 'model_quantized.tflite'
  quantized_path = os.path.join(self.get_temp_dir(), quantized_name)
  config = configs.QuantizationConfig.create_full_integer_quantization(
      data, is_integer_only=True)
  task.export(
      self.get_temp_dir(),
      tflite_filename=quantized_name,
      quantization_config=config,
      export_format=ExportFormat.TFLITE)
  self.assertTrue(os.path.isfile(quantized_path))
  self.assertGreater(os.path.getsize(quantized_path), 0)
def test_mobilebert_model(self, spec, trainable):
  """Trains a question-answer model for one epoch and checks F1 and TFLite export.

  Args:
    spec: Model specification, or a name accepted by `ms.get`.
    trainable: Value assigned to the spec's `trainable` attribute.
  """
  # Only test squad1.1 since it takes too long time for this.
  squad_version = '1.1'

  qa_spec = ms.get(spec)
  qa_spec.trainable = trainable
  qa_spec.predict_batch_size = 1

  train_data, validation_data = _get_data(qa_spec, squad_version)
  model = question_answer.create(
      train_data, model_spec=qa_spec, epochs=1, batch_size=1)

  self._test_f1_score(model, validation_data, 0.0)
  self._test_export_to_tflite(model, validation_data, atol=1e-02)
def test_evaluate(self):
  """Checks that evaluating a briefly-trained model returns a non-empty list."""
  model_dir = os.path.join(self.test_tempdir, 'recommendation_evaluate')
  spec = ms.get(
      'recommendation',
      input_spec=self.input_spec,
      model_hparams=self.model_hparams)
  model = recommendation.create(
      self.train_loader,
      model_spec=spec,
      model_dir=model_dir,
      steps_per_epoch=1)

  results = model.evaluate(self.test_loader)
  self.assertIsInstance(results, list)
  # An empty list is falsy, so this asserts the list is non-empty.
  self.assertTrue(results)
def test_mobilebert_model(self, spec):
  """Disabled test: trains a frozen question-answer model and checks F1 and export.

  Args:
    spec: Model specification, or a name accepted by `ms.get`.
  """
  # The test is skipped up front; the body below is kept for re-enabling.
  self.skipTest('TODO(b/164095081): Fix breakage and re-enable')

  # Only test squad1.1 since it takes too long time for this.
  squad_version = '1.1'

  qa_spec = ms.get(spec)
  qa_spec.trainable = False
  qa_spec.predict_batch_size = 1

  train_data, validation_data = _get_data(qa_spec, squad_version)
  model = question_answer.create(
      train_data, model_spec=qa_spec, epochs=1, batch_size=1)

  self._test_f1_score(model, validation_data, 0.0)
  self._test_export_to_tflite(model, validation_data, atol=1e-02)
def test_create(self, encoder_type):
  """Checks that `recommendation.create` yields an object with a non-None `model`.

  Args:
    encoder_type: Encoder type passed to `_testutil.get_input_spec`.
  """
  model_dir = os.path.join(self.test_tempdir, 'recommendation_create')
  spec = ms.get(
      'recommendation',
      input_spec=_testutil.get_input_spec(encoder_type),
      model_hparams=self.model_hparams)
  created = recommendation.create(
      self.train_loader,
      model_spec=spec,
      model_dir=model_dir,
      steps_per_epoch=1)
  self.assertIsNotNone(created.model)
def run(data_dir, export_dir, spec='efficientnet_lite0', **kwargs):
  """Runs the image-classification demo: load, split, train, evaluate, export.

  Args:
    data_dir: Folder loaded by `ImageClassifierDataLoader.from_folder`.
    export_dir: Location passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
  """
  spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  dataset = ImageClassifierDataLoader.from_folder(data_dir)
  train_data, holdout = dataset.split(0.8)
  validation_data, test_data = holdout.split(0.5)

  model = image_classifier.create(
      train_data, model_spec=spec, validation_data=validation_data, **kwargs)

  _, accuracy = model.evaluate(test_data)
  print('Test accuracy: %f' % accuracy)

  model.export(export_dir)
def run(data_dir, export_dir, spec='audio_browser_fft', **kwargs):
  """Runs the audio-classification demo: load, split, train, evaluate, export.

  Args:
    data_dir: Folder loaded by `audio_dataloader.DataLoader.from_folder`.
    export_dir: Location passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `audio_classifier.create`.
  """
  spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  dataset = audio_dataloader.DataLoader.from_folder(spec, data_dir)
  train_data, holdout = dataset.split(0.8)
  validation_data, test_data = holdout.split(0.5)

  print('Training the model')
  model = audio_classifier.create(train_data, spec, validation_data, **kwargs)

  print('Evaluating the model')
  _, accuracy = model.evaluate(test_data)
  print('Test accuracy: %f' % accuracy)

  model.export(export_dir)
def create(cls,
           train_data: object_detector_dataloader.DataLoader,
           model_spec: object_detector_spec.EfficientDetModelSpec,
           validation_data: Optional[
               object_detector_dataloader.DataLoader] = None,
           epochs: Optional[int] = None,
           batch_size: Optional[int] = None,
           train_whole_model: bool = False,
           do_train: bool = True) -> T:
  """Loads data and trains the model for object detection.

  Args:
    train_data: Training data. Its `label_map` and the loader itself are passed
      to the detector constructor.
    model_spec: Specification for the model; resolved through `ms.get`. Its
      `config` is updated in place by the overrides below.
    validation_data: Validation data. If None, skips validation process.
    epochs: Number of epochs for training. When not None, it overrides
      `model_spec.config.num_epochs`.
    batch_size: Batch size for training. When not None, it overrides
      `model_spec.config.batch_size`.
    train_whole_model: Boolean, False by default. If true, train the whole
      model by clearing `model_spec.config.var_freeze_expr`. Otherwise, only
      train the layers that do not match `model_spec.config.var_freeze_expr`.
    do_train: Whether to run training. When False only `create_model()` is
      called.

  Returns:
    An instance based on ObjectDetector.

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Explicit arguments take precedence over the values stored in the spec.
  if epochs is not None:
    model_spec.config.num_epochs = epochs
  if batch_size is not None:
    model_spec.config.batch_size = batch_size
  if train_whole_model:
    # No freeze expression means no layer is excluded from training.
    model_spec.config.var_freeze_expr = None

  if compat.get_tf_behavior() not in model_spec.compat_tf_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        model_spec.compat_tf_versions, compat.get_tf_behavior()))

  object_detector = cls(model_spec, train_data.label_map, train_data)

  if do_train:
    tf.compat.v1.logging.info('Retraining the models...')
    object_detector.train(train_data, validation_data, epochs, batch_size)
  else:
    object_detector.create_model()

  return object_detector
def from_squad(cls,
               filename,
               model_spec,
               is_training=True,
               version_2_with_negative=False,
               cache_dir=None):
  """Loads SQuAD-format data and preprocesses the text according to `model_spec`.

  Args:
    filename: Name of the file.
    model_spec: Specification for the model; resolved through `ms.get`.
    is_training: Whether the loaded data is for training or not.
    version_2_with_negative: Whether it's SQuAD 2.0 format.
    cache_dir: The cache directory to save preprocessed data. If None,
      generates a temporary directory to cache preprocessed data.

  Returns:
    QuestionAnswerDataLoader object.
  """
  model_spec = ms.get(model_spec)
  is_cached, tfrecord_file, meta_data_file, _ = _get_cache_info(
      cache_dir, os.path.basename(filename), model_spec, is_training)

  # The cache is only reused for training data; in every other case the
  # tfrecord and meta data files are (re)generated from the SQuAD file.
  if not (is_cached and is_training):
    meta_data, examples, features = cls._generate_tf_record_from_squad_file(
        filename, model_spec, tfrecord_file, is_training,
        version_2_with_negative)
    file_util.write_json_file(meta_data_file, meta_data)

    dataset, meta_data = _load(tfrecord_file, meta_data_file, model_spec,
                               is_training)
    return QuestionAnswerDataLoader(dataset, meta_data['size'],
                                    meta_data['version_2_with_negative'],
                                    examples, features, filename)

  # Cached training data: load it directly, with no examples or features.
  dataset, meta_data = _load(tfrecord_file, meta_data_file, model_spec,
                             is_training)
  return QuestionAnswerDataLoader(
      dataset=dataset,
      size=meta_data['size'],
      version_2_with_negative=meta_data['version_2_with_negative'],
      examples=[],
      features=[],
      squad_file=filename)
def create(cls,
           train_data,
           model_spec='average_word_vec',
           validation_data=None,
           batch_size=None,
           epochs=3,
           steps_per_epoch=None,
           shuffle=False,
           do_train=True):
  """Builds a text classifier and, unless disabled, trains it.

  Args:
    train_data: Training data; its `index_to_label` is passed to the
      classifier.
    model_spec: Specification for the model; resolved through `ms.get`.
    validation_data: Validation data. If None, skips validation process.
    batch_size: Batch size for training.
    epochs: Number of epochs for training.
    steps_per_epoch: Integer or None. Total number of steps (batches of
      samples) before declaring one epoch finished and starting the next
      epoch. If `steps_per_epoch` is None, the epoch will run until the input
      dataset is exhausted.
    shuffle: Whether the data should be shuffled.
    do_train: Whether to run training. When False only `create_model()` is
      called.

  Returns:
    An instance based on TextClassifier.

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Refuse to continue when the spec does not support the active TF behavior.
  supported_versions = model_spec.compat_tf_versions
  current_version = compat.get_tf_behavior()
  if current_version not in supported_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        supported_versions, current_version))

  classifier = cls(model_spec, train_data.index_to_label, shuffle=shuffle)
  if not do_train:
    classifier.create_model()
    return classifier

  tf.compat.v1.logging.info('Retraining the models...')
  classifier.train(train_data, validation_data, epochs, batch_size,
                   steps_per_epoch)
  return classifier
def run(data_dir, export_dir, spec='efficientnet_lite0', **kwargs):
  """Runs the image-classification demo and exports TFLite and SavedModel.

  Args:
    data_dir: Folder loaded by `ImageClassifierDataLoader.from_folder`.
    export_dir: Location passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
  """
  spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  dataset = ImageClassifierDataLoader.from_folder(data_dir)
  train_data, holdout = dataset.split(0.8)
  validation_data, test_data = holdout.split(0.5)

  model = image_classifier.create(
      train_data, model_spec=spec, validation_data=validation_data, **kwargs)

  _, accuracy = model.evaluate(test_data)
  print('Test accuracy: %f' % accuracy)

  # Exports to TFLite and SavedModel, with label file.
  model.export(
      export_dir,
      export_format=[ExportFormat.TFLITE, ExportFormat.SAVED_MODEL])
def run(data_dir,
        tflite_filename,
        label_filename,
        spec='efficientnet_b0',
        **kwargs):
  """Runs the image-classification demo and exports a model file and a label file.

  Args:
    data_dir: Folder loaded by `ImageClassifierDataLoader.from_folder`.
    tflite_filename: First argument passed to `model.export`.
    label_filename: Second argument passed to `model.export`.
    spec: Model specification, or a name accepted by `model_spec.get`.
    **kwargs: Extra keyword arguments forwarded to `image_classifier.create`.
  """
  spec = model_spec.get(spec)

  # 80% train; the remaining 20% is halved into validation and test.
  dataset = ImageClassifierDataLoader.from_folder(data_dir)
  train_data, holdout = dataset.split(0.8)
  validation_data, test_data = holdout.split(0.5)

  model = image_classifier.create(
      train_data,
      model_export_format=ModelExportFormat.TFLITE,
      model_spec=spec,
      validation_data=validation_data,
      **kwargs)

  _, accuracy = model.evaluate(test_data)
  print('Test accuracy: %f' % accuracy)

  model.export(tflite_filename, label_filename)
def create(cls,
           train_data,
           model_spec='average_word_vec',
           validation_data=None,
           batch_size=None,
           epochs=3,
           shuffle=False,
           do_train=True):
  """Builds a text classifier and, unless disabled, trains it.

  Args:
    train_data: Training data; its `index_to_label` is passed to the
      classifier.
    model_spec: Specification for the model; resolved through `ms.get`.
    validation_data: Validation data. If None, skips validation process.
    batch_size: Batch size for training.
    epochs: Number of epochs for training.
    shuffle: Whether the data should be shuffled.
    do_train: Whether to run training. When False only `create_model()` is
      called.

  Returns:
    An instance based on TextClassifier.

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Refuse to continue when the spec does not support the active TF behavior.
  supported_versions = model_spec.compat_tf_versions
  current_version = compat.get_tf_behavior()
  if current_version not in supported_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        supported_versions, current_version))

  classifier = cls(model_spec, train_data.index_to_label, shuffle=shuffle)
  if not do_train:
    classifier.create_model()
    return classifier

  tf.compat.v1.logging.info('Retraining the models...')
  classifier.train(train_data, validation_data, epochs, batch_size)
  return classifier
def from_csv(cls,
             filename,
             text_column,
             label_column,
             fieldnames=None,
             model_spec='average_word_vec',
             is_training=True,
             delimiter=',',
             quotechar='"',
             shuffle=False,
             cache_dir=None):
  """Loads labelled text from a csv file and preprocesses it per `model_spec`.

  Args:
    filename: Name of the file.
    text_column: String, Column name for input text.
    label_column: String, Column name for labels.
    fieldnames: A sequence, used in csv.DictReader. If fieldnames is omitted,
      the values in the first row of file f will be used as the fieldnames.
    model_spec: Specification for the model; resolved through `ms.get`.
    is_training: Whether the loaded data is for training or not.
    delimiter: Character used to separate fields.
    quotechar: Character used to quote fields containing special characters.
    shuffle: boolean, if shuffle, random shuffle data.
    cache_dir: The cache directory to save preprocessed data. If None,
      generates a temporary directory to cache preprocessed data.

  Returns:
    TextDataset containing text, labels and other related info.
  """
  model_spec = ms.get(model_spec)
  csv_name = os.path.basename(filename)

  is_cached, tfrecord_file, meta_data_file, vocab_file = cls._get_cache_info(
      cache_dir, csv_name, model_spec, is_training)
  if is_cached:
    # Preprocessed data already exists; skip reading the csv entirely.
    return cls._load_data(tfrecord_file, meta_data_file, model_spec)

  rows = cls._read_csv(filename, fieldnames, delimiter, quotechar)
  if shuffle:
    random.shuffle(rows)

  # Distinct labels, in sorted order.
  label_names = sorted({row[label_column] for row in rows})

  # One example per row; the guid combines the file name and the row index.
  examples = [
      classifier_data_lib.InputExample('%s-%d' % (csv_name, index),
                                       row[text_column], None,
                                       row[label_column])
      for index, row in enumerate(rows)
  ]

  # Write the preprocessed data and assets, then read them back.
  cls._save_data(examples, model_spec, label_names, tfrecord_file,
                 meta_data_file, vocab_file, is_training)
  return cls._load_data(tfrecord_file, meta_data_file, model_spec)
def test_get_raises(self):
  """Checks that `ms.get` raises KeyError for an unregistered spec name."""
  self.assertRaises(KeyError, ms.get, 'not_exist_model_spec')
def test_get_not_none(self, model):
  """Checks that `ms.get` returns something other than None for `model`.

  Args:
    model: Argument passed to `ms.get`.
  """
  self.assertIsNotNone(ms.get(model))
def test_get_not_none_recommendation_models(self, model):
  """Checks that `ms.get` returns non-None for a spec built from test inputs.

  Args:
    model: Argument passed to `ms.get`.
  """
  input_spec = recommendation_testutil.get_input_spec()
  model_hparams = recommendation_testutil.get_model_hparams()
  resolved = ms.get(
      model, input_spec=input_spec, model_hparams=model_hparams)
  self.assertIsNotNone(resolved)
def from_folder(cls,
                filename,
                model_spec='average_word_vec',
                is_training=True,
                class_labels=None,
                shuffle=True,
                cache_dir=None):
  """Loads labelled text from a folder tree and preprocesses it per `model_spec`.

  Assume the text data of the same label are in the same subdirectory. Each
  file is one text.

  Args:
    filename: Name of the file.
    model_spec: Specification for the model; resolved through `ms.get`.
    is_training: Whether the loaded data is for training or not.
    class_labels: Class labels that should be considered. Name of the
      subdirectory not in `class_labels` will be ignored. If None, all the
      subdirectories will be considered.
    shuffle: boolean, if shuffle, random shuffle data.
    cache_dir: The cache directory to save preprocessed data. If None,
      generates a temporary directory to cache preprocessed data.

  Returns:
    TextDataset containing text, labels and other related info.

  Raises:
    ValueError: If no text file is found.
  """
  model_spec = ms.get(model_spec)
  data_root = os.path.abspath(filename)
  folder_name = os.path.basename(data_root)

  is_cached, tfrecord_file, meta_data_file, vocab_file = cls._get_cache_info(
      cache_dir, folder_name, model_spec, is_training)
  if is_cached:
    # Preprocessed data already exists; skip scanning the folder entirely.
    return cls._load_data(tfrecord_file, meta_data_file, model_spec)

  # Collect the path of every text file that should be loaded.
  if not class_labels:
    all_text_paths = list(tf.io.gfile.glob(data_root + r'/*/*'))
  else:
    all_text_paths = []
    for class_label in class_labels:
      pattern = os.path.join(data_root, class_label) + r'/*'
      all_text_paths.extend(list(tf.io.gfile.glob(pattern)))

  if len(all_text_paths) == 0:
    raise ValueError('Text size is zero')

  if shuffle:
    random.shuffle(all_text_paths)

  # Label names come from `class_labels` when given, otherwise from the
  # subdirectories of the data root.
  if class_labels:
    label_names = sorted(class_labels)
  else:
    label_names = sorted(
        entry for entry in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, entry)))

  # One example per file; the label is the name of the file's parent directory.
  examples = []
  for index, text_path in enumerate(all_text_paths):
    with tf.io.gfile.GFile(text_path, 'r') as text_file:
      text = text_file.read()
    examples.append(
        classifier_data_lib.InputExample(
            '%s-%d' % (folder_name, index), text, None,
            os.path.basename(os.path.dirname(text_path))))

  # Write the preprocessed data and assets, then read them back.
  cls._save_data(examples, model_spec, label_names, tfrecord_file,
                 meta_data_file, vocab_file, is_training)
  return cls._load_data(tfrecord_file, meta_data_file, model_spec)
def create(train_data,
           model_spec='efficientnet_lite0',
           validation_data=None,
           batch_size=None,
           epochs=None,
           train_whole_model=None,
           dropout_rate=None,
           learning_rate=None,
           momentum=None,
           shuffle=False,
           use_augmentation=False,
           use_hub_library=True,
           warmup_steps=None,
           model_dir=None,
           do_train=True):
  """Builds an image classifier and, unless disabled, retrains it on `train_data`.

  Args:
    train_data: Training data. Its `index_to_label` is passed to the
      classifier, and the loader itself is passed as `representative_data`.
    model_spec: Specification for the model; resolved through `ms.get`.
    validation_data: Validation data. If None, skips validation process.
    batch_size: Number of samples per training step.
    epochs: Number of epochs for training.
    train_whole_model: If true, the Hub module is trained together with the
      classification layer on top. Otherwise, only train the top
      classification layer.
    dropout_rate: The rate for dropout.
    learning_rate: Base learning rate when train batch size is 256. Linear to
      the batch size.
    momentum: a Python float forwarded to the optimizer. Only used when
      `use_hub_library` is True.
    shuffle: Whether the data should be shuffled.
    use_augmentation: Use data augmentation for preprocessing.
    use_hub_library: Use `make_image_classifier_lib` from tensorflow hub to
      retrain the model.
    warmup_steps: Number of warmup steps for warmup schedule on learning rate.
      If None, the default warmup_steps is used which is the total training
      steps in two epochs. Only used when `use_hub_library` is False.
    model_dir: The location of the model checkpoint files. Only used when
      `use_hub_library` is False.
    do_train: Whether to run training. When False the model is only created,
      with loss and metrics attached.

  Returns:
    An instance of ImageClassifier class.

  Raises:
    ValueError: If the current TF behavior is not listed in
      `model_spec.compat_tf_versions`.
  """
  model_spec = ms.get(model_spec)

  # Refuse to continue when the spec does not support the active TF behavior.
  supported_versions = model_spec.compat_tf_versions
  current_version = compat.get_tf_behavior()
  if current_version not in supported_versions:
    raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
        supported_versions, current_version))

  # Hyperparameters shared by both hparams builders.
  shared_hparams = dict(
      batch_size=batch_size,
      train_epochs=epochs,
      do_fine_tuning=train_whole_model,
      dropout_rate=dropout_rate,
      learning_rate=learning_rate)
  if use_hub_library:
    hparams = get_hub_lib_hparams(momentum=momentum, **shared_hparams)
  else:
    hparams = train_image_classifier_lib.HParams.get_hparams(
        warmup_steps=warmup_steps, model_dir=model_dir, **shared_hparams)

  image_classifier = ImageClassifier(
      model_spec,
      train_data.index_to_label,
      shuffle=shuffle,
      hparams=hparams,
      use_augmentation=use_augmentation,
      representative_data=train_data)

  if not do_train:
    # Used in evaluation.
    image_classifier.create_model(with_loss_and_metrics=True)
    return image_classifier

  tf.compat.v1.logging.info('Retraining the models...')
  image_classifier.train(train_data, validation_data)
  return image_classifier
def testEfficientDetLite0(self):
  """Trains EfficientDet-Lite0 on a tiny dataset and checks evaluation and exports.

  Covers: training for one epoch, Keras-model evaluation, SavedModel export,
  FP32 TFLite export with metadata JSON, TFLite-model evaluation, and (except
  on TF 2.4) INT8 / FP16 / DYNAMIC quantized TFLite exports.
  """
  # Gets model specification.
  spec = model_spec.get('efficientdet_lite0')

  # Prepare data.
  images_dir, annotations_dir, label_map = test_util.create_pascal_voc(
      self.get_temp_dir())
  data = object_detector_dataloader.DataLoader.from_pascal_voc(
      images_dir, annotations_dir, label_map)

  # Train the model.
  task = object_detector.create(data, spec, batch_size=1, epochs=1)
  self.assertEqual(spec.config.num_classes, 2)

  # Evaluate trained model
  metrics = task.evaluate(data)
  self.assertIsInstance(metrics, dict)
  self.assertGreaterEqual(metrics['AP'], 0)

  # Export the model to saved model.
  output_path = os.path.join(self.get_temp_dir(), 'saved_model')
  task.export(self.get_temp_dir(), export_format=ExportFormat.SAVED_MODEL)
  self.assertTrue(os.path.isdir(output_path))
  self.assertNotEqual(len(os.listdir(output_path)), 0)

  # Export the model to TFLite model.
  output_path = os.path.join(self.get_temp_dir(), 'float.tflite')
  task.export(
      self.get_temp_dir(),
      tflite_filename='float.tflite',
      quantization_type=QuantizationType.FP32,
      export_format=ExportFormat.TFLITE,
      with_metadata=True,
      export_metadata_json_file=True)
  # Checks the sizes of the float32 TFLite model files in bytes.
  model_size = 13476379
  self.assertNear(os.path.getsize(output_path), model_size, 50000)

  json_output_file = os.path.join(self.get_temp_dir(), 'float.json')
  self.assertTrue(os.path.isfile(json_output_file))
  self.assertGreater(os.path.getsize(json_output_file), 0)
  expected_json_file = test_util.get_test_data_path(
      'efficientdet_lite0_metadata.json')
  self.assertTrue(filecmp.cmp(json_output_file, expected_json_file))

  # Evaluate the TFLite model. The result must be rebound to `metrics`;
  # otherwise the assertions below would only re-check the Keras-model
  # metrics computed earlier and never verify the TFLite evaluation.
  metrics = task.evaluate_tflite(output_path, data)
  self.assertIsInstance(metrics, dict)
  self.assertGreaterEqual(metrics['AP'], 0)

  # Export the model to quantized TFLite model.
  # TODO(b/175173304): Skips the test for stable tensorflow 2.4 for now since
  # it fails. Will revert this change after TF upgrade.
  if tf.__version__.startswith('2.4'):
    return

  # Not include QuantizationType.FP32 here since we have already tested it
  # above together with metadata file test.
  types = (QuantizationType.INT8, QuantizationType.FP16,
           QuantizationType.DYNAMIC)
  # The sizes of the TFLite model files in bytes.
  model_sizes = (4439987, 6840331, 4289875)
  for quantization_type, model_size in zip(types, model_sizes):
    filename = quantization_type.name.lower() + '.tflite'
    output_path = os.path.join(self.get_temp_dir(), filename)
    task.export(
        self.get_temp_dir(),
        quantization_type=quantization_type,
        tflite_filename=filename,
        export_format=ExportFormat.TFLITE)
    self.assertNear(os.path.getsize(output_path), model_size, 50000)