def train():
    # Define the optimizer
    opt = optimizers.Adam(lr)
    # Define callbacks: overfitting protection, logging, automatic checkpointing
    callback = [
        callbacks.TensorBoard(log_dir=log_dir, update_freq='batch'),
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, patience=3),
        callbacks.EarlyStopping(monitor='loss', patience=4),
        callbacks.ModelCheckpoint(filepath=checkpoint_filepath, verbose=1),
        callbacks.ModelCheckpoint(filepath=save_path,
                                  monitor='val_categorical_accuracy',
                                  save_best_only=True,
                                  mode='max',
                                  verbose=1)
    ]
    # Distributed training
    with strategy.scope():
        model = make_model()
        if os.path.exists(save_path):
            model.load_weights(save_path)
        model.compile(optimizer=opt,
                      loss='categorical_crossentropy',
                      metrics=['categorical_accuracy'])
        model.fit(train_db, epochs=epochs, validation_data=test_db,
                  callbacks=callback)  # start training
        model.evaluate(test_db)  # evaluate on the test set
        model.save(save_path)  # save the final model
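# train() above relies on module-level names (strategy, lr, log_dir,
# checkpoint_filepath, save_path, train_db, test_db, epochs, make_model).
# A minimal sketch of plausible definitions -- all values are placeholders,
# not the original project's configuration:
import os
import tensorflow as tf
from tensorflow.keras import callbacks, optimizers

strategy = tf.distribute.MirroredStrategy()  # one replica per visible GPU
lr = 1e-3
epochs = 20
log_dir = 'logs'
checkpoint_filepath = 'ckpt/weights.{epoch:02d}.h5'
save_path = 'best_model.h5'
# train_db / test_db would be batched tf.data datasets of (image, one-hot label) pairs.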
def get_default_callbacks(model_path: Path,
                          monitor: str = 'val_acc',
                          base_patience: int = 3,
                          lr_reduce_factor: float = 0.5,
                          min_lr: float = 1e-7,
                          verbose: int = 1,
                          log_dir: Path = None,
                          gradients: bool = True,  # TODO: change default to False
                          confusion_matrix: bool = True,  # TODO: change default to False
                          loss: Callable = None,
                          data: Tuple[np.ndarray, np.ndarray] = None,
                          classes: list = None,
                          heatmap_options: dict = None,
                          csv_logdir: Path = None,
                          csv_append: bool = False,
                          save_latest: bool = False):
    callbacks = [
        clb.ReduceLROnPlateau(monitor=monitor,
                              factor=lr_reduce_factor,
                              min_lr=min_lr,
                              patience=base_patience,
                              verbose=verbose),
        clb.EarlyStopping(monitor=monitor,
                          patience=(2 * base_patience + 1),
                          verbose=verbose),
        clb.ModelCheckpoint(monitor=monitor,
                            filepath=model_path,
                            save_best_only=True,
                            verbose=verbose)
    ]
    if log_dir:
        callbacks.append(
            ExtendedTensorBoard(log_dir, gradients, confusion_matrix, loss,
                                data, classes, heatmap_options))
    if csv_logdir:
        callbacks.append(clb.CSVLogger(csv_logdir, append=csv_append))
    if save_latest:
        latest_path = model_path.parent / f'{model_path.stem}_latest{model_path.suffix}'
        callbacks.append(
            clb.ModelCheckpoint(monitor=monitor, filepath=latest_path))
    return callbacks
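# Hypothetical usage of get_default_callbacks -- the paths and the model are
# placeholders, and the monitor follows the function's own 'val_acc' default
# (newer TF releases name the metric 'val_accuracy'):
from pathlib import Path

cbs = get_default_callbacks(model_path=Path('models/best.h5'),
                            csv_logdir=Path('logs/history.csv'),
                            save_latest=True)
model.fit(x_train, y_train, validation_split=0.1, epochs=30, callbacks=cbs)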
def model_train(model, x_train, x_val, epochs, train_step, val_step, weights_path):
    """
    Train the model.
    :param model: the model, already defined
    :param x_train: training data
    :param x_val: validation data
    :param epochs: number of epochs
    :param train_step: training steps per epoch
    :param val_step: validation steps per epoch
    :param weights_path: path for saving the weights
    :return: None
    """
    # With an .h5 filepath the checkpoint is saved as HDF5 rather than the usual
    # TensorFlow ckpt format; monitor is the metric used to judge model quality.
    cbk = [
        callbacks.ModelCheckpoint(filepath=weights_path,
                                  save_best_only=True,
                                  save_weights_only=True,
                                  monitor='val_loss'),
        callbacks.EarlyStopping(patience=10, min_delta=1e-3)
    ]
    # Key point: fit vs. fit_generator.
    # fit used to require the whole training set to fit in memory, while
    # fit_generator handled datasets too large for memory; since TF 2.1 the
    # generator path has been folded into fit.
    history = model.fit(x_train,
                        steps_per_epoch=train_step,
                        epochs=epochs,
                        validation_data=x_val,
                        validation_steps=val_step,
                        callbacks=cbk,
                        verbose=1)
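# Since TF 2.1 model.fit consumes tf.data datasets and generators directly,
# which is what the comment above refers to. A minimal sketch of a pipeline
# that could serve as x_train here (shapes and batch size are assumptions):
import tensorflow as tf

def make_dataset(images, labels, batch_size=32):
    # shuffled, batched, infinitely repeating pipeline for use with steps_per_epoch
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    return ds.shuffle(1000).batch(batch_size).repeat().prefetch(tf.data.AUTOTUNE)

# train_step would then be len(images) // batch_size to cover the data once per epoch.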
def main():
    feature_desc = {
        'text': tf.io.FixedLenFeature([], tf.string, default_value=''),
        'stars': tf.io.FixedLenFeature([], tf.float32, default_value=0.0)
    }

    def _parse_data(proto):
        parsed = tf.io.parse_single_example(proto, feature_desc)
        return (parsed['text'], parsed['stars'])

    raw_data = tf.data.TFRecordDataset([review_file])
    dataset = raw_data.map(_parse_data)
    dataset = dataset.shuffle(3000).batch(128)
    model = get_review_sent_model()
    utils.plot_model(model, 'review_sent_model.png', show_shapes=True)
    # using cpu only because of tensorflow hub bug
    with tf.device('/CPU:0'):
        model.compile(optimizers.Adam(), 'mae')
        model_callbacks = [
            callbacks.ModelCheckpoint(
                # cannot use whole-model saving because of hub layer
                # filepath='train/review_model_{epoch}.h5',
                filepath='train/review_model_{epoch}.ckpt',
                save_weights_only=True,
                verbose=1)
        ]
        model.fit(dataset, epochs=6, callbacks=model_callbacks)
        # save weights only, for the same hub-layer reason as above
        model.save_weights('train/review_model_final.ckpt')
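# Because only weights are checkpointed (whole-model H5 saving fails with the
# hub layer), inference requires rebuilding the architecture first. A sketch,
# assuming the same get_review_sent_model builder and the epoch-6 weights:
model = get_review_sent_model()
model.load_weights('train/review_model_6.ckpt')
print(model.predict(tf.constant(['great food, slow service'])))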
def train_by_fit(optimizer, loss, train_data, train_steps, validation_data, validation_steps):
    """
    Train with model.fit, which reports training time per epoch and takes
    callbacks in the standard way.
    :param optimizer: the optimizer
    :param loss: custom loss function
    :param train_data: training data wrapped as tf.data
    :param validation_data: validation data
    :param train_steps: steps per epoch
    :param validation_steps: as above, for validation
    :return: None
    """
    cbk = [
        callbacks.ReduceLROnPlateau(verbose=1),
        callbacks.EarlyStopping(patience=10, verbose=1),
        callbacks.ModelCheckpoint('./model/yolov3_{val_loss:.04f}.h5',
                                  save_best_only=True,
                                  save_weights_only=True)
    ]
    model = yolo_body()
    model.compile(optimizer=optimizer, loss=loss)

    # initial_epoch can be passed here to resume an earlier run
    model.fit(train_data,
              steps_per_epoch=max(1, train_steps),
              validation_data=validation_data,
              validation_steps=max(1, validation_steps),
              epochs=cfg.epochs,
              callbacks=cbk)
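# As the comment above notes, initial_epoch resumes an interrupted run; a
# sketch with a placeholder weights path and epoch counter:
model.load_weights('./model/yolov3_0.1234.h5')  # best checkpoint so far
model.fit(train_data,
          steps_per_epoch=max(1, train_steps),
          validation_data=validation_data,
          validation_steps=max(1, validation_steps),
          initial_epoch=50,  # epoch at which the previous run stopped
          epochs=cfg.epochs,
          callbacks=cbk)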
def fit_generator(self,
                  train_generator,
                  val_generator=None,
                  epochs=10,
                  monitor='val_loss',
                  patience_count=10,
                  metrics=['accuracy'],
                  outdir=""):
    self.model.compile(optimizer=optimizers.Adam(learning_rate=0.00006),
                       loss='binary_crossentropy',
                       metrics=metrics)
    training_callbacks = [
        callbacks.EarlyStopping(monitor=monitor, patience=patience_count),
        callbacks.ModelCheckpoint(filepath=outdir + 'model.{epoch}.h5',
                                  save_best_only=True,
                                  monitor=monitor,
                                  mode='auto')
    ]
    if val_generator is None:
        # no validation metrics to monitor, so train without the callbacks
        self.training_history = self.model.fit(train_generator,
                                               epochs=epochs,
                                               verbose=1)
    else:
        self.training_history = self.model.fit(train_generator,
                                               validation_data=val_generator,
                                               callbacks=training_callbacks,
                                               epochs=epochs,
                                               verbose=1)
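# A sketch of generators that could feed the method above, assuming a
# two-class directory layout under 'data/' (path and image size are
# placeholders); class_mode='binary' matches the binary_crossentropy loss:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)
train_generator = datagen.flow_from_directory('data/', target_size=(224, 224),
                                              class_mode='binary', subset='training')
val_generator = datagen.flow_from_directory('data/', target_size=(224, 224),
                                            class_mode='binary', subset='validation')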
def fit_model(model, x_train, y_train, x_valid, y_valid, ckpt_path):
    monitor = "val_loss"
    K.clear_session()
    history = model.fit(x=x_train,
                        y=y_train,
                        batch_size=16,
                        epochs=50,
                        verbose=1,
                        callbacks=[
                            callbacks.ModelCheckpoint(filepath=ckpt_path,
                                                      monitor=monitor,
                                                      verbose=2,
                                                      save_best_only=True,
                                                      save_weights_only=True),
                            callbacks.EarlyStopping(monitor=monitor,
                                                    min_delta=1e-4,
                                                    patience=25,
                                                    verbose=2),
                            callbacks.ReduceLROnPlateau(monitor=monitor,
                                                        factor=0.8,
                                                        patience=3,
                                                        verbose=2,
                                                        min_lr=1e-4)
                        ],
                        validation_data=(x_valid, y_valid))
    return history
def train(model, data, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like
                 `((x_train, y_train), (x_test, y_test))`
    :param args: arguments
    :return: The trained model
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})
    """
    # Training without data augmentation:
    model.fit([x_train, y_train], [y_train, x_train],
              batch_size=args.batch_size,
              epochs=args.epochs,
              validation_data=[[x_test, y_test], [y_test, x_test]],
              callbacks=[log, tb, checkpoint, lr_decay])
    """

    # Begin: Training with data augmentation ----------------------------------
    def train_generator(x, y, batch_size, shift_fraction=0.):
        # shift up to 2 pixels for MNIST
        train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                           height_shift_range=shift_fraction)
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while True:
            x_batch, y_batch = next(generator)
            yield ([x_batch, y_batch], [y_batch, x_batch])

    # Training with data augmentation. If shift_fraction=0., also no augmentation.
    model.fit_generator(
        generator=train_generator(x_train, y_train, args.batch_size,
                                  args.shift_fraction),
        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
        epochs=args.epochs,
        validation_data=[[x_test, y_test], [y_test, x_test]],
        callbacks=[log, tb, checkpoint, lr_decay])
    # End: Training with data augmentation ------------------------------------

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)
    return model
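# margin_loss above is the CapsNet margin loss from Sabour et al. (2017);
# a minimal sketch consistent with the capsule-length output compiled above
# (m_plus/m_minus/lam are the paper's defaults, not values from this repo):
import tensorflow as tf

def margin_loss(y_true, y_pred, m_plus=0.9, m_minus=0.1, lam=0.5):
    # L_k = T_k * max(0, m+ - ||v_k||)^2 + lam * (1 - T_k) * max(0, ||v_k|| - m-)^2
    present = y_true * tf.square(tf.maximum(0., m_plus - y_pred))
    absent = lam * (1. - y_true) * tf.square(tf.maximum(0., y_pred - m_minus))
    return tf.reduce_mean(tf.reduce_sum(present + absent, axis=-1))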
def compile_model():
    model = create_model()
    model.compile(optimizer=optimizers.Adam(),
                  loss=deepball_loss_function,
                  metrics=[deepball_precision])
    model_checkpoint = callbacks.ModelCheckpoint(filepath='footballcnn.h5',
                                                 verbose=1)
    train_datagen = DataGenerator(file_path=IMAGE_PATH, config_path=CONFIG_PATH)
    model.fit(x=train_datagen, epochs=6, callbacks=[model_checkpoint])
    model.save_weights('footballcnn.h5')
    """
    img = preprocessing.image.load_img('1frame1199.jpg', target_size=(360, 640, 3))
    input1 = preprocessing.image.img_to_array(img)
    input1 = input1.reshape([1, 360, 640, 3])
    input1 = input1 / 255.
    b = model.predict(input1)
    print(b.shape)
    b = b[0, :, :, 0]
    b = np.expand_dims(b, axis=2)
    preprocessing.image.save_img('pred.jpg', b)
    c = np.unravel_index(b.argmax(), b.shape)
    print(c)
    """
    return
def train(h5_dataset_file, output_folder):
    X_t = HDF5Matrix(h5_dataset_file, 'X_train')
    y_t = HDF5Matrix(h5_dataset_file, 'y_train')
    X_v = HDF5Matrix(h5_dataset_file, 'X_val')
    y_v = HDF5Matrix(h5_dataset_file, 'y_val')

    path = output_folder + '/equalization_model-{epoch:03d}-{mean_squared_error:03f}-{val_mean_squared_error:03f}.h5'
    model_checkpoint = callbacks.ModelCheckpoint(path,
                                                 verbose=1,
                                                 monitor='val_loss',
                                                 save_best_only=False,
                                                 mode='auto')
    earlyStopping = callbacks.EarlyStopping(monitor='val_loss',
                                            patience=10,
                                            verbose=0,
                                            mode='min')
    network = COVID19EqualizationNetwork(target_image_size=(1024, 1024))
    network.build_model(True,
                        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                        loss_function='mae',
                        additional_metrics=['mse'],
                        pretrained_weights_file_path=None)
    model = network.model
    history = model.fit(X_t, y_t,
                        shuffle=False,  # HDF5Matrix requires sequential access
                        batch_size=3,
                        epochs=100,
                        validation_data=(X_v, y_v),
                        callbacks=[model_checkpoint, earlyStopping],
                        verbose=1,
                        max_queue_size=100)
    final_path = output_folder + 'equalization-last_model.h5'
    model.save(final_path, overwrite=True)

    # the tracked metric is MSE, not classification accuracy
    mse = history.history['mean_squared_error']
    val_mse = history.history['val_mean_squared_error']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(mse))

    # plot MSE and loss curves on separate figures so each can be saved alone
    fig1, ax1 = plt.subplots()
    ax1.plot(epochs, mse, 'bo', label='Training MSE')
    ax1.plot(epochs, val_mse, 'b', label='Validation MSE')
    ax1.set_title('Training and validation MSE')
    ax1.legend()
    fig1.savefig(output_folder + 'accuracy_history.jpg')

    fig2, ax2 = plt.subplots()
    ax2.plot(epochs, loss, 'bo', label='Training loss')
    ax2.plot(epochs, val_loss, 'b', label='Validation loss')
    ax2.set_title('Training and validation loss')
    ax2.legend()
    fig2.savefig(output_folder + 'loss_history.jpg')
def get_callbacks(model_file, logging_file):
    callbacks = list()
    # save the model
    callbacks.append(cb.ModelCheckpoint(model_file,
                                        monitor='val_loss',
                                        save_best_only=True,
                                        mode='auto'))
    # save log file
    callbacks.append(cb.CSVLogger(logging_file, append=True))
    return callbacks
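# Hypothetical usage, pairing a checkpoint file with its CSV log
# (file names and the model are placeholders):
cbs = get_callbacks('unet_best.h5', 'training_log.csv')
model.fit(x_train, y_train, validation_split=0.2, epochs=50, callbacks=cbs)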
def f_train_model(model, inpx, inpy, model_weights, num_epochs=5, batch_size=64):
    '''
    Train model. Returns just history.history.
    '''
    cv_fraction = 0.33  # fraction of data held out for cross-validation
    history = model.fit(x=inpx,
                        y=inpy,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        verbose=1,
                        callbacks=[
                            callbacks.EarlyStopping(monitor='val_loss',
                                                    min_delta=0,
                                                    patience=20,
                                                    verbose=1),
                            callbacks.ModelCheckpoint(model_weights,
                                                      save_best_only=True,
                                                      monitor='val_loss',
                                                      mode='min')
                        ],
                        validation_split=cv_fraction,
                        shuffle=True)
    print("Number of parameters", model.count_params())
    return history.history
def train_model(model: keras.Sequential, data, epochs):
    logdir = f'logs/fit/{epochs}/' + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = callbacks.TensorBoard(log_dir=logdir,
                                                 histogram_freq=1)
    early_stop_callback = callbacks.EarlyStopping(monitor='val_accuracy',
                                                  patience=5,
                                                  min_delta=0.0001,
                                                  restore_best_weights=True,
                                                  verbose=0)
    # hold the learning rate for two epochs, then decay it by 10% per epoch
    learning_rate_callback = callbacks.LearningRateScheduler(
        lambda epoch, lr: lr if epoch < 2 else lr * 0.9, verbose=0)
    save_callback = callbacks.ModelCheckpoint(
        filepath='logs/model' + datetime.now().strftime("%Y%m%d-%H%M%S"),
        save_best_only=True)

    x_train = to_dict(data['training_data'])
    y_train = tf.convert_to_tensor(data['training_labels'])
    x_val = to_dict(data['test_data'])
    y_val = tf.convert_to_tensor(data['test_labels'])

    training_history = model.fit(x_train,
                                 y_train,
                                 epochs=epochs,
                                 validation_data=(x_val, y_val),
                                 callbacks=[
                                     tensorboard_callback, early_stop_callback,
                                     learning_rate_callback, save_callback
                                 ])
    return training_history
def fit_model(self, X_train, y_train, X_val, y_val):
    y_train = to_categorical(y_train)
    y_val = to_categorical(y_val)

    early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5)
    model_checkpoint = callbacks.ModelCheckpoint('multi-digit_cnn_new.h5',
                                                 save_best_only=True)
    optimizer = Adam(learning_rate=1e-3, amsgrad=True)
    tb = callbacks.TensorBoard(log_dir="ccnlogs/{}".format(time()))

    self.model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    # one target per digit position: slice the (N, 5, num_classes) labels
    self.history = self.model.fit(
        X_train,
        [y_train[:, 0], y_train[:, 1], y_train[:, 2],
         y_train[:, 3], y_train[:, 4]],
        batch_size=512,
        epochs=12,
        shuffle=True,
        validation_data=(X_val, [y_val[:, 0], y_val[:, 1], y_val[:, 2],
                                 y_val[:, 3], y_val[:, 4]]),
        callbacks=[early_stopping, model_checkpoint])
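# Shape check for the per-digit slicing above: to_categorical on integer
# labels of shape (N, 5) yields (N, 5, num_classes), and y[:, i] is the
# (N, num_classes) target for output head i. A toy example:
import numpy as np
from tensorflow.keras.utils import to_categorical

labels = np.array([[1, 2, 3, 4, 5], [9, 8, 7, 6, 0]])  # two 5-digit samples
one_hot = to_categorical(labels)
print(one_hot.shape, one_hot[:, 0].shape)  # (2, 5, 10) (2, 10)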
def callbacks(self):
    """Set the callbacks used by model.fit."""
    checkpoint = KC.ModelCheckpoint(filepath=self.config.checkpoint,
                                    monitor='val_loss',
                                    verbose=1,
                                    save_weights_only=False,
                                    period=self.config.ckpt_interval)

    def lr_schedule(epoch):
        # step decay: multiply by 0.3 after 30%, 60% and 80% of total epochs
        lr = self.config.init_lr
        total_epochs = self.config.epochs
        check_1 = int(total_epochs * 0.3)
        check_2 = int(total_epochs * 0.6)
        check_3 = int(total_epochs * 0.8)
        if epoch > check_1:
            lr *= 3e-1
        if epoch > check_2:
            lr *= 3e-1
        if epoch > check_3:
            lr *= 3e-1
        return lr

    lr_scheduler = KC.LearningRateScheduler(lr_schedule)

    tensorboard = KC.TensorBoard(
        log_dir=self.config.tb_logs,
        batch_size=self.config.batch_size,
        histogram_freq=0,  # validation data cannot be a generator
        write_graph=False,
        write_grads=False)

    callbacks = [checkpoint, lr_scheduler, tensorboard]
    # TODO bug: Failed to create a directory: out_model/mobilenet_t1/logs\train
    return callbacks
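# Worked example of the step schedule above: with init_lr = 1e-3 and
# epochs = 100, the checkpoints fall at epochs 30/60/80, so the rate is
# 1e-3 up to epoch 30, then 3e-4, 9e-5 after epoch 60, and 2.7e-5 after 80.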
def fit_model(self, data, early_stop=False):
    self.model = self.model_builder.build_model()
    X_train = data.data["X_train"]
    X_val = data.data["X_val"]
    y_train = data.data["y_train"]
    y_val = data.data["y_val"]
    file_name = self.FILE_NAME_FORMAT.format(data.identifier)

    # early stopping usually doesn't help here, so it is off by default;
    # the callback must not shadow the `early_stop` flag argument
    early_stop_cb = callbacks.EarlyStopping(monitor='val_loss',
                                            patience=self.EARLY_STOP_PATIENCE,
                                            verbose=0,
                                            mode='min')
    mcp_save = callbacks.ModelCheckpoint(file_name,
                                         save_best_only=True,
                                         monitor='val_loss',
                                         mode='min')
    cbs = [mcp_save]
    if early_stop:
        cbs.append(early_stop_cb)

    return self.model.fit(X_train,
                          y_train,
                          epochs=self.EPOCHS,
                          batch_size=self.BATCH_SIZE,
                          validation_data=(X_val, y_val),
                          callbacks=cbs)
def fit_generator(self, data):
    self.model = self.model_builder.build_model()
    train_generator = data.data['train_generator']
    validation_generator = data.data['validation_generator']

    early_stop = callbacks.EarlyStopping(monitor='val_loss',
                                         patience=self.EARLY_STOP_PATIENCE,
                                         verbose=0,
                                         mode='min')
    file_name = self.FILE_NAME_FORMAT.format(data.identifier)
    mcp_save = callbacks.ModelCheckpoint(file_name,
                                         save_best_only=True,
                                         monitor='val_loss',
                                         mode='min')
    # Model.fit accepts generators directly since TF 2.1 (fit_generator is deprecated)
    return self.model.fit(train_generator,
                          steps_per_epoch=300,
                          epochs=self.EPOCHS,
                          validation_data=validation_generator,
                          validation_steps=50,
                          callbacks=[early_stop, mcp_save])
def training(data: Tuple[preprocessing.Data, preprocessing.Data],
             options: Options, model: keras.Model, logdir: PathLike,
             extra_callbacks: Optional[List[kc.Callback]] = None) -> None:
    """Runs training.

    Args:
        data (Tuple[preprocessing.Data, preprocessing.Data]): Training and
            validation data.
        options (deepgrp.model.Options): Hyperparameters.
        model (keras.Model): Model to train.
        logdir (os.PathLike): Log / checkpoint directory.
        extra_callbacks (Optional[List[kc.Callback]]): Additional callbacks.
    """
    n_repeats = len(options.repeats_to_search) + 1
    shapes = (tf.TensorShape([None, options.vecsize, data[0].fwd.shape[0]]),
              tf.TensorShape([None, options.vecsize, n_repeats]))
    dataset = tf.data.Dataset.from_generator(fetch_batch(options, data[0]),
                                             (tf.float32, tf.float32), shapes)
    dataset_val = tf.data.Dataset.from_generator(fetch_batch(options, data[1]),
                                                 (tf.float32, tf.float32),
                                                 shapes)
    callbacks = [
        kc.TensorBoard(log_dir=logdir,
                       histogram_freq=3,
                       write_graph=True,
                       write_images=True,
                       profile_batch=1,
                       update_freq='batch'),
        kc.EarlyStopping(monitor='val_loss',
                         min_delta=0,
                         patience=options.early_stopping_th,
                         verbose=0,
                         mode='auto',
                         baseline=None,
                         restore_best_weights=True),
        kc.ModelCheckpoint(path.join(logdir, '{epoch:02d}'),
                           monitor='val_loss',
                           verbose=0,
                           mode='min',
                           save_freq='epoch',
                           save_best_only=True,
                           save_weights_only=True)
    ]
    if extra_callbacks:
        callbacks += extra_callbacks
    model.fit(dataset,
              verbose=0,
              epochs=options.n_epochs,
              steps_per_epoch=options.n_batches,
              validation_freq=1,
              shuffle=False,
              validation_data=dataset_val,
              validation_steps=1,
              callbacks=callbacks)
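# Note: the (output_types, output_shapes) form of from_generator used above is
# the legacy signature; since TF 2.4 the same pipeline can be declared with
# output_signature. A sketch under the same shape assumptions:
dataset = tf.data.Dataset.from_generator(
    fetch_batch(options, data[0]),
    output_signature=(
        tf.TensorSpec((None, options.vecsize, data[0].fwd.shape[0]), tf.float32),
        tf.TensorSpec((None, options.vecsize, n_repeats), tf.float32)))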
def _build_callback_hooks(
        self,
        models_dir: str,
        logs_dir: Optional[str] = None,
        is_training=True,
        logging_frequency=25,
        monitor_metric: str = "",
        monitor_mode: str = "",
        patience=2,
):
    """
    Build callback hooks for the training loop.

    Returns:
        callbacks_list: list of callbacks
    """
    callbacks_list: list = list()

    if is_training:
        # Model checkpoint
        if models_dir and monitor_metric:
            checkpoints_path = os.path.join(
                models_dir, RelevanceModelConstants.CHECKPOINT_FNAME)
            cp_callback = callbacks.ModelCheckpoint(
                filepath=checkpoints_path,
                save_weights_only=False,
                verbose=1,
                save_best_only=True,
                mode=monitor_mode,
                monitor=monitor_metric,
            )
            callbacks_list.append(cp_callback)

        # Early Stopping
        if monitor_metric:
            early_stopping_callback = callbacks.EarlyStopping(
                monitor=monitor_metric,
                mode=monitor_mode,
                patience=patience,
                verbose=1,
                restore_best_weights=True,
            )
            callbacks_list.append(early_stopping_callback)

    # TensorBoard
    if logs_dir:
        tensorboard_callback = callbacks.TensorBoard(log_dir=logs_dir,
                                                     histogram_freq=1,
                                                     update_freq=5)
        callbacks_list.append(tensorboard_callback)

    # Debugging/Logging
    callbacks_list.append(DebuggingCallback(self.logger, logging_frequency))

    # Add more here

    return callbacks_list