def train_top_model():
    # Load the bottleneck features and labels
    train_features = np.load(
        open(output_dir + 'bottleneck_features_train.npy', 'rb'))
    train_labels = np.load(
        open(output_dir + 'bottleneck_labels_train.npy', 'rb'))
    validation_features = np.load(
        open(output_dir + 'bottleneck_features_validation.npy', 'rb'))
    validation_labels = np.load(
        open(output_dir + 'bottleneck_labels_validation.npy', 'rb'))

    # Create the top model for the Inception V3 network: a single Dense layer
    # with softmax activation.
    top_input = Input(shape=train_features.shape[1:])
    top_output = Dense(5, activation='softmax')(top_input)
    model = Model(top_input, top_output)

    # Train the model using the bottleneck features and save the weights.
    model.compile(optimizer=SGD(lr=1e-4, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    csv_logger = CSVLogger(output_dir + 'top_model_training.csv')
    model.fit(train_features, train_labels,
              epochs=top_epochs,
              batch_size=batch_size,
              validation_data=(validation_features, validation_labels),
              callbacks=[csv_logger])
    model.save_weights(top_model_weights_path)
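The bottleneck .npy files loaded above are produced elsewhere. A minimal sketch of how they could be generated, assuming the same InceptionV3 base, class count, and directory globals used by the pruning snippet later in this section; the function name and generator settings are assumptions, not the author's code:

def save_bottleneck_features():
    # Hypothetical producer of the .npy files consumed by train_top_model();
    # train_data_dir, batch_size and output_dir are assumed module globals.
    base_model = inception_v3.InceptionV3(include_top=False,
                                          weights='imagenet',
                                          pooling='avg',
                                          input_shape=(299, 299, 3))
    datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input)
    generator = datagen.flow_from_directory(train_data_dir,
                                            target_size=(299, 299),
                                            batch_size=batch_size,
                                            class_mode=None,
                                            shuffle=False)
    features = base_model.predict_generator(generator,
                                            generator.n // batch_size)
    np.save(open(output_dir + 'bottleneck_features_train.npy', 'wb'),
            features)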
def train(model, train_ds, test_ds, epochs, STEPS_PER_EPOCH, VALIDATION_STEPS):
    """
    Train the model.
    :param model: the model
    :param train_ds: training dataset
    :param test_ds: validation dataset
    :param epochs: number of epochs
    :param STEPS_PER_EPOCH: steps per training epoch
    :param VALIDATION_STEPS: steps per validation pass
    :return: training history
    """
    # Configure and compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Log training results to CSV
    csv_logger = CSVLogger('FCN_training.log', append=False)
    # Train the model (the logger must be passed via callbacks to take effect;
    # the original created it but never used it)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=STEPS_PER_EPOCH,
                        validation_data=test_ds,
                        validation_steps=VALIDATION_STEPS,
                        callbacks=[csv_logger])
    # Save the model
    model.save('FCN_model.h5')
    return history
def _set_checkpoint(self):
    if not os.path.exists(self._weights_dir):
        os.makedirs(self._weights_dir)
    self._filepath = os.path.join(self._weights_dir, "weights-{epoch:03d}.h5")
    self._checkpoint = ModelCheckpoint(self._filepath,
                                       monitor=self._monitor,
                                       mode=self._monitor_mode,
                                       verbose=1,
                                       period=5)
    self._csv_logger = CSVLogger(os.path.join(self._weights_dir, 'log.csv'),
                                 append=True,
                                 separator=';')
    self._callbacks_list = [self._checkpoint, self._csv_logger]
    if self._lr_reducer:
        self._callbacks_list = [self._lr_reducer] + self._callbacks_list

    # Build a timestamped TensorBoard log directory
    timestamps = str(datetime.now())
    timestamps = timestamps[:timestamps.find('.')]
    timestamps = timestamps.replace(' ', '_')
    tensorboard_logdir = 'logs/{}'.format(timestamps)
    tensorboard = TensorBoard(log_dir=tensorboard_logdir)
    self._callbacks_list.append(tensorboard)
def pretrain(self, x, batch_size=256, epochs=200, optimizer='adam'):
    logger.info('Pretraining...')
    self._autoencoder.compile(optimizer=optimizer, loss='mse')

    STRFTIME = "%Y-%m-%d_%H:%M"
    csv_logger = CSVLogger(os.path.join(
        self._save_dir,
        f'pretrain_log_{datetime.now().strftime(STRFTIME)}.csv'))
    callback_tensorboard = TensorBoard(
        log_dir=os.path.join(self._log_dir, str(datetime.now())),
        histogram_freq=2,
        batch_size=32,
        write_graph=True,
        write_grads=True,
        write_images=False)

    # begin training
    t0 = time()
    try:
        self._autoencoder.fit(x, x,
                              batch_size=batch_size,
                              epochs=epochs,
                              callbacks=[csv_logger, callback_tensorboard],
                              verbose=False,
                              validation_split=0.1)
    except ValueError:
        # fallback when fit(x, x) raises a shape error (e.g. list-valued x):
        # use the last element of x as the reconstruction target
        self._autoencoder.fit(x, x[-1],
                              batch_size=batch_size,
                              epochs=epochs,
                              callbacks=[csv_logger, callback_tensorboard],
                              verbose=False,
                              validation_split=0.1)
    logger.info('Pretraining time: {}'.format(str(time() - t0)))
    self._autoencoder.save(
        os.path.join(self._save_dir, 'pretrain_cae_model.h5'))
    logger.info('Pretrained weights are saved to {}'.format(
        os.path.join(self._save_dir, 'pretrain_cae_model.h5')))
    self._pretrained = True
def defineCallBacks(self, earlyStopping=True):
    # Define CSV loggers, for logging loss and acc for every epoch
    csv_logger1 = CSVLogger("./logs/training_autoencoder_A dim-" +
                            str(self.DIM_ENCODER) + " lr-" + str(self.lr) +
                            ".log",
                            separator=',',
                            append=True)
    csv_logger2 = CSVLogger("./logs/training_autoencoder_B dim-" +
                            str(self.DIM_ENCODER) + " lr-" + str(self.lr) +
                            ".log",
                            separator=',',
                            append=True)

    # Define the checkpoints, for saving models
    filepath1 = "./model/model_1_-{epoch:02d}-{val_loss:.2f}-{loss:.2f}.hdf5"
    filepath2 = "./model/model_2_-{epoch:02d}-{val_loss:.2f}-{loss:.2f}.hdf5"
    checkpoint1 = ModelCheckpoint(filepath1,
                                  monitor='loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min',
                                  period=2)
    checkpoint2 = ModelCheckpoint(filepath2,
                                  monitor='loss',
                                  verbose=1,
                                  save_best_only=True,
                                  mode='min',
                                  period=2)

    # Early stopping
    earlystop = EarlyStopping(monitor='val_loss',
                              min_delta=0.00001,
                              patience=5,
                              verbose=0,
                              mode='auto')

    # The original reassigned these lists without the checkpoints, silently
    # dropping them; the checkpoints are kept here since they are built and
    # documented above.
    callbacks_list1 = [earlystop, checkpoint1, csv_logger1]
    callbacks_list2 = [earlystop, checkpoint2, csv_logger2]
    return callbacks_list1, callbacks_list2
def get_callbacks(weight_path: str, history_path: str) -> List[Callback]:
    """
    Returns the model's list of callbacks

    Args:
    -----
        weight_path: path where the checkpoints are saved
        history_path: path of the CSV training history (may be None)

    Returns:
    --------
        (list of keras.callbacks): list of callbacks
    """
    # Save the model weights, to be reloaded later,
    # whenever the monitored value improves
    check_params = {
        "monitor": "val_loss",
        "verbose": 1,
        "mode": "min",
        "save_best_only": True,
        "save_weights_only": True,
    }
    checkpoint = ModelCheckpoint(weight_path, **check_params)

    # Reduce the LR when the monitored val_f1 stops increasing
    reduce_params = {
        "factor": 0.5,
        "patience": 3,
        "verbose": 1,
        "mode": "max",
        "min_delta": 1e-3,
        "cooldown": 2,
        "min_lr": 1e-8,
    }
    reduce_lr = ReduceLROnPlateau(monitor="val_f1", **reduce_params)

    # Stop training when val_f1 stops increasing
    stop_params = {"mode": "max", "restore_best_weights": True, "patience": 40}
    early_stop = EarlyStopping(monitor="val_f1", **stop_params)

    # Terminate if a weight becomes NaN (not a number)
    terminate = TerminateOnNaN()

    # Enable TensorBoard visualization
    # tensorboard = TensorBoard(log_dir="./logs")

    # Store the training history in a CSV file
    if history_path is not None:
        csv_logger = CSVLogger(history_path, append=True)
        # List to be passed to the fit function
        callbacks = [checkpoint, early_stop, reduce_lr, terminate, csv_logger]
    else:
        # List to be passed to the fit function
        callbacks = [checkpoint, reduce_lr, terminate]
    return callbacks
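The val_f1 monitor above implies a custom f1 metric registered at compile time; it is not shown in this snippet, so the following Keras-backend sketch is an assumption (a batch-wise F1 approximation for binary labels). Passing it via model.compile(..., metrics=[f1]) is what would expose val_f1 to these callbacks:

from tensorflow.keras import backend as K

def f1(y_true, y_pred):
    # Batch-wise F1 approximation; assumes y_pred holds probabilities in [0, 1].
    y_pred = K.round(y_pred)
    tp = K.sum(y_true * y_pred)
    precision = tp / (K.sum(y_pred) + K.epsilon())
    recall = tp / (K.sum(y_true) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())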
def train(self):
    if self.trained == True:
        self.model.load_weights(self.weight_save_path)
        self.trained = False

    # param validation
    assert params['mode'] in ['fe', 'ft'], "mode must be either 'fe' or 'ft'"

    # set layer trainability
    # feature extraction
    if params['mode'] == 'fe':
        self.model.layers[0].trainable = False
    # finetuning
    elif params['mode'] == 'ft':
        self.model.layers[0].trainable = True

    # compile the model with designated parameters
    self.model.compile(optimizer=Adam(lr=params['lr']),
                       loss='categorical_crossentropy',
                       metrics=['categorical_accuracy', top_3_accuracy])

    if not os.path.exists(params['log_path']):
        os.mkdir(params['log_path'])
    if not os.path.exists(params['cp_path']):
        os.mkdir(params['cp_path'])

    # csv logger callback
    log_path = os.path.join(params['log_path'],
                            self.name + '_' + params['mode'] + '.log')
    csvlog_callback = CSVLogger(log_path)

    # checkpoint callback; the default monitor is val_loss, which should be
    # minimized, so mode is "min" (the original's mode="max" kept the worst
    # checkpoint)
    cp_path = os.path.join(
        params['cp_path'],
        self.name + '_' + params['mode'] + '-{epoch:04d}-{val_loss:.2f}.h5')
    cp_callback = ModelCheckpoint(cp_path, mode="min", save_best_only=True)

    print('{:=^75}'.format('training {} with {}'.format(
        self.name, params['mode'])))

    # actual data fitting
    self.model.fit_generator(generator=generator_train,
                             epochs=params['epoch'],
                             class_weight=class_weight,
                             validation_data=generator_validate,
                             validation_steps=steps_validate,
                             callbacks=[cp_callback, csvlog_callback])

    # save model once done training
    if not os.path.exists(params['model_path']):
        os.mkdir(params['model_path'])
    model_save_path = os.path.join(
        params['model_path'],
        self.model.name + '_' + params['mode'] + '.h5')
    self.model.save(model_save_path)
    self.trained = True
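top_3_accuracy above is referenced but not defined in this snippet; a common definition (an assumption here, not necessarily the author's) wraps Keras' built-in top-k metric:

from tensorflow.keras.metrics import top_k_categorical_accuracy

def top_3_accuracy(y_true, y_pred):
    # Fraction of samples whose true class is among the 3 highest-scoring
    # predictions.
    return top_k_categorical_accuracy(y_true, y_pred, k=3)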
def get_callbacks():
    earlystop = EarlyStopping(monitor='val_loss',
                              min_delta=0.001,
                              patience=10)
    lr_reduction = ReduceLROnPlateau(monitor='val_loss',
                                     patience=3,
                                     verbose=1,
                                     factor=0.5,
                                     min_lr=1e-5)
    csv_log = CSVLogger(base_path + 'log.csv')
    # tensorboard = TensorBoard(log_dir='log(logs)')
    callbacks = [earlystop, lr_reduction, csv_log]
    return callbacks
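A minimal usage sketch for get_callbacks() above; model, x_train, y_train and base_path are assumed to exist in the surrounding module:

# Hypothetical wiring; the split and epoch count are illustrative only.
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=50,
                    callbacks=get_callbacks())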
class CCMLProjectCallback(Callback):
    def __init__(
        self,
        log_path: str,
        git_dirs: List[str] = [],
        git_modules: List[str] = [],
        git_version_filename: str = "git_versions.yml",
        project_info_filename: str = "project_info.yml",
        enable_train_logging: bool = True,
        train_log_file: str = "train_logs.csv",
        exp_prefix: Optional[str] = None,
    ):
        super().__init__()
        self.git_dirs: List[str] = [git_dirs] if isinstance(git_dirs,
                                                            str) else git_dirs
        self.git_modules: List[str] = [git_modules] if isinstance(
            git_modules, str) else git_modules
        self.log_path = log_path
        self.git_version_filename: str = git_version_filename
        self.project_info_filename: str = project_info_filename
        self.log_callback: Optional[Callback] = None
        self.exp_prefix: Optional[str] = exp_prefix
        if enable_train_logging:
            self.log_callback = CSVLogger(
                filename=join(log_path, train_log_file))

    def set_model(self, model):
        super().set_model(model)
        if self.log_callback is not None:
            self.log_callback.set_model(model)

    def on_train_begin(self, logs=None):
        produce_git_version_yaml(
            join(self.log_path, self.git_version_filename),
            self.git_dirs,
            self.git_modules,
        )
        produce_project_info(join(self.log_path, self.project_info_filename),
                             exp_prefix=self.exp_prefix)
        if self.log_callback is not None:
            self.log_callback.on_train_begin(logs=logs)

    def on_epoch_end(self, epoch, logs=None):
        if self.log_callback is not None:
            self.log_callback.on_epoch_end(epoch, logs=logs)

    def on_train_end(self, logs=None):
        if self.log_callback is not None:
            self.log_callback.on_train_end(logs=logs)
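A hedged wiring example for CCMLProjectCallback; the paths, prefix, and the model/data names are illustrative assumptions:

# Hypothetical usage; log_path and git_dirs depend on the project layout.
project_cb = CCMLProjectCallback(log_path="logs/exp1",
                                 git_dirs=["."],
                                 exp_prefix="baseline")
model.fit(x_train, y_train, epochs=10, callbacks=[project_cb])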
def train(tub_names, model_path, batch_size, epochs):
    model_path = os.path.expanduser(model_path)
    m = MyModel()
    model = m.model
    model.summary()

    X, y = tubs_to_arrays(tub_names, seed=10)
    total_records = len(X)
    total_train = int(total_records * .8)
    total_val = total_records - total_train
    steps_per_epoch = ((total_train // batch_size) + 1) * 2
    validation_steps = (total_val // batch_size) + 1

    print('Train images: %d, Validation images: %d' % (total_train, total_val))
    print('Batch size:', batch_size)
    print('Epochs:', epochs)
    print('Training steps:', steps_per_epoch)
    print('Validation steps:', validation_steps)
    input("Press Enter to continue...")

    train_gen = generator(X[:total_train], y[:total_train], batch_size,
                          train=True,
                          categorical_angle=m.categorical_angle,
                          categorical_throttle=m.categorical_throttle)
    val_gen = generator(X[total_train:], y[total_train:], batch_size,
                        train=False,
                        categorical_angle=m.categorical_angle,
                        categorical_throttle=m.categorical_throttle)

    save_best = ModelCheckpoint(model_path,
                                monitor='val_loss',
                                verbose=0,
                                save_best_only=True,
                                mode='min')
    callbacks = [save_best, CSVLogger("logs/train.log"), OutputCallback()]

    hist = model.fit_generator(train_gen,
                               steps_per_epoch=steps_per_epoch,
                               epochs=epochs,
                               verbose=0,
                               validation_data=val_gen,
                               callbacks=callbacks,
                               validation_steps=validation_steps,
                               workers=4,
                               use_multiprocessing=True)
    return hist
def train_new_model(data_path, log_path=None, max_games=None):
    K.clear_session()
    input_shape = State.empty().to_numpy().shape[-1]
    model = create_model(input_shape, filters=12)
    print(model.summary())
    if data_path is not None:
        x_state, y_policy, y_reward = read_data(data_path, max_games)
        callbacks = [EarlyStopping(patience=5)]
        if log_path is not None:
            callbacks.append(CSVLogger(log_path))
        model.fit(x_state, [y_policy, y_reward],
                  epochs=100,
                  validation_split=0.3,
                  callbacks=callbacks)
    return model
def get_callbacks(WEIGHTS_FPATH, LOG_FPATH, monitor):
    callbacks = [
        ModelCheckpoint(WEIGHTS_FPATH,
                        monitor=monitor,
                        save_best_only=True,
                        save_weights_only=True,
                        mode='auto'),
        EarlyStopping(monitor=monitor, patience=3),
        # LearningRateScheduler(anneal_lr),
        # LearningRateTracker(),
        ReduceLROnPlateau(monitor=monitor,
                          factor=0.2,
                          patience=2,
                          min_lr=1e-7,
                          mode='auto'),
        CSVLogger(LOG_FPATH, separator=' ', append=True),
    ]
    return callbacks
def train_model(model, xtrain, ytrain, xtest, ytest, lr=0.001, batch_size=32,
                epochs=10, result_folder=""):
    """
    Trains a CNN for a given dataset

    :param model: initialized model
    :param xtrain: training images
    :param ytrain: labels for training images numbered from 0 to n
    :param xtest: test images
    :param ytest: labels for test images numbered from 0 to n
    :param lr: initial learning rate for SGD optimizer
    :param batch_size: batch size
    :param epochs: number of epochs to train
    :param result_folder: save trained model to this directory
    :return: None
    """
    from tensorflow.python.keras.optimizers import SGD
    from tensorflow.python.keras.callbacks import LearningRateScheduler, ModelCheckpoint

    sgd = SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    checkpoint = ModelCheckpoint(os.path.join(result_folder, "model.h5"),
                                 save_best_only=True)
    csv_logger = CSVLogger(os.path.join(result_folder, "training.log"),
                           separator=",",
                           append=True)
    history = model.fit(
        xtrain, ytrain,
        batch_size=batch_size,
        validation_data=(xtest, ytest),
        epochs=epochs,
        callbacks=[LearningRateScheduler(lr_schedule), checkpoint, csv_logger])
    plot(history, result_folder)
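train_model above references an lr_schedule function that is not defined in this snippet. A minimal step-decay sketch, assuming a halve-every-10-epochs policy; the starting value and step size are assumptions, not the author's actual schedule:

def lr_schedule(epoch, lr=None):
    # Step decay: halve the rate every 10 epochs, starting from 1e-3.
    # The lr argument passed by newer Keras versions is deliberately ignored
    # so the schedule does not compound.
    initial_lr = 1e-3
    return initial_lr * (0.5 ** (epoch // 10))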
def resume_train(self, epochs, checkpoint_epoch):
    self.model = load_model(
        os.path.join(self.log_dir, 'model_architecture.h5'))
    self.model.load_weights(os.path.join(self.log_dir, 'model_weights.h5'))
    self.model = layers.compile_model(model=self.model)

    train_dataset = MPII_dataset(
        images_dir=self.images_dir,
        annots_json_filename=self.annotations_json_file,
        input_shape=self.input_shape,
        output_shape=self.output_shape,
        type='train'
    )
    train_generator = train_dataset.generate_batches(
        batch_size=self.batch_size,
        stacks_num=self.stacks_num,
    )

    checkpoint = EvalCallback(
        images_dir=self.images_dir,
        annotations_json_file=self.annotations_json_file,
        log_dir=self.log_dir,
        batch_size=self.batch_size,
        stacks_num=self.stacks_num,
        input_shape=train_dataset.get_input_shape(),
        output_shape=train_dataset.get_output_shape()
    )
    logger = CSVLogger(
        os.path.join(self.log_dir, "csv_train.csv"),
        append=True
    )

    self.model.fit_generator(
        generator=train_generator,
        # use true division so the ceiling actually rounds up
        # (math.ceil of an already-floored // result is a no-op)
        steps_per_epoch=int(
            math.ceil(train_dataset.get_dataset_size() / self.batch_size)),
        epochs=epochs,
        callbacks=[checkpoint, logger],
        initial_epoch=checkpoint_epoch
    )
def __setup_callbacks(self) -> List:
    """
    Sets up the callbacks for training
    :return: the early stopping schedule, the tensorboard logger,
        the checkpointer and the CSV logger
    """
    # Create a folder for the model log of the current experiment
    weights_log_path = os.path.join(self.__current_experiment_path, 'weights')

    # Set up the callback to save the best weights after each epoch
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(weights_log_path,
                              'weights.{epoch:02d}-{val_loss:.2f}.hdf5'),
        verbose=0,
        save_best_only=True,
        save_weights_only=True,
        monitor='val_loss',
        mode='min')

    # Set up Tensorboard
    tensorboard = TensorBoard(
        log_dir=os.path.join(self.__current_experiment_path, 'tensorboard'),
        write_graph=True,
        histogram_freq=0,
        write_grads=True,
        write_images=False,
        batch_size=self._params.batch_size,
        update_freq=self._params.batch_size)

    # Set up early stopping to interrupt the training
    # if val_loss has not decreased after n epochs
    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=25,
                                   mode='min')

    csv_logger = CSVLogger(
        os.path.join(self.__current_experiment_path, "training.csv"),
        append=True)

    return [early_stopping, tensorboard, checkpointer, csv_logger]
def train(self, epochs):
    train_dataset = MPII_dataset(
        images_dir=self.images_dir,
        annots_json_filename=self.annotations_json_file,
        input_shape=self.input_shape,
        output_shape=self.output_shape,
        type='train'
    )
    train_generator = train_dataset.generate_batches(
        batch_size=self.batch_size,
        stacks_num=self.stacks_num,
    )

    checkpoint = EvalCallback(
        images_dir=self.images_dir,
        annotations_json_file=self.annotations_json_file,
        log_dir=self.log_dir,
        batch_size=self.batch_size,
        stacks_num=self.stacks_num,
        input_shape=train_dataset.get_input_shape(),
        output_shape=train_dataset.get_output_shape()
    )

    # CSVLogger creates the file itself, so pre-creating it is unnecessary:
    # logger_filepath = os.path.join(self.log_dir, "csv_train.csv")
    # if not os.path.exists(logger_filepath):
    #     open(logger_filepath, "w").close()
    logger = CSVLogger(
        os.path.join(self.log_dir, "csv_train.csv")
    )

    self.model.fit_generator(
        generator=train_generator,
        # use true division so the ceiling actually rounds up
        steps_per_epoch=int(
            math.ceil(train_dataset.get_dataset_size() / self.batch_size)),
        epochs=epochs,
        callbacks=[checkpoint, logger]
    )
def setup_callables(self):
    monitor = "val_dice_coef"
    # Set up callback to save the best weights after each epoch
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(self.model_dir,
                              'weights.{epoch:02d}-{val_loss:.2f}.hdf5'),
        verbose=0,
        save_best_only=True,
        save_weights_only=True,
        monitor=monitor,
        mode='max')
    # Set up callback to record training history
    csv_logger = CSVLogger(os.path.join(self.log_dir, 'log.csv'),
                           append=True,
                           separator=';')
    # Set up logger to catch warnings and info messages
    set_logger(os.path.join(self.log_dir, 'train_val.log'))
    # Set up callback to write TensorBoard info
    tensorboard = TensorBoard(log_dir=self.log_dir,
                              write_graph=True,
                              histogram_freq=0)
    # Set up early stopping to stop training if val_dice_coef
    # has not improved after 5 epochs
    early_stopping = EarlyStopping(monitor=monitor,
                                   patience=5,
                                   mode='max',
                                   verbose=0)
    lr_reducer = ReduceLROnPlateau(monitor=monitor,
                                   factor=0.05,
                                   cooldown=0,
                                   patience=5,
                                   verbose=0,
                                   mode='max')
    return [
        checkpointer, csv_logger, tensorboard, early_stopping, lr_reducer
    ]
def train_model(data, args):
    train_config = TrainConfig(args)
    mode_module = importlib.import_module("modes." + args.mode)

    train_generator = mode_module.DataGenerator(data.train_data)
    val_generator = mode_module.DataGenerator(data.validation_data)

    model = mode_module.build_model()
    model.compile(optimizer=train_config.optimizer,
                  loss=train_config.loss,
                  metrics=train_config.metrics)

    results_csv_file = os.path.join(args.results_dir, "results.csv")
    ckpt_filename = "Epoch-{epoch:02d}-Val-Acc-{val_accuracy:.4f}.hdf5"
    weight_file = os.path.join(args.checkpoints_dir, ckpt_filename)

    results_callback = CSVLogger(results_csv_file, append=True, separator=',')
    checkpoints_callback = ModelCheckpoint(weight_file,
                                           save_best_only=True,
                                           save_weights_only=True)
    tensorboard_callback = TensorBoard(log_dir=args.results_dir,
                                       histogram_freq=0,
                                       write_graph=True,
                                       write_images=True)

    model.fit_generator(generator=train_generator,
                        validation_data=val_generator,
                        verbose=2,
                        epochs=train_config.epochs,
                        shuffle=True,
                        callbacks=[
                            results_callback, tensorboard_callback,
                            checkpoints_callback
                        ])
    return model
def init_callbacks(self):
    self.callbacks.append(
        ModelCheckpoint(
            filepath=os.path.join(
                self.config.callbacks.checkpoint_dir,
                '%s-{epoch:02d}-{val_loss:.2f}.hdf5' % self.config.exp.name),
            monitor=self.config.callbacks.checkpoint_monitor,
            mode=self.config.callbacks.checkpoint_mode,
            save_best_only=self.config.callbacks.checkpoint_save_best_only,
            save_weights_only=self.config.callbacks.checkpoint_save_weights_only,
            verbose=self.config.callbacks.checkpoint_verbose,
        )
    )
    self.callbacks.append(
        TensorBoard(
            log_dir=self.config.callbacks.tensorboard_log_dir,
            write_graph=self.config.callbacks.tensorboard_write_graph,
        )
    )
    self.callbacks.append(
        CSVLogger(os.path.join(self.config.callbacks.history_dir,
                               "parameters.csv"),
                  separator=',',
                  append=False)
    )
def __init__(self, model, dump_path=None, csv_log_path=None):
    version = utils.now()
    if dump_path is None:
        Path('model/{}'.format(model.name)).mkdir(exist_ok=True)
        self.dump_path = 'model/{}/{}.hdf5'.format(model.name, version)
    else:
        self.dump_path = dump_path
    if csv_log_path is None:
        Path('logs/{}'.format(model.name)).mkdir(exist_ok=True)
        self.csv_log_path = 'logs/{}/{}_log.csv'.format(model.name, version)
    else:
        self.csv_log_path = csv_log_path
    self.model = model.model
    self.callbacks = [
        EarlyStopping(monitor='val_loss',
                      patience=5,
                      verbose=1,
                      min_delta=0.01,
                      mode='min'),
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.1,
                          patience=3,
                          verbose=1,
                          epsilon=0.01,
                          mode='min'),
        ModelCheckpoint(monitor='val_loss',
                        filepath=self.dump_path,
                        save_best_only=True,
                        save_weights_only=True,
                        mode='min'),
        CSVLogger(self.csv_log_path)
    ]
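A hedged sketch of how this wrapper's callback bundle might be consumed; the method name, validation split, and epoch count are assumptions, not part of the original class:

def fit(self, x_train, y_train, epochs=100, batch_size=32):
    # Hypothetical companion method reusing the callbacks built in __init__.
    return self.model.fit(x_train, y_train,
                          epochs=epochs,
                          batch_size=batch_size,
                          validation_split=0.2,
                          callbacks=self.callbacks)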
    model.fit_generator(
        train_generator,
        validation_data=test_generator,
        epochs=100,
        use_multiprocessing=False,
        workers=10,
        callbacks=[
            ModelCheckpoint(
                '%s/model_{epoch:d}_loss{loss:.4f}_acc_{acc:.4f}.h5' % folder,
                monitor='val_loss',
                verbose=1,
                save_best_only=False,
                save_weights_only=True,
                mode='auto',
                period=1),
            CSVLogger(logger, append=log_append),
            lr_scheduler(initial_lr=3e-4,
                         decay_factor=0.75,
                         step_size=5,
                         min_lr=1e-8)
        ],
        max_queue_size=30,
    )
elif mode == 'testing':
    adam = tf.train.AdamOptimizer(learning_rate=1e-3)
    model.compile(loss="categorical_crossentropy",
                  optimizer=adam,
                  metrics=['accuracy'])
    res = model.evaluate_generator(test_sequence, verbose=1)
    print(model.metrics_names)
    print(res)
elif mode == 'predict':
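The lr_scheduler factory called in the training branch above is not defined in this snippet; a step-decay sketch under that assumption (the real implementation may differ):

def lr_scheduler(initial_lr, decay_factor, step_size, min_lr):
    # Multiply the rate by decay_factor every step_size epochs,
    # clamped at min_lr; ignores the lr passed in by newer Keras versions.
    def schedule(epoch, lr=None):
        return max(initial_lr * (decay_factor ** (epoch // step_size)),
                   min_lr)
    return tf.keras.callbacks.LearningRateScheduler(schedule, verbose=1)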
def train(self):
    tb = TensorBoard(log_dir=self.log_dir,
                     write_graph=True,
                     update_freq='epoch',
                     histogram_freq=20)

    def scheduler(epoch):
        if epoch < 10:
            return self.lr
        elif 10 <= epoch < 15:
            return self.lr * np.exp(-0.3)
        elif 15 <= epoch < 20:
            return self.lr * np.exp(-0.4)
        elif 20 <= epoch < 35:
            return self.lr * np.exp(-0.45)
        else:
            return self.lr * np.exp(-0.4)
            # return self.lr * np.exp(0.4 * (10 - epoch))

    lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler,
                                                           verbose=1)
    ckpt = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(
            self.ckpt_dir,
            'rcnet-%s-%d*%d-l{loss:.1f}-vl{val_loss:.1f}_{epoch:03d}.h5' %
            (self.backbone_name, self.input_shape[0], self.input_shape[1])),
        monitor='val_loss',
        verbose=1,
        load_weights_on_restart=False,
        save_best_only=False,
        save_weights_only=True)
    csv_logger = CSVLogger('training.log')

    # get tf.data.Dataset
    train_ds, train_example_nums = get_ds(self.train_file_dir,
                                          batch_size=self.batch_size,
                                          epochs=self.epochs,
                                          parse_fn=_parse_record,
                                          parse_record_args=self.parse_args,
                                          trainval_split=False)
    val_ds, val_example_nums = get_ds(self.val_file_dir,
                                      batch_size=self.batch_size,
                                      trainval_split=False,
                                      parse_fn=_parse_record,
                                      epochs=self.epochs,
                                      parse_record_args=dict(
                                          is_training=False,
                                          **self.parse_args))

    logging.info(
        "start from epoch %d, using gpu nums %d, batch size %d, "
        "initial lr %f, input_shape %s" %
        (self.start_epoch + 1, self.gpu_nums, self.batch_size, self.lr,
         self.input_shape))

    self._model.fit(train_ds,
                    validation_data=val_ds,
                    initial_epoch=self.start_epoch,
                    callbacks=[tb, lr_schedule, ckpt, csv_logger],
                    epochs=self.epochs,
                    steps_per_epoch=train_example_nums // self.batch_size,
                    verbose=1,
                    validation_steps=val_example_nums // self.batch_size)
    self._model.save_weights(
        os.path.join(
            self.ckpt_dir,
            'rcnet-%s-%d*%d_final.h5' %
            (self.backbone_name, self.input_shape[0], self.input_shape[1])))
def main(cvset=0, n_features=5000, batch_size=1000, p_drop=0.5, latent_dim=2,
         n_epoch=5000, run_iter=0, exp_name='nagent', model_id='nagent_model'):
    train_dict, val_dict, full_dict, dir_pth = dataIO(cvset=0,
                                                      n_features=n_features,
                                                      exp_name=exp_name,
                                                      train_size=25000)

    # Architecture parameters ------------------------------
    input_dim = train_dict['X'].shape[1]
    print(input_dim)
    fc_dim = 50

    fileid = model_id + \
        '_cv_' + str(cvset) + \
        '_ng_' + str(n_features) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    fileid = fileid.replace('.', '-')
    print(fileid)

    n_agents = 1

    # Model definition -----------------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(input_dim, ), name='in_ae')
    M['mask_ae'] = Input(shape=(input_dim, ), name='mask_ae')
    for i in range(n_agents):
        M['dr_ae_' + str(i)] = Dropout(
            p_drop, name='dr_ae_' + str(i))(M['in_ae'])
        M['fc01_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc01_ae_' + str(i))(M['dr_ae_' + str(i)])
        M['fc02_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc02_ae_' + str(i))(M['fc01_ae_' + str(i)])
        M['fc03_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc03_ae_' + str(i))(M['fc02_ae_' + str(i)])
        M['fc04_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc04_ae_' + str(i))(M['fc03_ae_' + str(i)])
        M['fc05_ae_' + str(i)] = Dense(
            latent_dim, activation='linear',
            name='fc05_ae_' + str(i))(M['fc04_ae_' + str(i)])
        M['ld_ae_' + str(i)] = BatchNormalization(
            scale=False, center=False, epsilon=1e-10, momentum=0.,
            name='ld_ae_' + str(i))(M['fc05_ae_' + str(i)])
        M['fc06_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc06_ae_' + str(i))(M['ld_ae_' + str(i)])
        M['fc07_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc07_ae_' + str(i))(M['fc06_ae_' + str(i)])
        # layer name fixed: the original read 'fc08_ae_c' + str(i)
        M['fc08_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc08_ae_' + str(i))(M['fc07_ae_' + str(i)])
        M['fc09_ae_' + str(i)] = Dense(
            fc_dim, activation='elu',
            name='fc09_ae_' + str(i))(M['fc08_ae_' + str(i)])
        M['ou_ae_' + str(i)] = Dense(
            input_dim, activation='linear',
            name='ou_ae_' + str(i))(M['fc09_ae_' + str(i)])

    AE = Model(inputs=[M['in_ae'], M['mask_ae']],
               outputs=[M['ou_ae_' + str(i)] for i in range(n_agents)])

    def masked_mse(X, Y, mask):
        # The loss tensor is built once from the graph tensors; the
        # y_true/y_pred arguments passed in by Keras are ignored.
        loss_val = tf.reduce_mean(
            tf.multiply(tf.math.squared_difference(X, Y), mask))

        def masked_loss(y_true, y_pred):
            return loss_val

        return masked_loss

    # Create loss dictionary (the original hard-coded M['ou_ae_0'] inside
    # the comprehension; each agent's own output is used here)
    loss_dict = {
        'ou_ae_' + str(i): masked_mse(M['in_ae'], M['ou_ae_' + str(i)],
                                      M['mask_ae'])
        for i in range(n_agents)
    }

    # Loss weights dictionary
    loss_wt_dict = {'ou_ae_' + str(i): 1.0 for i in range(n_agents)}

    # Add loss definitions to the model
    AE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    # Custom logging
    cb_obj = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    train_input_dict = {
        'in_ae': train_dict['X'],
        'mask_ae': train_dict['mask']
    }
    train_output_dict = {
        'ou_ae_' + str(i): train_dict['X']
        for i in range(n_agents)
    }
    val_input_dict = {'in_ae': val_dict['X'], 'mask_ae': val_dict['mask']}
    val_output_dict = {
        'ou_ae_' + str(i): val_dict['X']
        for i in range(n_agents)
    }

    # Model training
    start_time = timeit.default_timer()
    AE.fit(train_input_dict,
           train_output_dict,
           batch_size=batch_size,
           initial_epoch=0,
           epochs=n_epoch,
           validation_data=(val_input_dict, val_output_dict),
           verbose=2,
           callbacks=[cb_obj])
    elapsed = timeit.default_timer() - start_time

    print('-------------------------------')
    print('Training time:', elapsed)
    print('-------------------------------')

    # Save weights
    AE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    # Generate summaries
    summary = {}
    for i in range(n_agents):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        summary['z'] = encoder.predict(full_dict['X'])

    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', summary)
    return
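A quick standalone check of the masked-MSE semantics used in main() above; the numbers are illustrative only. Note that the mean is taken over all entries, including masked ones, exactly as tf.reduce_mean does in masked_mse:

import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0]])
Y = np.array([[1.5, 2.0], [3.0, 0.0]])
mask = np.array([[1.0, 1.0], [1.0, 0.0]])
# The masked entry (bottom-right) contributes nothing, so the loss is
# 0.5**2 / 4 = 0.0625 rather than (0.25 + 16.0) / 4.
assert np.isclose(np.mean(((X - Y) ** 2) * mask), 0.0625)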
                          write_images=True)

if args.save_weights_only:
    checkpoint = ModelCheckpoint(os.path.join(
        path_model_save,
        start_datetime + '-WEIGHTS-E-{epoch:02d}-VA-{val_acc:.2f}.hdf5'),
                                 save_weights_only=True,
                                 monitor='val_loss',
                                 verbose=0)
else:
    checkpoint = ModelCheckpoint(os.path.join(
        path_model_save,
        start_datetime + '-MODEL-E-{epoch:02d}-VA-{val_acc:.2f}.hdf5'),
                                 monitor='val_loss',
                                 verbose=0)

logger = CSVLogger(os.path.join(path_csvlogger,
                                '{}.csv'.format(start_datetime)),
                   separator=',',
                   append=False)

callbacks = [tensorboard, checkpoint, logger]
if args.lr_discovery:
    callbacks.append(lr_discovery)

# def vggface_preprocessing(img):
#     return preprocess_input(img, data_format='channels_last', version=2)
# When using VGGFace for transfer learning, make sure to pass
# vggface_preprocessing as the preprocess_func argument and set
# range255=True. Moreover, the image size needs to be at least 197x197;
# you might want to use pad_to_size=197.

# Data generator configuration
train_data_generator = TrainDataGenerator(batch_size=args.batch_size,
def iterative_prune_model():
    # build the InceptionV3 network
    base_model = inception_v3.InceptionV3(include_top=False,
                                          weights='imagenet',
                                          pooling='avg',
                                          input_shape=(299, 299, 3))
    print('Model loaded.')
    top_output = Dense(5, activation='softmax')(base_model.output)

    # add the top model on top of the convolutional base
    model = Model(base_model.inputs, top_output)
    del base_model
    model.load_weights(tuned_weights_path)

    # compile the model with an SGD/momentum optimizer
    # and a very slow learning rate
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])

    # Set up data generators
    train_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')
    train_steps = train_generator.n // train_generator.batch_size

    test_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input)
    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_height, img_width),
        batch_size=val_batch_size,
        class_mode='categorical')
    val_steps = validation_generator.n // validation_generator.batch_size

    # Evaluate the model performance before pruning
    loss = model.evaluate_generator(
        validation_generator,
        validation_generator.n // validation_generator.batch_size)
    print('original model validation loss: ', loss[0], ', acc: ', loss[1])

    total_channels = get_total_channels(model)
    n_channels_delete = int(math.floor(percent_pruning / 100 * total_channels))

    # Incrementally prune the network, retraining it each time
    percent_pruned = 0
    # If percent_pruned > 0, continue pruning from previous checkpoint
    if percent_pruned > 0:
        checkpoint_name = ('inception_flowers_pruning_' +
                           str(percent_pruned) + 'percent')
        model = load_model(output_dir + checkpoint_name + '.h5')

    while percent_pruned <= total_percent_pruning:
        # Prune the model
        apoz_df = get_model_apoz(model, validation_generator)
        percent_pruned += percent_pruning
        print('pruning up to ', str(percent_pruned),
              '% of the original model weights')
        model = prune_model(model, apoz_df, n_channels_delete)

        # Clean up tensorflow session after pruning and re-load model
        checkpoint_name = ('inception_flowers_pruning_' +
                           str(percent_pruned) + 'percent')
        model.save(output_dir + checkpoint_name + '.h5')
        del model
        tensorflow.python.keras.backend.clear_session()
        tf.reset_default_graph()
        model = load_model(output_dir + checkpoint_name + '.h5')

        # Re-train the model
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(lr=1e-4, momentum=0.9),
                      metrics=['accuracy'])
        csv_logger = CSVLogger(output_dir + checkpoint_name + '.csv')
        model.fit_generator(train_generator,
                            steps_per_epoch=train_steps,
                            epochs=epochs,
                            validation_data=validation_generator,
                            validation_steps=val_steps,
                            workers=4,
                            callbacks=[csv_logger])

    # Evaluate the final model performance
    loss = model.evaluate_generator(
        validation_generator,
        validation_generator.n // validation_generator.batch_size)
    print('pruned model loss: ', loss[0], ', acc: ', loss[1])
def explicitTrain(model,
                  preprocess_input,
                  humanDataset: SegmentationDataset,
                  nonHumanDataset: SegmentationDataset,
                  valHumanDataset: SegmentationDataset,
                  valNonHumanDataset: SegmentationDataset,
                  trainingDir: str,
                  modelEncoder: str,
                  imageSize=224,
                  batchSize: int = 1,
                  epochs: int = 1,
                  startEpoch: int = 0):
    validationPacketSize = 16 * 16
    x_val_h, y_val_h = valHumanDataset.readBatch(validationPacketSize)
    augmentations.applyTransforms(x_val_h, y_val_h,
                                  augmentations.valid_transforms(imageSize))
    x_val_nh, y_val_nh = valNonHumanDataset.readBatch(validationPacketSize)
    augmentations.applyTransforms(x_val_nh, y_val_nh,
                                  augmentations.valid_transforms(imageSize))
    x_val = np.stack(x_val_h + x_val_nh)
    y_val = np.stack(y_val_h + y_val_nh)
    x_val = preprocess_input(x_val)

    # checkPointPath = os.path.join(trainingDir, 'u-net-{}.chpt'.format(modelEncoder))
    # checkPointCallback = tf.keras.callbacks.ModelCheckpoint(filepath=checkPointPath,
    #                                                         save_weights_only=True,
    #                                                         verbose=1)

    SAVE_AFTER_NUMBER = 50000
    packetSize = 16 * 16
    nonHumanPacketSize = max(
        (packetSize * len(nonHumanDataset)) // len(humanDataset), 1)
    csv_logger = CSVLogger('training.log', append=True)

    for epoch in range(startEpoch, epochs):
        logger.info('epoch %d', epoch)
        humanDataset.reset()
        nonHumanDataset.reset()
        try:
            packets = len(humanDataset) // packetSize
            for packetIndex in range(packets - 1):
                logger.debug('reading batch, memory used %f', usedMemory())
                x_train_h, y_train_h = humanDataset.readBatch(packetSize)
                x_train_h, y_train_h = augmentations.appendTransforms(
                    x_train_h, y_train_h,
                    augmentations.train_transforms_after_resize,
                    augmentations.resize_transforms(imageSize))
                logger.debug('reading human batch, memory used %f',
                             usedMemory())
                x_train_nh, y_train_nh = nonHumanDataset.readBatch(
                    nonHumanPacketSize)
                x_train_nh, y_train_nh = augmentations.appendTransforms(
                    x_train_nh, y_train_nh,
                    augmentations.train_transforms_after_resize,
                    augmentations.resize_transforms(imageSize))
                logger.debug('reading nonHuman batch, memory used %f',
                             usedMemory())
                x_train, y_train = HumanDatasetSequence.shuffleHumanNonHuman(
                    x_train_h, x_train_nh, y_train_h, y_train_nh)
                x_train = np.concatenate((x_train, ))
                y_train = np.concatenate((y_train, ))
                del x_train_h
                del x_train_nh
                del y_train_h
                del y_train_nh
                logger.debug('concatenate batches, memory used %f',
                             usedMemory())
                x_train = preprocess_input(x_train)
                # x_train = x_train / 255
                logger.debug('preprocess x_train, memory used %f',
                             usedMemory())
                logger.debug('start train on %d samples, memory used %f',
                             len(x_train), usedMemory())
                model.fit(x=x_train,
                          y=y_train,
                          batch_size=batchSize,
                          epochs=epoch + 1,
                          initial_epoch=epoch,
                          validation_data=(x_val, y_val),
                          callbacks=[csv_logger])
                saveModel = (
                    (humanDataset.index + nonHumanDataset.index) %
                    SAVE_AFTER_NUMBER) < (packetSize + nonHumanPacketSize)
                if saveModel:
                    save_model(model, trainingDir, modelEncoder, packetIndex)
                del x_train
                del y_train
                logger.debug('trained on %d samples, memory used %f',
                             humanDataset.index + nonHumanDataset.index,
                             usedMemory())
                # gc.collect()
                # objgraph.show_most_common_types(limit=50)
                # obj = objgraph.by_type('list')[1000]
                # objgraph.show_backrefs(obj, max_depth=10)

            # train on the remaining (last) packet of the epoch
            x_train_h, y_train_h = humanDataset.readBatch(packetSize)
            x_train_h, y_train_h = augmentations.appendTransforms(
                x_train_h, y_train_h,
                augmentations.train_transforms_after_resize,
                augmentations.resize_transforms(imageSize))
            x_train_nh, y_train_nh = nonHumanDataset.readBatch(
                nonHumanPacketSize)
            x_train_nh, y_train_nh = augmentations.appendTransforms(
                x_train_nh, y_train_nh,
                augmentations.train_transforms_after_resize,
                augmentations.resize_transforms(imageSize))
            x_train, y_train = HumanDatasetSequence.shuffleHumanNonHuman(
                x_train_h, x_train_nh, y_train_h, y_train_nh)
            x_train = np.concatenate((x_train, ))
            y_train = np.concatenate((y_train, ))
            del x_train_h
            del x_train_nh
            del y_train_h
            del y_train_nh
            x_train = preprocess_input(x_train)
            # x_train = x_train / 255
            model.fit(x=x_train,
                      y=y_train,
                      batch_size=batchSize,
                      epochs=1,
                      validation_data=(x_val, y_val),
                      callbacks=[csv_logger])
            save_model(model, trainingDir, modelEncoder, packets - 1)
            del x_train
            del y_train
            gc.collect()
            logger.info('epoch %d is trained', epoch)
        except Exception as e:
            logger.error('Exception %s', str(e))
            traceback.print_exc()
            return

    now = datetime.now()
    # avoid path separators in the filename (the original "%Y/%m/%d %H:%M:%S"
    # format embedded slashes into the model path)
    dt_string = now.strftime("%Y-%m-%d_%H-%M-%S")
    modelPath = os.path.join(
        trainingDir,
        'u-net-{}_epoch{}_{}.tfmodel'.format(modelEncoder, epoch, dt_string))
    model.save(modelPath)
    logger.info('model saved')
def main(batch_size=150, p_drop=0.4, latent_dim=2, cpl_fn='minvar',
         cpl_str=1e-3, n_epoch=500, run_iter=0, model_id='cnn',
         exp_name='MNIST'):
    fileid = model_id + \
        '_cf_' + cpl_fn + \
        '_cs_' + str(cpl_str) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    fileid = fileid.replace('.', '-')

    train_dat, train_lbl, val_dat, val_lbl, dir_pth = dataIO(exp_name=exp_name)

    # Architecture parameters ------------------------------
    input_dim = train_dat.shape[1]
    n_arms = 2
    fc_dim = 49

    # Model definition -------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(28, 28, 1), name='in_ae')
    for i in range(n_arms):
        M['co1_ae_' + str(i)] = Conv2D(
            10, (3, 3), activation='relu', padding='same',
            name='co1_ae_' + str(i))(M['in_ae'])
        M['mp1_ae_' + str(i)] = MaxPooling2D(
            (2, 2), padding='same',
            name='mp1_ae_' + str(i))(M['co1_ae_' + str(i)])
        M['dr1_ae_' + str(i)] = Dropout(
            rate=p_drop, name='dr1_ae_' + str(i))(M['mp1_ae_' + str(i)])
        M['fl1_ae_' + str(i)] = Flatten(
            name='fl1_ae_' + str(i))(M['dr1_ae_' + str(i)])
        M['fc01_ae_' + str(i)] = Dense(
            fc_dim, activation='relu',
            name='fc01_ae_' + str(i))(M['fl1_ae_' + str(i)])
        M['fc02_ae_' + str(i)] = Dense(
            fc_dim, activation='relu',
            name='fc02_ae_' + str(i))(M['fc01_ae_' + str(i)])
        M['fc03_ae_' + str(i)] = Dense(
            fc_dim, activation='relu',
            name='fc03_ae_' + str(i))(M['fc02_ae_' + str(i)])

        if cpl_fn in ['mse']:
            M['ld_ae_' + str(i)] = Dense(
                latent_dim, activation='linear',
                name='ld_ae_' + str(i))(M['fc03_ae_' + str(i)])
        elif cpl_fn in ['mseBN', 'fullcov', 'minvar']:
            M['fc04_ae_' + str(i)] = Dense(
                latent_dim, activation='linear',
                name='fc04_ae_' + str(i))(M['fc03_ae_' + str(i)])
            M['ld_ae_' + str(i)] = BatchNormalization(
                scale=False, center=False, epsilon=1e-10, momentum=0.99,
                name='ld_ae_' + str(i))(M['fc04_ae_' + str(i)])

        M['fc05_ae_' + str(i)] = Dense(
            fc_dim, activation='relu',
            name='fc05_ae_' + str(i))(M['ld_ae_' + str(i)])
        M['fc06_ae_' + str(i)] = Dense(
            fc_dim, activation='relu',
            name='fc06_ae_' + str(i))(M['fc05_ae_' + str(i)])
        M['fc07_ae_' + str(i)] = Dense(
            fc_dim * 4, activation='relu',
            name='fc07_ae_' + str(i))(M['fc06_ae_' + str(i)])
        M['re1_ae_' + str(i)] = Reshape(
            (14, 14, 1), name='re1_ae_' + str(i))(M['fc07_ae_' + str(i)])
        M['us1_ae_' + str(i)] = UpSampling2D(
            (2, 2), name='us1_ae_' + str(i))(M['re1_ae_' + str(i)])
        M['co2_ae_' + str(i)] = Conv2D(
            10, (3, 3), activation='relu', padding='same',
            name='co2_ae_' + str(i))(M['us1_ae_' + str(i)])
        M['ou_ae_' + str(i)] = Conv2D(
            1, (3, 3), activation='sigmoid', padding='same',
            name='ou_ae_' + str(i))(M['co2_ae_' + str(i)])

    cplAE = Model(inputs=M['in_ae'],
                  outputs=[M['ou_ae_' + str(i)] for i in range(n_arms)] +
                          [M['ld_ae_' + str(i)] for i in range(n_arms)])

    if cpl_fn in ['mse', 'mseBN']:
        cpl_fn_loss = mse
    elif cpl_fn == 'fullcov':
        cpl_fn_loss = fullcov
    elif cpl_fn == 'minvar':
        cpl_fn_loss = minvar
    # guard against unknown coupling functions
    # (the original `assert type(cpl_fn)` was always true)
    assert cpl_fn in ['mse', 'mseBN', 'fullcov', 'minvar'], \
        'unknown coupling function: ' + cpl_fn

    # Create loss dictionary
    loss_dict = {
        'ou_ae_0': mse(M['in_ae'], M['ou_ae_0']),
        'ou_ae_1': mse(M['in_ae'], M['ou_ae_1']),
        'ld_ae_0': cpl_fn_loss(M['ld_ae_0'], M['ld_ae_1']),
        'ld_ae_1': cpl_fn_loss(M['ld_ae_1'], M['ld_ae_0'])
    }

    # Loss weights dictionary
    loss_wt_dict = {
        'ou_ae_0': 1.0,
        'ou_ae_1': 1.0,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str
    }

    # Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    # Data feed
    train_input_dict = {'in_ae': train_dat}
    val_input_dict = {'in_ae': val_dat}
    train_output_dict = {
        'ou_ae_0': train_dat,
        'ou_ae_1': train_dat,
        'ld_ae_0':
            np.empty((train_dat.shape[0], latent_dim)),
        'ld_ae_1':
            np.empty((train_dat.shape[0], latent_dim))
    }
    val_output_dict = {
        'ou_ae_0': val_dat,
        'ou_ae_1': val_dat,
        'ld_ae_0': np.empty((val_dat.shape[0], latent_dim)),
        'ld_ae_1': np.empty((val_dat.shape[0], latent_dim))
    }

    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    # Train model
    cplAE.fit(train_input_dict,
              train_output_dict,
              validation_data=(val_input_dict, val_output_dict),
              batch_size=batch_size,
              initial_epoch=0,
              epochs=n_epoch,
              verbose=2,
              shuffle=True,
              callbacks=[log_cb])

    # Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    matsummary = {}
    # Trained model prediction
    for i in range(n_arms):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        matsummary['z_val_' + str(i)] = encoder.predict({'in_ae': val_dat})
        matsummary['z_train_' + str(i)] = encoder.predict({'in_ae': train_dat})
    matsummary['train_lbl'] = train_lbl
    matsummary['val_lbl'] = val_lbl
    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', matsummary)
    return
model = Model(inputs=temporal_input_layer, outputs=predictions)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
# print(model.summary())
## NN ends here

# callbacks
# tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
model_name = str(config[parser_args.task]['MODEL_FOLDER']) + \
    '_temporal' + curr_time + '.h5'
model_checkpoint = ModelCheckpoint(model_name,
                                   verbose=1,
                                   monitor='val_loss',
                                   save_best_only=True,
                                   mode='auto')
early_stopping = EarlyStopping(monitor="val_loss", verbose=1, patience=8)
log_file_name = os.path.join(config['plotting']['MEASURE_FOLDER'],
                             'evals-{}.json'.format(curr_time))
csv_logger = CSVLogger(filename=log_file_name, append=True)

# generate a model by training
history = model.fit_generator(train_generator,
                              epochs=num_epochs,
                              steps_per_epoch=36690 // batch_size,
                              validation_data=val_generator,
                              validation_steps=12227 // batch_size,
                              verbose=1,
                              callbacks=[model_checkpoint, early_stopping,
                                         csv_logger])

with open('{}-config_{}.ini'.format(config[parser_args.task]['MODEL_FOLDER'],
                                    curr_time), 'w') as cfgfile:
    newConfig = configparser.ConfigParser()
    newConfig[parser_args.task] = config[parser_args.task]
    # print(config[parser_args.task])
    newConfig.write(cfgfile)
class_weight = {i: class_weight[i] for i in range(2)}

NAME = "{database}-{gender}-{modeltype}-{spec}-{time}".format(
    database=confv.database_cremad,
    gender=confv.gender_female,
    modeltype=confv.ml_mode_convolutional,
    spec="1st",
    time=int(time.time()))
mdl_logs_pth = os.path.join(confv.base_store, confv.log_dir)
tensorboard = TensorBoard(log_dir=mdl_logs_pth + '\\{}'.format(NAME))

dfc.check_dir_inside_saved_features_and_modelconfigs_and_models(
    parent=confv.saved_training_metrics_logs,
    database=confv.database_cremad,
    gender=confv.gender_female)
csv_logger = CSVLogger(mconf_cremad_f.training_log_path)

model.fit(X, y,
          epochs=40,
          batch_size=128,
          shuffle=True,
          class_weight=class_weight,
          validation_split=0.2,
          callbacks=[tensorboard, csv_logger])
print(
    "--------------------Finished model training for adjusted and {gender} "
    "isolated dataset: {name}--------------------".format(
        gender=confv.gender_female, name=confv.dataset_cremad_female))

# MODEL SAVING
print(
    "\n\n--------------------Started model saving for adjusted and {gender} "
    "isolated dataset: {name}--------------------"