def fit(self, main_net_loss, net_lobe, idx_, monitor_period, main_model):
    """Run one fit step on the next training batch.

    Every `monitor_period`-th step also evaluates on `self.valid_array` and
    tracks/saves the best validation model; other steps train only. Best
    train/valid losses seen so far are kept on `self` so the checkpoint
    wrappers resume from the correct "best" value.

    :param main_net_loss: current loss of the main net, forwarded to the
        LR-reset logic.
    :param net_lobe: lobe-segmentation net used to build attention maps
        (only when ``args.attention`` and this task is not 'lobe').
    :param idx_: global step index.
    :param monitor_period: validate every this many steps.
    :param main_model: model handed to ``reset_lr_if_need``.
    """
    self.reset_lr_if_need(idx_, main_net_loss, main_model)
    x, y = next(self.train_data_gen)  # tr_data is a generator or enqueuer

    # callbacks
    train_csvlogger = callbacks.CSVLogger(self.mypath.log_fpath('train'), separator=',', append=True)
    valid_csvlogger = callbacks.CSVLogger(self.mypath.log_fpath('valid'), separator=',', append=True)

    class ModelCheckpointWrapper(callbacks.ModelCheckpoint):
        # Allows seeding `self.best` so "save_best_only" survives restarts.
        def __init__(self, best_init=None, *arg, **kwargs):
            super().__init__(*arg, **kwargs)
            if best_init is not None:
                self.best = best_init

    if "2_out" in self.io:
        monitor_tr = self.task + "_out_segmentation2_loss"
        monitor_vd = "val_" + monitor_tr
    else:
        monitor_tr, monitor_vd = "loss", "val_loss"

    saver_train = ModelCheckpointWrapper(best_init=self.best_tr_loss,
                                         filepath=self.mypath.model_fpath_best_patch('train'),
                                         verbose=1,
                                         save_best_only=True,
                                         monitor=monitor_tr,  # do not add valid_data here, save time!
                                         save_weights_only=True)
    saver_valid = ModelCheckpointWrapper(best_init=self.best_vd_loss,
                                         filepath=self.mypath.model_fpath_best_patch('valid'),
                                         verbose=1,
                                         save_best_only=True,
                                         monitor=monitor_vd,  # do not add valid_data here, save time!
                                         save_weights_only=True)

    if args.attention and self.task != 'lobe':
        lobe_pred = net_lobe.predict(x)
        if isinstance(lobe_pred, list):  # multi outputs; keep the first head
            lobe_pred = lobe_pred[0]
        y = get_attentioned_y(y, lobe_pred)

    if idx_ % monitor_period == 0:  # every `monitor_period` steps, valid once: save time, keep best valid model
        history = self.net.fit(x, y,
                               batch_size=args.batch_size,
                               validation_data=tuple(self.valid_array),
                               callbacks=[saver_train, saver_valid, train_csvlogger, valid_csvlogger])
        current_vd_loss = history.history['val_loss'][0]
        # FIX: np.float was removed in NumPy 1.24; it was an alias of the
        # builtin float, so this is behavior-identical.
        old_vd_loss = float(self.best_vd_loss)
        if current_vd_loss < old_vd_loss:
            self.best_vd_loss = current_vd_loss
    else:
        history = self.net.fit(x, y,
                               batch_size=args.batch_size,
                               callbacks=[saver_train, train_csvlogger])

    for key, result in history.history.items():
        print(key, result)

    current_tr_loss = history.history['loss'][0]
    old_tr_loss = float(self.best_tr_loss)  # FIX: np.float removed in NumPy 1.24
    if current_tr_loss < old_tr_loss:
        self.best_tr_loss = current_tr_loss
    self.current_tr_loss = current_tr_loss
def get_default_callbacks(model_path: Path,
                          monitor: str = 'val_acc',
                          base_patience: int = 3,
                          lr_reduce_factor: float = 0.5,
                          min_lr: float = 1e-7,
                          verbose: int = 1,
                          log_dir: Path = None,
                          gradients: bool = True,  # change to false
                          confusion_matrix: bool = True,  # change to false
                          loss: Callable = None,
                          data: Tuple[np.ndarray, np.ndarray] = None,
                          classes: list = None,
                          heatmap_options: dict = None,
                          csv_logdir: Path = None,
                          csv_append: bool = False,
                          save_latest: bool = False):
    """Return the default callback list for training.

    Always included: LR reduction on plateau, early stopping, and a
    best-only checkpoint, all watching `monitor`. Early-stopping patience is
    2 * base_patience + 1 so the LR reduction gets a chance to act first.
    Optional extras: an ExtendedTensorBoard logger (when `log_dir` is set),
    a CSVLogger (when `csv_logdir` is set), and an every-epoch "latest"
    checkpoint next to the best one (when `save_latest`).
    """
    # NOTE: this local list shadows any module-level `callbacks` name; the
    # Keras callback classes come in via the `clb` alias.
    callbacks = [
        clb.ReduceLROnPlateau(monitor=monitor,
                              factor=lr_reduce_factor,
                              min_lr=min_lr,
                              patience=base_patience,
                              verbose=verbose),
        clb.EarlyStopping(monitor=monitor,
                          patience=(2 * base_patience + 1),
                          verbose=verbose),
        clb.ModelCheckpoint(monitor=monitor,
                            filepath=model_path,
                            save_best_only=True,
                            verbose=verbose)
    ]
    if log_dir:
        # `gradients`/`confusion_matrix`/`loss`/`data`/`classes`/`heatmap_options`
        # all feed the extended TensorBoard logger only.
        callbacks.append(
            ExtendedTensorBoard(log_dir, gradients, confusion_matrix, loss,
                                data, classes, heatmap_options))
    if csv_logdir:
        if csv_append:
            callbacks.append(clb.CSVLogger(csv_logdir, append=True))
        else:
            callbacks.append(clb.CSVLogger(csv_logdir))
    if save_latest:
        # Unconditional checkpoint of the most recent epoch alongside the best.
        latest_path = model_path.parent / f'{model_path.stem}_latest{model_path.suffix}'
        callbacks.append(
            clb.ModelCheckpoint(monitor=monitor, filepath=latest_path))
    return callbacks
def get_callbacks(args, partition_idx):
    """Build the callback list for one training partition.

    :param args: parsed CLI namespace; uses weights_dir, model_type,
        timestamp, early_stop and debug.
    :param partition_idx: data-partition index, embedded in the TensorBoard
        log directory name.
    :return: list whose first element is a ``None`` placeholder the caller
        replaces with its ModelCheckpoint.
    """
    import os
    import tensorflow.keras.callbacks as bk
    # from CustomEarlyStopping import CustomEarlyStopping
    model_type = args.model_type
    timestamp = args.timestamp
    early_stop = args.early_stop
    t_name = args.weights_dir + '/tensorboard_logs/{}_{}_{}'.format(
        model_type, timestamp, partition_idx)
    # BUG FIX: the original replaced '/' with '\\' unconditionally
    # ("Correction for Windows paths"), which produced broken log paths on
    # POSIX systems. Only translate separators when actually on Windows.
    if os.name == 'nt':
        t_name = t_name.replace('/', '\\')
    callbacks = list()
    callbacks.append(None)  # Position for Checkpoint
    # CustomEarlyStopping(patience_loss=args.patience, patience_acc=10, threshold=.95)
    callbacks.append(bk.CSVLogger(args.weights_dir + '/log.csv'))
    # CustomEarlyStopping(patience_loss=10, threshold=0.95)
    callbacks.append(bk.TensorBoard(log_dir=t_name, histogram_freq=args.debug))
    if early_stop > 0:
        # TODO - Test multiple EarlyStopping
        callbacks.append(
            bk.EarlyStopping(monitor='val_loss', patience=early_stop, verbose=0))
        # callbacks.append(bk.EarlyStopping(monitor='val_accuracy', patience=early_stop, verbose=0))
    callbacks.append(
        bk.ReduceLROnPlateau(monitor='val_loss',
                             factor=.9,
                             patience=10,
                             min_lr=0.00001,
                             cooldown=0,
                             verbose=0))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * (args.lr_decay ** epoch)))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * math.cos(1+( (epoch-1 % (args.epochs/cycles)))/(args.epochs/cycles) ) ))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.)) )
    return callbacks
def train(model, data, args): """ Training a CapsuleNet :param model: the CapsuleNet model :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))` :param args: arguments :return: The trained model """ # unpacking the data (x_train, y_train), (x_test, y_test) = data # callbacks log = callbacks.CSVLogger(args.save_dir + '/log.csv') tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs', batch_size=args.batch_size, histogram_freq=int(args.debug)) checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5', monitor='val_capsnet_acc', save_best_only=True, save_weights_only=True, verbose=1) lr_decay = callbacks.LearningRateScheduler( schedule=lambda epoch: args.lr * (args.lr_decay**epoch)) # compile the model model.compile(optimizer=optimizers.Adam(lr=args.lr), loss=[margin_loss, 'mse'], loss_weights=[1., args.lam_recon], metrics={'capsnet': 'accuracy'}) """ # Training without data augmentation: model.fit([x_train, y_train], [y_train, x_train], batch_size=args.batch_size, epochs=args.epochs, validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay]) """ # Begin: Training with data augmentation ---------------------------------------------------------------------# def train_generator(x, y, batch_size, shift_fraction=0.): train_datagen = ImageDataGenerator( width_shift_range=shift_fraction, height_shift_range=shift_fraction) # shift up to 2 pixel for MNIST generator = train_datagen.flow(x, y, batch_size=batch_size) while 1: x_batch, y_batch = generator.next() yield ([x_batch, y_batch], [y_batch, x_batch]) # Training with data augmentation. If shift_fraction=0., also no augmentation. 
model.fit_generator( generator=train_generator(x_train, y_train, args.batch_size, args.shift_fraction), steps_per_epoch=int(y_train.shape[0] / args.batch_size), epochs=args.epochs, validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay]) # End: Training with data augmentation -----------------------------------------------------------------------# model.save_weights(args.save_dir + '/trained_model.h5') print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir) return model
def get_callbacks(model_name):
    """Create the callback list for one model run.

    Reads the base output directory from the first line of Pathfile.txt, then
    wires up early stopping, LR reduction (both on training loss), TensorBoard
    and a CSV history log named after `model_name`.
    """
    # First line of Pathfile.txt is the base output directory.
    with open('Pathfile.txt', 'r') as handle:
        base_dir = handle.read().split("\n")[0]

    board_dir = os.path.join(base_dir, 'Logs', model_name)
    history_csv = os.path.join(base_dir, 'History', model_name + '.csv')

    stopper = callbacks.EarlyStopping(monitor='loss',
                                      min_delta=0.0001,
                                      patience=40,
                                      verbose=1,
                                      mode='auto',
                                      restore_best_weights=True)
    # lr = callbacks.LearningRateScheduler(scheduler, verbose=1)
    # callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')
    plateau = callbacks.ReduceLROnPlateau(monitor='loss',
                                          factor=0.3,
                                          patience=5,
                                          verbose=1,
                                          mode='auto',
                                          min_delta=0.001,
                                          cooldown=0,
                                          min_lr=0.00000001)
    board = callbacks.TensorBoard(log_dir=board_dir,
                                  histogram_freq=0,
                                  write_graph=False,
                                  write_images=False,
                                  update_freq='epoch',
                                  profile_batch=0)
    # embeddings_freq=0, embeddings_metadata=None)
    history_log = callbacks.CSVLogger(history_csv, separator=',', append=False)
    return [stopper, plateau, board, history_log]
def train_model(self,
                train_X,
                train_Y,
                test_X,
                test_Y,
                model_name,
                file_save_path,
                epochs=100,
                EarlyStop_patience=30):
    """Fit self.model on the given data and persist the results.

    Writes a per-epoch CSV log and, afterwards, the trained model, both named
    after `model_name` under `file_save_path`. Early stopping watches
    val_loss and restores the best weights.
    """
    stopper = callbacks.EarlyStopping(monitor='val_loss',
                                      patience=EarlyStop_patience,
                                      restore_best_weights=True)
    logger = callbacks.CSVLogger(filename=file_save_path + f'/Log_{model_name}.csv')
    self.model.fit(x=train_X,
                   y=train_Y,
                   epochs=epochs,
                   validation_data=(test_X, test_Y),
                   steps_per_epoch=1,
                   validation_steps=1,
                   shuffle=True,
                   callbacks=[stopper, logger])
    self.model.save(filepath=file_save_path + f'/model_{model_name}.h5')
def train(model, data, args):
    """Compile and train the CapsuleNet without data augmentation.

    :param model: two-input/two-output CapsuleNet (capsule head + reconstruction)
    :param data: ((x_train, y_train), (x_test, y_test))
    :param args: needs save_dir, lr, lr_decay, lam_recon, batch_size, epochs
    :return: the trained model (weights also saved under save_dir)
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    # BUG FIX: the checkpoint path previously lacked a '/' separator
    # (args.save_dir + 'weights-…'), so checkpoints landed next to, not
    # inside, save_dir — unlike every other path in this function.
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    # Exponential decay: lr * lr_decay ** epoch.
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})

    # Training without data augmentation: inputs are (image, label),
    # targets are (label, reconstruction-target).
    model.fit((x_train, y_train), (y_train, x_train),
              batch_size=args.batch_size,
              epochs=args.epochs,
              validation_data=((x_test, y_test), (y_test, x_test)),
              callbacks=[log, checkpoint, lr_decay])

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)
    return model
def trainVAE(x_train, epochs, save_path, z_dim, batch_size=64):
    """Build and train a VAE on x_train, saving the best model, a CSV log,
    loss plots and the encoder/decoder architecture diagrams to save_path."""
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    print(x_train.shape)
    # Set model
    encoder, decoder, vae = build_vae(x_train, z_dim)
    vae.summary()

    # Custom vae_loss
    def vae_loss(x, rec_x):
        # NOTE(review): the `rec_x` supplied by Keras is ignored — the
        # reconstruction is recomputed through the encoder/decoder closures.
        z_mean, z_log_var, z = encoder(x)
        # 1. reconstruction loss
        rec_x = decoder(z)
        rec_loss = tf.keras.losses.binary_crossentropy(x, rec_x)
        rec_loss = tf.reduce_mean(rec_loss)
        rec_loss *= (128 * 64)  # scale by input size — presumably 128x64 inputs; TODO confirm
        # 2. KL Divergence loss
        kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
        kl_loss = -0.5 * tf.reduce_mean(kl_loss)
        total_loss = rec_loss + kl_loss
        return total_loss

    # Compile with custom loss
    vae.compile(optimizer='adam', loss=vae_loss)
    # Set callbacks (monitor training loss — no validation data is used)
    ckp = callbacks.ModelCheckpoint(filepath=save_path + '/model.h5',
                                    monitor='loss',
                                    verbose=1,
                                    save_best_only=True)
    csv_logger = callbacks.CSVLogger(save_path + '/logger.csv')
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='loss',
                                            factor=0.2,
                                            patience=5,
                                            min_lr=1e-5)
    # Train (autoencoder: input reconstructs itself)
    history = vae.fit(x_train, x_train,
                      epochs=epochs,
                      batch_size=batch_size,
                      callbacks=[ckp, reduce_lr, csv_logger])
    # Plotting
    plot_loss(history, save_path)
    plot_model(encoder, to_file=save_path + '/vae_encoder.png', show_shapes=True)
    plot_model(decoder, to_file=save_path + '/vae_decoder.png', show_shapes=True)
def build_callbacks(conf):
    '''Set up logging and history callbacks (BaseLogger + timestamped CSVLogger).

    Reads conf['paths']['csvlog_save_path'] and creates that directory if it
    does not exist yet. The commented-out mode/monitor/patience reads document
    the intended future EarlyStopping configuration from the "callbacks"
    section of conf.yaml.

    Returns: a keras CallbackList wrapping the assembled callbacks.
    '''
    # mode = conf['callbacks']['mode']
    # monitor = conf['callbacks']['monitor']
    # patience = conf['callbacks']['patience']
    csvlog_save_path = conf['paths']['csvlog_save_path']
    # CSV callback is on by default
    if not os.path.exists(csvlog_save_path):
        os.makedirs(csvlog_save_path)
    # callbacks_list = conf['callbacks']['list']
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    cb_list = [
        cbks.BaseLogger(),
        cbks.CSVLogger("{}callbacks-{}.log".format(csvlog_save_path, stamp)),
    ]
    return cbks.CallbackList(cb_list)
def make_callbacks(args, experiment=None, run=None):
    """Assemble the training callbacks: per-epoch and best-model checkpoints,
    early stopping and LR-plateau on val_balanced_accuracy, CSV/TensorBoard/
    image/LR loggers, and an optional Comet logger when `experiment` is given.

    `run` defaults to args["run_dir"] and is used as the output directory.
    """
    if run is None:
        run = args["run_dir"]
    writer = summary_ops_v2.create_file_writer_v2(run)

    def schedule(epoch):
        # Warmup schedule kept for the commented-out LearningRateScheduler below.
        if epoch < args["warmup_length"]:
            return args["warmup_coeff"] * args["learning_rate"]
        return args["learning_rate"]

    cb = []
    cb.append(tf_callbacks.ModelCheckpoint(str(Path(run, "model.hdf5")),
                                           verbose=0,
                                           save_freq='epoch'))
    cb.append(tf_callbacks.ModelCheckpoint(str(Path(run, "best-model.hdf5")),
                                           verbose=0,
                                           save_freq='epoch',
                                           save_best_only=True,
                                           mode="max",
                                           monitor="val_balanced_accuracy"))
    cb.append(tf_callbacks.EarlyStopping(monitor="val_balanced_accuracy",
                                         patience=args["es_patience"],
                                         mode="max",
                                         min_delta=args["es_epsilon"]))
    cb.append(my_callbacks.ReduceLROnPlateauWithWarmup(
        monitor="val_balanced_accuracy",
        min_delta=args["lrplat_epsilon"],
        factor=args["lrplat_factor"],
        patience=int(args["lrplat_patience"]),
        base_learning_rate=args["learning_rate"],
        warmup_length=args["warmup_length"],
        warmup_coeff=args["warmup_coeff"]))
    # cb.append(tf_callbacks.LearningRateScheduler(schedule))
    cb.append(tf_callbacks.CSVLogger(str(Path(run, 'scores.log'))))
    cb.append(my_callbacks.ImageLogger(writer))
    cb.append(tf_callbacks.TensorBoard(log_dir=run,
                                       write_graph=True,
                                       profile_batch=0,
                                       histogram_freq=1))
    cb.append(my_callbacks.AdamLRLogger(writer))
    if experiment:
        cb.append(my_callbacks.CometLogger(experiment))
    return cb
def fit_model(self, batch_size, epochs):
    """Fit self.model on the held training split.

    Per-epoch losses are written to a CSV named after self.N and self.n.
    Returns the Keras History object.
    """
    log_path = "lanczos/models/N" + str(self.N) + "n" + str(self.n) + "_model_loss.csv"
    loss_logger = callbacks.CSVLogger(log_path, separator=",", append=False)
    return self.model.fit(
        self.X_train,
        self.y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,  # 2
        validation_data=(self.X_test, self.y_test),
        callbacks=[loss_logger])
def get_callbacks(path_train_log, path_checkpoint):
    """Return the standard callback set, all watching val_loss:
    LR halving on plateau, early stopping, CSV logging to `path_train_log`,
    and a best-only weights checkpoint at `path_checkpoint`."""
    plateau = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                          min_lr=1e-6,
                                          factor=0.5,
                                          patience=3,
                                          verbose=1,
                                          mode='auto')
    stopper = callbacks.EarlyStopping(monitor='val_loss',
                                      patience=30,
                                      verbose=1,
                                      min_delta=0.001)
    logger = callbacks.CSVLogger(path_train_log)
    saver = callbacks.ModelCheckpoint(filepath=path_checkpoint,
                                      save_best_only=True,
                                      save_weights_only=True)
    return [plateau, stopper, logger, saver]
def callbacks():
    """Assemble training callbacks: LR reduction on val_loss plateau, CSV
    logging to log.csv, and per-epoch ROC-AUC evaluation.

    NOTE(review): relies on module-level x_valid / y_valid being defined
    before this is called — confirm against the caller.
    """
    calls = []
    # FIX: `epsilon` was renamed to `min_delta` in Keras 2.1.6 and later
    # removed; passing epsilon= raises TypeError on modern tf.keras.
    reduceLROnPlat = cb.ReduceLROnPlateau(monitor='val_loss',
                                          factor=0.3,
                                          patience=3,
                                          verbose=1,
                                          mode='auto',
                                          min_delta=0.0001,
                                          cooldown=1,
                                          min_lr=0.000001)
    log = cb.CSVLogger('log.csv')
    RocAuc = RocAucEvaluation(validation_data=(x_valid, y_valid), interval=1)
    calls.append(reduceLROnPlat)
    calls.append(log)
    calls.append(RocAuc)
    return calls
def main(input_dir, output_dir):
    """Fine-tune MobileNetV2 (frozen ImageNet backbone) on images under
    input_dir (one subdirectory per class); checkpoints and a metrics CSV
    are written to output_dir.

    NOTE(review): validation_data is the SAME generator as the training
    data, so "val" metrics mirror training performance — confirm intended.
    """
    class_names = [path.basename(s) for s in glob(input_dir + "/*/")]
    n_classes = len(class_names)
    # image_gen = preprocessing.image.ImageDataGenerator(
    #     rescale=1.0 / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
    # )
    image_gen = preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)
    train_gen = image_gen.flow_from_directory(
        input_dir,
        target_size=(224, 224),
        batch_size=16,
        class_mode="categorical",  # "binary"
        color_mode="rgb",
    )
    # Backbone is used as a fixed feature extractor (all layers frozen).
    base_model = applications.mobilenet_v2.MobileNetV2(input_shape=(224, 224, 3),
                                                       include_top=False,
                                                       weights="imagenet",
                                                       pooling="max")
    for layer in base_model.layers:
        layer.trainable = False
    y = base_model.output
    y = layers.Dense(n_classes, activation="softmax")(y)
    model = models.Model(inputs=base_model.inputs, outputs=y)
    lr = 0.05  # NOTE(review): unusually high initial LR; decayed per epoch below
    model.compile(
        optimizer=optimizers.Adam(lr),
        loss="categorical_crossentropy",
        metrics=["accuracy", metrics.AUC()],
    )
    # print(model.summary())
    model.fit(
        train_gen,
        validation_data=train_gen,
        epochs=20,
        callbacks=[
            callbacks.ModelCheckpoint(output_dir, save_best_only=True),
            callbacks.EarlyStopping(patience=2),
            # Exponential decay: lr * exp(-0.1 * (epoch - 1)).
            callbacks.LearningRateScheduler(
                lambda epoch: lr * np.exp(-0.1 * (epoch - 1))),
            callbacks.CSVLogger(path.join(output_dir, "metrics.csv")),
        ],
    )
def update_fit_params(fit_params, model_name, tensorboard_params=None):
    """Attach default callbacks (and optional TensorBoard) to fit_params.

    Creates a timestamped run directory under about_models/<model_name>/ with
    a "saves" subfolder, then registers EarlyStopping, a CSV history logger
    and a weights-only best-model checkpoint. When `tensorboard_params` is
    given, its log_dir is pointed inside the run directory and a TensorBoard
    callback is appended via get_proper_callback.

    Returns: (fit_params, run_directory_path).
    """
    run_dir = "about_models/{}/{}".format(
        model_name, datetime.now().strftime("%Y%m%d-%H%M%S"))
    os.makedirs(run_dir)
    os.makedirs(os.path.join(run_dir, "saves"))

    history_path = os.path.join(run_dir, "{}_history.csv".format(model_name))
    fit_params["callbacks"] = [
        callbacks.EarlyStopping(patience=5),
        callbacks.CSVLogger(history_path),
        callbacks.ModelCheckpoint(filepath=os.path.join(run_dir, "saves/weights.h5"),
                                  save_best_only=True,
                                  save_weights_only=True),
    ]
    if tensorboard_params:
        tensorboard_params["log_dir"] = os.path.join(run_dir, "logs")
        fit_params["callbacks"].append(
            get_proper_callback(callbacks.TensorBoard, tensorboard_params))
    return fit_params, run_dir
def set_callback(self):
    '''Build self.callback_list: a per-epoch checkpoint (named with loss/acc/
    auc metrics), LR reduction on plateau, a CSV log, and step-wise learning
    curves for roc_auc, acc and loss.'''
    ckpt_name = "weights_epoch-{epoch:02d}_loss-{val_loss:.4f}_acc-{val_acc:.4f}_auc-{val_roc_auc:.4f}.hdf5"
    ckpt_path = self.log_dir.checkpoint.concat(ckpt_name)
    csv_log_path = self.log_dir.concat("log_file.csv")

    curve = LearningCurve(directory=self.log_dir.learning_curve.path)
    for metric, best in (("roc_auc", "max"), ("acc", "max"), ("loss", "min")):
        curve.book(x="step", y=metric, best=best)

    self.callback_list = [
        callbacks.ModelCheckpoint(filepath=ckpt_path),
        callbacks.ReduceLROnPlateau(verbose=1),
        callbacks.CSVLogger(csv_log_path),
        curve,
    ]
def build_callbacks(exp_path: os.PathLike, patience: int = 10):
    """Build an array of callbacks for model training.

    Best-only weights checkpoint, early stopping with best-weight restore,
    appending CSV history, and TensorBoard logging — all under `exp_path`.

    :param exp_path: experiment directory; accepts str or any PathLike.
        (BUG FIX: the original required a pathlib.Path — the `/` joins below
        crashed on a plain string despite the PathLike annotation.)
    :param patience: EarlyStopping patience in epochs.
    """
    from pathlib import Path
    exp_path = Path(exp_path)  # make the `/` joins below work for any PathLike/str
    os.makedirs(exp_path, exist_ok=True)
    cbacks = [
        callbacks.ModelCheckpoint(
            exp_path / "checkpoints",
            save_weights_only=True,
            monitor="val_loss",
            save_best_only=True,
        ),
        callbacks.EarlyStopping(
            monitor="val_loss",
            patience=patience,
            verbose=1,
            restore_best_weights=True,
        ),
        callbacks.CSVLogger(exp_path / "history.csv", separator=",", append=True),
        callbacks.TensorBoard(log_dir=exp_path / "tensorboard", histogram_freq=1),
    ]
    return cbacks
def main(batch_size, training_dir, checkpoint_dir, epochs, n_fixed_layers,
         logger_filename, weight_file, class_weight):
    """Train (or resume) the DR-score model on the CSV-listed image sets.

    Resumes from `weight_file` if given, else from the newest checkpoint in
    `checkpoint_dir`; optionally freezes the first `n_fixed_layers` of the
    backbone and balances classes when `class_weight` is truthy.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)

    df_train = pd.read_csv("dataset/training.csv")
    df_train = df_train.sample(frac=1, random_state=42)  # shuffle, reproducibly
    df_val = pd.read_csv("dataset/validation.csv")
    # Prefix image filenames with the training directory.
    df_train['Filename'] = training_dir + "/" + df_train['Filename'].astype(str)
    df_val['Filename'] = training_dir + "/" + df_val['Filename'].astype(str)
    # df_train = df_train[:100]
    # df_val = df_val[:100]

    generator = preprocess.tfdata_generator(df_train['Filename'].values,
                                            df_train['Drscore'].values,
                                            is_training=True,
                                            buffer_size=50,
                                            batch_size=batch_size)
    validation_generator = preprocess.tfdata_generator(df_val['Filename'].values,
                                                       df_val['Drscore'].values,
                                                       is_training=False,
                                                       buffer_size=50,
                                                       batch_size=batch_size)

    ## various callbacks
    tensorboard_cbk = callbacks.TensorBoard(log_dir=checkpoint_dir,
                                            update_freq='epoch',
                                            write_grads=False,
                                            histogram_freq=0)
    checkpoint_cbk = callbacks.ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'weights-{epoch:03d}.hdf5'),
        save_best_only=True,
        monitor='val_loss',
        verbose=1,
        save_weights_only=False)
    earlystop_ckb = callbacks.EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            restore_best_weights=False)
    csv_callback = callbacks.CSVLogger(os.path.join(checkpoint_dir, logger_filename),
                                       append=True)
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                            factor=0.5,
                                            patience=3,
                                            min_lr=1e-6)
    lr_scheduler = callbacks.LearningRateScheduler(step_decay)

    model, base_model = create_model()

    ## resume from an explicit weight file, else the newest checkpoint on disk
    files = sorted(glob(os.path.join(checkpoint_dir, 'weights-*.hdf5')))
    if weight_file:
        model_file = weight_file
        initial_epoch = int(model_file[-8:-5])  # epoch number embedded in the filename
        print('Resuming using saved model %s.' % model_file)
        model = tf.keras.models.load_model(model_file)
    elif files:
        model_file = files[-1]
        initial_epoch = int(model_file[-8:-5])
        print('Resuming using saved model %s.' % model_file)
        model = tf.keras.models.load_model(model_file)
    else:
        # model, base_model = create_model()
        initial_epoch = 0

    ## freeze upper layers
    if n_fixed_layers:
        for layer in base_model.layers[:n_fixed_layers]:
            layer.trainable = False
        for layer in base_model.layers[n_fixed_layers:]:
            layer.trainable = True
            print("training layer {}".format(layer.name))

    if class_weight:
        class_weights = compute_class_weight('balanced',
                                             np.unique(df_train["Drscore"].values),
                                             df_train["Drscore"].values)
        weight_dict = dict([(i, class_weights[i]) for i in range(len(class_weights))])
    else:
        weight_dict = None

    model.fit(
        generator,
        epochs=epochs,
        initial_epoch=initial_epoch,
        steps_per_epoch=df_train.shape[0] // batch_size,
        verbose=1,
        validation_data=validation_generator,
        validation_steps=df_val.shape[0] // batch_size,
        # BUG FIX: was class_weight=np.array(class_weights), which raised
        # NameError whenever the class_weight flag was falsy and is the wrong
        # type for Keras anyway; the computed weight_dict
        # ({class_index: weight} or None) is what fit() expects.
        class_weight=weight_dict,
        callbacks=[tensorboard_cbk, checkpoint_cbk, csv_callback, reduce_lr])
def train(config_dict):
    """Prepare the HDF-derived train/valid/test data and train a 1-D ResNet50
    regressor, logging checkpoints, CSV history and TensorBoard events to the
    configured log directory."""
    data_fp = config_dict.get("data_fp")
    out_fp = config_dict.get("out_fp")
    train_pkl = config_dict.get("train_pkl")
    valid_pkl = config_dict.get("valid_pkl")
    # hyper-parameters
    batch_size = config_dict.get("batch_size")
    end_epoch = config_dict.get("end_epoch")
    x_vars = config_dict.get("x_vars")
    y_vars = config_dict.get("y_vars")

    h5_lst = glob.glob(os.path.join(data_fp, "*.HDF"))
    train_files, valid_files, test_files = split_files_by_mon(h5_lst)
    # Materialize train/valid pickles (plus index CSVs and HDF copies) once;
    # skipped on later runs when the pickle already exists.
    if not os.path.exists(train_pkl):
        train_dp = DataPrepare(train_files, batch_size, "train_flag", x_vars, y_vars)
        train_dp.copy_file(data_fp, os.path.join(out_fp, "hdf", "train"))
        train_dp.write_idx_to_csv(
            os.path.join(os.path.dirname(train_pkl), "train_idx.csv"))
        train_dp.write_data_to_pkl(data_fp, train_pkl)
    if not os.path.exists(valid_pkl):
        valid_dp = DataPrepare(valid_files, batch_size, "train_flag", x_vars, y_vars)
        valid_dp.copy_file(data_fp, os.path.join(out_fp, "hdf", "valid"))
        valid_dp.write_idx_to_csv(
            os.path.join(os.path.dirname(valid_pkl), "valid_idx.csv"))
        valid_dp.write_data_to_pkl(data_fp, valid_pkl)
    # Copy the held-out test files aside (only if the folder is still empty).
    fd = os.path.join(out_fp, "hdf", "test")
    os.makedirs(fd, exist_ok=True)
    if not os.listdir(fd):
        for f in test_files:
            shutil.copyfile(f, os.path.join(fd, os.path.basename(f)))
    np.random.seed(42)
    # data batches
    train_batches = DataGenerator(train_pkl, batch_size, shuffle=True)
    valid_batches = DataGenerator(valid_pkl, batch_size, shuffle=False)
    # model setup
    model = ResNet50(input_shape=(len(x_vars), 1))
    # model = load_model(r"/path/to/pretrained_weights")
    model.compile(
        optimizer=optimizers.Adam(lr=0.001, decay=1e-4),
        # optimizer=optimizers.SGD(learning_rate=0.001),
        loss=losses.mean_squared_error,
        metrics=[metrics.mean_absolute_error])
    # training
    log_dir = config_dict.get("log_dir")
    os.makedirs(log_dir, exist_ok=True)
    best_ckpt = callbacks.ModelCheckpoint(os.path.join(log_dir, 'weights_best.h5'),
                                          save_best_only=True,
                                          monitor='val_mean_absolute_error',
                                          mode='auto')
    # Periodic (every-10-epoch) best-only checkpoint with metrics in the name.
    ckpt = callbacks.ModelCheckpoint(os.path.join(
        log_dir,
        'weights_{epoch:02d}_{val_mean_absolute_error:.2f}.h5'),
                                     save_best_only=True,
                                     monitor='val_mean_absolute_error',
                                     mode='auto',
                                     period=10)
    train_logger = callbacks.CSVLogger(os.path.join(log_dir, "train_log.csv"))
    # callback = callbacks.LearningRateScheduler(scheduler)
    # early_stopping = callbacks.EarlyStopping(
    #     monitor='val_mean_absolute_error',
    #     patience=10,
    # )
    lr_decay = callbacks.ReduceLROnPlateau(monitor="val_mean_absolute_error",
                                           mode="min")
    # warm_up_lr = WarmUpCosineDecayScheduler(
    #     learning_rate_base=0.001,
    #     total_steps=int(end_epoch * len(train_batches) / batch_size),
    #     warmup_learning_rate=1e-5,
    #     warmup_steps=int(5 * len(train_batches) / batch_size),
    #     hold_base_rate_steps=10,
    #     min_learn_rate=1e-6
    # )
    events_dir = os.path.join(log_dir, "events")
    # NOTE(review): this re-checks log_dir (already created above) —
    # presumably events_dir was meant; verify.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tensorboard = callbacks.TensorBoard(log_dir=events_dir,
                                        histogram_freq=0,
                                        write_graph=True,
                                        write_images=True)
    model.fit(x=train_batches,
              validation_data=valid_batches,
              epochs=end_epoch,
              callbacks=[best_ckpt, ckpt, train_logger, tensorboard])
def main():
    """End-to-end RNN training driver: parse CLI args, set up the run
    directory and config, load datasets, build/compile the model, train with
    checkpoints + logging, then evaluate the surviving best checkpoints."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", default="rnn{}".format(datetime.now().strftime("%y%m%d%H%M%S")))
    parser.add_argument("--directory", default="./logs")
    # GPU
    parser.add_argument("--num_gpus", default=len(get_available_gpus()), type=int)
    parser.add_argument("--multi-gpu", default=False, action='store_true', dest='multi_gpu')
    # Hyperparameters
    parser.add_argument("--epoch", dest="epochs", default=200, type=int)
    parser.add_argument("--batch_size", default=128, type=int)
    parser.add_argument("--valid_batch_size", default=1024, type=int)
    # Optimizer
    parser.add_argument("--optimizer", default="Adam", type=str)
    parser.add_argument("--lr", default=0.001, type=float)
    parser.add_argument("--clipnorm", default=-1, type=float,
                        help="if it is greater than 0, then gradient clipping is activated")
    parser.add_argument("--clipvalue", default=-1, type=float)
    parser.add_argument("--use-class-weight", dest="use_class_weight",
                        default=False, action="store_true")
    # Frequencies
    parser.add_argument("--valid_freq", type=int, default=32)
    parser.add_argument("--save_freq", type=int, default=32)
    parser.add_argument("-v", "--verbose", action="store_true")
    # Project parameters
    parser.add_argument("--min-pt", dest="min_pt", default=100, type=int)
    # Model Architecture
    parser.add_argument("--act", dest="activation", default="elu", type=str)
    parser.add_argument("--rnn", default="gru", type=str)
    args = parser.parse_args()

    ########################################
    # Run directory, config, script snapshot
    ########################################
    log_dir = Directory(path=os.path.join(args.directory, args.name))
    log_dir.mkdir("script")
    log_dir.mkdir("checkpoint")
    log_dir.mkdir("learning_curve")
    log_dir.mkdir("roc_curve")
    log_dir.mkdir("model_response")

    config = Config(log_dir.path, "w")
    config.append(args)
    config["hostname"] = os.environ["HOSTNAME"]
    config["log_dir"] = log_dir.path
    config.save()

    # Snapshot the source files used for this run, importable as a package.
    scripts = [
        "./dataset.py",
        "./model.py",
        "./train.py",
    ]
    for each in scripts:
        shutil.copy2(each, log_dir.script.path)
    open(log_dir.script.concat("__init__.py"), 'w').close()

    ########################################
    # Load training and validation datasets
    ########################################
    dset = get_dataset_paths(config.min_pt)
    config.append(dset)
    config["seq_maxlen"] = {
        "x_kin": 30,
        "x_pid": 30
    }
    train_iter = get_data_iter(
        path=dset["training"],
        batch_size=config.batch_size,
        seq_maxlen=config.seq_maxlen,
        fit_generator_mode=True)
    valid_iter = get_data_iter(
        path=dset["validation"],
        batch_size=config.valid_batch_size,
        seq_maxlen=config.seq_maxlen,
        fit_generator_mode=True)
    test_iter = get_data_iter(
        path=dset["test"],
        batch_size=config.valid_batch_size,
        seq_maxlen=config.seq_maxlen,
        fit_generator_mode=False)

    if config.use_class_weight:
        class_weight = get_class_weight(train_iter)
        config["class_weight"] = list(class_weight)
    else:
        class_weight = None

    #################################
    # Build & Compile a model.
    #################################
    x_kin_shape = train_iter.get_shape("x_kin", batch_shape=False)
    x_pid_shape = train_iter.get_shape("x_pid", batch_shape=False)

    model = build_model(
        x_kin_shape,
        x_pid_shape,
        rnn=config.rnn,
        activation=config.activation,
        name=config.name)
    config["model"] = model.get_config()

    if config.multi_gpu:
        model = multi_gpu_model(model, gpus=config.num_gpus)

    # Plot the architecture only on the host known to have graphviz available.
    if config.hostname == "cms05.sscc.uos.ac.kr":
        model_plot_path = log_dir.concat("model.png")
        plot_model(model, to_file=model_plot_path, show_shapes=True)
    else:
        model.summary()

    loss = 'categorical_crossentropy'
    # TODO capsulisation
    optimizer_kwargs = {}
    if config.clipnorm > 0:
        optimizer_kwargs["clipnorm"] = config.clipnorm
    if config.clipvalue > 0:
        optimizer_kwargs["clipvalue"] = config.clipvalue
    optimizer = getattr(optimizers, config.optimizer)(lr=config.lr, **optimizer_kwargs)
    metric_list = ["accuracy", roc_auc]
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metric_list)
    config["loss"] = loss
    config["optimizer_config"] = optimizer.get_config()

    ###########################################################################
    # Callbacks
    ###########################################################################
    ckpt_format_str = "weights_epoch-{epoch:02d}_loss-{val_loss:.4f}_acc-{val_acc:.4f}_auc-{val_roc_auc:.4f}.hdf5"
    ckpt_path = log_dir.checkpoint.concat(ckpt_format_str)
    csv_log_path = log_dir.concat("log_file.csv")

    learning_curve = LearningCurve(directory=log_dir.learning_curve.path)
    learning_curve.book(x="step", y="roc_auc", best="max")
    learning_curve.book(x="step", y="acc", best="max")
    learning_curve.book(x="step", y="loss", best="min")

    callback_list = [
        callbacks.ModelCheckpoint(filepath=ckpt_path),
        callbacks.ReduceLROnPlateau(verbose=1),
        callbacks.CSVLogger(csv_log_path),
        learning_curve,
    ]

    ############################################################################
    # Training
    ############################################################################
    model.fit_generator(
        train_iter,
        steps_per_epoch=len(train_iter),
        epochs=config.epochs,
        validation_data=valid_iter,
        validation_steps=len(valid_iter),
        callbacks=callback_list,
        shuffle=True,
        class_weight=class_weight)

    del model

    print("Training is over! :D")

    ###########################################
    # Evaluation
    ###########################################
    train_iter.fit_generator_mode = False
    train_iter.cycle = False

    # Keep only the locally-best checkpoints; delete the rest from disk.
    good_ckpt = find_good_checkpoint(
        log_dir.checkpoint.path,
        which={"max": ["auc", "acc"], "min": ["loss"]})
    all_ckpt = set(log_dir.checkpoint.get_entries())
    # no local optima
    useless_ckpt = all_ckpt.difference(good_ckpt)
    for each in useless_ckpt:
        os.remove(each)

    for idx, each in enumerate(good_ckpt, 1):
        print("[{}/{}] {}".format(idx, len(good_ckpt), each))
        K.clear_session()
        evaluate(checkpoint_path=each,
                 train_iter=train_iter,
                 test_iter=test_iter,
                 log_dir=log_dir)
    config.save()
metrics=['accuracy']) return model model = model_final(preproc_source_sentences.shape, preproc_target_sentences.shape[1], len(source_tokenizer.word_index) + 1, len(target_tokenizer.word_index) + 1) model.summary() #CallBacks mfile = 'models/Glove_training_bach32.model.h5' model_checkpoint = callbacks.ModelCheckpoint(mfile, monitor='accuracy', save_best_only=True, save_weights_only=True) logger = callbacks.CSVLogger('results/training_bach_32.log') tensorboard = callbacks.TensorBoard(log_dir='results/training_bach_32') callbacks = [logger, tensorboard] #Training model and save callbacks: #model.fit(X_train, Y_train, batch_size=1024, epochs=25, validation_split=0.1, callbacks=callbacks) #Training model and save callbacks: model.fit(X_train, Y_train, batch_size=32, epochs=10, validation_split=0.01) Predicted_by_Glove = model.predict(X_test, len(X_test))
def main(args):
    """Train a dog-breed classifier chosen via FLAGS and archive logs/checkpoints.

    Builds the model named by ``FLAGS.model_name``, trains it on the
    ``data/train`` / ``data/val`` directory generators, checkpoints the best
    val_loss weights, and saves the architecture JSON under ``models/``.

    :param args: unused here; model/optimizer selection comes from the global FLAGS.
    :raises ValueError: if FLAGS.model_name is not a supported architecture.
    """
    # Hoisted from mid-body; function-local so this block stays self-contained.
    import json

    # Fail fast on an unknown model name instead of hitting a NameError at
    # model.compile below (the original used two independent `if`s with no else).
    if FLAGS.model_name == "VGG16":
        model = VGG16ForDogBreed(input_shape=(224, 224, 3))
    elif FLAGS.model_name == "ResNet50":
        model = ResNet50ForDogBreed(input_shape=(224, 224, 3))
    else:
        raise ValueError("Unknown model name: {}".format(FLAGS.model_name))

    optimizer = _make_optimizer()
    # Labels are integer class ids, hence the sparse categorical loss.
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=['accuracy'])

    DATA_DIR = "data"
    TRAIN = "train"
    VAL = "val"
    train_path = os.path.join(DATA_DIR, TRAIN)
    val_path = os.path.join(DATA_DIR, VAL)
    number_of_train_files = _number_of_files(train_path)
    number_of_val_files = _number_of_files(val_path)

    batch_size = 512
    epochs = 500

    train_generator = _make_generator(train=True,
                                      dataset_path=train_path,
                                      target_size=(224, 224),
                                      batch_size=batch_size)
    val_generator = _make_generator(train=False,
                                    dataset_path=val_path,
                                    batch_size=batch_size)
    # One epoch = full pass over each split; trailing partial batch is dropped.
    STEP_SIZE_TRAIN = number_of_train_files // batch_size
    STEP_SIZE_VALID = number_of_val_files // batch_size

    CHECKPOINT_DIR = "checkpoints"
    # makedirs(exist_ok=True) replaces the race-prone exists()+mkdir() pair.
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    with open('download_conf.json') as json_file:
        download_conf = json.load(json_file)

    model_info = "{0}/model-{1}_optimizer-{2}_dataset-{3}".format(
        CHECKPOINT_DIR, FLAGS.model_name, FLAGS.optimizer,
        download_conf["dataset"])
    csv_logger = callbacks.CSVLogger('{0}_{1}_training.log'.format(
        FLAGS.model_name, download_conf["dataset"]))
    # Keep only the best-val_loss weights; epoch/metrics are baked into the name.
    checkpointer = callbacks.ModelCheckpoint(
        filepath=model_info + '_{epoch:03d}_{val_accuracy:.2f}_{val_loss:.2f}.hdf5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True)
    callbacks_list = [csv_logger, checkpointer]

    MODEL_REPOSITORY_DIR = "models"
    os.makedirs(MODEL_REPOSITORY_DIR, exist_ok=True)
    # Persist the architecture (not the weights) for later reloads.
    json_config = model.to_json()
    model_path = "{0}/model-{1}.json".format(MODEL_REPOSITORY_DIR,
                                             FLAGS.model_name)
    with open(model_path, 'w') as json_file:
        json_file.write(json_config)

    model.fit_generator(train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        epochs=epochs,
                        validation_data=val_generator,
                        validation_steps=STEP_SIZE_VALID,
                        callbacks=callbacks_list)
'mean_squared_error', metrics=['mean_absolute_error']) history = model.fit( train_loader, validation_data=val_loader, epochs=args.num_epochs, verbose=True, shuffle=False, callbacks=[ LRLogger(), EpochTimeLogger(), cb.LearningRateScheduler(lr_schedule), cb.ModelCheckpoint(os.path.join(test_dir, 'best_model.h5'), save_best_only=True), cb.EarlyStopping(patience=128, restore_best_weights=True), cb.CSVLogger(os.path.join(test_dir, 'train_log.csv')), cb.TerminateOnNaN() ]) # Run on the validation set and assess statistics y_true = np.hstack([x[1].numpy()[:, 0] for x in iter(test_loader)]) y_pred = np.squeeze(model.predict(test_loader)) pd.DataFrame({ 'true': y_true, 'pred': y_pred }).to_csv(os.path.join(test_dir, 'test_results.csv'), index=False) with open(os.path.join(test_dir, 'test_summary.json'), 'w') as fp: json.dump( {
def _evaluate_and_save(model, X, y, suffix, save_dir, category_taxonomy,
                       category_names):
    """Predict on (X, y) and write metrics_<suffix>.json / classification_report_<suffix>.json."""
    y_pred = model.predict(X)
    report, clf_report = evaluation_report(y,
                                           y_pred,
                                           taxonomy=category_taxonomy,
                                           category_names=category_names)
    save_json(report, save_dir / "metrics_{}.json".format(suffix))
    save_json(clf_report,
              save_dir / "classification_report_{}.json".format(suffix))


def train(
    train_data,
    val_data,
    test_data,
    model: keras.Model,
    save_dir: pathlib.Path,
    config: Config,
    category_taxonomy: Taxonomy,
    category_names: List[str],
):
    """Train ``model``, restore the newest best checkpoint, and write val/test reports.

    :param train_data: (X_train, y_train) pair
    :param val_data: (X_val, y_val) pair, used for early stopping / checkpointing
    :param test_data: (X_test, y_test) pair, evaluated once at the end
    :param model: compiled Keras model to train
    :param save_dir: directory receiving weights, logs and JSON reports
    :param config: training hyperparameters (batch_size, epochs)
    :param category_taxonomy: taxonomy forwarded to evaluation_report
    :param category_names: label names forwarded to evaluation_report
    """
    print("Starting training...")
    # TensorBoard writes into a temp dir first; moved under save_dir afterwards.
    temporary_log_dir = pathlib.Path(tempfile.mkdtemp())
    print("Temporary log directory: {}".format(temporary_log_dir))
    X_train, y_train = train_data
    X_val, y_val = val_data
    X_test, y_test = test_data
    model.fit(
        X_train,
        y_train,
        batch_size=config.train_config.batch_size,
        epochs=config.train_config.epochs,
        validation_data=(X_val, y_val),
        callbacks=[
            callbacks.TerminateOnNaN(),
            # Only best-val_loss epochs are kept; filename carries epoch + loss.
            callbacks.ModelCheckpoint(
                filepath=str(save_dir / "weights.{epoch:02d}-{val_loss:.4f}.hdf5"),
                monitor="val_loss",
                save_best_only=True,
            ),
            callbacks.TensorBoard(log_dir=str(temporary_log_dir), histogram_freq=2),
            callbacks.EarlyStopping(monitor="val_loss", patience=4),
            callbacks.CSVLogger(str(save_dir / "training.csv")),
        ],
    )
    print("Training ended")
    log_dir = save_dir / "logs"
    print("Moving log directory from {} to {}".format(temporary_log_dir, log_dir))
    shutil.move(str(temporary_log_dir), str(log_dir))
    model.save(str(save_dir / "last_checkpoint.hdf5"))
    # FIX: pick the checkpoint with the highest epoch *numerically*. A plain
    # lexicographic sorted(...)[-1] breaks once epochs reach 100 ("100" < "99"
    # as strings); parse the epoch out of "weights.<epoch>-<val_loss>.hdf5".
    last_checkpoint_path = max(
        save_dir.glob("weights.*.hdf5"),
        key=lambda p: int(p.name.split(".")[1].split("-")[0]),
    )
    print("Restoring last checkpoint {}".format(last_checkpoint_path))
    model = keras.models.load_model(str(last_checkpoint_path))
    print("Evaluating on validation dataset")
    _evaluate_and_save(model, X_val, y_val, "val", save_dir,
                       category_taxonomy, category_names)
    _evaluate_and_save(model, X_test, y_test, "test", save_dir,
                       category_taxonomy, category_names)
def pretrain(self, x, y=None, optimizer='adam', epochs=200, batch_size=256,
             save_dir='results/temp', da_s1=False, verbose=1,
             use_multiprocessing=True):
    """Pretrain the autoencoder on x with MSE reconstruction loss.

    x is fed as both input and target. When y is given (and verbose > 0),
    clustering accuracy/NMI on the bottleneck features is printed roughly
    ten times over the run — y is never used for training itself.

    :param x: training samples
    :param y: optional ground-truth labels, monitoring only
    :param optimizer: passed to Keras compile
    :param epochs: number of pretraining epochs
    :param batch_size: mini-batch size
    :param save_dir: receives pretrain_log.csv and ae_weights.h5
    :param da_s1: when True, train from the data-augmentation generator
        instead of the raw arrays
    :param verbose: Keras verbosity; also gates the PrintACC callback
    :param use_multiprocessing: forwarded to fit_generator (augmented path)
    """
    print('Pretraining......')
    self.autoencoder.compile(optimizer=optimizer, loss='mse')
    csv_logger = callbacks.CSVLogger(save_dir + '/pretrain_log.csv')
    cb = [csv_logger]
    if y is not None and verbose > 0:

        class PrintACC(callbacks.Callback):
            # Periodically clusters the bottleneck features with KMeans and
            # prints acc/NMI against the held labels.
            def __init__(self, x, y):
                self.x = x
                self.y = y
                super(PrintACC, self).__init__()

            def on_epoch_end(self, epoch, logs=None):
                # Log ~10 times over the run. The `epochs < 10` guard also
                # prevents a modulo-by-zero — note it means runs shorter than
                # 10 epochs never log at all.
                if epochs < 10 or epoch % int(epochs / 10) != 0:
                    return
                # Middle layer of the symmetric autoencoder = the embedding.
                feature_model = Model(
                    self.model.input,
                    self.model.get_layer(
                        index=int(len(self.model.layers) / 2)).output)
                features = feature_model.predict(self.x)
                # NOTE(review): KMeans(n_jobs=...) was removed in newer
                # scikit-learn releases — confirm the pinned sklearn version.
                km = KMeans(n_clusters=len(np.unique(self.y)),
                            n_init=20,
                            n_jobs=4)
                y_pred = km.fit_predict(features)
                print(' ' * 8 + '|==> acc: %.4f, nmi: %.4f <==|' %
                      (metrics.acc(self.y, y_pred), metrics.nmi(self.y, y_pred)))

        cb.append(PrintACC(x, y))

    # begin pretraining
    t0 = time()
    if not da_s1:
        self.autoencoder.fit(x, x, batch_size=batch_size, epochs=epochs,
                             callbacks=cb, verbose=verbose)
    else:
        print('-=*' * 20)
        print('Using augmentation for pretraining')
        print('-=*' * 20)
        # `generator` yields augmented (batch, batch) reconstruction pairs
        # from self.datagen — presumably an ImageDataGenerator; TODO confirm.
        self.autoencoder.fit_generator(
            generator(self.datagen, x, batch_size=batch_size),
            steps_per_epoch=math.ceil(x.shape[0] / batch_size),
            epochs=epochs,
            callbacks=cb,
            verbose=verbose,
            use_multiprocessing=use_multiprocessing,
            workers=4)
    print('Pretraining time: ', time() - t0)
    self.autoencoder.save_weights(save_dir + '/ae_weights.h5')
    print('Pretrained weights are saved to %s/ae_weights.h5' % save_dir)
    self.pretrained = True
def main():
    """End-to-end driver: parse args, build the dataset iterators and model,
    train with checkpoint/LR callbacks, then evaluate the good checkpoints.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--logdir", dest="log_dir", type=str,
                        default="./logs/untitled-{}".format(
                            datetime.now().strftime("%y%m%d-%H%M%S")))
    parser.add_argument("--num_gpus", default=len(get_available_gpus()),
                        type=int)
    parser.add_argument("--multi-gpu", default=False, action='store_true',
                        dest='multi_gpu')
    # Hyperparameters
    parser.add_argument("--epoch", dest="num_epochs", default=100, type=int)
    parser.add_argument("--batch_size", default=128, type=int)
    parser.add_argument("--valid_batch_size", default=1024, type=int)
    parser.add_argument("--lr", default=0.001, type=float)
    parser.add_argument(
        "--clipnorm", default=-1, type=float,
        help="if it is greater than 0, then graidient clipping is activated")
    parser.add_argument("--clipvalue", default=-1, type=float)
    parser.add_argument("--use-class-weight", dest="use_class_weight",
                        default=False, action="store_true")
    # Frequencies
    parser.add_argument("--valid_freq", type=int, default=32)
    parser.add_argument("--save_freq", type=int, default=32)
    parser.add_argument("-v", "--verbose", action="store_true")
    # Project parameters
    parser.add_argument("--min-pt", dest="min_pt", default=100, type=int)
    args = parser.parse_args()

    ###################
    # Output directory layout + config bookkeeping
    ###################
    log_dir = Directory(path=args.log_dir)
    log_dir.mkdir("script")
    log_dir.mkdir("model_checkpoint")
    log_dir.mkdir("learning_curve")
    log_dir.mkdir("roc_curve")
    log_dir.mkdir("model_response")
    backup_scripts(log_dir.script.path)
    config = Config(log_dir.path, "w")
    config.append(args)
    config["hostname"] = os.environ["HOSTNAME"]

    ########################################
    # Load training and validation datasets
    ########################################
    dset = get_dataset_paths(args.min_pt)
    config.append(dset)
    config["seq_maxlen"] = {"x": 30}
    train_iter = get_data_iter(path=dset["training"],
                               batch_size=args.batch_size,
                               seq_maxlen=config.seq_maxlen,
                               fit_generator_mode=True)
    valid_iter = get_data_iter(path=dset["validation"],
                               batch_size=args.valid_batch_size,
                               seq_maxlen=config.seq_maxlen,
                               fit_generator_mode=True)
    test_iter = get_data_iter(path=dset["test"],
                              batch_size=args.valid_batch_size,
                              seq_maxlen=config.seq_maxlen,
                              fit_generator_mode=False)
    if args.use_class_weight:
        class_weight = get_class_weight(train_iter)
        config["class_weight"] = list(class_weight)
    else:
        class_weight = None

    #################################
    # Build & Compile a model.
    #################################
    x_shape = train_iter.get_shape("x", batch_shape=False)
    model = build_a_model(x_shape=x_shape)
    config["model"] = model.get_config()
    if args.multi_gpu:
        # FIX: was multi_gpu_model(_model, ...) — `_model` is undefined here
        # and raised NameError whenever --multi-gpu was passed.
        model = multi_gpu_model(model, gpus=args.num_gpus)
    model_plot_path = log_dir.concat("model.png")
    plot_model(model, to_file=model_plot_path, show_shapes=True)

    loss = 'categorical_crossentropy'
    # TODO capsulisation
    optimizer_kwargs = {}
    if args.clipnorm > 0:
        # FIX: both assignments below misspelled the dict as `optimzer_kwargs`,
        # raising NameError whenever gradient clipping was requested.
        optimizer_kwargs["clipnorm"] = args.clipnorm
    if args.clipvalue > 0:
        optimizer_kwargs["clipvalue"] = args.clipvalue
    optimizer = optimizers.Adam(lr=args.lr, **optimizer_kwargs)
    metric_list = ["accuracy", roc_auc]
    model.compile(loss=loss, optimizer=optimizer, metrics=metric_list)
    config["loss"] = loss
    config["optimizer"] = "Adam"
    config["optimizer_config"] = optimizer.get_config()

    ###########################################################################
    # Callbacks
    ###########################################################################
    ckpt_format_str = "weights_epoch-{epoch:02d}_loss-{val_loss:.4f}_acc-{val_acc:.4f}_auc-{val_roc_auc:.4f}.hdf5"
    ckpt_path = log_dir.model_checkpoint.concat(ckpt_format_str)
    csv_log_path = log_dir.concat("log_file.csv")
    learning_curve = LearningCurve(directory=log_dir.learning_curve.path)
    learning_curve.book(x="step", y="roc_auc", best="max")
    learning_curve.book(x="step", y="acc", best="max")
    learning_curve.book(x="step", y="loss", best="min")
    callback_list = [
        callbacks.ModelCheckpoint(filepath=ckpt_path),
        callbacks.EarlyStopping(monitor="val_loss", patience=5),
        callbacks.ReduceLROnPlateau(),
        callbacks.CSVLogger(csv_log_path),
        learning_curve,
    ]

    ############################################################################
    # Training
    ############################################################################
    # NOTE(review): epochs is hard-coded to 50 even though --epoch
    # (args.num_epochs, default 100) is parsed above — confirm which is intended.
    model.fit_generator(train_iter,
                        steps_per_epoch=len(train_iter),
                        epochs=50,
                        validation_data=valid_iter,
                        validation_steps=len(valid_iter),
                        callbacks=callback_list,
                        shuffle=True,
                        class_weight=class_weight)
    print("Training is over! :D")
    del model

    ###########################################
    # Evaluation
    ############################################
    # Switch the training iterator to plain evaluation mode.
    train_iter.fit_generator_mode = False
    train_iter.cycle = False
    good_ckpt = find_good_checkpoint(log_dir.model_checkpoint.path,
                                     which={
                                         "max": ["auc", "acc"],
                                         "min": ["loss"]
                                     })
    for idx, each in enumerate(good_ckpt, 1):
        print("[{}/{}] {}".format(idx, len(good_ckpt), each))
        K.clear_session()
        evaluate(custom_objects={"roc_auc": roc_auc},
                 checkpoint_path=each,
                 train_iter=train_iter,
                 test_iter=test_iter,
                 log_dir=log_dir)
    config.save()
lon1 = y_true[1] * pi_on_180 lon2 = y_pred[1] * pi_on_180 x = (lon2 - lon1) * tf.math.cos(0.5 * (lat2 + lat1)) y = lat2 - lat1 d = 6371 * tf.math.sqrt(x * x + y * y) return d model.compile(optimizer=optimizers.Adam(), loss="mse", metrics=[equirectangular_distance, haversine, "mse", "mae"]) batch_size = 64 epochs = 200 csv_logger = callbacks.CSVLogger('training.csv') model.fit(x=x_train, y=y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=epochs, callbacks=[csv_logger]) y_pred = model.predict(x_test) wgs84_geod = Geod(ellps='WGS84') def delta_distance_azimuth(lat1, lon1, lat2, lon2): az12, az21, dist = wgs84_geod.inv(lon1, lat1, lon2, lat2) dist = [x / 1000.0 for x in dist]
def train(model, eval_model, data, args):
    """Train a CapsuleNet and report final test-set metrics.

    :param model: the CapsuleNet training model (inputs: image + label mask;
                  outputs: capsule prediction + reconstruction)
    :param eval_model: inference model used for the final prediction pass
    :param data: `((x_train, y_train), (x_test, y_test), classes)`
    :param args: command-line arguments (save_dir, batch_size, lr, lr_decay,
                 lam_recon, epochs, debug, ...)
    :return: The trained model
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test), classes = data
    print("x_train {}, y_train {}, x_test {}, y_test {}".format(
        x_train.shape, y_train.shape, x_test.shape, y_test.shape))

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    # NOTE(review): 'val_rec_macro' is presumably published by the
    # MetricCallback constructed below — confirm the metric name matches.
    checkpoint = callbacks.ModelCheckpoint(args.save_dir +
                                           '/weights-{epoch:02d}.h5',
                                           monitor='val_rec_macro',
                                           mode='max',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    # Exponential LR decay: lr * lr_decay^epoch.
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # Resume from a previously saved run when one exists.
    if os.path.isfile(args.save_dir + '/trained_model.h5'):
        model.load_weights(args.save_dir + '/trained_model.h5')

    # compile the model: margin loss on the capsule output, MSE on the
    # reconstruction, weighted by lam_recon.
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})

    mc = MetricCallback(validation_data=((x_test, y_test), (y_test, x_test)),
                        labels=classes,
                        batch_size=args.batch_size)
    # Decoder takes y as input and reconstructs x, hence the paired targets.
    model.fit([x_train, y_train], [y_train, x_train],
              batch_size=args.batch_size,
              epochs=args.epochs,
              validation_data=[[x_test, y_test], [y_test, x_test]],
              callbacks=[mc, log, tb, checkpoint, lr_decay],
              shuffle=True)

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)
    plot_log(args.save_dir + '/log.csv', show=True)

    # Final evaluation with the inference model.
    # NOTE(review): predict(...)[0] looks like raw capsule outputs; sklearn's
    # accuracy_score/confusion_matrix expect label vectors — confirm the
    # output encoding matches y_test's.
    y_pred = eval_model.predict(
        x_test, batch_size=args.batch_size)[0].astype("float32")
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average="macro")
    print("Accuracy: {:.2f}%".format(acc * 100))
    print("Recall score: {:.2f}%".format(recall * 100))
    print("Confusion matrix:\n{}".format(cm))
    return model
def pretrain(self, x, y=None, optimizer='adam', epochs=200, batch_size=256,
             save_dir='results/temp', verbose=1, aug_pretrain=False):
    """Pretrain the autoencoder on x with MSE reconstruction loss.

    x serves as both input and target. When y is given (and verbose > 0),
    clustering accuracy/NMI on the bottleneck features is printed
    periodically — y is never used for training itself.

    :param x: training samples; flat features or images
    :param y: optional ground-truth labels, monitoring only
    :param optimizer: passed to Keras compile
    :param epochs: number of pretraining epochs
    :param batch_size: mini-batch size (see NOTE on the augmented path below)
    :param save_dir: receives pretrain_log.csv and ae_weights.h5
    :param verbose: Keras verbosity; also gates the PrintACC callback
    :param aug_pretrain: when True, train from the augmentation generator
    """
    print('Begin pretraining: ', '-' * 60)
    self.autoencoder.compile(optimizer=optimizer, loss='mse')
    csv_logger = callbacks.CSVLogger(save_dir + '/pretrain_log.csv')
    cb = [csv_logger]
    if y is not None and verbose > 0:

        class PrintACC(callbacks.Callback):
            # Periodically clusters the bottleneck features with KMeans and
            # prints acc/NMI against the held labels.
            def __init__(self, x, y):
                self.x = x
                self.y = y
                super(PrintACC, self).__init__()

            def on_epoch_end(self, epoch, logs=None):
                # Log ~10 times over the run; the first clause skips the
                # modulo entirely when epochs // 10 == 0 (short runs log
                # every epoch).
                if int(epochs / 10) != 0 and epoch % int(epochs / 10) != 0:
                    return
                # Middle layer of the symmetric autoencoder = the embedding.
                feature_model = Model(
                    self.model.input,
                    self.model.get_layer(
                        index=int(len(self.model.layers) / 2)).output)
                features = feature_model.predict(self.x)
                # NOTE(review): KMeans(n_jobs=...) was removed in newer
                # scikit-learn releases — confirm the pinned sklearn version.
                km = KMeans(n_clusters=len(np.unique(self.y)),
                            n_init=20,
                            n_jobs=4)
                y_pred = km.fit_predict(features)
                print(' ' * 8 + '|==> acc: %.4f, nmi: %.4f <==|' %
                      (metrics.acc(self.y, y_pred), metrics.nmi(self.y, y_pred)))

        cb.append(PrintACC(x, y))

    # begin pretraining
    t0 = time()
    if not aug_pretrain:
        self.autoencoder.fit(x, x, batch_size=batch_size, epochs=epochs,
                             callbacks=cb, verbose=verbose)
    else:
        print(
            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        )
        print('-=*' * 20)
        print('Using augmentation for ae')
        print('-=*' * 20)

        def gen(x, batch_size):
            # Yields augmented (batch, batch) reconstruction pairs. Flat
            # inputs are reshaped to square gray/RGB images so the image
            # augmenter can run, then flattened back.
            if len(x.shape) > 2:  # image
                gen0 = self.datagen.flow(x, shuffle=True,
                                         batch_size=batch_size)
                while True:
                    batch_x = gen0.next()
                    yield (batch_x, batch_x)
            else:
                width = int(np.sqrt(x.shape[-1]))
                if width * width == x.shape[-1]:  # gray
                    im_shape = [-1, width, width, 1]
                else:  # RGB
                    width = int(np.sqrt(x.shape[-1] / 3.0))
                    im_shape = [-1, width, width, 3]
                gen0 = self.datagen.flow(np.reshape(x, im_shape),
                                         shuffle=True,
                                         batch_size=batch_size)
                while True:
                    batch_x = gen0.next()
                    batch_x = np.reshape(batch_x,
                                         [batch_x.shape[0], x.shape[-1]])
                    yield (batch_x, batch_x)

        print("###############################################")
        print(batch_size)
        # NOTE(review): debug leftovers? gen(x, 1) ignores batch_size and
        # steps_per_epoch covers only half the samples; the commented-out
        # call below looks like the intended production configuration.
        self.autoencoder.fit_generator(gen(x, 1),
                                       steps_per_epoch=int(x.shape[0] / 2),
                                       epochs=epochs,
                                       callbacks=cb,
                                       verbose=verbose,
                                       workers=1,
                                       use_multiprocessing=False)
        """
        self.autoencoder.fit_generator(gen(x, batch_size),
                                       steps_per_epoch=int(x.shape[0]/batch_size),
                                       epochs=epochs,
                                       callbacks=cb,
                                       verbose=verbose,
                                       workers=8,
                                       use_multiprocessing=True if platform.system() != "Windows" else False)
        """
    print('Pretraining time: ', time() - t0)
    self.autoencoder.save_weights(save_dir + '/ae_weights.h5')
    print('Pretrained weights are saved to %s/ae_weights.h5' % save_dir)
    self.pretrained = True
    print('End pretraining: ', '-' * 60)
def main():
    """Build a composite classifier from frozen pretrained CNN and RNN
    backbones, train only the joint head, then evaluate good checkpoints.
    """
    ##########################
    # Argument Parsing
    ##########################
    parser = argparse.ArgumentParser()
    parser.add_argument("--logdir", dest="log_dir", type=str,
                        default="./logs/untitled-{}".format(
                            datetime.now().strftime("%y%m%d-%H%M%S")))
    parser.add_argument("--num_gpus", default=len(get_available_gpus()),
                        type=int)
    parser.add_argument("--multi-gpu", default=False, action='store_true',
                        dest='multi_gpu')
    # Hyperparameters
    parser.add_argument("--epoch", dest="epochs", default=100, type=int)
    parser.add_argument("--batch_size", default=128, type=int)
    parser.add_argument("--valid_batch_size", default=1024, type=int)
    parser.add_argument("--optimizer", default="Adam", type=str)
    parser.add_argument("--lr", default=0.003, type=float)
    parser.add_argument(
        "--clipnorm", default=-1, type=float,
        help="if it is greater than 0, then graidient clipping is activated")
    parser.add_argument("--clipvalue", default=-1, type=float)
    parser.add_argument("--use-class-weight", dest="use_class_weight",
                        default=False, action="store_true")
    # Frequencies
    parser.add_argument("--valid_freq", type=int, default=32)
    parser.add_argument("--save_freq", type=int, default=32)
    parser.add_argument("-v", "--verbose", action="store_true")
    # Project parameters
    parser.add_argument("--min-pt", dest="min_pt", default=100, type=int)
    # Model Archtecture
    parser.add_argument("--act", dest="activation", default="elu", type=str)
    args = parser.parse_args()

    ###################
    # Output directory layout + config bookkeeping
    ###################
    log_dir = Directory(path=args.log_dir)
    log_dir.mkdir("script")
    log_dir.mkdir("checkpoint")
    log_dir.mkdir("learning_curve")
    log_dir.mkdir("roc_curve")
    log_dir.mkdir("model_response")
    backup_scripts(log_dir.script.path)
    config = Config(log_dir.path, "w")
    config.append(args)
    config["hostname"] = os.environ["HOSTNAME"]

    ###############################
    # Load
    #################################
    # Pretrained single-modality checkpoints live on specific hosts only.
    if os.environ["HOSTNAME"] == "cms05.sscc.uos.ac.kr":
        ckpt_dir = "/store/slowmoyang/QGJets/SJ-keras4hep/Dev-Composite"
    elif os.environ["HOSTNAME"] == "gate2":
        ckpt_dir = "/scratch/slowmoyang/QGJets/SJ-keras4hep/Dev-Composite"
    else:
        raise NotImplementedError
    cnn_path = os.path.join(
        ckpt_dir,
        "VanillaConvNet_epoch-67_loss-0.4987_acc-0.7659_auc-0.8422.hdf5")
    rnn_path = os.path.join(
        ckpt_dir,
        "RNNGatherEmbedding_weights_epoch-121_loss-0.4963_acc-0.7658_auc-0.8431.hdf5"
    )
    cnn_custom_objects = get_cnn_custom_objects()
    rnn_custom_objects = get_rnn_custom_objects()
    # Merged dict is reused later when re-loading composite checkpoints.
    custom_objects = {}
    custom_objects.update(cnn_custom_objects)
    custom_objects.update(rnn_custom_objects)
    cnn = load_model(cnn_path, custom_objects=cnn_custom_objects)
    cnn.summary()
    print("\n" * 5)
    rnn = load_model(rnn_path, custom_objects=rnn_custom_objects)
    rnn.summary()
    print("\n" * 5)

    ######################################
    # Build
    ######################################
    inputs = cnn.inputs + rnn.inputs
    # Joint head: global-average-pooled CNN features concatenated with the
    # RNN's last dense output, then BN -> Dense(128) -> ReLU -> 2-way softmax.
    # cnn_last_hidden = cnn.get_layer("cnn_conv2d_3").output
    # rnn_last_hidden = rnn.get_layer("rnn_dense_5").output
    # cnn_flatten = Flatten()(cnn_last_hidden)
    # joint = Concatenate(axis=-1)([cnn_flatten, rnn_last_hidden])
    cnn_last_hidden = cnn.get_layer("cnn_batch_norm_2").output
    rnn_last_hidden = rnn.get_layer("rnn_dense_5").output
    cnn_gap = GlobalAveragePooling2D()(cnn_last_hidden)
    cnn_flatten = Flatten()(cnn_gap)
    joint = Concatenate(axis=-1)([cnn_flatten, rnn_last_hidden])
    joint = BatchNormalization(axis=-1, name="joint_batch_norm")(joint)
    joint = Dense(128)(joint)
    joint = Activation("relu")(joint)
    logits = Dense(2)(joint)
    y_pred = Softmax()(logits)
    model = Model(inputs=inputs, outputs=y_pred)
    model.summary()

    ################################################
    # Freeze
    ##################################################
    # Freeze both pretrained backbones (by name prefix); only the joint
    # head's layers remain trainable.
    for each in model.layers:
        if each.name.startswith("cnn") or each.name.startswith("rnn"):
            each.trainable = False

    ###################################################
    # Data iterators
    ####################################################
    dset = get_dataset_paths(config.min_pt)
    config["fit_generator_input"] = {
        "x": ["x_img", "x_kin", "x_pid", "x_len"],
        "y": ["y"]
    }
    train_iter = get_data_iter(path=dset["training"],
                               batch_size=config.batch_size,
                               fit_generator_input=config.fit_generator_input,
                               fit_generator_mode=True)
    valid_iter = get_data_iter(path=dset["validation"],
                               batch_size=config.valid_batch_size,
                               fit_generator_input=config.fit_generator_input,
                               fit_generator_mode=True)
    test_iter = get_data_iter(path=dset["test"],
                              batch_size=config.valid_batch_size,
                              fit_generator_input=config.fit_generator_input,
                              fit_generator_mode=False)
    if config.use_class_weight:
        class_weight = get_class_weight(train_iter)
        config["class_weight"] = list(class_weight)
    else:
        class_weight = None

    ######################################
    # Compile
    #######################################
    loss = 'categorical_crossentropy'
    # TODO capsulisation
    optimizer_kwargs = {}
    if config.clipnorm > 0:
        optimizer_kwargs["clipnorm"] = config.clipnorm
    if config.clipvalue > 0:
        optimizer_kwargs["clipvalue"] = config.clipvalue
    # Optimizer class is chosen by name from the --optimizer flag.
    optimizer = getattr(optimizers, config.optimizer)(lr=config.lr,
                                                      **optimizer_kwargs)
    metric_list = ["accuracy", roc_auc]
    model.compile(loss=loss, optimizer=optimizer, metrics=metric_list)

    ###########################################################################
    # Callbacks
    ###########################################################################
    ckpt_format_str = "weights_epoch-{epoch:02d}_loss-{val_loss:.4f}_acc-{val_acc:.4f}_auc-{val_roc_auc:.4f}.hdf5"
    ckpt_path = log_dir.checkpoint.concat(ckpt_format_str)
    csv_log_path = log_dir.concat("log_file.csv")
    learning_curve = LearningCurve(directory=log_dir.learning_curve.path)
    learning_curve.book(x="step", y="roc_auc", best="max")
    learning_curve.book(x="step", y="acc", best="max")
    learning_curve.book(x="step", y="loss", best="min")
    callback_list = [
        callbacks.ModelCheckpoint(filepath=ckpt_path),
        # callbacks.EarlyStopping(monitor="val_loss" , patience=5),
        callbacks.ReduceLROnPlateau(verbose=1),
        callbacks.CSVLogger(csv_log_path),
        learning_curve,
    ]

    ############################################################################
    # Training
    ############################################################################
    model.fit_generator(train_iter,
                        steps_per_epoch=len(train_iter),
                        epochs=config.epochs,
                        validation_data=valid_iter,
                        validation_steps=len(valid_iter),
                        callbacks=callback_list,
                        shuffle=True,
                        class_weight=class_weight)
    print("Training is over! :D")
    del model

    ###########################################
    # Evaluation
    ############################################
    # Switch the training iterator to plain evaluation mode.
    train_iter.fit_generator_mode = False
    train_iter.cycle = False
    good_ckpt = find_good_checkpoint(log_dir.checkpoint.path,
                                     which={
                                         "max": ["auc", "acc"],
                                         "min": ["loss"]
                                     })
    for idx, each in enumerate(good_ckpt, 1):
        print("[{}/{}] {}".format(idx, len(good_ckpt), each))
        K.clear_session()
        evaluate(custom_objects=custom_objects,
                 checkpoint_path=each,
                 train_iter=train_iter,
                 test_iter=test_iter,
                 log_dir=log_dir)
    config.save()