def train_wrapper(args: Namespace) -> None:
    """Function for training a network."""
    model_name = args.model
    if args.cont:
        # Resume training: reload the model, recompile with a fresh
        # learning-rate schedule, and restore the trained weights.
        model = load_model(model_name)
        history = model.__asf_model_history
        weights = model.get_weights()
        lr_schedule = ExponentialDecay(9.2e-4, decay_steps=2000,
                                       decay_rate=0.96, staircase=True)
        # optimizer = model.optimizer
        model.compile(loss=jaccard_distance_loss,
                      optimizer=Adam(learning_rate=lr_schedule),
                      metrics=['accuracy', MeanIoU(num_classes=2)])
        model.set_weights(weights)
        # model.compile(
        #     loss='binary_crossentropy', optimizer='adam', metrics=["accuracy"]
        # )
    else:
        model_path = path_from_model_name(model_name)
        if not args.overwrite and os.path.isfile(model_path):
            print(f"File {model_name} already exists!")
            return
        # model = create_model_masked(model_name)
        model = create_cdl_model_masked(model_name)
        history = {'loss': [], 'accuracy': [], 'mean_io_u': []}

    train_model(model, history, args.dataset, args.epochs)
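
# Usage sketch (not from the original source): train_wrapper only needs an
# object carrying .model, .cont, .overwrite, .dataset and .epochs, so a bare
# argparse.Namespace works for a quick smoke test. The values below are
# hypothetical.
from argparse import Namespace


def _demo_train_wrapper() -> None:
    args = Namespace(model="cdl_unet_v1",    # hypothetical model name
                     cont=False,             # train from scratch
                     overwrite=True,         # replace an existing model file
                     dataset="cdl_dataset",  # hypothetical dataset name
                     epochs=20)
    train_wrapper(args)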
def TCN_model(X_Train, Y_train, batch_size, activation, dilations, nbfilters,
              kernelsize, nbstacks, batch_norm, layer_norm, weight_norm,
              dropout_rate, lookback_window):
    """Build and compile a TCN regressor over windows of length lookback_window."""
    from tcn import TCN, tcn_full_summary
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.optimizers.schedules import ExponentialDecay
    from tensorflow.keras.regularizers import l2

    lr_schedule = ExponentialDecay(initial_learning_rate=1e-2,
                                   decay_steps=1000,
                                   decay_rate=0.9)

    # Build the TCN layer once and reuse it in the model, so the receptive
    # field reported below belongs to the layer that is actually trained.
    tcn_layer = TCN(input_shape=(lookback_window, X_Train.shape[2]),
                    activation=activation,
                    padding='causal',
                    dilations=dilations,
                    nb_filters=nbfilters,
                    kernel_size=kernelsize,
                    nb_stacks=nbstacks,
                    use_batch_norm=batch_norm,
                    use_layer_norm=layer_norm,
                    use_weight_norm=weight_norm,
                    dropout_rate=dropout_rate)

    model_all_data = Sequential([
        tcn_layer,
        Dense(Y_train.shape[1],
              activation=activation,
              kernel_regularizer=l2(0.01)),
    ])

    # The receptive field tells you how far back in time the model can see.
    print('Receptive field size =', tcn_layer.receptive_field)
    model_all_data.summary()

    adam = Adam(learning_rate=lr_schedule)
    # root_mean_squared_error is a custom loss defined elsewhere in this module.
    model_all_data.compile(optimizer=adam,
                           loss=root_mean_squared_error,
                           metrics=['mse', 'mae', 'mape'])
    # tcn_full_summary(model_all_data, expand_residual_blocks=False)
    return model_all_data
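
# Usage sketch (illustrative, not from the original source): build and fit the
# TCN on synthetic windowed data. X_Train must be shaped
# (samples, lookback_window, features) and Y_train (samples, targets);
# every hyperparameter value below is a placeholder.
import numpy as np


def _demo_tcn_model():
    lookback_window = 73
    X_Train = np.random.rand(256, lookback_window, 12).astype("float32")
    Y_train = np.random.rand(256, 6).astype("float32")
    model = TCN_model(X_Train, Y_train,
                      batch_size=32,
                      activation="relu",
                      dilations=[1, 2, 4, 8, 16],
                      nbfilters=16,
                      kernelsize=3,
                      nbstacks=1,
                      batch_norm=True,
                      layer_norm=False,
                      weight_norm=False,
                      dropout_rate=0.2,
                      lookback_window=lookback_window)
    model.fit(X_Train, Y_train, batch_size=32, epochs=2, validation_split=0.1)
    return model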
def build_model(hp):
    """Model-builder for Keras Tuner: samples TCN hyperparameters from hp."""
    from tcn import TCN
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.optimizers.schedules import ExponentialDecay
    from tensorflow.keras.regularizers import l2
    import tensorflow as tf

    lr_schedule = ExponentialDecay(initial_learning_rate=1e-2,
                                   decay_steps=1000,
                                   decay_rate=0.9)
    lookback_window = 73

    # hp.Choice only accepts scalar values, so the dilation schedule stays fixed:
    # hp_dilation = hp.Choice('dilations', values=[[1, 2, 4, 8, 16], [1, 2, 4, 8, 16, 32]])
    hp_nbfilters = hp.Choice('nb_filters', values=[4, 8, 16, 32])
    hp_kernelsize = hp.Choice('kernel_size', values=[2, 3, 4])
    hp_nbstacks = hp.Choice('nb_stacks', values=[1, 2, 3, 4])
    hp_dropout_rate = hp.Float('dropout', 0, 0.5, step=0.1, default=0.5)

    # Leaky ReLU with a 0.1 negative slope.
    lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.1)

    tcn_layer = TCN(input_shape=(lookback_window, X_Train.shape[2]),
                    activation=lrelu,
                    padding='causal',
                    dilations=[1, 2, 4, 8, 16],
                    nb_filters=hp_nbfilters,
                    kernel_size=hp_kernelsize,
                    nb_stacks=hp_nbstacks,
                    use_batch_norm=True,
                    use_layer_norm=False,
                    use_weight_norm=False,
                    dropout_rate=hp_dropout_rate)

    # X_Train and Y_train come from the enclosing scope.
    model = Sequential([
        tcn_layer,
        Dense(Y_train.shape[1], activation=lrelu, kernel_regularizer=l2(0.01)),
    ])

    model.compile(optimizer=Adam(learning_rate=lr_schedule),
                  loss=root_mean_squared_error,
                  metrics=[RMSE_Tranche_1, RMSE_Tranche_2, RMSE_Tranche_3,
                           RMSE_Tranche_4, RMSE_Tranche_5, RMSE_Tranche_6,
                           # RMSE_10_pourcent_faibles,
                           # RMSE_10_pourcent_fortes,
                           'mse', 'mae', 'mape'])
    return model
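
# Usage sketch (assumes the keras-tuner package, imported as keras_tuner):
# build_model closes over X_Train and Y_train, so both must exist in the
# calling scope. The search budget, directory, and project name are
# placeholders.
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping


def _demo_tune():
    tuner = kt.RandomSearch(build_model,
                            objective="val_loss",
                            max_trials=20,
                            directory="tcn_tuning",     # hypothetical path
                            project_name="tcn_search")  # hypothetical name
    tuner.search(X_Train, Y_train,
                 epochs=30,
                 validation_split=0.2,
                 callbacks=[EarlyStopping(monitor="val_loss", patience=5)])
    return tuner.get_best_models(num_models=1)[0]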
def train(model,
          train_images,
          train_annotations,
          input_height=None,
          input_width=None,
          n_classes=None,
          verify_dataset=True,
          checkpoints_path=None,
          epochs=5,
          batch_size=2,
          validate=False,
          val_images=None,
          val_annotations=None,
          val_batch_size=2,
          auto_resume_checkpoint=False,
          load_weights=None,
          steps_per_epoch=512,
          val_steps_per_epoch=512,
          gen_use_multiprocessing=False,
          masked=False,
          dice=False,
          optimizer_name='adam',
          lr=0.001,
          do_augment=False,
          augmentation_name="aug_all",
          callbacks=None,
          focal=False,
          default=False,
          custom_augmentation=None,
          other_inputs_paths=None,
          preprocessing=None,
          read_image_type=1,  # cv2.IMREAD_COLOR = 1 (RGB),
                              # cv2.IMREAD_GRAYSCALE = 0,
                              # cv2.IMREAD_UNCHANGED = -1 (4 channels, e.g. RGBA)
          want_tpu=False):
    from .models.all_models import model_from_name

    # If the user passed a model name instead of a model object, build it here.
    if isinstance(model, six.string_types):
        assert n_classes is not None, "Please provide the n_classes"
        if (input_height is not None) and (input_width is not None):
            model, tpu_strategy = model_from_name[model](
                n_classes, input_height=input_height, input_width=input_width)
        else:
            model, tpu_strategy = model_from_name[model](n_classes)

    n_classes = model.n_classes
    input_height = model.input_height
    input_width = model.input_width
    output_height = model.output_height
    output_width = model.output_width

    if validate:
        assert val_images is not None
        assert val_annotations is not None

    if optimizer_name is not None:
        # Pick the loss; the flags are checked in priority order.
        if focal:
            loss_k = focal_tversky
        elif masked:
            loss_k = masked_categorical_crossentropy
        elif dice:
            loss_k = dice_loss
        else:
            loss_k = weighted_categorical_crossentropy

        if optimizer_name == 'adam':
            opt = Adam(learning_rate=lr)
        elif optimizer_name == 'sgd':
            lr_schedule = ExponentialDecay(initial_learning_rate=1e-2,
                                           decay_steps=10000,
                                           decay_rate=0.9)
            opt = SGD(learning_rate=lr_schedule)
        else:
            # Fall back to whatever Keras can resolve (an optimizer instance
            # or identifier string).
            opt = optimizer_name

        model.compile(loss=loss_k,
                      optimizer=opt,
                      metrics=['accuracy',
                               MeanIoU(num_classes=n_classes, name='mIoU')])

    if checkpoints_path is not None:
        config_file = checkpoints_path + "_config.json"
        dir_name = os.path.dirname(config_file)
        if (not os.path.exists(dir_name)) and len(dir_name) > 0:
            os.makedirs(dir_name)
        with open(config_file, "w") as f:
            json.dump({
                "model_class": model.model_name,
                "n_classes": n_classes,
                "input_height": input_height,
                "input_width": input_width,
                "output_height": output_height,
                "output_width": output_width
            }, f)

    if load_weights is not None and len(load_weights) > 0:
        print("Loading weights from ", load_weights)
        model.load_weights(load_weights)

    initial_epoch = 0
    if auto_resume_checkpoint and (checkpoints_path is not None):
        latest_checkpoint = find_latest_checkpoint(checkpoints_path)
        if latest_checkpoint is not None:
            print("Loading the weights from latest checkpoint ",
                  latest_checkpoint)
            model.load_weights(latest_checkpoint)
            initial_epoch = int(latest_checkpoint.split('.')[-1])

    if verify_dataset:
        print("Verifying training dataset")
        verified = verify_segmentation_dataset(train_images,
                                               train_annotations,
                                               n_classes)
        assert verified
        if validate:
            print("Verifying validation dataset")
            verified = verify_segmentation_dataset(val_images,
                                                   val_annotations,
                                                   n_classes)
            assert verified

    train_gen = image_segmentation_generator(
        train_images, train_annotations, batch_size, n_classes,
        input_height, input_width, output_height, output_width,
        do_augment=do_augment, augmentation_name=augmentation_name,
        custom_augmentation=custom_augmentation,
        other_inputs_paths=other_inputs_paths,
        preprocessing=preprocessing, read_image_type=read_image_type)

    if validate:
        val_gen = image_segmentation_generator(
            val_images, val_annotations, val_batch_size, n_classes,
            input_height, input_width, output_height, output_width,
            other_inputs_paths=other_inputs_paths,
            preprocessing=preprocessing, read_image_type=read_image_type)

    if callbacks is None and (checkpoints_path is not None):
        # default_callback = ModelCheckpoint(
        #     filepath=checkpoints_path + ".{epoch:05d}",
        #     save_weights_only=True,
        #     verbose=True)
        default_callback = CSVLogger('training.log')
        if sys.version_info[0] < 3:  # for Python 2
            default_callback = CheckpointsCallback(checkpoints_path)
        callbacks = [default_callback]

    if callbacks is None:
        callbacks = []

    if not validate:
        model.fit(train_gen,
                  steps_per_epoch=steps_per_epoch,
                  epochs=epochs,
                  callbacks=callbacks,
                  initial_epoch=initial_epoch)
    else:
        model.fit(train_gen,
                  steps_per_epoch=steps_per_epoch,
                  validation_data=val_gen,
                  validation_steps=val_steps_per_epoch,
                  epochs=epochs,
                  callbacks=callbacks,
                  use_multiprocessing=gen_use_multiprocessing,
                  initial_epoch=initial_epoch)
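
# Usage sketch (illustrative): "vgg_unet" is assumed to be a key in
# model_from_name; any registered architecture works. The dataset paths are
# hypothetical; images and per-pixel annotation files are paired by filename.
def _demo_train():
    train(model="vgg_unet",
          train_images="dataset/images_train/",
          train_annotations="dataset/annotations_train/",
          input_height=416,
          input_width=608,
          n_classes=3,
          checkpoints_path="checkpoints/vgg_unet_1",
          epochs=5,
          validate=True,
          val_images="dataset/images_val/",
          val_annotations="dataset/annotations_val/",
          dice=True)  # selects dice_loss as the training loss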
def create_cdl_model_masked(model_name: str,
                            num_filters: int = NUM_FILTERS,
                            time_steps: int = TIME_STEPS,
                            dropout: float = 0.5,
                            batchnorm: bool = True) -> Model:
    """Define the time-distributed UNET model.

    Requires a stack of sequential SAR data (with vh and vv channels stacked),
    where each image is a different timestep.
    """
    inputs = Input(shape=(None, None, TIME_STEPS * N_CHANNELS), batch_size=None)

    # Contracting path: each block doubles the filter count, then halves the
    # spatial resolution.
    c1 = conv2d_block(inputs, num_filters * 1, kernel_size=3, batchnorm=batchnorm)
    p1 = MaxPooling2D((2, 2))(c1)
    p1 = Dropout(dropout)(p1)

    c2 = conv2d_block(p1, num_filters * 2, kernel_size=3, batchnorm=batchnorm)
    p2 = MaxPooling2D((2, 2))(c2)
    p2 = Dropout(dropout)(p2)

    c3 = conv2d_block(p2, num_filters * 4, kernel_size=3, batchnorm=batchnorm)
    p3 = MaxPooling2D((2, 2))(c3)
    p3 = Dropout(dropout)(p3)

    c4 = conv2d_block(p3, num_filters * 8, kernel_size=3, batchnorm=batchnorm)
    p4 = MaxPooling2D((2, 2))(c4)
    p4 = Dropout(dropout)(p4)

    c5 = conv2d_block(p4, num_filters * 16, kernel_size=3, batchnorm=batchnorm)
    p5 = MaxPooling2D((2, 2))(c5)
    p5 = Dropout(dropout)(p5)

    c6 = conv2d_block(p5, num_filters * 32, kernel_size=3, batchnorm=batchnorm)
    p6 = MaxPooling2D((2, 2))(c6)
    p6 = Dropout(dropout)(p6)

    # Two deeper encoder stages, currently disabled:
    # c7 = conv2d_block(p6, num_filters * 64, kernel_size=3, batchnorm=batchnorm)
    # p7 = MaxPooling2D((2, 2))(c7)
    # p7 = Dropout(dropout)(p7)
    # c8 = conv2d_block(p7, num_filters * 128, kernel_size=3, batchnorm=batchnorm)
    # p8 = MaxPooling2D((2, 2))(c8)
    # p8 = Dropout(dropout)(p8)

    # Bottleneck. A bidirectional ConvLSTM middle layer is an alternative:
    # middle_clstm = ConvLSTM2D(filters=num_filters * 4, kernel_size=3,
    #                           activation="tanh", padding='same',
    #                           return_sequences=True)
    # middle_bidirection = Bidirectional(middle_clstm)(p3)
    middle = conv2d_block(p6, num_filters * 32, kernel_size=3)

    # Expanding path: upsample and concatenate the matching encoder block.
    # uv = deconv2d_block_time_dist(middle, num_filters=num_filters * 128, dropout=dropout,
    #                               kernel_size=3, batchnorm=batchnorm, concat_layer=c8,
    #                               activation=True)
    # uw = deconv2d_block_time_dist(uv, num_filters=num_filters * 64, dropout=dropout,
    #                               kernel_size=3, batchnorm=batchnorm, concat_layer=c7,
    #                               activation=True)
    uy = deconv2d_block_time_dist(middle, num_filters=num_filters * 32, dropout=dropout,
                                  kernel_size=3, batchnorm=batchnorm, concat_layer=c6,
                                  activation=True)
    uz = deconv2d_block_time_dist(uy, num_filters=num_filters * 16, dropout=dropout,
                                  kernel_size=3, batchnorm=batchnorm, concat_layer=c5,
                                  activation=True)
    u = deconv2d_block_time_dist(uz, num_filters=num_filters * 8, dropout=dropout,
                                 kernel_size=3, batchnorm=batchnorm, concat_layer=c4,
                                 activation=True)
    u1 = deconv2d_block_time_dist(u, num_filters=num_filters * 4, dropout=dropout,
                                  kernel_size=3, batchnorm=batchnorm, concat_layer=c3,
                                  activation=True)
    u2 = deconv2d_block_time_dist(u1, num_filters=num_filters * 2, dropout=dropout,
                                  kernel_size=3, batchnorm=batchnorm, concat_layer=c2,
                                  activation=True)
    u3 = deconv2d_block_time_dist(u2, num_filters=num_filters, dropout=dropout,
                                  kernel_size=3, batchnorm=batchnorm, concat_layer=c1,
                                  activation=True)

    # Classifier. A forward-backward ConvLSTM head is an alternative:
    # final_conv_forward = ConvLSTM2D(filters=num_filters, kernel_size=3,
    #                                 activation="tanh", padding='same',
    #                                 return_sequences=False)(u3)
    # final_conv_backwards = ConvLSTM2D(filters=num_filters, kernel_size=3,
    #                                   activation="tanh", padding='same',
    #                                   return_sequences=False)
    # final_bidirectional = Bidirectional(final_conv_forward)(u3)
    final = Conv2D(filters=1, kernel_size=1, activation="sigmoid", padding='same')(u3)
    # final = ConvLSTM2D(filters=1, kernel_size=1, activation="sigmoid", padding='same',
    #                    return_sequences=False)(final_bidirectional)
    # final_conv_locality = feature_locality(inputs, final, num_filters,
    #                                        batchnorm, dropout)

    model = Model(inputs=inputs, outputs=[final])
    model.__asf_model_name = model_name

    lr_schedule = ExponentialDecay(1e-3, decay_steps=2000, decay_rate=0.96,
                                   staircase=True)

    # Alternatives: Adam(lr=1e-3); dice_coefficient_loss;
    # [BinaryCrossentropy(from_logits=False), cosh_dice_coefficient_loss]
    model.compile(loss=jaccard_distance_loss,
                  optimizer=Adam(learning_rate=lr_schedule),
                  metrics=['accuracy', MeanIoU(num_classes=2)])

    return model
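
# Usage sketch (illustrative): build the model and run a forward pass on random
# data. The 64x64 tile size is arbitrary but must be divisible by 2**6 because
# the encoder pools six times; the channel count must equal
# TIME_STEPS * N_CHANNELS.
import numpy as np


def _demo_cdl_model() -> None:
    model = create_cdl_model_masked("cdl_unet_demo")  # hypothetical name
    x = np.random.rand(2, 64, 64, TIME_STEPS * N_CHANNELS).astype("float32")
    preds = model.predict(x)  # shape (2, 64, 64, 1), sigmoid probabilities
    print(preds.shape, float(preds.min()), float(preds.max()))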