# search for the ground-truth.txt file which contains the per-image illuminants
ground_truth_path = os.path.join(img_dir, 'ground-truth.txt')
if os.path.exists(ground_truth_path):
    ground_truth_mode = True
    ground_truth_dict = get_ground_truth_dict(ground_truth_path)
else:
    ground_truth_mode = False
    ground_truth_dict = None

# search for the masks.txt file which contains the coordinates to be excluded
masks_path = os.path.join(img_dir, 'masks.txt')
if os.path.exists(masks_path):
    # masks_dict is a dictionary with image IDs as keys and coordinates as values
    masks_dict = get_masks_dict(masks_path)
else:
    masks_dict = None

# import model and load pre-trained parameters
model = model_builder(level=args.level,
                      confidence=args.confidence,
                      input_shape=(*PATCH_SIZE, 3))
network_path = os.path.join(MODEL_DIR, PRETRAINED_MODEL)
model.load_weights(network_path)

print('=' * 110)
print('{network:s} architecture is selected with batch size {batch_size:02d} '
      '(pre-trained on {dataset:s} dataset).'.format(**{
          'network': NETWORK,
          'dataset': DATASET,
          'batch_size': args.batch
      }))
if ground_truth_dict is not None:
    print('Ground-truth file found.')
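# The two parsers used above are not defined in this snippet. Below is a
# minimal sketch, assuming one record per line with the image ID first,
# followed by the ground-truth RGB triplet (or, for masks.txt, a flat list
# of coordinate pairs to exclude). The actual file layout may differ.
import numpy as np

def get_ground_truth_dict(path):
    """Hypothetical parser: each line is '<image_id> <r> <g> <b>'."""
    ground_truth_dict = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            ground_truth_dict[fields[0]] = np.array(fields[1:4], dtype=np.float32)
    return ground_truth_dict

def get_masks_dict(path):
    """Hypothetical parser: each line is '<image_id>' followed by x/y pairs."""
    masks_dict = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            masks_dict[fields[0]] = np.array(fields[1:], dtype=np.int32).reshape(-1, 2)
    return masks_dict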
def inference(model_level, model_dir, test_img_IDs):
    confidence_estimation_mode = False
    model = model_builder(level=model_level,
                          confidence=confidence_estimation_mode,
                          input_shape=(*PATCH_SIZE, 3))
    model.load_weights(model_dir)
    ground_truth_dict = get_ground_truth_dict(r'train\RECommended\ground-truth.txt')
    masks_dict = get_masks_dict(r'train\RECommended\masks.txt')

    angular_errors_statistics = []
    for counter, test_img_ID in enumerate(test_img_IDs):
        print('Processing {}/{} images...'.format(counter + 1, len(test_img_IDs)),
              end='\r')
        # data generator
        batch, boxes, remained_boxes_indices, ground_truth = img2batch(
            test_img_ID,
            patch_size=PATCH_SIZE,
            input_bits=INPUT_BITS,
            valid_bits=VALID_BITS,
            darkness=DARKNESS,
            ground_truth_dict=ground_truth_dict,
            masks_dict=masks_dict,
            gamma=GAMMA)

        nb_batch = int(np.ceil(PATCHES / BATCH_SIZE))
        batch_size = int(PATCHES / nb_batch)  # actual batch size
        local_estimates = np.empty(shape=(0, 3))
        confidences = np.empty(shape=(0, ))

        # feed the batch(es) into the network
        for b in range(nb_batch):
            batch_start_index, batch_end_index = b * batch_size, (b + 1) * batch_size
            batch_tmp = batch[batch_start_index:batch_end_index]
            if confidence_estimation_mode:
                # the model requires 2 inputs when confidence estimation mode is activated
                batch_tmp = [batch_tmp, np.zeros((batch_size, 3))]
            outputs = model.predict(batch_tmp)  # model inference
            if confidence_estimation_mode:
                # the model produces 6 outputs when confidence estimation mode is on.
                # See model.py for more details.
                # local_estimates is the gain instead of the illuminant color!
                local_estimates = np.vstack((local_estimates, outputs[4]))
                confidences = np.hstack((confidences, outputs[5].squeeze()))
            else:
                # local_estimates is the gain instead of the illuminant color!
                local_estimates = np.vstack((local_estimates, outputs))
                confidences = None

        if confidence_estimation_mode:
            global_estimate = local_estimates_aggregation(local_estimates, confidences)
        else:
            global_estimate = local_estimates_aggregation_naive(local_estimates)
        global_rgb_estimate = 1. / global_estimate  # convert gain into an rgb triplet
        global_angular_error = angular_error(ground_truth, global_rgb_estimate)
        angular_errors_statistics.append(global_angular_error)

    return np.array(angular_errors_statistics)
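# The metric and aggregation helpers referenced in inference() live elsewhere
# in the repo. A minimal sketch, assuming the standard recovery angular error
# (in degrees) and a per-channel median for the naive aggregation; the real
# implementations may differ (e.g., a confidence-weighted mean for the
# non-naive variant).
def angular_error(ground_truth, estimate):
    """Angle (in degrees) between two RGB vectors."""
    cos = np.dot(ground_truth, estimate) / (
        np.linalg.norm(ground_truth) * np.linalg.norm(estimate) + 1e-9)
    return np.rad2deg(np.arccos(np.clip(cos, -1., 1.)))

def local_estimates_aggregation_naive(local_estimates):
    """Aggregate per-patch gains without confidence scores (assumed: median)."""
    return np.median(local_estimates, axis=0)

# Example usage (hypothetical image IDs and weights path):
# errors = inference(3, r'models\pretrained.h5', ['IMG_0001', 'IMG_0002'])
# print('mean: {:.2f}, median: {:.2f}'.format(errors.mean(), np.median(errors)))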
def train_model(data, class_cols, model_path):
    """Trains a model for galactic image recognition.

    Parameters
    ----------
    data : :obj:`pandas.core.frame.DataFrame`
        The data frame containing training data formatted for use with
        :func:`ImageDataGenerator.flow_from_dataframe`. Data will
        automatically be broken into training and validation sets.
    class_cols : list of str
        The dataframe columns containing the classes. For use with
        multi-output models.
    model_path : str
        Path where the model should be saved.

    Returns
    -------
    model : :obj:`keras.models.Model`
        The trained model.
    """
    # create an ImageDataGenerator, which applies random affine
    # transformations to the data. such augmentation is standard
    datagen = ImageDataGenerator(rotation_range=360,
                                 zoom_range=[.75, 1.3],
                                 width_shift_range=.05,
                                 height_shift_range=.05,
                                 horizontal_flip=True,
                                 vertical_flip=True,
                                 validation_split=.25)

    # create two sets of generators, one for training data and one for
    # validation data, which can be used to check progress throughout
    # training. the target_size option automatically scales our data to
    # the requested size. we also set up for a multi-output model, even
    # though we are currently only checking one question, which will
    # allow some flexibility should this goal change
    traingen = datagen.flow_from_dataframe(data,
                                           directory=MODULE_PATH,
                                           x_col='imgpath',
                                           y_col=class_cols,
                                           batch_size=BATCH_SIZE,
                                           target_size=INPUT_DIM,
                                           class_mode='other',
                                           subset='training')
    valgen = datagen.flow_from_dataframe(data,
                                         directory=MODULE_PATH,
                                         x_col='imgpath',
                                         y_col=class_cols,
                                         batch_size=BATCH_SIZE,
                                         target_size=INPUT_DIM,
                                         class_mode='other',
                                         subset='validation')

    # now we actually build the model, which is defined in model.py
    model = model_builder(input_dim=traingen.image_shape)

    # save an image of the model as defined in model.py. can be useful
    # for quickly checking that you have the architecture you want.
    # note that this has to happen before we distribute over gpus.
    plot_model(model,
               to_file=os.path.join(model_path, 'model.png'),
               show_shapes=True,
               show_layer_names=True)

    # calculate the number of steps per epoch (or validation) such that
    # all (or nearly all) images are used
    train_step_size = traingen.n // traingen.batch_size
    val_step_size = valgen.n // valgen.batch_size

    # set up callbacks for saving and logging
    # XXX: will need to append history if we continue training a model
    monitor = 'val_loss'  # should monitor the same quantity for all
    base_patience = 10  # ensure we try LR reduction a few times before stopping
    checkpoint = ModelCheckpoint(os.path.join(model_path, 'model.h5'),
                                 monitor=monitor,
                                 save_best_only=True)
    csv_logger = CSVLogger(os.path.join(model_path, 'training.log'))
    lr_plateau = ReduceLROnPlateau(monitor=monitor,
                                   factor=0.1,
                                   patience=base_patience,
                                   min_lr=0.)
    stop = EarlyStopping(monitor=monitor, patience=5 * base_patience)

    # set up for a multi-gpu model
    # HACK: fixes an issue in keras where these don't play nice with
    # xla_gpus (which cause double counting of available gpus).
    available_devices = [
        multi_gpu_utils._normalize_device_name(name)
        for name in multi_gpu_utils._get_available_devices()
    ]
    # this line is our actual keras fix; it's the '/' that's key
    n_gpus = len([x for x in available_devices if '/gpu' in x])
    if n_gpus > 1:  # only use multi_gpu if we have multiple gpus
        parallel_model = multi_gpu_model(model, gpus=n_gpus)
        # compile the model. note that the names of outputs in dicts
        # (e.g., 't01') should match the names of the relevant output
        # layers found in the model definition
        parallel_model.compile(optimizer=Nadam(lr=0.0001),
                               loss={'t01': 'categorical_crossentropy'},
                               loss_weights={'t01': 1.},
                               metrics=['accuracy'])

        # train the model
        history = parallel_model.fit_generator(
            generator=traingen,
            steps_per_epoch=train_step_size,
            validation_data=valgen,
            validation_steps=val_step_size,
            epochs=EPOCHS,
            callbacks=[checkpoint, csv_logger, lr_plateau, stop],
            verbose=1)
    else:
        model.compile(optimizer=Nadam(lr=0.0001),
                      loss={'t01': 'categorical_crossentropy'},
                      loss_weights={'t01': 1.},
                      metrics=['accuracy'])

        # train the model
        history = model.fit_generator(
            generator=traingen,
            steps_per_epoch=train_step_size,
            validation_data=valgen,
            validation_steps=val_step_size,
            epochs=EPOCHS,
            callbacks=[checkpoint, csv_logger, lr_plateau, stop],
            verbose=1)

    # necessary for recovering the original model later, instead of
    # the parallelized model. this matters for transfer learning
    model.save(os.path.join(model_path, 'model.h5'))

    # XXX: the following graphs are only computed for the current
    # training session. this is ok until we decide to continue
    # training on a model, instead of starting fresh.

    # plot training & validation accuracy values
    plt.figure()
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.savefig(os.path.join(model_path, 'acc.png'))
    plt.close()

    # plot training & validation loss values
    plt.figure()
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.savefig(os.path.join(model_path, 'loss.png'))
    plt.close()

    return model
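# Example usage (a minimal sketch; the catalogue filename and column names
# below are assumptions for illustration, not part of this repo's interface).
# The dataframe needs an 'imgpath' column plus one column per class, and the
# class columns must line up with the 't01' output layer in model.py.
if __name__ == '__main__':
    import pandas as pd
    catalogue = pd.read_csv(os.path.join(MODULE_PATH, 'labels.csv'))  # hypothetical file
    t01_cols = ['t01_smooth', 't01_features', 't01_artifact']  # hypothetical columns
    trained = train_model(catalogue, t01_cols, model_path='runs/t01')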
# decay learning rate
MIN_LR = 0.000001
DECAY_FACTOR = 1.00004

# initialise data
# list_files returns a tensor Dataset of the file paths under DATA_BASE_DIR
list_ds = tf.data.Dataset.list_files(DATA_BASE_DIR + '/*')
# partially fill in data.preprocess_image with the argument image_size
preprocess_function = partial(data.preprocess_image, target_size=image_size)
# apply preprocess_function to list_ds, then shuffle and batch
train_data = list_ds.map(preprocess_function).shuffle(100).batch(batch_size)

# initialise model
generator, discriminator = m.model_builder(image_size)
generator.summary()
tf.keras.utils.plot_model(generator, show_shapes=True, dpi=64)
discriminator.summary()
tf.keras.utils.plot_model(discriminator, show_shapes=True, dpi=64)

# define optimisers
D_optimizer = tf.keras.optimizers.Adam(learning_rate=LR,
                                       beta_1=BETA_1,
                                       beta_2=BETA_2,
                                       epsilon=EPSILON)
G_optimizer = tf.keras.optimizers.Adam(learning_rate=LR,
                                       beta_1=BETA_1,
                                       beta_2=BETA_2,
                                       epsilon=EPSILON)
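# A minimal sketch of how the two optimisers above would typically be used in
# an alternating GAN update. This assumes standard non-saturating DCGAN losses
# and a hypothetical NOISE_DIM constant; the repo's actual train step (and its
# use of MIN_LR / DECAY_FACTOR for stepwise LR decay) may differ.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
NOISE_DIM = 128  # hypothetical latent size

@tf.function
def train_step(real_images):
    noise = tf.random.normal([tf.shape(real_images)[0], NOISE_DIM])
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = generator(noise, training=True)
        real_logits = discriminator(real_images, training=True)
        fake_logits = discriminator(fake_images, training=True)
        # generator tries to make fakes look real; discriminator separates them
        g_loss = cross_entropy(tf.ones_like(fake_logits), fake_logits)
        d_loss = (cross_entropy(tf.ones_like(real_logits), real_logits) +
                  cross_entropy(tf.zeros_like(fake_logits), fake_logits))
    G_optimizer.apply_gradients(
        zip(g_tape.gradient(g_loss, generator.trainable_variables),
            generator.trainable_variables))
    D_optimizer.apply_gradients(
        zip(d_tape.gradient(d_loss, discriminator.trainable_variables),
            discriminator.trainable_variables))
    return g_loss, d_loss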
# load pre-trained weights from Inception-V3
inception_model = applications.InceptionV3()
# a dictionary mapping layer names to the corresponding Inception-V3 layers
inception_layers = {layer.name: layer for layer in inception_model.layers}
inception_weights = dict()
for layer_name in conv_layers_names:
    inception_weights[layer_name] = inception_layers[layer_name].get_weights()
K.clear_session()

# create a model and initialize it with inception_weights
model = model_builder(level=args.level, input_shape=(*PATCH_SIZE, 3))
model_layers = {layer.name: layer for layer in model.layers}
for layer_name in conv_layers_names:
    model_layers[layer_name].set_weights(inception_weights[layer_name])
    print('Initialized {0} layer with weights from Inception-V3.'.format(layer_name))

model.compile(loss='mse',
              optimizer=Nadam(lr=LR),
              metrics=[angular_error_metric])
model.summary()

# uncomment the following lines to plot the model architecture
# from keras.utils import plot_model
# plot_model(model, to_file=os.path.join(logs_dir, 'architecture.pdf'), show_shapes=True)
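# Optional sanity check (not in the original script): confirm the transfer
# actually landed by comparing each transferred kernel against the cached copy.
import numpy as np
for layer_name in conv_layers_names:
    transferred = model_layers[layer_name].get_weights()
    cached = inception_weights[layer_name]
    assert all(np.allclose(a, b) for a, b in zip(transferred, cached)), layer_name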