def test_multi_gpu_test_invalid_devices(self):
    if not check_if_compatible_devices(gpus=2):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_shape = (1000, 10)
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(10, activation='relu',
                               input_shape=input_shape[1:]))
        model.add(keras.layers.Dense(1, activation='sigmoid'))
        model.compile(loss='mse', optimizer='rmsprop')

        x = np.random.random(input_shape)
        y = np.random.random((input_shape[0], 1))

        # Requesting more GPUs than are available must fail.
        with self.assertRaises(ValueError):
            parallel_model = multi_gpu_utils.multi_gpu_model(
                model, gpus=len(keras.backend._get_available_gpus()) + 1)
            parallel_model.fit(x, y, epochs=2)

        # Requesting device ids that do not exist must fail.
        with self.assertRaises(ValueError):
            parallel_model = multi_gpu_utils.multi_gpu_model(
                model, gpus=[0, 2, 4, 6, 8])
            parallel_model.fit(x, y, epochs=2)

        # A single GPU is not a valid multi-GPU configuration,
        # whether given as a count or as a one-element id list.
        with self.assertRaises(ValueError):
            parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=1)
            parallel_model.fit(x, y, epochs=2)

        with self.assertRaises(ValueError):
            parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=[0])
            parallel_model.fit(x, y, epochs=2)
def test_multi_gpu_test_simple_model(self):
    gpus = 2
    num_samples = 1000
    input_dim = 10
    output_dim = 1
    hidden_dim = 10
    epochs = 2
    target_gpu_id = [0, 1]

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(hidden_dim, input_shape=(input_dim,)))
        model.add(keras.layers.Dense(output_dim))

        x = np.random.random((num_samples, input_dim))
        y = np.random.random((num_samples, output_dim))

        parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=gpus)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit(x, y, epochs=epochs)

        parallel_model = multi_gpu_utils.multi_gpu_model(
            model, gpus=target_gpu_id)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit(x, y, epochs=epochs)
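# The tests in this file gate on a `check_if_compatible_devices` helper that
# is defined elsewhere and not part of this excerpt. A minimal sketch of what
# it could look like (an assumption, not the actual helper), built on the same
# private `keras.backend._get_available_gpus()` call the invalid-devices test
# uses:
def check_if_compatible_devices(gpus=2):
    """Return True if at least `gpus` GPUs are visible to Keras."""
    return len(keras.backend._get_available_gpus()) >= gpus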
def test_multi_gpu_with_siamese_network(self):
    gpus = 2

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_shape = (3,)
        nested_model = keras.models.Sequential([
            keras.layers.Dense(32, input_shape=input_shape),
            keras.layers.Dense(1)
        ], name='nested')

        input1 = keras.Input(input_shape)
        input2 = keras.Input(input_shape)

        # Both branches share the same nested model (siamese setup).
        score1 = nested_model(input1)
        score2 = nested_model(input2)
        score_sum = keras.layers.Add(name='add')([score1, score2])

        siamese = keras.models.Model(inputs=[input1, input2],
                                     outputs=[score_sum, score1, score2],
                                     name='siamese')
        parallel_siamese = multi_gpu_utils.multi_gpu_model(siamese, gpus)
        self.assertEqual(parallel_siamese.output_names,
                         ['add', 'nested', 'nested_1'])
def __init__(self,
             data_name,
             model_name='CapsNet',
             mode='test',
             config_path='config.json',
             custom_path=None,
             verbose=True,
             n_routing=3,
             gpu_number=None,
             **kwargs):
    Model.__init__(self, data_name, mode, config_path, verbose)
    self.model_name = model_name
    self.n_routing = n_routing
    self.load_config()

    if custom_path is not None:
        self.model_path = custom_path
    else:
        # "original_capsnet_{}.{}.h5".format(self.data_name, "{epoch:03d}")
        self.model_path = os.path.join(
            self.config['saved_model_dir'], f"{self.model_name}",
            f"{self.model_name}_{self.data_name}.h5")

    os.makedirs(os.path.join(self.config['saved_model_dir'],
                             f"{self.model_name}"),
                exist_ok=True)

    self.model_path_new_train = os.path.join(
        self.config['saved_model_dir'], f"{self.model_name}",
        f"{self.model_name}_{self.data_name}_{'{epoch:03d}'}.h5")
    self.tb_path = os.path.join(self.config['tb_log_save_dir'],
                                f"{self.model_name}_{self.data_name}")
    self.load_graph()
    if gpu_number:
        self.model = multi_gpu_model(self.model, gpu_number)
def set_multi_gpu(self, gpus=None):
    """Enable multi-GPU processing.

    Keeps a reference to the single-device model in `self.model_cpu` and
    replaces `self.model` with the result of `multi_gpu_model`.

    * gpus: number of GPUs to use; defaults to all available GPUs.
    """
    self.model_cpu = self.model
    self.model = multi_gpu_model(self.model, gpus=gpus)
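# Hypothetical usage of `set_multi_gpu` (the names below are illustrative,
# not from the original source). The single-device model retained in
# `self.model_cpu` is the one to checkpoint, since `multi_gpu_model` returns
# a wrapper whose weights are shared with the template model:
#
#   trainer.set_multi_gpu(gpus=2)
#   trainer.model.fit(x, y, epochs=10)            # train the parallel wrapper
#   trainer.model_cpu.save_weights('weights.h5')  # save the template model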
def test_multi_gpu_test_multi_io_model(self):
    gpus = 2
    num_samples = 1000
    input_dim_a = 10
    input_dim_b = 5
    output_dim_a = 1
    output_dim_b = 2
    hidden_dim = 10
    epochs = 2
    target_gpu_id = [0, 1]

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        input_a = keras.Input((input_dim_a,))
        input_b = keras.Input((input_dim_b,))
        a = keras.layers.Dense(hidden_dim)(input_a)
        b = keras.layers.Dense(hidden_dim)(input_b)
        c = keras.layers.concatenate([a, b])
        output_a = keras.layers.Dense(output_dim_a)(c)
        output_b = keras.layers.Dense(output_dim_b)(c)
        model = keras.models.Model([input_a, input_b], [output_a, output_b])

        a_x = np.random.random((num_samples, input_dim_a))
        b_x = np.random.random((num_samples, input_dim_b))
        a_y = np.random.random((num_samples, output_dim_a))
        b_y = np.random.random((num_samples, output_dim_b))

        parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=gpus)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)

        parallel_model = multi_gpu_utils.multi_gpu_model(
            model, gpus=target_gpu_id)
        parallel_model.compile(loss='mse', optimizer='rmsprop')
        parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs)
def compute_output(self, image_data, image_shape):
    # Generate output tensor targets for filtered bounding boxes.
    # self.input_image_shape = K.placeholder(shape=(2,))
    self.input_image_shape = tf.constant(image_shape)
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model,
                                          gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(self.yolo_model(image_data),
                                       self.anchors,
                                       len(self.class_names),
                                       self.input_image_shape,
                                       score_threshold=self.score,
                                       iou_threshold=self.iou)
    return boxes, scores, classes
def test_multi_gpu_with_multi_input_layers(self):
    gpus = 2

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with self.cached_session():
        inputs = keras.Input((4, 3))
        init_state = keras.Input((3,))
        outputs = keras.layers.SimpleRNN(3, return_sequences=True)(
            inputs, initial_state=init_state)
        x = [np.random.randn(2, 4, 3), np.random.randn(2, 3)]
        y = np.random.randn(2, 4, 3)
        model = keras.Model([inputs, init_state], outputs)
        parallel_model = multi_gpu_utils.multi_gpu_model(model, gpus=gpus)
        parallel_model.compile(loss='mean_squared_error', optimizer='adam')
        parallel_model.train_on_batch(x, y)
def test_nested_model_with_tensor_input(self):
    gpus = 2
    input_dim = 10
    shape = (input_dim,)
    num_samples = 16
    num_classes = 10

    if not check_if_compatible_devices(gpus=gpus):
        self.skipTest('multi gpu only')

    with ops.Graph().as_default(), self.cached_session():
        input_shape = (num_samples,) + shape
        x_train = np.random.randint(0, 255, input_shape)
        y_train = np.random.randint(0, num_classes, (input_shape[0],))
        y_train = np_utils.to_categorical(y_train, num_classes)

        x_train = x_train.astype('float32')
        y_train = y_train.astype('float32')

        dataset = data.Dataset.from_tensor_slices((x_train, y_train))
        dataset = dataset.repeat()
        dataset = dataset.batch(4)

        # Feed the model from the dataset iterator rather than from
        # placeholders.
        iterator = data.make_one_shot_iterator(dataset)
        inputs, targets = iterator.get_next()
        input_tensor = keras.layers.Input(tensor=inputs)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(3, input_shape=(input_dim,)))
        model.add(keras.layers.Dense(num_classes))

        output = model(input_tensor)
        outer_model = keras.Model(input_tensor, output)
        parallel_model = multi_gpu_utils.multi_gpu_model(outer_model,
                                                         gpus=gpus)
        parallel_model.compile(
            loss='categorical_crossentropy',
            optimizer=optimizer_v1.RMSprop(lr=0.0001, decay=1e-6),
            metrics=['accuracy'],
            target_tensors=[targets])
        parallel_model.fit(epochs=1, steps_per_epoch=3)
def train(self, gpus):
    # An async dataset saver might still be running; wait before training.
    self.dataset.saver.wait()

    strategy = tf.distribute.MirroredStrategy()
    if gpus > 1:
        log.info("training with %d GPUs", gpus)
        physical_devices = tf.config.list_physical_devices('GPU')
        tf.config.set_visible_devices(physical_devices[0:gpus], 'GPU')
        # Recreate the strategy after restricting the visible devices.
        strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        # train
        if self.model is None:
            self.model = self.logic.builder(True)

    to_train = multi_model(self.model, None)
    if gpus > 1:
        with strategy.scope():
            to_train = multi_model(self.model,
                                   multi_gpu_model(self.model, gpus=gpus))

    past = self.history.copy() if self.history is not None else None
    with strategy.scope():
        present = self.logic.trainer(to_train, self.dataset).history

    if past is None:
        self.history = present
    else:
        self.history = {}
        for name, past_values in past.items():
            self.history[name] = past_values + present[name]

    self.accu = self.accuracy()

    print("")
    self._emit_txt_stats(sys.stdout)

    # save model structure and weights
    self._save_model()
    # save training history
    self._save_history()
    # save model accuracy statistics
    self._save_stats()
def __init__(self,
             data_name,
             model_name='DCT_Efficient_CapsNet',
             mode='test',
             config_path='config.json',
             custom_path=None,
             verbose=True,
             gpu_number=None,
             optimizer='Adam',
             half_filter_in_resnet=True,
             use_tiny_block=True,
             heterogeneous=False,
             **kwargs):
    Model.__init__(self, data_name, mode, config_path, verbose)
    self.model_name = model_name

    if custom_path is not None:
        self.model_path = custom_path
    else:
        self.model_path = os.path.join(
            self.config['saved_model_dir'], f"{self.model_name}",
            f"{self.model_name}_{self.data_name}.h5")

    os.makedirs(os.path.join(self.config['saved_model_dir'],
                             f"{self.model_name}"),
                exist_ok=True)

    self.model_path_new_train = os.path.join(
        self.config['saved_model_dir'], f"{self.model_name}",
        f"{self.model_name}_{self.data_name}_{'{epoch:03d}'}.h5")
    self.tb_path = os.path.join(self.config['tb_log_save_dir'],
                                f"{self.model_name}_{self.data_name}")

    self.half = half_filter_in_resnet
    self.tiny = use_tiny_block
    self.heterogeneous = heterogeneous
    self.load_graph()
    if gpu_number:
        self.model = multi_gpu_model(self.model, gpu_number)
    self.optimizer = optimizer
def create_model(model_name,
                 input_shape,
                 target_labels,
                 n_classes_per_label_type,
                 n_gpus,
                 continue_training=False,
                 rebuild_model=False,
                 transfer_learning=False,
                 transfer_learning_type='last_layer',
                 path_of_model_to_load=None,
                 initial_learning_rate=0.01,
                 output_loss_weights=None,
                 optimizer='sgd'):
    """Return the specified model architecture."""

    # Load model from disk
    if continue_training and not rebuild_model:
        logging.debug("Preparing continue_training")
        loaded_model = load_model_from_disk(path_of_model_to_load)
        return loaded_model

    model_input = Input(shape=input_shape, name='images')

    if model_name == 'InceptionResNetV2':
        keras_model = InceptionResNetV2(include_top=False,
                                        weights=None,
                                        input_tensor=model_input,
                                        input_shape=None,
                                        pooling='avg')
        output_flat = keras_model.output
        model_input = keras_model.input
    elif model_name in ['ResNet18', 'ResNet34', 'ResNet50',
                        'ResNet101', 'ResNet152']:
        res_builder = ResnetBuilder()
        if model_name == 'ResNet18':
            output_flat = res_builder.build_resnet_18(model_input)
        elif model_name == 'ResNet34':
            output_flat = res_builder.build_resnet_34(model_input)
        elif model_name == 'ResNet50':
            output_flat = res_builder.build_resnet_50(model_input)
        elif model_name == 'ResNet101':
            output_flat = res_builder.build_resnet_101(model_input)
        elif model_name == 'ResNet152':
            output_flat = res_builder.build_resnet_152(model_input)
    elif model_name == 'small_cnn':
        output_flat = small_cnn(model_input)
    elif model_name == 'Xception':
        keras_model = Xception(include_top=False,
                               weights=None,
                               input_tensor=model_input,
                               input_shape=None,
                               pooling='avg')
        output_flat = keras_model.output
        model_input = keras_model.input
    else:
        raise ValueError("Model: %s not implemented" % model_name)

    # One softmax output head per target label type.
    all_target_outputs = list()
    for n_classes, target_name in zip(n_classes_per_label_type,
                                      target_labels):
        all_target_outputs.append(
            Dense(units=n_classes,
                  kernel_initializer="he_normal",
                  activation='softmax',
                  name=target_name)(output_flat))

    # Define model optimizer
    if optimizer == 'sgd':
        opt = SGD(lr=initial_learning_rate, momentum=0.9, decay=1e-4)
    elif optimizer == 'rmsprop':
        opt = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0)
    else:
        raise ValueError("optimizer %s not implemented" % optimizer)

    if n_gpus > 1:
        logging.debug("Preparing Multi-GPU Model")
        # Build the template model on the CPU so its weights live in host
        # memory, then replicate it across GPUs with multi_gpu_model.
        with tf.device('/cpu:0'):
            base_model = Model(inputs=model_input,
                               outputs=all_target_outputs)

            if continue_training and rebuild_model:
                logging.debug("Preparing continue_training by "
                              "rebuilding model")
                loaded_model = load_model_from_disk(path_of_model_to_load)
                copy_model_weights(loaded_model, base_model, incl_last=True)
            elif transfer_learning:
                if transfer_learning_type == 'last_layer':
                    logging.debug("Preparing transfer_learning with "
                                  "freezing all but the last layer")
                    loaded_model = load_model_from_disk(
                        path_of_model_to_load)
                    copy_model_weights(loaded_model, base_model,
                                       incl_last=False)
                    non_output_layers = get_non_output_layer_ids(base_model)
                    base_model = set_layers_to_non_trainable(
                        base_model, non_output_layers)
                elif transfer_learning_type == 'all_layers':
                    logging.debug("Preparing transfer_learning with "
                                  "freezing no layers")
                    loaded_model = load_model_from_disk(
                        path_of_model_to_load)
                    copy_model_weights(loaded_model, base_model,
                                       incl_last=False)
                else:
                    raise ValueError("transfer_learning_type option %s not "
                                     "recognized" % transfer_learning_type)

        model = multi_gpu_model(base_model, gpus=n_gpus)
    else:
        model = Model(inputs=model_input, outputs=all_target_outputs)

        if continue_training and rebuild_model:
            logging.debug("Preparing continue_training by "
                          "rebuilding model")
            loaded_model = load_model_from_disk(path_of_model_to_load)
            copy_model_weights(loaded_model, model, incl_last=True)
        elif transfer_learning:
            if transfer_learning_type == 'last_layer':
                logging.debug("Preparing transfer_learning with "
                              "freezing all but the last layer")
                loaded_model = load_model_from_disk(path_of_model_to_load)
                copy_model_weights(loaded_model, model, incl_last=False)
                non_output_layers = get_non_output_layer_ids(model)
                model = set_layers_to_non_trainable(model,
                                                    non_output_layers)
            elif transfer_learning_type == 'all_layers':
                logging.debug("Preparing transfer_learning with "
                              "freezing no layers")
                loaded_model = load_model_from_disk(path_of_model_to_load)
                copy_model_weights(loaded_model, model, incl_last=False)
            else:
                raise ValueError("transfer_learning_type option %s not "
                                 "recognized" % transfer_learning_type)

    model.compile(loss=build_masked_loss(K.sparse_categorical_crossentropy),
                  optimizer=opt,
                  loss_weights=output_loss_weights,
                  metrics=[accuracy, top_k_accuracy])

    return model
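# Hypothetical call to `create_model` (argument values are illustrative, not
# from the original source): a two-headed ResNet50 classifier replicated
# across two GPUs.
#
#   model = create_model(model_name='ResNet50',
#                        input_shape=(224, 224, 3),
#                        target_labels=['species', 'count'],
#                        n_classes_per_label_type=[20, 10],
#                        n_gpus=2,
#                        optimizer='sgd')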
def generate(self):
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith(
        ".h5"), "Keras model or weights must be a .h5 file."

    # Load model, or construct model and load weights.
    start = timer()
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = tf.keras.models.load_model(model_path,
                                                     compile=False)
    except Exception:
        self.yolo_model = (tiny_yolo_body(Input(shape=(None, None, 3)),
                                          num_anchors // 2, num_classes)
                           if is_tiny_version else
                           yolo_body(Input(shape=(None, None, 3)),
                                     num_anchors // 3, num_classes))
        # Make sure model, anchors, and classes match.
        self.yolo_model.load_weights(self.model_path)
    else:
        assert (self.yolo_model.layers[-1].output_shape[-1] ==
                num_anchors / len(self.yolo_model.output) *
                (num_classes + 5)), \
            "Mismatch between model and given anchor and class sizes"
    end = timer()
    print("{} model, anchors, and classes loaded in {:.2f}sec.".format(
        model_path, end - start))

    # Generate colors for drawing bounding boxes.
    if len(self.class_names) == 1:
        self.colors = ["GreenYellow"]
    else:
        hsv_tuples = [(x / len(self.class_names), 1.0, 1.0)
                      for x in range(len(self.class_names))]
        self.colors = list(
            map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(
                lambda x: (int(x[0] * 255), int(x[1] * 255),
                           int(x[2] * 255)),
                self.colors,
            ))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(
            self.colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2,))
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model,
                                          gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(
        self.yolo_model.output,
        self.anchors,
        len(self.class_names),
        self.input_image_shape,
        score_threshold=self.score,
        iou_threshold=self.iou,
    )
    return boxes, scores, classes
        temp_layer = model.get_layer(layer.name)
        temp_weights = temp_layer.get_weights()
        temp_weights_list.append(temp_weights)

    for i in range(len(customed_model.layers) - 1):
        customed_model.get_layer(customed_model.layers[i].name).set_weights(
            temp_weights_list[i])
    # SoftMax-SoftMax/ArcFace-ArcFace, FC layer not changed
else:
    customed_model = model

customed_model.summary()

# Use multiple GPUs to train the model.
num_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))
if num_gpus > 1:
    parallel_model = multi_gpu_model(customed_model, gpus=num_gpus)

# Set configurations for training the model.
if num_gpus <= 1:
    # Temporarily increase the learning rate to 0.01.
    customed_model.compile(optimizer=Adam(lr=0.01, epsilon=1e-8),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
else:
    parallel_model.compile(optimizer=Adam(lr=0.01, epsilon=1e-8),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])


# Save the model after every epoch.
class ParallelModelCheckpoint(ModelCheckpoint):
    def __init__(self,
num_epochs = args.num_epochs
print(">>> num_epochs received by trial:")
print(num_epochs)

num_gpus = args.num_gpus
print(">>> num_gpus received by trial:")
print(num_gpus)

print("\n>>> Constructing Model...")
constructor = ModelConstructor(arch, nn_config)
test_model = constructor.build_model()
print(">>> Model Constructed Successfully\n")

if num_gpus > 1:
    test_model = multi_gpu_model(test_model, gpus=num_gpus)

test_model.summary()
test_model.compile(loss=keras.losses.categorical_crossentropy,
                   optimizer=keras.optimizers.Adam(learning_rate=1e-3,
                                                   decay=1e-4),
                   metrics=['accuracy'])

# Scale CIFAR-10 images to [0, 1] and one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
# with open(__file__, 'r') as training_script:
#     training_script_content = training_script.read()
# training_script_content = '#' + str(sys.argv) + '\n' + training_script_content
# with open(runPath + '/' + __file__, 'w') as training_script:
#     training_script.write(training_script_content)

# Generate model plot
# plot_model(model, to_file=runPath+'/model_plot.svg', show_shapes=True,
#            show_layer_names=True)

# Save model summary to file
from contextlib import redirect_stdout

with open(runPath + '/model_summary.txt', 'w') as f:
    with redirect_stdout(f):
        model.summary()

# Multi-GPU setup: keep a handle on the single-device template model.
basemodel = model
if args.gpus > 1:
    model = multi_gpu_model(model, gpus=args.gpus)

# Optimizer
optimizer = Adam(lr=args.lr, amsgrad=True)

# Compile the model
print('\n\n\n', 'Compiling model..', runID, '\n\n\tGPU ' +
      (str(args.gpus) + ' gpus' if args.gpus > 1 else args.gpuids) +
      '\t\tBatch size [ ' + str(args.bs) + ' ]  \n\n')
model.compile(loss=depth_loss_function, optimizer=optimizer)

print('Ready for training!\n')

# Callbacks
callbacks = []