def get_unet(input_img, n_filters=16, dropout=0.1, batchnorm=True):
    """Function to define the UNET Model"""
    # Contracting Path
    c1 = conv2d_block(input_img, n_filters * 1, kernel_size=3, batchnorm=batchnorm)
    p1 = MaxPooling2D((2, 2))(c1)
    p1 = Dropout(dropout)(p1)

    c2 = conv2d_block(p1, n_filters * 2, kernel_size=3, batchnorm=batchnorm)
    p2 = MaxPooling2D((2, 2))(c2)
    p2 = Dropout(dropout)(p2)

    c3 = conv2d_block(p2, n_filters * 4, kernel_size=3, batchnorm=batchnorm)
    p3 = MaxPooling2D((2, 2))(c3)
    p3 = Dropout(dropout)(p3)

    c4 = conv2d_block(p3, n_filters * 8, kernel_size=3, batchnorm=batchnorm)
    p4 = MaxPooling2D((2, 2))(c4)
    p4 = Dropout(dropout)(p4)

    c5 = conv2d_block(p4, n_filters * 16, kernel_size=3, batchnorm=batchnorm)

    # Expansive Path
    u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides=(2, 2), padding='same')(c5)
    u6 = concatenate([u6, c4])
    u6 = Dropout(dropout)(u6)
    c6 = conv2d_block(u6, n_filters * 8, kernel_size=3, batchnorm=batchnorm)

    u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides=(2, 2), padding='same')(c6)
    u7 = concatenate([u7, c3])
    u7 = Dropout(dropout)(u7)
    c7 = conv2d_block(u7, n_filters * 4, kernel_size=3, batchnorm=batchnorm)

    u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides=(2, 2), padding='same')(c7)
    u8 = concatenate([u8, c2])
    u8 = Dropout(dropout)(u8)
    c8 = conv2d_block(u8, n_filters * 2, kernel_size=3, batchnorm=batchnorm)

    u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides=(2, 2), padding='same')(c8)
    u9 = concatenate([u9, c1])
    u9 = Dropout(dropout)(u9)
    c9 = conv2d_block(u9, n_filters * 1, kernel_size=3, batchnorm=batchnorm)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)
    model = Model(inputs=[input_img], outputs=[outputs])
    return model
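# Minimal usage sketch for get_unet (assumes the tf.keras layers used above
# are imported and conv2d_block is defined as in this snippet; the 128x128
# input size is illustrative and must be divisible by 2**4 for the four
# pooling/upsampling stages to line up).
from tensorflow.keras.layers import Input

input_img = Input((128, 128, 1), name='img')
unet = get_unet(input_img, n_filters=16, dropout=0.1, batchnorm=True)
unet.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
unet.summary()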
def yolo_body(inputs, num_anchors, num_classes):
    """Creates a YOLO v2 body.

    Parameters
    ----------
    inputs: tf.Tensor
        Input tensor
    num_anchors: int
        Number of anchors
    num_classes: int
        Number of classes

    Returns
    -------
    model: tf.keras.Model
        A Keras Model instance
    """
    darknet = Model(inputs, darknet_body()(inputs))
    conv20 = compose(DarknetConv2D_BN_Leaky(1024, (3, 3)),
                     DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output)

    conv13 = darknet.layers[43].output
    conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13)
    # Reorganize the higher-resolution feature map so it can be concatenated
    # with the low-resolution map (the "passthrough" layer).
    conv21_reshaped = Lambda(space_to_depth_x2, name='space_to_depth')(conv21)

    x = concatenate([conv21_reshaped, conv20])
    x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x)
    x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
    return Model(inputs, x)
def buildVerificator():
    input_shape = (64, 64, 1)
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # build convnet to use in each siamese 'leg'
    convnet = Sequential()
    convnet.add(layers.Conv2D(64, (4, 4), activation='relu', input_shape=input_shape))
    convnet.add(MaxPooling2D())
    convnet.add(layers.Conv2D(128, (4, 4), activation='relu'))
    convnet.add(MaxPooling2D())
    convnet.add(layers.Conv2D(128, (4, 4), activation='relu'))
    convnet.add(MaxPooling2D())
    convnet.add(layers.Conv2D(256, (4, 4), activation='relu'))
    convnet.add(Flatten())
    convnet.add(Dense(4096, activation="sigmoid"))

    # encode each of the two inputs into a vector with the convnet
    encoded_l = convnet(left_input)
    encoded_r = convnet(right_input)

    # merge the two encodings: here they are simply concatenated and the
    # final Dense layer learns the comparison (the classic siamese
    # alternative is an L1 distance,
    # Lambda(lambda x: K.abs(x[0] - x[1]))([encoded_l, encoded_r]))
    both = concatenate([encoded_l, encoded_r])
    prediction = Dense(1, activation='sigmoid')(both)
    siamese_net = Model([left_input, right_input], prediction)

    # optimizer = SGD(0.0004, momentum=0.6, nesterov=True, decay=0.0003)
    optimizer = Adam(0.00006)
    # TODO: get the layerwise learning rates and momentum annealing scheme
    # described in the paper working
    siamese_net.compile(loss="binary_crossentropy", optimizer=optimizer)
    return siamese_net
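# Minimal usage sketch for the verificator (assumes numpy and the Keras
# imports used above; the random data is purely illustrative).
import numpy as np

verificator = buildVerificator()
left = np.random.rand(8, 64, 64, 1).astype('float32')
right = np.random.rand(8, 64, 64, 1).astype('float32')
same = np.random.randint(0, 2, size=(8, 1))  # 1 = same identity, 0 = different
verificator.fit([left, right], same, epochs=1, batch_size=4)
scores = verificator.predict([left, right])  # similarity scores in [0, 1]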
def join_models(GE_dataset, CNV_dataset, MUT_dataset):
    shape_GE = GE_dataset.shape[1]
    shape_CNV = CNV_dataset.shape[1]
    shape_MUT = MUT_dataset.shape[1]

    # Gene-expression branch
    Inputs_1 = Input(shape=[shape_GE], name='Inputs_1')
    x = Dense(384, activation='sigmoid', name='Dense_1_0')(Inputs_1)
    x = Dropout(0.2, name='Dropout_1_0')(x)
    x = Dense(512, activation='relu', name='Dense_1_1')(x)
    Outputs_1 = Dense(101, activation='linear', name='Outputs_1')(x)

    # Copy-number-variation branch
    Inputs_2 = Input(shape=[shape_CNV], name='Inputs_2')
    y = Dense(256, activation='sigmoid', name='Dense_2_0')(Inputs_2)
    y = Dropout(0.5, name='Dropout_2_0')(y)
    y = Dense(256, activation='relu', name='Dense_2_1')(y)
    Outputs_2 = Dense(101, activation='sigmoid', name='Outputs_2')(y)

    # Mutation branch
    Inputs_3 = Input(shape=[shape_MUT], name='Inputs_3')
    z = Dense(384, activation='relu', name='Dense_3_0')(Inputs_3)
    z = Dropout(0.4, name='Dropout_3_0')(z)
    z = Dense(512, activation='relu', name='Dense_3_1')(z)
    Outputs_3 = Dense(101, activation='linear', name='Outputs_3')(z)

    # Fuse the three branch heads and regress the final 101 outputs
    Concatenated = concatenate([Outputs_1, Outputs_2, Outputs_3],
                               name='Concatenated')
    a = Dense(64, activation='relu', name='Dense_4_0')(Concatenated)
    a = Dense(64, activation='relu', name='Dense_4_1')(a)
    Main_output = Dense(101, activation='linear', name='Main_output')(a)

    model = Model(inputs=[Inputs_1, Inputs_2, Inputs_3], outputs=Main_output)
    model.compile(optimizer='RMSprop', loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    plot_model(model, show_shapes=True, to_file='join_models.png')
    return model
def make_model():
    left_image_in = Input(shape=(240, 320, 3), name='img_left')
    right_image_in = Input(shape=(240, 320, 3), name='img_right')

    # concat into a 6 channel input volume (channels-last, so the channel
    # axis is the last one)
    x = concatenate([left_image_in, right_image_in], axis=-1)

    # FCN layers
    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)

    # Dense layers
    x = Flatten(name='flattened')(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(.1)(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(.1)(x)

    # Steering angle
    angle = Dense(15, activation='softmax', name='angle_cat_out')(x)
    angle_out = Dense(1, activation='sigmoid', name='angle_out')(angle)

    # Throttle output
    throttle_out = Dense(1, activation='relu', name='throttle_out')(x)

    model = Model(inputs=[left_image_in, right_image_in],
                  outputs=[angle_out, throttle_out])
    # model.compile(optimizer='adam',
    #               loss={'angle_out': 'mean_squared_error',
    #                     'throttle_out': 'mean_absolute_error'},
    #               loss_weights={'angle_out': 0.9, 'throttle_out': .01})
    return model
def residual_layer(x):
    conv = Convolution2D(256, (3, 3), padding='SAME')(x)
    norm = BatchNormalization()(conv)
    relu = Activation('relu')(norm)
    conv2 = Convolution2D(256, (3, 3), padding='SAME')(relu)
    # merge the block output with its input; note this concatenates along
    # channels rather than summing, so the channel count grows
    m = merge.concatenate([conv2, x])
    return Activation('relu')(m)
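# The block above merges the skip connection by channel concatenation, so
# channels grow after every block. A conventional residual block sums the
# two tensors instead; a minimal additive variant for comparison (a sketch,
# assuming the same Keras imports as above and a 256-channel input so the
# shapes match):
from tensorflow.keras.layers import add  # element-wise sum keeps channel count fixed

def residual_layer_additive(x):
    conv = Convolution2D(256, (3, 3), padding='SAME')(x)
    norm = BatchNormalization()(conv)
    relu = Activation('relu')(norm)
    conv2 = Convolution2D(256, (3, 3), padding='SAME')(relu)
    m = add([conv2, x])  # residual sum instead of concatenation
    return Activation('relu')(m)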
def get_model(self, embedding_matrix, vocab_size, question_len=15,
              img_feat=2048, embed_dim=300):
    number_of_hidden_units_LSTM = 512
    number_of_dense_layers = 3
    number_of_hidden_units = 1024
    activation_function = 'tanh'
    dropout_pct = 0.5

    # Image model - loading image features and reshaping
    model_image = Sequential()
    model_image.add(Reshape((img_feat,), input_shape=(img_feat,)))

    # Language Model - 3 LSTMs
    model_language = Sequential()
    # model_language.add(Embedding(vocab_size, embedding_matrix.shape[1],
    #                              input_length=question_len,
    #                              weights=[embedding_matrix], trainable=False))
    model_language.add(LSTM(number_of_hidden_units_LSTM, return_sequences=True,
                            input_shape=(question_len, embed_dim)))
    model_language.add(LSTM(number_of_hidden_units_LSTM, return_sequences=True))
    model_language.add(LSTM(number_of_hidden_units_LSTM, return_sequences=False))

    # Combined model: Sequential models cannot be concatenated directly,
    # so merge their outputs with the functional API instead.
    x = concatenate([model_language.output, model_image.output])
    for _ in range(number_of_dense_layers):
        x = Dense(number_of_hidden_units, kernel_initializer='uniform')(x)
        x = Activation(activation_function)(x)
        x = Dropout(dropout_pct)(x)
    x = Dense(1000)(x)
    predictions = Activation('softmax')(x)

    model = Model(inputs=[model_language.input, model_image.input],
                  outputs=predictions)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model
def inception_resnet_v2_C(input, scale_residual=True):
    channel_axis = -1

    # Input is relu activation
    init = input

    ir1 = Conv2D(192, (1, 1), activation='relu', padding='same')(input)

    ir2 = Conv2D(192, (1, 1), activation='relu', padding='same')(input)
    ir2 = Conv2D(224, (1, 3), activation='relu', padding='same')(ir2)
    ir2 = Conv2D(256, (3, 1), activation='relu', padding='same')(ir2)

    ir_merge = merge.concatenate([ir1, ir2], axis=channel_axis)

    # 1x1 projection back to the input's channel count so the residual
    # addition below is shape-compatible
    ir_conv = Conv2D(backend.int_shape(input)[channel_axis], (1, 1),
                     activation='relu')(ir_merge)

    # residual addition, scaled down when scale_residual is set
    # (as in the Inception-ResNet paper)
    out = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
                 output_shape=backend.int_shape(input)[1:],
                 arguments={'scale': 0.1 if scale_residual else 1.0})([input, ir_conv])

    out = BatchNormalization(axis=channel_axis)(out)
    out = Activation("relu")(out)
    return out
def layer(x):
    x_mean = K.expand_dims(K.mean(x, axis=1), axis=1)
    x_max = K.expand_dims(K.max(x, axis=1), axis=1)
    x = concatenate([x, x_mean, x_max], axis=1)
    x = building_block(filters)(x)
    x = Conv3D(classes, 1, data_format=DATA_FORMAT)(x)
    return x
def layer(x):
    output = []
    output.append(x)
    for i in range(modules):
        x = dilated_res_block(filters, dilation[i])(output[i])
        x = dilated_res_block(filters, dilation[i])(x)
        output.append(x)
    return concatenate(output, axis=1)
def dense_block(x, nb_layers, nb_filter, growth_rate, dropout_rate=0.2,
                weight_decay=1e-4):
    for i in range(nb_layers):
        cb = conv_block(x, growth_rate, dropout_rate, weight_decay)
        # each layer's output is stacked onto the running feature map
        x = concatenate([x, cb], axis=-1)
        nb_filter += growth_rate
    return x, nb_filter
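# Illustrative check of the channel growth in dense_block. This assumes a
# minimal stand-in conv_block; the real conv_block in this codebase likely
# also applies batch norm, dropout and weight decay.
from tensorflow.keras.layers import Input, Conv2D

def conv_block(x, growth_rate, dropout_rate, weight_decay):
    return Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(x)

x = Input((32, 32, 16))
y, nb_filter = dense_block(x, nb_layers=4, nb_filter=16, growth_rate=12)
print(y.shape, nb_filter)  # (None, 32, 32, 64), 64 == 16 + 4 * 12 channels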
def reduction_A(input, k=192, l=224, m=256, n=384):
    channel_axis = -1

    r1 = MaxPooling2D((3, 3), strides=(2, 2))(input)

    r2 = Conv2D(n, (3, 3), activation='relu', strides=(2, 2))(input)

    r3 = Conv2D(k, (1, 1), activation='relu', padding='same')(input)
    r3 = Conv2D(l, (3, 3), activation='relu', padding='same')(r3)
    r3 = Conv2D(m, (3, 3), activation='relu', strides=(2, 2))(r3)

    out = merge.concatenate([r1, r2, r3], axis=channel_axis)
    out = BatchNormalization(axis=channel_axis)(out)
    out = Activation('relu')(out)
    return out
def denseBlock(t, nb_layers):
    for _ in range(nb_layers):
        tmp = t
        # channels-last data, so normalize over the last axis
        t = BatchNormalization(axis=-1,
                               gamma_regularizer=l2(0.0001),
                               beta_regularizer=l2(0.0001))(t)
        t = Activation('relu')(t)
        t = Conv2D(16, kernel_size=(3, 3), padding='same',
                   kernel_initializer='he_uniform',
                   data_format='channels_last')(t)
        t = Dropout(0.2)(t)
        t = concatenate([t, tmp])
    return t
def default_bhv(num_outputs, num_bvh_inputs, input_shape):
    '''
    Notes: this model depends on concatenate, which failed on keras < 2.0.8
    '''
    img_in = Input(shape=input_shape, name='img_in')
    bvh_in = Input(shape=(num_bvh_inputs, ), name="behavior_in")

    x = img_in
    # x = Cropping2D(cropping=((60, 0), (0, 0)))(x)  # trim 60 pixels off top
    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)

    # Flatten by hand (equivalent to Flatten(name='flattened')):
    a, b, c, d = x.shape        # static dimensions
    a = b * c * d
    x = Permute([1, 2, 3])(x)
    x = Reshape((int(a), ))(x)  # convert dim -> int
    x = Dense(100, activation='relu')(x)
    x = Dropout(.1)(x)

    y = bvh_in
    y = Dense(num_bvh_inputs * 2, activation='relu')(y)
    y = Dense(num_bvh_inputs * 2, activation='relu')(y)
    y = Dense(num_bvh_inputs * 2, activation='relu')(y)

    z = concatenate([x, y])
    z = Dense(100, activation='relu')(z)
    z = Dropout(.1)(z)
    z = Dense(50, activation='relu')(z)
    z = Dropout(.1)(z)

    # categorical output of the angle: 15 softmax bins; the most probable
    # bin is picked downstream
    angle_out = Dense(15, activation='softmax', name='angle_out')(z)

    # categorical output of the throttle: 20 softmax bins over discretized
    # throttle values
    throttle_out = Dense(20, activation='softmax', name='throttle_out')(z)

    model = Model(inputs=[img_in, bvh_in], outputs=[angle_out, throttle_out])
    return model
def SRDenseNetBlock(inputs, i, nlayers):
    logits = Conv2D(filters=16, kernel_size=3, padding="same",
                    activation="relu", use_bias=True,
                    name="conv2d_%d_%d" % (i + 1, 0 + 1))(inputs)
    for j in range(1, nlayers):
        middle = Conv2D(filters=16, kernel_size=3, padding="same",
                        activation="relu", use_bias=True,
                        name="conv2d_%d_%d" % (i + 1, j + 1))(logits)
        logits = concatenate([logits, middle],
                             name="concatenate_%d_%d" % (i + 1, j + 1))
    return logits
def reduction_resnet_v2_B(input):
    channel_axis = -1

    r1 = MaxPooling2D((3, 3), strides=(2, 2), padding='valid')(input)

    r2 = Conv2D(256, (1, 1), activation='relu', padding='same')(input)
    r2 = Conv2D(384, (3, 3), activation='relu', strides=(2, 2))(r2)

    r3 = Conv2D(256, (1, 1), activation='relu', padding='same')(input)
    r3 = Conv2D(288, (3, 3), activation='relu', strides=(2, 2))(r3)

    r4 = Conv2D(256, (1, 1), activation='relu', padding='same')(input)
    r4 = Conv2D(288, (3, 3), activation='relu', padding='same')(r4)
    r4 = Conv2D(320, (3, 3), activation='relu', strides=(2, 2))(r4)

    m = merge.concatenate([r1, r2, r3, r4], axis=channel_axis)
    m = BatchNormalization(axis=channel_axis)(m)
    m = Activation('relu')(m)
    return m
def SRDenseNetKeras(inputs, nblocks=8, nlayers=8):
    logits = Conv2D(filters=16, kernel_size=3, strides=1, padding='same',
                    activation="relu", use_bias=True)(inputs)
    low_level_features = logits  # kept for the long skip connection below
    for i in range(nblocks):
        logits = SRDenseNetBlock(logits, i, nlayers)
    logits = concatenate([logits, low_level_features])
    logits = Conv2D(filters=256, kernel_size=1, padding='same',
                    activation="relu", use_bias=True)(logits)
    # two stride-2 transposed convolutions upsample by a factor of 4
    logits = Conv2DTranspose(filters=256, kernel_size=3, strides=2,
                             padding='same', activation="relu",
                             use_bias=True)(logits)
    logits = Conv2DTranspose(filters=256, kernel_size=3, strides=2,
                             padding='same', activation="relu",
                             use_bias=True)(logits)
    logits = Conv2D(filters=1, kernel_size=3, padding='same',
                    use_bias=True)(logits)
    mModel = Model(inputs, logits)
    mModel.compile(optimizer=Adam(lr=0.00001), loss='mean_squared_error',
                   metrics=['mean_squared_error'])
    return mModel
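# Minimal usage sketch for SRDenseNetKeras (assumes the tf.keras imports
# used above; the 32x32 low-resolution patch size is illustrative).
from tensorflow.keras.layers import Input

lr_patch = Input((32, 32, 1))  # low-resolution grayscale patch
sr_model = SRDenseNetKeras(lr_patch, nblocks=8, nlayers=8)
sr_model.summary()  # final output is (None, 128, 128, 1): 4x upscaling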
def __init__(self):
    self.input_shape = (224, 224)
    self.filter_count = 32
    self.kernel_size = (3, 3)
    self.leakrelu_alpha = 0.2
    self.encoder = self.createEncoder()

    Input1 = Input(shape=(224, 224, 3))
    Input2 = Input(shape=(224, 224, 3))
    # target = Dot(axes=1)([self.encoder(Input1), self.encoder(Input2)])
    x = concatenate(inputs=[self.encoder(Input1), self.encoder(Input2)])
    target = Dense(1)(x)  # note: unused; the model output is the discriminator below

    self.discriminator = self.createDiscriminator()
    y = self.discriminator(x)

    self.model = Model(inputs=[Input1, Input2], outputs=y)
    self.model.summary()

    self.pathlist = pathlist  # assumes a module-level pathlist
    self.train_data_count = [len(i) for i in self.pathlist]

    op = Adam(lr=0.0001)
    self.model.compile(optimizer=op, loss='mse', metrics=['accuracy'])
    self.pad_param = 5
    self.rotate_degree_param = 90
def Tiramisu(layer_per_block, n_pool=5, growth_rate=12):
    input_layer = Input(shape=(512, 512, 3))
    t = Conv2D(48, kernel_size=(3, 3), strides=(1, 1),
               padding='same')(input_layer)

    # dense blocks on the downsampling path
    nb_features = 48
    skip_connections = []
    for i in range(n_pool):
        t = denseBlock(t, layer_per_block[i])
        print(t)
        skip_connections.append(t)
        nb_features += growth_rate * layer_per_block[i]
        t = transitionDown(t, nb_features)
        print(t)

    t = denseBlock(t, layer_per_block[n_pool])  # bottleneck

    skip_connections = skip_connections[::-1]  # reverse the list

    for i in range(n_pool):
        keep_nb_features = growth_rate * layer_per_block[n_pool + i]
        t = Conv2DTranspose(keep_nb_features, strides=2, kernel_size=(3, 3),
                            padding='same',
                            data_format='channels_last')(t)  # transition up
        t = concatenate([t, skip_connections[i]])
        t = denseBlock(t, layer_per_block[n_pool + i + 1])
        print(t)

    t = Conv2D(3, kernel_size=(1, 1), padding='same', activation='tanh',
               kernel_initializer='he_uniform',
               data_format='channels_last')(t)
    # output_layer = Activation('tanh')(t)
    print(t)
    return Model(inputs=input_layer, outputs=t)
def default_imu(num_outputs, num_imu_inputs, input_shape, roi_crop=(0, 0)):
    # We now expect that cropping is done elsewhere; adjust the expected
    # image size here.
    input_shape = adjust_input_shape(input_shape, roi_crop)

    img_in = Input(shape=input_shape, name='img_in')
    imu_in = Input(shape=(num_imu_inputs, ), name="imu_in")

    x = img_in
    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Flatten(name='flattened')(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(.1)(x)

    y = imu_in
    y = Dense(14, activation='relu')(y)
    y = Dense(14, activation='relu')(y)
    y = Dense(14, activation='relu')(y)

    z = concatenate([x, y])
    z = Dense(50, activation='relu')(z)
    z = Dropout(.1)(z)
    z = Dense(50, activation='relu')(z)
    z = Dropout(.1)(z)

    outputs = []
    for i in range(num_outputs):
        outputs.append(Dense(1, activation='linear', name='out_' + str(i))(z))

    model = Model(inputs=[img_in, imu_in], outputs=outputs)
    return model
def test_sequence_example_into_input_layer(self):
  examples = [_make_sequence_example().SerializeToString()] * 100
  ctx_cols, seq_cols = self._build_feature_columns()

  def _parse_example(example):
    ctx, seq = parsing_ops.parse_single_sequence_example(
        example,
        context_features=fc.make_parse_example_spec_v2(ctx_cols),
        sequence_features=fc.make_parse_example_spec_v2(seq_cols))
    ctx.update(seq)
    return ctx

  ds = dataset_ops.Dataset.from_tensor_slices(examples)
  ds = ds.map(_parse_example)
  ds = ds.batch(20)

  # Test on a single batch
  features = dataset_ops.make_one_shot_iterator(ds).get_next()

  # Tile the context features across the sequence features
  sequence_input_layer = ksfc.SequenceFeatures(seq_cols)
  seq_input, _ = sequence_input_layer(features)
  dense_input_layer = dense_features.DenseFeatures(ctx_cols)
  ctx_input = dense_input_layer(features)
  ctx_input = core.RepeatVector(array_ops.shape(seq_input)[1])(ctx_input)
  concatenated_input = merge.concatenate([seq_input, ctx_input])

  rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
  output = rnn_layer(concatenated_input)

  with self.cached_session() as sess:
    sess.run(variables.global_variables_initializer())
    features_r = sess.run(features)
    self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
    output_r = sess.run(output)
    self.assertAllEqual(output_r.shape, [20, 10])
def default_imu(num_outputs, num_imu_inputs, input_shape):
    img_in = Input(shape=input_shape, name='img_in')
    imu_in = Input(shape=(num_imu_inputs, ), name="imu_in")

    x = img_in
    x = Cropping2D(cropping=((60, 0), (0, 0)))(x)  # trim 60 pixels off top
    # x = Lambda(lambda x: x/127.5 - 1.)(x)  # normalize and re-center
    x = BatchNormalization()(x)
    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Flatten(name='flattened')(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(.1)(x)

    y = imu_in
    y = Dense(14, activation='relu')(y)
    y = Dense(14, activation='relu')(y)
    y = Dense(14, activation='relu')(y)

    z = concatenate([x, y])
    z = Dense(50, activation='relu')(z)
    z = Dropout(.1)(z)
    z = Dense(50, activation='relu')(z)
    z = Dropout(.1)(z)

    outputs = []
    for i in range(num_outputs):
        outputs.append(Dense(1, activation='linear', name='out_' + str(i))(z))

    model = Model(inputs=[img_in, imu_in], outputs=outputs)
    return model
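# Minimal usage sketch for the cropping-based default_imu above (assumes the
# tf.keras layers used in the snippet; the shapes mirror a typical donkeycar
# setup and are illustrative).
model = default_imu(num_outputs=2, num_imu_inputs=7, input_shape=(120, 160, 3))
model.compile(optimizer='adam', loss='mse')
model.summary()  # two scalar outputs out_0 / out_1, e.g. steering and throttle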
def loss_net(x_in, trux_x_in, width, height, style_image_path, content_weight,
             style_weight):
    # Append the initial input to the FastNet input to form the VGG inputs
    x = concatenate([x_in, trux_x_in], axis=0)

    # Normalize the inputs via a custom VGG normalization layer
    x = VGGNormalize(name="vgg_normalize")(x)

    vgg = VGG16(include_top=False, input_tensor=x)
    # vgg = tf.keras.applications.vgg16.VGG16(include_top=False,
    #                                         weights='imagenet',
    #                                         input_tensor=x)

    # Content layer from which we will pull feature maps
    content_layers = ['block5_conv2']

    # Style layers we are interested in
    style_layers = [
        'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1',
        'block5_conv1'
    ]

    vgg_output_dict = dict([(layer.name, layer.output)
                            for layer in vgg.layers[-18:]])
    vgg_layers = dict([(layer.name, layer) for layer in vgg.layers[-18:]])

    if style_weight > 0:
        add_style_loss(vgg, style_image_path, vgg_layers, vgg_output_dict,
                       width, height, style_weight)

    if content_weight > 0:
        add_content_loss(vgg_layers, vgg_output_dict, content_weight)

    # Freeze all VGG layers
    for layer in vgg.layers[-19:]:
        layer.trainable = False

    return vgg
def inception_resnet_v2_A(input, scale_residual=True):
    channel_axis = -1

    # Input is relu activation
    init = input

    ir1 = Conv2D(32, (1, 1), activation='relu', padding='same')(input)

    ir2 = Conv2D(32, (1, 1), activation='relu', padding='same')(input)
    ir2 = Conv2D(32, (3, 3), activation='relu', padding='same')(ir2)

    ir3 = Conv2D(32, (1, 1), activation='relu', padding='same')(input)
    ir3 = Conv2D(48, (3, 3), activation='relu', padding='same')(ir3)
    ir3 = Conv2D(64, (3, 3), activation='relu', padding='same')(ir3)

    ir_merge = merge.concatenate([ir1, ir2, ir3], axis=channel_axis)

    # 1x1 projection back to the input's channel count so the residual
    # addition below is shape-compatible
    ir_conv = Conv2D(backend.int_shape(input)[channel_axis], (1, 1),
                     activation='relu')(ir_merge)

    # residual addition, scaled down when scale_residual is set
    # (as in the Inception-ResNet paper)
    out = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
                 output_shape=backend.int_shape(input)[1:],
                 arguments={'scale': 0.1 if scale_residual else 1.0})([input, ir_conv])

    out = BatchNormalization(axis=channel_axis)(out)
    out = Activation("relu")(out)
    return out
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
  """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
    is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Arguments:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
  import tensorflow as tf
  from keras.applications import Xception
  from keras.utils import multi_gpu_model
  import numpy as np

  num_samples = 1000
  height = 224
  width = 224
  num_classes = 1000

  # Instantiate the base model (or "template" model).
  # We recommend doing this under a CPU device scope,
  # so that the model's weights are hosted on CPU memory.
  # Otherwise they may end up hosted on a GPU, which would
  # complicate weight sharing.
  with tf.device('/cpu:0'):
      model = Xception(weights=None,
                       input_shape=(height, width, 3),
                       classes=num_classes)

  # Replicates the model on 8 GPUs.
  # This assumes that your machine has 8 available GPUs.
  parallel_model = multi_gpu_model(model, gpus=8)
  parallel_model.compile(loss='categorical_crossentropy',
                         optimizer='rmsprop')

  # Generate dummy data.
  x = np.random.random((num_samples, height, width, 3))
  y = np.random.random((num_samples, num_classes))

  # This `fit` call will be distributed on 8 GPUs.
  # Since the batch size is 256, each GPU will process 32 samples.
  parallel_model.fit(x, y, epochs=20, batch_size=256)

  # Save model via the template model (which shares the same weights):
  model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
  ..
  # Not needed to change the device scope for model definition:
  model = Xception(weights=None, ..)

  try:
      model = multi_gpu_model(model, cpu_relocation=True)
      print("Training using multiple GPUs..")
  except:
      print("Training using single GPU or CPU..")

  model.compile(..)
  ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
  ..
  # Not needed to change the device scope for model definition:
  model = Xception(weights=None, ..)

  try:
      model = multi_gpu_model(model, cpu_merge=False)
      print("Training using multiple GPUs..")
  except:
      print("Training using single GPU or CPU..")

  model.compile(..)
  ..
  ```

  Raises:
      ValueError: if the `gpus` argument does not match available devices.
  """
  # pylint: disable=g-import-not-at-top
  from tensorflow.python.keras.layers.core import Lambda
  from tensorflow.python.keras.layers.merge import concatenate

  if isinstance(gpus, (list, tuple)):
    if len(gpus) <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = len(gpus)
    target_gpu_ids = gpus
  else:
    if gpus <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `gpus >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = gpus
    target_gpu_ids = range(num_gpus)

  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
  available_devices = _get_available_devices()
  available_devices = [
      _normalize_device_name(name) for name in available_devices
  ]
  for device in target_devices:
    if device not in available_devices:
      raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                       'we expect the following devices to be available: %s. '
                       'However this machine only has: %s. '
                       'Try reducing `gpus`.' % (gpus, target_devices,
                                                 available_devices))

  def get_slice(data, i, parts):
    """Slice an array into `parts` slices and return slice `i`.

    Arguments:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
    shape = array_ops.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts
    if i == parts - 1:
      size = batch_size - step * i
    else:
      size = step
    size = array_ops.concat([size, input_shape], axis=0)
    stride = array_ops.concat([step, input_shape * 0], axis=0)
    start = stride * i
    return array_ops.slice(data, start, size)

  # Relocate the model definition under CPU device scope if needed
  if cpu_relocation:
    from tensorflow.python.keras.models import clone_model  # pylint: disable=g-import-not-at-top
    with ops.device('/cpu:0'):
      model = clone_model(model)

  all_outputs = []
  for i in range(len(model.outputs)):
    all_outputs.append([])

  # Place a copy of the model on each GPU,
  # each getting a slice of the inputs.
  for i, gpu_id in enumerate(target_gpu_ids):
    with ops.device('/gpu:%d' % gpu_id):
      with ops.name_scope('replica_%d' % gpu_id):
        inputs = []
        # Retrieve a slice of the input.
        for x in model.inputs:
          input_shape = tuple(x.get_shape().as_list())[1:]
          slice_i = Lambda(
              get_slice,
              output_shape=input_shape,
              arguments={
                  'i': i,
                  'parts': num_gpus
              })(x)
          inputs.append(slice_i)

        # Apply model on slice
        # (creating a model replica on the target device).
        outputs = model(inputs)
        if not isinstance(outputs, list):
          outputs = [outputs]

        # Save the outputs for merging back together later.
        for o in range(len(outputs)):
          all_outputs[o].append(outputs[o])

  # Deduplicate output names to handle Siamese networks.
  occurrences = {}
  for n in model.output_names:
    if n not in occurrences:
      occurrences[n] = 1
    else:
      occurrences[n] += 1
  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
  output_names = []
  for n in model.output_names:
    if n in conflict_counter:
      conflict_counter[n] += 1
      n += '_%d' % conflict_counter[n]
    output_names.append(n)

  # Merge outputs under expected scope.
  with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
    merged = []
    for name, outputs in zip(output_names, all_outputs):
      merged.append(concatenate(outputs, axis=0, name=name))
  return Model(model.inputs, merged)
def convert(args):
    configs_path = args.configs
    weights_path = args.weights
    outputs_path = args.outputs
    assert configs_path.endswith('.cfg'), \
        '{} is not a .cfg file'.format(configs_path)
    assert weights_path.endswith('.weights'), \
        '{} is not a .weights file'.format(weights_path)
    assert outputs_path.endswith('.h5'), \
        'output path {} is not a .h5 file'.format(outputs_path)
    output_root = os.path.splitext(outputs_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    weights_header = np.ndarray(shape=(4, ), dtype='int32',
                                buffer=weights_file.read(16))
    print('Weights Header: ', weights_header)
    # TODO: Check transpose flag when implementing fully connected layers.
    # transpose = (weights_header[0] > 1000) or (weights_header[1] > 1000)

    print('Parsing Darknet config...')
    unique_config_file = unique_config_sections(configs_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model...')
    image_height = int(cfg_parser['net_0']['height'])
    image_width = int(cfg_parser['net_0']['width'])
    prev_layer = Input(shape=(image_height, image_width, 3))
    all_layers = [prev_layer]

    weight_decay = (float(cfg_parser['net_0']['decay'])
                    if 'net_0' in cfg_parser.sections() else 5e-4)
    count = 0
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            # padding='same' is equivalent to Darknet pad=1
            padding = 'same' if pad == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            # TODO: This assumes channels-last dim_ordering.
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn' if batch_normalize else ' ', activation,
                  weights_shape)

            conv_bias = np.ndarray(shape=(filters, ), dtype='float32',
                                   buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(shape=(3, filters), dtype='float32',
                                        buffer=weights_file.read(filters * 12))
                count += 3 * filters

                # TODO: Keras BatchNormalization mistakenly refers to var as std.
                bn_weight_list = [bn_weights[0],  # scale gamma
                                  conv_bias,      # shift beta
                                  bn_weights[1],  # running mean
                                  bn_weights[2]]  # running var

            conv_weights = np.ndarray(shape=darknet_w_shape, dtype='float32',
                                      buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = ([conv_weights] if batch_normalize
                            else [conv_weights, conv_bias])

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError('Unknown activation function `{}` in section '
                                 '{}'.format(activation, section))

            # Create Conv2D layer
            conv_layer = (Conv2D(filters,
                                 kernel_size=(size, size),
                                 strides=(stride, stride),
                                 kernel_regularizer=l2(weight_decay),
                                 use_bias=not batch_normalize,
                                 weights=conv_weights,
                                 activation=act_fn,
                                 padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(padding='same',
                             pool_size=(size, size),
                             strides=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            if cfg_parser.items(section):
                raise ValueError('{} with params unsupported.'.format(section))
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = concatenate(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('reorg'):
            block_size = int(cfg_parser[section]['stride'])
            assert block_size == 2, 'Only reorg with stride 2 supported.'
            all_layers.append(
                Lambda(space_to_depth_x2,
                       name='space_to_depth_x2')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('region'):
            with open('{}_anchors.txt'.format(output_root), 'w') as f:
                print(cfg_parser[section]['anchors'], file=f)

        elif (section.startswith('net') or section.startswith('cost')
              or section.startswith('softmax')):
            pass  # Configs not currently handled during model definition.

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    model = Model(inputs=all_layers[0], outputs=all_layers[-1])
    print(model.summary())
    model.save('{}'.format(outputs_path))
    print('Saved Keras model to {}'.format(outputs_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
input1 = Input(shape=(3,))
dense1 = Dense(5)(input1)
dense2 = Dense(2)(dense1)
dense3 = Dense(3)(dense2)
output1 = Dense(11)(dense3)

input2 = Input(shape=(3,))
dense21 = Dense(7)(input2)
output2 = Dense(4)(dense21)
# ? Can these "output" widths differ like this? >>> Once concatenate is used,
# the output layers above are really hidden layers; until y is produced, a
# layer is still hidden.

# Merge the models!
# Lives at tensorflow/tensorflow/python/keras/layers/merge.py.
from tensorflow.python.keras.layers.merge import concatenate
merge1 = concatenate([output1, output2])

# Add more hidden layers, i.e. build another model below the merge.
middle1 = Dense(4)(merge1)
middle2 = Dense(7)(middle1)
middle3 = Dense(11)(middle2)  # last layer of the merged trunk

# Split into branches.
# Define the layers.
output_1 = Dense(30)(middle3)   # first output branch
output_1 = Dense(3)(output_1)   # !! the final outputs share the same width: 3 columns

output_2 = Dense(300)(middle3)  # second output branch
output_2 = Dense(6)(output_2)
output_2 = Dense(3)(output_2)   # !! the final outputs share the same width: 3 columns
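# The snippet stops before the branches are tied into a model; a minimal
# completion of the two-input / two-output graph sketched above (the compile
# settings and dummy data are illustrative).
import numpy as np
from tensorflow.python.keras.models import Model

model = Model(inputs=[input1, input2], outputs=[output_1, output_2])
model.compile(optimizer='adam', loss='mse')

x1 = np.random.rand(10, 3)
x2 = np.random.rand(10, 3)
y1 = np.random.rand(10, 3)
y2 = np.random.rand(10, 3)
model.fit([x1, x2], [y1, y2], epochs=1, batch_size=2)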
def get_Model(training):
    input_shape = (img_w, img_h, 1)  # (128, 64, 1)

    # Make Network
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3), padding='same', name='conv1',
                   kernel_initializer='he_normal')(inputs)  # (None, 128, 64, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # (None, 64, 32, 64)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2',
                   kernel_initializer='he_normal')(inner)  # (None, 64, 32, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # (None, 32, 16, 128)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 16, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # (None, 32, 8, 256)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5',
                   kernel_initializer='he_normal')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)  # (None, 32, 8, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)  # (None, 32, 4, 512)

    inner = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal',
                   name='con7')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=(32, 2048), name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal',
                  name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal',
                  name='lstm1')(inner)  # (None, 32, 512)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True,
                   kernel_initializer='he_normal', name='lstm1_b')(inner)
    reversed_lstm_1b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_1b)

    lstm1_merged = add([lstm_1, reversed_lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)

    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal',
                  name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True,
                   kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    reversed_lstm_2b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_2b)

    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])  # (None, 32, 1024)
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',
                  name='dense2')(lstm2_merged)  # (None, 32, 63)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')  # (None, 8)
    input_length = Input(name='input_length', shape=[1], dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(focal_ctc_lambda_func, output_shape=(1, ), name='ctc')(
        [labels, y_pred, input_length, label_length])  # (None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length],
                     outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)
def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))

    # Layer 0 => 4
    x = _conv_block(input_image, [
        {'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
        {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
        {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
        {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer 5 => 8
    x = _conv_block(x, [
        {'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
        {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer 9 => 11
    x = _conv_block(x, [
        {'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [
            {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16 + i * 3},
            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17 + i * 3}])
    skip_36 = x

    # Layer 37 => 40
    x = _conv_block(x, [
        {'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [
            {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41 + i * 3},
            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42 + i * 3}])
    skip_61 = x

    # Layer 62 => 65
    x = _conv_block(x, [
        {'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
        {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])

    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [
            {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66 + i * 3},
            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67 + i * 3}])

    # Layer 75 => 79
    x = _conv_block(x, [
        {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
        {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
        {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)

    # Layer 80 => 82
    yolo_82 = _conv_block(x, [
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80},
        {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

    # Layer 83 => 86
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

    # Layer 92 => 94
    yolo_94 = _conv_block(x, [
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92},
        {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

    # Layer 95 => 98
    x = _conv_block(x, [
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _conv_block(x, [
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 100},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 101},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 102},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 103},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 104},
        {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])
    return model
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
  """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
    is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Args:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
  import tensorflow as tf
  from keras.applications import Xception
  from keras.utils import multi_gpu_model
  import numpy as np

  num_samples = 1000
  height = 224
  width = 224
  num_classes = 1000

  # Instantiate the base model (or "template" model).
  # We recommend doing this under a CPU device scope,
  # so that the model's weights are hosted on CPU memory.
  # Otherwise they may end up hosted on a GPU, which would
  # complicate weight sharing.
  with tf.device('/cpu:0'):
      model = Xception(weights=None,
                       input_shape=(height, width, 3),
                       classes=num_classes)

  # Replicates the model on 8 GPUs.
  # This assumes that your machine has 8 available GPUs.
  parallel_model = multi_gpu_model(model, gpus=8)
  parallel_model.compile(loss='categorical_crossentropy',
                         optimizer='rmsprop')

  # Generate dummy data.
  x = np.random.random((num_samples, height, width, 3))
  y = np.random.random((num_samples, num_classes))

  # This `fit` call will be distributed on 8 GPUs.
  # Since the batch size is 256, each GPU will process 32 samples.
  parallel_model.fit(x, y, epochs=20, batch_size=256)

  # Save model via the template model (which shares the same weights):
  model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
  ..
  # Not needed to change the device scope for model definition:
  model = Xception(weights=None, ..)

  try:
      model = multi_gpu_model(model, cpu_relocation=True)
      print("Training using multiple GPUs..")
  except:
      print("Training using single GPU or CPU..")

  model.compile(..)
  ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
  ..
  # Not needed to change the device scope for model definition:
  model = Xception(weights=None, ..)

  try:
      model = multi_gpu_model(model, cpu_merge=False)
      print("Training using multiple GPUs..")
  except:
      print("Training using single GPU or CPU..")

  model.compile(..)
  ..
  ```

  Raises:
      ValueError: if the `gpus` argument does not match available devices.
  """
  if isinstance(gpus, (list, tuple)):
    if len(gpus) <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = len(gpus)
    target_gpu_ids = gpus
  else:
    if gpus <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `gpus >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = gpus
    target_gpu_ids = range(num_gpus)

  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
  available_devices = _get_available_devices()
  available_devices = [
      _normalize_device_name(name) for name in available_devices
  ]
  for device in target_devices:
    if device not in available_devices:
      raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                       'we expect the following devices to be available: %s. '
                       'However this machine only has: %s. '
                       'Try reducing `gpus`.' % (gpus, target_devices,
                                                 available_devices))

  def get_slice(data, i, parts):
    """Slice an array into `parts` slices and return slice `i`.

    Args:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
    shape = array_ops.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts
    if i == parts - 1:
      size = batch_size - step * i
    else:
      size = step
    size = array_ops.concat([size, input_shape], axis=0)
    stride = array_ops.concat([step, input_shape * 0], axis=0)
    start = stride * i
    return array_ops.slice(data, start, size)

  # Relocate the model definition under CPU device scope if needed
  if cpu_relocation:
    from tensorflow.python.keras.models import clone_model  # pylint: disable=g-import-not-at-top
    with ops.device('/cpu:0'):
      model = clone_model(model)

  all_outputs = [[] for _ in range(len(model.outputs))]

  # Place a copy of the model on each GPU,
  # each getting a slice of the inputs.
  for i, gpu_id in enumerate(target_gpu_ids):
    with ops.device('/gpu:%d' % gpu_id):
      with backend.name_scope('replica_%d' % gpu_id):
        inputs = []
        # Retrieve a slice of the input.
        for x in model.inputs:
          input_shape = tuple(x.shape.as_list())[1:]
          slice_i = Lambda(
              get_slice,
              output_shape=input_shape,
              arguments={
                  'i': i,
                  'parts': num_gpus
              })(x)
          inputs.append(slice_i)

        # Apply model on slice
        # (creating a model replica on the target device).
        outputs = model(inputs)
        if not isinstance(outputs, list):
          outputs = [outputs]

        # Save the outputs for merging back together later.
        for o, output in enumerate(outputs):
          all_outputs[o].append(output)

  # Deduplicate output names to handle Siamese networks.
  occurrences = {}
  for n in model.output_names:
    if n not in occurrences:
      occurrences[n] = 1
    else:
      occurrences[n] += 1
  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
  output_names = []
  for n in model.output_names:
    if n in conflict_counter:
      conflict_counter[n] += 1
      n += '_%d' % conflict_counter[n]
    output_names.append(n)

  # Merge outputs under expected scope.
  with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
    merged = []
    for name, outputs in zip(output_names, all_outputs):
      merged.append(concatenate(outputs, axis=0, name=name))
  return Model(model.inputs, merged)
def inception_resnet_stem(input):
    channel_axis = -1

    # Input shape is 299 x 299 x 3 (tf) or 3 x 299 x 299 (th)
    c = Conv2D(32, (3, 3), activation='relu', strides=(2, 2))(input)
    c = Conv2D(32, (3, 3), activation='relu')(c)
    c = Conv2D(64, (3, 3), activation='relu', padding='same')(c)

    c1 = MaxPooling2D((3, 3), strides=(2, 2))(c)
    c2 = Conv2D(96, (3, 3), activation='relu', strides=(2, 2))(c)
    m = merge.concatenate([c1, c2], axis=channel_axis)

    c1 = Conv2D(64, (1, 1), activation='relu', padding='same')(m)
    c1 = Conv2D(96, (3, 3), activation='relu')(c1)

    c2 = Conv2D(64, (1, 1), activation='relu', padding='same')(m)
    c2 = Conv2D(64, (7, 1), activation='relu', padding='same')(c2)
    c2 = Conv2D(64, (1, 7), activation='relu', padding='same')(c2)
    c2 = Conv2D(96, (3, 3), activation='relu', padding='valid')(c2)

    m2 = merge.concatenate([c1, c2], axis=channel_axis)

    p1 = MaxPooling2D((3, 3), strides=(2, 2))(m2)
    p2 = Conv2D(192, (3, 3), activation='relu', strides=(2, 2))(m2)
    m3 = merge.concatenate([p1, p2], axis=channel_axis)

    m3 = BatchNormalization(axis=channel_axis)(m3)
    m3 = Activation('relu')(m3)
    return m3
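# Sketch of how the stem composes with the Inception-ResNet blocks defined
# earlier in this section (inception_resnet_v2_A, reduction_A). The block
# counts and reduction filter values here are illustrative, not the full
# Inception-ResNet-v2 schedule; assumes the same tf.keras imports as above.
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

inp = Input((299, 299, 3))
x = inception_resnet_stem(inp)        # (None, 35, 35, 384)
for _ in range(2):
    x = inception_resnet_v2_A(x)      # scaled residual blocks keep the shape
x = reduction_A(x, k=256, l=256, m=384, n=384)  # downsample to 17 x 17
backbone = Model(inp, x)
backbone.summary()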