def create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING,
                         (cfg["MODEL"].ROI_DIM, cfg["MODEL"].ROI_DIM),
                         spatial_scale=1 / 16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, cfg["DATA"].NUM_CLASSES), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=cfg["DATA"].NUM_CLASSES, init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, cfg["DATA"].NUM_CLASSES * 4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=cfg["DATA"].NUM_CLASSES * 4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
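# A minimal sketch of how the two heads above are typically consumed downstream,
# assuming one-hot ROI class labels (`label_targets`), regression targets
# (`bbox_targets`, `bbox_inside_weights`), and an assumed smooth-L1 helper
# `SmoothL1Loss` -- none of these appear in the snippet above:
def create_fast_rcnn_losses(cls_score, label_targets, bbox_pred, bbox_targets, bbox_inside_weights):
    # classification: per-ROI softmax cross-entropy over the class axis
    cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)
    # regression: smooth L1 on the box deltas, masked to the target class
    bbox_loss = SmoothL1Loss(1.0, bbox_pred, bbox_targets, bbox_inside_weights, 1.0)
    return plus(reduce_sum(cls_loss), reduce_sum(bbox_loss), name="detection_losses")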
def cyclegan_generator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        h = C.relu(InstanceNormalization((64, 1, 1))(Convolution2D((7, 7), 64)(h)))
        h = C.relu(InstanceNormalization((128, 1, 1))(Convolution2D((3, 3), 128, strides=2)(h)))
        h = C.relu(InstanceNormalization((256, 1, 1))(Convolution2D((3, 3), 256, strides=2)(h)))

        # 9 residual blocks
        for _ in range(9):
            h = residual_block(h, 256)

        h = C.relu(InstanceNormalization((128, 1, 1))(ConvolutionTranspose2D(
            (3, 3), 128, strides=2, output_shape=(img_height // 2, img_width // 2))(h)))
        h = C.relu(InstanceNormalization((64, 1, 1))(ConvolutionTranspose2D(
            (3, 3), 64, strides=2, output_shape=(img_height, img_width))(h)))

        h = Convolution2D((7, 7), 3, activation=C.tanh, bias=True)(h)

        return h
def wgan_critic(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False):
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h), alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3), 64, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3), 128, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3), 256, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3), 512, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3), 1024, strides=2)(h)), alpha=0.2)

        h = Convolution2D((4, 4), 1, pad=False, strides=1, bias=True)(h)

        return h
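# A minimal sketch of the Wasserstein objective this critic would be trained
# with; `x_real` (an input variable) and `x_fake` (the generator output) are
# assumed to exist, and the second critic application shares parameters via clone():
C_real = wgan_critic(x_real)
C_fake = C_real.clone(method='share', substitutions={x_real: x_fake.output})

# the critic maximizes the score gap between real and fake; the generator
# maximizes the critic's score on fakes (per-sample means; the trainer
# averages over the minibatch)
critic_loss = C.reduce_mean(C_fake, axis=C.Axis.all_static_axes()) \
            - C.reduce_mean(C_real, axis=C.Axis.all_static_axes())
generator_loss = -C.reduce_mean(C_fake, axis=C.Axis.all_static_axes())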
def create_imagenet_model_bottleneck(input, num_stack_layers, num_classes, stride1x1, stride3x3):
    c_map = [64, 128, 256, 512, 1024, 2048]

    # conv1 and max pooling
    conv1 = conv_bn_relu(input, (7, 7), c_map[0], strides=(2, 2))
    pool1 = MaxPooling((3, 3), strides=(2, 2), pad=True)(conv1)

    # conv2_x
    r2_1 = resnet_bottleneck_inc(pool1, c_map[2], c_map[0], (1, 1), (1, 1))
    r2_2 = resnet_bottleneck_stack(r2_1, num_stack_layers[0], c_map[2], c_map[0])

    # conv3_x
    r3_1 = resnet_bottleneck_inc(r2_2, c_map[3], c_map[1], stride1x1, stride3x3)
    r3_2 = resnet_bottleneck_stack(r3_1, num_stack_layers[1], c_map[3], c_map[1])

    # conv4_x
    r4_1 = resnet_bottleneck_inc(r3_2, c_map[4], c_map[2], stride1x1, stride3x3)
    r4_2 = resnet_bottleneck_stack(r4_1, num_stack_layers[2], c_map[4], c_map[2])

    # conv5_x
    r5_1 = resnet_bottleneck_inc(r4_2, c_map[5], c_map[3], stride1x1, stride3x3)
    r5_2 = resnet_bottleneck_stack(r5_1, num_stack_layers[3], c_map[5], c_map[3])

    # Global average pooling and output
    pool = AveragePooling(filter_shape=(7, 7), name='final_avg_pooling')(r5_2)
    z = Dense(num_classes, init=C.normal(0.01))(pool)
    return z
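# A sketch of the bottleneck helpers the model above assumes, patterned on the
# standard ResNet bottleneck design (1x1 reduce, 3x3, 1x1 expand, plus a
# projection shortcut when the shape changes). `conv_bn` / `conv_bn_relu` are
# assumed conv+batch-norm helpers in the style of `convolution_bn` below:
def resnet_bottleneck(input, out_num_filters, inter_out_num_filters):
    c1 = conv_bn_relu(input, (1, 1), inter_out_num_filters)
    c2 = conv_bn_relu(c1, (3, 3), inter_out_num_filters)
    c3 = conv_bn(c2, (1, 1), out_num_filters)
    return C.relu(c3 + input)

def resnet_bottleneck_inc(input, out_num_filters, inter_out_num_filters, stride1x1, stride3x3):
    c1 = conv_bn_relu(input, (1, 1), inter_out_num_filters, strides=stride1x1)
    c2 = conv_bn_relu(c1, (3, 3), inter_out_num_filters, strides=stride3x3)
    c3 = conv_bn(c2, (1, 1), out_num_filters)
    # projection shortcut carries the combined stride of the two conv stages
    s = conv_bn(input, (1, 1), out_num_filters,
                strides=tuple(a * b for a, b in zip(stride1x1, stride3x3)))
    return C.relu(c3 + s)

def resnet_bottleneck_stack(input, num_stack_layers, out_num_filters, inter_out_num_filters):
    l = input
    for _ in range(num_stack_layers):
        l = resnet_bottleneck(l, out_num_filters, inter_out_num_filters)
    return l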
def convolution_bn(input, filter_size, num_filters, strides=(1, 1), init=C.normal(0.01), activation=C.relu):
    r = C.layers.Convolution(filter_size, num_filters, strides=strides, init=init,
                             activation=None, pad=True, bias=False)(input)
    r = C.layers.BatchNormalization(map_rank=1)(r)
    r = r if activation is None else activation(r)
    return r
def pix2pix_generator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        # encoder
        h_enc1 = C.leaky_relu(Convolution2D((4, 4), 64, strides=2, bias=True)(h), alpha=0.2)
        h_enc2 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 128, strides=2)(h_enc1)), alpha=0.2)
        h_enc3 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 256, strides=2)(h_enc2)), alpha=0.2)
        h_enc4 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc3)), alpha=0.2)
        h_enc5 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc4)), alpha=0.2)
        h_enc6 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc5)), alpha=0.2)
        h_enc7 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=1)(h_enc6)), alpha=0.2)
        h_enc8 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=1)(h_enc7)), alpha=0.2)

        # decoder with U-Net skip connections (C.splice concatenates along the channel axis)
        h_dec8 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_enc8)))
        h_dec8 = C.splice(h_dec8, h_enc8, axis=0)
        h_dec8 = C.relu(h_dec8)

        h_dec7 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_dec8)))
        h_dec7 = C.splice(h_dec7, h_enc7, axis=0)
        h_dec7 = C.relu(h_dec7)

        h_dec6 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_dec7)))
        h_dec6 = C.splice(h_dec6, h_enc6, axis=0)
        h_dec6 = C.relu(h_dec6)

        h_dec5 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=2, pad=True, output_shape=(img_height // 32, img_width // 32))(h_dec6)))
        h_dec5 = C.splice(h_dec5, h_enc5, axis=0)
        h_dec5 = C.relu(h_dec5)

        h_dec4 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=2, pad=True, output_shape=(img_height // 16, img_width // 16))(h_dec5)))
        h_dec4 = C.splice(h_dec4, h_enc4, axis=0)
        h_dec4 = C.relu(h_dec4)

        h_dec3 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 256, strides=2, pad=True, output_shape=(img_height // 8, img_width // 8))(h_dec4)))
        h_dec3 = C.splice(h_dec3, h_enc3, axis=0)
        h_dec3 = C.relu(h_dec3)

        h_dec2 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 128, strides=2, pad=True, output_shape=(img_height // 4, img_width // 4))(h_dec3)))
        h_dec2 = C.splice(h_dec2, h_enc2, axis=0)
        h_dec2 = C.relu(h_dec2)

        h_dec1 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 64, strides=2, pad=True, output_shape=(img_height // 2, img_width // 2))(h_dec2)))
        h_dec1 = C.splice(h_dec1, h_enc1, axis=0)
        h_dec1 = C.relu(h_dec1)

        h = ConvolutionTranspose2D((4, 4), 3, activation=C.tanh, strides=2, pad=True, bias=True,
                                   output_shape=(img_height, img_width))(h_dec1)

        return h
def cyclegan_discriminator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False):
        h = C.leaky_relu(Convolution2D((3, 3), 64, strides=2, bias=True)(h), alpha=0.2)
        h = C.leaky_relu(InstanceNormalization((128, 1, 1))(Convolution2D((3, 3), 128, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(InstanceNormalization((256, 1, 1))(Convolution2D((3, 3), 256, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(InstanceNormalization((512, 1, 1))(Convolution2D((3, 3), 512, strides=2)(h)), alpha=0.2)

        h = Convolution2D((1, 1), 1, activation=None, bias=True)(h)

        return h
def cgan_generator(z, y):
    with C.layers.default_options(init=C.normal(scale=0.02), bias=False, map_rank=1, use_cntk_engine=True):
        h = C.splice(z, y, axis=0)

        h = C.relu(BatchNormalization()(Dense(1024)(h)))
        h = C.relu(BatchNormalization()(Dense((128, 7, 7))(h)))
        h = C.relu(BatchNormalization()(ConvolutionTranspose2D(
            (5, 5), 128, strides=(2, 2), pad=True, output_shape=(14, 14))(h)))
        h = ConvolutionTranspose2D((5, 5), 1, activation=C.sigmoid,
                                   strides=(2, 2), pad=True, output_shape=(28, 28))(h)

    return C.reshape(h, input_dim)
def create_model(input):
    conv = convolution_bn(input, (3, 3), 16)
    r1_1 = resnet_basic_stack(conv, 16, 3)

    r2_1 = resnet_basic_inc(r1_1, 32)
    r2_2 = resnet_basic_stack(r2_1, 32, 2)

    r3_1 = resnet_basic_inc(r2_2, 64)
    r3_2 = resnet_basic_stack(r3_1, 64, 2)

    # Global average pooling
    pool = C.layers.AveragePooling(filter_shape=(8, 8), strides=(1, 1))(r3_2)
    return C.layers.Dense(10, init=C.normal(0.01), activation=None)(pool)
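# A sketch of the `resnet_basic*` helpers this model assumes, built on the
# `convolution_bn` helper above; the increasing variant halves the spatial
# resolution and uses a strided 1x1 projection on the shortcut:
def resnet_basic(input, num_filters):
    c1 = convolution_bn(input, (3, 3), num_filters)
    c2 = convolution_bn(c1, (3, 3), num_filters, activation=None)
    return C.relu(c2 + input)

def resnet_basic_inc(input, num_filters):
    c1 = convolution_bn(input, (3, 3), num_filters, strides=(2, 2))
    c2 = convolution_bn(c1, (3, 3), num_filters, activation=None)
    s = convolution_bn(input, (1, 1), num_filters, strides=(2, 2), activation=None)
    return C.relu(c2 + s)

def resnet_basic_stack(input, num_filters, num_stack_layers):
    l = input
    for _ in range(num_stack_layers):
        l = resnet_basic(l, num_filters)
    return l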
def cgan_discriminator(x, y):
    with C.layers.default_options(init=C.normal(scale=0.02), map_rank=1, use_cntk_engine=True):
        hx = C.reshape(x, (1, 28, 28))
        hy = C.ones_like(hx) * C.reshape(y, (label_dim, 1, 1))
        h = C.splice(hx, hy, axis=0)

        h = C.leaky_relu(Convolution2D((5, 5), 1, strides=(2, 2))(h), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((5, 5), 64, strides=(2, 2))(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Dense(1024)(h)), alpha=0.2)

        h = Dense(1, activation=C.sigmoid)(h)

    return h
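# A minimal wiring sketch for the conditional pair above; the dimension names
# (z_dim, label_dim, input_dim) and the standard non-saturating GAN losses are
# assumptions, not part of the snippets:
z = C.input_variable(z_dim)
y = C.input_variable(label_dim)
x = C.input_variable(input_dim)

x_fake = cgan_generator(z, y)
D_real = cgan_discriminator(x, y)
# the second application of the discriminator shares its parameters via clone()
D_fake = D_real.clone(method='share', substitutions={x: x_fake.output})

G_loss = -C.log(D_fake)
D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))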
def pix2pix_discriminator(y, x):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        x = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(x), alpha=0.2)
        y = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(y), alpha=0.2)

        h = C.splice(x, y, axis=0)

        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 128, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 256, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 512, strides=2)(h)), alpha=0.2)

        h = Convolution2D((1, 1), 1, activation=None, bias=True)(h)

        return h
def dcgan_discriminator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 64, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 128, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 256, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 512, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 1024, strides=2)(h)), alpha=0.2)

        h = Convolution2D((4, 4), 1, activation=C.sigmoid, pad=False, bias=True, strides=1)(h)

        return h
def GaussianWindowAttention(nb_mixtures):
    """
    Implementation of the attention model found in "Generating sequences with recurrent neural networks"
    by Alex Graves. Gaussian window attention uses a directional mixture of gaussian kernels as the
    convolution/attention window. For more details, see https://arxiv.org/abs/1308.0850

    Example:
        seq1 = C.Axis.new_unique_dynamic_axis('seq1')
        seq2 = C.Axis.new_unique_dynamic_axis('seq2')
        encoded = C.sequence.input_variable(30, sequence_axis=seq1)
        query = C.sequence.input_variable(28, sequence_axis=seq2)

        a = GaussianWindowAttention(10)(encoded, query)

        assert a.shape == (30, )

    Arguments:
        nb_mixtures (int): number of gaussian mixtures to use for attention model

    Returns:
        :class:`~cntk.ops.functions.Function`:
    """
    dense = Dense(shape=3 * nb_mixtures, activation=None, init=C.normal(0.075), name="GravesAttention")

    def window_weight(a, b, k, u):
        """
        Calculate phi, the window weight of the character sequence at position u at time t.
        Function tested to be correct on 2018-02-25 against the numpy equivalent:

            phi = sum over mixtures { a * exp(-b * (k - u) ^ 2) }

        Args:
            a: importance of window within the mixture. Not normalised and doesn't sum to one.
            b: width of attention window
            k: location of window
            u: integer position of each item in sequence. Value from 1 to seq_length. (rank 2 tensor) [-3, 1]

        Returns:
            :class:`~cntk.ops.functions.Function`
        """
        phi = a * C.exp(-1 * b * C.square(k - u))
        phi = C.swapaxes(C.reduce_sum(phi, axis=0))  # reduce-sum over the mixture axis
        # phi: [#, n] [*-c, 1]
        return phi

    @C.typemap
    def gaussian_windows_attention_coefficients(abk, nb_mixtures):
        """ Split into 3 equal tensors of dim nb_mixtures """
        a = C.exp(C.slice(abk, 0, 0, nb_mixtures))
        b = C.exp(C.slice(abk, 0, nb_mixtures, 2 * nb_mixtures))
        k = C.exp(C.slice(abk, 0, 2 * nb_mixtures, 0))
        k = Recurrence(C.plus)(k)

        a = C.expand_dims(a, axis=-1)
        b = C.expand_dims(b, axis=-1)
        k = C.expand_dims(k, axis=-1)
        return a, b, k

    @C.Function
    def attention(encoded, network):
        abk = dense(network)
        a, b, k = gaussian_windows_attention_coefficients(abk, nb_mixtures)
        # a, b, k: [#, n] [nb_mixture, 1]
        # context: [#, c] [char_ohe]

        encoded_unpacked = C.sequence.unpack(encoded, padding_value=0, no_mask_output=True)
        # encoded_unpacked: [#] [*=c, char_ohe]

        u = Cx.sequence.position(encoded)
        # u: [#, c], [1]

        u_values, u_valid = C.sequence.unpack(u, padding_value=0).outputs
        # u_values: [#] [*=c]
        # u_valid: [#] [*=c]

        u_values_broadcast = C.swapaxes(C.sequence.broadcast_as(u_values, k))
        # u_values_broadcast: [#, n] [1, *=c]

        u_valid_broadcast = C.sequence.broadcast_as(C.reshape(u_valid, (1,), 1), k)
        # u_valid_broadcast: [#, n] [*=c, 1] ~ shape verified correct at this point

        phi = window_weight(a, b, k, u_values_broadcast)
        # phi: [#, n] [*=c, 1]

        zero = C.constant(0)
        phi = C.element_select(u_valid_broadcast, phi, zero, name="phi")
        # phi: [#, n] [*=c, 1]

        attended = C.reduce_sum(phi * C.sequence.broadcast_as(encoded_unpacked, phi), axis=0)
        # attended: [#, n] [1, char_ohe]

        output = C.squeeze(attended, name="GaussianWindowAttention")
        # output: [#, n] [char_ohe]
        return output

    return attention
if not args.lstm:
    input_ph = C.sequence.input_variable(2)
    targets_ph = C.input_variable(shape=1)

    runit1 = IndRNNUnit(HIDDEN_DIM, 2, recurrent_max_abs=RECURRENT_MAX, recurrent_min_abs=0)
    runit2 = IndRNNUnit(HIDDEN_DIM, HIDDEN_DIM, recurrent_max_abs=RECURRENT_MAX, recurrent_min_abs=U_lowbound)

    model = C.layers.Sequential([
        C.layers.Recurrence(runit1.build()),
        C.layers.Fold(runit2.build()),
        C.layers.Dense(1, init_bias=0.1, init=C.normal(0.001))
    ])

    output = model(input_ph)
    loss = C.reduce_mean(C.square(output - targets_ph))  # C.losses.squared_error(output, targets_ph)
    comp = C.combine(output, loss)

    tensorboard_writer = C.logging.TensorBoardProgressWriter(bs, log_dir='.', model=loss)
    learner = C.learners.adam(loss.parameters, lr_schedule, 0.9)
    trainer = C.Trainer(output, loss, learner, [ProgressPrinter(20), tensorboard_writer])

    for step in range(60000):
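# (The training-loop body is elided in the original snippet.) The IndRNNUnit
# class referenced above is also not shown; below is a hypothetical sketch,
# assuming the IndRNN recurrence h_t = relu(W x_t + u * h_{t-1}) with an
# element-wise (independent) recurrent weight u initialized inside
# [recurrent_min_abs, recurrent_max_abs]. In the paper, u is additionally
# clipped to that range after each update, which this sketch omits:
import numpy as np
import cntk as C

class IndRNNUnit:
    def __init__(self, hidden_dim, input_dim, recurrent_max_abs, recurrent_min_abs=0):
        self.W = C.parameter((input_dim, hidden_dim), init=C.glorot_uniform())
        self.u = C.parameter((hidden_dim,),
                             init=np.random.uniform(recurrent_min_abs, recurrent_max_abs,
                                                    size=hidden_dim).astype(np.float32))
        self.b = C.parameter((hidden_dim,), init=0.1)

    def build(self):
        @C.Function
        def step(h_prev, x):
            # each hidden unit sees only its own previous state (element-wise u)
            return C.relu(C.times(x, self.W) + self.u * h_prev + self.b)
        return step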
def DigitCaps(input, num_capsules, dim_out_vector, routings=3, name='DigitCaps'):
    '''
    Function to create an instance of a digit capsule.

    Args:
        input: Input Tensor
        num_capsules (int): Number of output capsules
        dim_out_vector (int): Number of dimensions of the capsule output vector
        routings (int, optional): The number of routing iterations
        name (str, optional): The name of the Function instance in the network.
    '''
    # Learnable Parameters
    W = ct.Parameter(shape=(1152, 10, 16, 8), init=ct.normal(0.01), name=name + '_Weights')

    # reshape input for broadcasting on all output capsules
    input = ct.reshape(input, (1152, 1, 1, 8), name='reshape_input')

    # Output shape = [#](1152, 10, 16, 1)
    u_hat = ct.reduce_sum(W * input, axis=3)

    # we don't need gradients on routing
    u_hat_stopped = ct.stop_gradient(u_hat, name='stop_gradient')

    # all the routing logits (Bij) are initialized to zero for each routing.
    Bij = ct.Constant(np.zeros((1152, 10, 1, 1), dtype=np.float32))

    # line 3, for r iterations do
    for r_iter in range(routings):
        # line 4: for all capsule i in layer l: ci ← softmax(bi) => Cij
        # Output shape = [#][1152, 10, 1, 1]
        Cij = ct.softmax(Bij, axis=1)

        # At the last iteration, use `u_hat` in order to receive gradients from the following graph
        if r_iter == routings - 1:
            # line 5: for all capsule j in layer (l + 1): sj ← sum(cij * u_hat)
            # Output shape = [#][1152, 10, 16, 1]
            Sj = ct.reduce_sum(ct.element_times(Cij, u_hat, 'weighted_u_hat'), axis=0)

            # line 6: for all capsule j in layer (l + 1): vj ← squash(sj)
            # Output shape = [#][1, 10, 16, 1]
            Vj = Squash(Sj)
        elif r_iter < routings - 1:
            # line 5: for all capsule j in layer (l + 1): sj ← sum(cij * u_hat)
            # Output shape = [#][1152, 10, 16, 1]
            Sj = ct.reduce_sum(ct.element_times(Cij, u_hat_stopped), axis=0)

            # line 6: for all capsule j in layer (l + 1): vj ← squash(sj)
            # Output shape = [#][1, 10, 16, 1]
            Vj = Squash(Sj)

            # line 7: for all capsule i in layer l and capsule j in layer (l + 1): bij ← bij + ^uj|i * vj
            # Output shape = [#][1, 10, 1, 16]
            Vj_Transpose = ct.transpose(ct.reshape(Vj, (1, 10, 16, 1)), (0, 1, 3, 2), name='Vj_Transpose')
            # Output shape = [#][1152, 10, 1, 1]
            UV = ct.reduce_sum(ct.reshape(u_hat_stopped, (1152, 10, 1, 16)) * Vj_Transpose, axis=3)
            Bij += UV

    # Output shape = [#][10, 16, 1]
    Vj = ct.reshape(Vj, (10, 16, 1), name='digit_caps_output')

    return Vj
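# A sketch of the `Squash` non-linearity used above (not shown in this
# snippet); it implements squash(s) = (||s||^2 / (1 + ||s||^2)) * s / ||s||
# from the capsule-network paper. Taking the norm over axis=2 (the
# 16-dimensional capsule-vector axis in the [_, 10, 16, 1] shapes above) is
# an assumption:
def Squash(Sj, axis=2, epsilon=1e-7):
    s_squared_norm = ct.reduce_sum(ct.square(Sj), axis=axis)  # keeps the reduced axis as 1
    scale = s_squared_norm / (1.0 + s_squared_norm)
    return scale * Sj / ct.sqrt(s_squared_norm + epsilon)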
def dcgan_generator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        h = C.reshape(h, (-1, 1, 1))

        h = ConvolutionTranspose2D((4, 4), 1024, pad=False, strides=1, output_shape=(4, 4))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 512, strides=2, output_shape=(img_height // 32, img_width // 32))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 256, strides=2, output_shape=(img_height // 16, img_width // 16))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 128, strides=2, output_shape=(img_height // 8, img_width // 8))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 64, strides=2, output_shape=(img_height // 4, img_width // 4))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 32, strides=2, output_shape=(img_height // 2, img_width // 2))(h)
        h = BatchNormalization()(h)
        h = C.relu(h)

        h = ConvolutionTranspose2D((5, 5), 3, strides=2, bias=True, output_shape=(img_height, img_width))(h)
        h = C.tanh(h)

        return h
def residual_block(h, num_filters):
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        h1 = C.relu(InstanceNormalization((num_filters, 1, 1))(Convolution2D((3, 3), num_filters)(h)))
        h2 = InstanceNormalization((num_filters, 1, 1))(Convolution2D((3, 3), num_filters)(h1))
    return h2 + h
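# CNTK has no built-in InstanceNormalization layer, so the CycleGAN snippets
# above assume a custom one. A minimal sketch under that assumption; the
# signature taking the per-channel parameter shape, e.g. (64, 1, 1), mirrors
# how it is called above:
def InstanceNormalization(param_shape, epsilon=1e-5):
    scale = C.parameter(param_shape, init=1.0)
    bias = C.parameter(param_shape, init=0.0)

    def apply(x):
        # normalize each feature map over its spatial axes, per sample
        mean = C.reduce_mean(x, axis=[1, 2])
        x0 = x - mean
        var = C.reduce_mean(C.square(x0), axis=[1, 2])
        return scale * (x0 / C.sqrt(var + epsilon)) + bias

    return apply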