def PooledOutput(sequence_output, hidden_size, initializer_range):
    with flow.scope.namespace("bert-pooler"):
        # keep only the hidden state of the first token, then project it
        first_token_tensor = flow.slice(sequence_output, [None, 0, 0], [None, 1, -1])
        first_token_tensor = flow.reshape(first_token_tensor, [-1, hidden_size])
        pooled_output = bert_util._FullyConnected(
            first_token_tensor,
            input_size=hidden_size,
            units=hidden_size,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name="dense",
        )
        pooled_output = flow.math.tanh(pooled_output)
    return pooled_output
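# Illustrative sketch (not part of the original model code): the slice/reshape
# above keeps only the first token's hidden state before the dense projection.
# The same shape manipulation in NumPy, with toy sizes assumed:
import numpy as np

sequence_output_np = np.arange(2 * 4 * 3, dtype=np.float32).reshape(2, 4, 3)  # (batch, seq, hidden)
first_token_np = sequence_output_np[:, 0:1, :]   # slice: (2, 1, 3), first token only
first_token_np = first_token_np.reshape(-1, 3)   # reshape: (2, 3), ready for the dense layer
print(first_token_np.shape)                      # (2, 3)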
def FlowJob(x: flow.typing.Numpy.Placeholder((4, 6), dtype=flow.float)):
    with flow.scope.placement("gpu", "0:0-3", (2, 2)):
        v = flow.get_variable(
            "x",
            shape=(4, 6),
            dtype=flow.float,
            initializer=flow.constant_initializer(0),
            trainable=True,
            nd_sbp=["S(0)", "S(1)"],
        )
        x = flow.hierarchical_parallel_cast(x, nd_sbp=["S(0)", "S(1)"])
        x += v
        loss = flow.reshape(x, (4, 2, 3))
        loss = flow.hierarchical_parallel_cast(loss, nd_sbp=["S(0)"])
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(loss)
        return loss
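# Illustrative sketch (assumption about nd_sbp semantics, not OneFlow API): the
# nd_sbp=["S(0)", "S(1)"] annotation above places the (4, 6) tensor on a 2 x 2
# device hierarchy, splitting dim 0 over the first mesh axis and dim 1 over the
# second. The per-device shards this implies, in NumPy with toy data:
import numpy as np

full = np.arange(24).reshape(4, 6)
shards = [np.hsplit(rows, 2) for rows in np.vsplit(full, 2)]
for i in range(2):
    for j in range(2):
        print("device (%d, %d) shard shape:" % (i, j), shards[i][j].shape)  # (2, 3)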
def resnet50(images, args, trainable=True, training=True):
    weight_regularizer = (
        flow.regularizers.l2(args.wd) if args.wd > 0.0 and args.wd < 1.0 else None
    )
    builder = ResnetBuilder(
        weight_regularizer,
        trainable,
        training,
        args.channel_last,
        args.fuse_bn_relu,
        args.fuse_bn_add_relu,
    )
    if args.pad_output:
        if args.channel_last:
            paddings = ((0, 0), (0, 0), (0, 0), (0, 1))
        else:
            paddings = ((0, 0), (0, 1), (0, 0), (0, 0))
        images = flow.pad(images, paddings=paddings)
    with flow.scope.namespace("Resnet"):
        stem = builder.resnet_stem(images)
        body = builder.resnet_conv_x_body(stem)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format=builder.data_format,
            name="pool5",
        )
        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1000,
            use_bias=True,
            kernel_initializer=flow.variance_scaling_initializer(
                2, "fan_in", "random_normal"
            ),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=weight_regularizer,
            bias_regularizer=weight_regularizer,
            trainable=trainable,
            name="fc1001",
        )
    return fc1001
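# Illustrative sketch (not part of the original code): with pad_output enabled,
# the input channels are padded from 3 to 4 (last axis in NHWC, the axis right
# after batch in NCHW). The NCHW case in NumPy, toy shape assumed:
import numpy as np

images_np = np.zeros((8, 3, 224, 224), dtype=np.float32)              # NCHW input
padded_np = np.pad(images_np, ((0, 0), (0, 1), (0, 0), (0, 0)))       # channels 3 -> 4
print(padded_np.shape)                                                # (8, 4, 224, 224)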
def generator(self, z, const_init=False, trainable=True):
    # (n, 256, 7, 7)
    h0 = layers.dense(
        z, 7 * 7 * 256, name="g_fc1", const_init=const_init, trainable=trainable
    )
    h0 = layers.batchnorm(h0, axis=1, name="g_bn1")
    h0 = flow.nn.leaky_relu(h0, 0.3)
    h0 = flow.reshape(h0, (-1, 256, 7, 7))
    # (n, 128, 7, 7)
    h1 = layers.deconv2d(
        h0,
        128,
        5,
        strides=1,
        name="g_deconv1",
        const_init=const_init,
        trainable=trainable,
    )
    h1 = layers.batchnorm(h1, name="g_bn2")
    h1 = flow.nn.leaky_relu(h1, 0.3)
    # (n, 64, 14, 14)
    h2 = layers.deconv2d(
        h1,
        64,
        5,
        strides=2,
        name="g_deconv2",
        const_init=const_init,
        trainable=trainable,
    )
    h2 = layers.batchnorm(h2, name="g_bn3")
    h2 = flow.nn.leaky_relu(h2, 0.3)
    # (n, 1, 28, 28)
    out = layers.deconv2d(
        h2,
        1,
        5,
        strides=2,
        name="g_deconv3",
        const_init=const_init,
        trainable=trainable,
    )
    out = flow.math.tanh(out)
    return out
def ReshapeJob():
    with flow.scope.placement(device_type, "0:0"):
        x = flow.get_variable(
            "in",
            shape=input_shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=2, maxval=5),
            trainable=True,
        )
        loss = flow.reshape(x, shape)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
        ).minimize(loss)

        flow.watch(x, test_global_storage.Setter("x"))
        flow.watch_diff(x, test_global_storage.Setter("x_diff"))
        flow.watch(loss, test_global_storage.Setter("loss"))
        flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

        return loss
def self_attn_qk_v_fw_bw(
    h: flow.typing.Numpy.Placeholder(
        shape=(seq_len, batch_size, hidden_size), dtype=flow.float32
    )
) -> typing.Tuple[flow.typing.Numpy, flow.typing.Numpy]:
    var = flow.get_variable(
        "var",
        shape=(1,),
        dtype=flow.float32,
        initializer=flow.constant_initializer(1.0, dtype=flow.float32),
        trainable=True,
    )
    h = h * var
    if fused:
        flow.watch_diff(h, test_global_storage.Setter("h_grad_fused"))
    else:
        flow.watch_diff(h, test_global_storage.Setter("h_grad"))
    if fp16:
        h = flow.amp_white_identity(h)
    alpha = get_alpha(head_size)
    if fused:
        (qmk, v) = flow.nn.fused_self_attention_query_mul_key_and_value(
            h, head_size=head_size, alpha=alpha
        )
    else:
        h = flow.reshape(h, (seq_len, batch_size, -1, 3 * head_size))
        (q, k, v) = (
            flow.transpose(
                flow.slice(
                    h,
                    begin=[None, None, None, head_size * i],
                    size=[None, None, None, head_size],
                ),
                perm=[1, 2, 0, 3],
            )
            for i in range(3)
        )
        qmk = flow.matmul(q, k, transpose_b=True, alpha=alpha)
    h = flow.matmul(qmk, v)
    loss = flow.math.reduce_sum(h)
    flow.optimizer.SGD(get_lr_scheduler(), momentum=0).minimize(loss)
    return (qmk, v)
def resnet50(images, trainable=True):
    with flow.scope.namespace("Resnet"):
        stem = resnet_stem(images)
        body = resnet_conv_x_body(stem, lambda x: x)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format="NCHW",
            name="pool5",
        )
        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1001,
            use_bias=True,
            kernel_initializer=flow.xavier_uniform_initializer(),
            bias_initializer=flow.zeros_initializer(),
            trainable=trainable,
            name="fc1001",
        )
    return fc1001
def resnet50(
    images,
    trainable=True,
    need_transpose=False,
    training=True,
    wd=1.0 / 32768,
    channel_last=False,
):
    weight_regularizer = flow.regularizers.l2(wd) if wd > 0.0 and wd < 1.0 else None
    builder = ResnetBuilder(weight_regularizer, trainable, training, channel_last)
    if need_transpose:
        images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])
    if channel_last:
        images = flow.transpose(images, name="transpose", perm=[0, 2, 3, 1])
    with flow.scope.namespace("Resnet"):
        stem = builder.resnet_stem(images)
        body = builder.resnet_conv_x_body(stem)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format=builder.data_format,
            name="pool5",
        )
        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1000,
            use_bias=True,
            kernel_initializer=flow.variance_scaling_initializer(
                2, "fan_in", "random_normal"
            ),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=weight_regularizer,
            bias_regularizer=weight_regularizer,
            trainable=trainable,
            name="fc1001",
        )
    return fc1001
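# Illustrative sketch (not part of the original code): need_transpose and
# channel_last above are plain layout permutations of the image blob. The two
# perms in NumPy, toy shapes assumed:
import numpy as np

nhwc = np.zeros((8, 224, 224, 3))
print(nhwc.transpose(0, 3, 1, 2).shape)   # perm=[0, 3, 1, 2]: NHWC -> NCHW, (8, 3, 224, 224)

nchw = np.zeros((8, 3, 224, 224))
print(nchw.transpose(0, 2, 3, 1).shape)   # perm=[0, 2, 3, 1]: NCHW -> NHWC, (8, 224, 224, 3)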
def _AddMaskedLanguageModelLoss(
    input_blob,
    output_weights_blob,
    positions_blob,
    label_id_blob,
    label_weight_blob,
    seq_length,
    hidden_size,
    vocab_size,
    max_predictions_per_seq,
    hidden_act,
    initializer_range,
):
    with flow.scope.namespace("other"):
        sum_label_weight_blob = flow.math.reduce_sum(label_weight_blob, axis=[-1])
        ones = sum_label_weight_blob * 0.0 + 1.0
        sum_label_weight_blob = flow.math.reduce_sum(sum_label_weight_blob)
        batch_size = flow.math.reduce_sum(ones)
        sum_label_weight_blob = sum_label_weight_blob / batch_size
    with flow.scope.namespace("cls-predictions"):
        input_blob = _GatherIndexes(input_blob, positions_blob, seq_length, hidden_size)
        with flow.scope.namespace("transform"):
            if callable(hidden_act):
                act_fn = op_conf_util.kNone
            else:
                act_fn = hidden_act
            input_blob = bert_util._FullyConnected(
                input_blob,
                input_size=hidden_size,
                units=hidden_size,
                activation=act_fn,
                weight_initializer=bert_util.CreateInitializer(initializer_range),
                name="dense",
            )
            if callable(hidden_act):
                input_blob = hidden_act(input_blob)
            input_blob = bert_util._LayerNorm(input_blob, hidden_size)
        output_bias = flow.get_variable(
            name="output_bias",
            shape=[vocab_size],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(1.0),
        )
        logit_blob = flow.matmul(input_blob, output_weights_blob, transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias)
        label_id_blob = flow.reshape(label_id_blob, [-1])
        pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit_blob, labels=label_id_blob
        )
        pre_example_loss = flow.reshape(pre_example_loss, [-1, max_predictions_per_seq])
        numerator = pre_example_loss * label_weight_blob
        with flow.scope.namespace("loss"):
            numerator = flow.math.reduce_sum(numerator, axis=[-1])
            denominator = sum_label_weight_blob + 1e-5
            loss = numerator / denominator
        return loss, pre_example_loss, logit_blob
def _ComputeLoss(logits, positions):
    logits = flow.reshape(logits, [-1, args.seq_length])
    probs = flow.nn.softmax(logits)
    pre_example_loss = flow.nn.sparse_cross_entropy(labels=positions, prediction=probs)
    return pre_example_loss
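# Illustrative sketch (not part of the original code): sparse_cross_entropy is
# fed probabilities here (softmax is applied first), so the per-example loss is
# -log(prob of the target index). The same computation in NumPy, toy values:
import numpy as np

logits_np = np.array([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]])                  # (batch, seq_length)
positions_np = np.array([0, 1])                                           # target indices
probs_np = np.exp(logits_np) / np.exp(logits_np).sum(-1, keepdims=True)   # softmax
loss_np = -np.log(probs_np[np.arange(len(positions_np)), positions_np])   # sparse cross entropy
print(loss_np)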
def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length, to_seq_length):
    output = flow.cast(to_mask_blob, dtype=flow.float)
    output = flow.reshape(output, [-1, 1, to_seq_length])
    zeros = flow.constant(0.0, dtype=flow.float, shape=[from_seq_length, to_seq_length])
    output = zeros + output
    return output
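# Illustrative sketch (not part of the original code): the zeros + output trick
# broadcasts the (batch, 1, to_seq) mask to (batch, from_seq, to_seq), so every
# query position sees the same key mask. A NumPy check with toy sizes:
import numpy as np

to_mask_np = np.array([[1, 1, 0], [1, 0, 0]], dtype=np.float32)   # (batch=2, to_seq=3)
output_np = to_mask_np.reshape(-1, 1, 3)                          # (2, 1, 3)
zeros_np = np.zeros((3, 3), dtype=np.float32)                     # (from_seq, to_seq)
attention_mask_np = zeros_np + output_np                          # broadcasts to (2, 3, 3)
print(attention_mask_np.shape)
print(attention_mask_np[0])                                       # identical row per query position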
def TransposeForScores(input_blob, num_attention_heads, seq_length, width):
    output_blob = flow.reshape(
        input_blob, [-1, seq_length, num_attention_heads, width]
    )
    output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3])
    return output_blob
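# Illustrative sketch (not part of the original code): TransposeForScores turns
# a flat (batch * seq, heads * width) blob into a per-head (batch, heads, seq,
# width) layout suitable for batched matmul. In NumPy, with toy sizes:
import numpy as np

x_np = np.zeros((2 * 4, 3 * 5), dtype=np.float32)   # batch=2, seq=4, heads=3, width=5
y_np = x_np.reshape(-1, 4, 3, 5)                     # (batch, seq, heads, width)
y_np = y_np.transpose(0, 2, 1, 3)                    # (batch, heads, seq, width)
print(y_np.shape)                                    # (2, 3, 4, 5)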
def _AttentionLayer(
    from_blob,
    to_blob,
    attention_mask_blob,
    num_attention_heads=1,
    size_per_head=512,
    query_act=op_conf_util.kNone,
    key_act=op_conf_util.kNone,
    value_act=op_conf_util.kNone,
    attention_probs_dropout_prob=0.0,
    initializer_range=0.02,
    do_return_2d_tensor=False,
    batch_size=None,
    from_seq_length=None,
    to_seq_length=None,
):
    def TransposeForScores(input_blob, num_attention_heads, seq_length, width):
        output_blob = flow.reshape(
            input_blob, [-1, seq_length, num_attention_heads, width]
        )
        output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3])
        return output_blob

    from_blob_2d = flow.reshape(from_blob, [-1, num_attention_heads * size_per_head])
    to_blob_2d = flow.reshape(to_blob, [-1, num_attention_heads * size_per_head])

    query_blob = _FullyConnected(
        from_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=query_act,
        name="query",
        weight_initializer=CreateInitializer(initializer_range),
    )
    key_blob = _FullyConnected(
        to_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=key_act,
        name="key",
        weight_initializer=CreateInitializer(initializer_range),
    )
    value_blob = _FullyConnected(
        to_blob_2d,
        input_size=num_attention_heads * size_per_head,
        units=num_attention_heads * size_per_head,
        activation=value_act,
        name="value",
        weight_initializer=CreateInitializer(initializer_range),
    )

    query_blob = TransposeForScores(
        query_blob, num_attention_heads, from_seq_length, size_per_head
    )
    key_blob = TransposeForScores(
        key_blob, num_attention_heads, to_seq_length, size_per_head
    )

    attention_scores_blob = flow.matmul(query_blob, key_blob, transpose_b=True)
    attention_scores_blob = attention_scores_blob * (
        1.0 / math.sqrt(float(size_per_head))
    )

    attention_mask_blob = flow.reshape(
        attention_mask_blob, [-1, 1, from_seq_length, to_seq_length]
    )
    attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float)
    addr_blob = (attention_mask_blob - 1.0) * 10000.0
    attention_scores_blob = attention_scores_blob + addr_blob

    attention_probs_blob = flow.nn.softmax(attention_scores_blob)
    attention_probs_blob = _Dropout(attention_probs_blob, attention_probs_dropout_prob)

    value_blob = flow.reshape(
        value_blob, [-1, to_seq_length, num_attention_heads, size_per_head]
    )
    value_blob = flow.transpose(value_blob, perm=[0, 2, 1, 3])
    context_blob = flow.matmul(attention_probs_blob, value_blob)
    context_blob = flow.transpose(context_blob, perm=[0, 2, 1, 3])

    if do_return_2d_tensor:
        context_blob = flow.reshape(
            context_blob, [-1, num_attention_heads * size_per_head]
        )
    else:
        context_blob = flow.reshape(
            context_blob, [-1, from_seq_length, num_attention_heads * size_per_head]
        )
    return context_blob
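# Illustrative sketch (not part of the original code): the (attention_mask - 1)
# * 10000 term masks additively; allowed positions get 0 added to their scores,
# disallowed positions get -10000, which all but vanishes after softmax. A
# NumPy illustration with toy scores:
import numpy as np

scores_np = np.array([[2.0, 1.0, 0.5]])          # one query's scores over three keys
mask_np = np.array([[1.0, 1.0, 0.0]])            # last key position is padding
masked_np = scores_np + (mask_np - 1.0) * 10000.0
probs_np = np.exp(masked_np) / np.exp(masked_np).sum(-1, keepdims=True)
print(probs_np)                                  # padded key gets ~zero attention weight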
def _TransformerModel(
    input_blob,
    attention_mask_blob,
    seq_length,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    intermediate_act_fn=_Gelu,
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    initializer_range=0.02,
    do_return_all_layers=False,
):
    assert hidden_size % num_attention_heads == 0
    attention_head_size = int(hidden_size / num_attention_heads)
    input_width = hidden_size
    prev_output_blob = flow.reshape(input_blob, (-1, input_width))
    all_layer_output_blobs = []
    for layer_idx in range(num_hidden_layers):
        with flow.scope.namespace("layer_%d" % layer_idx):
            layer_input_blob = prev_output_blob
            with flow.scope.namespace("attention"):
                with flow.scope.namespace("self"):
                    attention_output_blob = _AttentionLayer(
                        from_blob=layer_input_blob,
                        to_blob=layer_input_blob,
                        attention_mask_blob=attention_mask_blob,
                        num_attention_heads=num_attention_heads,
                        size_per_head=attention_head_size,
                        attention_probs_dropout_prob=attention_probs_dropout_prob,
                        initializer_range=initializer_range,
                        do_return_2d_tensor=True,
                        from_seq_length=seq_length,
                        to_seq_length=seq_length,
                    )
                with flow.scope.namespace("output"):
                    attention_output_blob = _FullyConnected(
                        attention_output_blob,
                        input_size=num_attention_heads * attention_head_size,
                        units=hidden_size,
                        weight_initializer=CreateInitializer(initializer_range),
                        name="dense",
                    )
                    attention_output_blob = _Dropout(
                        attention_output_blob, hidden_dropout_prob
                    )
                    attention_output_blob = attention_output_blob + layer_input_blob
                    attention_output_blob = _LayerNorm(
                        attention_output_blob, hidden_size
                    )
            with flow.scope.namespace("intermediate"):
                if callable(intermediate_act_fn):
                    act_fn = op_conf_util.kNone
                else:
                    act_fn = intermediate_act_fn
                intermediate_output_blob = _FullyConnected(
                    attention_output_blob,
                    input_size=num_attention_heads * attention_head_size,
                    units=intermediate_size,
                    activation=act_fn,
                    weight_initializer=CreateInitializer(initializer_range),
                    name="dense",
                )
                if callable(intermediate_act_fn):
                    intermediate_output_blob = intermediate_act_fn(
                        intermediate_output_blob
                    )
            with flow.scope.namespace("output"):
                layer_output_blob = _FullyConnected(
                    intermediate_output_blob,
                    input_size=intermediate_size,
                    units=hidden_size,
                    weight_initializer=CreateInitializer(initializer_range),
                    name="dense",
                )
                layer_output_blob = _Dropout(layer_output_blob, hidden_dropout_prob)
                layer_output_blob = layer_output_blob + attention_output_blob
                layer_output_blob = _LayerNorm(layer_output_blob, hidden_size)
                prev_output_blob = layer_output_blob
                all_layer_output_blobs.append(layer_output_blob)

    input_shape = (-1, seq_length, hidden_size)
    if do_return_all_layers:
        final_output_blobs = []
        for layer_output_blob in all_layer_output_blobs:
            final_output_blob = flow.reshape(layer_output_blob, input_shape)
            final_output_blobs.append(final_output_blob)
        return final_output_blobs
    else:
        final_output_blob = flow.reshape(prev_output_blob, input_shape)
        return [final_output_blob]
def InceptionV3(images, labels, trainable=True):
    conv0 = _conv2d_layer(
        "conv0", images, filters=32, kernel_size=3, strides=2, padding="VALID"
    )
    conv1 = _conv2d_layer(
        "conv1", conv0, filters=32, kernel_size=3, strides=1, padding="VALID"
    )
    conv2 = _conv2d_layer(
        "conv2", conv1, filters=64, kernel_size=3, strides=1, padding="SAME"
    )
    pool1 = flow.nn.max_pool2d(
        conv2, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool1"
    )
    conv3 = _conv2d_layer(
        "conv3", pool1, filters=80, kernel_size=1, strides=1, padding="VALID"
    )
    conv4 = _conv2d_layer(
        "conv4", conv3, filters=192, kernel_size=3, strides=1, padding="VALID"
    )
    pool2 = flow.nn.max_pool2d(
        conv4, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool2"
    )

    mixed_0 = InceptionA(pool2, 0)
    mixed_1 = InceptionA(mixed_0, 1)
    mixed_2 = InceptionA(mixed_1, 2)
    mixed_3 = InceptionB(mixed_2, 3)
    mixed_4 = InceptionC(mixed_3, 4, 128)
    mixed_5 = InceptionC(mixed_4, 5, 160)
    mixed_6 = InceptionC(mixed_5, 6, 160)
    mixed_7 = InceptionC(mixed_6, 7, 192)
    mixed_8 = InceptionD(mixed_7, 8)
    mixed_9 = InceptionE(mixed_8, 9)
    mixed_10 = InceptionE(mixed_9, 10)

    pool3 = flow.nn.avg_pool2d(
        mixed_10, ksize=8, strides=1, padding="VALID", data_format="NCHW", name="pool3"
    )

    with flow.scope.namespace("logits"):
        pool3 = flow.reshape(pool3, [pool3.shape[0], -1])
        weight = flow.get_variable(
            "fc1-weight",
            shape=(pool3.shape[1], 1001),
            dtype=flow.float,
            initializer=flow.truncated_normal(0.816496580927726),
            model_name="weight",
        )
        bias = flow.get_variable(
            "fc1-bias",
            shape=(1001,),
            dtype=flow.float,
            initializer=flow.constant_initializer(),
            model_name="bias",
        )
        fc1 = flow.matmul(pool3, weight)
        fc1 = flow.nn.bias_add(fc1, bias)

    loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=fc1, name="softmax_loss"
    )
    return loss
def vgg16bn(images, args, trainable=True, training=True):
    data_format = "NHWC" if args.channel_last else "NCHW"

    conv1 = _conv_block(images, 0, 64, 2, data_format, trainable=trainable, training=training)
    pool1 = flow.nn.max_pool2d(conv1[-1], 2, 2, "VALID", data_format, name="pool1")

    conv2 = _conv_block(pool1, 2, 128, 2, data_format, trainable=trainable, training=training)
    pool2 = flow.nn.max_pool2d(conv2[-1], 2, 2, "VALID", data_format, name="pool2")

    conv3 = _conv_block(pool2, 4, 256, 3, data_format, trainable=trainable, training=training)
    pool3 = flow.nn.max_pool2d(conv3[-1], 2, 2, "VALID", data_format, name="pool3")

    conv4 = _conv_block(pool3, 7, 512, 3, data_format, trainable=trainable, training=training)
    pool4 = flow.nn.max_pool2d(conv4[-1], 2, 2, "VALID", data_format, name="pool4")

    conv5 = _conv_block(pool4, 10, 512, 3, data_format, trainable=trainable, training=training)
    pool5 = flow.nn.max_pool2d(conv5[-1], 2, 2, "VALID", data_format, name="pool5")

    def _get_kernel_initializer():
        return flow.random_normal_initializer(stddev=0.01)

    def _get_bias_initializer():
        return flow.zeros_initializer()

    pool5 = flow.reshape(pool5, [pool5.shape[0], -1])
    fc6 = flow.layers.dense(
        inputs=pool5,
        units=4096,
        activation=flow.nn.relu,
        use_bias=True,
        kernel_initializer=_get_kernel_initializer(),
        bias_initializer=_get_bias_initializer(),
        kernel_regularizer=_get_regularizer(),  # weight_decay
        bias_regularizer=_get_regularizer(),
        trainable=trainable,
        name="dense0",
    )
    fc6 = flow.nn.dropout(fc6, rate=0.5)

    fc7 = flow.layers.dense(
        inputs=fc6,
        units=4096,
        activation=flow.nn.relu,
        use_bias=True,
        kernel_initializer=_get_kernel_initializer(),
        bias_initializer=_get_bias_initializer(),
        trainable=trainable,
        name="dense1",
    )
    fc7 = flow.nn.dropout(fc7, rate=0.5)

    fc8 = flow.layers.dense(
        inputs=fc7,
        units=1000,
        use_bias=True,
        kernel_initializer=_get_kernel_initializer(),
        bias_initializer=_get_bias_initializer(),
        trainable=trainable,
        name="dense2",
    )
    return fc8
def trt_reshape_job(x=flow.FixedTensorDef(x_shape, dtype=dtype)):
    return flow.reshape(x, shape)
def Resnet100(input_blob, embedding_size, fc_type="GDC", bn_is_training=True, **kw):
    filter_list = [64, 64, 128, 256, 512]
    num_stages = 4
    units = [3, 13, 30, 3]

    input_blob = _conv2d_layer(
        name="conv0",
        input=input_blob,
        filters=filter_list[0],
        kernel_size=3,
        strides=[1, 1],
        padding="same",
        use_bias=False,
        dilation_rate=1,
        activation=None,
    )
    input_blob = _batch_norm(
        input_blob, epsilon=2e-05, is_training=bn_is_training, name="bn0"
    )
    input_blob = _prelu(input_blob, name="relu0")

    for i in range(num_stages):
        input_blob = residual_unit_v3(
            input_blob,
            filter_list[i + 1],
            [2, 2],
            False,
            bn_is_training=bn_is_training,
            name="stage%d_unit%d" % (i + 1, 1),
        )
        for j in range(units[i] - 1):
            input_blob = residual_unit_v3(
                input_blob,
                filter_list[i + 1],
                [1, 1],
                True,
                bn_is_training=bn_is_training,
                name="stage%d_unit%d" % (i + 1, j + 2),
            )

    if fc_type == "GDC":
        input_blob = Linear(
            input_blob,
            num_filter=512,
            num_group=512,
            kernel=7,
            pad="valid",
            stride=[1, 1],
            bn_is_training=bn_is_training,
            name="conv_6dw7_7",
        )
        input_blob = flow.reshape(input_blob, (input_blob.shape[0], -1))
        pre_fc1 = flow.layers.dense(
            inputs=input_blob,
            units=embedding_size,
            activation=None,
            use_bias=True,
            kernel_initializer=_get_initializer(),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=_get_regularizer(),
            bias_regularizer=_get_regularizer(),
            trainable=True,
            name="pre_fc1",
        )
        fc1 = _batch_norm(
            pre_fc1,
            epsilon=2e-05,
            center=True,
            scale=False,
            is_training=bn_is_training,
            name="fc1",
        )
    elif fc_type == "E":
        input_blob = _batch_norm(
            input_blob, epsilon=2e-05, is_training=bn_is_training, name="bn1"
        )
        input_blob = _dropout(input_blob, dropout_prob=0.4)
        input_blob = flow.reshape(input_blob, (input_blob.shape[0], -1))
        pre_fc1 = flow.layers.dense(
            inputs=input_blob,
            units=embedding_size,
            activation=None,
            use_bias=True,
            kernel_initializer=_get_initializer(),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=_get_regularizer(),
            bias_regularizer=_get_regularizer(),
            trainable=True,
            name="pre_fc1",
        )
        fc1 = _batch_norm(
            pre_fc1,
            epsilon=2e-05,
            center=True,
            scale=False,
            is_training=bn_is_training,
            name="fc1",
        )
    elif fc_type == "FC":
        input_blob = _batch_norm(
            input_blob, epsilon=2e-05, is_training=bn_is_training, name="bn1"
        )
        input_blob = flow.reshape(input_blob, (input_blob.shape[0], -1))
        pre_fc1 = flow.layers.dense(
            inputs=input_blob,
            units=embedding_size,
            activation=None,
            use_bias=True,
            kernel_initializer=_get_initializer(),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=_get_regularizer(),
            bias_regularizer=_get_regularizer(),
            trainable=True,
            name="pre_fc1",
        )
        fc1 = _batch_norm(
            pre_fc1,
            epsilon=2e-05,
            center=True,
            scale=False,
            is_training=bn_is_training,
            name="fc1",
        )
    else:
        # unknown fc_type: fail loudly instead of falling through to an undefined fc1
        raise ValueError("unimplemented fc_type: %s" % fc_type)
    return fc1
def InplaceVariable():
    w = flow.get_variable("w", (2, 5), initializer=flow.constant_initializer(1))
    y = flow.reshape(w, (10,))
    return y
def build_network(self):
    conv1 = _conv2d(
        self.images,
        64,
        kernel_size=7,
        strides=2,
        padding=([0, 0], [0, 0], [3, 3], [3, 3]),
        groups=1,
        use_bias=False,
        trainable=self.trainable,
        name="conv1",
    )
    bn1 = _batch_norm(
        conv1, trainable=self.trainable, training=self.training, name="bn1"
    )
    relu = flow.nn.relu(bn1, name="relu1")
    max_pool = flow.nn.max_pool2d(
        relu,
        ksize=3,
        strides=2,
        padding=[[0, 0], [0, 0], [1, 1], [1, 1]],
        data_format="NCHW",
        name="max_pool",
    )
    layer1 = self._make_layer(
        max_pool, 64, self.layers[0], self.num_group, layer_num="layer1"
    )
    layer2 = self._make_layer(
        layer1[-1],
        128,
        self.layers[1],
        self.num_group,
        strides=2,
        layer_num="layer2",
    )
    layer3 = self._make_layer(
        layer2[-1],
        256,
        self.layers[2],
        self.num_group,
        strides=2,
        layer_num="layer3",
    )
    layer4 = self._make_layer(
        layer3[-1],
        512,
        self.layers[3],
        self.num_group,
        strides=2,
        layer_num="layer4",
    )

    # debug mode: dump data for debugging
    # with flow.watch_scope(blob_watcher=blob_watched,
    #                       diff_blob_watcher=diff_blob_watched):
    #     bn1_identity = flow.identity(layer4[-1], name="layer4_last_out")

    avg_pool = flow.nn.avg_pool2d(
        layer4[-1],
        7,
        strides=1,
        padding="VALID",
        data_format="NCHW",
        name="avg_pool",
    )
    reshape = flow.reshape(avg_pool, (avg_pool.shape[0], -1))
    fc = flow.layers.dense(
        reshape,
        units=self.num_classes,
        use_bias=True,
        kernel_initializer=_get_initializer("dense_weight"),
        bias_initializer=_get_initializer("dense_bias"),
        trainable=self.trainable,
        kernel_regularizer=_get_regularizer("dense_weight"),
        bias_regularizer=_get_regularizer("dense_bias"),
        name="fc",
    )
    return fc
def build_network(self, input_data, data_format, class_num=1000, prefix="", **configs):
    self.config_map.update(configs)

    first_c = int(round(self.config_map["firstconv_filter_num"] * self.multiplier))
    first_layer = mobilenet_unit(
        data=input_data,
        num_filter=first_c,
        kernel=(3, 3),
        stride=(2, 2),
        pad="same",
        data_format=data_format,
        if_act=True,
        trainable=self.trainable,
        training=self.training,
        prefix=prefix + "-Conv",
    )

    last_bottleneck_layer = first_layer
    in_c = first_c
    for i, layer_setting in enumerate(self.config_map["bottleneck_params_list"]):
        t, c, s, sc = layer_setting
        if i == 0:
            last_bottleneck_layer = inverted_residual_unit(
                data=last_bottleneck_layer,
                num_in_filter=in_c,
                num_filter=int(round(c * self.multiplier)),
                ifshortcut=sc,
                stride=(s, s),
                kernel=(3, 3),
                pad="same",
                expansion_factor=t,
                prefix=prefix + "-expanded_conv",
                trainable=self.trainable,
                training=self.training,
                data_format=data_format,
                has_expand=0,
            )
            in_c = int(round(c * self.multiplier))
        else:
            last_bottleneck_layer = inverted_residual_unit(
                data=last_bottleneck_layer,
                num_in_filter=in_c,
                num_filter=int(round(c * self.multiplier)),
                ifshortcut=sc,
                stride=(s, s),
                kernel=(3, 3),
                pad="same",
                expansion_factor=t,
                prefix=prefix + "-expanded_conv_%d" % i,
                trainable=self.trainable,
                training=self.training,
                data_format=data_format,
            )
            in_c = int(round(c * self.multiplier))

    last_fm = mobilenet_unit(
        data=last_bottleneck_layer,
        num_filter=int(1280 * self.multiplier) if self.multiplier > 1.0 else 1280,
        kernel=(1, 1),
        stride=(1, 1),
        pad="valid",
        data_format=data_format,
        if_act=True,
        trainable=self.trainable,
        training=self.training,
        prefix=prefix + "-Conv_1",
    )

    # global average pooling
    pool_size = int(self.data_wh[0] / 32)
    pool = flow.nn.avg_pool2d(
        last_fm,
        ksize=pool_size,
        strides=1,
        padding="VALID",
        data_format="NCHW",
        name="pool5",
    )
    fc = flow.layers.dense(
        flow.reshape(pool, (pool.shape[0], -1)),
        units=class_num,
        use_bias=False,
        kernel_initializer=_get_initializer("dense_weight"),
        bias_initializer=_get_initializer("bias"),
        kernel_regularizer=_get_regularizer("dense_weight"),
        bias_regularizer=_get_regularizer("bias"),
        trainable=self.trainable,
        name=prefix + "-fc",
    )
    return fc
def inceptionv3(images, trainable=True, channel_last=False):
    if channel_last:
        # if channel_last=True, then change mode from 'nchw' to 'nhwc'
        images = flow.transpose(images, name="transpose", perm=[0, 2, 3, 1])
    with flow.scope.namespace("InceptionV3"):
        # conv0: 299 x 299 x 3
        conv0 = conv2d_layer(
            "conv0", images, filters=32, kernel_size=3, strides=2, padding="VALID"
        )
        conv1 = conv2d_layer(
            "conv1", conv0, filters=32, kernel_size=3, strides=1, padding="VALID"
        )
        conv2 = conv2d_layer(
            "conv2", conv1, filters=64, kernel_size=3, strides=1, padding="SAME"
        )
        pool1 = flow.nn.max_pool2d(
            conv2, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool1"
        )
        conv3 = conv2d_layer(
            "conv3", pool1, filters=80, kernel_size=1, strides=1, padding="VALID"
        )
        conv4 = conv2d_layer(
            "conv4", conv3, filters=192, kernel_size=3, strides=1, padding="VALID"
        )
        pool2 = flow.nn.max_pool2d(
            conv4, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool2"
        )

        # mixed_0 ~ mixed_2
        mixed_0 = InceptionA(pool2, 0)
        mixed_1 = InceptionA(mixed_0, 1)
        mixed_2 = InceptionA(mixed_1, 2)
        # mixed_3
        mixed_3 = InceptionB(mixed_2, 3)
        # mixed_4 ~ mixed_7
        mixed_4 = InceptionC(mixed_3, 4, 128)
        mixed_5 = InceptionC(mixed_4, 5, 160)
        mixed_6 = InceptionC(mixed_5, 6, 160)
        mixed_7 = InceptionC(mixed_6, 7, 192)
        # mixed_8
        mixed_8 = InceptionD(mixed_7, 8)
        # mixed_9 ~ mixed_10
        mixed_9 = InceptionE(mixed_8, 9, "avg")
        mixed_10 = InceptionE(mixed_9, 10, "max")

        pool3 = flow.nn.avg_pool2d(
            mixed_10,
            ksize=8,
            strides=1,
            padding="VALID",
            data_format="NCHW",
            name="pool3",
        )

        # TODO: Need to transpose weight when converting model from TF to OF if
        # you want to use layers.dense interface.
        fc1 = flow.layers.dense(
            inputs=flow.reshape(pool3, [pool3.shape[0], -1]),
            units=1000,
            activation=None,
            use_bias=True,
            kernel_initializer=flow.truncated_normal(0.816496580927726),
            bias_initializer=flow.constant_initializer(),
            trainable=trainable,
            name="fc1",
        )
    return fc1