def res_layer(inp, chl, stride = 1, proj = False, se = None):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    #fc0
    SE = FullyConnected(
        "fc0({})".format(name), SE,
        output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    #fc1: concatenate the excitation state carried over from earlier blocks
    if se is None:
        se = SE
    else:
        se = O.Concat([se, SE], axis = 1)
    SE = FullyConnected(
        "fc1({})".format(name), se,
        output_dim = chl,
        nonlinearity = Sigmoid()
    )
    #compress the carried state back to chl // 4 for the next block
    se = FullyConnected(
        "fc({})".format(se.name), se,
        output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp, se
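# Usage sketch for the chained-SE variant above. `make_stage` is a
# hypothetical helper, not part of the source: it just shows how the carried
# excitation state `se` threads through consecutive blocks, so each block's
# gate also sees the squeezed features of every block before it.
def make_stage(inp, chl, n_blocks, stride = 2):
    inp, se = res_layer(inp, chl, stride = stride, proj = True)
    for _ in range(n_blocks - 1):
        inp, se = res_layer(inp, chl, se = se)
    return inp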
def res_layer(inp, chl, stride = 1, proj = False):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    chl //= 4
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    width = 4
    lay = FullyConnected(
        "fc0({})".format(name), SE,
        output_dim = chl,
        nonlinearity = ReLU()
    )
    #fc1
    lay = FullyConnected(
        "fc1({})".format(name), lay,
        output_dim = chl * width,
        nonlinearity = Identity()
    )
    lay = lay.reshape(inp.shape[0], chl, width)
    lay = Softmax("softmax({})".format(name), lay, axis = 2)
    #width branches: branch i sees the previous branch's output rotated by
    #`width` channels, scaled by its per-channel softmax gate
    for i in range(width):
        if i == 0:
            inp_lay = inp
        else:
            inp_lay = O.Concat(
                [inp[:, width:, :, :], inp[:, :width, :, :]], axis = 1)
        inp_lay = inp_lay * lay[:, :, i].dimshuffle(0, 1, 'x', 'x')
        inp = inp_lay
    chl *= 4
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    lay = FullyConnected(
        "fc0({})".format(name), SE,
        output_dim = chl,
        nonlinearity = ReLU()
    )
    #fc1
    lay = FullyConnected(
        "fc1({})".format(name), lay,
        output_dim = chl * width,
        nonlinearity = Identity()
    )
    lay = lay.reshape(inp.shape[0], chl, width)
    lay = Softmax("softmax({})".format(name), lay, axis = 2)
    for i in range(width):
        if i == 0:
            inp_lay = inp
        else:
            inp_lay = O.Concat(
                [inp[:, width:, :, :], inp[:, :width, :, :]], axis = 1)
        inp_lay = inp_lay * lay[:, :, i].dimshuffle(0, 1, 'x', 'x')
        inp = inp_lay
    inp = arith.ReLU(inp + pre)
    return inp
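# Plain-NumPy sketch (illustrative; not megskull code) of the branch gating
# above: softmax weights over `width` branches, where each branch after the
# first sees the previous branch's output with its channels rotated by
# `width` positions.
import numpy as np

N, C, H, W, width = 2, 16, 4, 4, 4
x = np.random.randn(N, C, H, W)
logits = np.random.randn(N, C, width)
gates = np.exp(logits) / np.exp(logits).sum(axis=2, keepdims=True)

out = x
for i in range(width):
    if i > 0:
        out = np.concatenate([out[:, width:], out[:, :width]], axis=1)
    out = out * gates[:, :, i][:, :, None, None]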
def res_layer(inp, chl, stride=1, proj=False):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    chl //= 4
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    lay = FullyConnected("fc0({})".format(name), SE,
                         output_dim=chl, nonlinearity=ReLU())
    #fc1
    lay = FullyConnected("fc1({})".format(name), lay,
                         output_dim=chl, nonlinearity=Sigmoid())
    inp = inp * lay.dimshuffle(0, 1, 'x', 'x')
    chl *= 4
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    chl //= 4
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    lay = FullyConnected("fc0({})".format(name), SE,
                         output_dim=chl, nonlinearity=ReLU())
    #fc1
    lay = FullyConnected("fc1({})".format(name), lay,
                         output_dim=chl, nonlinearity=Sigmoid())
    inp = inp * lay.dimshuffle(0, 1, 'x', 'x')
    chl *= 4
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    lay = FullyConnected("fc0({})".format(name), SE,
                         output_dim=chl, nonlinearity=ReLU())
    #fc1
    lay = FullyConnected("fc1({})".format(name), lay,
                         output_dim=chl, nonlinearity=Sigmoid())
    inp = inp * lay.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp
def dense_block(inp, k, l):
    lay = inp
    for i in range(l):
        cur_lay = bn_relu_conv(lay, 3, 1, 1, k, True, True)
        name = cur_lay.name
        group = k // 4
        #Global Average Pooling
        SE = cur_lay.mean(axis=3).mean(axis=2)
        SE = FullyConnected("fc0({})".format(name), SE,
                            output_dim=(k // group)**2 * group,
                            nonlinearity=ReLU())
        SE = FullyConnected("fc1({})".format(name), SE,
                            output_dim=(k // group)**2 * group,
                            nonlinearity=Sigmoid())
        #one (k // group) x (k // group) mixing kernel per sample and group
        SE = SE.reshape(cur_lay.shape[0] * group,
                        k // group, k // group, 1, 1)
        preshape = cur_lay.shape
        #fold the batch into channels so the per-sample predicted weights
        #become a single grouped 1x1 convolution
        cur_lay = cur_lay.reshape(1, cur_lay.shape[0] * cur_lay.shape[1],
                                  cur_lay.shape[2], cur_lay.shape[3])
        cur_lay = Conv2D("conv({})".format(name), cur_lay,
                         kernel_shape=1, stride=1, padding=0,
                         W=SE, nonlinearity=Identity())
        cur_lay = cur_lay.reshape(preshape)
        #cur_lay = cur_lay * SE.dimshuffle(0, 1, 'x', 'x')
        lay = Concat([lay, cur_lay], axis=1)
    return lay
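# Plain-NumPy sketch (illustrative; not megskull code) of the reshape trick
# above: folding the batch into the channel axis turns per-sample predicted
# 1x1 kernels into one grouped convolution, and for 1x1 kernels that grouped
# conv is just a channel-mixing matrix multiply per (sample, group).
import numpy as np

N, k = 2, 8
group = k // 4
cpg = k // group                                 # channels per group
x = np.random.randn(N, k, 4, 4)
Wmix = np.random.rand(N * group, cpg, cpg)       # as predicted by fc0/fc1

xg = x.reshape(N * group, cpg, 4, 4)
yg = np.einsum('goc,gchw->gohw', Wmix, xg)       # per-(sample, group) 1x1 conv
y = yg.reshape(N, k, 4, 4)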
def dense_block(inp, k, l):
    lay = inp
    for i in range(l):
        cur_lay = bn_relu_conv(lay, 3, 1, 1, k, True, True)
        name = cur_lay.name
        #Global Average Pooling
        SE = cur_lay.mean(axis=3).mean(axis=2)
        SE = FullyConnected("fc0({})".format(name), SE,
                            output_dim=k, nonlinearity=ReLU())
        SE = FullyConnected("fc1({})".format(name), SE,
                            output_dim=k, nonlinearity=Sigmoid())
        cur_lay = cur_lay * SE.dimshuffle(0, 1, 'x', 'x')
        lay = Concat([lay, cur_lay], axis=1)
    return lay
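# The SE gate above in plain NumPy (illustrative; not megskull code): squeeze
# with global average pooling, excite through a ReLU FC and a Sigmoid FC,
# then rescale each of the k new channels.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

N, k, H, W = 2, 12, 8, 8
x = np.random.randn(N, k, H, W)
w0 = np.random.randn(k, k)
w1 = np.random.randn(k, k)

s = x.mean(axis=(2, 3))                  # squeeze: (N, k)
g = sigmoid(np.maximum(s @ w0, 0) @ w1)  # excite: fc0 (ReLU) then fc1 (Sigmoid)
y = x * g[:, :, None, None]              # scale, like SE.dimshuffle(0, 1, 'x', 'x')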
def res_layer(inp, chl):
    pre = inp
    inp = conv_bn(inp, 3, 1, 1, chl, True)
    inp = conv_bn(inp, 3, 1, 1, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    #fc0
    SE = FullyConnected("fc0({})".format(name), SE,
                        output_dim=SE.partial_shape[1],
                        nonlinearity=ReLU())
    #fc1
    SE = FullyConnected("fc1({})".format(name), SE,
                        output_dim=SE.partial_shape[1],
                        nonlinearity=Sigmoid())
    inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp
def res_layer(inp, chl):
    pre = inp
    inp = conv_bn(inp, 3, 1, 1, chl, True)
    inp = conv_bn(inp, 3, 1, 1, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    group = 1
    #fc0
    SE = FullyConnected("fc0({})".format(name), SE,
                        output_dim=chl, nonlinearity=ReLU())
    #fc1: predict a full (chl // group) x (chl // group) channel-mixing
    #kernel per sample and group
    SE = FullyConnected("fc1({})".format(name), SE,
                        output_dim=(chl // group)**2 * group,
                        nonlinearity=Sigmoid())
    SE = SE.reshape(inp.shape[0] * group, chl // group, chl // group, 1, 1)
    w = SE
    #normalize so the weights feeding each output channel sum to 1
    SE /= SE.sum(axis=4).sum(axis=3).sum(axis=2).dimshuffle(
        0, 1, "x", "x", "x")
    #inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    #fold the batch into channels and apply the per-sample kernels as one
    #grouped 1x1 convolution
    inp = inp.reshape(1, inp.shape[0] * inp.shape[1],
                      inp.shape[2], inp.shape[3])
    inp = Conv2D(
        "conv({})".format(name), inp,
        kernel_shape=1, stride=1, padding=0,
        #output_nr_channel = chl,
        W=SE,
        nonlinearity=Identity(),
        #group = group
    )
    inp = inp.reshape(pre.shape)
    inp = arith.ReLU(inp + pre)
    return inp, w
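# Illustrative NumPy check (not megskull code; the (out, in) axis order of the
# 5-D kernel is an assumption) of the normalization above: dividing each
# predicted kernel by its total mass makes the weights feeding every output
# channel sum to 1, so the dynamic 1x1 conv computes a weighted average over
# input channels.
import numpy as np

w = np.random.rand(4, 8, 8, 1, 1)             # (N * group, out, in, 1, 1)
w = w / w.sum(axis=(2, 3, 4), keepdims=True)  # matches the SE /= SE.sum(...) line
print(w.sum(axis=(2, 3, 4)).ravel()[:4])      # all ~1.0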
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu):
    global idx
    idx += 1
    l1 = Conv2D("conv{}".format(idx), inp,
                kernel_shape=ker_shape,
                stride=stride,
                padding=padding,
                output_nr_channel=out_chl,
                #He/MSRA-style init: variance 2 / fan_in for ReLU layers,
                #1 / fan_in for linear ones
                W=G(mean=0,
                    std=((1 + int(isrelu)) /
                         (ker_shape**2 * inp.partial_shape[1]))**0.5),
                nonlinearity={True: ReLU(), False: Identity()}[isrelu])
    l2 = BN("bn{}".format(idx), l1, eps=1e-9)
    l2 = ElementwiseAffine("bnaff{}".format(idx), l2,
                           shared_in_channels=False, k=C(1), b=C(0))
    return l2
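# Quick NumPy check (illustrative) of the init used above: std is the He/MSRA
# formula sqrt((1 + isrelu) / fan_in) with fan_in = ker_shape**2 * C_in, i.e.
# sqrt(2 / fan_in) before ReLU and sqrt(1 / fan_in) before a linear layer.
import numpy as np

def he_std(ker_shape, in_chl, isrelu):
    return ((1 + int(isrelu)) / (ker_shape**2 * in_chl))**0.5

w = np.random.normal(0, he_std(3, 64, True), size=(128, 64, 3, 3))
print(w.std(), (2 / (3 * 3 * 64))**0.5)   # the two should be close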
from megskull.opr.helper.param_init import ConstantParamInitializer as C
from megskull.opr.helper.param_init import AutoGaussianParamInitializer as G
from megskull.opr.helper.elemwise_trans import Identity, ReLU
from megskull.network import Network
# assumed import path for the operators used below (not in the original)
from megskull.opr.all import Conv2D, Pooling2D, DataProvider
import numpy as np

minibatch_size = 20
img_size = 28

input_mat = DataProvider(name = "input_mat",
                         shape = (minibatch_size, 1, img_size, img_size))
conv1 = Conv2D("conv1", input_mat, kernel_shape = 3, output_nr_channel = 5,
               W = G(mean = 0.0001, std = (1 / (3 * 3))**0.5),
               b = C(0),
               padding = (1, 1),
               nonlinearity = ReLU())
conv2 = Conv2D("conv2", conv1, kernel_shape = 3, output_nr_channel = 5,
               W = G(mean = 0.0001, std = (1 / (5 * 3 * 3))**0.5),
               b = C(0),
               padding = (1, 1),
               nonlinearity = ReLU())
pooling1 = Pooling2D("pooling1", conv2, window = (2, 2), mode = "max")
conv3 = Conv2D("conv3", pooling1, kernel_shape = 3, output_nr_channel = 10,
               W = G(mean = 0.0001, std = (1 / (5 * 3 * 3))**0.5),
               b = C(0),
               padding = (1, 1),
               nonlinearity = ReLU())
conv4 = Conv2D("conv4", conv3, kernel_shape = 3, output_nr_channel = 10,
               W = G(mean = 0.0001, std = (1 / (10 * 3 * 3))**0.5),
               b = C(0),
               # the source breaks off mid-call; the remaining arguments are
               # assumed to follow the conv1-conv3 pattern
               padding = (1, 1),
               nonlinearity = ReLU())