# Assumed imports: the code below targets a megskull-style graph API; the exact
# module paths here are a best guess and may need adjusting to the real layout.
import numpy as np
from megskull.opr.all import (
    DataProvider, Conv2D, Pooling2D, FullyConnected, Softmax,
    CrossEntropyLoss, BatchNormalization as BN, ElementwiseAffine)
import megskull.opr.all as O
import megskull.opr.arith as arith
from megskull.opr.helper.elemwise_trans import ReLU, Identity, Sigmoid
from megskull.opr.helper.param_init import ConstantParamInitializer as C
from megskull.opr.helper.param_init import AutoGaussianParamInitializer as G
from megskull.network import Network, NetworkVisitor

# bn_relu_conv, relu_conv_bn and res_block are assumed to be defined elsewhere
# in this repo; conv_bn is defined below.

idx = 0  # global layer counter used by conv_bn and skip for operator naming


def res_layer(inp, chl, stride = 1, proj = False):
    def channel_attention(inp, chl, width = 4):
        # softmax-weighted product over channel-rotated copies of inp;
        # factored out because the original repeated this block verbatim
        name = inp.name
        # global average pooling
        SE = inp.mean(axis = 3).mean(axis = 2)
        lay = FullyConnected(
            "fc0({})".format(name), SE, output_dim = chl,
            nonlinearity = ReLU()
        )
        # fc1
        lay = FullyConnected(
            "fc1({})".format(name), lay, output_dim = chl * width,
            nonlinearity = Identity()
        )
        lay = lay.reshape(inp.shape[0], chl, width)
        lay = Softmax("softmax({})".format(name), lay, axis = 2)
        for i in range(width):
            if i == 0:
                inp_lay = inp
            else:
                # rotate the channel axis by `width` channels
                inp_lay = O.Concat(
                    [inp[:, width:, :, :], inp[:, :width, :, :]], axis = 1)
            inp_lay = inp_lay * lay[:, :, i].dimshuffle(0, 1, 'x', 'x')
            inp = inp_lay
        return inp

    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    inp = channel_attention(inp, chl // 4)
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    inp = channel_attention(inp, chl)
    inp = arith.ReLU(inp + pre)
    return inp
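
# Illustrative sketch (not from the original code): channel_attention above
# builds, in graph operators, a softmax-weighted product over channel-rotated
# copies of the feature map, rotating the already-reweighted tensor each step.
# A pure-NumPy mirror of that loop, with made-up sizes:
def _demo_channel_attention_numpy():
    n, chl, h, w, width = 2, 8, 4, 4, 4
    inp = np.random.randn(n, chl, h, w)
    weights = np.random.rand(n, chl, width)
    weights /= weights.sum(axis=2, keepdims=True)  # stands in for the softmax
    for i in range(width):
        if i > 0:
            # rotate by `width` channels, as in the graph version
            inp = np.concatenate([inp[:, width:], inp[:, :width]], axis=1)
        inp = inp * weights[:, :, i][:, :, None, None]
    return inp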
def den_layer(inp, chl):
    stage = 8
    out = []
    for i in range(stage):
        lay = conv_bn(inp, 3, 1, 1, chl // stage, True)
        out.append(lay)
        # slide the input window: drop the oldest chl // stage channels
        # and append the newly produced ones
        inp = O.Concat([inp[:, chl // stage:, :, :], lay], axis = 1)
    return O.Concat(out, axis = 1)
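
# Illustrative sketch (not from the original code): the loop above keeps the
# conv input at a fixed channel width by sliding a window over the channel
# axis. A pure-NumPy mirror with a constant stand-in for conv_bn:
def _demo_sliding_window_numpy():
    chl, stage = 16, 8
    inp = np.zeros((1, chl, 4, 4))
    outs = []
    for i in range(stage):
        lay = np.full((1, chl // stage, 4, 4), float(i))  # stand-in for conv_bn
        outs.append(lay)
        inp = np.concatenate([inp[:, chl // stage:], lay], axis=1)
    return np.concatenate(outs, axis=1)  # (1, chl, 4, 4)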
def den_lay(inp, chl):
    out = []
    stage = 8
    for i in range(stage):
        lay = bn_relu_conv(inp, 3, 1, 1, chl // stage, True, True)
        out.append(lay)
        inp = O.Concat([inp, lay], axis = 1)
    return O.Concat(out, axis = 1)
def den_lay(inp, chl):
    # NOTE: shadows the den_lay above if both live in one module; this variant
    # collects pre-activation outputs and ReLUs only the copy fed back in.
    out = []
    stage = 8
    for i in range(stage):
        lay = conv_bn(inp, 3, 1, 1, chl // stage, False)
        out.append(lay)
        lay = arith.ReLU(lay)
        inp = O.Concat([inp, lay], axis=1)
    return O.Concat(out, axis=1)
def skip(inp, isdown, chl):
    if isdown == -1:
        return inp
    global idx
    # factorized reduction: two stride-2 paths sampled at a one-pixel spatial
    # offset, each projected to chl // 2 channels, then concatenated
    l1 = inp
    if isdown != 0:
        l1 = Pooling2D("pooling1_{}".format(idx), inp,
                       window=1, stride=2, mode="AVERAGE")
    l1 = relu_conv_bn(l1, 1, 1, 0, chl // 2, isrelu=False, isbn=False)
    l2 = inp
    if isdown != 0:
        l2 = Pooling2D("pooling2_{}".format(idx), inp[:, :, 1:, 1:],
                       window=1, stride=2, mode="AVERAGE")
    l2 = relu_conv_bn(l2, 1, 1, 0, chl // 2, isrelu=False, isbn=False)
    lay = O.Concat([l1, l2], axis=1)
    lay = BN("bn_down_{}".format(isdown), lay, eps=1e-9)
    lay = ElementwiseAffine("bnaff_down_{}".format(isdown), lay,
                            shared_in_channels=False, k=C(1), b=C(0))
    return lay
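
# Illustrative sketch (not from the original code): pooling with window=1 and
# stride=2 is plain subsampling, so the two paths in skip() read the stride-2
# grid at two offsets and together cover more positions. In NumPy:
def _demo_offset_pooling_numpy():
    x = np.arange(16, dtype=np.float64).reshape(1, 1, 4, 4)
    p1 = x[:, :, ::2, ::2]                 # window=1, stride=2 "pooling"
    p2 = x[:, :, 1:, 1:][:, :, ::2, ::2]   # same pooling, one-pixel offset
    return np.concatenate([p1, p2], axis=1)  # channel concat, as in skip()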
def res_layer(inp, chl, stride = 1, proj = False, se = None):
    # NOTE: shadows the res_layer defined above if both live in one module;
    # this variant threads a squeeze-and-excitation feature `se` across blocks.
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    # global average pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    # fc0: squeeze
    SE = FullyConnected(
        "fc0({})".format(name), SE, output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    # accumulate the squeezed features from earlier blocks
    if se is None:
        se = SE
    else:
        se = O.Concat([se, SE], axis = 1)
    # fc1: excitation computed from the accumulated features
    SE = FullyConnected(
        "fc1({})".format(name), se, output_dim = chl,
        nonlinearity = Sigmoid()
    )
    # compress the accumulated features before handing them to the next block
    se = FullyConnected(
        "fc({})".format(se.name), se, output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp, se
def attentional_active_pooling(lay, output_dim):
    # flatten the spatial dims: (N, C, H, W) -> (N, C, H*W)
    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)
    a = O.ParamProvider(
        "a",
        np.random.randn(lay.partial_shape[2], output_dim)
        * (1 / lay.partial_shape[2])**0.5)
    a = a.dimshuffle('x', 0, 1)
    a = a.broadcast(
        (lay.partial_shape[0], a.partial_shape[1], a.partial_shape[2]))
    b = O.ParamProvider(
        "b",
        np.random.randn(lay.partial_shape[2], output_dim)
        * (1 / lay.partial_shape[2])**0.5)
    b = b.dimshuffle('x', 0, 1)
    b = b.broadcast(
        (lay.partial_shape[0], b.partial_shape[1], b.partial_shape[2]))
    fca = O.BatchedMatMul(lay, a)   # (N, C, output_dim)
    fcb = O.BatchedMatMul(lay, b)   # (N, C, output_dim)
    print(fcb.partial_shape)
    # (N, output_dim, output_dim), scaled down to keep activations in range
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1), fcb) / fcb.partial_shape[1] / 5
    # keep only the diagonal as the per-class score
    outs = []
    for i in range(output_dim):
        outs.append(fc[:, i, i].dimshuffle(0, 'x'))
    fc = O.Concat(outs, axis=1)
    return fc
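
# Illustrative sketch (not from the original code): the diagonal of
# fca^T @ fcb equals the channel-wise sum of fca * fcb, i.e. one bilinear
# score per class. A pure-NumPy check of that identity with made-up sizes:
def _demo_diagonal_bilinear_numpy():
    n, c, hw, output_dim = 2, 8, 16, 10
    lay = np.random.randn(n, c, hw)
    a = np.random.randn(hw, output_dim)
    b = np.random.randn(hw, output_dim)
    fca = lay @ a  # (n, c, output_dim)
    fcb = lay @ b  # (n, c, output_dim)
    full = np.einsum('nci,ncj->nij', fca, fcb)  # fca^T @ fcb per sample
    diag = np.einsum('nci,nci->ni', fca, fcb)   # just the diagonal
    assert np.allclose(np.diagonal(full, axis1=1, axis2=2), diag)
    return diag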
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu, group = 1, shift = 0):
    global idx
    idx += 1
    if group == 1:
        l1 = Conv2D(
            "conv{}".format(idx), inp, kernel_shape = ker_shape,
            stride = stride, padding = padding,
            output_nr_channel = out_chl,
            #W = G(mean = 0, std = ((1) / (ker_shape**2 * inp.partial_shape[1]))**0.5),
            #b = C(0),
            nonlinearity = Identity()
        )
    else:
        if shift == 0:
            l1 = Conv2D(
                "conv{}".format(idx), inp, kernel_shape = ker_shape,
                stride = stride, padding = padding,
                output_nr_channel = out_chl,
                #W = G(mean = 0, std = ((1) / (ker_shape**2 * inp.partial_shape[1]))**0.5),
                #b = C(0),
                nonlinearity = Identity(),
                group = group,
            )
        else:
            # shift-grouped convolution: sum a grouped conv with a second one
            # applied after rotating the channels by shift * (channels // group),
            # doubling the shift until it reaches group
            shift = 1
            l1 = inp
            while shift != group:
                l11 = Conv2D(
                    "conv{}_{}_1".format(idx, shift), l1,
                    kernel_shape = ker_shape, stride = stride,
                    padding = padding, output_nr_channel = out_chl,
                    nonlinearity = Identity(), group = group,
                )
                inp_chl = l1.partial_shape[1]
                l1 = O.Concat(
                    [l1[:, shift * inp_chl // group:, :, :],
                     l1[:, :shift * inp_chl // group, :, :]], axis = 1)
                l12 = Conv2D(
                    "conv{}_{}_2".format(idx, shift), l1,
                    kernel_shape = ker_shape, stride = stride,
                    padding = padding, output_nr_channel = out_chl,
                    nonlinearity = Identity(), group = group,
                )
                l1 = l11 + l12
                shift *= 2
    l2 = BN("bn{}".format(idx), l1, eps = 1e-9)
    l2 = ElementwiseAffine("bnaff{}".format(idx), l2,
                           shared_in_channels = False, k = C(1), b = C(0))
    if isrelu:
        l2 = arith.ReLU(l2)
    return l2
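
# Illustrative sketch (not from the original code): the rotation used between
# the two grouped convs in conv_bn, applied with doubling shifts so channel
# information eventually crosses every group boundary. In NumPy:
def _demo_group_rotation_numpy(group=4):
    x = np.arange(8).reshape(1, 8, 1, 1)   # 8 channels, one value per channel
    shift = 1
    while shift != group:
        k = shift * x.shape[1] // group    # rotation amount, as in conv_bn
        x = np.concatenate([x[:, k:], x[:, :k]], axis=1)
        shift *= 2
    return x[:, :, 0, 0]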
def make_network(minibatch_size = 128, debug = False):
    patch_size = 32
    inp = DataProvider("data",
                       shape = (minibatch_size, 3, patch_size, patch_size),
                       dtype = np.float32)
    label = DataProvider("label", shape = (minibatch_size, ), dtype = np.int32)
    lay = conv_bn(inp, 3, 1, 1, 16, True)
    n = 4
    lis = [16 * 4, 32 * 4, 64 * 4]
    se = None
    for i in range(len(lis)):
        lay, se = res_block(lay, lis[i], i, n, se)
    #global average pooling
    feature = lay.mean(axis = 2).mean(axis = 2)
    #feature = Pooling2D("pooling", lay, window = 8, stride = 8, padding = 0, mode = "AVERAGE")
    feature = O.Concat([feature, se], axis = 1)
    pred = Softmax("pred", FullyConnected(
        "fc0", feature,
        output_dim = 10,
        #W = G(mean = 0, std = (1 / 64)**0.5),
        #b = C(0),
        nonlinearity = Identity()
    ))
    network = Network(outputs = [pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    return network
def make_network(minibatch_size=128, debug=False):
    # NOTE: shadows the make_network above if both live in one module; this
    # variant swaps the global-average-pooling + FC head for an inline
    # attentional-active-pooling head (cf. attentional_active_pooling).
    patch_size = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)
    lay = conv_bn(inp, 3, 1, 1, 16, True)
    lis = [16, 32, 64]
    for i in range(len(lis)):
        #lay = res_block(lay, lis[i], i, n)
        for j in range(10):
            lay = conv_bn(lay, 3, 1, 1, lis[i], True)
        if i < len(lis) - 1:
            lay = conv_bn(lay, 2, 2, 0, lis[i + 1], True)
    #global average pooling
    #feature = lay.mean(axis = 2).mean(axis = 2)
    #feature = Pooling2D("pooling", lay, window = 8, stride = 8, padding = 0, mode = "AVERAGE")
    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)
    a = O.ParamProvider(
        "a",
        np.random.randn(lay.partial_shape[2], 10)
        * (1 / lay.partial_shape[2])**0.5)
    a = a.dimshuffle('x', 0, 1)
    a = a.broadcast(
        (lay.partial_shape[0], a.partial_shape[1], a.partial_shape[2]))
    print(a.partial_shape)
    b = O.ParamProvider(
        "b",
        np.random.randn(lay.partial_shape[2], 10)
        * (1 / lay.partial_shape[2])**0.5)
    b = b.dimshuffle('x', 0, 1)
    b = b.broadcast(
        (lay.partial_shape[0], b.partial_shape[1], b.partial_shape[2]))
    print(b.partial_shape)
    fca = O.BatchedMatMul(lay, a)
    fcb = O.BatchedMatMul(lay, b)
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1), fcb) / 64
    outs = []
    for i in range(10):
        outs.append(fc[:, i, i].dimshuffle(0, 'x'))
    fc = O.Concat(outs, axis=1)
    pred = Softmax("pred", fc)
    """
    pred = Softmax("pred", FullyConnected(
        "fc0", feature,
        output_dim = 10,
        #W = G(mean = 0, std = (1 / 64)**0.5),
        #b = C(0),
        nonlinearity = Identity()
    ))
    """
    network = Network(outputs=[pred])
    #info = CInfo()
    #info.get_complexity(network.outputs).as_table().show()
    network.loss_var = CrossEntropyLoss(pred, label)
    """
    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    """
    return network