# Example 1
def attentional_active_pooling(lay, output_dim):
    """Attention-style pooling head.

    Flattens the spatial axes of a 4-D feature map, projects it through two
    learned matrices, multiplies the two projections batch-wise, and keeps
    only the diagonal of the resulting interaction matrix.

    lay        -- 4-D feature variable (N, C, H, W); reshaped to (N, C, H*W).
    output_dim -- number of pooled output channels.
    Returns an (N, output_dim) variable.
    """
    # Collapse the two spatial axes into a single feature axis.
    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)

    feat_dim = lay.partial_shape[2]
    init_scale = (1 / feat_dim)**0.5

    def projection(name):
        # Learned (feat_dim, output_dim) matrix with scaled-normal init,
        # broadcast across the batch axis for batched matmul.
        p = O.ParamProvider(
            name,
            np.random.randn(feat_dim, output_dim) * init_scale)
        p = p.dimshuffle('x', 0, 1)
        return p.broadcast(
            (lay.partial_shape[0], p.partial_shape[1], p.partial_shape[2]))

    fca = O.BatchedMatMul(lay, projection("a"))
    fcb = O.BatchedMatMul(lay, projection("b"))
    print(fcb.partial_shape)
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1),
                         fcb) / fcb.partial_shape[1] / 5
    # Keep only the diagonal entries fc[:, i, i], one column per output dim.
    diag_cols = [fc[:, i, i].dimshuffle(0, 'x') for i in range(output_dim)]
    return O.Concat(diag_cols, axis=1)
# Example 2
def init(net, batch):
    """Replace BN-affine parameter pairs with data-driven whitening params.

    For each 'bnaff' opr pair (':k' scale, ':b' bias) found in the graph,
    run the network on *batch*, measure per-channel mean/std of the matching
    output, and swap the pair for new parameters (1/std, -mean/std).

    NOTE(review): assumes net.outputs[1 + i] lines up with the i-th 'bnaff'
    pair — confirm against how net.outputs is constructed by the caller.
    Returns *net* (modified in place).
    """
    visitor = NetworkVisitor(net.loss_var)
    # Collect scale/bias oprs in one pass over the graph.
    scale_oprs, bias_oprs = [], []
    for opr in visitor.all_oprs:
        if ":k" in opr.name and "bnaff" in opr.name:
            scale_oprs.append(opr)
        if ":b" in opr.name and "bnaff" in opr.name:
            bias_oprs.append(opr)

    for idx, (k, b) in enumerate(zip(scale_oprs, bias_oprs)):
        # Re-compile each iteration: replace_vars below mutates the graph.
        func = Function().compile(net.outputs)
        outputs = func(data=batch['data'])
        t = outputs[1 + idx]
        # Per-channel statistics over batch and spatial axes (N, H, W).
        mean = t.mean(axis=3).mean(axis=2).mean(axis=0)
        std = ((t - mean[np.newaxis, :, np.newaxis, np.newaxis])**2).mean(
            axis=3).mean(axis=2).mean(axis=0)**0.5
        nk = O.ParamProvider("new" + k.name, 1.0 / std)
        nb = O.ParamProvider("new" + b.name, -mean / std)
        visitor.replace_vars([(k, nk), (b, nb)], copy=False)

    # Dump the final opr list for inspection.
    visitor = NetworkVisitor(net.loss_var)
    for opr in visitor.all_oprs:
        print(opr)
    return net
# Example 3
def make_network(minibatch_size=128, debug=False):
    """Build a 32x32 conv net with an attentional-pooling 10-way classifier.

    Three stages of widths (16, 32, 64), ten 3x3 conv-bn layers per stage,
    a strided 2x2 conv-bn between stages, then an attention-style pooling
    head feeding softmax + cross-entropy.
    Returns a Network with loss_var set.
    """
    patch_size = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16, True)

    widths = [16, 32, 64]
    for stage, width in enumerate(widths):
        for _ in range(10):
            lay = conv_bn(lay, 3, 1, 1, width, True)
        # Strided 2x2 conv-bn down-samples into the next stage's width.
        if stage + 1 < len(widths):
            lay = conv_bn(lay, 2, 2, 0, widths[stage + 1], True)

    # Attentional pooling head (in place of global average pooling):
    # flatten spatial axes, project through two learned matrices, and keep
    # the diagonal of their batched product.
    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)
    feat_dim = lay.partial_shape[2]
    init_scale = (1 / feat_dim)**0.5

    a = O.ParamProvider(
        "a",
        np.random.randn(feat_dim, 10) * init_scale)
    a = a.dimshuffle('x', 0, 1)
    a = a.broadcast(
        (lay.partial_shape[0], a.partial_shape[1], a.partial_shape[2]))
    print(a.partial_shape)

    b = O.ParamProvider(
        "b",
        np.random.randn(feat_dim, 10) * init_scale)
    b = b.dimshuffle('x', 0, 1)
    b = b.broadcast(
        (lay.partial_shape[0], b.partial_shape[1], b.partial_shape[2]))
    print(b.partial_shape)

    fca = O.BatchedMatMul(lay, a)
    fcb = O.BatchedMatMul(lay, b)
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1), fcb) / 64
    # Keep only the diagonal of the (10, 10) interaction matrix.
    diag_cols = [fc[:, c, c].dimshuffle(0, 'x') for c in range(10)]
    fc = O.Concat(diag_cols, axis=1)
    pred = Softmax("pred", fc)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
# Example 4
import megskull.opr.all as O
import numpy as np

# Demo: two 10-element parameter vectors; base loss is sum(a * b).
# NOTE(review): ZeroGrad presumably blocks gradients through the product,
# leaving only the weight-decay term below to drive updates — confirm.
a = O.ParamProvider('a', np.ones((10, )))
b = O.ParamProvider('b', np.ones((10, )))
loss = O.ZeroGrad((a * b).sum())

from My import MyWeightDecay

# Attach weight decay; "*" with 0.001 presumably matches every parameter.
loss = MyWeightDecay(loss, {"*": 0.001})

from megskull.optimizer import NaiveSGD, OptimizableFunc

# Plain SGD with learning rate 1 on the decayed loss.
func = OptimizableFunc.make_from_loss_var(loss)
NaiveSGD(1)(func)
func.compile(loss)

# Each func() call runs one optimization step; print loss and params.
print(func())
print(np.array(a.eval(), dtype=np.float32))

# Scale the weight-decay coefficient by 10 and observe the change.
loss.Mul_Wc(10)

print(func())
print(np.array(a.eval()))

print(func())