# Assumed imports for these Theano snippets (hedged reconstruction); `layers`,
# `quantizeGrad`, and `STEquant` are project-local helpers referenced below.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d
from theano.tensor.shared_randomstreams import RandomStreams

import layers
import quantizeGrad


def feedForward(x, params, B):
    # Quantized 4-layer MLP: activations quantized to B bits, weights and
    # biases to B + 2 bits; hidden layers use sloped-clipping activations.
    x = layers.quantizeAct(x, B)
    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.quantizeAct(c1, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.quantizeAct(c2, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.quantizeAct(c3, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)

    # The output layer's activation is left unquantized.
    z = layers.linOutermost(c3, current_params)

    return z
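# A minimal sketch of the uniform quantizers assumed above; quantizeAct and
# quantizeWeight are project-local, so this stand-in (2**bits levels over
# [-1, 1)) is illustrative only, not the project's implementation.
def _quantize_sketch(t, bits):
    levels = 2.0 ** (bits - 1)
    return T.clip(T.round(t * levels), -levels, levels - 1) / levels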
def feedForward(x, params, train):
    # 4-layer MLP with dropout (rate 0.8 on the input, 0.75 on hidden
    # activations; presumably keep-probabilities, see the sketch below).
    srng = RandomStreams(seed=12345)
    x = layers.dropout(x, train, 0.8, srng)
    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.dropout(c1, train, 0.75, srng)

    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.dropout(c2, train, 0.75, srng)

    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.dropout(c3, train, 0.75, srng)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)

    return z
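# A hedged sketch of the layers.dropout signature used above (assumption, not
# the project's code): inverted dropout that keeps units with probability
# p_keep while training (train != 0) and is the identity at test time.
def _dropout_sketch(t, train, p_keep, srng):
    mask = srng.binomial(n=1, p=p_keep, size=t.shape,
                         dtype=theano.config.floatX)
    return T.switch(T.neq(train, 0), t * mask / p_keep, t)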
def feedForward(x, params):
    # 9-layer CNN: six conv layers (with two 2x2 poolings) followed by three
    # fully connected layers; all hidden activations use sloped clipping.
    l = 0
    current_params = params[l]
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)

    l += 1
    current_params = params[l]
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)

    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)

    l += 1
    current_params = params[l]
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)

    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)

    l += 1
    current_params = params[l]
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)

    f9 = c8.flatten(2)  # collapse feature maps to (batch, features)

    l += 1
    current_params = params[l]
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.slopedClipping(h1)

    l += 1
    current_params = params[l]
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.slopedClipping(h2)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(h2, current_params)
    #
    return z
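# Note on the recurring dimshuffle('x', 0, 'x', 'x') above: conv2d outputs are
# laid out as (batch, channels, rows, cols), so the per-channel bias of shape
# (C,) must be reshaped to a broadcastable (1, C, 1, 1) before it is added:
#     b.dimshuffle('x', 0, 'x', 'x')  # (C,) -> (1, C, 1, 1)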
def resBlock(preAct, resParams, train, bn_updates, BA, GAselect):
    # Residual block with conv+BN layers, STE activation quantization to BA
    # bits, and a selectable backward-pass gradient quantizer (GAselect picks
    # the layer-specific quantizeGrad variant).
    srng = RandomStreams(12345)
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convBNAct(preAct, current_params[:6], train)
    # Record the new running mean/variance so the caller can apply them as
    # updates when compiling the training function.
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    inAct = STEquant(inAct, BA)
    if GAselect == 2:
        inAct = quantizeGrad.quantizeGradL2(inAct)
    elif GAselect == 4:
        inAct = quantizeGrad.quantizeGradL4(inAct)
    elif GAselect == 6:
        inAct = quantizeGrad.quantizeGradL6(inAct)
    elif GAselect == 10:
        inAct = quantizeGrad.quantizeGradL10(inAct)
    elif GAselect == 12:
        inAct = quantizeGrad.quantizeGradL12(inAct)
    elif GAselect == 16:
        inAct = quantizeGrad.quantizeGradL16(inAct)
    elif GAselect == 18:
        inAct = quantizeGrad.quantizeGradL18(inAct)

    inAct = layers.dropout(inAct, train, 0.8, srng)
    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    outAct = layers.slopedClipping(outAct + preAct)
    return outAct, bn_updates
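# A hedged alternative to the GAselect if/elif ladders here, assuming
# quantizeGrad exposes functions named quantizeGradL<k>; like the ladder, it
# leaves the tensor untouched when no matching variant exists.
def _grad_quantizer(t, GAselect):
    fn = getattr(quantizeGrad, 'quantizeGradL%d' % GAselect, None)
    return fn(t) if fn is not None else t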
def resBlockStride(preAct, resParams, train, bn_updates, BA, GAselect):
    # Downsampling residual block: the main path opens with a strided conv,
    # and the shortcut goes through a strided conv+BN so both shapes match.
    srng = RandomStreams(12345)
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convStrideBNAct(preAct, current_params[:6],
                                                 train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    inAct = STEquant(inAct, BA)
    if GAselect == 8:
        inAct = quantizeGrad.quantizeGradL8(inAct)
    elif GAselect == 14:
        inAct = quantizeGrad.quantizeGradL14(inAct)

    inAct = layers.dropout(inAct, train, 0.8, srng)
    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    current_params = resParams[2]
    shortCut, newRM, newRV = layers.convStrideBN(preAct, current_params[:6],
                                                 train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    outAct = layers.slopedClipping(outAct + shortCut)
    return outAct, bn_updates
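# Hedged usage sketch for the accumulated bn_updates: each (shared_var,
# new_value) pair refreshes a batch-norm running mean or variance, so the list
# is meant to be merged into the updates of the compiled training function
# (x, y, loss, and param_updates below are illustrative names):
#     train_fn = theano.function([x, y], loss,
#                                updates=param_updates + bn_updates)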
def resBlock(preAct, resParams, train):
    # Residual block that also returns its intermediate activations (e.g., for
    # later inspection); the BN running statistics are discarded here.
    resActivations = []
    current_params = resParams[0]
    inAct, _, _ = layers.convBNAct(preAct, current_params, train)
    resActivations.append(inAct)

    current_params = resParams[1]
    outAct, _, _ = layers.convBN(inAct, current_params, train)

    outAct = layers.slopedClipping(outAct + preAct)
    resActivations.append(outAct)
    return outAct, resActivations
def feedForward(x, params):
    # Plain 4-layer MLP without quantization or dropout.
    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)

    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)

    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)

    return z
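# Hedged sketch of the params layout these MLP variants expect: a list of
# [W, b] pairs of shared variables, one per layer (sizes illustrative).
def _shared_pair(rng, n_in, n_out):
    W = theano.shared(rng.uniform(-0.1, 0.1, (n_in, n_out)).astype('float32'))
    b = theano.shared(np.zeros(n_out, dtype='float32'))
    return [W, b]
# Example:
#     rng = np.random.RandomState(0)
#     params = [_shared_pair(rng, 784, 512), _shared_pair(rng, 512, 512),
#               _shared_pair(rng, 512, 512), _shared_pair(rng, 512, 10)]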
def resBlock(preAct, resParams, train, BA1, BA2, BW1, BW2):
    # Quantized residual block: per-layer weight bit-widths BW1/BW2 and
    # activation bit-widths BA1/BA2.
    current_params = resParams[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW1)
    inAct, _, _ = layers.convBNAct(preAct, current_params, train)
    inAct = layers.quantizeAct(inAct, BA1)
    # The 0.8 factor presumably compensates for the dropout keep-rate used in
    # the training-time variant of this block.
    inAct = 0.8 * inAct

    current_params = resParams[1]
    current_params[0] = layers.quantizeWeight(current_params[0], BW2)
    outAct, _, _ = layers.convBN(inAct, current_params, train)

    outAct = layers.slopedClipping(outAct + preAct)
    outAct = layers.quantizeAct(outAct, BA2)
    return outAct
def resBlockStride(preAct, resParams, train, BA1, BA2, BW1, BW2, BW3):
    # Quantized downsampling residual block; BW3 quantizes the strided
    # shortcut convolution.
    current_params = resParams[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW1)
    inAct, _, _ = layers.convStrideBNAct(preAct, current_params, train)
    # As above, 0.8 presumably stands in for the training-time dropout
    # keep-rate.
    inAct = 0.8 * inAct

    current_params = resParams[1]
    current_params[0] = layers.quantizeWeight(current_params[0], BW2)
    outAct, _, _ = layers.convBN(inAct, current_params, train)

    current_params = resParams[2]
    current_params[0] = layers.quantizeWeight(current_params[0], BW3)
    shortCut, _, _ = layers.convStrideBN(preAct, current_params, train)

    outAct = layers.slopedClipping(outAct + shortCut)
    return outAct
def resBlockStride(preAct, resParams, train):
    # Downsampling residual block that also returns intermediate activations.
    resActivations = []
    current_params = resParams[0]
    inAct, _, _ = layers.convStrideBNAct(preAct, current_params, train)

    resActivations.append(inAct)
    inAct = 0.8 * inAct

    current_params = resParams[1]
    outAct, _, _ = layers.convBN(inAct, current_params, train)

    current_params = resParams[2]
    shortCut, _, _ = layers.convStrideBN(preAct, current_params, train)

    outAct = layers.slopedClipping(outAct + shortCut)
    resActivations.append(outAct)
    return outAct, resActivations
def resBlock(preAct, resParams, train, bn_updates):
    # Residual block with dropout that returns intermediate activations and
    # the accumulated batch-norm running-statistic updates.
    srng = RandomStreams(12345)
    resActivations = []
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convBNAct(preAct, current_params, train)
    resActivations.append(inAct)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    inAct = layers.dropout(inAct, train, 0.8, srng)

    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    outAct = layers.slopedClipping(outAct + preAct)
    resActivations.append(outAct)
    return outAct, resActivations, bn_updates
def feedForward(x, params):
    # Builds per-layer sensitivity terms ("evalues") instead of class scores:
    # for each activation and each weight/bias tensor, it accumulates a
    # normalized squared difference between class-logit gradients and the
    # top-logit gradient.
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)
    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    # Rebuild W from its flattened view so the jacobians below can be taken
    # with respect to the flat weight vector.
    current_params[0] = T.reshape(w_flattened, w_shape)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    activations.append(c3)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    z = layers.linOutermost(c3, current_params)
    # z holds the raw class scores (logits).
    weights.append(w_flattened)
    biases.append(current_params[1])

    z_fl = z.max(axis=1)  # top logit per sample
    y_fl = z.argmax(axis=1)  # predicted class (unused below)

    for l in range(4):
        activation = activations[l]
        E = 0.0
        # Summing over the batch before T.grad yields per-sample gradients
        # with the same shape as the activation (batchSize x actshape).
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)  # batchSize x actshape
            # 1 + 0.0 * z_i is a ones tensor shaped like z_i; it guards the
            # i == argmax terms against division by zero.
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            # Broadcast the per-sample denominator across the feature axis.
            result = numerator / denum.dimshuffle(0, 'x')
            E = E + T.sum(result)
        evalues.append(E / 24.0)
        E = 0.0
        w = weights[l]
        b = biases[l]
        # T.jacobian of the (batchSize,) vector z_fl with respect to the flat
        # weights yields a batchSize x numWeights matrix.
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result_w = numerator_w / (denum.dimshuffle(0, 'x'))
            result_b = numerator_b / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)

    return evalues
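# Hedged reading of the quantity accumulated above: for each recorded tensor a
# (activation, flattened weights, or biases) and each class logit z_i,
#     E = sum_n ||d z_i / d a - d z_fl / d a||^2 / (z_i - z_fl)^2,
# where z_fl is the per-sample top logit; the i == argmax terms vanish (their
# numerator is zero and the switch pins the denominator to 1), and 24.0
# appears to be a fixed normalizing constant.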
def feedForward(x, params):
    # CNN version of the sensitivity computation above: records every
    # activation plus flattened weights and biases, then builds the same
    # gradient-difference terms per layer.
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)

    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    # Same flatten/reshape trick as above: make the graph depend on the flat
    # weight vector so T.jacobian can differentiate with respect to it.
    new_W = T.reshape(wf, w_shape)
    c1 = conv2d(x, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c2 = conv2d(c1, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(wf)
    biases.append(current_params[1])

    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c4 = conv2d(p3, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)
    activations.append(c4)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c5 = conv2d(c4, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)
    activations.append(c5)
    weights.append(wf)
    biases.append(current_params[1])

    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c7 = conv2d(p6, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)
    activations.append(c7)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c8 = conv2d(c7, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)
    activations.append(c8)
    weights.append(wf)
    biases.append(current_params[1])

    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h1 = T.dot(f9, new_W) + current_params[1]
    h1 = layers.slopedClipping(h1)
    activations.append(h1)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h2 = layers.linOutermost(h1, [new_W, current_params[1]])
    h2 = layers.slopedClipping(h2)
    activations.append(h2)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    z = layers.linOutermost(h2, [new_W, current_params[1]])
    weights.append(wf)
    biases.append(current_params[1])
    #
    z_fl = z.max(axis=1)  # top logit per sample
    y_fl = z.argmax(axis=1)  # predicted class (unused below)

    for activation in activations:
        E = 0.0
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            # Flatten to (batchSize, numFeatures) so the per-sample divide
            # broadcasts across the feature axis.
            numerator = numerator.flatten(2)
            result = numerator / denum.dimshuffle(0, 'x')
            E = E + T.sum(result)
        evalues.append(E / 24.0)
    for l in range(9):
        w = weights[l]
        b = biases[l]
        E = 0.0
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result_w = numerator_w / (denum.dimshuffle(0, 'x'))
            result_b = numerator_b / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)
    return evalues
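# Hedged usage sketch: compiling the sensitivity terms into one callable
# (x4 is an illustrative tensor4 input; params is the [W, b] list):
#     x4 = T.tensor4('x')
#     get_evalues = theano.function([x4], feedForward(x4, params))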
def feedForward(x, params, B, BA, BW):
    # Quantized CNN with per-layer bit-width control: B is the base width, BA
    # and BW hold per-layer offsets for activations and weights, read with
    # .take(l). The two trailing constants of quantizeNormalizedWeight are a
    # per-layer scale and its reciprocal (16.0 and 0.0625, 2.0 and 0.5, ...).
    x = layers.quantizeAct(x, B + BA.take(0))
    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 16.0, 0.0625)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 16.0, 0.0625)
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c1 = layers.quantizeAct(layers.slopedClipping(c1), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c2 = layers.quantizeAct(layers.slopedClipping(c2), B + BA.take(l + 1))

    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 4.0, 0.25)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 4.0, 0.25)
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c4 = layers.quantizeAct(layers.slopedClipping(c4), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c5 = layers.quantizeAct(layers.slopedClipping(c5), B + BA.take(l + 1))

    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c7 = layers.quantizeAct(layers.slopedClipping(c7), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c8 = layers.quantizeAct(layers.slopedClipping(c8), B + BA.take(l + 1))

    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.quantizeAct(layers.slopedClipping(h1), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.quantizeAct(layers.slopedClipping(h2), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    z = layers.linOutermost(h2, current_params)
    #
    return z
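# Hedged usage sketch for the bit-width arguments above: B is a scalar base
# width, while BA and BW are integer vectors of per-layer offsets read with
# .take(l); this network indexes BA[0..9] and BW[0..8] (values illustrative):
#     BA = theano.shared(np.zeros(10, dtype='int32'))
#     BW = theano.shared(np.zeros(9, dtype='int32'))
#     logits = feedForward(x, params, 8, BA, BW)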