    def backward(dY, cache):
        Xe = cache['Xe']
        generator_str = cache['generator_str']
        dWs = np.zeros(cache['Ws_shape'])
        gen_caches = cache['gen_caches']
        F = cache['F']
        dXe = np.zeros(Xe.shape)

        Generator = decodeGenerator(generator_str)

        # backprop each item in the batch
        grads = {}
        for i in xrange(len(gen_caches)):
            ix, gen_cache = gen_caches[i]  # unpack
            local_grads = Generator.backward(dY[i], gen_cache)
            dXs = local_grads['dXs']  # intercept the gradients wrt Xi and Xs
            del local_grads['dXs']
            dXi = local_grads['dXi']
            del local_grads['dXi']
            accumNpDicts(grads, local_grads)  # add up the gradients wrt model parameters

            # now backprop from dXs to the image vector and word vectors
            dXe[i, :] += dXi  # image vector
            for n, j in enumerate(ix):  # and now all the other words
                dWs[j, :] += dXs[n, :]

        # finally backprop into the image encoder
        dWe = F.transpose().dot(dXe)
        dbe = np.sum(dXe, axis=0, keepdims=True)

        accumNpDicts(grads, {'We': dWe, 'be': dbe, 'Ws': dWs})
        return grads
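
All of the snippets here fold per-example gradient dicts into the running grads dict with the helper accumNpDicts, which is not shown in this excerpt. A minimal sketch of what it is assumed to do (add numpy arrays into the destination dict key by key, creating any keys that are missing):

def accumNpDicts(d0, d1):
    # Sketch of the accumulator assumed above: add every array in d1
    # into d0 in place; keys missing from d0 are simply copied over.
    for k in d1:
        if k in d0:
            d0[k] += d1[k]
        else:
            d0[k] = d1[k]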
Example #3
    def backward(dY, cache):
        Xe = cache['Xe']
        generator_str = cache['generator_str']
        dWs = np.zeros(cache['Ws_shape'])
        gen_caches = cache['gen_caches']
        F = cache['F']
        dXe = np.zeros(Xe.shape)

        Generator = decodeGenerator(generator_str)
        _dummy, gen_cache = gen_caches[0]  # peek at one example's cache to grab the shared LSTM weights
        g_WLSTM = cuda.to_device(np.asfortranarray(gen_cache['WLSTM']))  # upload WLSTM to the GPU once per batch
        # backprop each item in the batch
        grads = {}
        dt1 = 0
        dt2 = 0
        t0 = time.time()
        for i in xrange(len(gen_caches)):
            t1 = time.time()
            ix, gen_cache = gen_caches[i]  # unpack
            local_grads = Generator.backward(dY[i], gen_cache, g_WLSTM)
            dt1 += time.time() - t1

            t2 = time.time()
            dXs = local_grads['dXs']  # intercept the gradients wrt Xi and Xs
            del local_grads['dXs']
            dXi = local_grads['dXi']
            del local_grads['dXi']
            accumNpDicts(grads, local_grads)  # add up the gradients wrt model parameters
            # now backprop from dXs to the image vector and word vectors
            dXe[i, :] += dXi  # image vector
            for n, j in enumerate(ix):  # and now all the other words
                dWs[j, :] += dXs[n, :]

            dt2 += time.time() - t2

            #dt = time.time() - t0
            #print 'BP :%0.4f' %(dt)

        dt = time.time() - t0
        print 'Backward Pass:%0.4f Others :%0.4f' % (dt1, dt2)
        t0 = time.time()
        # finally backprop into the image encoder
        dWe = F.transpose().dot(dXe)
        dbe = np.sum(dXe, axis=0, keepdims=True)

        dt = time.time() - t0
        print 'MMult :%0.4f' % (dt)
        t0 = time.time()

        accumNpDicts(grads, {'We': dWe, 'be': dbe, 'Ws': dWs})
        dt = time.time() - t0
        print 'accum 2:%0.4f' % (dt)
        t0 = time.time()
        return grads
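
The closing encoder step in the snippets above uses the standard linear-layer backprop rule: with Xe = F.dot(We) + be, the gradients are dWe = F.T.dot(dXe) and dbe = the sum of dXe over the batch rows. The following self-contained numeric check of that rule uses toy sizes and a made-up scalar loss; every name and dimension in it is an illustrative assumption, not part of the snippets.

import numpy as np

np.random.seed(0)
F = np.random.randn(5, 4)          # toy batch: 5 images, 4-dim CNN features
We = np.random.randn(4, 3)         # encoder weights
be = np.random.randn(1, 3)         # encoder bias

def loss(We, be):
    Xe = F.dot(We) + be            # forward: linear image encoder
    return np.sum(Xe ** 2)         # made-up scalar loss, just for the check

Xe = F.dot(We) + be
dXe = 2 * Xe                       # analytic dloss/dXe for the loss above
dWe = F.T.dot(dXe)                 # the rule used in the snippets
dbe = np.sum(dXe, axis=0, keepdims=True)

eps = 1e-6                         # finite-difference check on one weight entry
We_p = We.copy(); We_p[0, 0] += eps
numeric = (loss(We_p, be) - loss(We, be)) / eps
print(abs(numeric - dWe[0, 0]))    # should be tiny (forward-difference error, well below 1e-3)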
Example #4
    def backward(dY, cache):
        generator_str = cache['generator_str']
        gen_caches = cache['gen_caches']
        X_v1_orig = cache['X_v1_orig']
        X_v2_orig = cache['X_v2_orig']
        X_v3_orig = cache['X_v3_orig']
        We_v1 = cache['We_v1']
        dWe_v1 = np.zeros(We_v1.shape)
        be_v1 = cache['be_v1']
        dbe_v1 = np.zeros(be_v1.shape)
        We_v2 = cache['We_v2']
        dWe_v2 = np.zeros(We_v2.shape)
        be_v2 = cache['be_v2']
        dbe_v2 = np.zeros(be_v2.shape)
        We_v3 = cache['We_v3']
        dWe_v3 = np.zeros(We_v3.shape)
        be_v3 = cache['be_v3']
        dbe_v3 = np.zeros(be_v3.shape)

        Generator = decodeGenerator(generator_str)

        # backprop each item in the batch
        grads = {}
        for i in xrange(len(gen_caches)):
            ix, gen_cache = gen_caches[i]  # unpack
            local_grads = Generator.backward(dY[i], gen_cache)
            dX_v1 = local_grads['dX_v1']
            dX_v2 = local_grads['dX_v2']
            dX_v3 = local_grads['dX_v3']
            del local_grads['dX_v1']
            del local_grads['dX_v2']
            del local_grads['dX_v3']
            accumNpDicts(grads, local_grads)  # add up the gradients wrt model parameters

            dWe_v1 += X_v1_orig[i].dot(dX_v1)
            dbe_v1 += np.sum(dX_v1, axis=0, keepdims=True)
            dWe_v2 += X_v2_orig[i].dot(dX_v2)
            dbe_v2 += np.sum(dX_v2, axis=0, keepdims=True)
            dWe_v3 += X_v3_orig[i].dot(dX_v3)
            dbe_v3 += np.sum(dX_v3, axis=0, keepdims=True)

        accumNpDicts(
            grads, {
                'We_v1': dWe_v1,
                'We_v2': dWe_v2,
                'We_v3': dWe_v3,
                'be_v1': dbe_v1,
                'be_v2': dbe_v2,
                'be_v3': dbe_v3
            })
        return grads
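
The three per-view blocks above differ only in the view index, so the same bookkeeping can be written as a loop over view names. The sketch below is a hedged refactor of that snippet rather than a drop-in replacement: the key names ('We_v1', 'be_v1', 'dX_v1', 'X_v1_orig', ...) are taken from the code above, while the function signature and the helpers passed in as arguments are assumptions.

import numpy as np

def backward_views(dY, cache, Generator, accumNpDicts):
    # Same arithmetic as the three-view backward above, with the
    # v1/v2/v3 blocks collapsed into a loop over view names.
    views = ['v1', 'v2', 'v3']
    gen_caches = cache['gen_caches']
    dWe = {v: np.zeros(cache['We_' + v].shape) for v in views}  # per-view encoder weight grads
    dbe = {v: np.zeros(cache['be_' + v].shape) for v in views}  # per-view encoder bias grads

    grads = {}
    for i in range(len(gen_caches)):
        ix, gen_cache = gen_caches[i]  # ix unused here; kept for the (ix, cache) tuple format
        local_grads = Generator.backward(dY[i], gen_cache)
        for v in views:
            dX = local_grads.pop('dX_' + v)          # intercept the per-view input gradient
            dWe[v] += cache['X_%s_orig' % v][i].dot(dX)
            dbe[v] += np.sum(dX, axis=0, keepdims=True)
        accumNpDicts(grads, local_grads)             # remaining model-parameter gradients

    accumNpDicts(grads, {'We_' + v: dWe[v] for v in views})
    accumNpDicts(grads, {'be_' + v: dbe[v] for v in views})
    return grads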