def predict_test(batch, model, params, **kwparams):
    """
    Encode all images in `batch` and run the generator in prediction mode on each.

    Some code duplication here with the forward pass, but we keep the freedom
    to let the two paths diverge in the future.

    batch   : list of dicts; each item has x['image']['feat'] (image feature
              vector) and, when LDA is enabled, x['image']['topics'].
              NOTE(review): forward() reads topics from x['topics'] instead of
              x['image']['topics'] — confirm which layout the data loader uses.
    model   : dict of weights ('We', 'be', 'Ws', optionally 'Wlda').
    params  : run parameters ('lda', 'generator', 'guide', ...).
    kwparams: forwarded to Generator.predict(); may carry 'ccaweights'.

    Returns a list, one generator prediction per batch item.
    """
    # Stack all image features as rows: F is N x image_size.
    F = np.row_stack(x['image']['feat'] for x in batch)
    lda_enabled = params.get('lda', 0)

    # BUGFIX: rows of L correspond to batch items (L[i,:] below), so the
    # placeholder must be len(batch) rows — the previous code allocated
    # params.get('image_encoding_size',128) rows, which misaligns the
    # placeholder and would IndexError for large batches. forward() already
    # uses len(batch) here.
    L = np.zeros((len(batch), lda_enabled))
    if lda_enabled:
        L = np.row_stack(x['image']['topics'] for x in batch)
        Wlda = model['Wlda']
        lda = L.dot(Wlda)  # per-item LDA topic projection, N x lda_size

    We = model['We']
    be = model['be']
    Xe = F.dot(We) + be  # Xe becomes N x image_encoding_size

    # Consistent with forward(): default to 'lstm' instead of raising KeyError.
    generator_str = params.get('generator', 'lstm')
    Generator = decodeGenerator(generator_str)

    guide_input = params.get('guide', None)
    Ys = []
    for i, x in enumerate(batch):
        Xi = Xe[i, :]
        # Pick the guide signal for this item; cca pulls its weights from kwparams.
        if guide_input == 'cca':
            guide = get_guide(guide_input, F[i, :], kwparams.get('ccaweights'))
        else:
            guide = get_guide(guide_input, F[i, :], L=L[i, :])
        # When LDA is on and the guide is unset (falsy) or 'image',
        # override the guide with the LDA projection.
        if lda_enabled and (not guide_input or guide_input == "image"):
            guide = lda[i, :]
            print('guide = lda')
        gen_Y = Generator.predict(Xi, guide, model, model['Ws'], params, **kwparams)
        Ys.append(gen_Y)
    return Ys
def forward(batch, model, params, misc, predict_mode = False):
    """
    Run the encoder + generator forward pass over every item in `batch`.

    Image encoding is done for the whole batch in one matrix multiply for
    efficiency; the RNN generator is then invoked per image-sentence pair and
    only ever sees plain matrices of vectors — it knows nothing about images
    or sentences.

    Returns (Ys, cache): per-item generator outputs, plus the bookkeeping
    needed for backprop (cache is empty when predict_mode is True).
    """
    # Encode all images at once. Rows of img_feats are per-item feature
    # vectors, so img_feats is N x image_size for N image-sentence pairs.
    img_feats = np.row_stack(x['image']['feat'] for x in batch)
    We = model['We']
    be = model['be']
    encoded = img_feats.dot(We) + be  # N x image_encoding_size

    # Optional LDA topic projection. topic_mat keeps a zero placeholder so
    # L[i,:] lookups below stay valid even when LDA is off.
    lda_enabled = params.get('lda', 0)
    topic_mat = np.zeros((len(batch), lda_enabled))
    if lda_enabled != 0:
        Wlda = model['Wlda']
        topic_mat = np.row_stack(x['topics'] for x in batch)
        lda_proj = topic_mat.dot(Wlda)

    # Pick the generator implementation to use.
    generator_str = params.get('generator', 'lstm')
    Generator = decodeGenerator(generator_str)
    guide_input = params.get('guide', None)

    # Word embedding table; row 0 is the start token.
    wordtoix = misc['wordtoix']
    Ws = model['Ws']

    gen_caches = []
    Ys = []  # per-item generator outputs
    for i, x in enumerate(batch):
        # Look up each in-vocab word of this sentence, prefixed by the start
        # token (index 0), and stack their embedding rows into one matrix.
        ix = [0] + [wordtoix[w] for w in x['sentence']['tokens'] if w in wordtoix]
        Xs = np.row_stack([Ws[j, :] for j in ix])
        Xi = encoded[i, :]

        # Choose the guide vector for this item.
        if guide_input == 'cca':
            guide = get_guide(guide_input, img_feats[i, :], misc['ccaweights'])
        else:
            guide = get_guide(guide_input, img_feats[i, :], L=topic_mat[i, :])
        # LDA overrides the guide when no guide was requested (falsy) or when
        # the plain image guide was requested.
        if lda_enabled != 0 and (not guide_input or guide_input == "image"):
            guide = lda_proj[i, :]

        # Forward prop through the RNN.
        gen_Y, gen_cache = Generator.forward(Xi, Xs, guide, model, params, predict_mode = predict_mode)
        gen_caches.append((ix, gen_cache))
        Ys.append(gen_Y)

    # In training mode, stash everything the backward pass will need.
    cache = {}
    if not predict_mode:
        cache['gen_caches'] = gen_caches
        cache['Xe'] = encoded
        if lda_enabled:
            cache['lda'] = lda_proj
        cache['Ws_shape'] = Ws.shape
        cache['F'] = img_feats
        cache['L'] = topic_mat
        cache['generator_str'] = generator_str
        cache['lda_enabled'] = lda_enabled
        cache['guide'] = guide_input
    return Ys, cache