# BLEU-4 evaluation pass: collect ground-truth references and beam-search
# predictions for the first 40 batches of BLEU4loader.
# NOTE(review): this chunk references model_idx / total_step, so it appears
# to sit inside an enclosing loop over saved model checkpoints whose header
# is not visible here — confirm against the full file.
encoder.eval()
with torch.no_grad():
    all_ref = []   # reference captions, extended batch-by-batch
    all_pred = []  # beam-search predictions (token-id sequences)
    for i, (images, batch_captions) in enumerate(BLEU4loader):
        # Evaluate on the first 40 batches only. The original used
        # `continue` here, which kept draining the whole DataLoader while
        # doing no work; `break` stops iteration with identical results.
        if i >= 40:
            break
        all_ref.extend(batch_captions)
        images = images.to(device)
        # Encode the images and generate predicted captions via beam search.
        feature = encoder(images)
        all_pred.extend(decoder.beam_search(feature))
        # Periodic progress report with the accumulated container shapes.
        if (i + 1) % 10 == 0:
            print('model_idx {}, Step [{}/{}], shape: {}, {}'.format(
                model_idx, i + 1, total_step,
                (len(all_ref), len(all_ref[0]), len(all_ref[0][0])),
                (len(all_pred), len(all_pred[0]))))
decoder.load_state_dict(state_dict['decoder_state_dict']) break # test decoder.eval() encoder.eval() with torch.no_grad(): # Prepare an image images, original_images = load_images(img_paths, transform) images = images.to(device) # Generate an caption from the image feature = encoder(images) print('Encoder finished') pred_ids = decoder.beam_search(feature) print('beam search finished') # Convert word_ids to words pred_captions = [] for pred_id in pred_ids: temp = [] for word_id in pred_id: temp.append(idx2word[word_id]) if temp[-1] == '<end>': #pred_captions.append(' '.join(temp)) break if len(temp) > 8: temp[len(temp) // 2] = temp[len(temp) // 2] + '\n' pred_captions.append(' '.join(temp)) print('finished caption generation')