def __init__(self, w_path, dec_path='dataset/text/dec_map.pkl', enc_path='dataset/text/enc_map.pkl', embedding_path='pre_trained/glove.6B.100d.txt'):
    """Build the captioning pipeline and restore trained weights.

    Loads the id->word and word->id vocabulary maps, seeds the caption
    model's embedding layer from GloVe vectors, instantiates the Inception
    feature extractor, and finally restores model weights from `w_path`.

    Args:
        w_path: path to the trained Keras weight file to restore.
        dec_path: pickle file mapping token ids -> words.
        enc_path: pickle file mapping words -> token ids.
        embedding_path: GloVe text file used to build the embedding matrix.
    """
    # Context managers close the pickle handles promptly; the original
    # left both files open until garbage collection.
    with open(dec_path, 'rb') as f:
        dec_map = cPickle.load(f)
    with open(enc_path, 'rb') as f:
        enc_map = cPickle.load(f)
    embedding_matrix = generate_embedding_matrix(embedding_path, dec_map)
    self.model = image_caption_model(embedding_matrix=embedding_matrix)
    self.extractor = ImageFeatureExtractor('weights/tensorflow_inception_graph.pb')
    self.model.load_weights(w_path)
    self.dec_map = dec_map
    self.enc_map = enc_map
def predict(num_of_pic, char_to_int, int_to_char, test_set,
            weights_path='demo.h5v1.0.0_60_15_1496111479.97.h5'):
    """Greedily decode a caption for one image from `test_set`.

    Feeds the start token and the image features to the model, then at
    each step takes the argmax token, stopping at the end marker.

    Args:
        num_of_pic: index of the image in `test_set`.
        char_to_int: token -> id map ('$' = start marker, '#' = end marker).
        int_to_char: id -> token map.
        test_set: sequence of pre-extracted image feature vectors.
        weights_path: Keras weight file to load (parameterized; the default
            preserves the original hard-coded path).

    Returns:
        list of decoded tokens (the end marker is excluded).
    """
    model = image_caption_model()
    model.load_weights(weights_path)
    img_input = np.array([test_set[num_of_pic]])
    sentence = []
    lang_input = char_to_int['$']  # start-of-caption marker
    # NOTE(review): `max_len` is read from module scope — confirm it is
    # defined in the module this function lives in.
    for _ in range(max_len):
        X = [img_input, np.array(lang_input).reshape(-1, 1)]
        prediction = model.predict(X)
        index = np.argmax(prediction)
        if index == char_to_int['#']:  # end-of-caption marker
            break
        lang_input = index  # feed the predicted token back in
        sentence.append(int_to_char[index])
    return sentence
# NOTE(review): this chunk starts mid-function — the three statements below are
# the tail of an enclosing definition whose header is outside this view;
# indentation is a best-effort reconstruction.
    test_dict[i] = d[i[len(images):]]  # key by the path with the image-dir prefix stripped — confirm
    print(len(test_img))
    return test_img, test_dict


if __name__ == '__main__':
    # Evaluation driver: load vocab maps, test-image features, and a trained
    # checkpoint, then (historically) run human-readable caption evaluation.
    #path = sys.argv[1]
    dec_map = pickle.load(open("dec_map.pkl", 'rb'))
    enc_map = pickle.load(open("enc_map.pkl", 'rb'))
    dict = process()  # NOTE(review): shadows the builtin `dict`
    #img_train = pickle.load(open("/general/home/ronakchaudhary132199/Image-Captioning-master/encoded_images_inceptionV3.p", 'rb'))
    img_test = pickle.load(open("../encoded_images_test_inceptionV3.p", 'rb'))
    # vocab_size is hard-coded — presumably matches len(dec_map); verify.
    model = image_caption_model(vocab_size=8256, clipnorm=1.)
    model.load_weights("weights/v1.0.0_6_39_1524863089.8904815.h5")
    #model.load_weights("")")
    #eval_human(model, img_train, df_cap, enc_map, dec_map, k=1, size=40, max_len=13)
    '''
    print("____________________________________________________________________________________________________________________________")
    model.load_weights("/general/home/manish.singhal/attention/weights/v1.0.0_60_69_1523845852.7416637.h5")
    print(generate_k_best(model, enc_map, dec_map, img1, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img2, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img3, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img4, 3,13))
    print(generate_k_best(model, enc_map, dec_map, img5, 3,13))
    '''
    ##===============================================================================================###
import pickle
import sys

import numpy as np

from extractor import ImageFeatureExtractor
from model import image_caption_model

if __name__ == '__main__':
    # Greedy caption decoding demo: extract Inception features for the image
    # given on the command line, then predict one token per step.
    # Fix: the original used `sys` and `pickle` without importing them,
    # which raised NameError at runtime.
    max_sent_len = 28
    model_path = './weights/v1.0.0_11_0_1494239663.5093253_602.h5'
    image_path = sys.argv[1]

    ife = ImageFeatureExtractor('model/inception_v3_2016_08_28_frozen.pb')
    with open('./train/word2idx.pkl', 'rb') as f:
        word2idx = pickle.load(f)
    with open('./train/idx2word.pkl', 'rb') as f:
        idx2word = pickle.load(f)

    vocab_size = len(word2idx) + 1  # +1 presumably reserves index 0 for padding — confirm
    model = image_caption_model(vocab_size=vocab_size)
    model.load_weights(model_path)

    start_sign = word2idx['+']  # '+' is the start-of-sentence token
    img = np.array([ife.extract_features(image_path)])
    cur = np.array([[start_sign]])
    vhist = np.array([[0] * vocab_size])  # already an ndarray; dropped the redundant re-wrap
    answer = []
    # NOTE(review): neither `cur` nor `vhist` is advanced inside this loop as
    # written — the update step may live beyond this chunk; confirm.
    for idx in range(0, max_sent_len):
        # One-hot position indicator for the current decoding step.
        seq = np.array([[1 if i == idx else 0 for i in range(0, max_sent_len)]])
        out = model.predict([img, cur, seq, vhist])[0]
        nxt = int(np.argmax(out))
        ans = idx2word.get(nxt, '<?>')
        print(ans, 'score:', out[nxt])
# NOTE(review): chunk starts mid-function — the loop below is the tail of a
# caption-decoding helper (builds `gen` from token ids in `ans`); its header is
# outside this view and indentation is a best-effort reconstruction.
    for x in ans:
        if x == 1 :  # id 1 appears to be the end-of-caption marker — confirm
            break
        if x != 0 :  # id 0 looks like padding — confirm
            gen.append(dec_map[x])
    return ' '.join(gen)


def eval_human(model, img_map, df_cap, enc_map, dec_map, k=4, size=1, max_len=10):
    """Print generated vs. ground-truth captions for `size` random caption rows.

    Args:
        model: trained caption model passed through to `generate_k_best`.
        img_map: img_id -> image feature vector.
        df_cap: DataFrame with 'caption' (stringified id list) and 'img_id' columns.
        enc_map, dec_map: word<->id vocabulary maps.
        k: beam width for `generate_k_best`.
        size: number of random rows to sample.
        max_len: maximum generated caption length.
    """
    for idx in np.random.randint(df_cap.shape[0], size=size):
        row = df_cap.iloc[idx]
        # NOTE(review): eval() on CSV contents — unsafe if the file is untrusted.
        cap = eval(row['caption'])
        img_id = row['img_id']
        img = img_map[img_id]
        gen = generate_k_best(model, enc_map, dec_map, img, k=k, max_len=max_len)
        print('[{}]'.format(img_id))
        print('[generated] {}'.format(gen))
        # Slice 1..len-2 skips the start/end marker tokens in the ground truth.
        print('[groundtruth] {}'.format(' '.join([dec_map[cap[i]] for i in range(1,len(cap)-1)])))


if __name__ == '__main__':
    # Eyeball-evaluate the checkpoint whose path is given on the command line.
    path = sys.argv[1]
    dec_map = cPickle.load(open('dataset/text/dec_map.pkl', 'rb'))
    enc_map = cPickle.load(open('dataset/text/enc_map.pkl', 'rb'))
    img_train = cPickle.load(open('dataset/train_img2048.pkl', 'rb'))
    img_test = cPickle.load(open('dataset/test_img2048.pkl', 'rb'))
    df_cap = pd.read_csv('dataset/text/train_enc_cap.csv')
    model = image_caption_model(clipnorm=1.)
    model.load_weights(path)
    eval_human(model, img_train, df_cap, enc_map, dec_map, k=1, size=40, max_len=13)
# NOTE(review): training-script fragment — it may sit inside a __main__ guard
# whose header is outside this view, and the epoch/batch loop body continues
# past this chunk; indentation is a best-effort reconstruction.
hist_path = 'history/'  # where loss history is written — confirm against the unseen loop body
mdl_path = 'weights/'   # where checkpoints are written — confirm against the unseen loop body

# read pkl
dec_map = cPickle.load(open('dataset/text/dec_map.pkl', 'rb'))
enc_map = cPickle.load(open('dataset/text/enc_map.pkl', 'rb'))
img_train = cPickle.load(open('dataset/train_img2048.pkl', 'rb'))
img_test = cPickle.load(open('dataset/test_img256.pkl', 'rb'))
df_cap = pd.read_csv('dataset/text/train_enc_cap.csv')

vocab_size = len(dec_map)
# Seed the embedding layer from GloVe vectors.
embedding_matrix = generate_embedding_matrix(
    'pre_trained/glove.6B.100d.txt', dec_map)
model = image_caption_model(vocab_size=vocab_size, embedding_matrix=embedding_matrix)
# Optionally resume from a checkpoint given on the command line.
if len(sys.argv) >= 2:
    print('load weights from : {}'.format(sys.argv[1]))
    model.load_weights(sys.argv[1])

# insert ur version name here
version = 'v1.0.0'
batch_num = 70  # mini-batches per epoch

print_summary(model.layers)
hist_loss = []
for i in range(0, 100):  # epochs
    for j in range(0, batch_num):  # mini-batches
        s = time.time()  # per-batch timer; loop body continues beyond this chunk
# NOTE(review): alternate training-script fragment — the epoch/batch loop body
# continues past this chunk; indentation is a best-effort reconstruction.
''' # initialization hist_path = 'history/' mdl_path = 'weights/' # read pkl dec_map = cPickle.load(open('dataset/text/dec_map.pkl', 'rb')) enc_map = cPickle.load(open('dataset/text/enc_map.pkl', 'rb')) img_train = cPickle.load(open('dataset/train_img2048.pkl', 'rb')) img_test = cPickle.load(open('dataset/test_img256.pkl', 'rb')) df_cap = pd.read_csv('dataset/text/train_enc_cap.csv') '''

# NOTE(review): buffering=0 in text mode 'w' is a Python 2 idiom for an
# unbuffered stdout; on Python 3, os.fdopen(..., 'w', 0) raises ValueError
# ("can't have unbuffered text I/O") — confirm the target interpreter version.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

vocab_size = 11573  # hard-coded vocabulary size — presumably matches the enc/dec maps; verify
#embedding_matrix = generate_embedding_matrix('pre_trained/glove.6B.100d.txt', dec_map)
model = image_caption_model()
# Optionally resume from a checkpoint given on the command line.
if len(sys.argv) >= 2:
    print('load weights from : {}'.format(sys.argv[1]))
    model.load_weights(sys.argv[1])

# insert ur version name here
version = 'v1.0.0'
batch_num = 30  # mini-batches per epoch

#print_summary(model.layers)
hist_loss = []
for i in range(0, 40):  # epochs
    for j in range(0, batch_num):  # mini-batches
        s = time.time()  # per-batch timer; loop body continues beyond this chunk