def load_model(self):
    print("Loading model with an input size of: [" +
          str(self.input_width) + "," + str(self.input_height) + "]")
    graph = tf.Graph()
    with graph.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(),
            os.path.join(self.model_dir, "model.ckpt-" + str(self.checkpoint)))
    graph.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(
        os.path.join(self.model_dir, "word_counts.txt"))

    sess = tf.Session(graph=graph)
    restore_fn(sess)
    generator = caption_generator.CaptionGenerator(model, vocab)

    self._sess = sess
    self._generator = generator
    self._vocab = vocab
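# Hypothetical companion method (an illustration, not from the original
# source): once load_model() has populated self._sess, self._generator and
# self._vocab, captioning an image is a beam search plus id-to-word lookup.
# The name caption_image and the image-bytes argument are assumptions.
def caption_image(self, image_bytes):
    captions = self._generator.beam_search(self._sess, image_bytes)
    results = []
    for caption in captions:
        # Ignore begin and end words.
        words = [self._vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        results.append((" ".join(words), math.exp(caption.logprob)))
    return results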
def inference(path):
    # build the inference graph
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper(FLAGS.rnn_type)
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # `path` was an undefined name in the original; it is now a parameter.
    filename = path

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        with tf.gfile.GFile(filename, "rb") as f:
            image = f.read()
        captions = generator.beam_search(sess, image)
        print("Captions for image %s:" % os.path.basename(filename))
        global sentences
        sentences = []
        for i, caption in enumerate(captions):
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            sentences.append(sentence)
            print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
def main(_):
    # Pickle files must be opened in binary mode.
    with open(FLAGS.keyword_pickle_file, 'rb') as f:
        keyword_data = cPickle.load(f)
    with open(FLAGS.test_json_path) as f:
        test_json = json.load(f)
    id_to_filename = test_json['images']
    id_to_path = [{'path': os.path.join(FLAGS.image_path, x['file_name']),
                   'id': x['id']} for x in id_to_filename]
    result_json = []

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        for data in id_to_path:
            filename = data['path']
            # Read the image as raw bytes ("rb", not "r").
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(
                sess, image, keyword_data[os.path.basename(filename)])
            print("Captions for image %s:" % os.path.basename(filename))
            result = {
                'image_id': data['id'],
                'caption': (" ".join(
                    [vocab.id_to_word(w)
                     for w in captions[0].sentence[1:-1]])).decode('utf-8')
            }
            print(result)
            result_json.append(result)

    with open(os.path.join(FLAGS.temp_path, "result.json"), 'w') as f:
        json.dump(result_json, f)

    # Evaluate the generated captions against the COCO ground truth.
    coco = COCO(FLAGS.test_json_path)
    cocoRes = coco.loadRes(os.path.join(FLAGS.temp_path, "result.json"))
    cocoEval = COCOEvalCap(coco, cocoRes)
    cocoEval.evaluate()
def predict(args_):
    checkpoint_path = args_.checkpoint_path
    words_file = args_.words_file
    image_file = args_.path

    if not os.path.exists(checkpoint_path):
        print('checkpoint path does not exist.')
        exit(0)
    if not os.path.exists(words_file):
        print('words file not found.')
        exit(0)

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(words_file)

    if os.path.isdir(image_file):
        # A directory of images was passed in.
        with tf.Session(graph=g) as sess:
            restore_fn(sess)
            generator = caption_generator.CaptionGenerator(model, vocab)
            file_names = [os.path.join(image_file, i)
                          for i in os.listdir(image_file)
                          if i.lower().endswith(('.jpg', '.jpeg', '.png'))]
            file_names = [i for i in file_names if os.path.isfile(i)]
            for f in file_names:
                with tf.gfile.GFile(f, "rb") as img_file:
                    image = img_file.read()
                captions = generator.beam_search(sess, image)
                print("Captions for image %s:" % os.path.basename(f))
                for i, caption in enumerate(captions):
                    # Ignore begin and end words.
                    sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                    sentence = " ".join(sentence)
                    print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                # Show the image with OpenCV. Note: cv2.imread takes a flags
                # argument, not a color-conversion code, so the original
                # cv2.COLOR_BGR2RGB argument was dropped.
                image_array = cv2.imread(f)
                cv2.imshow('image', image_array)
                cv2.waitKey(0)
    elif os.path.isfile(image_file):
        # A single image file was passed in.
        with tf.Session(graph=g) as sess:
            restore_fn(sess)
            generator = caption_generator.CaptionGenerator(model, vocab)
            with tf.gfile.GFile(image_file, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(image_file))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
    else:
        print('image path: {} not found.'.format(image_file))
        exit(0)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    model_path = '/Users/harshpyati/personal/fyp/text_gen/model.ckpt-2000000'
    vocab_path = '/Users/harshpyati/personal/fyp/text_gen/word_counts.txt'
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   model_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_path)

    # FLAGS.input_files arrives as the string form of a Python list, e.g.
    # "[u'img1.jpg', u'img2.jpg']". The original chain of replace() calls
    # overwrote `word` on each condition instead of cleaning cumulatively;
    # strip the brackets, the u prefix and the quotes from each token instead.
    all_files = FLAGS.input_files.split(',')
    files = []
    for fil in all_files:
        word = fil.strip().lstrip('[').rstrip(']').strip()
        if word.startswith("u'") or word.startswith('u"'):
            word = word[1:]
        word = word.strip('\'"')
        files.append(word)

    filenames = []
    with tf.Session(graph=g) as sess:
        generator = caption_generator.CaptionGenerator(model, vocab)
        # Load the model from checkpoint.
        restore_fn(sess)
        all_captions = []
        for file_pattern in files:
            # Note: the globbed filenames are collected for logging, but the
            # pattern itself is read below, so each entry must be a concrete
            # file path.
            filenames.extend(tf.gfile.Glob(file_pattern))
            tf.logging.info("Running caption generation on %d files matching %s",
                            len(filenames), file_pattern)
            with tf.gfile.GFile(file_pattern, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            for index, caption in enumerate(captions):
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                data = {
                    "name": file_pattern,
                    "caption": sentence
                }
                all_captions.append(data)
                # Keep only the top-scoring caption per image.
                break
        print(all_captions)
def main(_):
    assert FLAGS.checkpoint_path, "--checkpoint_path is required"
    assert FLAGS.vocab_file, "--vocab_file is required"
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.output, "--output is required"

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph(FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    results = []
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    with tf.Session(graph=g,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        t_start = time.time()
        files = tf.gfile.Glob(FLAGS.input_file_pattern)
        for i, filename in enumerate(files):
            if i % 100 == 0:
                print(i)
            # Read the image as raw bytes ("rb", not "r").
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            image_id = filename.split('.')[0]
            if "/" in image_id:
                image_id = image_id.split("/")[-1]
            result = {}
            result['image_id'] = image_id
            if FLAGS.predict_attributes_only:
                attributes_ids, attributes_probs = generator.predict_attributes(
                    sess, image)
                attributes = [vocab.id_to_word(w) for w in attributes_ids]
                result['attributes'] = " ".join(attributes)
                result['probabilities'] = " ".join(
                    [str(prob) for prob in attributes_probs])
            else:
                captions = generator.beam_search(sess, image)
                sent = [vocab.id_to_word(w) for w in captions[0]]
                result['caption'] = "".join(sent)
            results.append(result)
        t_end = time.time()
        print("time: %f" % (t_end - t_start))

    with open(FLAGS.output, 'w') as output:
        json.dump(results, output, ensure_ascii=False, indent=4)
def main(_):
    assert FLAGS.checkpoint_path, "--checkpoint_path is required"
    assert FLAGS.vocab_file, "--vocab_file is required"
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.output, "--output is required"

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph(FLAGS.checkpoint_path)
        init_fn = tf.local_variables_initializer()
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    results = []
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    with tf.Session(graph=g,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        sess.run(init_fn)
        tf.train.start_queue_runners()

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        t_start = time.time()
        try:
            i = 0
            while True:
                if i % 10 == 0:
                    print(i * FLAGS.batch_size)
                i += 1
                image_names, final_captions = generator.batched_beam_search(sess)
                sents = [
                    "".join([vocab.id_to_word(w) for w in captions[0]])
                    for captions in final_captions
                ]
                image_names = image_names.tolist()
                for name, sent in zip(image_names, sents):
                    result = {}
                    result['image_id'] = name
                    result['caption'] = sent
                    results.append(result)
        except Exception as e:
            # The input queue raises OutOfRangeError when exhausted; any
            # exception ends the loop here.
            print(e)
        t_end = time.time()
        print("time: %f" % (t_end - t_start))

    with open(FLAGS.output, 'w') as output:
        json.dump(results, output, ensure_ascii=False, indent=4)
def main(_): # Build the inference graph. g = tf.Graph() with g.as_default(): model = inference_wrapper.InferenceWrapper() restore_fn = model.build_graph_from_config(configuration.ModelConfig(), FLAGS.checkpoint_path) g.finalize() # Create the vocabulary. vocab = vocabulary.Vocabulary(FLAGS.vocab_file) filenames = [] for file_pattern in FLAGS.input_files.split(","): filenames.extend(tf.gfile.Glob(file_pattern)) tf.logging.info("Running caption generation on %d files matching %s", len(filenames), FLAGS.input_files) with tf.Session(graph=g) as sess: # Load the model from checkpoint. restore_fn(sess) # Prepare the caption generator. Here we are implicitly using the default # beam search parameters. See caption_generator.py for a description of the # available beam search parameters. generator = caption_generator.CaptionGenerator(model, vocab) image_id_caption = [] j = 0 for filename in filenames: with tf.gfile.GFile(filename, "rb") as f: image = f.read() captions = generator.beam_search(sess, image) j += 1 print(j) print("Captions for image %s:" % os.path.basename(filename)) for i, caption in enumerate(captions): # Ignore begin and end words. # print(caption.sentence[1:-1]) sentence = [ vocab.id_to_word(w) for w in caption.sentence[1:-1] ] # print(sentence) sentence = "".join(sentence) print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob))) if not i: image_id_caption.append( { "image_id": filename.split('/')[-1].replace( ".jpg", ""), "caption": sentence }, ) image_id_caption = json.dumps(image_id_caption).encode('utf-8') data = json.loads(image_id_caption) with open(FLAGS.captions_file, 'w') as f: json.dump(data, f) print("Saving captions file to path %s" % FLAGS.captions_file)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Instead of globbing file patterns with tf.gfile.Glob (as the stock
    # im2txt script does), read a list of absolute image paths from a file.
    # note: assumes FLAGS.input_files == 'utils/test_file_abspath_flickr8k'
    with open(FLAGS.input_files, 'r') as f:
        filenames = f.readlines()
    filenames = [filename.strip() for filename in filenames]
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    with tf.Session(graph=g, config=session_config) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        json_file = list()
        for count, filename in enumerate(filenames):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            # beam_search returns beam_size candidate captions.
            captions = generator.beam_search(sess, image)
            for i, caption in enumerate(captions):
                img_caption_dict = {}
                img_caption_dict['filename'] = os.path.basename(filename)
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                img_caption_dict['caption'] = sentence
                json_file.append(img_caption_dict)
            if count % 50 == 0:
                print("counter: %d" % count)

    store_json_file("im2txt_flickr8k_cap_google.json", json_file)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Collect all .jpg files under FLAGS.image_dir. Use os.path.join rather
    # than string concatenation so the path separator is never dropped.
    filenames = []
    for dirpath, _, filelist in os.walk(FLAGS.image_dir):
        for filename in filelist:
            origin_name = os.path.join(dirpath, filename)
            if origin_name.endswith('.jpg'):
                filenames.append(origin_name)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        res = []
        num = 1
        for filename in filenames:
            imgid_sentence = {}
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = "".join(sentence)
                if i == 0:
                    if num % 100 == 0:
                        print("Captions for image %s:" % os.path.basename(filename))
                        print("%d) %s (p=%f)" % (num, sentence,
                                                 math.exp(caption.logprob)))
                    imgid_sentence['image_id'] = \
                        os.path.basename(filename).split('.')[0]
                    imgid_sentence['caption'] = sentence
                    res.append(imgid_sentence)
            num = num + 1

    with io.open(FLAGS.out_predict_json, 'w', encoding='utf-8') as fd:
        fd.write(unicode(json.dumps(res, ensure_ascii=False, sort_keys=True,
                                    indent=2, separators=(',', ': '))))
    assert len(filenames) == len(res)
    print("Finished processing %d images!" % len(filenames))
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            # Open in binary mode ("rb"), not text mode; see
            # https://github.com/tensorflow/tensorflow/issues/11312
            with tf.gfile.GFile(filename, 'rb') as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
    print("DONE :)")
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        infer = inference_wrapper.InferenceWrapper()
        restore_fn = infer.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Load files from the provided directory.
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running text detection on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        filenames.sort()

        # Predict.
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                # Read the image.
                cv_img = cv2.imread(filename)
                image = f.read()

                # Make the prediction.
                tic = time.time()
                text_bboxes = infer.inference_step(sess, image)
                toc = time.time()
                print("Prediction for image %s in %.3f ms" %
                      (os.path.basename(filename), (toc - tic) * 1000))

                # Draw each detected box with its confidence score.
                for i in range(len(text_bboxes)):
                    text = "{}: {:.3f}".format(i, float(text_bboxes[i][4]))
                    cv2.putText(cv_img, text,
                                (int(text_bboxes[i][0]) + 5,
                                 int(text_bboxes[i][1]) + 16),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                    cv2.rectangle(cv_img,
                                  (int(text_bboxes[i][0]), int(text_bboxes[i][1])),
                                  (int(text_bboxes[i][2]), int(text_bboxes[i][3])),
                                  (0, 0, 255), 2)
                # The window must be created with WINDOW_NORMAL for
                # cv2.resizeWindow to take effect.
                cv2.namedWindow('image', cv2.WINDOW_NORMAL)
                cv2.resizeWindow('image', 1500, 900)
                cv2.imshow('image', cv_img)
                k = cv2.waitKey(0)
                if k == ord('n'):
                    cv2.destroyAllWindows()
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Read test images from a fixed local directory instead of globbing
    # FLAGS.input_files. (Listing the files before logging fixes the original
    # "0 files" log message.)
    test_path = r'C:\Users\PSIML-1.PSIML-1\Desktop\projekti\Image-Captioning\test_data'
    filenames = os.listdir(test_path)
    tf.logging.info("Running caption generation on %d files in %s",
                    len(filenames), test_path)

    config_sess = tf.ConfigProto()
    config_sess.gpu_options.allow_growth = True
    with tf.Session(graph=g, config=config_sess) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # captions_index = preprocess_captions()
        j = 0
        for filename in filenames:
            full_fname = os.path.join(test_path, filename)
            with tf.gfile.GFile(full_fname, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            best_captions = []
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                best_captions.append(" %d) %s\n" % (i, sentence))
            # image_idx = int(filename.split('.')[0].split('_')[2])
            # true_captions = captions_index[image_idx]
            plot_image(full_fname, None, best_captions, j)
            j += 1
def main(argv):
    inputfile = ' '
    outputfile = ' '
    try:
        # Note: both -i and -o take arguments, so both need a trailing colon.
        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print("input/output error ")
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('usage: python run_inference.py -i <inputfile> -o <outputfile>')
            sys.exit()
        elif opt in ('-i', '--ifile'):
            inputfile = arg
        elif opt in ('-o', '--ofile'):
            outputfile = arg

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    if inputfile == ' ':
        for file_pattern in FLAGS.input_files.split(","):
            filenames.extend(tf.gfile.Glob(file_pattern))
    else:
        for file_pattern in inputfile.split(","):
            filenames.extend(tf.gfile.Glob(file_pattern))

    with tf.Session(graph=g) as sess:
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        for filename in filenames:
            with tf.gfile.FastGFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            # print("Captions for image %s using NIC model:" % os.path.basename(filename))
            prob = []
            for i, caption in enumerate(captions):
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                prob.append(caption.logprob)
            # Keep only one caption for further processing: prefer a caption
            # containing 'UNK' (i.e. the model picked up text in the image
            # itself); otherwise fall back to the one with the largest logprob.
            for caption in captions:
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                if 'UNK' in sentence:
                    final = sentence
                    break
                if caption.logprob == max(prob):
                    final = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                    final = ' '.join(final)

    img = Image.open(FLAGS.input_files)
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        out = []
        for filename in filenames:
            with tf.gfile.FastGFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                if i == 0:
                    out = sentence

    # Note: this assumes FLAGS.input_files is a single concrete filename,
    # not a glob pattern or a comma-separated list.
    img = Image.open(FLAGS.input_files)
    plt.imshow(img)
    plt.axis('off')
    plt.title(str(out))
    plt.show()
def model_predict(img_path):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        # Note: os.path.abspath(__file__), not the literal string '__file__'.
        checkpoint_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'models')
        print(checkpoint_path)
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary("./data/word_counts.txt")

    filenames = []
    for file_pattern in img_path.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), img_path)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        out_data = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                preds = (str(i + 1) + ") " + sentence + " (p=" +
                         str(round(math.exp(caption.logprob), 6)) + ")")
                out_data.append(preds)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

    out_json = json.dumps(out_data)
    # print(out_json)
    return out_json
def main(_):
    # Change tensor name
    # rename_model_ckpt()

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        df = pd.DataFrame(columns=["id", "caption"])
        for idx, filename in enumerate(filenames):
            df.loc[idx, "id"] = filename.split('/')[-1]
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                if i == 0:
                    df.loc[idx, "caption"] = sentence
    df.to_csv("df_ph2.csv")
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    filenames = list(
        filter(lambda x: x.endswith('.jpg'), os.listdir(FLAGS.input_files_dir)))
    filenames = [
        os.path.join(FLAGS.input_files_dir, filename) for filename in filenames
    ]
    # (The original message said "de-rain infer", left over from another
    # project; this script runs caption generation.)
    print("Running caption generation on %d files from directory: %s" %
          (len(filenames), FLAGS.input_files_dir))
    print(filenames)

    index_word = model.index_word

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(graph=g, config=config) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default parameters.
        generator = CaptionGenerator(model)

        if not os.path.exists("plot_attention"):
            os.makedirs("plot_attention")

        for i, filename in tqdm(enumerate(filenames)):
            # This generator takes a filename directly rather than raw
            # image bytes.
            captions = generator.beam_search(sess, filename)  # beam_size captions
            print("Captions for image %s:" % os.path.basename(filename))
            for j, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence_list = [index_word[w] for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence_list)
                print(" %d) %s (p=%f)" % (j, sentence, math.exp(caption.logprob)))
                if j == 1:
                    print(len(caption.attenplot))
                    plot_attention(filename, sentence_list, caption.attenplot)
def main(_):
    start_time = time.time()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # Keep only the .png images (this also skips files such as Thumbs.db).
    filenames = [
        f for f in os.listdir(FLAGS.test_img_dir) if f.endswith('png')
    ]
    print('There are totally {0} images.....'.format(len(filenames)))

    checkpoint_file = FLAGS.checkpoint_path + 'model.ckpt-1783119'
    submit_json_file = '{0}submit_{1}_inception.json'.format(
        FLAGS.submit_json_dir, checkpoint_file.split('/')[-1])

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper(cnn_model='InceptionV3')
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_file)
    g.finalize()

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        count, result = 0, []
        generator = caption_generator.CaptionGenerator(model, vocab)
        for filename in filenames:
            count += 1
            with open(FLAGS.test_img_dir + filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            sentence = [vocab.id_to_word(w) for w in captions[0].sentence[1:-1]]
            sentence = ''.join(sentence)
            image_id = filename.split('.')[0]
            result.append({'caption': sentence, 'image_id': image_id})
            if count % 500 == 0:
                print('finish generating caption for {0} images'.format(count))
        print('finish totally {0} images'.format(count))

    with open(submit_json_file, encoding='utf8', mode='w') as f:
        json.dump(result, f, ensure_ascii=False)
    print('time consuming: {0}s'.format(time.time() - start_time))
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        infer = inference_wrapper.InferenceWrapper()
        restore_fn = infer.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Initialize the vocabulary lookup table.
        infer.model.vocab_table.init.run(session=sess)

        filenames.sort()

        # Predict.
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                # Predict the transcription.
                tic = time.time()
                image = f.read()
                pred_chars = infer.inference_step(sess, image)[0][0]
                pred_word = "".join([item for item in pred_chars])
                auto_correct_word = spell(pred_word)
                toc = time.time()

                # Print out the result.
                print("Prediction for image %s in %.3f ms" %
                      (os.path.basename(filename), (toc - tic) * 1000))
                print("predicted word: %s" % pred_word)
                print("auto correct word: %s" % auto_correct_word)
                print("*" * 50)

                # Show the image.
                cv_img = cv2.imread(filename)
                cv2.imshow('image', cv_img)
                k = cv2.waitKey(0)
                if k == ord('n'):
                    cv2.destroyAllWindows()
def mainfunction(currentimagename):
    # Change tensor name
    # rename_model_ckpt()

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    imageName = input_files + currentimagename
    filenames = []
    for file_pattern in imageName.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), imageName)

    # Initialize so the final return is defined even if no files matched.
    displayCaption = ""
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
        translator = Translator()

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                # Translate the English caption to Japanese.
                result = translator.translate(sentence, src='en', dest='ja')
                # print(" %d) %s (p=%f)" % (i, result, math.exp(caption.logprob)))
                if i == 0:
                    displayCaption = result.text
                    print(displayCaption)
                    return displayCaption
    return displayCaption
def main(_):
    #####
    # model_config.input_file_pattern = FLAGS.input_file_pattern
    #####

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)  # 40.000 100

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = "0"
    with tf.Session(config=config, graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
def load_model_im2txt(self, checkpoint_path, vocab_file):
    global g
    global model
    global restore_fn
    global vocab

    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)
    print('Model loaded.')
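# Hedged usage sketch for load_model_im2txt (hypothetical, not part of the
# original source): the module-level globals g, model, restore_fn and vocab
# set above are enough to caption a single image.
def caption_image_im2txt(image_path):
    with tf.Session(graph=g) as sess:
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        with tf.gfile.GFile(image_path, "rb") as f:
            image = f.read()
        captions = generator.beam_search(sess, image)
        # Return the top beam, ignoring the begin and end tokens.
        return " ".join(vocab.id_to_word(w) for w in captions[0].sentence[1:-1])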
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
        # The graph is deliberately left unfinalized: a JPEG-encode op is
        # added below, after the captioning graph is built.
        # g.finalize()

        # Build the encode op once, fed through a placeholder, instead of
        # calling tf.image.encode_jpeg inside the capture loop (the original
        # created the op outside this graph, which sess.run cannot execute,
        # and would have added a new op on every frame).
        frame_placeholder = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        encode_op = tf.image.encode_jpeg(frame_placeholder)

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        font = cv2.FONT_HERSHEY_SIMPLEX
        cap = cv2.VideoCapture(0)
        while True:
            ret, frame = cap.read()
            # cap.set(cv2.CAP_PROP_FPS, 1)
            # cap.set(3, 1000)
            # cap.set(4, 1000)
            # Encode the raw frame to JPEG bytes for the caption generator.
            image = sess.run(encode_op, feed_dict={frame_placeholder: frame})
            captions = generator.beam_search(sess, image)
            # Caption the frame with the top beam only.
            caption = captions[0]
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            print(" %s (p=%f)" % (sentence, math.exp(caption.logprob)))
            cv2.putText(frame, sentence, (0, 20), font, 0.6, (255, 255, 255), 2)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()
def analyze(FLAGS):
    # Run on CPU only.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS["checkpoint_path"])
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS["vocab_file"])

    filenames = []
    for file_pattern in FLAGS["input_files"].split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS["input_files"])

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        caption_texts = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                caption_texts.append(sentence)

    return caption_texts
def img_captions(file_inputs):
    # file_inputs: [checkpoint_path, vocab_file, comma-separated file patterns]
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   file_inputs[0])
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(file_inputs[1])

    filenames = []
    for file_pattern in file_inputs[2].split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), file_inputs[2])

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        caption_list = list()
        prob_list = list()
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            # This modified generator also returns per-caption probabilities.
            captions, probs = generator.beam_search(sess, image)
            prob_list.append('[' + ", ".join(map(str, probs)) + ']')
            loc_cap_list = list()
            for i, caption in enumerate(captions):
                # Ignore begin and end words, and anything after a stray <S>.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence).split('<S>')[0]
                loc_cap_list.append([sentence, math.exp(caption.logprob)])
            caption_list.append(loc_cap_list)
    return prob_list, caption_list
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
        # Build a decode op once, fed through a placeholder, so each JPEG can
        # be displayed with matplotlib. (The original called
        # tf.image.decode_jpeg inside the loop, outside this graph, which
        # sess.run cannot execute.)
        image_placeholder = tf.placeholder(dtype=tf.string)
        decode_op = tf.image.decode_jpeg(image_placeholder)

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    data_dir = FLAGS.input_files
    tf_file_pattern = os.path.join(data_dir, '*.jpg')
    for file_pattern in tf_file_pattern.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            frame = sess.run(decode_op, feed_dict={image_placeholder: image})
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            s = ""
            for i, caption in enumerate(captions):
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print("%d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                if i == 0:
                    s = sentence
            plt.axis('off')
            plt.title(s)
            plt.imshow(frame)
            plt.show()
def mess(filename):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   "model.ckpt-2000000")
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary("word_counts.txt")

    filenames = [filename]

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            allsen = ""
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                # Join all candidate captions with "|".
                allsen += sentence + "|"
    return allsen
def initGraph():
    tf.logging.set_verbosity(tf.logging.INFO)

    displayconfig = configuration.DisplayConfig()
    checkpoint_path = displayconfig.checkpoint_path
    vocab_file = displayconfig.vocab_file

    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    v = vocabulary.Vocabulary(vocab_file)

    s = tf.Session(graph=g)
    # Load the model from checkpoint.
    restore_fn(s)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    return s, v, caption_generator.CaptionGenerator(model, v)
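# Hypothetical caller for initGraph() above (an assumption for illustration,
# not part of the original source): the returned session, vocabulary and
# caption generator are all that is needed to caption one image file.
def captionImage(image_path):
    s, v, generator = initGraph()
    with tf.gfile.GFile(image_path, "rb") as f:
        image = f.read()
    captions = generator.beam_search(s, image)
    for i, caption in enumerate(captions):
        sentence = " ".join(v.id_to_word(w) for w in caption.sentence[1:-1])
        print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
    s.close()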
def main(_): # Build the inference graph. g = tf.Graph() with g.as_default(): model = inference_wrapper.InferenceWrapper() restore_fn = model.build_graph_from_config(configuration.ModelConfig(), FLAGS.checkpoint_path) g.finalize() # Create the vocabulary. vocab = vocabulary.Vocabulary(FLAGS.vocab_file) filenames = [] #for file_pattern in FLAGS.input_files.split(","): # filenames.extend(tf.gfile.Glob(file_pattern)) tf.logging.info("Running caption generation on %d files matching %s", len(filenames), FLAGS.input_files) config_sess = tf.ConfigProto() config_sess.gpu_options.allow_growth = True with tf.Session(graph=g, config=config_sess) as sess: # Load the model from checkpoint. restore_fn(sess) test_path = r'C:\Users\PSIML-1.PSIML-1\Desktop\projekti\Image-Captioning\test_gradient' for filename in filenames: full_fname = os.path.join(test_path, filename) with tf.gfile.GFile(full_fname, "rb") as f: image = f.read() initial_state = model.feed_image(sess, image) for i in range(20): softmax, new_states, metadata = model.inference_step( sess, input_feed, state_feed)