Example #1
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
  stream = Stream(FLAGS.stream_config)

  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)

    while True:
      image = stream.next_frame()
      captions = generator.beam_search(sess, image)
      for i, caption in enumerate(captions):
        # Ignore begin and end words.
        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
Example #2
def run():
    """Runs evaluation in a loop, and logs summaries to TensorBoard."""
    # Create the evaluation directory if it doesn't exist.
    eval_dir = FLAGS.eval_dir
    if not tf.gfile.IsDirectory(eval_dir):
        tf.logging.info("Creating eval directory: %s", eval_dir)
        tf.gfile.MakeDirs(eval_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model_config.input_file_pattern = FLAGS.input_file_pattern
        model = show_and_tell_model.ShowAndTellModel(model_config, mode="eval")
        model.build()

        # Create the summary operation and the summary writer.
        # (tf.merge_all_summaries / tf.train.SummaryWriter in pre-1.0 TF.)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(eval_dir)

        g.finalize()

        # Run a new evaluation run every eval_interval_secs.
        while True:
            start = time.time()
            tf.logging.info(
                "Starting evaluation at " +
                time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()))
            run_once(model, summary_writer, summary_op)
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
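Example 2 calls run_once, which is not shown. A minimal sketch under stated assumptions: a module-level saver built before g.finalize(), a FLAGS.checkpoint_dir flag, and queue-fed eval inputs (the upstream im2txt evaluate.py passes the saver in explicitly):

# Hypothetical sketch of the run_once helper called above. `saver` and
# FLAGS.checkpoint_dir are assumptions; the saver must be created before the
# graph is finalized.
def run_once(model, summary_writer, summary_op):
    model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if not model_path:
        tf.logging.info("Skipping evaluation: no checkpoint found in %s",
                        FLAGS.checkpoint_dir)
        return
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        global_step = tf.train.global_step(sess, model.global_step)
        # Start the input queue runners, evaluate the summaries once, then
        # shut the queues down cleanly.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        summary_writer.add_summary(sess.run(summary_op), global_step)
        summary_writer.flush()
        coord.request_stop()
        coord.join(threads)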
Example #3
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
    """ 
  filenames = []
  for file_pattern in FLAGS.input_files.split(","):
    filenames.extend(tf.gfile.Glob(file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames), FLAGS.input_files)
  """
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        old_id = getLastId()
        while True:
            try:
                new_id = getLastId()
                if new_id > old_id:
                    print(new_id)
                    old_id = new_id
                    cur.execute(
                        "SELECT image FROM images ORDER BY id DESC LIMIT 1")
                    data = cur.fetchall()
                    file_like = BytesIO(
                        data[0]['image'])  #tf.gfile.Glob(file_pattern)
                    image = file_like.getvalue()

                    #img=PIL.Image.open(file_like)
                    #image = tf.image.decode_jpeg(str(list(img.getdata())));

                    captions = generator.beam_search(sess, image)
                    #print("Captions for image %s:" % os.path.basename(filename))
                    for i, caption in enumerate(captions):
                        # Ignore begin and end words.
                        sentence = [
                            vocab.id_to_word(w) for w in caption.sentence[1:-1]
                        ]
                        sentence = " ".join(sentence)
                        #print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                        cur.execute(
                            "INSERT INTO img2txt(id_image,caption,caption_p) VALUES (%s,%s,%s)",
                            (old_id, sentence, math.exp(caption.logprob)))
                        db.commit()
            except KeyboardInterrupt:
                db.close()
                break
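Example 3 polls the database through getLastId(), which is not shown. A minimal sketch, assuming the same module-level dict-cursor `cur` and `images` table used in the queries above:

# Hypothetical sketch of the getLastId helper used above.
def getLastId():
    cur.execute("SELECT MAX(id) AS id FROM images")
    row = cur.fetchone()
    return row["id"] if row and row["id"] is not None else 0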
Example #4
def load_im2txt(model_file, vocabulary_file):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(
            configuration.ModelConfig(),
            model_file,
        )
    g.finalize()

    global caption_generator
    global session
    global vocabulary

    # Create the vocabulary.
    vocabulary = im2txt_vocab.Vocabulary(vocabulary_file)

    # Create session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(graph=g, config=config_proto)
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    caption_generator = im2txt_generator.CaptionGenerator(model, vocabulary, beam_size=2)
    session = sess
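A hypothetical call site for load_im2txt, going through the module globals it populates; the file paths are illustrative:

# Hypothetical usage of load_im2txt: caption one JPEG from disk.
import math

load_im2txt("model.ckpt-3000000", "word_counts.txt")
with tf.gfile.GFile("test.jpg", "rb") as f:
    image = f.read()
for i, caption in enumerate(caption_generator.beam_search(session, image)):
    words = [vocabulary.id_to_word(w) for w in caption.sentence[1:-1]]
    print("  %d) %s (p=%f)" % (i, " ".join(words), math.exp(caption.logprob)))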
Example #5
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(), CHECKPOINT_PATH)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(VOCAB_FILE)

  # The original snippet never defines `filenames`; an input glob is assumed
  # here, with INPUT_FILE_PATTERN defined alongside CHECKPOINT_PATH and
  # VOCAB_FILE.
  filenames = tf.gfile.Glob(INPUT_FILE_PATTERN)

  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)

    for filename in filenames:
      with tf.gfile.GFile(filename, "rb") as f:
        image = f.read()
      captions = generator.beam_search(sess, image)
      print("Captions for image %s:" % os.path.basename(filename))
      for i, caption in enumerate(captions):
        # Ignore begin and end words.
        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
Example #6
    def Init(self):
        # Build the inference graph.
        g = tf.Graph()
        with g.as_default():
            self.model = inference_wrapper.InferenceWrapper()
            restore_fn = self.model.build_graph_from_config(
                configuration.ModelConfig(), FLAGS.checkpoint_path)
        g.finalize()

        # Create the vocabulary.
        self.vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

        self.sess = tf.Session(graph=g)
        # Load the model from checkpoint.
        restore_fn(self.sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        self.generator = caption_generator.CaptionGenerator(
            self.model, self.vocab)

        #for filename in filenames:
        #  with tf.gfile.GFile(filename, "r") as f:
        #    image = f.read()
        #  captions = generator.beam_search(sess, image)
        #  print("Captions for image %s:" % os.path.basename(filename))
        #  for i, caption in enumerate(captions):
        #    # Ignore begin and end words.
        #    sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        #    sentence = " ".join(sentence)
        #    print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
        print("loaded\n")
Example #7
def init():
    print("caption_generator :: init :: Start")

    global sess, vocab, model, g, generator, is_initialized

    if not is_initialized:
        is_initialized = True
        # Build the inference graph.
        g = tf.Graph()
        with g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_fn = model.build_graph_from_config(
                configuration.ModelConfig(), FLAGS.checkpoint_path)
        g.finalize()

        # Create the vocabulary.
        vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

        # Load the model from checkpoint.
        sess = tf.Session(graph=g)
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        print("caption_generator :: init :: End")
Example #8
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  """ 
  filenames = []
  for file_pattern in FLAGS.input_files.split(","):
    filenames.extend(tf.gfile.Glob(file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames), FLAGS.input_files)
  """
  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)
    
    video = cv2.VideoCapture(0)
    while True:
      try:
        _, img = video.read()
        # The graph's decode op expects encoded JPEG bytes; feeding raw frame
        # data (img.tobytes(), tf.stack(img), and the other commented attempts
        # in the original) failed with:
        #   ERROR: Invalid JPEG data, size 921600
        #   [[Node: decode/DecodeJpeg = DecodeJpeg[...]]]
        # Encoding each frame as JPEG before feeding fixes this.
        image = cv2.imencode(".jpg", img)[1].tobytes()

        captions = generator.beam_search(sess, image)
        #print("Captions for image %s:" % os.path.basename(filename))
        for i, caption in enumerate(captions):
          # Ignore begin and end words.
          sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
          sentence = " ".join(sentence)
          print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
        
        cv2.imshow('Image', img)
        cv2.waitKey(15)
      except KeyboardInterrupt:
        break
Example #9
def main(_):

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.

        generator = caption_generator.CaptionGenerator(model, vocab)
        translator = Translator()

        #---
        DEVICE_ID = 0
        capture = cv2.VideoCapture(DEVICE_ID)
        if not capture.isOpened():
            raise IOError("Failed to open capture device")
        cv2.namedWindow("Capture", cv2.WINDOW_AUTOSIZE)

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_size = 1
        font_color = (0, 255, 0)
        #---

        while True:
            ret, img = capture.read()
            if not ret:
                # Skip this iteration if the camera returned no frame.
                continue

            if threading.activeCount() == 1:
                th = CaptionThread(sess, generator, translator, vocab, img)
                th.start()

            # `text` is assumed to be a module-level list of caption strings
            # updated by CaptionThread (see the sketch after this example).
            for i, txt in enumerate(text):
                cv2.putText(img, txt, (10, 30 * (i + 1)), font, font_size,
                            font_color)  # Japanese fonts may not be available.

            cv2.imshow("Capture", img)

            key = cv2.waitKey(1)
            if key == 27:  # ESC key
                break

        cv2.destroyAllWindows()
        capture.release()
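Examples 9 and 15 hand frames to a CaptionThread that is not shown. A minimal sketch, assuming a module-level `text` list read by the display loop and JPEG-bytes input to beam_search; the Translator argument from Example 9 is omitted for brevity:

# Hypothetical sketch of the CaptionThread used in Examples 9 and 15. It runs
# one beam search in the background and replaces the module-level `text` list
# that the display loop draws onto the frame.
import threading

text = []

class CaptionThread(threading.Thread):
    def __init__(self, sess, generator, vocab, frame):
        super(CaptionThread, self).__init__()
        self._sess = sess
        self._generator = generator
        self._vocab = vocab
        self._frame = frame

    def run(self):
        global text
        image = cv2.imencode(".jpg", self._frame)[1].tobytes()
        captions = self._generator.beam_search(self._sess, image)
        text = [
            " ".join(self._vocab.id_to_word(w) for w in c.sentence[1:-1])
            for c in captions
        ]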
Example #10
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
    """ 
  filenames = []
  for file_pattern in FLAGS.input_files.split(","):
    filenames.extend(tf.gfile.Glob(file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames), FLAGS.input_files)
  """
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        cur.execute("SELECT DISTINCT image_id,image_url FROM annotations")
        urls = cur.fetchall()
        for url in urls:  # iterate over the fetched URL rows
            try:
                response = requests.get(url['image_url'])
                file_like = BytesIO(response.content)
                image = file_like.getvalue()

                #img=PIL.Image.open(file_like)
                #image = tf.image.decode_jpeg(str(list(img.getdata())));

                captions = generator.beam_search(sess, image)
                #print("Captions for image %s:" % os.path.basename(filename))
                caption = captions[0]
                #for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                #print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                #cur.execute("INSERT INTO img2txt(id_image,caption,caption_p) VALUES (%s,%s,%s)",(old_id,sentence,math.exp(caption.logprob)))
                #db.commit();
                query = (
                    "UPDATE annotations SET image_caption=%s WHERE image_id=%s"
                )
                cur.execute(query, (sentence, url['image_id']))
                # Persist each update (the commit was commented out above).
                db.commit()
                print(sentence)
            except KeyboardInterrupt:
                db.close()
                break
Example #11
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  with open('/home/mtian/im2txt/im2txt/test.txt', 'r') as f:
    filenames = f.readlines()

  # Buffer for the JSON output written at the end.
  aa = []
  
  #for file_pattern in FLAGS.input_files.split(","):
  #  filenames.extend(tf.gfile.Glob(file_pattern))
  #tf.logging.info("Running caption generation on %d files matching %s",
    #              len(filenames), FLAGS.input_files)

  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)

    '''
      with tf.gfile.GFile(filename, "r") as f:
        image = f.read()
    '''
    
    for filename1 in filenames:
      filename = filename1.strip()

      image = np.load("/home/mtian/visiual_feature/0.125_val/" + filename[:-3] + "npy")
      captions = generator.beam_search(sess, image)
      print("Captions for image %s:" % filename)
      for i, caption in enumerate(captions):
        # Ignore begin and end words.
        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        # Record only the top-scoring caption for the JSON output.
        if i == 0:
            captiondict = {"image_id": int(filename[-10:-4]), "caption": sentence}
            aa.append(captiondict)
        print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
    # Write the collected captions to JSON.
    with open('0.125_var_1.json', 'w') as json_file:
        json.dump(aa, json_file)
Example #12
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    document = Document()

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)

            str1 = ("Captions for image %s:" % os.path.basename(filename))
            print(str1)

            paragraph = document.add_paragraph("inferent:%s" % filename)
            paragraph.paragraph_format.page_break_before = True
            document.add_picture(filename, width=Inches(3))
            document.add_paragraph(str1)

            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)

                str1 = ("  %d) %s (p=%f)" %
                        (i, sentence, math.exp(caption.logprob)))
                print(str1)

                document.add_paragraph(str1)

        output_dir = ("%s/Captions.docx" % FLAGS.output_dir)
        document.save(output_dir)
Example #13
def caption_image(data_folder, output_folder):
    # instances.jsonl lives under the part of data_folder before its first "m".
    id_folder = join(data_folder.split("m")[0], "instances.jsonl")
    images = []
    ids_dict = {}
    for file in os.listdir(data_folder):
        images.append(join(data_folder, file))

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in tqdm(images):
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)

            # Just take the first caption and display it
            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)

            with open(id_folder, 'r', encoding='utf8') as instances:
                for instance in instances:
                    instance = loads(instance)
                    photo_name = re.split("[_']", str(instance["postMedia"]))

                    if len(photo_name) == 7:
                        if photo_name[2] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                        if photo_name[5] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                    elif len(photo_name) == 4:
                        if photo_name[2] == filename.split("_")[1]:
                            ids_dict[sentence] = instance["id"]
                    else:
                        continue

    with open(output_folder, 'w', encoding="utf8") as output_file:
        output_file.write(str(ids_dict))
Example #14
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    data = {}

    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "r") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            key = os.path.basename(filename)

            caption = captions[0]
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            # Strip the leading article (captions typically start with "a ")
            # and capitalize the first remaining word.
            sentence = sentence[2].upper() + sentence[3:]
            if (sentence.startswith("Bunch of")
                    or sentence.startswith("Group of")):
                sentence = sentence[9].upper() + sentence[10:]
            if sentence.startswith("Close up of") and random.random() > 0.5:
                sentence = sentence[12].upper() + sentence[13:]
            if sentence.endswith("."):
                sentence = sentence[:-1]

            data[key] = sentence
            print("  %d) %s (p=%f)" % (0, sentence, math.exp(caption.logprob)))

        with open('data.json', 'w') as outfile:
            json.dump(data, outfile)
Example #15
def main(_):

  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.

    generator = caption_generator.CaptionGenerator(model, vocab)

    #---
    DEVICE_ID = 0
    capture = cv2.VideoCapture(DEVICE_ID)
    if not capture.isOpened():
        raise IOError("Failed to open capture device")
    cv2.namedWindow("Capture", cv2.WINDOW_NORMAL)

    #---
    os.system("clear")
    print("\nplease wait...\n")

    while True:

      ret, img = capture.read()
      if not ret:
          # Skip this iteration if the camera returned no frame.
          continue

      if threading.activeCount() == 1:
          th = CaptionThread(sess, generator, vocab, img)
          th.start()

      cv2.imshow("Capture", img)

      if cv2.waitKey(1) & 0xFF == ord('q'):
          break

    capture.release()
    cv2.destroyAllWindows()
Example #16
    def __init__(self, checkpoint_path, vocab_file):
        self.vocab_file = vocab_file
        self.checkpoint_path = checkpoint_path
        g = tf.Graph()
        with g.as_default():
            self.model = inference_wrapper.InferenceWrapper()
            restore_fn = self.model.build_graph_from_config(
                configuration.ModelConfig(), self.checkpoint_path)
        g.finalize()
        self.vocab = vocabulary.Vocabulary(self.vocab_file)
        self.sess = tf.Session(graph=g)
        restore_fn(self.sess)
        tf.logging.info('Tensorflow Session initialized for im2txt module.')
Example #17
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    # List every file in the input directory (FLAGS.input_files is a directory
    # path here, unlike the comma-separated patterns used in other examples).
    image_names = os.listdir(FLAGS.input_files)
    filenames = []
    for file_p in image_names:
        file_pattern = FLAGS.input_files + file_p
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
        fw = open(FLAGS.output_file, 'w')
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            fw.write("Captions for image %s:\n" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                print("  %d) %s (p=%f)" %
                      (i, sentence, math.exp(caption.logprob)))
                fw.write("  %d) %s (p=%f)\n" %
                         (i, sentence, math.exp(caption.logprob)))
Example #18
def run_caption(checkpoint_path, filenames, heuristic_amount):
    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig()
        model = show_and_tell_model.ShowAndTellModel(model_config,
                                                     mode="inference")
        model.build()
        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()
        g.finalize()

    def _restore_fn(sess):
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

    restore_fn = _restore_fn

    # Create the vocabulary.
    vocab = glove.load(model_config.config)[0]

    run_results = []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the list of image bytes for evaluation.
        images = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                images.append(f.read())
        captions = [
            sess.run(model.final_seqs,
                     feed_dict={
                         "image_feed:0": img,
                         model.heuristic_temperature: heuristic_amount
                     }) for img in images
        ]

        for i, caption in enumerate(captions):
            run_results.append({"filename": filenames[i], "captions": []})
            for j in range(caption.shape[1]):
                # Ignore begin and end words.
                c = caption[0, j, :].tolist()
                sentence = [vocab.id_to_word(w) for w in c[:-1]]
                sentence = " ".join(sentence)
                run_results[i]["captions"].append(sentence)

    return run_results
Example #19
def gen_caption(path):
    # Download the image to a local temp file (assumes temp/ exists).
    with urllib.request.urlopen(path) as url:
        with open('temp/temp.jpg', 'wb') as f:
            f.write(url.read())
    input_files = 'temp/temp.jpg'

    # only print serious log messages
    tf.logging.set_verbosity(tf.logging.FATAL)
    # load our pretrained model
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    filenames = []
    for file_pattern in input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        captionlist = []

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                print("  %d) %s (p=%f)" %
                      (i, sentence, math.exp(caption.logprob)))
                captionlist.append(sentence)
    return {"captions": captionlist}
Example #20
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        saver = tf.train.Saver()
        saver.save(sess, FLAGS.output_path + "/im2txt-model")
        tf.train.write_graph(sess.graph_def, FLAGS.output_path, "graph.pbtxt")
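A hypothetical way to reload what Example 20 exports; the "output/im2txt-model" prefix is illustrative and must match the FLAGS.output_path used at export time:

# Hypothetical reload of the checkpoint and meta graph written by Example 20.
g = tf.Graph()
with g.as_default():
    # import_meta_graph rebuilds the graph; restore fills in the variables.
    saver = tf.train.import_meta_graph("output/im2txt-model.meta")
    with tf.Session(graph=g) as sess:
        saver.restore(sess, "output/im2txt-model")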
Example #21
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               check_point_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(vocab_file)

  filenames = []
  for file_pattern in input_files.split(","):
    filenames.extend(tf.gfile.Glob("Flicker8k_Dataset/"+file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames))
 
  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)
    start_time = time.time()

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)
    print("=======================About to iterate==================================")
   # print(filenames)
    for filename in filenames:
      print("=========current:============= ", filename)
      with tf.gfile.GFile(filename, "rb") as f:
        image = f.read()
      captions = generator.beam_search(sess, image)
      print("Captions for image %s:" % os.path.basename(filename))
      s = []
      p = []
      for i, caption in enumerate(captions):
        # Ignore begin and end words.
        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        s.append(sentence)
        p.append(math.exp(caption.logprob))
      # Keep only the most probable caption.
      main_s = s[p.index(max(p))]
      print("  %d) %s (p=%f)" % (0, main_s, max(p)))
      write_caption(main_s, filename, border=(0, 0, 0, 100))

    end_time = time.time()
    print("time elapsed {0:.1f} sec".format(end_time - start_time))
Example #22
    def __init__(self, checkpoint_path, vocab_file):

        # Build the inference graph.
        self.g = tf.Graph()
        with self.g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_fn = model.build_graph_from_config(
                configuration.ModelConfig(), checkpoint_path)
        self.g.finalize()

        self.vocab = vocabulary.Vocabulary(vocab_file)
        self.sess = tf.Session(graph=self.g)
        restore_fn(self.sess)
        self.generator = caption_generator.CaptionGenerator(model, self.vocab)
Example #23
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  """ 
  filenames = []
  for file_pattern in FLAGS.input_files.split(","):
    filenames.extend(tf.gfile.Glob(file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames), FLAGS.input_files)
  """
  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)
    
    old_id = getLastId()

    img_path = "C:/Users/win8/Documents/PF/image2text/imagenes/"

    for index in range(1, 7):
      path = img_path + str(index) + ".jpg"
      img = cv2.imread(path)
      with tf.gfile.GFile(path, "rb") as f:
        image = f.read()

      captions = generator.beam_search(sess, image)
      caption = captions[0]
      # Ignore begin and end words.
      sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
      sentence = " ".join(sentence)
      cv2.imshow('Image', img)
      # The query parameter must be a one-element tuple, not a bare string.
      cur.execute("INSERT INTO audiotext(audiotextcol) VALUES (%s)", (sentence,))
      db.commit()
      cv2.waitKey(9500)
Example #24
def main(_):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(), FLAGS.checkpoint_path)
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

  filenames = glob.glob(FLAGS.input_files)
  # for file_pattern in FLAGS.input_files.split(","):
  #   filenames.extend(tf.gfile.Glob(file_pattern))
  # tf.logging.info("Running caption generation on %d files matching %s",
  #                 len(filenames), FLAGS.input_files)
  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    beam_size = int(FLAGS.beam_size)
    generator = caption_generator.CaptionGenerator(model, vocab, beam_size=beam_size)
    caption_dicts = [] 
    for i, filename in enumerate(filenames):
      with tf.gfile.GFile(filename, "rb") as f:
        image = f.read()
      if FLAGS.use_nn:
        captions = generator.consensus_NN(sess, image, FLAGS.caption_path, FLAGS.train_data_dir, FLAGS.pickle_file)
      else:
        captions = generator.beam_search(sess, image)
      image_id = int(filename.split('_')[-1].split('.')[0])
      if FLAGS.use_nn:
        sentence = captions
      else:
        sentence = [vocab.id_to_word(w) for w in captions[0].sentence[1:-1]]
        if sentence[-1] == '.':
          sentence = sentence[:-1]
        sentence = " ".join(sentence)
        sentence += '.'
      caption_dict = {'caption': sentence, 'image_id': image_id }
      caption_dicts.append(caption_dict)
      if i % 10 == 0:
          sys.stdout.write('\n%d/%d: (img %d) %s' %
                           (i, len(filenames), image_id, sentence))
   
    with open(FLAGS.dump_file, 'w') as outfile:
      json.dump(caption_dicts, outfile)
Example #25
def predict_images(filenames, vocab, n_sentences=2):
    """
    Use the latest model checkpoint to predict (infer) captions for part of the training images.
       ilenames: list of image filenames to infer
       n_sentence: number of sentences generated for each iamge, max=3

       return: list of captions predicted by the most recent ckpt. Each caption shall be a string
       eg: predict_seqs = [["I", "wish","to","get","rid","of","acne"],[******]]
       The real captions to be used in Imagemetadata is different.
       captions=[[u"<S>","I", "wish","to","get","rid","of","acne",".","</S>"]]
    """
    print('Using ckpt {0} to infer'.format(ckpt_foler))
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   ckpt_foler)
    g.finalize()

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)
        generator = caption_generator.CaptionGenerator(model, vocab)
        predict_seqs = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "r") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            if len(captions) < n_sentences:
                n_sentences = len(captions)
            for i in range(n_sentences):
                # Ignore begin and end words. sentence is a list of words
                sentence = [
                    vocab.id_to_word(w) for w in captions[i].sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                predict_seqs.append(sentence)

        global_step = model.global_stepp.eval()
        print('global step is {0} :D'.format(global_step))

    global current_folder
    current_folder = google_file_folder + str(global_step) + '/'
    utils.createfolder(current_folder)
    savingname = current_folder + 'pred_seqs.pkl'
    utils.save(predict_seqs, savingname,
               ('Predicted seqs are saved to %s :D') % savingname)
    print('total number of pred_seqs: %d' % len(predict_seqs))
    return savingname
Example #26
  def load(self):
    print("loading image captioning")
    self.imcap_graph = tf.Graph()
    with self.imcap_graph.as_default():
      model = inference_wrapper.InferenceWrapper()
      restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                           'models/im2txt/')
      self.imcap_graph.finalize()
      # TODO: this graph could benefit from being frozen.  Compression + speed enhancements
      self.vocab = vocabulary.Vocabulary('models/im2txt/word_counts.txt')
      self.sess = tf.Session(graph=self.imcap_graph)
      restore_fn(self.sess)
      self.generator = caption_generator.CaptionGenerator(
          model, self.vocab, beam_size=4, max_caption_length=17,
          length_normalization_factor=1.5)

    return self
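The TODO in Example 26 suggests freezing the graph. A hypothetical TF 1.x sketch of a method to add to the class above; the "softmax" output node name is borrowed from Example 27 and must be checked against the real inference graph:

# Hypothetical graph-freezing pass for the TODO above.
from tensorflow.python.framework import graph_util

def freeze(self, output_path):
    frozen = graph_util.convert_variables_to_constants(
        self.sess, self.imcap_graph.as_graph_def(), ["softmax"])
    with tf.gfile.GFile(output_path, "wb") as f:
        f.write(frozen.SerializeToString())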
Example #27
    def predict(self, sess, image_raw_feed, input_feed, mask_feed):
        tf.logging.info("Building model.")
        start_vars = set(x.name for x in tf.global_variables())
        self.build_model(configuration.ModelConfig(), image_raw_feed,
                         input_feed, mask_feed)
        end_vars = tf.global_variables()
        # Restore only the variables created by build_model above.
        restore_vars = [x for x in end_vars if x.name not in start_vars]
        saver = tf.train.Saver(var_list=restore_vars)
        restore_fn = self._create_restore_fn(FLAGS.checkpoint_path, saver)
        restore_fn(sess)
        sum_log_probs = sess.graph.get_tensor_by_name("batch_loss:0")
        logits = self.model.logits
        softmax = sess.graph.get_tensor_by_name("softmax:0")
        # return sum_log_probs, logits, softmax
        return sum_log_probs, softmax, logits
Example #28
    def __init__(self, model_path, vocab_path):
        self.model_path = model_path
        self.vocab_path = vocab_path
        self.g = tf.Graph()
        with self.g.as_default():
            self.model = inference_wrapper.InferenceWrapper()
            self.restore_fn = self.model.build_graph_from_config(
                configuration.ModelConfig(), model_path)
        self.g.finalize()
        self.vocab = vocabulary.Vocabulary(vocab_path)
        self.generator = caption_generator.CaptionGenerator(
            self.model, self.vocab)
        self.sess = tf.Session(graph=self.g)
        self.restore_fn(self.sess)
Example #29
def ImageCaption():
    # Build the inference graph.
    caption_string = {}
    cwd = os.getcwd()
    checkpoint_path = '{0}/im2txt/model/model.ckpt-3000000'.format(cwd)
    VOCAB_FILE = '{0}/im2txt/data/word_counts.txt'.format(cwd)
    #IMAGE_FILE='{0}/im2txt/data/COCO_val2014_000000224477.jpg'.format(cwd)
    IMAGE_FILE = '{0}/im2txt/data/Pic.jpg'.format(cwd)

    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(VOCAB_FILE)

    filenames = []
    for file_pattern in IMAGE_FILE.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), IMAGE_FILE)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        # The frame is read from ./current_frame.jpg; the IMAGE_FILE glob
        # above is left over and unused.
        image_data = tf.gfile.FastGFile("./current_frame.jpg", 'rb').read()

        captions = generator.beam_search(sess, image_data)
        print("Captions for this frame :")
        for i, caption in enumerate(captions):
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            caption_string['{}'.format(i)] = "  %d) %s (p=%f)" % (
                i, sentence, math.exp(caption.logprob))
            print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))

    return caption_string
Example #30
    def __init__(self, sess, checkpoint_path, vocab_file_path):
        self.sess = sess
        with open('config.yaml', 'r') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        self.preprocess_options = config['preprocess']

        # Build the inference graph.
        # Build into the caller's default graph so the new ops share a graph
        # with the session passed in.
        g = tf.get_default_graph()
        with g.as_default():
            model = inference_wrapper.InferenceWrapper()
            restore_fn = model.build_graph_from_config(
                configuration.ModelConfig(), checkpoint_path)

        # Create the vocabulary.
        self.vocab = vocabulary.Vocabulary(vocab_file_path)

        restore_fn(sess)
        self.generator = caption_generator.CaptionGenerator(model, self.vocab)
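A hypothetical call site for the Example 30 wrapper; `captioner` (an instance of the class above) and the JPEG bytes in `image` are illustrative:

# Hypothetical usage of the wrapper: caption raw JPEG bytes with the stored
# session, generator, and vocabulary.
captions = captioner.generator.beam_search(captioner.sess, image)
for caption in captions:
    words = [captioner.vocab.id_to_word(w) for w in caption.sentence[1:-1]]
    print(" ".join(words))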