def forward_fixed_text(self, text_npy):
    #text = tf.constant(text_npy)  # fine if the array is smaller than 2GB,
    #but melt.load_constant is safer across more applications
    text = melt.load_constant(text_npy, self.sess)
    with tf.variable_scope(self.scope):
        text_feature = self.forward_text(text)
        return text_feature
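The commented-out tf.constant call hints at why a helper like melt.load_constant exists: a constant gets embedded in the GraphDef, which is capped at roughly 2GB. Below is a minimal sketch of the usual workaround (feed the array through a placeholder into an uninitialized variable); it illustrates the pattern only and is not the actual melt implementation.

import numpy as np
import tensorflow as tf

def load_constant_sketch(np_array, sess, name='const', trainable=False):
    # Illustrative only: feed a large numpy array into a TF variable through a
    # placeholder so the data never ends up inside the GraphDef (tf.constant
    # would hit the ~2GB protobuf limit). melt.load_constant may differ.
    placeholder = tf.placeholder(dtype=np_array.dtype, shape=np_array.shape)
    var = tf.Variable(placeholder, trainable=trainable, collections=[], name=name)
    sess.run(var.initializer, feed_dict={placeholder: np_array})
    return var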
Example No. 2
    def __init__(self,
                 emb=None,
                 fixed_emb=None,
                 name=None,
                 fixed_name=None,
                 model_dir=None,
                 model_name=None,
                 sess=None):
        self._sess = sess or tf.InteractiveSession()

        if isinstance(emb, str) and os.path.isdir(emb):
            model_dir = emb

        if model_dir is None:
            if isinstance(emb, str):
                emb = np.load(emb)
                emb = melt.load_constant(emb, name=name)
            if isinstance(fixed_emb, str):
                #fixed_emb is the corpus embeddings, already summed and normalized
                fixed_emb = melt.load_constant(fixed_emb, name=fixed_name)
        else:
            model_path = melt.get_model_path(model_dir, model_name)
            emb = tf.Variable(0., name=name, validate_shape=False)
            #emb = tf.Variable(0., name=name)
            #e.g. for word2vec the variable name is 'w_in'
            embedding_saver = tf.train.Saver({name: emb})
            embedding_saver.restore(self._sess, model_path)

        #assume index 0 is not used; it is reserved for PAD
        mask = zero_first_row(emb)
        emb = tf.multiply(emb, mask)

        self._emb = emb
        self._fixed_emb = fixed_emb
        self._normed_emb = None
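zero_first_row is not shown in this snippet. Assuming it does what the comment above describes (index 0 is reserved for PAD, so that row of the embedding must stay zero), a plausible stand-in would be:

import tensorflow as tf

def zero_first_row(emb):
    # Hypothetical helper: a mask shaped like emb whose first row is all zeros
    # and every other row is all ones, so tf.multiply(emb, mask) zeroes the
    # PAD embedding at index 0.
    return tf.concat([tf.zeros_like(emb[:1]), tf.ones_like(emb[1:])], axis=0)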
Example No. 3
def init_evaluate_constant_text(self, text_npy):
    #self.text = tf.constant(text_npy)
    if self.text is None:
        #for the cnn encoder we used to truncate to text_npy[:5000] and load it
        #on cpu via melt.load_constant_cpu; now always use melt.load_constant
        self.text = melt.load_constant(text_npy, self.sess)
Example No. 4
def get_or_restore_embedding(name='emb',
                             embedding_file=None,
                             trainable=None,
                             height=None,
                             emb_dim=None,
                             type='word'):
    # keep the embedding on cpu for the adagrad optimizer
    #if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model*ckpt*'):
    #  logging.info('Word embedding random init or from model_dir:{} and trainable=:{}'.format(
    #      FLAGS.model_dir, FLAGS.finetune_word_embedding))
    #TODO verify the check below is ok; the one above works but is a bit complex.
    #Assumption: if the variable is in the checkpoint it will later be restored,
    #overriding the initial constant value.
    embedding_file_ = None
    trainable_ = None
    if type == 'word':
        embedding_file_ = FLAGS.word_embedding_file
        trainable_ = FLAGS.finetune_word_embedding
    elif type == 'char':
        embedding_file_ = FLAGS.char_embedding_file
        trainable_ = FLAGS.finetune_char_embedding
    elif type == 'ngram':
        embedding_file_ = FLAGS.ngram_embedding_file
        trainable_ = FLAGS.finetune_ngram_embedding
    elif type == 'pinyin':
        embedding_file_ = FLAGS.pinyin_embedding_file
        trainable_ = FLAGS.finetune_pinyin_embedding
    else:
        raise ValueError(type)

    embedding_file = embedding_file if embedding_file is not None else embedding_file_
    trainable = trainable if trainable is not None else trainable_

    #logging.info('----------------------', type, embedding_file, height)
    if (not embedding_file) or melt.exists_model(FLAGS.model_dir):
        logging.info(
            '{} random init or restored from model_dir, trainable={}'.format(
                name, trainable))
        emb = get_embedding(name=name,
                            trainable=trainable,
                            height=height,
                            emb_dim=emb_dim)
        #melt.try_add_to_collection('word_embedding', emb)
    else:
        # https://github.com/tensorflow/tensorflow/issues/1570
        # adagrad still requires the embedding to stay on cpu
        # If we do not finetune the embedding this is fine; if we restart finetuning,
        # will it still work? Maybe the word embedding file must not be passed then
        # (check os.path.exists(FLAGS.model_dir)?), or it will still try to load from
        # the checkpoint. TODO: to be safe you can re-run with word_embedding_file
        # set to None or ''.
        logging.info('Loading {} from {} with trainable={}'.format(
            name, embedding_file, trainable))
        timer = gezi.Timer('load constant')
        emb = melt.load_constant(embedding_file,
                                 name=name,
                                 trainable=trainable)
        timer.print_elapsed()
    return emb
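A hypothetical call site for get_or_restore_embedding; only the parameter names come from the function above, the device placement follows the "cpu for adagrad" comment, and the sizes are made up for illustration.

import tensorflow as tf

# Illustrative usage only; the vocabulary size and embedding dim are assumed.
vocab_size, emb_dim = 200000, 256
with tf.device('/cpu:0'):  # keep the embedding on cpu, per the adagrad comment
    word_emb = get_or_restore_embedding(name='word_emb',
                                        type='word',
                                        height=vocab_size,
                                        emb_dim=emb_dim,
                                        trainable=None)  # None falls back to FLAGS.finetune_word_embedding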
Example No. 5
def build_fixed_text_feature_graph(self, text_feature_npy):
    """Load text features directly into the graph.

    @NOTICE all vectors in text_feature_npy must have the same length
    used in evaluate.py for both fixed texts and fixed words
    @FIXME dumping text features should change the api
    """
    with tf.variable_scope("image_text_sim"):
        image_feature = self.forward_image_feature(self.image_feature_feed)
        text_feature = melt.load_constant(self.sess, text_feature_npy)
        score = melt.cosine(image_feature, text_feature, nonorm=True)
        return score
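melt.cosine with nonorm=True suggests the features are already L2-normalized, so the cosine score reduces to a dot product. A rough, assumed stand-in for that scoring step (not the actual melt implementation):

import tensorflow as tf

def cosine_score(image_feature, text_feature, nonorm=False):
    # Assumed behavior: normalize unless the caller says the inputs are already
    # unit length, then score every image against every text.
    if not nonorm:
        image_feature = tf.nn.l2_normalize(image_feature, -1)
        text_feature = tf.nn.l2_normalize(text_feature, -1)
    # [num_images, dim] x [num_texts, dim]^T -> [num_images, num_texts]
    return tf.matmul(image_feature, text_feature, transpose_b=True)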
Example No. 6
def get_or_restore_embedding(name='emb'):
    # keep the embedding on cpu for the adagrad optimizer
    if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir +
                                                    '/model.ckpt*'):
        logging.info(
            'Word embedding random init or from model_dir:{}, trainable={}'
            .format(FLAGS.model_dir, FLAGS.finetune_word_embedding))
        emb = get_embedding(name=name, trainable=FLAGS.finetune_word_embedding)
        melt.try_add_to_collection('word_embedding', emb)
    else:
        # https://github.com/tensorflow/tensorflow/issues/1570
        # adagrad still requires the embedding to stay on cpu
        # If we do not finetune the embedding this is fine; if we restart finetuning,
        # will it still work? Maybe the word embedding file must not be passed then
        # (check os.path.exists(FLAGS.model_dir)?), or it will still try to load from
        # the checkpoint. TODO: to be safe you can re-run with word_embedding_file
        # set to None or ''.
        logging.info('Loading word embedding from {} with trainable={}'.format(
            FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
        emb = melt.load_constant(FLAGS.word_embedding_file,
                                 name=name,
                                 trainable=FLAGS.finetune_word_embedding)
    return emb
Example No. 7
def init_evaluate_constant_text(self, text_npy):
    #self.text = tf.constant(text_npy)
    if self.text is None:
        self.text = melt.load_constant(self.sess, text_npy)