def __init__(self, is_training=True, is_predict=False):
    """Initialise vocabulary, word embedding and fixed layer sizes.

    Args:
      is_training: stored on the instance; combined with
        ``FLAGS.monitor_level > 0`` it switches on embedding monitoring.
      is_predict: stored on the instance.
    """
    super(DecomposableNLI, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    # TODO move to melt.EmbeddingTrainerBase
    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    # Original note: cpu for adagrad optimizer.
    self.emb = embedding.get_or_restore_embedding_cpu()
    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self._attention_output_size = 256
    self._comparison_output_size = 128

    self.scope = 'decomposable_nli'
    # Expose build_graph under the build_train_graph name as well.
    self.build_train_graph = self.build_graph
def __init__(self, is_training=True, is_predict=False):
    """Initialise vocabulary, embedding, initializers and image preprocessing.

    Args:
      is_training: stored on the instance; combined with
        ``FLAGS.monitor_level > 0`` it switches on embedding monitoring.
      is_predict: stored on the instance.
    """
    super(DiscriminantTrainer, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict
    self.gen_text_feature = None

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    # Original notes: if the embedding is not on cpu and the run is on gpu
    # with adagrad, it will fail (TODO check why). Keeping it on cpu is also
    # safer since the embedding is large and might exceed gpu memory.
    # NOTICE (original): with a bidirectional rnn the effective emb_dim is
    # emb_dim / 2, because fw and bw outputs are depth-concatenated at the
    # last step.
    self.emb = embedding.get_embedding_cpu('emb')
    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self.activation = melt.activations[FLAGS.activation]
    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    # NOTE(review): the attribute name is misspelled ("initialzier"); it is
    # kept unchanged because code outside this method may read it.
    self.biases_initialzier = (
        melt.slim.init_ops.zeros_initializer if FLAGS.bias else None)

    # NOTE(review): image_process_fn is only assigned here when raw images
    # are processed; confirm nothing reads it when
    # FLAGS.pre_calc_image_feature is set.
    if not FLAGS.pre_calc_image_feature:
        assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
        self.image_process_fn = functools.partial(
            melt.apps.image_processing.image_processing_fn,
            height=FLAGS.image_height,
            width=FLAGS.image_width)
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
    """Initialise encoder, embedding, initializers, image fn and MLP sizes.

    Args:
      encoder_type: name passed to ``encoder_factory.get_encoder``; also
        stored as ``self.encoder_type``.
      is_training: stored on the instance, forwarded to the encoder and to
        the image processing function, and used to gate embedding
        monitoring.
      is_predict: stored on the instance and forwarded to the encoder.
    """
    super(DiscriminantTrainer, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
    logging.info('margin:{}'.format(FLAGS.margin))

    self.encoder = encoder_factory.get_encoder(
        encoder_type, is_training, is_predict)
    self.encoder_type = encoder_type

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    self.emb = embedding.get_or_restore_embedding_cpu()
    melt.visualize_embedding(self.emb, vocabulary.vocab_path)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self.activation = melt.activations[FLAGS.activation]

    # TODO can consider a global initializer like
    #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
    # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    self.biases_initializer = (
        melt.slim2.init_ops.zeros_initializer if FLAGS.bias else None)

    # Identity by default; replaced below when raw images must be processed.
    self.image_process_fn = lambda x: x
    if not FLAGS.pre_calc_image_feature:
        assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
        self.image_process_fn = functools.partial(
            melt.apps.image_processing.image_processing_fn,
            height=FLAGS.image_height,
            width=FLAGS.image_width,
            trainable=FLAGS.finetune_image_model,
            is_training=is_training,
            random_crop=FLAGS.random_crop_image,
            finetune_end_point=FLAGS.finetune_end_point,
            distort=FLAGS.distort_image,
            feature_name=FLAGS.image_endpoint_feature_name)

    # The flag value '0' means "no MLP". Compare by value: `is not '0'` only
    # worked through string interning and warns on Python 3.8+.
    self.image_mlp_dims = (
        [int(x) for x in FLAGS.image_mlp_dims.split(',')]
        if FLAGS.image_mlp_dims != '0' else None)
    self.text_mlp_dims = (
        [int(x) for x in FLAGS.text_mlp_dims.split(',')]
        if FLAGS.text_mlp_dims != '0' else None)

    self.scope = 'image_text_sim'
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
    """Initialise encoder, word/position embeddings, initializers, MLP sizes.

    Args:
      encoder_type: name passed to ``encoder_factory.get_encoder``; also
        stored as ``self.encoder_type``.
      is_training: stored on the instance, forwarded to the encoder and
        used to gate embedding monitoring.
      is_predict: stored on the instance and forwarded to the encoder.
    """
    super(DualTextsim, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    self.encoder = encoder_factory.get_encoder(
        encoder_type, is_training, is_predict)
    self.encoder_type = encoder_type

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    # Original note: cpu for adagrad optimizer.
    self.emb = embedding.get_or_restore_embedding_cpu()
    self.pos_emb = embedding.get_position_embedding_cpu()

    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self.activation = melt.activations[FLAGS.activation]

    # TODO can consider a global initializer like
    #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
    # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    # NOTE(review): the attribute name is misspelled ("initialzier"); it is
    # kept unchanged because code outside this method may read it.
    self.biases_initialzier = (
        melt.slim.init_ops.zeros_initializer if FLAGS.bias else None)

    # The flag value '0' means "no MLP". Compare by value: `is not '0'` only
    # worked through string interning and warns on Python 3.8+.
    self.mlp_dims = (
        [int(x) for x in FLAGS.mlp_dims.split(',')]
        if FLAGS.mlp_dims != '0' else None)

    # Needed in build graph from PairwiseGraph.
    self.scope = 'dual_textsim'
    self.build_train_graph = self.build_graph
def __init__(self, is_training=True, is_predict=False):
    """Initialise embedding, image/text encoders, initializers, idf weights.

    Args:
      is_training: stored on the instance, forwarded to the image
        processing function and both encoders, and used to gate embedding
        monitoring.
      is_predict: stored on the instance and forwarded to both encoders.

    Raises:
      AssertionError: if ``FLAGS.image_encoder`` is ``'ShowAndTell'``, or if
        raw images are to be processed while
        ``melt.apps.image_processing.image_processing_fn`` is ``None``.
    """
    super(MilTrainer, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
    logging.info('margin:{}'.format(FLAGS.margin))

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    self.emb = embedding.get_or_restore_embedding_cpu()
    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    # Identity by default; replaced below when raw images must be processed.
    self.image_process_fn = lambda x: x
    if not FLAGS.pre_calc_image_feature:
        assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
        self.image_process_fn = functools.partial(
            melt.apps.image_processing.image_processing_fn,
            height=FLAGS.image_height,
            width=FLAGS.image_width,
            trainable=FLAGS.finetune_image_model,
            is_training=is_training,
            random_crop=FLAGS.random_crop_image,
            finetune_end_point=FLAGS.finetune_end_point,
            distort=FLAGS.distort_image,
            feature_name=FLAGS.image_endpoint_feature_name)

    ImageEncoder = deepiu.seq2seq.image_encoder.Encoders[FLAGS.image_encoder]
    self.image_encoder = ImageEncoder(is_training, is_predict, FLAGS.emb_dim)
    # Any image encoder other than 'ShowAndTell' counts as attention here,
    # and this trainer refuses to run without it.
    self.using_attention = FLAGS.image_encoder != 'ShowAndTell'
    assert self.using_attention

    with tf.variable_scope('text_encoder'):
        if FLAGS.text_encoder:
            self.text_encoder = encoder_factory.get_encoder(
                FLAGS.text_encoder, is_training, is_predict)
        else:
            self.text_encoder = None

    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    self.activation = melt.activations[FLAGS.activation]

    # The flag value '0' means "no MLP". Compare by value: `is not '0'` only
    # worked through string interning and warns on Python 3.8+.
    self.text_mlp_dims = (
        [int(x) for x in FLAGS.text_mlp_dims.split(',')]
        if FLAGS.text_mlp_dims != '0' else None)
    self.biases_initializer = (
        melt.slim2.init_ops.zeros_initializer if FLAGS.bias else None)

    logging.info('mil text_encoder:{}'.format(self.text_encoder))

    if FLAGS.use_idf_weights:
        self.idf_weights = tf.constant(idf.get_idf())
    else:
        # Weight 0 for the reserved ids, weight 1 for every other id.
        self.idf_weights = tf.constant(
            [0.] * NUM_RESERVED_IDS
            + [1.0] * (vocab_size - NUM_RESERVED_IDS))

    self.scope = FLAGS.trainer_scope or 'image_text_sim'
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
    """Initialise encoder, word/position embeddings, initializers, MLP sizes.

    The word embedding is created through ``embedding.get_embedding_cpu``
    when no ``FLAGS.word_embedding_file`` is given or when files matching
    ``model.ckpt*`` exist in ``FLAGS.model_dir``; otherwise it is loaded
    from the embedding file with ``melt.load_constant_cpu``.

    Args:
      encoder_type: name passed to ``encoder_factory.get_encoder``; also
        stored as ``self.encoder_type``.
      is_training: stored on the instance, forwarded to the encoder and
        used to gate embedding monitoring.
      is_predict: stored on the instance and forwarded to the encoder.
    """
    super(DualTextsim, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    self.encoder = encoder_factory.get_encoder(
        encoder_type, is_training, is_predict)
    self.encoder_type = encoder_type

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    # Original note: cpu for adagrad optimizer.
    if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model.ckpt*'):
        logging.info(
            'Word embedding random init or from model_dir :{} and finetune=:{}'
            .format(FLAGS.model_dir, FLAGS.finetune_word_embedding))
        self.emb = embedding.get_embedding_cpu(
            name='emb', trainable=FLAGS.finetune_word_embedding)
    else:
        # https://github.com/tensorflow/tensorflow/issues/1570
        # Original notes: adagrad still requires cpu. Open question from the
        # author: is restarting a fine-tuning run with an embedding file
        # safe, or will it still try to load from the checkpoint?
        # TODO to be safe, re-run with word_embedding_file set to None or ''.
        logging.info(
            'Loading word embedding from :{} and finetune=:{}'.format(
                FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
        self.emb = melt.load_constant_cpu(
            FLAGS.word_embedding_file,
            name='emb',
            trainable=FLAGS.finetune_word_embedding)

    if FLAGS.position_embedding:
        logging.info('Using position embedding')
        self.pos_emb = embedding.get_embedding_cpu(
            name='pos_emb', height=TEXT_MAX_WORDS)
    else:
        self.pos_emb = None

    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self.activation = melt.activations[FLAGS.activation]

    # TODO can consider a global initializer like
    #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
    # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    # NOTE(review): the attribute name is misspelled ("initialzier"); it is
    # kept unchanged because code outside this method may read it.
    self.biases_initialzier = (
        melt.slim.init_ops.zeros_initializer if FLAGS.bias else None)

    # The flag value '0' means "no MLP". Compare by value: `is not '0'` only
    # worked through string interning and warns on Python 3.8+.
    self.mlp_dims = (
        [int(x) for x in FLAGS.mlp_dims.split(',')]
        if FLAGS.mlp_dims != '0' else None)

    self.scope = 'dual_textsim'
    self.build_train_graph = self.build_graph
def __init__(self, is_training=True, is_predict=False):
    """Initialise embedding, image encoder, rnn decoder and image functions.

    Args:
      is_training: stored on the instance, forwarded to the encoder,
        decoder and image processing functions, and used to gate embedding
        monitoring.
      is_predict: stored on the instance and forwarded to the encoder and
        decoder.

    Side effects:
      Sets ``FLAGS.pre_calc_image_feature = True`` when
      ``FLAGS.showtell_noimage`` is set.
    """
    super(ShowAndTell, self).__init__()

    if FLAGS.image_as_init_state:
        # Original note: just using the default method here is ok.
        assert FLAGS.add_text_start is True, 'need to add text start for im2tx mode'
    # Original note: an `else` branch asserting add_text_start is False was
    # tried (to match im2txt) but gave worse results, so it stays disabled.

    self.is_training = is_training
    self.is_predict = is_predict
    # Evaluation mode is whatever is neither training nor prediction.
    self.is_evaluate = not (is_training or is_predict)

    if FLAGS.showtell_noimage:
        FLAGS.pre_calc_image_feature = True

    logging.info('num_sampled:{}'.format(FLAGS.num_sampled))
    logging.info('log_uniform_sample:{}'.format(FLAGS.log_uniform_sample))
    logging.info('keep_prob:{}'.format(FLAGS.keep_prob))
    logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
    logging.info('add_text_start:{}'.format(FLAGS.add_text_start))
    logging.info('zero_as_text_start:{}'.format(FLAGS.zero_as_text_start))

    word_emb = embedding.get_or_restore_embedding_cpu()
    self.emb = word_emb
    melt.visualize_embedding(self.emb, vocabulary.vocab_path)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(word_emb, vocabulary.vocab, vocabulary.vocab_size)

    self.idf_weights = None
    if FLAGS.use_idf_weights:
        # Original note: for tf-idf, same as cider.
        # NOTE(review): `tf.get_idf()` looks suspicious; it may have been
        # meant to be `idf.get_idf()`. Confirm against the file's imports.
        self.idf_weights = tf.constant(tf.get_idf())

    self.emb_dim = FLAGS.emb_dim
    self.using_attention = FLAGS.image_encoder != 'ShowAndTell'

    image_encoder_cls = deepiu.seq2seq.image_encoder.Encoders[FLAGS.image_encoder]
    # Only for backward compatibility of scripts. TODO remove show_atten_tell
    if FLAGS.show_atten_tell:
        logging.info('warning, show_atten_tell mode depreciated, just set --image_encoder=')
        image_encoder_cls = deepiu.seq2seq.image_encoder.MemoryEncoder

    self.encoder = image_encoder_cls(is_training, is_predict, self.emb_dim)
    self.decoder = deepiu.seq2seq.rnn_decoder.RnnDecoder(is_training, is_predict)
    self.decoder.set_embedding(word_emb)

    def _make_image_fn(feature_name):
        # Bind the shared image-processing options; only feature_name varies.
        return functools.partial(
            melt.apps.image_processing.image_processing_fn,
            height=FLAGS.image_height,
            width=FLAGS.image_width,
            trainable=FLAGS.finetune_image_model,
            is_training=is_training,
            random_crop=FLAGS.random_crop_image,
            finetune_end_point=FLAGS.finetune_end_point,
            distort=FLAGS.distort_image,
            feature_name=feature_name)

    # For image finetuning with raw images as input.
    if FLAGS.pre_calc_image_feature:
        self.image_process_fn = None
    else:
        assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
        self.image_process_fn = _make_image_fn(FLAGS.image_endpoint_feature_name)

    self.image_feature_len = FLAGS.image_feature_len or IMAGE_FEATURE_LEN

    if FLAGS.discriminant_loss_ratio > 0:
        self.encoder2 = encoder_factory.get_encoder('bow', is_training, is_predict)

    self.scene_feature = None
    self.scene_logits_resue = False
    if FLAGS.scene_train_input:
        self.scene_image_process_fn = _make_image_fn(None)

    self.dupimage = False
def __init__(self, encoder_type='bow', is_training=True, is_predict=False):
    """Initialise encoder, embedding, initializers, image fn and MLP sizes.

    The word embedding is created through ``embedding.get_embedding_cpu``
    when no ``FLAGS.word_embedding_file`` is given or when files matching
    ``model.ckpt*`` exist in ``FLAGS.model_dir``; otherwise it is loaded
    from the embedding file with ``melt.load_constant_cpu``.

    Args:
      encoder_type: name passed to ``encoder_factory.get_encoder``; also
        stored as ``self.encoder_type``.
      is_training: stored on the instance, forwarded to the encoder and
        used to gate embedding monitoring.
      is_predict: stored on the instance and forwarded to the encoder.
    """
    super(DiscriminantTrainer, self).__init__()

    self.is_training = is_training
    self.is_predict = is_predict

    logging.info('emb_dim:{}'.format(FLAGS.emb_dim))
    logging.info('margin:{}'.format(FLAGS.margin))

    self.encoder = encoder_factory.get_encoder(
        encoder_type, is_training, is_predict)
    self.encoder_type = encoder_type

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()
    self.vocab_size = vocab_size

    # Original notes: if the embedding is not on cpu and the run is on gpu
    # with adagrad, it will fail (TODO check why). Keeping it on cpu is also
    # safer since the embedding is large and might exceed gpu memory.
    if (not FLAGS.word_embedding_file) or glob.glob(FLAGS.model_dir + '/model.ckpt*'):
        logging.info(
            'Word embedding random init or from model_dir :{} and finetune=:{}'
            .format(FLAGS.model_dir, FLAGS.finetune_word_embedding))
        self.emb = embedding.get_embedding_cpu(
            name='emb', trainable=FLAGS.finetune_word_embedding)
    else:
        # https://github.com/tensorflow/tensorflow/issues/1570
        # Original notes: adagrad still requires cpu. Open question from the
        # author: is restarting a fine-tuning run with an embedding file
        # safe, or will it still try to load from the checkpoint?
        # TODO to be safe, re-run with word_embedding_file set to None or ''.
        logging.info(
            'Loading word embedding from :{} and finetune=:{}'.format(
                FLAGS.word_embedding_file, FLAGS.finetune_word_embedding))
        self.emb = melt.load_constant_cpu(
            FLAGS.word_embedding_file,
            name='emb',
            trainable=FLAGS.finetune_word_embedding)

    melt.visualize_embedding(self.emb, FLAGS.vocab)
    if is_training and FLAGS.monitor_level > 0:
        melt.monitor_embedding(self.emb, vocabulary.vocab, vocab_size)

    self.activation = melt.activations[FLAGS.activation]

    # TODO can consider a global initializer like
    #   with tf.variable_scope("Model", reuse=None, initializer=initializer)
    # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py
    self.weights_initializer = tf.random_uniform_initializer(
        -FLAGS.initializer_scale, FLAGS.initializer_scale)
    # NOTE(review): the attribute name is misspelled ("initialzier"); it is
    # kept unchanged because code outside this method may read it.
    self.biases_initialzier = (
        melt.slim.init_ops.zeros_initializer if FLAGS.bias else None)

    # NOTE(review): image_process_fn is only assigned here when raw images
    # are processed; confirm nothing reads it when
    # FLAGS.pre_calc_image_feature is set.
    if not FLAGS.pre_calc_image_feature:
        assert melt.apps.image_processing.image_processing_fn is not None, 'forget melt.apps.image_processing.init()'
        self.image_process_fn = functools.partial(
            melt.apps.image_processing.image_processing_fn,
            height=FLAGS.image_height,
            width=FLAGS.image_width)

    # The flag value '0' means "no MLP". Compare by value: `is not '0'` only
    # worked through string interning and warns on Python 3.8+.
    self.image_mlp_dims = (
        [int(x) for x in FLAGS.image_mlp_dims.split(',')]
        if FLAGS.image_mlp_dims != '0' else None)
    self.text_mlp_dims = (
        [int(x) for x in FLAGS.text_mlp_dims.split(',')]
        if FLAGS.text_mlp_dims != '0' else None)

    self.scope = 'image_text_sim'